Continuous voice_assistant and silence detection (#4892)

This commit is contained in:
Jesse Hills
2023-05-31 16:30:53 +12:00
committed by GitHub
parent f9f335e692
commit 1ea5d90ea3
12 changed files with 176 additions and 25 deletions
@@ -69,17 +69,42 @@ void VoiceAssistant::setup() {
void VoiceAssistant::loop() {
#ifdef USE_SPEAKER
if (this->speaker_ == nullptr) {
if (this->speaker_ != nullptr) {
uint8_t buf[1024];
auto len = this->socket_->read(buf, sizeof(buf));
if (len == -1) {
return;
}
this->speaker_->play(buf, len);
this->set_timeout("data-incoming", 200, [this]() {
if (this->continuous_) {
this->request_start(true);
}
});
return;
}
uint8_t buf[1024];
auto len = this->socket_->read(buf, sizeof(buf));
if (len == -1) {
return;
}
this->speaker_->play(buf, len);
#endif
#ifdef USE_MEDIA_PLAYER
if (this->media_player_ != nullptr) {
if (!this->playing_tts_ ||
this->media_player_->state == media_player::MediaPlayerState::MEDIA_PLAYER_STATE_PLAYING) {
return;
}
this->set_timeout("playing-media", 1000, [this]() {
this->playing_tts_ = false;
if (this->continuous_) {
this->request_start(true);
}
});
return;
}
#endif
// Set a 1 second timeout to start the voice assistant again.
this->set_timeout("continuous-no-sound", 1000, [this]() {
if (this->continuous_) {
this->request_start(true);
}
});
}
void VoiceAssistant::start(struct sockaddr_storage *addr, uint16_t port) {
@@ -100,14 +125,19 @@ void VoiceAssistant::start(struct sockaddr_storage *addr, uint16_t port) {
}
this->running_ = true;
this->mic_->start();
this->listening_trigger_->trigger();
}
void VoiceAssistant::request_start() {
void VoiceAssistant::request_start(bool continuous) {
ESP_LOGD(TAG, "Requesting start...");
if (!api::global_api_server->start_voice_assistant()) {
if (!api::global_api_server->start_voice_assistant(this->conversation_id_)) {
ESP_LOGW(TAG, "Could not request start.");
this->error_trigger_->trigger("not-connected", "Could not request start.");
this->continuous_ = false;
return;
}
this->continuous_ = continuous;
this->set_timeout("reset-conversation_id", 5 * 60 * 1000, [this]() { this->conversation_id_ = ""; });
}
void VoiceAssistant::signal_stop() {
@@ -136,9 +166,18 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
return;
}
ESP_LOGD(TAG, "Speech recognised as: \"%s\"", text.c_str());
this->signal_stop();
this->stt_end_trigger_->trigger(text);
break;
}
case api::enums::VOICE_ASSISTANT_INTENT_END: {
for (auto arg : msg.data) {
if (arg.name == "conversation_id") {
this->conversation_id_ = std::move(arg.value);
}
}
break;
}
case api::enums::VOICE_ASSISTANT_TTS_START: {
std::string text;
for (auto arg : msg.data) {
@@ -166,6 +205,12 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
return;
}
ESP_LOGD(TAG, "Response URL: \"%s\"", url.c_str());
#ifdef USE_MEDIA_PLAYER
if (this->media_player_ != nullptr) {
this->playing_tts_ = true;
this->media_player_->make_call().set_media_url(url).perform();
}
#endif
this->tts_end_trigger_->trigger(url);
break;
}
@@ -184,6 +229,8 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
}
}
ESP_LOGE(TAG, "Error: %s - %s", code.c_str(), message.c_str());
this->continuous_ = false;
this->signal_stop();
this->error_trigger_->trigger(code, message);
}
default: