ESPHome: /opt/build/esphome/esphome/components/voice_assistant/voice_assistant.cpp Source File

 #include "voice_assistant.h"
 #include "esphome/core/defines.h"

 #ifdef USE_VOICE_ASSISTANT

 #include "esphome/core/log.h"

 #include <cinttypes>
 #include <cstdio>

 namespace esphome {
 namespace voice_assistant {

 // Tag passed to the ESP_LOG* macros throughout this file.
 static const char *const TAG = "voice_assistant";

 // Drop any SAMPLE_RATE_HZ macro defined by an earlier header so the name can be
 // reused for the typed constant below.
 #ifdef SAMPLE_RATE_HZ
 #undef SAMPLE_RATE_HZ
 #endif

 // Audio sample rate in Hz.
 static const size_t SAMPLE_RATE_HZ = 16000;
 // Microphone input buffer length, in int16_t samples (512 samples).
 static const size_t INPUT_BUFFER_SIZE = 32 * SAMPLE_RATE_HZ / 1000;  // 32ms * 16kHz / 1000ms
 // Ring buffer capacity, in int16_t samples (512ms of audio); multiplied by sizeof(int16_t) when the buffer is created.
 static const size_t BUFFER_SIZE = 512 * SAMPLE_RATE_HZ / 1000;
 // Size in bytes of one outgoing audio chunk (one full input buffer).
 static const size_t SEND_BUFFER_SIZE = INPUT_BUFFER_SIZE * sizeof(int16_t);
 // Maximum number of bytes requested from the socket per read while streaming a response.
 static const size_t RECEIVE_SIZE = 1024;
 // Size in bytes of the buffer that holds received audio before it is handed to the speaker.
 static const size_t SPEAKER_BUFFER_SIZE = 16 * RECEIVE_SIZE;

 // Publish the newly constructed instance through the global pointer.
 VoiceAssistant::VoiceAssistant() {
   global_voice_assistant = this;
 }

 // Setup priority reported for this component.
 float VoiceAssistant::get_setup_priority() const {
   return setup_priority::AFTER_CONNECTION;
 }

 // Create the non-blocking UDP socket used for audio streaming.
 //
 // When built with USE_SPEAKER and a speaker is configured, the socket is also bound to
 // port 6055 on the "any" address so response audio can be read from it.
 //
 // Returns true once the socket is ready (udp_socket_running_ is set). Returns false on a
 // fatal error, after marking the component as failed.
 bool VoiceAssistant::start_udp_socket_() {
   this->socket_ = socket::socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
   if (this->socket_ == nullptr) {
     ESP_LOGE(TAG, "Could not create socket");
     this->mark_failed();
     return false;
   }
   int enable = 1;
   int err = this->socket_->setsockopt(SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int));
   if (err != 0) {
     // Report errno (as the sockaddr/bind error paths below do) rather than the call's return value.
     ESP_LOGW(TAG, "Socket unable to set reuseaddr: errno %d", errno);
     // we can still continue
   }
   err = this->socket_->setblocking(false);
   if (err != 0) {
     ESP_LOGE(TAG, "Socket unable to set nonblocking mode: errno %d", errno);
     this->mark_failed();
     return false;
   }

 #ifdef USE_SPEAKER
   if (this->speaker_ != nullptr) {
     struct sockaddr_storage server;

     // A zero length is treated as failure; otherwise the returned value is presumably the
     // length of the address that was written into `server`.
     socklen_t sl = socket::set_sockaddr_any((struct sockaddr *) &server, sizeof(server), 6055);
     if (sl == 0) {
       ESP_LOGE(TAG, "Socket unable to set sockaddr: errno %d", errno);
       this->mark_failed();
       return false;
     }

     // Bind with the length reported for the populated address, not the size of the
     // (larger, family-agnostic) sockaddr_storage container.
     err = this->socket_->bind((struct sockaddr *) &server, sl);
     if (err != 0) {
       ESP_LOGE(TAG, "Socket unable to bind: errno %d", errno);
       this->mark_failed();
       return false;
     }
   }
 #endif
   this->udp_socket_running_ = true;
   return true;
 }

 // Allocate the audio buffers (speaker, input, ring and send buffers) and, with
 // USE_ESP_ADF, the VAD instance.
 //
 // The send buffer is allocated last and doubles as the "everything is allocated" marker.
 // Each earlier buffer is only allocated when it is still null, so a call that fails part
 // way through can be retried (loop() retries on every iteration) without leaking the
 // buffers that were already obtained.
 //
 // Returns true when all buffers are available, false if any allocation failed.
 bool VoiceAssistant::allocate_buffers_() {
   if (this->send_buffer_ != nullptr) {
     return true;  // Already allocated
   }

 #ifdef USE_SPEAKER
   if ((this->speaker_ != nullptr) && (this->speaker_buffer_ == nullptr)) {
     ExternalRAMAllocator<uint8_t> speaker_allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);
     this->speaker_buffer_ = speaker_allocator.allocate(SPEAKER_BUFFER_SIZE);
     if (this->speaker_buffer_ == nullptr) {
       ESP_LOGW(TAG, "Could not allocate speaker buffer");
       return false;
     }
   }
 #endif

   if (this->input_buffer_ == nullptr) {
     ExternalRAMAllocator<int16_t> allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
     this->input_buffer_ = allocator.allocate(INPUT_BUFFER_SIZE);
     if (this->input_buffer_ == nullptr) {
       ESP_LOGW(TAG, "Could not allocate input buffer");
       return false;
     }
   }

   if (this->ring_buffer_ == nullptr) {
     this->ring_buffer_ = RingBuffer::create(BUFFER_SIZE * sizeof(int16_t));
     if (this->ring_buffer_ == nullptr) {
       ESP_LOGW(TAG, "Could not allocate ring buffer");
       return false;
     }
   }

   ExternalRAMAllocator<uint8_t> send_allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);
   this->send_buffer_ = send_allocator.allocate(SEND_BUFFER_SIZE);
   if (this->send_buffer_ == nullptr) {
     ESP_LOGW(TAG, "Could not allocate send buffer");
     return false;
   }

 #ifdef USE_ESP_ADF
   // Created only after every fallible allocation has succeeded, so a retried call never
   // creates a second VAD instance on top of an existing one.
   this->vad_instance_ = vad_create(VAD_MODE_4);
 #endif

   return true;
 }

 // Zero every buffer that is currently allocated and reset the ring buffer and the
 // speaker bookkeeping counters. Buffers that are still null are skipped.
 void VoiceAssistant::clear_buffers_() {
   uint8_t *const outgoing = this->send_buffer_;
   if (outgoing != nullptr) {
     memset(outgoing, 0, SEND_BUFFER_SIZE);
   }

   int16_t *const incoming = this->input_buffer_;
   if (incoming != nullptr) {
     memset(incoming, 0, INPUT_BUFFER_SIZE * sizeof(int16_t));
   }

   if (this->ring_buffer_ != nullptr) {
     this->ring_buffer_->reset();
   }

 #ifdef USE_SPEAKER
   const bool speaker_buffer_ready = (this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr);
   if (speaker_buffer_ready) {
     memset(this->speaker_buffer_, 0, SPEAKER_BUFFER_SIZE);

     // Nothing is buffered any more, so restart all speaker counters from zero.
     this->speaker_buffer_size_ = 0;
     this->speaker_buffer_index_ = 0;
     this->speaker_bytes_received_ = 0;
   }
 #endif
 }

 void VoiceAssistant::deallocate_buffers_() {
   ExternalRAMAllocator<uint8_t> send_deallocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);
   send_deallocator.deallocate(this->send_buffer_, SEND_BUFFER_SIZE);
   this->send_buffer_ = nullptr;

   if (this->ring_buffer_ != nullptr) {
     this->ring_buffer_.reset();
     this->ring_buffer_ = nullptr;
   }

 #ifdef USE_ESP_ADF
   if (this->vad_instance_ != nullptr) {
     vad_destroy(this->vad_instance_);
     this->vad_instance_ = nullptr;
   }
 #endif

   ExternalRAMAllocator<int16_t> input_deallocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
   input_deallocator.deallocate(this->input_buffer_, INPUT_BUFFER_SIZE);
   this->input_buffer_ = nullptr;

 #ifdef USE_SPEAKER
   if ((this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr)) {
     ExternalRAMAllocator<uint8_t> speaker_deallocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);
     speaker_deallocator.deallocate(this->speaker_buffer_, SPEAKER_BUFFER_SIZE);
     this->speaker_buffer_ = nullptr;
   }
 #endif
 }

 // Forget the stored conversation ID so the next request is sent with an empty one.
 void VoiceAssistant::reset_conversation_id() {
   this->conversation_id_.clear();
   ESP_LOGD(TAG, "reset conversation ID");
 }

 // Read one chunk from the microphone into the input buffer and append it to the ring buffer.
 //
 // Returns the number of bytes read; 0 when the microphone is not running or produced no
 // data (in the latter case the input buffer is zeroed).
 int VoiceAssistant::read_microphone_() {
   if (!this->mic_->is_running()) {
     ESP_LOGD(TAG, "microphone not running");
     return 0;
   }

   // Read audio into input buffer
   const size_t input_capacity_bytes = INPUT_BUFFER_SIZE * sizeof(int16_t);
   const size_t bytes_read = this->mic_->read(this->input_buffer_, input_capacity_bytes);
   if (bytes_read == 0) {
     memset(this->input_buffer_, 0, input_capacity_bytes);
     return 0;
   }

   // Write audio into ring buffer
   this->ring_buffer_->write((void *) this->input_buffer_, bytes_read);
   return bytes_read;
 }

 // Run one step of the voice assistant state machine.
 //
 // Each call first checks whether the API client has gone away while a session is in
 // progress and, if so, aborts the session. Otherwise it performs the work for the
 // current state_. States named START_x/STOP_x kick an action off and hand over to the
 // matching STARTING_x/STOPPING_x state, which waits for completion and then moves on to
 // desired_state_.
 void VoiceAssistant::loop() {
   // No API client while a session is active: stop the microphone if it is running or
   // being started (otherwise go straight to IDLE), leave continuous mode and drop any
   // buffered audio.
   if (this->api_client_ == nullptr && this->state_ != State::IDLE && this->state_ != State::STOP_MICROPHONE &&
       this->state_ != State::STOPPING_MICROPHONE) {
     if (this->mic_->is_running() || this->state_ == State::STARTING_MICROPHONE) {
       this->set_state_(State::STOP_MICROPHONE, State::IDLE);
     } else {
       this->set_state_(State::IDLE, State::IDLE);
     }
     this->continuous_ = false;
     this->signal_stop_();
     this->clear_buffers_();
     return;
   }
   switch (this->state_) {
     case State::IDLE: {
       // In continuous mode a new session is started as soon as the previous one has ended.
       if (this->continuous_ && this->desired_state_ == State::IDLE) {
         this->idle_trigger_->trigger();
 #ifdef USE_ESP_ADF
         if (this->use_wake_word_) {
           this->set_state_(State::START_MICROPHONE, State::WAIT_FOR_VAD);
         } else
 #endif
         {
           this->set_state_(State::START_MICROPHONE, State::START_PIPELINE);
         }
       } else {
         this->high_freq_.stop();
       }
       break;
     }
     case State::START_MICROPHONE: {
       ESP_LOGD(TAG, "Starting Microphone");
       // On allocation failure the state is left unchanged, so the allocation is attempted
       // again on the next call.
       if (!this->allocate_buffers_()) {
         this->status_set_error("Failed to allocate buffers");
         return;
       }
       if (this->status_has_error()) {
         this->status_clear_error();
       }
       this->clear_buffers_();

       this->mic_->start();
       this->high_freq_.start();
       this->set_state_(State::STARTING_MICROPHONE);
       break;
     }
     case State::STARTING_MICROPHONE: {
       // Wait until the microphone reports that it is running.
       if (this->mic_->is_running()) {
         this->set_state_(this->desired_state_);
       }
       break;
     }
 #ifdef USE_ESP_ADF
     case State::WAIT_FOR_VAD: {
       this->read_microphone_();
       ESP_LOGD(TAG, "Waiting for speech...");
       this->set_state_(State::WAITING_FOR_VAD);
       break;
     }
     case State::WAITING_FOR_VAD: {
       size_t bytes_read = this->read_microphone_();
       if (bytes_read > 0) {
         vad_state_t vad_state =
             vad_process(this->vad_instance_, this->input_buffer_, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS);
         // vad_counter_ is incremented on each speech frame and decremented on each
         // non-speech frame; the pipeline starts on a speech frame once the counter has
         // reached vad_threshold_.
         if (vad_state == VAD_SPEECH) {
           if (this->vad_counter_ < this->vad_threshold_) {
             this->vad_counter_++;
           } else {
             ESP_LOGD(TAG, "VAD detected speech");
             this->set_state_(State::START_PIPELINE, State::STREAMING_MICROPHONE);

             // Reset for next time
             this->vad_counter_ = 0;
           }
         } else {
           if (this->vad_counter_ > 0) {
             this->vad_counter_--;
           }
         }
       }
       break;
     }
 #endif
     case State::START_PIPELINE: {
       // Keep reading the microphone so audio accumulates in the ring buffer while the
       // start request is being sent.
       this->read_microphone_();
       ESP_LOGD(TAG, "Requesting start...");
       uint32_t flags = 0;
       if (this->use_wake_word_)
         flags |= api::enums::VOICE_ASSISTANT_REQUEST_USE_WAKE_WORD;
       if (this->silence_detection_)
         flags |= api::enums::VOICE_ASSISTANT_REQUEST_USE_VAD;
       api::VoiceAssistantAudioSettings audio_settings;
       audio_settings.noise_suppression_level = this->noise_suppression_level_;
       audio_settings.auto_gain = this->auto_gain_;
       audio_settings.volume_multiplier = this->volume_multiplier_;

       api::VoiceAssistantRequest msg;
       msg.start = true;
       msg.conversation_id = this->conversation_id_;
       msg.flags = flags;
       msg.audio_settings = audio_settings;
       msg.wake_word_phrase = this->wake_word_;
       // The wake word phrase is sent with one request only.
       this->wake_word_ = "";

       if (this->api_client_ == nullptr || !this->api_client_->send_voice_assistant_request(msg)) {
         ESP_LOGW(TAG, "Could not request start");
         this->error_trigger_->trigger("not-connected", "Could not request start");
         this->continuous_ = false;
         this->set_state_(State::IDLE, State::IDLE);
         break;
       }
       this->set_state_(State::STARTING_PIPELINE);
       // (Re)arm the timeout that clears the conversation ID after conversation_timeout_.
       this->set_timeout("reset-conversation_id", this->conversation_timeout_,
                         [this]() { this->reset_conversation_id(); });
       break;
     }
     case State::STARTING_PIPELINE: {
       this->read_microphone_();
       break;  // State changed when udp server port received
     }
     case State::STREAMING_MICROPHONE: {
       this->read_microphone_();
       // Drain the ring buffer in SEND_BUFFER_SIZE chunks, sending each chunk either over
       // the API connection or over the UDP socket depending on audio_mode_.
       size_t available = this->ring_buffer_->available();
       while (available >= SEND_BUFFER_SIZE) {
         size_t read_bytes = this->ring_buffer_->read((void *) this->send_buffer_, SEND_BUFFER_SIZE, 0);
         if (this->audio_mode_ == AUDIO_MODE_API) {
           api::VoiceAssistantAudio msg;
           msg.data.assign((char *) this->send_buffer_, read_bytes);
           this->api_client_->send_voice_assistant_audio(msg);
         } else {
           // The UDP socket is created lazily on the first chunk that needs it.
           if (!this->udp_socket_running_) {
             if (!this->start_udp_socket_()) {
               this->set_state_(State::STOP_MICROPHONE, State::IDLE);
               break;  // Leaves the while loop only; the chunk just read is not sent.
             }
           }
           this->socket_->sendto(this->send_buffer_, read_bytes, 0, (struct sockaddr *) &this->dest_addr_,
                                 sizeof(this->dest_addr_));
         }
         available = this->ring_buffer_->available();
       }

       break;
     }
     case State::STOP_MICROPHONE: {
       if (this->mic_->is_running()) {
         this->mic_->stop();
         this->set_state_(State::STOPPING_MICROPHONE);
       } else {
         this->set_state_(this->desired_state_);
       }
       break;
     }
     case State::STOPPING_MICROPHONE: {
       // Wait until the microphone reports that it has stopped.
       if (this->mic_->is_stopped()) {
         this->set_state_(this->desired_state_);
       }
       break;
     }
     case State::AWAITING_RESPONSE: {
       break;  // State changed by events
     }
     case State::STREAMING_RESPONSE: {
       bool playing = false;
 #ifdef USE_SPEAKER
       if (this->speaker_ != nullptr) {
         ssize_t received_len = 0;
         // In UDP mode the response audio is read from the socket here; in API mode it is
         // appended to the speaker buffer by on_audio().
         if (this->audio_mode_ == AUDIO_MODE_UDP) {
           if (this->speaker_buffer_index_ + RECEIVE_SIZE < SPEAKER_BUFFER_SIZE) {
             received_len = this->socket_->read(this->speaker_buffer_ + this->speaker_buffer_index_, RECEIVE_SIZE);
             if (received_len > 0) {
               this->speaker_buffer_index_ += received_len;
               this->speaker_buffer_size_ += received_len;
               this->speaker_bytes_received_ += received_len;
             }
           } else {
             ESP_LOGD(TAG, "Receive buffer full");
           }
         }
         // Build a small buffer of audio before sending to the speaker
         bool end_of_stream = this->stream_ended_ && (this->audio_mode_ == AUDIO_MODE_API || received_len < 0);
         if (this->speaker_bytes_received_ > RECEIVE_SIZE * 4 || end_of_stream)
           this->write_speaker_();
         if (this->wait_for_stream_end_) {
           this->cancel_timeout("playing");
           if (end_of_stream) {
             ESP_LOGD(TAG, "End of audio stream received");
             this->cancel_timeout("speaker-timeout");
             this->set_state_(State::RESPONSE_FINISHED, State::RESPONSE_FINISHED);
           }
           break;  // We dont want to timeout here as the STREAM_END event will take care of that.
         }
         playing = this->speaker_->is_running();
       }
 #endif
 #ifdef USE_MEDIA_PLAYER
       if (this->media_player_ != nullptr) {
         playing = (this->media_player_->state == media_player::MediaPlayerState::MEDIA_PLAYER_STATE_ANNOUNCING);
       }
 #endif
       // While playback is in progress the "playing" timeout is re-armed on every call, so
       // it only fires once playback is no longer reported.
       if (playing) {
         this->start_playback_timeout_();
       }
       break;
     }
     case State::RESPONSE_FINISHED: {
 #ifdef USE_SPEAKER
       if (this->speaker_ != nullptr) {
         // First flush whatever is still held in the local speaker buffer...
         if (this->speaker_buffer_size_ > 0) {
           this->write_speaker_();
           break;
         }
         // ...then wait for the speaker itself to finish.
         if (this->speaker_->has_buffered_data() || this->speaker_->is_running()) {
           break;
         }
         ESP_LOGD(TAG, "Speaker has finished outputting all audio");
         this->speaker_->stop();
         this->cancel_timeout("speaker-timeout");
         this->cancel_timeout("playing");

         this->clear_buffers_();

         this->wait_for_stream_end_ = false;
         this->stream_ended_ = false;

         this->tts_stream_end_trigger_->trigger();
       }
 #endif
       this->set_state_(State::IDLE, State::IDLE);
       break;
     }
     default:
       break;
   }
 }

 #ifdef USE_SPEAKER
 // Hand up to 4 KiB of buffered response audio to the speaker.
 //
 // Whatever the speaker accepted is removed from the front of the speaker buffer and the
 // "speaker-timeout" timeout (which stops the speaker after 5 seconds) is re-armed.
 void VoiceAssistant::write_speaker_() {
   if ((this->speaker_ == nullptr) || (this->speaker_buffer_ == nullptr)) {
     return;
   }
   if (!(this->speaker_buffer_size_ > 0)) {
     return;  // Nothing buffered
   }

   const size_t chunk_len = std::min<size_t>(this->speaker_buffer_size_, 4 * 1024);
   const size_t accepted = this->speaker_->play(this->speaker_buffer_, chunk_len);
   if (!(accepted > 0)) {
     ESP_LOGV(TAG, "Speaker buffer full, trying again next loop");
     return;
   }

   // Shift the audio that has not been played yet to the start of the buffer.
   memmove(this->speaker_buffer_, this->speaker_buffer_ + accepted, this->speaker_buffer_size_ - accepted);
   this->speaker_buffer_size_ -= accepted;
   this->speaker_buffer_index_ -= accepted;
   this->set_timeout("speaker-timeout", 5000, [this]() { this->speaker_->stop(); });
 }
 #endif

 // Register or unregister the API connection that drives the voice assistant.
 //
 // Only one client may be subscribed at a time. A second subscriber, or an unsubscribe
 // from a client that is not the current one, is logged as an error and ignored.
 void VoiceAssistant::client_subscription(api::APIConnection *client, bool subscribe) {
   if (subscribe) {
     if (this->api_client_ != nullptr) {
       ESP_LOGE(TAG, "Multiple API Clients attempting to connect to Voice Assistant");
       ESP_LOGE(TAG, "Current client: %s", this->api_client_->get_client_combined_info().c_str());
       ESP_LOGE(TAG, "New client: %s", client->get_client_combined_info().c_str());
       return;
     }

     this->api_client_ = client;
     this->client_connected_trigger_->trigger();
     return;
   }

   // Unsubscribe: only the currently registered client may detach itself.
   const bool is_current_client = (this->api_client_ != nullptr) && (client == this->api_client_);
   if (!is_current_client) {
     ESP_LOGE(TAG, "Client attempting to unsubscribe that is not the current API Client");
     return;
   }
   this->api_client_ = nullptr;
   this->client_disconnected_trigger_->trigger();
 }

 // Map a State value to its name for log output; values without a case yield "UNKNOWN".
 static const LogString *voice_assistant_state_to_string(State state) {
   const LogString *label = LOG_STR("UNKNOWN");
   switch (state) {
     case State::IDLE:
       label = LOG_STR("IDLE");
       break;
     case State::START_MICROPHONE:
       label = LOG_STR("START_MICROPHONE");
       break;
     case State::STARTING_MICROPHONE:
       label = LOG_STR("STARTING_MICROPHONE");
       break;
     case State::WAIT_FOR_VAD:
       label = LOG_STR("WAIT_FOR_VAD");
       break;
     case State::WAITING_FOR_VAD:
       label = LOG_STR("WAITING_FOR_VAD");
       break;
     case State::START_PIPELINE:
       label = LOG_STR("START_PIPELINE");
       break;
     case State::STARTING_PIPELINE:
       label = LOG_STR("STARTING_PIPELINE");
       break;
     case State::STREAMING_MICROPHONE:
       label = LOG_STR("STREAMING_MICROPHONE");
       break;
     case State::STOP_MICROPHONE:
       label = LOG_STR("STOP_MICROPHONE");
       break;
     case State::STOPPING_MICROPHONE:
       label = LOG_STR("STOPPING_MICROPHONE");
       break;
     case State::AWAITING_RESPONSE:
       label = LOG_STR("AWAITING_RESPONSE");
       break;
     case State::STREAMING_RESPONSE:
       label = LOG_STR("STREAMING_RESPONSE");
       break;
     case State::RESPONSE_FINISHED:
       label = LOG_STR("RESPONSE_FINISHED");
       break;
     default:
       break;
   }
   return label;
 }

 // Switch to `state` and log the transition. desired_state_ is left untouched.
 void VoiceAssistant::set_state_(State state) {
   const State previous = this->state_;
   this->state_ = state;
   ESP_LOGD(TAG, "State changed from %s to %s",
            LOG_STR_ARG(voice_assistant_state_to_string(previous)),
            LOG_STR_ARG(voice_assistant_state_to_string(this->state_)));
 }

 // Switch to `state` and record `desired_state` as the state to move on to afterwards;
 // both changes are logged.
 void VoiceAssistant::set_state_(State state, State desired_state) {
   this->set_state_(state);
   this->desired_state_ = desired_state;
   ESP_LOGD(TAG, "Desired state set to %s",
            LOG_STR_ARG(voice_assistant_state_to_string(this->desired_state_)));
 }

 // Report that the server side could not be started: log it, fire the error trigger and
 // stop the microphone on the way back to IDLE.
 void VoiceAssistant::failed_to_start() {
   ESP_LOGE(TAG, "Failed to start server. See Home Assistant logs for more details.");

   const std::string error_code = "failed-to-start";
   const std::string error_message = "Failed to start server. See Home Assistant logs for more details.";
   this->error_trigger_->trigger(error_code, error_message);

   this->set_state_(State::STOP_MICROPHONE, State::IDLE);
 }

 void VoiceAssistant::start_streaming() {
   if (this->state_ != State::STARTING_PIPELINE) {
     this->signal_stop_();
     return;
   }

   ESP_LOGD(TAG, "Client started, streaming microphone");
   this->audio_mode_ = AUDIO_MODE_API;

   if (this->mic_->is_running()) {
     this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);
   } else {
     this->set_state_(State::START_MICROPHONE, State::STREAMING_MICROPHONE);
   }
 }

 void VoiceAssistant::start_streaming(struct sockaddr_storage *addr, uint16_t port) {
   if (this->state_ != State::STARTING_PIPELINE) {
     this->signal_stop_();
     return;
   }

   ESP_LOGD(TAG, "Client started, streaming microphone");
   this->audio_mode_ = AUDIO_MODE_UDP;

   memcpy(&this->dest_addr_, addr, sizeof(this->dest_addr_));
   if (this->dest_addr_.ss_family == AF_INET) {
     ((struct sockaddr_in *) &this->dest_addr_)->sin_port = htons(port);
   }
 #if LWIP_IPV6
   else if (this->dest_addr_.ss_family == AF_INET6) {
     ((struct sockaddr_in6 *) &this->dest_addr_)->sin6_port = htons(port);
   }
 #endif
   else {
     ESP_LOGW(TAG, "Unknown address family: %d", this->dest_addr_.ss_family);
     return;
   }

   if (this->mic_->is_running()) {
     this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);
   } else {
     this->set_state_(State::START_MICROPHONE, State::STREAMING_MICROPHONE);
   }
 }

 // Ask the assistant to start a session.
 //
 // Without a connected API client the assistant is forced back to IDLE and continuous mode
 // is dropped. The request is ignored unless the assistant is currently IDLE.
 void VoiceAssistant::request_start(bool continuous, bool silence_detection) {
   if (this->api_client_ == nullptr) {
     ESP_LOGE(TAG, "No API client connected");
     this->set_state_(State::IDLE, State::IDLE);
     this->continuous_ = false;
     return;
   }
   if (this->state_ != State::IDLE) {
     return;  // A session is already in progress
   }

   this->continuous_ = continuous;
   this->silence_detection_ = silence_detection;
 #ifdef USE_ESP_ADF
   if (this->use_wake_word_) {
     this->set_state_(State::START_MICROPHONE, State::WAIT_FOR_VAD);
     return;
   }
 #endif
   this->set_state_(State::START_MICROPHONE, State::START_PIPELINE);
 }

 // Ask the assistant to stop. Continuous mode is always cleared; what else happens depends
 // on the current state.
 void VoiceAssistant::request_stop() {
   this->continuous_ = false;

   switch (this->state_) {
     // Nothing to stop.
     case State::IDLE:
       break;

     // Let the incoming audio stream finish then it will go to idle.
     case State::STREAMING_RESPONSE:
     case State::RESPONSE_FINISHED:
       break;

     // Already stopping the microphone: just make sure it ends up idle.
     case State::STOP_MICROPHONE:
     case State::STOPPING_MICROPHONE:
       this->desired_state_ = State::IDLE;
       break;

     // Microphone is gone already; only a stop needs to be signalled.
     case State::AWAITING_RESPONSE:
       this->signal_stop_();
       break;

     // A start request has been sent: signal a stop, then stop the microphone.
     case State::STARTING_PIPELINE:
     case State::STREAMING_MICROPHONE:
       this->signal_stop_();
       this->set_state_(State::STOP_MICROPHONE, State::IDLE);
       break;

     // No start request has been sent yet: only the microphone has to be stopped.
     case State::START_MICROPHONE:
     case State::STARTING_MICROPHONE:
     case State::WAIT_FOR_VAD:
     case State::WAITING_FOR_VAD:
     case State::START_PIPELINE:
       this->set_state_(State::STOP_MICROPHONE, State::IDLE);
       break;
   }
 }

 void VoiceAssistant::signal_stop_() {
   memset(&this->dest_addr_, 0, sizeof(this->dest_addr_));
   if (this->api_client_ == nullptr) {
     return;
   }
   ESP_LOGD(TAG, "Signaling stop...");
   api::VoiceAssistantRequest msg;
   msg.start = false;
   this->api_client_->send_voice_assistant_request(msg);
 }

 void VoiceAssistant::start_playback_timeout_() {
   this->set_timeout("playing", 100, [this]() {
     this->cancel_timeout("speaker-timeout");
     this->set_state_(State::IDLE, State::IDLE);

     api::VoiceAssistantAnnounceFinished msg;
     msg.success = true;
     this->api_client_->send_voice_assistant_announce_finished(msg);
   });
 }

 // Handle a pipeline event received from the API client.
 //
 // Depending on the event type this extracts named arguments from msg.data, updates the
 // state machine and schedules the matching automation trigger via defer().
 // Events that lack a required argument (STT_END/TTS_START without "text", TTS_END
 // without "url") are logged and otherwise ignored.
 void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
   ESP_LOGD(TAG, "Event Type: %" PRId32, msg.event_type);
   switch (msg.event_type) {
     case api::enums::VOICE_ASSISTANT_RUN_START:
       ESP_LOGD(TAG, "Assist Pipeline running");
       this->defer([this]() { this->start_trigger_->trigger(); });
       break;
     case api::enums::VOICE_ASSISTANT_WAKE_WORD_START:
       break;
     case api::enums::VOICE_ASSISTANT_WAKE_WORD_END: {
       ESP_LOGD(TAG, "Wake word detected");
       this->defer([this]() { this->wake_word_detected_trigger_->trigger(); });
       break;
     }
     case api::enums::VOICE_ASSISTANT_STT_START:
       ESP_LOGD(TAG, "STT started");
       this->defer([this]() { this->listening_trigger_->trigger(); });
       break;
     case api::enums::VOICE_ASSISTANT_STT_END: {
       std::string text;
       // Iterate by const reference: only the matching value needs to be copied.
       for (const auto &arg : msg.data) {
         if (arg.name == "text") {
           text = arg.value;
         }
       }
       if (text.empty()) {
         ESP_LOGW(TAG, "No text in STT_END event");
         return;
       }
       ESP_LOGD(TAG, "Speech recognised as: \"%s\"", text.c_str());
       this->defer([this, text]() { this->stt_end_trigger_->trigger(text); });
       break;
     }
     case api::enums::VOICE_ASSISTANT_INTENT_START:
       ESP_LOGD(TAG, "Intent started");
       this->defer([this]() { this->intent_start_trigger_->trigger(); });
       break;
     case api::enums::VOICE_ASSISTANT_INTENT_END: {
       // Remember the conversation ID; it is sent along with the next start request.
       for (const auto &arg : msg.data) {
         if (arg.name == "conversation_id") {
           this->conversation_id_ = arg.value;
         }
       }
       this->defer([this]() { this->intent_end_trigger_->trigger(); });
       break;
     }
     case api::enums::VOICE_ASSISTANT_TTS_START: {
       std::string text;
       for (const auto &arg : msg.data) {
         if (arg.name == "text") {
           text = arg.value;
         }
       }
       if (text.empty()) {
         ESP_LOGW(TAG, "No text in TTS_START event");
         return;
       }
       ESP_LOGD(TAG, "Response: \"%s\"", text.c_str());
       this->defer([this, text]() {
         this->tts_start_trigger_->trigger(text);
 #ifdef USE_SPEAKER
         if (this->speaker_ != nullptr) {
           this->speaker_->start();
         }
 #endif
       });
       break;
     }
     case api::enums::VOICE_ASSISTANT_TTS_END: {
       std::string url;
       for (const auto &arg : msg.data) {
         if (arg.name == "url") {
           url = arg.value;
         }
       }
       if (url.empty()) {
         ESP_LOGW(TAG, "No url in TTS_END event");
         return;
       }
       ESP_LOGD(TAG, "Response URL: \"%s\"", url.c_str());
       this->defer([this, url]() {
 #ifdef USE_MEDIA_PLAYER
         if (this->media_player_ != nullptr) {
           this->media_player_->make_call().set_media_url(url).set_announcement(true).perform();
           // Start the playback timeout, as the media player state isn't immediately updated
           this->start_playback_timeout_();
         }
 #endif
         this->tts_end_trigger_->trigger(url);
       });
       // With local output the response is played on this device, otherwise the session is over.
       State new_state = this->local_output_ ? State::STREAMING_RESPONSE : State::IDLE;
       this->set_state_(new_state, new_state);
       break;
     }
     case api::enums::VOICE_ASSISTANT_RUN_END: {
       ESP_LOGD(TAG, "Assist Pipeline ended");
       if ((this->state_ == State::STARTING_PIPELINE) || (this->state_ == State::AWAITING_RESPONSE)) {
         // Pipeline ended before starting microphone
         // Or there wasn't a TTS start event ("nevermind")
         this->set_state_(State::IDLE, State::IDLE);
       } else if (this->state_ == State::STREAMING_MICROPHONE) {
         this->ring_buffer_->reset();
 #ifdef USE_ESP_ADF
         if (this->use_wake_word_) {
           // No need to stop the microphone since we didn't use the speaker
           this->set_state_(State::WAIT_FOR_VAD, State::WAITING_FOR_VAD);
         } else
 #endif
         {
           this->set_state_(State::IDLE, State::IDLE);
         }
       }
       this->defer([this]() { this->end_trigger_->trigger(); });
       break;
     }
     case api::enums::VOICE_ASSISTANT_ERROR: {
       std::string code = "";
       std::string message = "";
       for (const auto &arg : msg.data) {
         if (arg.name == "code") {
           code = arg.value;
         } else if (arg.name == "message") {
           message = arg.value;
         }
       }
       if (code == "wake-word-timeout" || code == "wake_word_detection_aborted" || code == "no_wake_word") {
         // Don't change state here since either the "tts-end" or "run-end" events will do it.
         return;
       } else if (code == "wake-provider-missing" || code == "wake-engine-missing") {
         // Wake word is not set up or not ready on Home Assistant so stop and do not retry until user starts again.
         this->defer([this, code, message]() {
           this->request_stop();
           this->error_trigger_->trigger(code, message);
         });
         return;
       }
       ESP_LOGE(TAG, "Error: %s - %s", code.c_str(), message.c_str());
       if (this->state_ != State::IDLE) {
         this->signal_stop_();
         this->set_state_(State::STOP_MICROPHONE, State::IDLE);
       }
       this->defer([this, code, message]() { this->error_trigger_->trigger(code, message); });
       break;
     }
     case api::enums::VOICE_ASSISTANT_TTS_STREAM_START: {
 #ifdef USE_SPEAKER
       if (this->speaker_ != nullptr) {
         // From now on loop() waits for the stream-end event instead of the playback timeout.
         this->wait_for_stream_end_ = true;
         ESP_LOGD(TAG, "TTS stream start");
         this->defer([this] { this->tts_stream_start_trigger_->trigger(); });
       }
 #endif
       break;
     }
     case api::enums::VOICE_ASSISTANT_TTS_STREAM_END: {
 #ifdef USE_SPEAKER
       if (this->speaker_ != nullptr) {
         this->stream_ended_ = true;
         ESP_LOGD(TAG, "TTS stream end");
       }
 #endif
       break;
     }
     case api::enums::VOICE_ASSISTANT_STT_VAD_START:
       ESP_LOGD(TAG, "Starting STT by VAD");
       this->defer([this]() { this->stt_vad_start_trigger_->trigger(); });
       break;
     case api::enums::VOICE_ASSISTANT_STT_VAD_END:
       ESP_LOGD(TAG, "STT by VAD end");
       this->set_state_(State::STOP_MICROPHONE, State::AWAITING_RESPONSE);
       this->defer([this]() { this->stt_vad_end_trigger_->trigger(); });
       break;
     default:
       ESP_LOGD(TAG, "Unhandled event type: %" PRId32, msg.event_type);
       break;
   }
 }

 // Append response audio received over the API connection to the speaker buffer.
 // A chunk that does not fit into the remaining space is dropped with an error log.
 void VoiceAssistant::on_audio(const api::VoiceAssistantAudio &msg) {
 #ifdef USE_SPEAKER  // We should never get to this function if there is no speaker anyway
   if ((this->speaker_ == nullptr) || (this->speaker_buffer_ == nullptr)) {
     return;
   }

   if (this->speaker_buffer_index_ + msg.data.length() >= SPEAKER_BUFFER_SIZE) {
     ESP_LOGE(TAG, "Cannot receive audio, buffer is full");
     return;
   }

   uint8_t *const write_pos = this->speaker_buffer_ + this->speaker_buffer_index_;
   memcpy(write_pos, msg.data.data(), msg.data.length());
   this->speaker_buffer_index_ += msg.data.length();
   this->speaker_buffer_size_ += msg.data.length();
   this->speaker_bytes_received_ += msg.data.length();
   ESP_LOGV(TAG, "Received audio: %u bytes from API", msg.data.length());
 #endif
 }

 // Handle a timer event from the API client.
 //
 // The timer is stored (or updated) in timers_, the trigger matching the event type is
 // fired, and cancelled/finished timers are removed again. The one-second "timer-event"
 // interval runs only while at least one timer is stored.
 void VoiceAssistant::on_timer_event(const api::VoiceAssistantTimerEventResponse &msg) {
   Timer timer = {
       .id = msg.timer_id,
       .name = msg.name,
       .total_seconds = msg.total_seconds,
       .seconds_left = msg.seconds_left,
       .is_active = msg.is_active,
   };
   this->timers_[timer.id] = timer;

   ESP_LOGD(TAG, "Timer Event");
   ESP_LOGD(TAG, "  Type: %" PRId32, msg.event_type);
   ESP_LOGD(TAG, "  %s", timer.to_string().c_str());

   switch (msg.event_type) {
     case api::enums::VOICE_ASSISTANT_TIMER_STARTED:
       this->timer_started_trigger_->trigger(timer);
       break;
     case api::enums::VOICE_ASSISTANT_TIMER_UPDATED:
       this->timer_updated_trigger_->trigger(timer);
       break;
     case api::enums::VOICE_ASSISTANT_TIMER_CANCELLED:
       this->timer_cancelled_trigger_->trigger(timer);
       this->timers_.erase(timer.id);
       break;
     case api::enums::VOICE_ASSISTANT_TIMER_FINISHED:
       this->timer_finished_trigger_->trigger(timer);
       this->timers_.erase(timer.id);
       break;
   }

   // No timers left: the periodic tick is no longer needed.
   if (this->timers_.empty()) {
     this->cancel_interval("timer-event");
     this->timer_tick_running_ = false;
     return;
   }

   // Timers exist: make sure the periodic tick is running.
   if (!this->timer_tick_running_) {
     this->set_interval("timer-event", 1000, [this]() { this->timer_tick_(); });
     this->timer_tick_running_ = true;
   }
 }

 void VoiceAssistant::timer_tick_() {
   std::vector<Timer> res;
   res.reserve(this->timers_.size());
   for (auto &pair : this->timers_) {
     auto &timer = pair.second;
     if (timer.is_active && timer.seconds_left > 0) {
       timer.seconds_left--;
     }
     res.push_back(timer);
   }
   this->timer_tick_trigger_->trigger(res);
 }

 // Play an announcement through the media player, if one is configured.
 // Without USE_MEDIA_PLAYER, or without a media player, the request is ignored.
 void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg) {
 #ifdef USE_MEDIA_PLAYER
   if (this->media_player_ == nullptr) {
     return;
   }

   this->tts_start_trigger_->trigger(msg.text);

   auto announce_call = this->media_player_->make_call();
   announce_call.set_media_url(msg.media_id).set_announcement(true).perform();
   this->set_state_(State::STREAMING_RESPONSE, State::STREAMING_RESPONSE);

   this->tts_end_trigger_->trigger(msg.media_id);
   this->end_trigger_->trigger();
 #endif
 }

 // Pointer to the VoiceAssistant instance; null until the constructor assigns `this` to it.
 VoiceAssistant *global_voice_assistant = nullptr;  // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)

 }  // namespace voice_assistant
 }  // namespace esphome

 #endif  // USE_VOICE_ASSISTANT
esphome::voice_assistant::VoiceAssistant::tts_stream_start_trigger_
Trigger * tts_stream_start_trigger_
Definition: voice_assistant.h:232

esphome::api::VoiceAssistantAudioSettings
Definition: api_pb2.h:1733

esphome::api::VoiceAssistantRequest::wake_word_phrase
std::string wake_word_phrase
Definition: api_pb2.h:1753

esphome::voice_assistant::State::RESPONSE_FINISHED

esphome::speaker::Speaker::is_running
bool is_running() const
Definition: speaker.h:66

esphome::voice_assistant::VoiceAssistant::timer_cancelled_trigger_
Trigger< Timer > * timer_cancelled_trigger_
Definition: voice_assistant.h:252

esphome::voice_assistant::VoiceAssistant::wait_for_stream_end_
bool wait_for_stream_end_
Definition: voice_assistant.h:265

esphome::voice_assistant::VoiceAssistant::local_output_
bool local_output_
Definition: voice_assistant.h:272

esphome::Component::set_interval
void set_interval(const std::string &name, uint32_t interval, std::function< void()> &&f)
Set an interval function with a unique name.
Definition: component.cpp:52

esphome::api::VoiceAssistantTimerEventResponse::seconds_left
uint32_t seconds_left
Definition: api_pb2.h:1819

esphome::setup_priority::AFTER_CONNECTION
const float AFTER_CONNECTION
For components that should be initialized after a data connection (API/MQTT) is connected.
Definition: component.cpp:27

esphome::voice_assistant::VoiceAssistant::VoiceAssistant
VoiceAssistant()
Definition: voice_assistant.cpp:27

esphome::voice_assistant::VoiceAssistant::audio_mode_
AudioMode audio_mode_
Definition: voice_assistant.h:302

esphome::api::VoiceAssistantEventResponse
Definition: api_pb2.h:1787

esphome::voice_assistant::VoiceAssistant::timers_
std::unordered_map< std::string, Timer > timers_
Definition: voice_assistant.h:247

esphome::Component::cancel_timeout
bool cancel_timeout(const std::string &name)
Cancel a timeout function.
Definition: component.cpp:73

esphome::api::VoiceAssistantTimerEventResponse::event_type
enums::VoiceAssistantTimerEvent event_type
Definition: api_pb2.h:1815

esphome::voice_assistant::VoiceAssistant::deallocate_buffers_
void deallocate_buffers_()
Definition: voice_assistant.cpp:141

esphome::voice_assistant::VoiceAssistant::intent_end_trigger_
Trigger * intent_end_trigger_
Definition: voice_assistant.h:224

esphome::voice_assistant::State::AWAITING_RESPONSE

esphome::voice_assistant::VoiceAssistant::high_freq_
HighFrequencyLoopRequester high_freq_
Definition: voice_assistant.h:278

esphome::voice_assistant::global_voice_assistant
VoiceAssistant * global_voice_assistant
Definition: voice_assistant.cpp:899

esphome::voice_assistant::VoiceAssistant::conversation_id_
std::string conversation_id_
Definition: voice_assistant.h:274

esphome::socket::set_sockaddr_any
socklen_t set_sockaddr_any(struct sockaddr *addr, socklen_t addrlen, uint16_t port)
Set a sockaddr to the any address and specified port for the IP version used by socket_ip().
Definition: socket.cpp:51

esphome::voice_assistant::VoiceAssistant::wake_word_
std::string wake_word_
Definition: voice_assistant.h:276

esphome::api::VoiceAssistantRequest::conversation_id
std::string conversation_id
Definition: api_pb2.h:1750

esphome::voice_assistant::VoiceAssistant::api_client_
api::APIConnection * api_client_
Definition: voice_assistant.h:245

esphome::voice_assistant::VoiceAssistant::desired_state_
State desired_state_
Definition: voice_assistant.h:300

esphome::voice_assistant::VoiceAssistant::send_buffer_
uint8_t * send_buffer_
Definition: voice_assistant.h:293

esphome::voice_assistant::VoiceAssistant::timer_finished_trigger_
Trigger< Timer > * timer_finished_trigger_
Definition: voice_assistant.h:250

esphome::voice_assistant::VoiceAssistant::get_setup_priority
float get_setup_priority() const override
Definition: voice_assistant.cpp:29

esphome::voice_assistant::VoiceAssistant::vad_instance_
vad_handle_t vad_instance_
Definition: voice_assistant.h:281

esphome::api::enums::VOICE_ASSISTANT_INTENT_START
Definition: api_pb2.h:187

esphome::voice_assistant::State::STREAMING_RESPONSE

esphome::api::enums::VOICE_ASSISTANT_TIMER_FINISHED
Definition: api_pb2.h:202

esphome::api::VoiceAssistantAudioSettings::auto_gain
uint32_t auto_gain
Definition: api_pb2.h:1736

esphome::voice_assistant::VoiceAssistant::socket_
std::unique_ptr< socket::Socket > socket_
Definition: voice_assistant.h:221

esphome::api::VoiceAssistantRequest::flags
uint32_t flags
Definition: api_pb2.h:1751

esphome::api::enums::VOICE_ASSISTANT_TIMER_STARTED
Definition: api_pb2.h:199

esphome::api::VoiceAssistantAudioSettings::noise_suppression_level
uint32_t noise_suppression_level
Definition: api_pb2.h:1735

esphome::voice_assistant::State::WAIT_FOR_VAD

sockaddr_storage::ss_family
sa_family_t ss_family
Definition: headers.h:92

esphome::voice_assistant::VoiceAssistant::tts_start_trigger_
Trigger< std::string > * tts_start_trigger_
Definition: voice_assistant.h:238

esphome::Component::set_timeout
void set_timeout(const std::string &name, uint32_t timeout, std::function< void()> &&f)
Set a timeout function with a unique name.
Definition: component.cpp:69

esphome::Component::cancel_interval
bool cancel_interval(const std::string &name)
Cancel an interval function.
Definition: component.cpp:56

esphome::api::enums::VOICE_ASSISTANT_RUN_START
Definition: api_pb2.h:183

esphome::api::enums::VOICE_ASSISTANT_TIMER_CANCELLED
Definition: api_pb2.h:201

esphome::voice_assistant::State
State
Definition: voice_assistant.h:46

esphome::Component::defer
void defer(const std::string &name, std::function< void()> &&f)
Defer a callback to the next loop() call.
Definition: component.cpp:130

esphome::voice_assistant::State::STOPPING_MICROPHONE

sockaddr_in
Definition: headers.h:61

esphome::voice_assistant::VoiceAssistant::tts_end_trigger_
Trigger< std::string > * tts_end_trigger_
Definition: voice_assistant.h:237

esphome::RAMAllocator::allocate
T * allocate(size_t n)
Definition: helpers.h:703

socklen_t
uint32_t socklen_t
Definition: headers.h:97

esphome::api::VoiceAssistantRequest::audio_settings
VoiceAssistantAudioSettings audio_settings
Definition: api_pb2.h:1752

esphome::voice_assistant::VoiceAssistant::timer_tick_running_
bool timer_tick_running_
Definition: voice_assistant.h:255

esphome::voice_assistant::VoiceAssistant::mic_
microphone::Microphone * mic_
Definition: voice_assistant.h:257

esphome::voice_assistant::VoiceAssistant::timer_tick_trigger_
Trigger< std::vector< Timer > > * timer_tick_trigger_
Definition: voice_assistant.h:253

esphome::api::VoiceAssistantEventResponse::event_type
enums::VoiceAssistantEvent event_type
Definition: api_pb2.h:1789

esphome::voice_assistant::AUDIO_MODE_API
Definition: voice_assistant.h:64

esphome::api::enums::VOICE_ASSISTANT_REQUEST_USE_VAD
Definition: api_pb2.h:178

esphome::voice_assistant::VoiceAssistant::client_subscription
void client_subscription(api::APIConnection *client, bool subscribe)
Definition: voice_assistant.cpp:447

esphome::speaker::Speaker::has_buffered_data
virtual bool has_buffered_data() const =0

esphome::api::VoiceAssistantEventResponse::data
std::vector< VoiceAssistantEventData > data
Definition: api_pb2.h:1790

esphome::api::APIConnection::get_client_combined_info
std::string get_client_combined_info() const
Definition: api_connection.h:227

esphome::voice_assistant::VoiceAssistant::conversation_timeout_
uint32_t conversation_timeout_
Definition: voice_assistant.h:291

esphome::voice_assistant::VoiceAssistant::speaker_bytes_received_
size_t speaker_bytes_received_
Definition: voice_assistant.h:264

esphome::media_player::MediaPlayer::state
MediaPlayerState state
Definition: media_player.h:95

esphome::voice_assistant::VoiceAssistant::udp_socket_running_
bool udp_socket_running_
Definition: voice_assistant.h:303

esphome::microphone::Microphone::is_running
bool is_running() const
Definition: microphone.h:28

esphome::voice_assistant::VoiceAssistant::speaker_buffer_
uint8_t * speaker_buffer_
Definition: voice_assistant.h:261

esphome::voice_assistant::State::STREAMING_MICROPHONE

esphome::voice_assistant::VoiceAssistant::noise_suppression_level_
uint8_t noise_suppression_level_
Definition: voice_assistant.h:288

esphome::voice_assistant::VoiceAssistant::use_wake_word_
bool use_wake_word_
Definition: voice_assistant.h:287

esphome::Trigger::trigger
void trigger(Ts... x)
Inform the parent automation that the event has triggered.
Definition: automation.h:95

esphome::voice_assistant::VoiceAssistant
Definition: voice_assistant.h:93

esphome::api::VoiceAssistantTimerEventResponse::timer_id
std::string timer_id
Definition: api_pb2.h:1816

esphome::voice_assistant::VoiceAssistant::start_trigger_
Trigger * start_trigger_
Definition: voice_assistant.h:228

esphome::api::enums::VOICE_ASSISTANT_STT_VAD_START
Definition: api_pb2.h:193

esphome::api::VoiceAssistantAnnounceFinished::success
bool success
Definition: api_pb2.h:1844

esphome::voice_assistant::State::WAITING_FOR_VAD

esphome::voice_assistant::VoiceAssistant::clear_buffers_
void clear_buffers_()
Definition: voice_assistant.cpp:117

esphome::Component::status_has_error
bool status_has_error() const
Definition: component.cpp:150

esphome::voice_assistant::VoiceAssistant::auto_gain_
uint8_t auto_gain_
Definition: voice_assistant.h:289

esphome::voice_assistant::VoiceAssistant::timer_started_trigger_
Trigger< Timer > * timer_started_trigger_
Definition: voice_assistant.h:249

esphome::voice_assistant::VoiceAssistant::listening_trigger_
Trigger * listening_trigger_
Definition: voice_assistant.h:226

esphome::api::enums::VOICE_ASSISTANT_STT_START
Definition: api_pb2.h:185

esphome::api::VoiceAssistantAnnounceRequest::text
std::string text
Definition: api_pb2.h:1833

esphome::api::VoiceAssistantAnnounceRequest::media_id
std::string media_id
Definition: api_pb2.h:1832

esphome::voice_assistant::VoiceAssistant::write_speaker_
void write_speaker_()
Definition: voice_assistant.cpp:429

esphome::api::enums::VOICE_ASSISTANT_TTS_START
Definition: api_pb2.h:189

esphome::Component::status_set_error
void status_set_error(const char *message="unspecified")
Definition: component.cpp:159

esphome::voice_assistant::VoiceAssistant::media_player_
media_player::MediaPlayer * media_player_
Definition: voice_assistant.h:269

esphome::voice_assistant::VoiceAssistant::input_buffer_
int16_t * input_buffer_
Definition: voice_assistant.h:294

esphome::HighFrequencyLoopRequester::start
void start()
Start running the loop continuously.
Definition: helpers.cpp:674

esphome::voice_assistant::VoiceAssistant::client_connected_trigger_
Trigger * client_connected_trigger_
Definition: voice_assistant.h:242

defines.h

voice_assistant.h

esphome::voice_assistant::AUDIO_MODE_UDP
Definition: voice_assistant.h:63

esphome::api::enums::VOICE_ASSISTANT_REQUEST_USE_WAKE_WORD
Definition: api_pb2.h:179

esphome::voice_assistant::VoiceAssistant::start_udp_socket_
bool start_udp_socket_()
Definition: voice_assistant.cpp:31

esphome::voice_assistant::State::STOP_MICROPHONE

sockaddr
Definition: headers.h:83

esphome::voice_assistant::VoiceAssistant::end_trigger_
Trigger * end_trigger_
Definition: voice_assistant.h:227

esphome::microphone::Microphone::start
virtual void start()=0

esphome::api::VoiceAssistantTimerEventResponse::is_active
bool is_active
Definition: api_pb2.h:1820

esphome::api::APIServerConnectionBase::send_voice_assistant_request
bool send_voice_assistant_request(const VoiceAssistantRequest &msg)
Definition: api_pb2_service.cpp:468

esphome::voice_assistant::VoiceAssistant::reset_conversation_id
void reset_conversation_id()
Definition: voice_assistant.cpp:171

esphome::api::enums::VOICE_ASSISTANT_INTENT_END
Definition: api_pb2.h:188

esphome::api::enums::VOICE_ASSISTANT_WAKE_WORD_START
Definition: api_pb2.h:191

esphome::api::VoiceAssistantAnnounceRequest
Definition: api_pb2.h:1830

esphome::voice_assistant::VoiceAssistant::speaker_
speaker::Speaker * speaker_
Definition: voice_assistant.h:260

esphome::voice_assistant::VoiceAssistant::failed_to_start
void failed_to_start()
Definition: voice_assistant.cpp:515

esphome::voice_assistant::Timer
Definition: voice_assistant.h:67

esphome::HighFrequencyLoopRequester::stop
void stop()
Stop running the loop continuously.
Definition: helpers.cpp:680

sockaddr_storage
Definition: headers.h:90

esphome::media_player::MediaPlayer::make_call
MediaPlayerCall make_call()
Definition: media_player.h:98

esphome::voice_assistant::VoiceAssistant::set_state_
void set_state_(State state)
Definition: voice_assistant.cpp:502

esphome::voice_assistant::VoiceAssistant::continuous_
bool continuous_
Definition: voice_assistant.h:296

esphome::voice_assistant::State::START_PIPELINE

esphome::voice_assistant::VoiceAssistant::allocate_buffers_
bool allocate_buffers_()
Definition: voice_assistant.cpp:74

esphome::voice_assistant::VoiceAssistant::read_microphone_
int read_microphone_()
Definition: voice_assistant.cpp:176

esphome::microphone::Microphone::stop
virtual void stop()=0

esphome::voice_assistant::VoiceAssistant::state_
State state_
Definition: voice_assistant.h:299

esphome::voice_assistant::VoiceAssistant::stt_vad_start_trigger_
Trigger * stt_vad_start_trigger_
Definition: voice_assistant.h:229

esphome::shelly_dimmer::flags
const uint32_t flags
Definition: stm32flash.h:85

esphome::api::APIServerConnectionBase::send_voice_assistant_announce_finished
bool send_voice_assistant_announce_finished(const VoiceAssistantAnnounceFinished &msg)
Definition: api_pb2_service.cpp:492

esphome::RAMAllocator::deallocate
void deallocate(T *p, size_t n)
Definition: helpers.h:720

esphome::voice_assistant::VoiceAssistant::volume_multiplier_
float volume_multiplier_
Definition: voice_assistant.h:290

esphome::voice_assistant::VoiceAssistant::ring_buffer_
std::unique_ptr< RingBuffer > ring_buffer_
Definition: voice_assistant.h:285

esphome::api::enums::VOICE_ASSISTANT_RUN_END
Definition: api_pb2.h:184

esphome::api::APIServerConnectionBase::send_voice_assistant_audio
bool send_voice_assistant_audio(const VoiceAssistantAudio &msg)
Definition: api_pb2_service.cpp:480

esphome::microphone::Microphone::is_stopped
bool is_stopped() const
Definition: microphone.h:29

esphome::api::enums::VOICE_ASSISTANT_TTS_END
Definition: api_pb2.h:190

esphome::voice_assistant::VoiceAssistant::on_audio
void on_audio(const api::VoiceAssistantAudio &msg)
Definition: voice_assistant.cpp:819

esphome::voice_assistant::VoiceAssistant::start_streaming
void start_streaming()
Definition: voice_assistant.cpp:521

esphome::voice_assistant::VoiceAssistant::client_disconnected_trigger_
Trigger * client_disconnected_trigger_
Definition: voice_assistant.h:243

esphome::Component::status_clear_error
void status_clear_error()
Definition: component.cpp:172

esphome::voice_assistant::VoiceAssistant::on_timer_event
void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg)
Definition: voice_assistant.cpp:835

esphome::media_player::MediaPlayerCall::set_announcement
MediaPlayerCall & set_announcement(bool announce)
Definition: media_player.cpp:128

esphome::voice_assistant::VoiceAssistant::stt_vad_end_trigger_
Trigger * stt_vad_end_trigger_
Definition: voice_assistant.h:230

esphome::voice_assistant::VoiceAssistant::dest_addr_
struct sockaddr_storage dest_addr_
Definition: voice_assistant.h:222

esphome::voice_assistant::VoiceAssistant::stream_ended_
bool stream_ended_
Definition: voice_assistant.h:266

esphome::api::VoiceAssistantTimerEventResponse
Definition: api_pb2.h:1813

esphome::microphone::Microphone::read
virtual size_t read(int16_t *buf, size_t len)=0

esphome::voice_assistant::VoiceAssistant::wake_word_detected_trigger_
Trigger * wake_word_detected_trigger_
Definition: voice_assistant.h:235

esphome::media_player::MediaPlayerCall::perform
void perform()
Definition: media_player.cpp:72

esphome::voice_assistant::VoiceAssistant::loop
void loop() override
Definition: voice_assistant.cpp:192

esphome::voice_assistant::VoiceAssistant::tts_stream_end_trigger_
Trigger * tts_stream_end_trigger_
Definition: voice_assistant.h:233

esphome::voice_assistant::VoiceAssistant::silence_detection_
bool silence_detection_
Definition: voice_assistant.h:297

esphome::speaker::Speaker::start
virtual void start()=0

esphome::api::VoiceAssistantRequest::start
bool start
Definition: api_pb2.h:1749

esphome::Component::mark_failed
virtual void mark_failed()
Mark this component as failed.
Definition: component.cpp:118

esphome::api::VoiceAssistantTimerEventResponse::name
std::string name
Definition: api_pb2.h:1817

esphome::api::VoiceAssistantRequest
Definition: api_pb2.h:1747

esphome::voice_assistant::VoiceAssistant::request_stop
void request_stop()
Definition: voice_assistant.cpp:588

esphome::voice_assistant::Timer::id
std::string id
Definition: voice_assistant.h:68

esphome::api::VoiceAssistantAnnounceFinished
Definition: api_pb2.h:1842

esphome::voice_assistant::VoiceAssistant::idle_trigger_
Trigger * idle_trigger_
Definition: voice_assistant.h:240

esphome
Implementation of SPI Controller mode.
Definition: a01nyub.cpp:7

esphome::media_player::MEDIA_PLAYER_STATE_ANNOUNCING
Definition: media_player.h:14

sockaddr_in6
Definition: headers.h:72

esphome::api::enums::VOICE_ASSISTANT_TTS_STREAM_START
Definition: api_pb2.h:195

esphome::voice_assistant::VoiceAssistant::vad_threshold_
uint8_t vad_threshold_
Definition: voice_assistant.h:282

esphome::voice_assistant::VoiceAssistant::speaker_buffer_size_
size_t speaker_buffer_size_
Definition: voice_assistant.h:263

esphome::voice_assistant::Timer::to_string
std::string to_string() const
Definition: voice_assistant.h:74

esphome::voice_assistant::VoiceAssistant::intent_start_trigger_
Trigger * intent_start_trigger_
Definition: voice_assistant.h:225

esphome::api::VoiceAssistantTimerEventResponse::total_seconds
uint32_t total_seconds
Definition: api_pb2.h:1818

esphome::api::enums::VOICE_ASSISTANT_TTS_STREAM_END
Definition: api_pb2.h:196

log.h

esphome::voice_assistant::VoiceAssistant::on_announce
void on_announce(const api::VoiceAssistantAnnounceRequest &msg)
Definition: voice_assistant.cpp:887

esphome::api::enums::VOICE_ASSISTANT_STT_VAD_END
Definition: api_pb2.h:194

esphome::speaker::Speaker::play
virtual size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait)
Plays the provided audio data.
Definition: speaker.h:38

esphome::RAMAllocator
An STL allocator that uses SPI or internal RAM.
Definition: helpers.h:683

esphome::voice_assistant::State::STARTING_PIPELINE

esphome::voice_assistant::State::STARTING_MICROPHONE

esphome::voice_assistant::State::IDLE

esphome::RingBuffer::create
static std::unique_ptr< RingBuffer > create(size_t len)
Definition: ring_buffer.cpp:22

esphome::media_player::MediaPlayerCall::set_media_url
MediaPlayerCall & set_media_url(const std::string &url)
Definition: media_player.cpp:118

esphome::voice_assistant::State::START_MICROPHONE

esphome::voice_assistant::VoiceAssistant::timer_updated_trigger_
Trigger< Timer > * timer_updated_trigger_
Definition: voice_assistant.h:251

esphome::voice_assistant::VoiceAssistant::start_playback_timeout_
void start_playback_timeout_()
Definition: voice_assistant.cpp:630

esphome::api::VoiceAssistantAudioSettings::volume_multiplier
float volume_multiplier
Definition: api_pb2.h:1737

esphome::voice_assistant::VoiceAssistant::vad_counter_
uint8_t vad_counter_
Definition: voice_assistant.h:283

esphome::api::VoiceAssistantAudio::data
std::string data
Definition: api_pb2.h:1802

esphome::voice_assistant::VoiceAssistant::timer_tick_
void timer_tick_()
Definition: voice_assistant.cpp:874

esphome::api::enums::VOICE_ASSISTANT_STT_END
Definition: api_pb2.h:186

esphome::voice_assistant::VoiceAssistant::speaker_buffer_index_
size_t speaker_buffer_index_
Definition: voice_assistant.h:262

esphome::speaker::Speaker::stop
virtual void stop()=0

esphome::api::APIConnection
Definition: api_connection.h:17

esphome::api::VoiceAssistantAudio
Definition: api_pb2.h:1800

esphome::api::enums::VOICE_ASSISTANT_ERROR
Definition: api_pb2.h:182

esphome::voice_assistant::VoiceAssistant::on_event
void on_event(const api::VoiceAssistantEventResponse &msg)
Definition: voice_assistant.cpp:641

state
bool state
Definition: fan.h:34

esphome::api::enums::VOICE_ASSISTANT_WAKE_WORD_END
Definition: api_pb2.h:192

esphome::voice_assistant::VoiceAssistant::stt_end_trigger_
Trigger< std::string > * stt_end_trigger_
Definition: voice_assistant.h:236

esphome::voice_assistant::VoiceAssistant::request_start
void request_start(bool continuous, bool silence_detection)
Definition: voice_assistant.cpp:567

esphome::voice_assistant::VoiceAssistant::error_trigger_
Trigger< std::string, std::string > * error_trigger_
Definition: voice_assistant.h:239

esphome::socket::socket
std::unique_ptr< Socket > socket(int domain, int type, int protocol)
Create a socket of the given domain, type and protocol.
Definition: bsd_sockets_impl.cpp:133

esphome::voice_assistant::VoiceAssistant::signal_stop_
void signal_stop_()
Definition: voice_assistant.cpp:619

esphome::api::enums::VOICE_ASSISTANT_TIMER_UPDATED
Definition: api_pb2.h:200