api/voice__assistant_8h_source.html

 #pragma once

 #include "esphome/core/defines.h"

 #ifdef USE_VOICE_ASSISTANT

 #include "esphome/core/automation.h"
 #include "esphome/core/component.h"
 #include "esphome/core/helpers.h"
 #include "esphome/core/ring_buffer.h"

 #include "esphome/components/api/api_connection.h"
 #include "esphome/components/api/api_pb2.h"
 #include "esphome/components/microphone/microphone.h"
 #ifdef USE_SPEAKER
 #include "esphome/components/speaker/speaker.h"
 #endif
 #ifdef USE_MEDIA_PLAYER
 #include "esphome/components/media_player/media_player.h"
 #endif
 #include "esphome/components/socket/socket.h"

 #ifdef USE_ESP_ADF
 #include <esp_vad.h>
 #endif

 namespace esphome {
 namespace voice_assistant {

 // Legacy protocol version numbers (returned by VoiceAssistant::get_legacy_version()):
 //   1 - initial version
 //   2 - adds raw speaker support
 static constexpr uint32_t LEGACY_INITIAL_VERSION = 1;
 static constexpr uint32_t LEGACY_SPEAKER_SUPPORT = 2;

 // Capability bits; each enumerator occupies one distinct bit so values can be
 // OR-ed together into a single uint32_t mask.
 enum VoiceAssistantFeature : uint32_t {
   FEATURE_VOICE_ASSISTANT = 0x01,  // bit 0
   FEATURE_SPEAKER = 0x02,          // bit 1
   FEATURE_API_AUDIO = 0x04,        // bit 2
 };

 // States of the voice assistant. Enumerators keep their original order, so
 // their numeric values run consecutively from IDLE (0) to RESPONSE_FINISHED (12).
 enum class State {
   IDLE = 0,
   START_MICROPHONE,
   STARTING_MICROPHONE,
   WAIT_FOR_VAD,
   WAITING_FOR_VAD,
   START_PIPELINE,
   STARTING_PIPELINE,
   STREAMING_MICROPHONE,
   STOP_MICROPHONE,
   STOPPING_MICROPHONE,
   AWAITING_RESPONSE,
   STREAMING_RESPONSE,
   RESPONSE_FINISHED,
 };

 // Audio mode selector, stored in a single byte.
 enum AudioMode : uint8_t {
   AUDIO_MODE_UDP = 0,
   AUDIO_MODE_API = 1,
 };

 // Voice assistant component.
 //
 // Holds the microphone handle, the optional speaker / media player handles,
 // the automation triggers and the current / desired State. Methods that are
 // only declared here are implemented out of line.
 //
 // Every data member has an in-class initializer so that an instance never
 // exposes indeterminate values, regardless of how it is constructed.
 class VoiceAssistant : public Component {
  public:
   // Component overrides (implemented out of line).
   void setup() override;
   void loop() override;
   float get_setup_priority() const override;

   // Streaming entry points (implemented out of line).
   void start_streaming();
   void start_streaming(struct sockaddr_storage *addr, uint16_t port);
   void failed_to_start();

   // Stores the microphone handle.
   void set_microphone(microphone::Microphone *mic) { this->mic_ = mic; }
 #ifdef USE_SPEAKER
   // Stores the speaker handle and flags that a local output device exists.
   void set_speaker(speaker::Speaker *speaker) {
     this->speaker_ = speaker;
     this->local_output_ = true;
   }
 #endif
 #ifdef USE_MEDIA_PLAYER
   // Stores the media player handle and flags that a local output device exists.
   void set_media_player(media_player::MediaPlayer *media_player) {
     this->media_player_ = media_player;
     this->local_output_ = true;
   }
 #endif

   // Returns LEGACY_SPEAKER_SUPPORT when built with USE_SPEAKER and a speaker
   // has been set; LEGACY_INITIAL_VERSION otherwise.
   uint32_t get_legacy_version() const {
 #ifdef USE_SPEAKER
     if (this->speaker_ != nullptr) {
       return LEGACY_SPEAKER_SUPPORT;
     }
 #endif
     return LEGACY_INITIAL_VERSION;
   }

   // Builds the VoiceAssistantFeature bit mask. FEATURE_VOICE_ASSISTANT is
   // always present; FEATURE_SPEAKER and FEATURE_API_AUDIO are added only when
   // built with USE_SPEAKER and a speaker has been set.
   uint32_t get_feature_flags() const {
     uint32_t flags = 0;
     flags |= VoiceAssistantFeature::FEATURE_VOICE_ASSISTANT;
 #ifdef USE_SPEAKER
     if (this->speaker_ != nullptr) {
       flags |= VoiceAssistantFeature::FEATURE_SPEAKER;
       flags |= VoiceAssistantFeature::FEATURE_API_AUDIO;
     }
 #endif
     return flags;
   }

   // Start / stop requests (implemented out of line).
   void request_start(bool continuous, bool silence_detection);
   void request_stop();

   // Handlers for incoming API messages (implemented out of line).
   void on_event(const api::VoiceAssistantEventResponse &msg);
   void on_audio(const api::VoiceAssistantAudio &msg);

   // True whenever the current state is anything other than State::IDLE.
   bool is_running() const { return this->state_ != State::IDLE; }
   void set_continuous(bool continuous) { this->continuous_ = continuous; }
   bool is_continuous() const { return this->continuous_; }

   void set_use_wake_word(bool use_wake_word) { this->use_wake_word_ = use_wake_word; }
 #ifdef USE_ESP_ADF
   void set_vad_threshold(uint8_t vad_threshold) { this->vad_threshold_ = vad_threshold; }
 #endif

   void set_noise_suppression_level(uint8_t noise_suppression_level) {
     this->noise_suppression_level_ = noise_suppression_level;
   }
   void set_auto_gain(uint8_t auto_gain) { this->auto_gain_ = auto_gain; }
   void set_volume_multiplier(float volume_multiplier) { this->volume_multiplier_ = volume_multiplier; }

   // Accessors for the automation triggers owned by this component.
   Trigger<> *get_intent_end_trigger() const { return this->intent_end_trigger_; }
   Trigger<> *get_intent_start_trigger() const { return this->intent_start_trigger_; }
   Trigger<> *get_listening_trigger() const { return this->listening_trigger_; }
   Trigger<> *get_end_trigger() const { return this->end_trigger_; }
   Trigger<> *get_start_trigger() const { return this->start_trigger_; }
   Trigger<> *get_stt_vad_end_trigger() const { return this->stt_vad_end_trigger_; }
   Trigger<> *get_stt_vad_start_trigger() const { return this->stt_vad_start_trigger_; }
 #ifdef USE_SPEAKER
   Trigger<> *get_tts_stream_start_trigger() const { return this->tts_stream_start_trigger_; }
   Trigger<> *get_tts_stream_end_trigger() const { return this->tts_stream_end_trigger_; }
 #endif
   Trigger<> *get_wake_word_detected_trigger() const { return this->wake_word_detected_trigger_; }
   Trigger<std::string> *get_stt_end_trigger() const { return this->stt_end_trigger_; }
   Trigger<std::string> *get_tts_end_trigger() const { return this->tts_end_trigger_; }
   Trigger<std::string> *get_tts_start_trigger() const { return this->tts_start_trigger_; }
   Trigger<std::string, std::string> *get_error_trigger() const { return this->error_trigger_; }
   Trigger<> *get_idle_trigger() const { return this->idle_trigger_; }

   Trigger<> *get_client_connected_trigger() const { return this->client_connected_trigger_; }
   Trigger<> *get_client_disconnected_trigger() const { return this->client_disconnected_trigger_; }

   // API client (un)subscription (implemented out of line).
   void client_subscription(api::APIConnection *client, bool subscribe);
   // Returns the stored API client pointer; nullptr until one is assigned.
   api::APIConnection *get_api_connection() const { return this->api_client_; }

   void set_wake_word(const std::string &wake_word) { this->wake_word_ = wake_word; }

  protected:
   int read_microphone_();
   void set_state_(State state);
   void set_state_(State state, State desired_state);
   void signal_stop_();

   std::unique_ptr<socket::Socket> socket_ = nullptr;
   // Value-initialized so the address storage starts out zeroed.
   struct sockaddr_storage dest_addr_ {};

   Trigger<> *intent_end_trigger_ = new Trigger<>();
   Trigger<> *intent_start_trigger_ = new Trigger<>();
   Trigger<> *listening_trigger_ = new Trigger<>();
   Trigger<> *end_trigger_ = new Trigger<>();
   Trigger<> *start_trigger_ = new Trigger<>();
   Trigger<> *stt_vad_start_trigger_ = new Trigger<>();
   Trigger<> *stt_vad_end_trigger_ = new Trigger<>();
 #ifdef USE_SPEAKER
   Trigger<> *tts_stream_start_trigger_ = new Trigger<>();
   Trigger<> *tts_stream_end_trigger_ = new Trigger<>();
 #endif
   Trigger<> *wake_word_detected_trigger_ = new Trigger<>();
   Trigger<std::string> *stt_end_trigger_ = new Trigger<std::string>();
   Trigger<std::string> *tts_end_trigger_ = new Trigger<std::string>();
   Trigger<std::string> *tts_start_trigger_ = new Trigger<std::string>();
   Trigger<std::string, std::string> *error_trigger_ = new Trigger<std::string, std::string>();
   Trigger<> *idle_trigger_ = new Trigger<>();

   Trigger<> *client_connected_trigger_ = new Trigger<>();
   Trigger<> *client_disconnected_trigger_ = new Trigger<>();

   api::APIConnection *api_client_{nullptr};

   microphone::Microphone *mic_{nullptr};
 #ifdef USE_SPEAKER
   void write_speaker_();
   speaker::Speaker *speaker_{nullptr};
   // Null until a buffer is assigned by the implementation.
   uint8_t *speaker_buffer_{nullptr};
   size_t speaker_buffer_index_{0};
   size_t speaker_buffer_size_{0};
   size_t speaker_bytes_received_{0};
   bool wait_for_stream_end_{false};
   bool stream_ended_{false};
 #endif
 #ifdef USE_MEDIA_PLAYER
   media_player::MediaPlayer *media_player_{nullptr};
 #endif

   bool local_output_{false};

   std::string conversation_id_{""};

   std::string wake_word_{""};

   HighFrequencyLoopRequester high_freq_;

 #ifdef USE_ESP_ADF
   // Value-initialized; the handle is expected to be assigned by the implementation.
   vad_handle_t vad_instance_{};
   uint8_t vad_threshold_{5};
   uint8_t vad_counter_{0};
 #endif
   std::unique_ptr<RingBuffer> ring_buffer_;

   // Configuration values; the defaults below only apply until the matching
   // setter is called.
   bool use_wake_word_{false};
   uint8_t noise_suppression_level_{0};
   uint8_t auto_gain_{0};
   float volume_multiplier_{1.0f};  // unity gain

   // Null until buffers are assigned by the implementation.
   uint8_t *send_buffer_{nullptr};
   int16_t *input_buffer_{nullptr};

   bool continuous_{false};
   bool silence_detection_{false};

   State state_{State::IDLE};
   State desired_state_{State::IDLE};

   AudioMode audio_mode_{AUDIO_MODE_UDP};
   bool udp_socket_running_{false};
   bool start_udp_socket_();
 };

 // Automation action that requests a single (non-continuous) voice assistant run.
 template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> {
   TEMPLATABLE_VALUE(std::string, wake_word);

  public:
   // Passes the evaluated wake word to the parent, then requests a
   // non-continuous start using the stored silence-detection flag.
   void play(Ts... x) override {
     this->parent_->set_wake_word(this->wake_word_.value(x...));
     this->parent_->request_start(false, this->silence_detection_);
   }

   void set_silence_detection(bool silence_detection) { this->silence_detection_ = silence_detection; }

  protected:
   // Initialized so play() never reads an indeterminate bool when
   // set_silence_detection() has not been called first.
   bool silence_detection_{false};
 };

 // Automation action that requests a continuous voice assistant run.
 template<typename... Ts> class StartContinuousAction : public Action<Ts...>, public Parented<VoiceAssistant> {
  public:
   void play(Ts... x) override {
     // Both arguments are fixed for this action.
     constexpr bool continuous = true;
     constexpr bool silence_detection = true;
     this->parent_->request_start(continuous, silence_detection);
   }
 };

 // Automation action that forwards a stop request to the parent VoiceAssistant.
 template<typename... Ts> class StopAction : public Action<Ts...>, public Parented<VoiceAssistant> {
  public:
   void play(Ts... x) override {
     auto &assistant = *this->parent_;
     assistant.request_stop();
   }
 };

 // Condition that holds while the parent VoiceAssistant is running or is in continuous mode.
 template<typename... Ts> class IsRunningCondition : public Condition<Ts...>, public Parented<VoiceAssistant> {
  public:
   bool check(Ts... x) override {
     // is_running() is consulted first; is_continuous() only when it is false.
     if (this->parent_->is_running()) {
       return true;
     }
     return this->parent_->is_continuous();
   }
 };

 // Condition that holds while the parent VoiceAssistant has a non-null API connection.
 template<typename... Ts> class ConnectedCondition : public Condition<Ts...>, public Parented<VoiceAssistant> {
  public:
   bool check(Ts... x) override {
     const bool has_client = this->parent_->get_api_connection() != nullptr;
     return has_client;
   }
 };

 // Declaration only: pointer to a VoiceAssistant instance shared across
 // translation units. The definition lives outside this header.
 extern VoiceAssistant *global_voice_assistant;  // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)

 }  // namespace voice_assistant
 }  // namespace esphome

 #endif  // USE_VOICE_ASSISTANT
setup
void setup()

esphome::voice_assistant::State::RESPONSE_FINISHED

loop
void loop()

esphome::voice_assistant::VoiceAssistant::set_microphone
void set_microphone(microphone::Microphone *mic)
Definition: voice_assistant.h:71

esphome::voice_assistant::StartAction::play
void play(Ts... x) override
Definition: voice_assistant.h:238

esphome::voice_assistant::AudioMode
AudioMode
Definition: voice_assistant.h:57

esphome::voice_assistant::VoiceAssistant::get_tts_stream_start_trigger
Trigger<> * get_tts_stream_start_trigger() const
Definition: voice_assistant.h:135

esphome::speaker::Speaker
Definition: speaker.h:13

esphome::api::VoiceAssistantEventResponse
Definition: api_pb2.h:1745

esphome::voice_assistant::VoiceAssistant::get_tts_stream_end_trigger
Trigger<> * get_tts_stream_end_trigger() const
Definition: voice_assistant.h:136

esphome::voice_assistant::State::AWAITING_RESPONSE

esphome::voice_assistant::VoiceAssistant::high_freq_
HighFrequencyLoopRequester high_freq_
Definition: voice_assistant.h:206

esphome::voice_assistant::global_voice_assistant
VoiceAssistant * global_voice_assistant
Definition: voice_assistant.cpp:744

esphome::voice_assistant::VoiceAssistant::send_buffer_
uint8_t * send_buffer_
Definition: voice_assistant.h:220

esphome::voice_assistant::VoiceAssistant::vad_instance_
vad_handle_t vad_instance_
Definition: voice_assistant.h:209

esphome::voice_assistant::FEATURE_SPEAKER
Definition: voice_assistant.h:37

esphome::voice_assistant::VoiceAssistant::is_running
bool is_running() const
Definition: voice_assistant.h:112

esphome::voice_assistant::State::STREAMING_RESPONSE

x
uint16_t x
Definition: tt21100.cpp:17

media_player.h

esphome::voice_assistant::State::WAIT_FOR_VAD

esphome::voice_assistant::VoiceAssistant::set_continuous
void set_continuous(bool continuous)
Definition: voice_assistant.h:113

esphome::HighFrequencyLoopRequester
Helper class to request loop() to be called as fast as possible.
Definition: helpers.h:603

automation.h

esphome::voice_assistant::State
State
Definition: voice_assistant.h:41

esphome::voice_assistant::StartContinuousAction::play
void play(Ts... x) override
Definition: voice_assistant.h:251

esphome::voice_assistant::State::STOPPING_MICROPHONE

esphome::voice_assistant::VoiceAssistant::get_legacy_version
uint32_t get_legacy_version() const
Definition: voice_assistant.h:85

esphome::voice_assistant::VoiceAssistant::get_client_connected_trigger
Trigger<> * get_client_connected_trigger() const
Definition: voice_assistant.h:145

esphome::voice_assistant::ConnectedCondition
Definition: voice_assistant.h:264

esphome::Component
Definition: component.h:68

api_pb2.h

esphome::voice_assistant::VoiceAssistant::get_api_connection
api::APIConnection * get_api_connection() const
Definition: voice_assistant.h:149

esphome::voice_assistant::StopAction
Definition: voice_assistant.h:254

esphome::voice_assistant::AUDIO_MODE_API
Definition: voice_assistant.h:59

esphome::voice_assistant::StopAction::play
void play(Ts... x) override
Definition: voice_assistant.h:256

esphome::voice_assistant::VoiceAssistant::speaker_buffer_
uint8_t * speaker_buffer_
Definition: voice_assistant.h:189

esphome::voice_assistant::State::STREAMING_MICROPHONE

esphome::voice_assistant::VoiceAssistant::get_listening_trigger
Trigger<> * get_listening_trigger() const
Definition: voice_assistant.h:129

esphome::voice_assistant::VoiceAssistant::noise_suppression_level_
uint8_t noise_suppression_level_
Definition: voice_assistant.h:216

esphome::voice_assistant::VoiceAssistant::use_wake_word_
bool use_wake_word_
Definition: voice_assistant.h:215

esphome::microphone::Microphone
Definition: microphone.h:16

esphome::voice_assistant::VoiceAssistant
Definition: voice_assistant.h:62

esphome::voice_assistant::State::WAITING_FOR_VAD

esphome::voice_assistant::VoiceAssistant::set_auto_gain
void set_auto_gain(uint8_t auto_gain)
Definition: voice_assistant.h:124

esphome::voice_assistant::VoiceAssistant::set_wake_word
void set_wake_word(const std::string &wake_word)
Definition: voice_assistant.h:151

esphome::voice_assistant::VoiceAssistant::set_noise_suppression_level
void set_noise_suppression_level(uint8_t noise_suppression_level)
Definition: voice_assistant.h:121

esphome::voice_assistant::VoiceAssistant::auto_gain_
uint8_t auto_gain_
Definition: voice_assistant.h:217

esphome::voice_assistant::ConnectedCondition::check
bool check(Ts... x) override
Definition: voice_assistant.h:266

speaker.h

ring_buffer.h

esphome::voice_assistant::VoiceAssistant::set_volume_multiplier
void set_volume_multiplier(float volume_multiplier)
Definition: voice_assistant.h:125

esphome::Action
Definition: automation.h:121

esphome::voice_assistant::VoiceAssistant::input_buffer_
int16_t * input_buffer_
Definition: voice_assistant.h:221

esphome::Condition
Base class for all automation conditions.
Definition: automation.h:74

defines.h

esphome::voice_assistant::VoiceAssistant::get_idle_trigger
Trigger<> * get_idle_trigger() const
Definition: voice_assistant.h:143

esphome::voice_assistant::VoiceAssistant::get_tts_start_trigger
Trigger< std::string > * get_tts_start_trigger() const
Definition: voice_assistant.h:141

esphome::voice_assistant::AUDIO_MODE_UDP
Definition: voice_assistant.h:58

esphome::voice_assistant::State::STOP_MICROPHONE

esphome::voice_assistant::VoiceAssistant::get_stt_vad_start_trigger
Trigger<> * get_stt_vad_start_trigger() const
Definition: voice_assistant.h:133

esphome::media_player::MediaPlayer
Definition: media_player.h:69

esphome::voice_assistant::VoiceAssistant::get_end_trigger
Trigger<> * get_end_trigger() const
Definition: voice_assistant.h:130

esphome::voice_assistant::VoiceAssistant::get_intent_start_trigger
Trigger<> * get_intent_start_trigger() const
Definition: voice_assistant.h:128

sockaddr_storage
Definition: headers.h:90

api_connection.h

esphome::voice_assistant::VoiceAssistantFeature
VoiceAssistantFeature
Definition: voice_assistant.h:35

esphome::voice_assistant::State::START_PIPELINE

esphome::voice_assistant::VoiceAssistant::get_client_disconnected_trigger
Trigger<> * get_client_disconnected_trigger() const
Definition: voice_assistant.h:146

esphome::voice_assistant::VoiceAssistant::get_wake_word_detected_trigger
Trigger<> * get_wake_word_detected_trigger() const
Definition: voice_assistant.h:138

esphome::voice_assistant::StartContinuousAction
Definition: voice_assistant.h:249

esphome::voice_assistant::IsRunningCondition
Definition: voice_assistant.h:259

esphome::shelly_dimmer::flags
const uint32_t flags
Definition: stm32flash.h:85

esphome::voice_assistant::VoiceAssistant::get_error_trigger
Trigger< std::string, std::string > * get_error_trigger() const
Definition: voice_assistant.h:142

esphome::voice_assistant::VoiceAssistant::volume_multiplier_
float volume_multiplier_
Definition: voice_assistant.h:218

esphome::voice_assistant::VoiceAssistant::ring_buffer_
std::unique_ptr< RingBuffer > ring_buffer_
Definition: voice_assistant.h:213

esphome::voice_assistant::StartAction
Definition: voice_assistant.h:234

esphome::voice_assistant::VoiceAssistant::is_continuous
bool is_continuous() const
Definition: voice_assistant.h:114

esphome::voice_assistant::VoiceAssistant::set_speaker
void set_speaker(speaker::Speaker *speaker)
Definition: voice_assistant.h:73

esphome::voice_assistant::VoiceAssistant::get_intent_end_trigger
Trigger<> * get_intent_end_trigger() const
Definition: voice_assistant.h:127

esphome::voice_assistant::VoiceAssistant::set_vad_threshold
void set_vad_threshold(uint8_t vad_threshold)
Definition: voice_assistant.h:118

esphome::voice_assistant::VoiceAssistant::get_tts_end_trigger
Trigger< std::string > * get_tts_end_trigger() const
Definition: voice_assistant.h:140

esphome::voice_assistant::VoiceAssistant::set_use_wake_word
void set_use_wake_word(bool use_wake_word)
Definition: voice_assistant.h:116

esphome::voice_assistant::VoiceAssistant::set_media_player
void set_media_player(media_player::MediaPlayer *media_player)
Definition: voice_assistant.h:79

esphome::voice_assistant::VoiceAssistant::silence_detection_
bool silence_detection_
Definition: voice_assistant.h:224

esphome::voice_assistant::FEATURE_VOICE_ASSISTANT
Definition: voice_assistant.h:36

esphome::voice_assistant::IsRunningCondition::check
bool check(Ts... x) override
Definition: voice_assistant.h:261

esphome::voice_assistant::VoiceAssistant::get_feature_flags
uint32_t get_feature_flags() const
Definition: voice_assistant.h:94

esphome::voice_assistant::StartAction::set_silence_detection
void set_silence_detection(bool silence_detection)
Definition: voice_assistant.h:243

esphome
This is a workaround until we can figure out a way to get the tflite-micro idf component code availab...
Definition: a01nyub.cpp:7

esphome::Trigger<>

esphome::voice_assistant::VoiceAssistant::get_stt_vad_end_trigger
Trigger<> * get_stt_vad_end_trigger() const
Definition: voice_assistant.h:132

component.h

microphone.h

helpers.h

esphome::voice_assistant::VoiceAssistant::get_stt_end_trigger
Trigger< std::string > * get_stt_end_trigger() const
Definition: voice_assistant.h:139

esphome::voice_assistant::StartAction::silence_detection_
bool silence_detection_
Definition: voice_assistant.h:246

socket.h

esphome::voice_assistant::State::STARTING_PIPELINE

esphome::voice_assistant::State::STARTING_MICROPHONE

esphome::voice_assistant::State::IDLE

esphome::voice_assistant::VoiceAssistant::get_start_trigger
Trigger<> * get_start_trigger() const
Definition: voice_assistant.h:131

esphome::voice_assistant::State::START_MICROPHONE

esphome::voice_assistant::FEATURE_API_AUDIO
Definition: voice_assistant.h:38

esphome::Parented
Helper class to easily give an object a parent of type T.
Definition: helpers.h:515

esphome::api::APIConnection
Definition: api_connection.h:16

esphome::api::VoiceAssistantAudio
Definition: api_pb2.h:1758

state
bool state
Definition: fan.h:34