9 static const char *
const TAG =
"micro_wake_word";
12 namespace micro_wake_word {
15 ESP_LOGCONFIG(TAG,
" - Wake Word: %s", this->
wake_word_.c_str());
21 ESP_LOGCONFIG(TAG,
" - VAD Model");
32 ESP_LOGE(TAG,
"Could not allocate the streaming model's tensor arena.");
40 ESP_LOGE(TAG,
"Could not allocate the streaming model's variable tensor arena.");
43 this->
ma_ = tflite::MicroAllocator::Create(this->
var_arena_, STREAMING_MODEL_VARIABLE_ARENA_SIZE);
44 this->
mrv_ = tflite::MicroResourceVariables::Create(this->
ma_, 20);
48 if (model->version() != TFLITE_SCHEMA_VERSION) {
49 ESP_LOGE(TAG,
"Streaming model's schema is not supported");
54 this->
interpreter_ = make_unique<tflite::MicroInterpreter>(
56 if (this->
interpreter_->AllocateTensors() != kTfLiteOk) {
57 ESP_LOGE(TAG,
"Failed to allocate tensors for the streaming model");
64 if ((input->dims->size != 3) || (input->dims->data[0] != 1) ||
65 (input->dims->data[2] != PREPROCESSOR_FEATURE_SIZE)) {
66 ESP_LOGE(TAG,
"Streaming model tensor input dimensions has improper dimensions.");
70 if (input->type != kTfLiteInt8) {
71 ESP_LOGE(TAG,
"Streaming model tensor input is not int8.");
77 if ((output->dims->size != 2) || (output->dims->data[0] != 1) || (output->dims->data[1] != 1)) {
78 ESP_LOGE(TAG,
"Streaming model tensor output dimension is not 1x1.");
81 if (output->type != kTfLiteUInt8) {
82 ESP_LOGE(TAG,
"Streaming model tensor output is not uint8.");
106 (int8_t *) (tflite::GetTensorData<int8_t>(input)) + PREPROCESSOR_FEATURE_SIZE * this->
current_stride_step_,
107 features, PREPROCESSOR_FEATURE_SIZE);
110 uint8_t stride = this->
interpreter_->input(0)->dims->data[1];
112 if (this->current_stride_step_ >= stride) {
113 this->current_stride_step_ = 0;
115 TfLiteStatus invoke_status = this->
interpreter_->Invoke();
116 if (invoke_status != kTfLiteOk) {
117 ESP_LOGW(TAG,
"Streaming interpreter invoke failed");
130 ESP_LOGE(TAG,
"Streaming interpreter is not initialized.");
141 const std::string &wake_word,
size_t tensor_arena_size) {
156 float sliding_window_average =
static_cast<float>(sum) / static_cast<float>(255 * this->
sliding_window_size_);
160 ESP_LOGD(TAG,
"The '%s' model sliding average probability is %.3f and most recent probability is %.3f",
161 this->
wake_word_.c_str(), sliding_window_average,
162 this->recent_streaming_probabilities_[this->
last_n_index_] / (255.0));
169 size_t tensor_arena_size) {
183 float sliding_window_average =
static_cast<float>(sum) / static_cast<float>(255 * this->
sliding_window_size_);
void log_model_config() override
tflite::MicroResourceVariables * mrv_
void log_model_config() override
void unload_model()
Destroys the TFLite interpreter and frees the tensor and variable arenas' memory. ...
std::vector< uint8_t > recent_streaming_probabilities_
uint8_t current_stride_step_
float probability_cutoff_
VADModel(const uint8_t *model_start, float probability_cutoff, size_t sliding_window_size, size_t tensor_arena_size)
bool load_model(tflite::MicroMutableOpResolver< 20 > &op_resolver)
Allocates tensor and variable arenas and sets up the model interpreter.
size_t tensor_arena_size_
void reset_probabilities()
Sets all recent_streaming_probabilities to 0.
std::unique_ptr< tflite::MicroInterpreter > interpreter_
tflite::MicroAllocator * ma_
const uint8_t * model_start_
bool perform_streaming_inference(const int8_t features[PREPROCESSOR_FEATURE_SIZE])
bool determine_detected() override
Checks for voice activity by comparing the max probability in the sliding window with the probability...
void deallocate(T *p, size_t n)
WakeWordModel(const uint8_t *model_start, float probability_cutoff, size_t sliding_window_average_size, const std::string &wake_word, size_t tensor_arena_size)
bool determine_detected() override
Checks for the wake word by comparing the mean probability in the sliding window with the probability...
Implementation of SPI Controller mode.
size_t sliding_window_size_
An STL allocator that uses SPI or internal RAM.