小智聊天机器人应用层Application::Start()解析

小智聊天机器人应用层Application::Start()解析
源码如下:

void Application::Start() {
    
    
    auto& board = Board::GetInstance();
    SetDeviceState(kDeviceStateStarting);

    /* Setup the display */
    auto display = board.GetDisplay();

    /* Setup the audio codec */
    auto codec = board.GetAudioCodec();
    opus_decode_sample_rate_ = codec->output_sample_rate();
    opus_decoder_ = std::make_unique<OpusDecoderWrapper>(opus_decode_sample_rate_, 1);
    opus_encoder_ = std::make_unique<OpusEncoderWrapper>(16000, 1, OPUS_FRAME_DURATION_MS);
    // For ML307 boards, we use complexity 5 to save bandwidth
    // For other boards, we use complexity 3 to save CPU
    if (board.GetBoardType() == "ml307") {
    
    
        ESP_LOGI(TAG, "ML307 board detected, setting opus encoder complexity to 5");
        opus_encoder_->SetComplexity(5);
    } else {
    
    
        ESP_LOGI(TAG, "WiFi board detected, setting opus encoder complexity to 3");
        opus_encoder_->SetComplexity(3);
    }

    if (codec->input_sample_rate() != 16000) {
    
    
        input_resampler_.Configure(codec->input_sample_rate(), 16000);
        reference_resampler_.Configure(codec->input_sample_rate(), 16000);
    }
    codec->OnInputReady([this, codec]() {
    
    
        BaseType_t higher_priority_task_woken = pdFALSE;
        xEventGroupSetBitsFromISR(event_group_, AUDIO_INPUT_READY_EVENT, &higher_priority_task_woken);
        return higher_priority_task_woken == pdTRUE;
    });
    codec->OnOutputReady([this]() {
    
    
        BaseType_t higher_priority_task_woken = pdFALSE;
        xEventGroupSetBitsFromISR(event_group_, AUDIO_OUTPUT_READY_EVENT, &higher_priority_task_woken);
        return higher_priority_task_woken == pdTRUE;
    });
    codec->Start();

    /* Start the main loop */
    xTaskCreate([](void* arg) {
    
    
        Application* app = (Application*)arg;
        app->MainLoop();
        vTaskDelete(NULL);
    }, "main_loop", 4096 * 2, this, 2, nullptr);

    /* Wait for the network to be ready */
    board.StartNetwork();

    // Initialize the protocol
    display->SetStatus(Lang::Strings::LOADING_PROTOCOL);
#ifdef CONFIG_CONNECTION_TYPE_WEBSOCKET
    protocol_ = std::make_unique<WebsocketProtocol>();
#else
    protocol_ = std::make_unique<MqttProtocol>();
#endif
    protocol_->OnNetworkError([this](const std::string& message) {
    
    
        Alert(Lang::Strings::ERROR, message.c_str(), "sad");
    });
    protocol_->OnIncomingAudio([this](std::vector<uint8_t>&& data) {
    
    
        std::lock_guard<std::mutex> lock(mutex_);
        if (device_state_ == kDeviceStateSpeaking) {
    
    
            audio_decode_queue_.emplace_back(std::move(data));
        }
    });
    protocol_->OnAudioChannelOpened([this, codec, &board]() {
    
    
        board.SetPowerSaveMode(false);
        if (protocol_->server_sample_rate() != codec->output_sample_rate()) {
    
    
            ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion",
                protocol_->server_sample_rate(), codec->output_sample_rate());
        }
        SetDecodeSampleRate(protocol_->server_sample_rate());
        // IoT device descriptors
        last_iot_states_.clear();
        auto& thing_manager = iot::ThingManager::GetInstance();
        protocol_->SendIotDescriptors(thing_manager.GetDescriptorsJson());
    });
    protocol_->OnAudioChannelClosed([this, &board]() {
    
    
        board.SetPowerSaveMode(true);
        Schedule([this]() {
    
    
            auto display = Board::GetInstance().GetDisplay();
            display->SetChatMessage("system", "");
            SetDeviceState(kDeviceStateIdle);
        });
    });
    protocol_->OnIncomingJson([this, display](const cJSON* root) {
    
    
        // Parse JSON data
        auto type = cJSON_GetObjectItem(root, "type");
        if (strcmp(type->valuestring, "tts") == 0) {
    
    
            auto state = cJSON_GetObjectItem(root, "state");
            if (strcmp(state->valuestring, "start") == 0) {
    
    
                Schedule([this]() {
    
    
                    aborted_ = false;
                    if (device_state_ == kDeviceStateIdle || device_state_ == kDeviceStateListening) {
    
    
                        SetDeviceState(kDeviceStateSpeaking);
                    }
                });
            } else if (strcmp(state->valuestring, "stop") == 0) {
    
    
                Schedule([this]() {
    
    
                    if (device_state_ == kDeviceStateSpeaking) {
    
    
                        background_task_->WaitForCompletion();
                        if (keep_listening_) {
    
    
                            protocol_->SendStartListening(kListeningModeAutoStop);
                            SetDeviceState(kDeviceStateListening);
                        } else {
    
    
                            SetDeviceState(kDeviceStateIdle);
                        }
                    }
                });
            } else if (strcmp(state->valuestring, "sentence_start") == 0) {
    
    
                auto text = cJSON_GetObjectItem(root, "text");
                if (text != NULL) {
    
    
                    ESP_LOGI(TAG, "<< %s", text->valuestring);
                    Schedule([this, display, message = std::string(text->valuestring)]() {
    
    
                        display->SetChatMessage("assistant", message.c_str());
                    });
                }
            }
        } else if (strcmp(type->valuestring, "stt") == 0) {
    
    
            auto text = cJSON_GetObjectItem(root, "text");
            if (text != NULL) {
    
    
                ESP_LOGI(TAG, ">> %s", text->valuestring);
                Schedule([this, display, message = std::string(text->valuestring)]() {
    
    
                    display->SetChatMessage("user", message.c_str());
                });
            }
        } else if (strcmp(type->valuestring, "llm") == 0) {
    
    
            auto emotion = cJSON_GetObjectItem(root, "emotion");
            if (emotion != NULL) {
    
    
                Schedule([this, display, emotion_str = std::string(emotion->valuestring)]() {
    
    
                    display->SetEmotion(emotion_str.c_str());
                });
            }
        } else if (strcmp(type->valuestring, "iot") == 0) {
    
    
            auto commands = cJSON_GetObjectItem(root, "commands");
            if (commands != NULL) {
    
    
                auto& thing_manager = iot::ThingManager::GetInstance();
                for (int i = 0; i < cJSON_GetArraySize(commands); ++i) {
    
    
                    auto command = cJSON_GetArrayItem(commands, i);
                    thing_manager.Invoke(command);
                }
            }
        }
    });
    protocol_->Start();

    // Check for new firmware version or get the MQTT broker address
    ota_.SetCheckVersionUrl(CONFIG_OTA_VERSION_URL);
    ota_.SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
    ota_.SetHeader("Client-Id", board.GetUuid());
    ota_.SetHeader("X-Language", Lang::CODE);

    xTaskCreate([](void* arg) {
    
    
        Application* app = (Application*)arg;
        app->CheckNewVersion();
        vTaskDelete(NULL);
    }, "check_new_version", 4096 * 2, this, 1, nullptr);


#if CONFIG_USE_AUDIO_PROCESSING
    audio_processor_.Initialize(codec->input_channels(), codec->input_reference());
    audio_processor_.OnOutput([this](std::vector<int16_t>&& data) {
    
    
        background_task_->Schedule([this, data = std::move(data)]() mutable {
    
    
            opus_encoder_->Encode(std::move(data), [this](std::vector<uint8_t>&& opus) {
    
    
                Schedule([this, opus = std::move(opus)]() {
    
    
                    protocol_->SendAudio(opus);
                });
            });
        });
    });

    wake_word_detect_.Initialize(codec->input_channels(), codec->input_reference());
    wake_word_detect_.OnVadStateChange([this](bool speaking) {
    
    
        Schedule([this, speaking]() {
    
    
            if (device_state_ == kDeviceStateListening) {
    
    
                if (speaking) {
    
    
                    voice_detected_ = true;
                } else {
    
    
                    voice_detected_ = false;
                }
                auto led = Board::GetInstance().GetLed();
                led->OnStateChanged();
            }
        });
    });

    wake_word_detect_.OnWakeWordDetected([this](const std::string& wake_word) {
    
    
        Schedule([this, &wake_word]() {
    
    
            if (device_state_ == kDeviceStateIdle) {
    
    
                SetDeviceState(kDeviceStateConnecting);
                wake_word_detect_.EncodeWakeWordData();

                if (!protocol_->OpenAudioChannel()) {
    
    
                    ESP_LOGE(TAG, "Failed to open audio channel");
                    SetDeviceState(kDeviceStateIdle);
                    wake_word_detect_.StartDetection();
                    return;
                }
                
                std::vector<uint8_t> opus;
                // Encode and send the wake word data to the server
                while (wake_word_detect_.GetWakeWordOpus(opus)) {
    
    
                    protocol_->SendAudio(opus);
                }
                // Set the chat state to wake word detected
                protocol_->SendWakeWordDetected(wake_word);
                ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
                keep_listening_ = true;
                SetDeviceState(kDeviceStateListening);
            } else if (device_state_ == kDeviceStateSpeaking) {
    
    
                AbortSpeaking(kAbortReasonWakeWordDetected);
            }

            // Resume detection
            wake_word_detect_.StartDetection();
        });
    });
    wake_word_detect_.StartDetection();
#endif

    SetDeviceState(kDeviceStateIdle);
}
这段代码是 Application::Start() 方法的实现,它负责初始化应用程序的各个组件,并启动主循环。代码主要涉及音频处理、网络通信、协议处理、任务调度等功能。以下是对代码的详细讲解:

1. 获取单例对象并设置设备状态

auto& board = Board::GetInstance();
SetDeviceState(kDeviceStateStarting);
  • Board::GetInstance() 获取 Board 类的单例对象,Board 类可能负责管理硬件资源。
  • SetDeviceState(kDeviceStateStarting) 设置设备状态为“启动中”。

2. 初始化显示和音频编解码器

auto display = board.GetDisplay();
auto codec = board.GetAudioCodec();
  • board.GetDisplay() 获取显示设备对象,用于显示信息。
  • board.GetAudioCodec() 获取音频编解码器对象,用于处理音频数据。

3. 配置 Opus 编解码器

opus_decode_sample_rate_ = codec->output_sample_rate();
opus_decoder_ = std::make_unique<OpusDecoderWrapper>(opus_decode_sample_rate_, 1);
opus_encoder_ = std::make_unique<OpusEncoderWrapper>(16000, 1, OPUS_FRAME_DURATION_MS);
  • 将 Opus 解码采样率(opus_decode_sample_rate_)设置为音频编解码器的输出采样率。
  • 创建 Opus 解码器和编码器实例,编码器的采样率固定为 16000 Hz。

4. 根据板子类型设置编码器复杂度

if (board.GetBoardType() == "ml307") {
    
    
    ESP_LOGI(TAG, "ML307 board detected, setting opus encoder complexity to 5");
    opus_encoder_->SetComplexity(5);
} else {
    
    
    ESP_LOGI(TAG, "WiFi board detected, setting opus encoder complexity to 3");
    opus_encoder_->SetComplexity(3);
}
  • 根据板子类型设置 Opus 编码器的复杂度:ML307 板子设置为 5(按源码注释,目的是节省带宽),其他板子设置为 3(目的是节省 CPU)。

5. 配置音频重采样器

if (codec->input_sample_rate() != 16000) {
    
    
    input_resampler_.Configure(codec->input_sample_rate(), 16000);
    reference_resampler_.Configure(codec->input_sample_rate(), 16000);
}
  • 如果音频输入采样率不是 16000 Hz,则配置输入和参考音频的重采样器。

6. 设置音频输入和输出回调

codec->OnInputReady([this, codec]() {
    
    
    BaseType_t higher_priority_task_woken = pdFALSE;
    xEventGroupSetBitsFromISR(event_group_, AUDIO_INPUT_READY_EVENT, &higher_priority_task_woken);
    return higher_priority_task_woken == pdTRUE;
});
codec->OnOutputReady([this]() {
    
    
    BaseType_t higher_priority_task_woken = pdFALSE;
    xEventGroupSetBitsFromISR(event_group_, AUDIO_OUTPUT_READY_EVENT, &higher_priority_task_woken);
    return higher_priority_task_woken == pdTRUE;
});
codec->Start();
  • 设置音频输入和输出准备就绪时的回调函数。回调通过 xEventGroupSetBitsFromISR 设置事件组中的相应位(AUDIO_INPUT_READY_EVENT / AUDIO_OUTPUT_READY_EVENT),并返回是否因此唤醒了更高优先级的任务。
  • 启动音频编解码器。

7. 启动主循环任务

xTaskCreate([](void* arg) {
    
    
    Application* app = (Application*)arg;
    app->MainLoop();
    vTaskDelete(NULL);
}, "main_loop", 4096 * 2, this, 2, nullptr);
  • 创建一个任务来运行 MainLoop() 方法,任务名称为 main_loop,栈大小为 8192 字节,优先级为 2。

8. 启动网络

board.StartNetwork();
  • 启动网络连接;按源码注释(Wait for the network to be ready),此处会等待网络就绪后再继续执行后面的协议初始化。

9. 初始化协议

display->SetStatus(Lang::Strings::LOADING_PROTOCOL);
#ifdef CONFIG_CONNECTION_TYPE_WEBSOCKET
    protocol_ = std::make_unique<WebsocketProtocol>();
#else
    protocol_ = std::make_unique<MqttProtocol>();
#endif
  • 设置显示状态为“加载协议”。
  • 根据配置选择使用 WebSocket 或 MQTT 协议。

10. 设置协议回调

protocol_->OnNetworkError([this](const std::string& message) {
    
    
    Alert(Lang::Strings::ERROR, message.c_str(), "sad");
});
protocol_->OnIncomingAudio([this](std::vector<uint8_t>&& data) {
    
    
    std::lock_guard<std::mutex> lock(mutex_);
    if (device_state_ == kDeviceStateSpeaking) {
    
    
        audio_decode_queue_.emplace_back(std::move(data));
    }
});
protocol_->OnAudioChannelOpened([this, codec, &board]() {
    
    
    board.SetPowerSaveMode(false);
    if (protocol_->server_sample_rate() != codec->output_sample_rate()) {
    
    
        ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion",
            protocol_->server_sample_rate(), codec->output_sample_rate());
    }
    SetDecodeSampleRate(protocol_->server_sample_rate());
    last_iot_states_.clear();
    auto& thing_manager = iot::ThingManager::GetInstance();
    protocol_->SendIotDescriptors(thing_manager.GetDescriptorsJson());
});
protocol_->OnAudioChannelClosed([this, &board]() {
    
    
    board.SetPowerSaveMode(true);
    Schedule([this]() {
    
    
        auto display = Board::GetInstance().GetDisplay();
        display->SetChatMessage("system", "");
        SetDeviceState(kDeviceStateIdle);
    });
});
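  • 设置网络错误、音频数据接收、音频通道打开和关闭时的回调函数:网络错误时弹出提示;收到的音频数据只有在设备处于“说话”状态时才会加入解码队列;音频通道打开时关闭省电模式、按服务器采样率设置解码采样率并发送 IoT 设备描述;音频通道关闭时恢复省电模式,清空聊天消息并回到“空闲”状态。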

11. 处理 JSON 数据

// Dispatch an incoming JSON message according to its "type" field.
protocol_->OnIncomingJson([this, display](const cJSON* root) {
    // Parse JSON data. cJSON_GetObjectItem() yields a null pointer when the
    // key is absent, so bail out before strcmp() can dereference it.
    auto type = cJSON_GetObjectItem(root, "type");
    if (type == nullptr || type->valuestring == nullptr) {
        return;
    }

    if (strcmp(type->valuestring, "tts") == 0) {
        // Handle TTS messages
    } else if (strcmp(type->valuestring, "stt") == 0) {
        // Handle STT messages
    } else if (strcmp(type->valuestring, "llm") == 0) {
        // Handle LLM messages
    } else if (strcmp(type->valuestring, "iot") == 0) {
        // Handle IoT commands
    }
});
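  • 解析和处理传入的 JSON 数据,根据 type 字段处理不同类型的消息:tts(根据 state 为 start / stop / sentence_start 切换设备状态或显示助手文本)、stt(显示用户的识别文本)、llm(设置表情)、iot(逐条执行 commands 中的物联网命令)。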

12. 启动协议

protocol_->Start();
  • 启动协议通信。

13. 检查新固件版本

ota_.SetCheckVersionUrl(CONFIG_OTA_VERSION_URL);
ota_.SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
ota_.SetHeader("Client-Id", board.GetUuid());
ota_.SetHeader("X-Language", Lang::CODE);

xTaskCreate([](void* arg) {
    
    
    Application* app = (Application*)arg;
    app->CheckNewVersion();
    vTaskDelete(NULL);
}, "check_new_version", 4096 * 2, this, 1, nullptr);
  • 配置 OTA(Over-The-Air)更新相关参数。
  • 创建一个任务来检查新版本。

14. 初始化音频处理器和唤醒词检测

#if CONFIG_USE_AUDIO_PROCESSING
    audio_processor_.Initialize(codec->input_channels(), codec->input_reference());
    audio_processor_.OnOutput([this](std::vector<int16_t>&& data) {
    
    
        background_task_->Schedule([this, data = std::move(data)]() mutable {
    
    
            opus_encoder_->Encode(std::move(data), [this](std::vector<uint8_t>&& opus) {
    
    
                Schedule([this, opus = std::move(opus)]() {
    
    
                    protocol_->SendAudio(opus);
                });
            });
        });
    });

    wake_word_detect_.Initialize(codec->input_channels(), codec->input_reference());
    wake_word_detect_.OnVadStateChange([this](bool speaking) {
    
    
        Schedule([this, speaking]() {
    
    
            if (device_state_ == kDeviceStateListening) {
    
    
                if (speaking) {
    
    
                    voice_detected_ = true;
                } else {
    
    
                    voice_detected_ = false;
                }
                auto led = Board::GetInstance().GetLed();
                led->OnStateChanged();
            }
        });
    });

    // React to a detected wake word: open the audio channel and start
    // listening when idle, or interrupt playback when speaking.
    wake_word_detect_.OnWakeWordDetected([this](const std::string& wake_word) {
        // Capture the wake word by value: the closure is handed to Schedule()
        // and may run after this callback has returned, when a by-reference
        // capture of the parameter is no longer guaranteed to be valid.
        Schedule([this, wake_word]() {
            if (device_state_ == kDeviceStateIdle) {
                SetDeviceState(kDeviceStateConnecting);
                wake_word_detect_.EncodeWakeWordData();

                if (!protocol_->OpenAudioChannel()) {
                    ESP_LOGE(TAG, "Failed to open audio channel");
                    SetDeviceState(kDeviceStateIdle);
                    wake_word_detect_.StartDetection();
                    return;
                }

                std::vector<uint8_t> opus;
                // Encode and send the wake word data to the server
                while (wake_word_detect_.GetWakeWordOpus(opus)) {
                    protocol_->SendAudio(opus);
                }
                // Set the chat state to wake word detected
                protocol_->SendWakeWordDetected(wake_word);
                ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
                keep_listening_ = true;
                SetDeviceState(kDeviceStateListening);
            } else if (device_state_ == kDeviceStateSpeaking) {
                AbortSpeaking(kAbortReasonWakeWordDetected);
            }

            // Resume detection
            wake_word_detect_.StartDetection();
        });
    });
    wake_word_detect_.StartDetection();
#endif
  • 如果启用了音频处理,初始化音频处理器和唤醒词检测器。
  • 设置音频处理器输出和唤醒词检测器的回调函数。

15. 设置设备状态为“空闲”

SetDeviceState(kDeviceStateIdle);
  • 最后将设备状态设置为“空闲”。

总结

这段代码主要完成了以下工作:

  1. 初始化硬件和软件组件。
  2. 配置音频编解码器和重采样器。
  3. 启动主循环和网络连接。
  4. 初始化并启动协议通信。
  5. 处理音频和 JSON 数据。
  6. 检查新固件版本。
  7. 初始化音频处理器和唤醒词检测器。

代码结构清晰,功能模块化,适合嵌入式系统或物联网设备的应用程序。