1、编译及运行unimrcp
要使用unimrcp 必须先编译一个正常的unimrcpserver ,再修改里面的asr语音识别模块
1.1 获取unimrcp代码
mkdir unimrcpserver
cd unimrcpserver
git clone https://github.com/unispeech/unimrcp.git
1.2 下载unimrcp的依赖库,并安装
进入UniMRCP Dependencies - UniMRCP,直接选择最新版本下载
解压后,进入unimrcp-deps-XXX 目录,执行安装命令:
./build-dep-libs.sh
1.3 安装 unimrcp
./bootstrap
./configure
sudo make
sudo make install
后续就可以在/usr/local/unimrcp 路径下,看到unimrcp相关的内容
1.4 ASR 测试
1、先启动服务端 ./unimrcpserver
2、启动客户端
./umc
run recog
到这个时候,标准的 unimrcpserver 已经安装好了
2、安装并且完成和funasr的对接
因为funasr使用wss作为通信协议,感谢华为,华为的ASR 刚好有这个能力,而且它的SDK C++类库写的非常标准,阿里funasr的C++代码就不敢恭维了,整了很多让人感觉多此一举的代码,可能是一个刚毕业的985学生写的,用了很多类库,忘记了代码的核心是简洁明了
2.1 下载华为ASR SDK并且进行修改
华为SDK所依赖的三方库包括
- openssl
- jsoncpp
- websocketpp 只需要头文件,无需编译
- glog
- gflags
- boost 只需要头文件,无需编译
这些依赖库均以源码形式存放在SDK根目录。该SDK默认开启了所有依赖库的安装,如果你在系统中提前安装过openssl,则在构建的时候,可以选择执行 cmake .. -DOPENSSL=OFF,跳过SDK对openssl的安装
2.1.1 SDK 获取
wget --no-check-certificate https://sis-sdk-repository.obs.cn-north-1.myhuaweicloud.com/cpp/huaweicloud-cpp-sdk-sis-linux.1.3.3.tar.gz
tar -xzvf huaweicloud-cpp-sdk-sis-linux.1.3.3.tar.gz
2.1.2 一键安装
mkdir -p build
cd build && cmake .. && make -j
这个时候可以看到编译出来一个 libhuawei_rasr.so 文件
2.2 修改代码,支持funasr 标准
为了尽量减少工作量,只对几个核心的文件进行修改
2.2.1 修改RasrRequest.cpp 、RasrRequest.h
RasrRequest.h
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
*/
#ifndef RASR_REQUEST_H_
#define RASR_REQUEST_H_
#include "Utils.h"
namespace speech {
namespace huawei_asr {
/// Holds the per-session settings that are serialised into the JSON start
/// message sent to the FunASR server.
class RasrRequest {
public:
    /// Stores the audio sample rate, the chunk interval, the comma-separated
    /// chunk-size list (for example "5,10,5") and the optional hotwords string.
    RasrRequest(long auidioFs, int chunkInterval, std::string chunkSize, std::string hotwords);

    /// Serialises the stored settings into a styled JSON string.
    std::string ConstructParams();

private:
    long laudioFs_{8000};              ///< value written to the "audio_fs" field
    int chunkInterval_{10};            ///< value written to the "chunk_interval" field
    std::string chunkSize_{"5,10,5"};  ///< comma-separated integers for "chunk_size"
    std::string hotwords_{};           ///< written to "hotwords" only when non-empty
};
}
}
#endif
RasrRequest.cpp
#include "RasrRequest.h"
#include "json/value.h"
namespace speech {
namespace huawei_asr {
// Captures the session settings that ConstructParams() serialises later.
// Members are listed in declaration order so initialisation order is explicit.
RasrRequest::RasrRequest(long auidioFs, int chunkInterval, std::string chunkSize, std::string hotwords)
    : laudioFs_(auidioFs),
      chunkInterval_(chunkInterval),
      chunkSize_(chunkSize),
      hotwords_(hotwords)
{
}
std::string RasrRequest::ConstructParams()
{
Json::Value jsonbegin;
Json::Value chunksize;
chunksize = Json::Value(Json::arrayValue);
std::istringstream ss(chunkSize_);
std::string item;
while (std::getline(ss, item, ',')) {
chunksize.append(std::stoi(item));
}
jsonbegin["mode"] = "2pass";
jsonbegin["chunk_size"] = chunksize;
jsonbegin["chunk_interval"] = chunkInterval_;
jsonbegin["wav_name"] = "funcAsr";
jsonbegin["wav_format"] = "pcm";
jsonbegin["audio_fs"] = laudioFs_;
jsonbegin["itn"] = true;
jsonbegin["encoder_chunk_look_back"] = 4;
jsonbegin["decoder_chunk_look_back"] = 0;
jsonbegin["is_speaking"] = true;
if(!hotwords_.empty()){
jsonbegin["hotwords"] = hotwords_;
}
return jsonbegin.toStyledString();
}
}
}
2.2.2 RasrClient.cpp
// Sends the start message built from `request` and immediately reports the
// session as started.
//
// The SDK's wait for WB_START / WB_ERROR / WB_CLOSE via WaitStatus() is
// disabled here: per the original note, Ali FunASR returns no message in
// response to the start request, so OnStart() is invoked directly instead.
void RasrClient::SendStart(RasrRequest request) {
    if (!CheckStart()) {
        LOG(INFO) << "status " << WebsocketStatusToStr(websocketServicePtr->GetStatus()) << " can't send start";
        return;
    }

    websocketServicePtr->SetStatus(WB_BLOCKING);
    std::string startMessage = request.ConstructParams();
    ws.SendTxt(startMessage);
    websocketServicePtr->OnStart();
}
// Logs the end-of-stream request.
//
// The SDK's END command ({"command": "END", "cancel": "false"}) and the wait
// for WB_END / WB_ERROR / WB_CLOSE are disabled, so when CheckEnd() succeeds
// nothing is sent. Only the failure case produces further output.
// NOTE(review): confirm whether the FunASR server expects an explicit
// end-of-audio message here.
void RasrClient::SendEnd() {
    LOG(INFO) << " SendEnd";
    if (CheckEnd()) {
        return;
    }
    LOG(INFO) << "status " << WebsocketStatusToStr(websocketServicePtr->GetStatus()) << " can't send end";
}
// Opens the websocket connection to "wss://<endpoint>" and then waits for the
// connection to reach WB_CONNECT, WB_ERROR or WB_CLOSE (bounded by the
// configured connect timeout).
//
// The `api` argument is deliberately discarded, so no path is appended to the
// endpoint. Request signing via SignHeaders() is disabled, so an empty header
// map is sent.
void RasrClient::Connect(std::string api) {
    api.clear();
    LOG(INFO) << " RasrClient::Connect";

    if (!CheckConnect()) {
        LOG(INFO) << "status " << WebsocketStatusToStr(websocketServicePtr->GetStatus()) << " can't Connect";
    } else {
        std::map<std::string, std::string> headers;
        std::string url = "wss://" + authInfo.GetEndpoint() + api;
        websocketServicePtr->SetStatus(WB_BLOCKING);
        ws.Connect(url, headers);
    }

    // The wait runs in both cases, exactly as before.
    std::set<WebsocketStatus> targetStatuses{WB_CONNECT, WB_ERROR, WB_CLOSE};
    WaitStatus(targetStatuses, httpConfig.GetConnectTimeout());
}
2.2.3 WebsocketService.cpp
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
*/
#include "WebsocketService.h"
#include "json/json.h"
namespace speech {
namespace huawei_asr {
// Open handler: neither argument is used; the event is forwarded to
// ProcessConnect().
void WebsocketService::OnOpen(client *c, websocketpp::connection_hdl hdl)
{
    this->ProcessConnect();
}
void WebsocketService::OnStart(){
SetStatus(WB_START);
ProcessStart("");
}
// Close handler: neither argument is used; the event is forwarded to
// ProcessClose().
void WebsocketService::OnClose(client *c, websocketpp::connection_hdl hdl)
{
    this->ProcessClose();
}
// Failure handler: records the error state first ("avoid next step failed" in
// the original), then reports the connection's error message through
// ProcessError().
void WebsocketService::OnFail(client *c, websocketpp::connection_hdl hdl) {
    status_ = WB_ERROR;
    ProcessError(c->get_con_from_hdl(hdl)->get_ec().message());
}
// Message handler: text frames are passed to ProcessMessage(); any other
// opcode is reported as an error.
void WebsocketService::OnMessage(websocketpp::connection_hdl, client::message_ptr msg) {
    if (msg->get_opcode() != websocketpp::frame::opcode::text) {
        ProcessError(std::string("receive binary data, which is wrong"));
        return;
    }
    ProcessMessage(msg->get_payload());
}
void WebsocketService::ProcessMessage(std::string message) {
Json::CharReaderBuilder readerBuilder;
Json::Value root;
std::string errs;
std::istringstream s(message);
if (!Json::parseFromStream(readerBuilder, s, &root, &errs)) {
std::string errorMsg = "receive not json data, which is wrong "+message;
ProcessError(errorMsg);
return;
}
bool is_final = root["is_final"].asBool();
if(is_final == true){
ProcessEnd(message);
return;
}
std::string mode = root["mode"].asString();
std::string txtMsg = root["text"].asString();
if (mode == "2pass-online") {
ProcessEvent("VOICE_START");
}else
if (mode == "2pass-offline") {
ProcessResp(txtMsg);
ProcessEvent("VOICE_END");
} else {
ProcessError(message);
}
}
void WebsocketService::ProcessConnect() {
LOG(INFO) << " WebsocketService::ProcessConnect " ;
status_ = WB_CONNECT;
rasrListener_->OnConnect();
}
void WebsocketService::ProcessStart(std::string text) {
//LOG(DEBUG) << " WebsocketService::ProcessStart " ;
status_ = WB_START;
rasrListener_->OnStart(text);
}
void WebsocketService::ProcessEnd(std::string text) {
LOG(INFO) << " WebsocketService::ProcessEnd " ;
status_ = WB_END;
rasrListener_->OnEnd(text);
}
void WebsocketService::ProcessResp(std::string text) {
//LOG(INFO) << " WebsocketService::ProcessResp " ;
rasrListener_->OnResp(text);
}
void WebsocketService::ProcessEvent(std::string text) {
LOG(INFO) << " WebsocketService::ProcessEvent " ;
rasrListener_->OnEvent(text);
}
// Records the closed state, then notifies the listener.
void WebsocketService::ProcessClose() {
    // The log tag previously read "ProcessEvent" (copy-paste slip), which made
    // close events indistinguishable from ordinary events in the log.
    LOG(INFO) << " WebsocketService::ProcessClose " ;
    status_ = WB_CLOSE;
    rasrListener_->OnClose();
}
void WebsocketService::ProcessError(std::string text) {
LOG(INFO) << "websocket error: " << text;
status_ = WB_ERROR;
rasrListener_->OnError(text);
}
}
}
另外修改 RasrDemo.cpp、RasrMultiThread.cpp 以便符合最新的.h要求。另外由于华为代码问题,RasrListener.h 的 virtual需要调整,调整后如下
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
*/
#ifndef HUAWEICLOUD_CPP_SDK_SIS_CLION_RASRLISTENER_H
#define HUAWEICLOUD_CPP_SDK_SIS_CLION_RASRLISTENER_H
#include "Utils.h"
class RasrListener {
public:
virtual void OnConnect() {
LOG(INFO) << "rasr Connect success";;
}
virtual void OnStart(std::string text) {
LOG(INFO) << "rasr receive start response " << text;
}
virtual void OnResp(std::string text) {
// text encoded by utf-8 contains chinese character, which will cause error code. So we should convert to ansi
// cout << "rasr receive " << text << endl;
LOG(INFO) << "rasr receive " << text;
}
virtual void OnEnd(std::string text) {
LOG(INFO) << "rasr receive end response " << text;
}
virtual