Unity Intelligent Voice Assistant

A simple intelligent voice chatbot built in Unity

This article uses Baidu's speech recognition (ASR), speech synthesis (TTS), and UNIT conversational AI services to build a simple chat robot. Before starting, the core features to implement need to be pinned down. They are the following:
(1) Text chat between the user and the bot
(2) Voice chat between the user and the bot
(3) Playback of the voice chat history
(4) Switching between text chat and voice chat

Creating the UI

(screenshot of the chat UI layout)

Core Code

1. Human–machine text chat

chatDialog.onEndEdit.AddListener(delegate
{
    if (chatDialog != null)
    {
        if (chatDialog.text.Equals(""))
        {
            return;
        }
        chat.AddChatMessage(ChatUI.enumChatMessageType.MessageRight, chatDialog.text); // show the user's own message in the chat UI
        Unit_NLP(chatDialog.text);
        JsonDecode(result);
    }
});

/// <summary>
/// Conversational AI (NLP) request to the Baidu UNIT service
/// </summary>
/// <param name="mysay"></param>
/// <param name="session_id"></param>
/// <param name="action_id"></param>
public void Unit_NLP(string mysay, string session_id = "", string action_id = "")
{
    string token = accessToken;
    string host = "https://aip.baidubce.com/rpc/2.0/unit/service/chat?access_token=" + token;
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(host);
    request.Method = "POST";
    request.ContentType = "application/json";
    request.KeepAlive = true;
    JsonData send = new JsonData();
    send["version"] = "2.0";
    send["service_id"] = "S27034";
    send["log_id"] = "home";
    send["session_id"] = "home";
    send["action_id"] = "1017002";
    send["request"] = new JsonData();
    send["request"]["user_id"] = "88888";
    send["request"]["query"] = mysay;
    send["request"]["query_info"] = new JsonData();
    send["request"]["query_info"]["type"] = "TEXT";
    JsonData bot_session = new JsonData();
    bot_session["session_id"] = "";
    send["bot_session"] = JsonMapper.ToJson(bot_session);
    string sendStr = JsonMapper.ToJson(send);
    byte[] buffer = Encoding.UTF8.GetBytes(sendStr);
    request.ContentLength = buffer.Length;
    request.GetRequestStream().Write(buffer, 0, buffer.Length);
    HttpWebResponse response = (HttpWebResponse)request.GetResponse();
    StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8);
    result = reader.ReadToEnd();
}
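
For reference, the JSON body that Unit_NLP serializes and posts looks like the following (values taken from the snippet above, with an example query; the service_id, action_id, and user/session IDs are specific to the author's UNIT bot and should be replaced with your own). Note that bot_session is itself a JSON string, exactly as the code builds it:

{
    "version": "2.0",
    "service_id": "S27034",
    "log_id": "home",
    "session_id": "home",
    "action_id": "1017002",
    "request": {
        "user_id": "88888",
        "query": "你好",
        "query_info": { "type": "TEXT" }
    },
    "bot_session": "{\"session_id\":\"\"}"
}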
/// <summary>
/// Parse the JSON reply returned by the NLP service
/// </summary>
/// <param name="js"></param>
/// <returns></returns>
public List<string> JsonDecode(string js)
{
    List<string> Says = new List<string>();
    var json = SimpleJson.SimpleJson.DeserializeObject<SimpleJson.JsonObject>(js);
    if (json.ContainsKey("result"))
    {
        var result = (SimpleJson.JsonObject)json["result"];
        if (result.ContainsKey("response_list"))
        {
            var resArray = (SimpleJson.JsonArray)result["response_list"];
            var res = (SimpleJson.JsonObject)resArray[0];
            if (res.ContainsKey("action_list"))
            {
                var actArray = (SimpleJson.JsonArray)res["action_list"];
                var act = (SimpleJson.JsonObject)actArray[0];
                if (act.ContainsKey("say"))
                {
                    var say = (string)act["say"];

                    if (!isChooseSpeech)
                    {
                        // Text mode: show the bot's reply as a text bubble
                        chat.AddChatMessage(ChatUI.enumChatMessageType.MessageLeft, say);
                    }
                    else
                    {
                        // Voice mode: synthesize the reply, store the clip, and play it
                        StartCoroutine(StratTTS(say, s =>
                        {
                            robotclipDic.Add(robotindex, s.clip);
                            robotindex++;
                            chat.AddChatMessage(ChatUI.enumChatMessageType.MessageLeft);
                            tts_source.clip = s.clip;
                            tts_source.Play();
                        }));
                    }
                    Says.Add(say);
                }
            }
        }
    }
    return Says;
}
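
JsonDecode only drills into the fields it needs. The reply shape it assumes from the UNIT service is roughly the following (abridged; sibling fields in the real response are omitted):

{
    "result": {
        "response_list": [
            {
                "action_list": [
                    { "say": "reply text from the bot" }
                ]
            }
        ]
    }
}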

2. Human–machine voice chat

/// <summary>
/// Start recording
/// </summary>
public void StartRecord()
{
    saveAudioClip = Microphone.Start(currentDeviceName, false, recordMaxTime, recordFrequency);
}

/// <summary>
/// Stop recording
/// </summary>
public void EndRecord()
{
    Microphone.End(currentDeviceName);
    myclipDic.Add(myindex, saveAudioClip);
    myindex++;
    //source.PlayOneShot(saveAudioClip);
    StartCoroutine(RequestASR()); // request speech recognition
}
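
StartRecord and EndRecord are meant to be driven by a record button (the speechButton object shown in the full script), but the button wiring itself is not included in the article. A minimal sketch of one possible wiring, using Unity pointer events and a hypothetical helper component attached to the record button:

using UnityEngine;
using UnityEngine.EventSystems;

// Hypothetical helper: attach to the record button and assign the UnitManager reference in the Inspector.
public class HoldToTalkButton : MonoBehaviour, IPointerDownHandler, IPointerUpHandler
{
    public UnitManager unitManager;

    public void OnPointerDown(PointerEventData eventData)
    {
        unitManager.StartRecord(); // press: begin capturing from the microphone
    }

    public void OnPointerUp(PointerEventData eventData)
    {
        unitManager.EndRecord();   // release: stop capturing and kick off speech recognition
    }
}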

/// <summary>
/// Request speech recognition (ASR)
/// </summary>
/// <returns></returns>
public IEnumerator RequestASR()
{
    if (string.IsNullOrEmpty(accessToken))
    {
        yield return _GetAccessToken();
    }
    resulrStr = string.Empty;
    // Convert the recorded samples to 16-bit PCM (buffer covers up to 10 s of audio)
    float[] samples = new float[recordFrequency * 10 * saveAudioClip.channels];
    saveAudioClip.GetData(samples, 0);
    var samplesShort = new short[samples.Length];
    for (var index = 0; index < samples.Length; index++)
    {
        samplesShort[index] = (short)(samples[index] * short.MaxValue);
    }
    byte[] datas = new byte[samplesShort.Length * 2];
    Buffer.BlockCopy(samplesShort, 0, datas, 0, datas.Length);

    string url = string.Format("{0}?cuid={1}&token={2}", "https://vop.baidu.com/server_api", SystemInfo.deviceUniqueIdentifier, accessToken);

    WWWForm wwwForm = new WWWForm();
    wwwForm.AddBinaryData("audio", datas);

    UnityWebRequest unityWebRequest = UnityWebRequest.Post(url, wwwForm);

    unityWebRequest.SetRequestHeader("Content-Type", "audio/pcm;rate=" + recordFrequency);

    yield return unityWebRequest.SendWebRequest();

    if (string.IsNullOrEmpty(unityWebRequest.error))
    {
        resulrStr = unityWebRequest.downloadHandler.text;
        if (Regex.IsMatch(resulrStr, @"err_msg.:.success"))
        {
            Match match = Regex.Match(resulrStr, "result.:..(.*?)..]");
            if (match.Success)
            {
                resulrStr = match.Groups[1].ToString(); // the recognized text
                Unit_NLP(resulrStr);
                JsonDecode(result);
            }
        }
        else
        {
            resulrStr = "识别结果为空"; // "recognition result is empty"
        }
    }
}
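
RequestASR pulls the recognized text out of the raw JSON with regular expressions. As a sketch of a sturdier alternative, assuming the usual err_no/err_msg/result fields of Baidu's short-speech REST response, the reply could instead be mapped onto a small serializable type with JsonUtility:

[System.Serializable]
public class AsrResponse
{
    public int err_no;        // 0 on success (assumed field name)
    public string err_msg;
    public string sn;
    public string[] result;   // candidate transcriptions, best first
}

// Inside RequestASR, instead of the regex:
// var asr = JsonUtility.FromJson<AsrResponse>(unityWebRequest.downloadHandler.text);
// if (asr != null && asr.err_no == 0 && asr.result != null && asr.result.Length > 0)
// {
//     resulrStr = asr.result[0];
//     Unit_NLP(resulrStr);
//     JsonDecode(result);
// }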


/// <summary>
/// The speech-synthesis (TTS) response
/// </summary>
[System.Serializable]
public class TtsResponse
{
    public int error_index;
    public string error_msg;
    public string sn;
    public int idx;
    public bool Success
    {
        get { return error_index == 0; }
    }
    public AudioClip clip;
}

/// <summary>
/// Request speech synthesis (TTS)
/// </summary>
/// <param name="text"></param>
/// <param name="callback"></param>
/// <returns></returns>
public IEnumerator StratTTS(string text, Action<TtsResponse> callback)
{
    var url = "http://tsn.baidu.com/text2audio";

    var param = new Dictionary<string, string>();
    param.Add("tex", text);
    param.Add("tok", accessToken);
    param.Add("cuid", SystemInfo.deviceUniqueIdentifier);
    param.Add("ctp", "1");
    param.Add("lan", "zh");
    param.Add("spd", "5");
    param.Add("pit", "5");
    param.Add("vol", "10");
    param.Add("per", "1");
#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP
    param.Add("aue", "6"); // wav on Windows; mobile platforms need mp3
#endif
    int i = 0;
    foreach (var p in param)
    {
        url += i != 0 ? "&" : "?";
        url += p.Key + "=" + p.Value;
        i++;
    }
#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP  // pick the audio type that matches the platform
    var www = UnityWebRequestMultimedia.GetAudioClip(url, AudioType.WAV);
#else
    var www = UnityWebRequestMultimedia.GetAudioClip(url, AudioType.MPEG);
#endif
    Debug.Log("[WitBaiduAip]" + www.url);
    yield return www.SendWebRequest();

    if (www.isHttpError || www.isNetworkError)
        Debug.LogError(www.error);
    else
    {
        var type = www.GetResponseHeader("Content-Type");
        Debug.Log("[WitBaiduAip]response type: " + type);
        if (type.Contains("audio"))
        {
            var response = new TtsResponse { clip = DownloadHandlerAudioClip.GetContent(www) };
            callback(response);
        }
        else
        {
            // The service returned an error payload instead of audio
            var textBytes = www.downloadHandler.data;
            var errorText = Encoding.UTF8.GetString(textBytes);
            Debug.LogError("[WitBaiduAip]TTS error: " + errorText);
        }
    }
}
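
Usage mirrors what JsonDecode already does: pass the text plus a callback that receives the downloaded clip, for example:

StartCoroutine(StratTTS("你好,我是聊天机器人", response =>
{
    if (response.clip != null)
    {
        tts_source.clip = response.clip;
        tts_source.Play();
    }
}));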

3. Voice chat history playback

// Dictionary of sent (recorded) voice clips, kept on UnitManager
public int myindex = 0;
public Dictionary<int, AudioClip> myclipDic = new Dictionary<int, AudioClip>();
// Dictionary of received (robot) voice clips
public int robotindex = 0;
public Dictionary<int, AudioClip> robotclipDic = new Dictionary<int, AudioClip>();

// The Start() and GetValueByKey below live on the chat-message item, not on UnitManager:
// clicking a message's button replays the clip stored for that message.
void Start()
{
    index = UnitManager.instance.myindex;
    source = GameObject.Find("Canvas/robot").GetComponent<AudioSource>();
    Litsen_btn = transform.GetComponent<Button>();
    Litsen_btn.onClick.AddListener(() => {
        if (MessageType.Equals("myself"))
        {
            GetValueByKey(UnitManager.instance.myclipDic, index - 1);
        }
        else if (MessageType.Equals("robot"))
        {
            GetValueByKey(UnitManager.instance.robotclipDic, index - 1);
        }
    });
}

public void GetValueByKey(Dictionary<int, AudioClip> dic, int key)
{
    AudioClip clip = null;
    if (dic.TryGetValue(key, out clip))
    {
        source.clip = clip;
        source.Play();
    }
}

Complete code

using LitJson;
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using UnityEngine;
using UnityEngine.Networking;
using UnityEngine.UI;

public class UnitManager : MonoBehaviour
{
    public static UnitManager instance;
    private void Awake()
    {
        instance = this;
    }

    public string api_key;
    public string secret_key;
    string accessToken = string.Empty;

    InputField chatDialog;
    Button speechToggle; // toggles the current message type (voice or text)

    public ChatUI chat; // chat UI
    public bool isChooseSpeech = false; // whether voice mode is currently selected

    // Speech recognition
    string resulrStr = string.Empty;
    int recordFrequency = 8000;
    AudioClip saveAudioClip; // the recorded audio
    string currentDeviceName = string.Empty; // current microphone device name
    AudioSource source;

    int recordMaxTime = 20;
    public Sprite[] _sp;
    public GameObject speechButton;

    // NLP
    AudioSource tts_source;
    string result = string.Empty;

    // Dictionary of sent (recorded) voice clips
    public int myindex = 0;
    public Dictionary<int, AudioClip> myclipDic = new Dictionary<int, AudioClip>();
    // Dictionary of received (robot) voice clips
    public int robotindex = 0;
    public Dictionary<int, AudioClip> robotclipDic = new Dictionary<int, AudioClip>();

    void Start()
    {
        StartCoroutine(_GetAccessToken());
        chat = GameObject.Find("Canvas/ChatUI").GetComponent<ChatUI>();
        chatDialog = GameObject.Find("Canvas/ChatUI/InputArea/InputField").GetComponent<InputField>();
        speechToggle = GameObject.Find("Canvas/ChatUI/speechToggle").GetComponent<Button>();
        tts_source = GameObject.Find("Canvas/ChatUI/speechToggle").GetComponent<AudioSource>();
        source = transform.GetComponent<AudioSource>();
        chatDialog.onEndEdit.AddListener(delegate
        {
            if (chatDialog != null)
            {
                if (chatDialog.text.Equals(""))
                {
                    return;
                }
                chat.AddChatMessage(ChatUI.enumChatMessageType.MessageRight, chatDialog.text); // show the user's own message in the chat UI
                Unit_NLP(chatDialog.text);
                JsonDecode(result);
            }
        });
        speechToggle.onClick.AddListener(ToChangeSpeechToggle);
    }

    /// <summary>
    /// Toggle between sending voice and text; the AI replies with voice in voice mode and with text in text mode
    /// </summary>
    void ToChangeSpeechToggle()
    {
        if (!this.isChooseSpeech)
        {
            this.isChooseSpeech = true;
            speechToggle.GetComponent<Image>().sprite = _sp[0];
            speechButton.SetActive(true);
        }
        else
        {
            this.isChooseSpeech = false;
            speechToggle.GetComponent<Image>().sprite = _sp[1];
            speechButton.SetActive(false);
        }
    }

    /// <summary>
    /// Start recording
    /// </summary>
    public void StartRecord()
    {
        saveAudioClip = Microphone.Start(currentDeviceName, false, recordMaxTime, recordFrequency);
    }

    /// <summary>
    /// Stop recording
    /// </summary>
    public void EndRecord()
    {
        Microphone.End(currentDeviceName);
        myclipDic.Add(myindex, saveAudioClip);
        myindex++;
        //source.PlayOneShot(saveAudioClip);
        StartCoroutine(RequestASR()); // request speech recognition
    }

    /// <summary>
    /// Request speech recognition (ASR)
    /// </summary>
    /// <returns></returns>
    public IEnumerator RequestASR()
    {
        if (string.IsNullOrEmpty(accessToken))
        {
            yield return _GetAccessToken();
        }
        resulrStr = string.Empty;
        // Convert the recorded samples to 16-bit PCM (buffer covers up to 10 s of audio)
        float[] samples = new float[recordFrequency * 10 * saveAudioClip.channels];
        saveAudioClip.GetData(samples, 0);
        var samplesShort = new short[samples.Length];
        for (var index = 0; index < samples.Length; index++)
        {
            samplesShort[index] = (short)(samples[index] * short.MaxValue);
        }
        byte[] datas = new byte[samplesShort.Length * 2];
        Buffer.BlockCopy(samplesShort, 0, datas, 0, datas.Length);

        string url = string.Format("{0}?cuid={1}&token={2}", "https://vop.baidu.com/server_api", SystemInfo.deviceUniqueIdentifier, accessToken);

        WWWForm wwwForm = new WWWForm();
        wwwForm.AddBinaryData("audio", datas);

        UnityWebRequest unityWebRequest = UnityWebRequest.Post(url, wwwForm);

        unityWebRequest.SetRequestHeader("Content-Type", "audio/pcm;rate=" + recordFrequency);

        yield return unityWebRequest.SendWebRequest();

        if (string.IsNullOrEmpty(unityWebRequest.error))
        {
            resulrStr = unityWebRequest.downloadHandler.text;
            if (Regex.IsMatch(resulrStr, @"err_msg.:.success"))
            {
                Match match = Regex.Match(resulrStr, "result.:..(.*?)..]");
                if (match.Success)
                {
                    resulrStr = match.Groups[1].ToString(); // the recognized text
                    Unit_NLP(resulrStr);
                    JsonDecode(result);
                }
            }
            else
            {
                resulrStr = "识别结果为空"; // "recognition result is empty"
            }
        }
    }


    /// <summary>
    /// The speech-synthesis (TTS) response
    /// </summary>
    [System.Serializable]
    public class TtsResponse
    {
        public int error_index;
        public string error_msg;
        public string sn;
        public int idx;
        public bool Success
        {
            get { return error_index == 0; }
        }
        public AudioClip clip;
    }

    /// <summary>
    /// Request speech synthesis (TTS)
    /// </summary>
    /// <param name="text"></param>
    /// <param name="callback"></param>
    /// <returns></returns>
    public IEnumerator StratTTS(string text, Action<TtsResponse> callback)
    {
        var url = "http://tsn.baidu.com/text2audio";

        var param = new Dictionary<string, string>();
        param.Add("tex", text);
        param.Add("tok", accessToken);
        param.Add("cuid", SystemInfo.deviceUniqueIdentifier);
        param.Add("ctp", "1");
        param.Add("lan", "zh");
        param.Add("spd", "5");
        param.Add("pit", "5");
        param.Add("vol", "10");
        param.Add("per", "1");
#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP
        param.Add("aue", "6"); // wav on Windows; mobile platforms need mp3
#endif
        int i = 0;
        foreach (var p in param)
        {
            url += i != 0 ? "&" : "?";
            url += p.Key + "=" + p.Value;
            i++;
        }
#if UNITY_STANDALONE || UNITY_EDITOR || UNITY_UWP  // pick the audio type that matches the platform
        var www = UnityWebRequestMultimedia.GetAudioClip(url, AudioType.WAV);
#else
        var www = UnityWebRequestMultimedia.GetAudioClip(url, AudioType.MPEG);
#endif
        Debug.Log("[WitBaiduAip]" + www.url);
        yield return www.SendWebRequest();

        if (www.isHttpError || www.isNetworkError)
            Debug.LogError(www.error);
        else
        {
            var type = www.GetResponseHeader("Content-Type");
            Debug.Log("[WitBaiduAip]response type: " + type);
            if (type.Contains("audio"))
            {
                var response = new TtsResponse { clip = DownloadHandlerAudioClip.GetContent(www) };
                callback(response);
            }
            else
            {
                // The service returned an error payload instead of audio
                var textBytes = www.downloadHandler.data;
                var errorText = Encoding.UTF8.GetString(textBytes);
                Debug.LogError("[WitBaiduAip]TTS error: " + errorText);
            }
        }
    }


    /// <summary>
    /// Conversational AI (NLP) request to the Baidu UNIT service
    /// </summary>
    /// <param name="mysay"></param>
    /// <param name="session_id"></param>
    /// <param name="action_id"></param>
    public void Unit_NLP(string mysay, string session_id = "", string action_id = "")
    {
        string token = accessToken;
        string host = "https://aip.baidubce.com/rpc/2.0/unit/service/chat?access_token=" + token;
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(host);
        request.Method = "POST";
        request.ContentType = "application/json";
        request.KeepAlive = true;
        JsonData send = new JsonData();
        send["version"] = "2.0";
        send["service_id"] = "S27034";
        send["log_id"] = "home";
        send["session_id"] = "home";
        send["action_id"] = "1017002";
        send["request"] = new JsonData();
        send["request"]["user_id"] = "88888";
        send["request"]["query"] = mysay;
        send["request"]["query_info"] = new JsonData();
        send["request"]["query_info"]["type"] = "TEXT";
        JsonData bot_session = new JsonData();
        bot_session["session_id"] = "";
        send["bot_session"] = JsonMapper.ToJson(bot_session);
        string sendStr = JsonMapper.ToJson(send);
        byte[] buffer = Encoding.UTF8.GetBytes(sendStr);
        request.ContentLength = buffer.Length;
        request.GetRequestStream().Write(buffer, 0, buffer.Length);
        HttpWebResponse response = (HttpWebResponse)request.GetResponse();
        StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8);
        result = reader.ReadToEnd();
    }

    /// <summary>
    /// Parse the JSON reply returned by the NLP service
    /// </summary>
    /// <param name="js"></param>
    /// <returns></returns>
    public List<string> JsonDecode(string js)
    {
        List<string> Says = new List<string>();
        var json = SimpleJson.SimpleJson.DeserializeObject<SimpleJson.JsonObject>(js);
        if (json.ContainsKey("result"))
        {
            var result = (SimpleJson.JsonObject)json["result"];
            if (result.ContainsKey("response_list"))
            {
                var resArray = (SimpleJson.JsonArray)result["response_list"];
                var res = (SimpleJson.JsonObject)resArray[0];
                if (res.ContainsKey("action_list"))
                {
                    var actArray = (SimpleJson.JsonArray)res["action_list"];
                    var act = (SimpleJson.JsonObject)actArray[0];
                    if (act.ContainsKey("say"))
                    {
                        var say = (string)act["say"];

                        if (!isChooseSpeech)
                        {
                            // Text mode: show the bot's reply as a text bubble
                            chat.AddChatMessage(ChatUI.enumChatMessageType.MessageLeft, say);
                        }
                        else
                        {
                            // Voice mode: synthesize the reply, store the clip, and play it
                            StartCoroutine(StratTTS(say, s =>
                            {
                                robotclipDic.Add(robotindex, s.clip);
                                robotindex++;
                                chat.AddChatMessage(ChatUI.enumChatMessageType.MessageLeft);
                                tts_source.clip = s.clip;
                                tts_source.Play();
                            }));
                        }
                        Says.Add(say);
                    }
                }
            }
        }
        return Says;
    }

    /// <summary>
    /// Get the access token for the API requests
    /// </summary>
    /// <returns></returns>
    IEnumerator _GetAccessToken()
    {
        var uri =
            string.Format(
                "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id={0}&client_secret={1}",
                api_key, secret_key);
        UnityWebRequest unityWebRequest = UnityWebRequest.Get(uri);
        yield return unityWebRequest.SendWebRequest();
        if (unityWebRequest.isDone)
        {
            Match match = Regex.Match(unityWebRequest.downloadHandler.text, @"access_token.:.(.*?).,");
            if (match.Success)
            {
                Debug.Log("Access token matched");
                accessToken = match.Groups[1].ToString();
            }
            else
            {
                Debug.Log("Authentication failed: could not get the access token!");
            }
        }
    }
}
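
A final note on _GetAccessToken at the end of the class: it pulls the token out of the OAuth response with a regular expression. The response it matches against is roughly of the following form (placeholder values; the real reply carries additional fields):

{
    "access_token": "24.xxxxxxxxxxxxxxxx.2592000.1700000000.282335-00000000",
    "expires_in": 2592000
}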

Demo

Sending text:
(screenshot of a text conversation)
Sending voice:
(Since audio playback can't be demonstrated in an article, a screenshot will have to do.)
(screenshot of a voice conversation)
To wrap up: I'd be glad to discuss this topic with anyone who is interested, and anyone who needs the source code for a project can message me privately. My skills are limited; for now the basic features of a simple intelligent voice chatbot work on the Windows platform, and many details still need polishing. That's it for this article; hopefully tomorrow will be better.

Reposted from blog.csdn.net/weixin_43541308/article/details/122225034