[Emotion Recognition + Voice Assistant] Code Write-up (2) -- Introducing the Two Main Features

Overview of the main features

If you spot any problems with the code while reading, or bad habits in my coding style, please point them out directly and I will carefully correct them. Thank you!

This is what the interface currently looks like when running in Unity.
The chat box is in the upper left.
The play button in the lower left is used for speech recognition, and the × button stops the model's current action and the music.
The play button in the lower right starts speech recognition.
The upper right shows the analysis of the user's current emotion.


The project flow is as follows.
Speech recognition flow: the user says something, Baidu Speech recognizes it and converts the audio to text, the text is sent to the Turing robot, and once the Turing robot's reply comes back, the reply text is converted back to speech with Baidu Speech and played (a rough sketch of the Turing and TTS steps, which are not covered by the code in this post, follows below).
Emotion recognition flow: the camera stays on and continuously analyzes the user's current emotion; when the user presses the emotion recognition button, the emotion at that moment is read from the emotion list, and the model performs the corresponding action and plays music according to that emotion.
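The code in this post only covers the speech-to-text part of that flow; the Turing robot query (the GetAnswer call that shows up later in the recognition code) and the Baidu TTS playback are not included. As a rough idea of what those two steps can look like, here is a minimal, untested sketch. The endpoint URLs, the parameter names (key/info for the Turing API, tex/tok/cuid/ctp/lan/aue for Baidu TTS) and the helper names AskTuring/PlayReply are my assumptions based on the public documentation of those services, not the project's actual code.

using System.Collections;
using LitJson;
using UnityEngine;

// Hypothetical sketch of the Turing-robot and TTS steps (not the project's real code).
public class SpeechPipelineSketch : MonoBehaviour
{
    private string turingKey = "<your Turing robot API key>";   // placeholder
    private string baiduToken = "<your Baidu access_token>";    // placeholder

    // Ask the Turing robot for a reply to the recognized text (v1-style GET API assumed).
    public IEnumerator AskTuring(string question)
    {
        string url = "http://www.tuling123.com/openapi/api?key=" + turingKey
                     + "&info=" + WWW.EscapeURL(question);
        WWW req = new WWW(url);
        yield return req;
        if (req.error == null)
        {
            JsonData json = JsonMapper.ToObject(req.text);
            string reply = json["text"].ToString();   // the "text" field holds the answer in the v1 API
            yield return StartCoroutine(PlayReply(reply));
        }
        else
        {
            Debug.LogError("Turing request failed: " + req.error);
        }
    }

    // Read the reply aloud with Baidu TTS; aue=6 requests WAV so Unity can decode it directly.
    public IEnumerator PlayReply(string reply)
    {
        string url = "http://tsn.baidu.com/text2audio?tex=" + WWW.EscapeURL(reply)
                     + "&tok=" + baiduToken + "&cuid=hmy&ctp=1&lan=zh&aue=6";
        WWW req = new WWW(url);
        yield return req;
        if (req.error == null)
        {
            AudioClip clip = req.GetAudioClip(false, false, AudioType.WAV);
            AudioSource.PlayClipAtPoint(clip, transform.position);
        }
        else
        {
            Debug.LogError("TTS request failed: " + req.error);
        }
    }
}

If something like this were used, the recognition code below would call StartCoroutine(AskTuring(audioToString)) where it currently calls GetAnswer(audioToString); the point here is only the shape of the two requests.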


Speech recognition

The code is below. (Based on code by the author 云图, original article link; what is posted here is the version adjusted for this project, e.g. with the token obtained manually.)

    #region Recording speech and converting it to text
    // Requires: using System; using System.Collections; using System.Text; using UnityEngine; using LitJson;
    private string token;                           // access_token
    private string cuid = "hmy";                    // user identifier
    private string format = "pcm";                  // audio format
    private int rate = 8000;                        // sample rate
    private int channel = 1;                        // number of channels
    private string speech;                          // audio data, base64-encoded
    private int len;                                // length of the raw audio data
    private string lan = "zh";                      // language
    private string grant_Type = "client_credentials";             // dpi id10266074
    private string client_ID = "<your Baidu API Key>";            // Baidu API Key
    private string client_Secret = "<your Baidu Secret Key>";     // Baidu Secret Key
    private string baiduAPI = "http://vop.baidu.com/server_api";

    // Token endpoint; replace client_id / client_secret with your own credentials.
    private string getTokenAPIPath =
    "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=4srHTxv1GNfimhTpiyc9VKxy&client_secret=7e2c45f43b52e4df312eaec0d57f0718";
    private Byte[] clipByte;

    /// <summary>
    /// The recognized text
    /// </summary>
    public static string audioToString;

    private AudioSource aud;
    private int audioLength; // length of the recording in seconds

    // Start recording
    public void StartRecord()
    {
        Debug.Log("开始说话");                                  // "start speaking"
        emotionsTestToSee1.text = "开始说话";
        if (Microphone.devices.Length == 0) return;             // no microphone available
        Microphone.End(null);
        aud.clip = Microphone.Start(null, false, 10, rate);     // record up to 10 seconds
    }


    // Stop recording and start recognition
    public void EndRecord()
    {
        Debug.Log("结束说话");                                  // "finished speaking"
        emotionsTestToSee1.text = ("结束说话,正在识别...");      // "finished speaking, recognizing..."
        int lastPos = Microphone.GetPosition(null);
        if (Microphone.IsRecording(null))
            audioLength = lastPos / rate;                       // recording length in seconds
        else
            audioLength = 10;
        Microphone.End(null);
        clipByte = GetClipData();
        len = clipByte.Length;
        speech = Convert.ToBase64String(clipByte);
        // GetToken chains into GetAudioString once the token is ready,
        // so GetAudioString does not need to be started separately here.
        StartCoroutine(GetToken(getTokenAPIPath));
    }



    /// <summary>
    /// Convert the recording into a Byte[] of 16-bit PCM samples
    /// </summary>
    /// <returns></returns>
    public Byte[] GetClipData()
    {
        if (aud.clip == null)
        {
            Debug.LogError("录音数据为空");
            return null;
        }

        float[] samples = new float[aud.clip.samples];

        aud.clip.GetData(samples, 0);


        Byte[] outData = new byte[samples.Length * 2];

        int rescaleFactor = 32767; //to convert float to Int16   

        for (int i = 0; i < samples.Length; i++)
        {
            short temshort = (short)(samples[i] * rescaleFactor);

            Byte[] temdata = System.BitConverter.GetBytes(temshort);

            outData[i * 2] = temdata[0];
            outData[i * 2 + 1] = temdata[1];
        }
        if (outData == null || outData.Length <= 0)
        {
            Debug.LogError("录音数据为空");
            return null;
        }

        return outData;
    }

    /// <summary>
    /// Fetch the Baidu access token
    /// </summary>
    /// <param name="url">the token endpoint URL</param>
    /// <returns></returns>
    private IEnumerator GetToken(string url)
    {
        WWW getTW = new WWW(url);
        yield return getTW;
        if (getTW.isDone)
        {
            if (getTW.error == null)
            {
                // Note: getTW.text is a JSON object, not the bare token,
                // so the access_token field would have to be parsed out of it (see the note below).
                //token = getTW.text;
                // Workaround: hard-coded token (see the discussion after this code block).
                token = "24.07fba5ccdfb95b03ac73d9cea458ea98.2592000.1527134506.282335-10915649";
                //Debug.Log(token);
                StartCoroutine(GetAudioString(baiduAPI));
            }
            else
            {
                Debug.LogError("获取令牌出错" + getTW.error);   // failed to fetch the token
            }
        }
        else
        {
            Debug.LogError("下载出错" + getTW.error);
        }
    }


    /// <summary>
    /// Send the audio to Baidu and get the recognized text
    /// </summary>
    /// <param name="url"></param>
    /// <returns></returns>
    private IEnumerator GetAudioString(string url)
    {
        JsonWriter jw = new JsonWriter();
        jw.WriteObjectStart();
        jw.WritePropertyName("format");
        jw.Write(format);
        jw.WritePropertyName("rate");
        jw.Write(rate);
        jw.WritePropertyName("channel");
        jw.Write(channel);
        jw.WritePropertyName("token");
        jw.Write(token);
        jw.WritePropertyName("cuid");
        jw.Write(cuid);
        jw.WritePropertyName("len");
        jw.Write(len);
        jw.WritePropertyName("speech");
        jw.Write(speech);
        jw.WriteObjectEnd();
        //Debug.Log(jw.ToString());
        WWW getASW = new WWW(url, Encoding.Default.GetBytes(jw.ToString()));
        //Debug.Log(getASW.ToString());
        yield return getASW;
        if (getASW.isDone)
        {

            if (getASW.error == null)
            { 
                JsonData getASWJson = JsonMapper.ToObject(getASW.text);
                Debug.Log(getASWJson.ToString());
                if (getASWJson["err_msg"].ToString() == "success.")
                {

                    audioToString = getASWJson["result"][0].ToString();
                    // strip a trailing comma from the result, if any
                    if (audioToString.Substring(audioToString.Length - 1) == ",")
                        audioToString = audioToString.Substring(0, audioToString.Length - 1);
                    Debug.Log("说话的问题是:" + audioToString);   // "the spoken question is: ..."
                    GetAnswer(audioToString);                      // hand the recognized text to the Turing robot
                }
                else
                {
                    Debug.LogWarning("没有成功:" + getASWJson["err_msg"].ToString()); // recognition did not succeed
                }
            }
            else
            {
                Debug.LogError(getASW.error);
            }
        }
    }
    #endregion

A few issues to mention here:
Automatically fetching the token always seems to fail with a couple of errors.

"token error" means the token was not obtained successfully.
The second error is an authentication failure.
After checking Baidu Speech's official forum, it turns out many people run into the same problem. I tested various approaches and in the end, with no better option, went with the simplest and crudest workaround: hard-coding the token.
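For what it is worth, one likely reason the automatic fetch kept failing: the token endpoint returns a JSON object rather than the bare token, so assigning getTW.text to token (the commented-out line in GetToken above) would send the whole JSON body as the access_token and authentication would fail. Below is a small, untested sketch of GetToken that parses the access_token field out of the response using the LitJson JsonMapper already used elsewhere in this code.

    // Untested sketch: parse access_token from Baidu's JSON token response
    // instead of hard-coding it. Assumes the documented response shape
    // {"access_token": "...", "expires_in": ..., ...}.
    private IEnumerator GetToken(string url)
    {
        WWW getTW = new WWW(url);
        yield return getTW;
        if (getTW.error == null)
        {
            JsonData tokenJson = JsonMapper.ToObject(getTW.text);
            token = tokenJson["access_token"].ToString();   // the bare token string
            StartCoroutine(GetAudioString(baiduAPI));
        }
        else
        {
            Debug.LogError("Failed to fetch the token: " + getTW.error);
        }
    }

If that works, the hard-coded token in GetToken is no longer needed.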


Emotion recognition

The code below implements the emotion recognition feature. I was stuck for a long time on how to use the Affdex SDK. I am a complete beginner, the kind who did not even know how to set up a project, and my teammates gave me a lot of help and suggestions along the way, which is how this feature finally came together.
The Affdex SDK is quite powerful: it can detect changes in individual facial features such as the eyebrows, the corners of the mouth, and the eyes, and infer the user's expression from them. I only needed the emotion analysis part, so that is the only piece I used.
Reading the code shows that Affdex stores its analysis results in the faces dictionary. I use ToString().Split('\n') to break that output apart, store the emotion values in an array, and then take the largest value in the array as the user's most likely emotion, after which the model reacts accordingly.
Getting the model to act according to the detected emotion is not hard; I implemented it with a switch statement.

using Affdex;
using System;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;

public class Listener : ImageResultsListener
{
    private GameObject emotionsTestToSee;
    public static Text emotionsTestToSee1;
    public static bool foundFace = false;

    // The model's Animator
    Animator Animator;
    private GameObject model;
    public string i;                               // emotion text shown in the UI panel
    private void Awake()
    {
        model = GameObject.FindGameObjectWithTag("model");                      // find the model
        Animator = model.GetComponent<Animator>();                              // get the Animator on the model
        emotionsTestToSee = GameObject.FindGameObjectWithTag("ET");
        emotionsTestToSee1 = emotionsTestToSee.GetComponentInChildren<Text>();  // the Text child under the message object
        emotionsTestToSee1.fontSize = 300;
    }


    public Dictionary<Emotions, float> Emotions { get; private set; }
    public Text textArea;
    public float[] emotionsArray = new float[7];   // one slot per emotion, filled in onImageResults
    public Dictionary<int, Face> emotion;
    public static int max = -1;                    // index of the strongest emotion
    public void Play(string str)
    {
        AudioClip clip = (AudioClip)Resources.Load(str, typeof(AudioClip));     // load an AudioClip from Resources
        Vector3 position = transform.position;
        AudioSource.PlayClipAtPoint(clip, position);
    }
    public override void onFaceFound(float timestamp, int faceId)
    {
        foundFace = true;

        Debug.Log("Found the face");
    }
    public override void onFaceLost(float timestamp, int faceId)
    {
        foundFace = false;

        Debug.Log("Lost the face");
    }
    public override void onImageResults(Dictionary<int, Face> faces)   // faces maps each face id (int) to a Face object
    {

        if (faces.Count > 0)
        {
            DebugFeatureViewer dfv = GameObject.FindObjectOfType<DebugFeatureViewer>();
            if (dfv != null)
            {
                dfv.ShowFace(faces[0]);
            }

            // Adjust font size to fit the selected platform.
            if ((Application.platform == RuntimePlatform.IPhonePlayer) ||
                (Application.platform == RuntimePlatform.Android))
            {
                textArea.fontSize = 36;
                //           textArea1.fontSize = 36;
            }
            else
            {
                textArea.fontSize = 12;
                //            textArea1.fontSize = 12;
            }
            // Continuation: pull the emotion lines out of the Face's ToString() output for display.
            // (A for loop broke the on-screen panel here, so the lines are concatenated by hand.)
            string[] faceLines = faces[0].ToString().Split('\n');
            i = faceLines[18]
                 + '\n' + faceLines[19]
                 + '\n' + faceLines[20]
                 + '\n' + faceLines[21]
                 + '\n' + faceLines[23]
                 + '\n' + faceLines[24]
                 + '\n' + faceLines[25]
                 + '\n' + faceLines[26]
                 + '\n' + faceLines[27];

            textArea.text = i;
            // Read each emotion's value: faces[0] is a Face, and its Emotions dictionary
            // is indexed by the Affdex.Emotions enum.
            emotionsArray[0] = faces[0].Emotions[Affdex.Emotions.Anger];      // anger
            emotionsArray[1] = faces[0].Emotions[Affdex.Emotions.Contempt];   // contempt
            emotionsArray[2] = faces[0].Emotions[Affdex.Emotions.Disgust];    // disgust
            emotionsArray[3] = faces[0].Emotions[Affdex.Emotions.Fear];       // fear
            emotionsArray[4] = faces[0].Emotions[Affdex.Emotions.Joy];        // joy
            emotionsArray[5] = Math.Abs(faces[0].Emotions[Affdex.Emotions.Valence]); // |valence|, used in place of sadness
            //emotionsArray[5] = faces[0].Emotions[Affdex.Emotions.Sadness];  // sadness
            emotionsArray[6] = faces[0].Emotions[Affdex.Emotions.Surprise];   // surprise
            //emotionsArray[3] = faces[0].Emotions[Affdex.Emotions.Engagement];
            //emotionsArray[8] = faces[0].Emotions[Affdex.Emotions.Valence];  // positive/negative valence
            // emotions[0] = 0;
            // emotion = faces;
            textArea.CrossFadeColor(Color.white, 0.2f, true, false);
            //     textArea1.CrossFadeColor(Color.white, 0.2f, true, false);
        }
        else
        {
            textArea.CrossFadeColor(new Color(1, 0.7f, 0.7f), 0.2f, true, false);
            //   textArea1.CrossFadeColor(new Color(1, 0.7f, 0.7f), 0.2f, true, false);
        }
    }



    // Find the most likely emotion: the index of the largest value in the array.
    public void max1(float[] j)
    {
        max = -1;      // reset so a stale result from a previous click is not reused
        float comp = 0;
        for (int i = 0; i < j.Length; i++)
        {
            if (j[i] > comp)
            {
                comp = j[i];
                max = i;
            }
        }
    }





    public void OnButtonclick()
    {
        int rand = UnityEngine.Random.Range(0, 2);
        if (foundFace == false)
        {
            emotionsTestToSee1.text = "摄像头没有识别到脸部";   // "the camera did not detect a face"
            return;
        }
        max1(emotionsArray);
        Debug.Log(max);

        // Reset all the animation flags
        Animator.SetBool("happy", false);      // happy dance
        Animator.SetBool("Contempt", false);   // reaction to being looked down on
        Animator.SetBool("Sadness", false);    // for negative emotions; the second dance currently stands in for this action
        Animator.SetBool("shiluo", false);     // "shiluo" = dejected
        Animator.SetBool("happy2", false);
        Animator.SetBool("Sadness2", false);
        Animator.SetBool("surprise", false);
        // int randEmo = 0;

        switch (max)
        {
            //emotionsArray[0] = faces[0].Emotions[Affdex.Emotions.Anger];    // anger
            //emotionsArray[1] = faces[0].Emotions[Affdex.Emotions.Contempt]; // contempt
            //emotionsArray[2] = faces[0].Emotions[Affdex.Emotions.Disgust];  // disgust (rarely reported)
            //emotionsArray[3] = faces[0].Emotions[Affdex.Emotions.Fear];     // fear
            //emotionsArray[4] = faces[0].Emotions[Affdex.Emotions.Joy];      // joy
            //emotionsArray[5] = faces[0].Emotions[Affdex.Emotions.Sadness];  // sadness (|valence| in practice)
            //emotionsArray[6] = faces[0].Emotions[Affdex.Emotions.Surprise]; // surprise

            case 0: // anger (emotionsArray[0] holds Anger, see the mapping above)
                Animator.SetBool("shiluo", true);
                emotionsTestToSee1.text = "感觉你有点低落,o(>﹏<)o";            // "you seem a bit down"
                break;
            case 1: // contempt
                Animator.SetBool("Contempt", true);
                emotionsTestToSee1.text = "我怎么觉得你在蔑视我!哼,生气了!";   // "I feel like you're looking down on me! Hmph, now I'm angry!"
                break;
            case 4: // joy
                if (rand == 0)
                {
                    Animator.SetBool("happy", true);
                    //AudioSource.PlayClipAtPoint(AudioClipHappy[0],new Vector3(0,0,0));
                    //randplay1();
                    emotionsTestToSee1.fontSize = 40;
                    emotionsTestToSee1.text = "happy!!跳支舞给你看吧";              // "happy!! Let me dance for you"

                }
                else
                {
                    Animator.SetBool("happy2", true);
                    emotionsTestToSee1.text = "我觉得你在笑!!跳支舞给你看吧";       // "I think you're smiling!! Let me dance for you"
                }

                break;
            case 5: // sadness (|valence|)
                if (rand == 0)
                {
                    Animator.SetBool("Sadness", true);
                    emotionsTestToSee1.text = "感觉你情绪消极,Take it easy!";        // "you seem to be feeling negative, take it easy!"
                }
                else
                {
                    Animator.SetBool("Sadness2", true);
                    emotionsTestToSee1.text = "你是不是压力有点大?要记得放松哦!~";  // "are you a bit stressed? Remember to relax!"
                }
                break;
            case 6: // surprise
                emotionsTestToSee1.text = "惊讶";
                Animator.SetBool("surprise", true);
                emotionsTestToSee1.text = "我觉得你有点惊讶!哈哈哈,我是不是很准啊~"; // "I think you're a bit surprised! Haha, am I accurate or what~"
                break;
        }
    }

}

That covers the main code. In the next post I will go through the code in more detail and summarize my experience.

Reposted from blog.csdn.net/hhmy77/article/details/80062643