FreeSWITCH 1.10 源码阅读(6)-unimrcp 模块原理

1. 前言

MRCP(Media Resource Control Protocol, 媒体资源控制协议) 是一种通讯协议,用于语音服务器向客户端提供各种语音服务,例如 语音识别(ASR)和语音合成(TTS)。FreeSWITCH 中的 unimrcp模块 就是对接 MRCP 协议栈的中间层,提供了集成使用 ASR、TTS 的能力。下图是 FreeSWITCH 中 unimrcp模块 的源码时序,下文将对源码进行分析

在这里插入图片描述

2. 源码分析

2.1 unimrcp 模块的加载

  1. 在《FreeSWITCH 1.10 源码阅读(1)-服务启动及 Event Socket 模块工作原理》一文中,笔者分析了 FreeSWITCH 加载模块的主流程。unimrcp 模块被加载时将触发 mod_unimrcp.c#SWITCH_MODULE_LOAD_FUNCTION(mod_unimrcp_load) 执行。这个函数比较简练,大致有以下几个关键点:

    1. 调用 mod_unimrcp.c#mod_unimrcp_do_config() 函数获取 XML 配置中指定的 unimrcp.conf 名称下的配置内容,这部分不做赘述
    2. 调用 mod_unimrcp.c#mod_unimrcp_client_create() 函数创建 FreeSWITCH 本地的 MRCP 客户端,用于后续与 MRCP 服务器交互
    3. 调用 mod_unimrcp.c#synth_load() 函数加载创建 TTS 应用
    4. 调用 mod_unimrcp.c#recog_load() 函数加载创建 ASR 应用,与 TTS 应用加载类似,不做赘述
    5. 调用库函数 mrcp_client.c#mrcp_client_start() 新开线程启动 MRCP 客户端,涉及库函数不做赘述
    /**
     * Module load hook for mod_unimrcp.
     *
     * Reserves the module's custom event subclasses, resets the module globals,
     * reads the module configuration, routes UniMRCP logging into FreeSWITCH,
     * builds the MRCP client plus the synthesizer and recognizer interfaces, and
     * finally starts the client stack.
     *
     * Returns SWITCH_STATUS_TERM when an event subclass cannot be reserved,
     * SWITCH_STATUS_FALSE when a default profile is not configured or when the
     * client, synthesizer or recognizer setup fails, and SWITCH_STATUS_SUCCESS
     * otherwise.
     */
    SWITCH_MODULE_LOAD_FUNCTION(mod_unimrcp_load)
    {
    	/* event subclasses are reserved in exactly this order */
    	const char *event_subclasses[] = { MY_EVENT_PROFILE_CREATE, MY_EVENT_PROFILE_CLOSE, MY_EVENT_PROFILE_OPEN };
    	const size_t subclass_count = sizeof(event_subclasses) / sizeof(event_subclasses[0]);
    	size_t idx;

    	for (idx = 0; idx < subclass_count; idx++) {
    		if (switch_event_reserve_subclass(event_subclasses[idx]) != SWITCH_STATUS_SUCCESS) {
    			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", event_subclasses[idx]);
    			return SWITCH_STATUS_TERM;
    		}
    	}

    	/* hand the freshly created module interface back through the out pointer */
    	*module_interface = switch_loadable_module_create_module_interface(pool, modname);

    	/* start from a clean global state */
    	memset(&globals, 0, sizeof(globals));
    	switch_mutex_init(&globals.mutex, SWITCH_MUTEX_UNNESTED, pool);
    	globals.speech_channel_number = 0;
    	switch_core_hash_init_nocase(&globals.profiles);

    	/* read the MRCP module configuration; both default profiles are mandatory */
    	mod_unimrcp_do_config();
    	if (zstr(globals.unimrcp_default_synth_profile)) {
    		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing default-tts-profile\n");
    		return SWITCH_STATUS_FALSE;
    	}
    	if (zstr(globals.unimrcp_default_recog_profile)) {
    		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing default-asr-profile\n");
    		return SWITCH_STATUS_FALSE;
    	}

    	/* forward UniMRCP log output to FreeSWITCH */
    	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "UniMRCP log level = %s\n", globals.unimrcp_log_level);
    	if (!apt_log_instance_create(APT_LOG_OUTPUT_NONE, str_to_log_level(globals.unimrcp_log_level), pool)) {
    		/* the log instance already exists, so only adjust its priority */
    		apt_log_priority_set(str_to_log_level(globals.unimrcp_log_level));
    	}
    	apt_log_ext_handler_set(unimrcp_log);

    	/* MRCP client */
    	globals.mrcp_client = mod_unimrcp_client_create(pool);
    	if (!globals.mrcp_client) {
    		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create mrcp client\n");
    		return SWITCH_STATUS_FALSE;
    	}

    	/* synthesizer interface */
    	if (synth_load(*module_interface, pool) != SWITCH_STATUS_SUCCESS) {
    		return SWITCH_STATUS_FALSE;
    	}

    	/* recognizer interface */
    	if (recog_load(*module_interface, pool) != SWITCH_STATUS_SUCCESS) {
    		return SWITCH_STATUS_FALSE;
    	}

    	/* start the client stack */
    	mrcp_client_start(globals.mrcp_client);

    	/* tell the core to keep loading this module */
    	return SWITCH_STATUS_SUCCESS;
    }
    
  2. mod_unimrcp.c#mod_unimrcp_client_create() 函数的关键点在于与底层 mrcp 库的交互,由于底层库已经不属于 FreeSWITCH 源码,本文不会再深入:

    1. 调用库函数 mrcp_client.c#mrcp_client_create() 创建 FreeSWITCH 作为客户端连接 MRCP 服务器的 mrcp_client_t 对象,并设置该对象上回调函数表中处理消息的函数为 mrcp_client.c#mrcp_client_msg_process()
    2. 调用库函数 mrcp_client_connection.c#mrcp_client_connection_agent_create() 创建 MRCP 连接端点对象 mrcp_connection_agent_t,用于管理底层 socket 数据读写
    3. 调用 mrcp_client.c#mrcp_client_connection_agent_register() 将 MRCP 连接端点注册到 FreeSWITCH 客户端对象中,并设置底层连接收到数据时的回调函数表为 mrcp_client.connection_method_vtable
    4. 解析 unimrcp 配置文件属性,创建对应的 profile,据此可以将多个 MRCP 服务器的连接信息隔离。如果是 v2 版本的 MRCP 协议,在 FreeSWITCH 和 MRCP 服务器之间还需要 SIP 信令交互,所以也会调用 mrcp_sofiasip_client_agent.c#mrcp_sofiasip_client_agent_create() 函数创建一个 SIP 交互的端点对象
     /**
      * Create the MRCP client and configure it from the module settings and the
      * <profiles> section of the module configuration file.
      *
      * One connection agent and one media engine are created and shared by all
      * profiles. For every <profile> element a signaling agent (RTSP for version
      * "1", SIP for version "2"), an RTP termination factory and an MRCP profile
      * are created and registered with the client; MRCPv1 profiles are created
      * without a connection agent.
      *
      * @param mod_pool the module memory pool, used for the directory layout and
      *                 for mod_unimrcp's own profile objects
      * @return the configured client, or NULL when any step fails
      */
     static mrcp_client_t *mod_unimrcp_client_create(switch_memory_pool_t *mod_pool)
     {
     	switch_xml_t cfg = NULL, xml = NULL, profiles = NULL, profile = NULL;
     	mrcp_client_t *client = NULL;
     	apr_pool_t *pool = NULL;
     	mrcp_resource_loader_t *resource_loader = NULL;
     	mrcp_resource_factory_t *resource_factory = NULL;
     	mpf_codec_manager_t *codec_manager = NULL;
     	apr_size_t max_connection_count = 0;
     	apt_bool_t offer_new_connection = FALSE;
     	mrcp_connection_agent_t *connection_agent;
     	mpf_engine_t *media_engine;
     	apt_dir_layout_t *dir_layout;

     	/* create the client */
     	if ((dir_layout = apt_default_dir_layout_create("../", mod_pool)) == NULL) {
     		goto done;
     	}
     	client = mrcp_client_create(dir_layout);
     	if (!client) {
     		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP client\n");
     		goto done;
     	}

     	/* everything below is allocated from the client's own pool */
     	pool = mrcp_client_memory_pool_get(client);
     	if (!pool) {
     		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to get MRCP client memory pool\n");
     		client = NULL;
     		goto done;
     	}

     	/* load the synthesizer and recognizer resources */
     	resource_loader = mrcp_resource_loader_create(FALSE, pool);
     	if (!resource_loader) {
     		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP resource loader\n");
     		client = NULL;
     		goto done;
     	} else {
     		apt_str_t synth_resource;
     		apt_str_t recog_resource;
     		apt_string_set(&synth_resource, "speechsynth");
     		mrcp_resource_load(resource_loader, &synth_resource);
     		apt_string_set(&recog_resource, "speechrecog");
     		mrcp_resource_load(resource_loader, &recog_resource);
     		resource_factory = mrcp_resource_factory_get(resource_loader);
     		mrcp_client_resource_factory_register(client, resource_factory);
     	}

     	codec_manager = mpf_engine_codec_manager_create(pool);
     	if (!codec_manager) {
     		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MPF codec manager\n");
     		client = NULL;
     		goto done;
     	}
     	if (!mrcp_client_codec_manager_register(client, codec_manager)) {
     		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create register MRCP client codec manager\n");
     		client = NULL;
     		goto done;
     	}

     	/* set up MRCP connection agent that will be shared with all profiles */
     	if (!zstr(globals.unimrcp_max_connection_count)) {
     		/*
     		 * Parse into a signed int first: storing a negative atoi() result
     		 * directly in the unsigned apr_size_t would wrap to a huge count
     		 * instead of falling back to the default below.
     		 */
     		int configured_count = atoi(globals.unimrcp_max_connection_count);
     		if (configured_count > 0) {
     			max_connection_count = (apr_size_t) configured_count;
     		}
     	}
     	if (max_connection_count == 0) {
     		max_connection_count = 100;
     	}
     	if (!zstr(globals.unimrcp_offer_new_connection)) {
     		/*
     		 * strcasecmp() returns 0 on a match, so its raw result must not be
     		 * used as the boolean. Enable the option for "true" (any case) or
     		 * for a non-zero number such as "1".
     		 */
     		const char *offer_setting = globals.unimrcp_offer_new_connection;
     		if (strcasecmp("true", offer_setting) == 0 || atoi(offer_setting) != 0) {
     			offer_new_connection = TRUE;
     		} else {
     			offer_new_connection = FALSE;
     		}
     	}
     	connection_agent = mrcp_client_connection_agent_create("MRCPv2ConnectionAgent", max_connection_count, offer_new_connection, pool);
     	if (!connection_agent) {
     		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP connection agent\n");
     		client = NULL;
     		goto done;
     	}
     	if (!zstr(globals.unimrcp_rx_buffer_size)) {
     		apr_size_t rx_buffer_size = (apr_size_t)atol(globals.unimrcp_rx_buffer_size);
     		if (rx_buffer_size > 0) {
     			mrcp_client_connection_rx_size_set(connection_agent, rx_buffer_size);
     		}
     	}
     	if (!zstr(globals.unimrcp_tx_buffer_size)) {
     		apr_size_t tx_buffer_size = (apr_size_t)atol(globals.unimrcp_tx_buffer_size);
     		if (tx_buffer_size > 0) {
     			mrcp_client_connection_tx_size_set(connection_agent, tx_buffer_size);
     		}
     	}
     	if (!zstr(globals.unimrcp_request_timeout)) {
     		apr_size_t request_timeout = (apr_size_t)atol(globals.unimrcp_request_timeout);
     		if (request_timeout > 0) {
     			mrcp_client_connection_timeout_set(connection_agent, request_timeout);
     		}
     	}
     	if (!mrcp_client_connection_agent_register(client, connection_agent)) {
     		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create register MRCP connection agent\n");
     		client = NULL;
     		goto done;
     	}

     	/* Set up the media engine that will be shared with all profiles */
     	media_engine = mpf_engine_create("MediaEngine", pool);
     	if (!media_engine) {
     		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MPF media engine\n");
     		client = NULL;
     		goto done;
     	}
     	if (!mpf_engine_scheduler_rate_set(media_engine, 1)) {
     		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to set MPF engine scheduler rate\n");
     		client = NULL;
     		goto done;
     	}
     	if (!mrcp_client_media_engine_register(client, media_engine)) {
     		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to register MPF media engine\n");
     		client = NULL;
     		goto done;
     	}

     	/* configure the client profiles */
     	if (!(xml = switch_xml_open_cfg(CONFIG_FILE, &cfg, NULL))) {
     		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Could not open %s\n", CONFIG_FILE);
     		client = NULL;
     		goto done;
     	}
     	if ((profiles = switch_xml_child(cfg, "profiles"))) {
     		for (profile = switch_xml_child(profiles, "profile"); profile; profile = switch_xml_next(profile)) {
     			/* a profile is a signaling agent + termination factory + media engine + connection agent (MRCPv2 only) */
     			mrcp_sig_agent_t *agent = NULL;
     			mpf_termination_factory_t *termination_factory = NULL;
     			mrcp_profile_t *mprofile = NULL;
     			mpf_rtp_config_t *rtp_config = NULL;
     			mpf_rtp_settings_t *rtp_settings = mpf_rtp_settings_alloc(pool);
     			mrcp_sig_settings_t *sig_settings = mrcp_signaling_settings_alloc(pool);
     			profile_t *mod_profile = NULL;
     			switch_xml_t default_params = NULL;
     			mrcp_connection_agent_t *v2_profile_connection_agent = NULL;

     			/* get profile attributes */
     			const char *name = apr_pstrdup(pool, switch_xml_attr(profile, "name"));
     			const char *version = switch_xml_attr(profile, "version");
     			if (zstr(name) || zstr(version)) {
     				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "<profile> missing name or version attribute\n");
     				client = NULL;
     				goto done;
     			}

     			/* prepare mod_unimrcp's profile for configuration */
     			profile_create(&mod_profile, name, mod_pool);
     			if (mod_profile) {
     				switch_core_hash_insert(globals.profiles, mod_profile->name, mod_profile);
     			} else {
     				client = NULL;
     				goto done;
     			}

     			/* pull in any default SPEAK params */
     			default_params = switch_xml_child(profile, "synthparams");
     			if (default_params) {
     				switch_xml_t param = NULL;
     				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading SPEAK params\n");
     				for (param = switch_xml_child(default_params, "param"); param; param = switch_xml_next(param)) {
     					const char *param_name = switch_xml_attr(param, "name");
     					const char *param_value = switch_xml_attr(param, "value");
     					if (zstr(param_name)) {
     						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing SPEAK param name\n");
     						client = NULL;
     						goto done;
     					}
     					if (zstr(param_value)) {
     						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing SPEAK param value\n");
     						client = NULL;
     						goto done;
     					}
     					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading SPEAK Param %s:%s\n", param_name, param_value);
     					switch_core_hash_insert(mod_profile->default_synth_params, switch_core_strdup(pool, param_name), switch_core_strdup(pool, param_value));
     				}
     			}

     			/* pull in any default RECOGNIZE params */
     			default_params = switch_xml_child(profile, "recogparams");
     			if (default_params) {
     				switch_xml_t param = NULL;
     				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading RECOGNIZE params\n");
     				for (param = switch_xml_child(default_params, "param"); param; param = switch_xml_next(param)) {
     					const char *param_name = switch_xml_attr(param, "name");
     					const char *param_value = switch_xml_attr(param, "value");
     					if (zstr(param_name)) {
     						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing RECOGNIZE param name\n");
     						client = NULL;
     						goto done;
     					}
     					if (zstr(param_value)) {
     						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing RECOGNIZE param value\n");
     						client = NULL;
     						goto done;
     					}
     					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading RECOGNIZE Param %s:%s\n", param_name, param_value);
     					switch_core_hash_insert(mod_profile->default_recog_params, switch_core_strdup(pool, param_name), switch_core_strdup(pool, param_value));
     				}
     			}

     			/* create RTP config, common to MRCPv1 and MRCPv2 */
     			rtp_config = mpf_rtp_config_alloc(pool);
     			rtp_config->rtp_port_min = DEFAULT_RTP_PORT_MIN;
     			rtp_config->rtp_port_max = DEFAULT_RTP_PORT_MAX;
     			apt_string_set(&rtp_config->ip, DEFAULT_LOCAL_IP_ADDRESS);

     			if (strcmp("1", version) == 0) {
     				/* MRCPv1 configuration */
     				switch_xml_t param = NULL;
     				rtsp_client_config_t *config = mrcp_unirtsp_client_config_alloc(pool);
     				config->origin = DEFAULT_SDP_ORIGIN;
     				sig_settings->resource_location = DEFAULT_RESOURCE_LOCATION;
     				/* MRCPv1 profiles are created without the shared connection agent */
     				v2_profile_connection_agent = NULL;

     				if (!zstr(globals.unimrcp_request_timeout)) {
     					apr_size_t request_timeout = (apr_size_t)atol(globals.unimrcp_request_timeout);
     					if (request_timeout > 0) {
     						config->request_timeout = request_timeout;
     					}
     				}
     				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading MRCPv1 profile: %s\n", name);
     				for (param = switch_xml_child(profile, "param"); param; param = switch_xml_next(param)) {
     					const char *param_name = switch_xml_attr(param, "name");
     					const char *param_value = switch_xml_attr(param, "value");
     					if (zstr(param_name)) {
     						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing param name\n");
     						client = NULL;
     						goto done;
     					}
     					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading Param %s:%s\n", param_name, param_value);
     					/* offer the param to each handler in turn; warn only when none accepts it */
     					if (!process_mrcpv1_config(config, sig_settings, param_name, param_value, pool) &&
     						!process_rtp_config(client, rtp_config, rtp_settings, param_name, param_value, pool) &&
     						!process_profile_config(mod_profile, param_name, param_value, mod_pool)) {
     						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring unknown param %s\n", param_name);
     					}
     				}
     				agent = mrcp_unirtsp_client_agent_create(name, config, pool);
     				if (!agent) {
     					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP RTSP client agent\n");
     					client = NULL;
     					goto done;
     				}
     			} else if (strcmp("2", version) == 0) {
     				/* MRCPv2 configuration */
     				mrcp_sofia_client_config_t *config = mrcp_sofiasip_client_config_alloc(pool);
     				switch_xml_t param = NULL;
     				config->local_ip = DEFAULT_LOCAL_IP_ADDRESS;
     				config->local_port = DEFAULT_SIP_LOCAL_PORT;
     				sig_settings->server_ip = DEFAULT_REMOTE_IP_ADDRESS;
     				sig_settings->server_port = DEFAULT_SIP_REMOTE_PORT;
     				config->ext_ip = NULL;
     				config->user_agent_name = DEFAULT_SOFIASIP_UA_NAME;
     				config->origin = DEFAULT_SDP_ORIGIN;
     				v2_profile_connection_agent = connection_agent;
     				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading MRCPv2 profile: %s\n", name);
     				for (param = switch_xml_child(profile, "param"); param; param = switch_xml_next(param)) {
     					const char *param_name = switch_xml_attr(param, "name");
     					const char *param_value = switch_xml_attr(param, "value");
     					if (zstr(param_name)) {
     						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing param name\n");
     						client = NULL;
     						goto done;
     					}
     					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading Param %s:%s\n", param_name, param_value);
     					/* offer the param to each handler in turn; warn only when none accepts it */
     					if (!process_mrcpv2_config(config, sig_settings, param_name, param_value, pool) &&
     						!process_rtp_config(client, rtp_config, rtp_settings, param_name, param_value, pool) &&
     						!process_profile_config(mod_profile, param_name, param_value, mod_pool)) {
     						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring unknown param %s\n", param_name);
     					}
     				}
     				agent = mrcp_sofiasip_client_agent_create(name, config, pool);
     				if (!agent) {
     					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP SIP client agent\n");
     					client = NULL;
     					goto done;
     				}
     			} else {
     				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "version must be either \"1\" or \"2\"\n");
     				client = NULL;
     				goto done;
     			}

     			termination_factory = mpf_rtp_termination_factory_create(rtp_config, pool);
     			if (!termination_factory) {
     				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create RTP termination factory\n");
     				client = NULL;
     				goto done;
     			}
     			mrcp_client_rtp_factory_register(client, termination_factory, name);
     			mrcp_client_rtp_settings_register(client, rtp_settings, "RTP-Settings");
     			mrcp_client_signaling_settings_register(client, sig_settings, "Signaling-Settings");
     			mrcp_client_signaling_agent_register(client, agent);

     			/* create the profile and register it */
     			mprofile = mrcp_client_profile_create(NULL, agent, v2_profile_connection_agent, media_engine, termination_factory, rtp_settings, sig_settings, pool);
     			if (!mprofile) {
     				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP client profile\n");
     				client = NULL;
     				goto done;
     			}
     			mrcp_client_profile_register(client, mprofile, name);
     		}
     	}

     done:

     	/* the parsed configuration is released on both the success and the failure path */
     	if (xml) {
     		switch_xml_free(xml);
     	}

     	return client;
     }
    
    
  3. mod_unimrcp.c#synth_load() 函数加载创建 TTS 功能应用的处理主要分为两个部分:

    1. 创建 SWITCH_SPEECH_INTERFACE 接口,将 TTS 相关功能封装到 FreeSWITCH 标准模块结构中,供上层使用
    2. 调用库函数 mrcp_application.c#mrcp_application_create() 创建 unimrcp 模块的 TTS 应用,这个部分主要是将 unimrcp 模块的处理逻辑嵌入到底层 MRCP 客户端,供底层回调通知上层
     /**
      * Register the TTS side of mod_unimrcp.
      *
      * Creates a SWITCH_SPEECH_INTERFACE on the given module interface and points
      * its callbacks at the synth_speech_* functions, creates the synthesizer MRCP
      * application, registers it with the MRCP client under the name "synth", and
      * fills the two lookup tables used for synthesizer parameters.
      *
      * @param module_interface the module interface that receives the speech interface
      * @param pool             pool used for the application and the param id objects
      * @return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE when the speech
      *         interface or the MRCP application cannot be created
      */
     static switch_status_t synth_load(switch_loadable_module_interface_t *module_interface, switch_memory_pool_t *pool)
     {
     	/* MRCP synthesizer header names and their UniMRCP ids, inserted in this order */
     	static const struct {
     		const char *header_name;
     		int header_id;
     	} synth_headers[] = {
     		{ "jump-size", SYNTHESIZER_HEADER_JUMP_SIZE },
     		{ "kill-on-barge-in", SYNTHESIZER_HEADER_KILL_ON_BARGE_IN },
     		{ "speaker-profile", SYNTHESIZER_HEADER_SPEAKER_PROFILE },
     		{ "completion-cause", SYNTHESIZER_HEADER_COMPLETION_CAUSE },
     		{ "completion-reason", SYNTHESIZER_HEADER_COMPLETION_REASON },
     		{ "voice-gender", SYNTHESIZER_HEADER_VOICE_GENDER },
     		{ "voice-age", SYNTHESIZER_HEADER_VOICE_AGE },
     		{ "voice-variant", SYNTHESIZER_HEADER_VOICE_VARIANT },
     		{ "voice-name", SYNTHESIZER_HEADER_VOICE_NAME },
     		{ "prosody-volume", SYNTHESIZER_HEADER_PROSODY_VOLUME },
     		{ "prosody-rate", SYNTHESIZER_HEADER_PROSODY_RATE },
     		{ "speech-marker", SYNTHESIZER_HEADER_SPEECH_MARKER },
     		{ "speech-language", SYNTHESIZER_HEADER_SPEECH_LANGUAGE },
     		{ "fetch-hint", SYNTHESIZER_HEADER_FETCH_HINT },
     		{ "audio-fetch-hint", SYNTHESIZER_HEADER_AUDIO_FETCH_HINT },
     		{ "failed-uri", SYNTHESIZER_HEADER_FAILED_URI },
     		{ "failed-uri-cause", SYNTHESIZER_HEADER_FAILED_URI_CAUSE },
     		{ "speak-restart", SYNTHESIZER_HEADER_SPEAK_RESTART },
     		{ "speak-length", SYNTHESIZER_HEADER_SPEAK_LENGTH },
     		{ "load-lexicon", SYNTHESIZER_HEADER_LOAD_LEXICON },
     		{ "lexicon-search-order", SYNTHESIZER_HEADER_LEXICON_SEARCH_ORDER }
     	};
     	size_t i;
     	switch_speech_interface_t *tts_iface;

     	/* expose the TTS callbacks through FreeSWITCH's speech interface */
     	tts_iface = (switch_speech_interface_t *) switch_loadable_module_create_interface(module_interface, SWITCH_SPEECH_INTERFACE);
     	if (!tts_iface) {
     		return SWITCH_STATUS_FALSE;
     	}
     	tts_iface->interface_name = MOD_UNIMRCP;
     	tts_iface->speech_open = synth_speech_open;
     	tts_iface->speech_close = synth_speech_close;
     	tts_iface->speech_feed_tts = synth_speech_feed_tts;
     	tts_iface->speech_read_tts = synth_speech_read_tts;
     	tts_iface->speech_flush_tts = synth_speech_flush_tts;
     	tts_iface->speech_text_param_tts = synth_speech_text_param_tts;
     	tts_iface->speech_numeric_param_tts = synth_speech_numeric_param_tts;
     	tts_iface->speech_float_param_tts = synth_speech_float_param_tts;

     	/* create the synthesizer application whose messages go to synth_message_handler */
     	globals.synth.app = mrcp_application_create(synth_message_handler, NULL, pool);
     	if (!globals.synth.app) {
     		return SWITCH_STATUS_FALSE;
     	}

     	/* session / channel event dispatch table */
     	globals.synth.dispatcher.on_session_update = NULL;
     	globals.synth.dispatcher.on_session_terminate = speech_on_session_terminate;
     	globals.synth.dispatcher.on_channel_add = speech_on_channel_add;
     	globals.synth.dispatcher.on_channel_remove = speech_on_channel_remove;
     	globals.synth.dispatcher.on_message_receive = synth_on_message_receive;

     	/* audio stream callbacks: only write_frame is set for the synthesizer */
     	globals.synth.audio_stream_vtable.destroy = NULL;
     	globals.synth.audio_stream_vtable.open_rx = NULL;
     	globals.synth.audio_stream_vtable.close_rx = NULL;
     	globals.synth.audio_stream_vtable.read_frame = NULL;
     	globals.synth.audio_stream_vtable.open_tx = NULL;
     	globals.synth.audio_stream_vtable.close_tx = NULL;
     	globals.synth.audio_stream_vtable.write_frame = synth_stream_write;
     	mrcp_client_application_register(globals.mrcp_client, globals.synth.app, "synth");

     	/* FreeSWITCH param name -> MRCP param name */
     	switch_core_hash_init_nocase(&globals.synth.fs_param_map);
     	switch_core_hash_insert(globals.synth.fs_param_map, "voice", "voice-name");

     	/* MRCP param name -> UniMRCP header id */
     	switch_core_hash_init_nocase(&globals.synth.param_id_map);
     	for (i = 0; i < sizeof(synth_headers) / sizeof(synth_headers[0]); i++) {
     		switch_core_hash_insert(globals.synth.param_id_map, synth_headers[i].header_name, unimrcp_param_id_create(synth_headers[i].header_id, pool));
     	}

     	return SWITCH_STATUS_SUCCESS;
     }
    

2.2 tts 功能的实现

  1. 以 speak 放音 APP 为例,当上层执行这个 APP 时实际调用到 mod_dptools.c#speak_function() 函数,可以看到该函数的主要处理是校验参数合法性,然后调用 switch_ivr_play_say.c#switch_ivr_speak_text() 函数

    /**
     * "speak" dialplan application: speak text on the session's channel through
     * a TTS engine.
     *
     * The application data is split on '|' into at most three fields:
     *   "engine|voice|text", "voice|text" or just "text".
     * A missing engine or voice is taken from the channel variables "tts_engine"
     * and "tts_voice". When engine, voice or text is still missing, the error is
     * logged, the channel is hung up with SWITCH_CAUSE_DESTINATION_OUT_OF_ORDER
     * and the application returns without attempting to speak.
     */
    SWITCH_STANDARD_APP(speak_function)
    {
    	switch_channel_t *channel = switch_core_session_get_channel(session);
    	char buf[10];
    	char *argv[3] = { 0 };
    	int argc;
    	const char *engine = NULL;
    	const char *voice = NULL;
    	char *text = NULL;
    	char *mydata = NULL;
    	switch_input_args_t args = { 0 };

    	if (zstr(data) || !(mydata = switch_core_session_strdup(session, data))) {
    		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params!\n");
    		return;
    	}

    	/* split a private copy; the text itself is later taken from the untouched original */
    	argc = switch_separate_string(mydata, '|', argv, sizeof(argv) / sizeof(argv[0]));

    	if (argc == 0) {
    		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params!\n");
    		return;
    	} else if (argc == 1) {
    		text = switch_core_session_strdup(session, data); /* unstripped text */
    	} else if (argc == 2) {
    		voice = argv[0];
    		/* same offset as in the split copy, applied to the original string */
    		text = switch_core_session_strdup(session, data + (argv[1] - argv[0])); /* unstripped text */
    	} else {
    		engine = argv[0];
    		voice = argv[1];
    		text = switch_core_session_strdup(session, data + (argv[2] - argv[0])); /* unstripped text */
    	}

    	/* fall back to the channel variables for anything not given explicitly */
    	if (!engine) {
    		engine = switch_channel_get_variable(channel, "tts_engine");
    	}

    	if (!voice) {
    		voice = switch_channel_get_variable(channel, "tts_voice");
    	}

    	if (!(engine && voice && text)) {
    		/* substitute printable placeholders so the log line shows which field is missing */
    		if (!engine) {
    			engine = "NULL";
    		}
    		if (!voice) {
    			voice = "NULL";
    		}
    		if (!text) {
    			text = "NULL";
    		}
    		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params! [%s][%s][%s]\n", engine, voice, text);
    		switch_channel_hangup(channel, SWITCH_CAUSE_DESTINATION_OUT_OF_ORDER);
    		/* stop here: the placeholder strings must never be passed on as real engine/voice/text */
    		return;
    	}

    	args.input_callback = on_dtmf;
    	args.buf = buf;
    	args.buflen = sizeof(buf);

    	switch_channel_set_variable(channel, SWITCH_PLAYBACK_TERMINATOR_USED, "");

    	switch_ivr_speak_text(session, engine, voice, text, &args);
    }
    
  2. switch_ivr_play_say.c#switch_ivr_speak_text() 函数核心处理为以下几步:

    1. 调用函数 switch_core_speech.c#switch_core_speech_open() 使用本地 MRCP 客户端请求 MRCP 服务器新建会话
    2. 调用函数 switch_ivr_play_say.c#switch_ivr_speak_text_handle() 处理语音合成
    SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *session,
     												  const char *tts_name, const char *voice_name, const char *text, switch_input_args_t *args)
    {
          
          
     switch_channel_t *channel = switch_core_session_get_channel(session);
     uint32_t rate = 0;
     int interval = 0;
     uint32_t channels;
     switch_frame_t write_frame = {
          
           0 };
     switch_timer_t ltimer, *timer;
     switch_codec_t lcodec, *codec;
     switch_memory_pool_t *pool = switch_core_session_get_pool(session);
     char *codec_name;
     switch_status_t status = SWITCH_STATUS_SUCCESS;
     switch_speech_handle_t lsh, *sh;
     switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
     const char *timer_name, *var;
     cached_speech_handle_t *cache_obj = NULL;
     int need_create = 1, need_alloc = 1;
     switch_codec_implementation_t read_impl = {
          
           0 };
     switch_core_session_get_read_impl(session, &read_impl);
    
     if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
          
          
     	return SWITCH_STATUS_FALSE;
     }
    
     arg_recursion_check_start(args);
    
     sh = &lsh;
     codec = &lcodec;
     timer = &ltimer;
    
     if ((var = switch_channel_get_variable(channel, SWITCH_CACHE_SPEECH_HANDLES_VARIABLE)) && switch_true(var)) {
          
          
     	switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "cache enabled");
     	if ((cache_obj = (cached_speech_handle_t *) switch_channel_get_private(channel, SWITCH_CACHE_SPEECH_HANDLES_OBJ_NAME))) {
          
          
     		need_create = 0;
     		if (!strcasecmp(cache_obj->tts_name, tts_name)) {
          
          
     			need_alloc = 0;
     		} else {
          
          
     			switch_ivr_clear_speech_cache(session);
     		}
     	}
    
     	if (!cache_obj) {
          
          
     		cache_obj = (cached_speech_handle_t *) switch_core_session_alloc(session, sizeof(*cache_obj));
     	}
     	if (need_alloc) {
          
          
     		switch_copy_string(cache_obj->tts_name, tts_name, sizeof(cache_obj->tts_name));
     		switch_copy_string(cache_obj->voice_name, voice_name, sizeof(cache_obj->voice_name));
     		switch_channel_set_private(channel, SWITCH_CACHE_SPEECH_HANDLES_OBJ_NAME, cache_obj);
     	}
     	sh = &cache_obj->sh;
     	codec = &cache_obj->codec;
     	timer = &cache_obj->timer;
     }
    
     timer_name = switch_channel_get_variable(channel, "timer_name");
    
     switch_core_session_reset(session, SWITCH_FALSE, SWITCH_FALSE);
    
     rate = read_impl.actual_samples_per_second;
     interval = read_impl.microseconds_per_packet / 1000;
     channels = read_impl.number_of_channels;
    
     if (need_create) {
          
          
     	memset(sh, 0, sizeof(*sh));
     	if ((status = switch_core_speech_open(sh, tts_name, voice_name, (uint32_t) rate, interval, read_impl.number_of_channels, &flags, NULL)) != SWITCH_STATUS_SUCCESS) {
          
          
     		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid TTS module %s[%s]!\n", tts_name, voice_name);
     		switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE);
     		switch_ivr_clear_speech_cache(session);
     		arg_recursion_check_stop(args);
     		return status;
     	}
     } else if (cache_obj && strcasecmp(cache_obj->voice_name, voice_name)) {
          
          
     	switch_copy_string(cache_obj->voice_name, voice_name, sizeof(cache_obj->voice_name));
     	switch_core_speech_text_param_tts(sh, "voice", voice_name);
     }
    
     if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
          
          
     	flags = 0;
     	switch_core_speech_close(sh, &flags);
     	arg_recursion_check_stop(args);
     	return SWITCH_STATUS_FALSE;
     }
     switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "OPEN TTS %s\n", tts_name);
    
     codec_name = "L16";
    
     if (need_create) {
          
          
     	if (switch_core_codec_init(codec,
     							   codec_name,
     							   NULL,
     							   NULL, (int) rate, interval, channels, SWITCH_CODEC_FLAG_ENCODE | SWITCH_CODEC_FLAG_DECODE, NULL,
     							   pool) == SWITCH_STATUS_SUCCESS) {
          
          
     		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Raw Codec Activated\n");
     	} else {
          
          
     		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Raw Codec Activation Failed %s@%uhz 1 channel %dms\n", codec_name,
     						  rate, interval);
     		flags = 0;
     		switch_core_speech_close(sh, &flags);
     		switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE);
     		switch_ivr_clear_speech_cache(session);
     		arg_recursion_check_stop(args);
     		return SWITCH_STATUS_GENERR;
     	}
     }
    
     write_frame.codec = codec;
    
     if (timer_name) {
          
          
     	if (need_create) {
          
          
     		if (switch_core_timer_init(timer, timer_name, interval, (int) sh->samples, pool) != SWITCH_STATUS_SUCCESS) {
          
          
     			switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Setup timer failed!\n");
     			switch_core_codec_destroy(write_frame.codec);
     			flags = 0;
     			switch_core_speech_close(sh, &flags);
     			switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE);
     			switch_ivr_clear_speech_cache(session);
     			arg_recursion_check_stop(args);
     			return SWITCH_STATUS_GENERR;
     		}
     		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Setup timer success %u bytes per %d ms!\n", sh->samples * 2,
     						  interval);
     	}
     	switch_core_timer_sync(timer); // Sync timer
    
     	/* start a thread to absorb incoming audio */
     	switch_core_service_session(session);
    
     }
    
     status = switch_ivr_speak_text_handle(session, sh, write_frame.codec, timer_name ? timer : NULL, text, args);
     flags = 0;
    
     if (!cache_obj) {
          
          
     	switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "no cache_obj");
     	switch_core_speech_close(sh, &flags);
     	switch_core_codec_destroy(codec);
     }
    
     if (timer_name) {
          
          
     	/* End the audio absorbing thread */
     	switch_core_thread_session_end(session);
     	if (!cache_obj) {
          
          
     		switch_core_timer_destroy(timer);
     	}
     }
    
     switch_core_session_reset(session, SWITCH_FALSE, SWITCH_TRUE);
     arg_recursion_check_stop(args);
    
     return status;
    }
    
    
  3. switch_core_speech.c#switch_core_speech_open() 函数实际只是通过核心注册的接口调用到 unimrcp 模块的 mod_unimrcp.c#synth_speech_open() 函数

    /*
     * Open a speech (TTS) handle on the engine identified by module_name.
     *
     * module_name may be "engine" or "engine:param"; everything after the first
     * ':' is stored in sh->param and only the engine part is used for the
     * interface lookup. When pool is NULL a new memory pool is created for the
     * handle and SWITCH_SPEECH_FLAG_FREE_POOL is set on it.
     *
     * Returns SWITCH_STATUS_FALSE on missing arguments, SWITCH_STATUS_GENERR when
     * no speech interface is registered under that name, the pool-creation
     * status if that fails, otherwise whatever the engine's speech_open()
     * callback returns.
     */
    SWITCH_DECLARE(switch_status_t) switch_core_speech_open(switch_speech_handle_t *sh,
                                                            const char *module_name,
                                                            const char *voice_name,
                                                            unsigned int rate, unsigned int interval, unsigned int channels,
                                                            switch_speech_flag_t *flags, switch_memory_pool_t *pool)
    {
        char engine_buf[256] = "";
        char *engine_param = NULL;
        switch_status_t result;

        if (!sh || !flags || zstr(module_name)) {
            return SWITCH_STATUS_FALSE;
        }

        /* Split "engine:param" using a local copy so the caller's string is untouched. */
        if (strchr(module_name, ':')) {
            char *colon;

            switch_set_string(engine_buf, module_name);
            colon = strchr(engine_buf, ':');
            if (colon) {
                *colon = '\0';
                engine_param = colon + 1;
                module_name = engine_buf;
            }
        }

        sh->speech_interface = switch_loadable_module_get_speech_interface(module_name);
        if (sh->speech_interface == 0) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid speech module [%s]!\n", module_name);
            return SWITCH_STATUS_GENERR;
        }

        sh->flags = *flags;

        if (!pool) {
            /* No pool supplied: the handle gets its own and is flagged accordingly. */
            result = switch_core_new_memory_pool(&sh->memory_pool);
            if (result != SWITCH_STATUS_SUCCESS) {
                UNPROTECT_INTERFACE(sh->speech_interface);
                return result;
            }
            switch_set_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL);
        } else {
            sh->memory_pool = pool;
        }

        sh->engine = switch_core_strdup(sh->memory_pool, module_name);
        if (engine_param) {
            sh->param = switch_core_strdup(sh->memory_pool, engine_param);
        }

        sh->rate = rate;
        sh->name = switch_core_strdup(sh->memory_pool, module_name);
        sh->samples = switch_samples_per_packet(rate, interval);
        sh->samplerate = rate;
        sh->native_rate = rate;
        sh->channels = channels;
        sh->real_channels = 1;

        /* Delegate the real work to the engine module's speech_open() callback. */
        result = sh->speech_interface->speech_open(sh, voice_name, rate, channels, flags);
        if (result != SWITCH_STATUS_SUCCESS) {
            UNPROTECT_INTERFACE(sh->speech_interface);
            return result;
        }

        switch_set_flag(sh, SWITCH_SPEECH_FLAG_OPEN);
        return result;
    }
    
  4. mod_unimrcp.c#synth_speech_open() 函数的核心处理是创建一个 FreeSWITCH 层面的 speech_channel_t 对象,并调用 mod_unimrcp.c#speech_channel_open() 函数通过底层 MRCP 客户端建立与远程 MRCP 服务端之间的连接

    /*
     * speech_open() callback of the unimrcp synthesizer interface.
     *
     * Creates a speech_channel_t named "TTS-<n>" in the handle's memory pool,
     * stores it in sh->private_info, opens it against the profile named by
     * sh->param (or globals.unimrcp_default_synth_profile when empty), then
     * applies voice_name as "Voice-Name" followed by the profile's default
     * synthesizer parameters.
     *
     * sh->param may be "profile" or "profile:session-uuid". Without a uuid
     * suffix, the session stored on the memory pool under "__session" (if any)
     * supplies the uuid used for logging.
     *
     * Returns SWITCH_STATUS_SUCCESS, SWITCH_STATUS_FALSE when the channel cannot
     * be created or the profile is unknown, or the status of
     * speech_channel_open() when that fails. channels and flags are not used.
     */
    static switch_status_t synth_speech_open(switch_speech_handle_t *sh, const char *voice_name, int rate, int channels, switch_speech_flag_t *flags)
    {
        switch_status_t status = SWITCH_STATUS_SUCCESS;
        speech_channel_t *tts_channel = NULL;
        const char *profile_name = sh->param;
        profile_t *profile = NULL;
        int channel_number = get_next_speech_channel_number();
        char *channel_name = NULL;
        char *uuid = NULL;
        switch_hash_index_t *iter = NULL;

        /* Work out the profile name and the session uuid used for logging. */
        if (profile_name && strchr(profile_name, ':')) {
            /* "profile:uuid" - split a private copy at the first ':' */
            char *profile_copy = switch_core_strdup(sh->memory_pool, profile_name);
            char *sep = strchr(profile_copy, ':');

            *sep = '\0';
            uuid = switch_core_strdup(sh->memory_pool, sep + 1);
            profile_name = profile_copy;
        } else {
            /* no uuid given - see whether the pool belongs to a session */
            switch_core_session_t *session = switch_core_memory_pool_get_data(sh->memory_pool, "__session");

            if (session) {
                uuid = switch_core_session_get_uuid(session);
            }
        }
        channel_name = switch_core_sprintf(sh->memory_pool, "TTS-%d", channel_number);

        switch_log_printf(SWITCH_CHANNEL_UUID_LOG(uuid), SWITCH_LOG_INFO,
                          "speech_handle: name = %s, rate = %d, speed = %d, samples = %d, voice = %s, engine = %s, param = %s\n", sh->name, sh->rate,
                          sh->speed, sh->samples, sh->voice, sh->engine, sh->param);
        switch_log_printf(SWITCH_CHANNEL_UUID_LOG(uuid), SWITCH_LOG_INFO, "voice = %s, rate = %d\n", voice_name, rate);

        /* Create the FreeSWITCH-side channel object. */
        if (speech_channel_create(&tts_channel, channel_name, uuid, SPEECH_CHANNEL_SYNTHESIZER, &globals.synth, (uint16_t) rate, sh->memory_pool) != SWITCH_STATUS_SUCCESS) {
            return SWITCH_STATUS_FALSE;
        }
        sh->private_info = tts_channel;
        tts_channel->fsh = sh;

        /* Resolve the profile, falling back to the configured default. */
        if (zstr(profile_name)) {
            profile_name = globals.unimrcp_default_synth_profile;
        }
        profile = (profile_t *) switch_core_hash_find(globals.profiles, profile_name);
        if (!profile) {
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(uuid), SWITCH_LOG_ERROR, "(%s) Can't find profile, %s\n", channel_name, profile_name);
            return SWITCH_STATUS_FALSE;
        }

        /* Open the channel on that profile. */
        status = speech_channel_open(tts_channel, profile);
        if (status != SWITCH_STATUS_SUCCESS) {
            return status;
        }

        /* Per-call voice first ... */
        if (!zstr(voice_name)) {
            speech_channel_set_param(tts_channel, "Voice-Name", voice_name);
        }

        /* ... then every default synthesizer parameter of the profile. */
        iter = switch_core_hash_first(profile->default_synth_params);
        while (iter) {
            const void *key = NULL;
            void *val = NULL;

            switch_core_hash_this(iter, &key, NULL, &val);
            speech_channel_set_param(tts_channel, (char *) key, (char *) val);
            iter = switch_core_hash_next(&iter);
        }

        return status;
    }
    
  5. mod_unimrcp.c#speech_channel_open() 函数主要逻辑是调用底层库函数创建 MRCP 会话,并建立连接

    1. 调用库函数 mrcp_application.c#mrcp_application_session_create() 创建 MRCP 会话
    2. 调用库函数 mrcp_application.c#mrcp_application_channel_create() 创建 MRCP 会话下的 channel
    3. 调用库函数 mrcp_application.c#mrcp_application_channel_add() 请求远程 MRCP 服务器创建新会话
    /*
     * Open a speech channel on the given profile.
     *
     * Holds schannel->mutex from entry until the done: label. Creates the MRCP
     * session, an audio termination and the MRCP channel, adds the channel to
     * the session, then waits on schannel->cond until the channel state leaves
     * SPEECH_CHANNEL_CLOSED.
     *
     * Returns:
     *   SWITCH_STATUS_SUCCESS  - state became SPEECH_CHANNEL_READY
     *   SWITCH_STATUS_RESTART  - session could not be created, or the channel
     *                            went to ERROR and then back to CLOSED after
     *                            termination (source comment: retry is allowed)
     *   SWITCH_STATUS_FALSE    - channel not CLOSED on entry, or any other failure
     */
    static switch_status_t speech_channel_open(speech_channel_t *schannel, profile_t *profile)
    {
          
          
     switch_status_t status = SWITCH_STATUS_SUCCESS;
     mpf_termination_t *termination = NULL;
     mrcp_resource_type_e resource_type;
     /* set once the first timeout warning has been logged, so each wait loop warns only once */
     int warned = 0;
    
     switch_mutex_lock(schannel->mutex);
    
     /* make sure we can open channel */
     if (schannel->state != SPEECH_CHANNEL_CLOSED) {
          
          
     	status = SWITCH_STATUS_FALSE;
     	goto done;
     }
    
     schannel->profile = profile;
    
     /* create MRCP session */
     if ((schannel->unimrcp_session = mrcp_application_session_create(schannel->application->app, profile->name, schannel)) == NULL) {
          
          
     	/* profile doesn't exist? */
     	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to create session with %s\n", schannel->name, profile->name);
     	status = SWITCH_STATUS_RESTART;
     	goto done;
     }
     mrcp_application_session_name_set(schannel->unimrcp_session, schannel->name);
    
     /* create audio termination and add to channel */
     if ((termination = speech_channel_create_mpf_termination(schannel)) == NULL) {
          
          
     	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to create termination with %s\n", schannel->name, profile->name);
     	mrcp_application_session_destroy(schannel->unimrcp_session);
     	status = SWITCH_STATUS_FALSE;
     	goto done;
     }
     /* the MRCP resource type follows the channel type: synthesizer, otherwise recognizer */
     if (schannel->type == SPEECH_CHANNEL_SYNTHESIZER) {
          
          
     	resource_type = MRCP_SYNTHESIZER_RESOURCE;
     } else {
          
          
     	resource_type = MRCP_RECOGNIZER_RESOURCE;
     }
     if ((schannel->unimrcp_channel = mrcp_application_channel_create(schannel->unimrcp_session, resource_type, termination, NULL, schannel)) == NULL) {
          
          
     	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to create channel with %s\n", schannel->name, profile->name);
     	mrcp_application_session_destroy(schannel->unimrcp_session);
     	status = SWITCH_STATUS_FALSE;
     	goto done;
     }
    
     /* add channel to session... this establishes the connection to the MRCP server */
     if (mrcp_application_channel_add(schannel->unimrcp_session, schannel->unimrcp_channel) != TRUE) {
          
          
     	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to add channel to session with %s\n", schannel->name, profile->name);
     	mrcp_application_session_destroy(schannel->unimrcp_session);
     	status = SWITCH_STATUS_FALSE;
     	goto done;
     }
    
     /* wait for channel to be ready */
     /* A timeout only triggers a one-time warning; the loop keeps waiting until the state changes. */
     warned = 0;
     while (schannel->state == SPEECH_CHANNEL_CLOSED) {
          
          
     	if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT && !warned) {
          
          
     		warned = 1;
     		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) MRCP session has not opened after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
     	}
     }
     if (schannel->state == SPEECH_CHANNEL_READY) {
          
          
     	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG, "(%s) channel is ready\n", schannel->name);
     } else if (schannel->state == SPEECH_CHANNEL_CLOSED) {
          
          
     	/* NOTE(review): the wait loop above exits only once state != CLOSED, so this
     	 * branch looks unreachable as written - confirm before relying on it. */
     	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Timed out waiting for channel to be ready\n", schannel->name);
     	/* can't retry */
     	status = SWITCH_STATUS_FALSE;
     } else if (schannel->state == SPEECH_CHANNEL_ERROR) {
          
          
     	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG, "(%s) Terminating MRCP session\n", schannel->name);
     	if (!mrcp_application_session_terminate(schannel->unimrcp_session)) {
          
          
     		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) Unable to terminate application session\n", schannel->name);
     		status = SWITCH_STATUS_FALSE;
     		goto done;
     	}
    
     	/* Wait for session to be cleaned up */
     	/* Same pattern as above: warn once on timeout, keep waiting while state is ERROR. */
     	warned = 0;
     	while (schannel->state == SPEECH_CHANNEL_ERROR) {
          
          
     		if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT && !warned) {
          
          
     			warned = 1;
     			switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) MRCP session has not cleaned up after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
     		}
     	}
     	if (schannel->state != SPEECH_CHANNEL_CLOSED) {
          
          
     		/* major issue... can't retry */
     		status = SWITCH_STATUS_FALSE;
     	} else {
          
          
     		/* failed to open profile, retry is allowed */
     		status = SWITCH_STATUS_RESTART;
     	}
     }
    
    done:
    
     /* single exit point: every path above reaches here with the mutex still held */
     switch_mutex_unlock(schannel->mutex);
     return status;
    }
    
    
  6. 此时回到本节步骤2的第2步,switch_ivr_play_say.c#switch_ivr_speak_text_handle() 函数是 tts 处理的功能主体,关键处理如下:

    1. 通过核心函数 switch_core_speech.c#switch_core_speech_feed_tts() 调用到 mod_unimrcp.c#synth_speech_feed_tts() 函数发起 MRCP 语音合成请求
    2. 在 for (;;) 无限循环中不断执行核心函数 switch_core_speech.c#switch_core_speech_read_tts() 调用到 mod_unimrcp.c#synth_speech_read_tts() 函数尝试获取合成的语音
    3. 通过核心函数 switch_core_io.c#switch_core_session_write_frame() 将 MRCP 服务器返回的语音流写到当前会话,通过 RTP 传输到 SIP 终端播放
    /*
     * Speak `text` on `session` through an already opened speech handle.
     *
     * Steps, as implemented below:
     *   1. Return SWITCH_STATUS_FALSE if sh is NULL, the channel cannot be
     *      pre-answered, or the codec is not ready.
     *   2. Replace every '*' / '#' in the text with the channel variables
     *      "star_replace" / "pound_replace" (defaults "star" / "pound").
     *   3. Hand the text to the engine with switch_core_speech_feed_tts().
     *   4. Loop: honour CF_BREAK, DTMF and queued events (via args), read audio
     *      with switch_core_speech_read_tts() and write it to the session with
     *      switch_core_session_write_frame(). Pacing uses `timer` when it is
     *      non-NULL; otherwise one frame is read from the session per iteration.
     *   5. Flush the engine with switch_core_speech_flush_tts().
     *
     * Parameters:
     *   session - session to write audio to
     *   sh      - open speech handle
     *   codec   - codec attached to the written frames; must be ready
     *   timer   - optional pacing timer (may be NULL)
     *   text    - text to speak
     *   args    - optional input handling (callbacks, DTMF buffer, dmachine)
     *
     * Returns SWITCH_STATUS_SUCCESS when the engine signals the end of audio
     * (a SWITCH_STATUS_BREAK from read_tts is mapped to SUCCESS),
     * SWITCH_STATUS_MEMERR if the replacement buffer cannot be allocated,
     * SWITCH_STATUS_BREAK / SWITCH_STATUS_FALSE or a callback status otherwise.
     */
    SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text_handle(switch_core_session_t *session,
                                                                 switch_speech_handle_t *sh,
                                                                 switch_codec_t *codec, switch_timer_t *timer, const char *text, switch_input_args_t *args)
    {
        switch_channel_t *channel = switch_core_session_get_channel(session);
        short abuf[SWITCH_RECOMMENDED_BUFFER_SIZE];
        switch_dtmf_t dtmf = { 0 };
        uint32_t len = 0;
        switch_size_t ilen = 0;
        switch_frame_t write_frame = { 0 };
        switch_status_t status = SWITCH_STATUS_SUCCESS;
        switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
        switch_size_t extra = 0;
        char *tmp = NULL;
        const char *star, *pound, *p;
        switch_size_t starlen, poundlen;

        if (!sh) {
            return SWITCH_STATUS_FALSE;
        }

        if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
            return SWITCH_STATUS_FALSE;
        }

        if (!switch_core_codec_ready(codec)) {
            return SWITCH_STATUS_FALSE;
        }

        arg_recursion_check_start(args);

        write_frame.data = abuf;
        write_frame.buflen = sizeof(abuf);

        /* bytes per frame: 16-bit samples times channel count */
        len = sh->samples * 2 * sh->channels;

        flags = 0;

        if (!(star = switch_channel_get_variable(channel, "star_replace"))) {
            star = "star";
        }
        if (!(pound = switch_channel_get_variable(channel, "pound_replace"))) {
            pound = "pound";
        }
        starlen = strlen(star);
        poundlen = strlen(pound);

        /* First pass: count how much extra room the replacements need. */
        for (p = text; p && *p; p++) {
            if (*p == '*') {
                extra += starlen;
            } else if (*p == '#') {
                extra += poundlen;
            }
        }

        if (extra) {
            char *tp;
            switch_size_t mylen = strlen(text) + extra + 1;

            tmp = malloc(mylen);
            if (!tmp) {
                arg_recursion_check_stop(args);
                return SWITCH_STATUS_MEMERR;
            }
            /* Zero-fill so the result is NUL-terminated without an explicit terminator write. */
            memset(tmp, 0, mylen);
            tp = tmp;

            /*
             * Second pass: build the expanded text. The replacement is copied
             * with its exact, already measured length. The previous code bounded
             * the copy with sizeof(tp), i.e. the size of a pointer, which
             * truncated longer replacements and, because tp still advanced by the
             * full length, left NUL bytes in the middle of the string.
             */
            for (p = text; p && *p; p++) {
                if (*p == '*') {
                    memcpy(tp, star, starlen);
                    tp += starlen;
                } else if (*p == '#') {
                    memcpy(tp, pound, poundlen);
                    tp += poundlen;
                } else {
                    *tp++ = *p;
                }
            }

            text = tmp;
        }

        switch_core_speech_feed_tts(sh, text, &flags);
        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Speaking text: %s\n", text);
        switch_safe_free(tmp);
        text = NULL;

        write_frame.rate = sh->rate;
        memset(write_frame.data, 0, len);
        write_frame.datalen = len;
        write_frame.samples = len / 2;
        write_frame.codec = codec;

        switch_assert(codec->implementation != NULL);

        switch_channel_audio_sync(channel);

        for (;;) {
            switch_event_t *event;

            ilen = len;

            if (!switch_channel_ready(channel)) {
                status = SWITCH_STATUS_FALSE;
                break;
            }

            if (switch_channel_test_flag(channel, CF_BREAK)) {
                switch_channel_clear_flag(channel, CF_BREAK);
                status = SWITCH_STATUS_BREAK;
                break;
            }

            switch_ivr_parse_all_events(session);

            if (args) {
                /* dtmf handler function you can hook up to be executed when a digit is dialed during playback
                 * if you return anything but SWITCH_STATUS_SUCCESS the playback will stop.
                 */
                if (switch_channel_has_dtmf(channel)) {
                    if (!args->input_callback && !args->buf && !args->dmachine) {
                        status = SWITCH_STATUS_BREAK;
                        break;
                    }
                    if (args->buf && !strcasecmp(args->buf, "_break_")) {
                        status = SWITCH_STATUS_BREAK;
                    } else {
                        switch_channel_dequeue_dtmf(channel, &dtmf);

                        if (args->dmachine) {
                            char ds[2] = { dtmf.digit, '\0' };
                            if ((status = switch_ivr_dmachine_feed(args->dmachine, ds, NULL)) != SWITCH_STATUS_SUCCESS) {
                                break;
                            }
                        }

                        if (args->input_callback) {
                            status = args->input_callback(session, (void *) &dtmf, SWITCH_INPUT_TYPE_DTMF, args->buf, args->buflen);
                        } else if (args->buf) {
                            *((char *) args->buf) = dtmf.digit;
                            status = SWITCH_STATUS_BREAK;
                        }
                    }
                }

                if (args->input_callback) {
                    if (switch_core_session_dequeue_event(session, &event, SWITCH_FALSE) == SWITCH_STATUS_SUCCESS) {
                        switch_status_t ostatus = args->input_callback(session, event, SWITCH_INPUT_TYPE_EVENT, args->buf, args->buflen);
                        if (ostatus != SWITCH_STATUS_SUCCESS) {
                            status = ostatus;
                        }
                        switch_event_destroy(&event);
                    }
                }

                if (status != SWITCH_STATUS_SUCCESS) {
                    break;
                }
            }

            /* While paused, keep pacing and servicing input but read no audio from the engine. */
            if (switch_test_flag(sh, SWITCH_SPEECH_FLAG_PAUSE)) {
                if (timer) {
                    if (switch_core_timer_next(timer) != SWITCH_STATUS_SUCCESS) {
                        break;
                    }
                } else {
                    switch_frame_t *read_frame;
                    switch_status_t tstatus = switch_core_session_read_frame(session, &read_frame, SWITCH_IO_FLAG_NONE, 0);

                    while (switch_channel_ready(channel) && switch_channel_test_flag(channel, CF_HOLD)) {
                        switch_ivr_parse_all_messages(session);
                        switch_yield(10000);
                    }

                    if (!SWITCH_READ_ACCEPTABLE(tstatus)) {
                        break;
                    }

                    if (args && args->dmachine) {
                        if ((status = switch_ivr_dmachine_ping(args->dmachine, NULL)) != SWITCH_STATUS_SUCCESS) {
                            goto done;
                        }
                    }

                    if (args && (args->read_frame_callback)) {
                        if ((status = args->read_frame_callback(session, read_frame, args->user_data)) != SWITCH_STATUS_SUCCESS) {
                            goto done;
                        }
                    }
                }
                continue;
            }

            flags = SWITCH_SPEECH_FLAG_BLOCKING;
            status = switch_core_speech_read_tts(sh, abuf, &ilen, &flags);

            if (status != SWITCH_STATUS_SUCCESS) {
                /* BREAK from the engine is treated as normal completion. */
                if (status == SWITCH_STATUS_BREAK) {
                    status = SWITCH_STATUS_SUCCESS;
                }
                break;
            }

            write_frame.datalen = (uint32_t) ilen;
            write_frame.samples = (uint32_t) (ilen / 2 / sh->channels);
            if (timer) {
                write_frame.timestamp = timer->samplecount;
            }
            if (switch_core_session_write_frame(session, &write_frame, SWITCH_IO_FLAG_NONE, 0) != SWITCH_STATUS_SUCCESS) {
                break;
            }

            if (timer) {
                if (switch_core_timer_next(timer) != SWITCH_STATUS_SUCCESS) {
                    break;
                }
            } else {
                /* time off the channel (if you must) */
                switch_frame_t *read_frame;
                switch_status_t tstatus = switch_core_session_read_frame(session, &read_frame, SWITCH_IO_FLAG_NONE, 0);

                while (switch_channel_ready(channel) && switch_channel_test_flag(channel, CF_HOLD)) {
                    switch_ivr_parse_all_messages(session);
                    switch_yield(10000);
                }

                if (!SWITCH_READ_ACCEPTABLE(tstatus)) {
                    break;
                }

                if (args && args->dmachine) {
                    if ((status = switch_ivr_dmachine_ping(args->dmachine, NULL)) != SWITCH_STATUS_SUCCESS) {
                        goto done;
                    }
                }

                if (args && (args->read_frame_callback)) {
                    if ((status = args->read_frame_callback(session, read_frame, args->user_data)) != SWITCH_STATUS_SUCCESS) {
                        goto done;
                    }
                }
            }
        }

     done:

        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "done speaking text\n");
        flags = 0;
        switch_core_speech_flush_tts(sh);

        arg_recursion_check_stop(args);
        return status;
    }
    
    
  7. mod_unimrcp.c#synth_speech_feed_tts() 函数的核心其实是执行 mod_unimrcp.c#synth_channel_speak() 函数,mod_unimrcp.c#synth_channel_speak()函数的核心处理如下:

    1. 调用底层库函数 mrcp_application.c#mrcp_application_message_create() 创建 SYNTHESIZER_SPEAK tts 请求的消息结构
    2. 调用底层库函数 mrcp_application.c#mrcp_application_message_send() 触发执行向 MRCP 服务器发送语音合成请求
    3. 等待 MRCP 服务器返回,将当前 tts 的 channel 状态流转为 SPEECH_CHANNEL_PROCESSING。这个部分主要依靠 unimrcp 模块加载时嵌入到底层 MRCP 客户端的回调 mod_unimrcp.c#synth_on_message_receive() 函数完成
    /*
     * feed_tts() callback of the unimrcp synthesizer interface.
     *
     * Rejects empty text with SWITCH_STATUS_FALSE; otherwise forwards the text
     * to synth_channel_speak() on the channel stored in sh->private_info and
     * returns its status. flags is not used.
     */
    static switch_status_t synth_speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags)
    {
        speech_channel_t *tts_channel = (speech_channel_t *) sh->private_info;

        if (zstr(text)) {
            return SWITCH_STATUS_FALSE;
        }

        return synth_channel_speak(tts_channel, text);
    }
    
    /*
     * Send a SPEAK request for `text` on the given synthesizer channel and wait
     * until the channel leaves SPEECH_CHANNEL_READY.
     *
     * Runs with schannel->mutex held throughout. The content type is the
     * profile's SSML MIME type when the text starts with XML_ID or SSML_ID,
     * plain text otherwise. The channel's audio queue is cleared just before
     * the message is sent.
     *
     * Returns SWITCH_STATUS_SUCCESS only if the channel ends up in
     * SPEECH_CHANNEL_PROCESSING; every other outcome is SWITCH_STATUS_FALSE.
     */
    static switch_status_t synth_channel_speak(speech_channel_t *schannel, const char *text)
    {
        /* Assume failure; flipped to success only once the channel is PROCESSING. */
        switch_status_t status = SWITCH_STATUS_FALSE;
        mrcp_message_t *speak_msg = NULL;
        mrcp_generic_header_t *gen_hdr = NULL;
        mrcp_synth_header_t *synth_hdr = NULL;
        const char *mime_type = NULL;
        int timeout_logged = 0;

        switch_mutex_lock(schannel->mutex);

        /* A new SPEAK is only accepted on an idle, ready channel. */
        if (schannel->state != SPEECH_CHANNEL_READY) {
            goto done;
        }

        speak_msg = mrcp_application_message_create(schannel->unimrcp_session, schannel->unimrcp_channel, SYNTHESIZER_SPEAK);
        if (speak_msg == NULL) {
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Failed to create SPEAK message\n", schannel->name);
            goto done;
        }

        /* generic header: content type */
        gen_hdr = (mrcp_generic_header_t *) mrcp_generic_header_prepare(speak_msg);
        if (gen_hdr == NULL) {
            goto done;
        }

        /* good enough way of determining SSML or plain text body */
        mime_type = (text_starts_with(text, XML_ID) || text_starts_with(text, SSML_ID))
            ? schannel->profile->ssml_mime_type
            : MIME_TYPE_PLAIN_TEXT;
        apt_string_assign(&gen_hdr->content_type, mime_type, speak_msg->pool);
        mrcp_generic_header_property_add(speak_msg, GENERIC_HEADER_CONTENT_TYPE);

        /* synthesizer header: voice, rate, etc. */
        synth_hdr = (mrcp_synth_header_t *) mrcp_resource_header_prepare(speak_msg);
        if (synth_hdr == NULL) {
            goto done;
        }

        /* copy the channel's parameters into the message headers */
        synth_channel_set_params(schannel, speak_msg, gen_hdr, synth_hdr);

        /* body: plain text or SSML */
        apt_string_assign(&speak_msg->body, text, schannel->memory_pool);

        /* drop any stale audio, then send SPEAK to the MRCP server */
        audio_queue_clear(schannel->audio_queue);
        if (mrcp_application_message_send(schannel->unimrcp_session, schannel->unimrcp_channel, speak_msg) == FALSE) {
            goto done;
        }

        /* wait for IN-PROGRESS; a timeout is logged once and the wait continues */
        while (schannel->state == SPEECH_CHANNEL_READY) {
            switch_status_t wait_rc = switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC);

            if (wait_rc == SWITCH_STATUS_TIMEOUT && !timeout_logged) {
                timeout_logged = 1;
                switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) SPEAK IN-PROGRESS not received after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
            }
        }

        if (schannel->state == SPEECH_CHANNEL_PROCESSING) {
            status = SWITCH_STATUS_SUCCESS;
        }

    done:

        switch_mutex_unlock(schannel->mutex);
        return status;
    }
    
    
  8. mod_unimrcp.c#synth_speech_read_tts() 函数的核心是执行 mod_unimrcp.c#speech_channel_read(),而 mod_unimrcp.c#speech_channel_read() 函数的关键则是检查 tts 的 channel 状态,当其状态符合要求的时候从 channel 的语音流缓冲队列中读取数据。此时回到本节步骤6的第3步,switch_core_io.c#switch_core_session_write_frame() 函数会将从 MRCP 服务器传输过来的语音流数据写入到当前会话缓冲,经过编码转化,最终将通过 RTP 发送到终端播放,至此 tts 语音合成处理流程基本结束

    /*
     * read_tts() callback of the unimrcp synthesizer interface.
     *
     * Asks speech_channel_read() for up to *datalen bytes, blocking when
     * SWITCH_SPEECH_FLAG_BLOCKING is set in *flags. A short read is padded up to
     * *datalen with schannel->silence. If the read does not succeed, the channel
     * is put back into SPEECH_CHANNEL_READY, *datalen is set to 0 and
     * SWITCH_STATUS_BREAK is returned. sh->native_rate is always refreshed from
     * schannel->rate.
     */
    static switch_status_t synth_speech_read_tts(switch_speech_handle_t *sh, void *data, switch_size_t *datalen, switch_speech_flag_t *flags)
    {
        speech_channel_t *tts_channel = (speech_channel_t *) sh->private_info;
        const switch_size_t wanted = *datalen;
        switch_size_t got = wanted;
        switch_status_t result = SWITCH_STATUS_SUCCESS;

        if (speech_channel_read(tts_channel, data, &got, (*flags & SWITCH_SPEECH_FLAG_BLOCKING)) != SWITCH_STATUS_SUCCESS) {
            /* ready for next speak request */
            speech_channel_set_state(tts_channel, SPEECH_CHANNEL_READY);
            *datalen = 0;
            result = SWITCH_STATUS_BREAK;
        } else if (got < wanted) {
            /* pad data, if not enough read */
    #ifdef MOD_UNIMRCP_DEBUG_AUDIO_QUEUE
            switch_log_printf(SWITCH_CHANNEL_UUID_LOG(tts_channel->session_uuid), SWITCH_LOG_DEBUG, "(%s) adding %ld bytes of padding\n", tts_channel->name, wanted - got);
    #endif
            memset((uint8_t *) data + got, tts_channel->silence, wanted - got);
        }

        /* report negotiated sample rate back to FreeSWITCH */
        sh->native_rate = tts_channel->rate;

        return result;
    }
    
    /*
     * Read audio from the channel's queue according to the channel state.
     *
     *   SPEECH_CHANNEL_DONE       - non-blocking read of whatever is left;
     *                               SWITCH_STATUS_BREAK once the queue read
     *                               reports SWITCH_STATUS_FALSE
     *   SPEECH_CHANNEL_PROCESSING - read honouring `block`; the queue's return
     *                               value is ignored and SUCCESS is returned
     *   any other state           - SWITCH_STATUS_BREAK
     *
     * Returns SWITCH_STATUS_FALSE if schannel, its mutex or its audio queue is
     * missing. `len` is passed straight through to audio_queue_read().
     */
    static switch_status_t speech_channel_read(speech_channel_t *schannel, void *data, switch_size_t *len, int block)
    {
        if (!schannel || !schannel->mutex || !schannel->audio_queue) {
            return SWITCH_STATUS_FALSE;
        }

        switch (schannel->state) {
        case SPEECH_CHANNEL_PROCESSING:
            /* IN-PROGRESS */
            audio_queue_read(schannel->audio_queue, data, len, block);
            return SWITCH_STATUS_SUCCESS;

        case SPEECH_CHANNEL_DONE:
            /* pull any remaining audio - never blocking */
            if (audio_queue_read(schannel->audio_queue, data, len, 0) == SWITCH_STATUS_FALSE) {
                /* all frames read */
                return SWITCH_STATUS_BREAK;
            }
            return SWITCH_STATUS_SUCCESS;

        default:
            return SWITCH_STATUS_BREAK;
        }
    }
    

猜你喜欢

转载自blog.csdn.net/weixin_45505313/article/details/129623617