我们从今天起,开始做一个实时的回声消除应用。
这个应用的工作环境描述如下:
1】linux系统中
2】系统通过电脑的喇叭播放音乐
3】在播放音乐过程中打开录音功能进行录音
我们的应用的目标:
1】录音要求输出两个基本的音频文件:录音过程中喇叭的音频、MIC拾到的声音(说话声+喇叭回声)
2】对MIC的拾到的声音进行回声消除处理,输出回声消除后的音频文件
先看一下alsa支持的pcm信息
ALSA library version: 1.1.3
PCM stream types:
PLAYBACK
CAPTURE
PCM access types:
MMAP_INTERLEAVED
MMAP_NONINTERLEAVED
MMAP_COMPLEX
RW_INTERLEAVED
RW_NONINTERLEAVED
PCM formats:
S8 (Signed 8 bit)
U8 (Unsigned 8 bit)
S16_LE (Signed 16 bit Little Endian)
S16_BE (Signed 16 bit Big Endian)
U16_LE (Unsigned 16 bit Little Endian)
U16_BE (Unsigned 16 bit Big Endian)
S24_LE (Signed 24 bit Little Endian)
S24_BE (Signed 24 bit Big Endian)
U24_LE (Unsigned 24 bit Little Endian)
U24_BE (Unsigned 24 bit Big Endian)
S32_LE (Signed 32 bit Little Endian)
S32_BE (Signed 32 bit Big Endian)
U32_LE (Unsigned 32 bit Little Endian)
U32_BE (Unsigned 32 bit Big Endian)
FLOAT_LE (Float 32 bit Little Endian)
FLOAT_BE (Float 32 bit Big Endian)
FLOAT64_LE (Float 64 bit Little Endian)
FLOAT64_BE (Float 64 bit Big Endian)
IEC958_SUBFRAME_LE (IEC-958 Little Endian)
IEC958_SUBFRAME_BE (IEC-958 Big Endian)
MU_LAW (Mu-Law)
A_LAW (A-Law)
IMA_ADPCM (Ima-ADPCM)
MPEG (MPEG)
GSM (GSM)
SPECIAL (Special)
S24_3LE (Signed 24 bit Little Endian in 3bytes)
S24_3BE (Signed 24 bit Big Endian in 3bytes)
U24_3LE (Unsigned 24 bit Little Endian in 3bytes)
U24_3BE (Unsigned 24 bit Big Endian in 3bytes)
S20_3LE (Signed 20 bit Little Endian in 3bytes)
S20_3BE (Signed 20 bit Big Endian in 3bytes)
U20_3LE (Unsigned 20 bit Little Endian in 3bytes)
U20_3BE (Unsigned 20 bit Big Endian in 3bytes)
S18_3LE (Signed 18 bit Little Endian in 3bytes)
S18_3BE (Signed 18 bit Big Endian in 3bytes)
U18_3LE (Unsigned 18 bit Little Endian in 3bytes)
U18_3BE (Unsigned 18 bit Big Endian in 3bytes)
G723_24 (G.723 (ADPCM) 24 kbit/s, 8 samples in 3 bytes)
G723_24_1B (G.723 (ADPCM) 24 kbit/s, 1 sample in 1 byte)
G723_40 (G.723 (ADPCM) 40 kbit/s, 8 samples in 3 bytes)
G723_40_1B (G.723 (ADPCM) 40 kbit/s, 1 sample in 1 byte)
DSD_U8 (Direct Stream Digital, 1-byte (x8), oldest bit in MSB)
DSD_U16_LE (Direct Stream Digital, 2-byte (x16), little endian, oldest bits in MSB)
DSD_U32_LE (Direct Stream Digital, 4-byte (x32), little endian, oldest bits in MSB)
DSD_U16_BE (Direct Stream Digital, 2-byte (x16), big endian, oldest bits in MSB)
DSD_U32_BE (Direct Stream Digital, 4-byte (x32), big endian, oldest bits in MSB)
PCM subformats:
STD (Standard)
PCM states:
OPEN
SETUP
PREPARED
RUNNING
XRUN
DRAINING
PAUSED
SUSPENDED
根据以上信息,我们进一步细化目标,我们录音的采样率定为8000,sample长度定为16位,小端。
PCM handle name = 'default'
PCM state = PREPARED
access type = RW_INTERLEAVED
format = 'S16_LE' (Signed 16 bit Little Endian)
subformat = 'STD' (Standard)
channels = 1
rate = 7999 bps
period time = 256000 us
period size = 2048 frames
buffer time = 256000 us
buffer size = 2097152 frames
periods per buffer = 1024 frames
exact rate = 7999/1 bps
significant bits = 16
tick time = 0 us
is batch = 0
is block transfer = 1
is double = 0
is half duplex = 0
is joint duplex = 0
can overrange = 0
can mmap = 0
can pause = 1
can resume = 0
can sync start = 0
实时处理程序代码
#define ALSA_PCM_NEW_HW_PARAMS_API
#include <alsa/asoundlib.h>
#include "signal_processing_library.h"
#include "noise_suppression_x.h"
#include "noise_suppression.h"
#include "gain_control.h"
#include "echo_cancellation.h"
#define SAMPLES 8000
#define FRAMES 160
/*
 * Run WebRTC noise suppression on one frame of 16-bit mono samples and
 * append the denoised samples to outfilenameNs.
 *
 *   pNS_inst        initialized NS handle (WebRtcNs_Create/Init/set_policy)
 *   pData           input samples; frames of them (expected 160 here)
 *   outfilenameNs   open output file, or NULL (function logs and returns)
 *   frames          samples per call
 *   pOutData        caller-provided output buffer, >= frames samples
 *   filter_state*   QMF analysis filter state, zeroed before first call
 *   Synthesis_state* QMF synthesis filter state, zeroed before first call
 */
void webRtcNsProc(NsHandle *pNS_inst, short *pData, FILE *outfilenameNs, int frames, short *pOutData, int *filter_state1, int *filter_state12, int *Synthesis_state1, int *Synthesis_state12)
{
    int len = frames * sizeof(short);  /* output byte count (was magic "frames*2") */
    short shInL[160], shInH[160];
    short shOutL[160] = {0}, shOutH[160] = {0};

    if (NULL == outfilenameNs)
    {
        /* Original fell through and called fwrite(..., NULL) -> crash. */
        printf("open NS out file err! \n");
        return;
    }
    /* Split the input into low/high QMF bands as the NS API expects.
     * NOTE(review): band splitting is normally needed for >=16 kHz input;
     * confirm it is intended for this 8 kHz capture. */
    WebRtcSpl_AnalysisQMF(pData, frames, shInL, shInH, filter_state1, filter_state12);
    if (0 == WebRtcNs_Process(pNS_inst, shInL, shInH, shOutL, shOutH))
    {
        short shBufferOut[320];
        /* Recombine the denoised bands, then persist the result. */
        WebRtcSpl_SynthesisQMF(shOutL, shOutH, 160, shBufferOut, Synthesis_state1, Synthesis_state12);
        memcpy(pOutData, shBufferOut, frames * sizeof(short));
        fwrite(pOutData, 1, len, outfilenameNs);
    }
    else
    {
        /* Original wrote pOutData even on failure, emitting stale or
         * uninitialized samples to the file. */
        printf("Noise_Suppression WebRtcNs_Process err! \n");
    }
}
/*
 * Apply WebRTC automatic gain control (fixed digital gain mode) to one
 * frame of 16-bit mono samples and append the result to outfilename.
 *
 *   agcHandle    initialized AGC handle (WebRtcAgc_Create/Init/set_config)
 *   pData        input samples; frames of them
 *   outfilename  open output file, or NULL (function logs and returns)
 *   frames       samples per call
 *   pOutData     caller-provided output buffer, >= frames samples
 */
void WebRtcAgcProc(void *agcHandle, short *pData, FILE *outfilename, int frames, short *pOutData)
{
    int len = frames * sizeof(short);
    /* Analog mic levels are unused in digital-gain mode; the original
     * shuttled them through locals that were discarded every call. */
    int inMicLevel = 0;
    int outMicLevel = 0;
    uint8_t saturationWarning;

    int nAgcRet = WebRtcAgc_Process(agcHandle, pData, NULL, frames, pOutData, NULL,
                                    inMicLevel, &outMicLevel, 0, &saturationWarning);
    if (nAgcRet != 0)
    {
        /* Original wrote pOutData even on failure (stale samples). */
        printf("failed in WebRtcAgc_Process %d \n", nAgcRet);
        return;
    }
    if (NULL == outfilename)
    {
        printf("open AGC out file err! \n");
        return;
    }
    fwrite(pOutData, 1, len, outfilename);
}
/*
 * Run WebRTC acoustic echo cancellation for one frame: feed the far-end
 * (speaker) reference, cancel it from the near-end (mic) signal, and
 * append the echo-free samples to fp_out.
 *
 *   aecmInst    initialized AEC handle (WebRtcAec_Create/Init/set_config)
 *   near_frame  mic samples (speech + speaker echo)
 *   far_frame   speaker reference samples
 *   fp_out      open output file
 *   frames      samples per call
 *   out_frame   caller-provided output buffer, >= frames samples
 */
void WebRtcAecProc(void *aecmInst, short *near_frame, short *far_frame, FILE *fp_out, int frames, short *out_frame)
{
    /* Original mixed the FRAMES macro with the frames parameter; honor
     * the caller's frame count consistently. */
    int len = frames * sizeof(short);
    printf("aec_proc_near_frame data: %d %d %d ... %d %d %d\n",*near_frame,*(near_frame + 1),*(near_frame + 2),*(near_frame + frames - 3),*(near_frame + frames -2),*(near_frame + frames - 1));
    /* Buffer the far-end (echo reference) signal. */
    WebRtcAec_BufferFarend(aecmInst, far_frame, frames);
    /* Cancel the echo from the near-end signal.
     * NOTE(review): msInSndCardBuf is passed as -3; WebRTC documents this
     * sound-card delay as a non-negative value — confirm the intent. */
    WebRtcAec_Process(aecmInst, near_frame, NULL, out_frame, NULL, frames, -3, 0);
    printf("aec_out_proc_frame data: %d %d %d ... %d %d %d\n",*out_frame,*(out_frame + 1),*(out_frame + 2),*(out_frame + frames - 3),*(out_frame + frames - 2),*(out_frame + frames - 1));
    fwrite(out_frame, 1, len, fp_out);
}
/*
 * Capture loop: record ~3 seconds of 6-channel interleaved S16_LE audio
 * at ~8 kHz from the default ALSA device, de-interleave each channel to
 * its own raw file, and run AGC, AEC and NS on selected channels.
 *
 * Channel routing (taken from the original de-interleave loop — confirm
 * against the actual hardware wiring):
 *   ch0..ch5 -> out_pcm1.raw .. out_pcm6.raw
 *   ch1 -> AGC input, ch2 -> AEC near end (mic), ch5 -> AEC far end (speaker)
 */
int main()
{
    long loops;                        /* capture periods left to record */
    int rc;                            /* ALSA return code */
    int size;                          /* capture buffer size, bytes */
    snd_pcm_t *handle;
    snd_pcm_hw_params_t *params;
    unsigned int val;                  /* sample rate, then period time (us) */
    int dir = 0;                       /* sub-unit direction; was read uninitialized by *_near() */
    snd_pcm_uframes_t frames;          /* frames per period */
    short *buffer = NULL;              /* interleaved 6-channel period buffer */
    short *buffertemp4 = NULL;         /* mono ch1 samples: AGC input */
    short *bufferAgcOutData = NULL;    /* AGC output */
    short *bufferNsOutData = NULL;     /* NS output */
    short *bufferAecOutData = NULL;    /* AEC output */
    short *bufferAecMicinData = NULL;  /* AEC near end (mic, ch2) */
    short *bufferAecSpeakerData = NULL;/* AEC far end (speaker, ch5) */
    int filter_state1[6], filter_state12[6];        /* QMF analysis state */
    int Synthesis_state1[6], Synthesis_state12[6];  /* QMF synthesis state */

    memset(filter_state1, 0, sizeof(filter_state1));
    memset(filter_state12, 0, sizeof(filter_state12));
    memset(Synthesis_state1, 0, sizeof(Synthesis_state1));
    memset(Synthesis_state12, 0, sizeof(Synthesis_state12));

    /* Raw output files: one per captured channel plus one per processor.
     * "wb+" opens a binary file for reading and writing, truncated. */
    FILE *out_fd1 = fopen("out_pcm1.raw", "wb+");
    FILE *out_fd2 = fopen("out_pcm2.raw", "wb+");
    FILE *out_fd3 = fopen("out_pcm3.raw", "wb+");
    FILE *out_fd4 = fopen("out_pcm4.raw", "wb+");
    FILE *out_fd5 = fopen("out_pcm5.raw", "wb+");
    FILE *out_fd6 = fopen("out_pcm6.raw", "wb+");
    FILE *out_fdAgc = fopen("out_pcmAgc.raw", "wb+");
    FILE *out_fdNs = fopen("out_pcmNs.raw", "wb+");
    FILE *out_fdAec = fopen("out_pcmAec.raw", "wb+");
    if (!out_fd1 || !out_fd2 || !out_fd3 || !out_fd4 || !out_fd5 ||
        !out_fd6 || !out_fdAgc || !out_fdNs || !out_fdAec)
    {
        fprintf(stderr, "unable to open an output file\n");
        exit(1);
    }

    /* Open the default PCM device for capture. */
    rc = snd_pcm_open(&handle, "default", SND_PCM_STREAM_CAPTURE, 0);
    if (rc < 0)
    {
        fprintf(stderr, "unable to open pcm device: %s\n", snd_strerror(rc));
        exit(1);
    }

    /* Configure hardware: interleaved access, S16_LE, 6 channels, ~8 kHz. */
    snd_pcm_hw_params_alloca(&params);
    snd_pcm_hw_params_any(handle, params);
    snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
    snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
    snd_pcm_hw_params_set_channels(handle, params, 6);
    val = SAMPLES;
    snd_pcm_hw_params_set_rate_near(handle, params, &val, &dir);
    frames = FRAMES;
    snd_pcm_hw_params_set_period_size_near(handle, params, &frames, &dir);
    /* Commit the configuration to the driver. */
    rc = snd_pcm_hw_params(handle, params);
    if (rc < 0)
    {
        fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
        exit(1);
    }

    /* One period of 6 interleaved channels, 2 bytes per sample
     * (the original comment claimed 2 channels). */
    snd_pcm_hw_params_get_period_size(params, &frames, &dir);
    size = frames * 6 * sizeof(short);
    buffer = malloc(size);
    buffertemp4 = malloc(frames * sizeof(short));
    bufferAgcOutData = malloc(frames * sizeof(short));
    bufferNsOutData = malloc(frames * sizeof(short));
    bufferAecOutData = malloc(frames * sizeof(short));
    bufferAecMicinData = malloc(frames * sizeof(short));
    bufferAecSpeakerData = malloc(frames * sizeof(short));
    if (!buffer || !buffertemp4 || !bufferAgcOutData || !bufferNsOutData ||
        !bufferAecOutData || !bufferAecMicinData || !bufferAecSpeakerData)
    {
        fprintf(stderr, "out of memory\n");
        exit(1);
    }

    /* Record ~3 seconds: period time is reported in microseconds. */
    snd_pcm_hw_params_get_period_time(params, &val, &dir);
    loops = 3000000 / val;

    /* AEC setup. */
    void *aecmInst = NULL;
    WebRtcAec_Create(&aecmInst);
    WebRtcAec_Init(aecmInst, SAMPLES, SAMPLES);
    AecConfig config;
    config.nlpMode = kAecNlpConservative;
    WebRtcAec_set_config(aecmInst, config);

    /* AGC setup: fixed digital gain. */
    void *agcHandle = NULL;
    WebRtcAgc_Create(&agcHandle);
    int minLevel = 0;
    int maxLevel = 255;
    int agcMode = 3; // 3 - Fixed Digital Gain 0dB
    WebRtcAgc_Init(agcHandle, minLevel, maxLevel, agcMode, SAMPLES);
    WebRtcAgc_config_t agcConfig;
    agcConfig.compressionGaindB = 20;
    agcConfig.limiterEnable = 1;
    agcConfig.targetLevelDbfs = 3;
    WebRtcAgc_set_config(agcHandle, agcConfig);

    /* NS setup. */
    NsHandle *pNS_inst = NULL;
    int nMode = 1;
    if (0 != WebRtcNs_Create(&pNS_inst))
    {
        printf("Noise_Suppression WebRtcNs_Create err! \n");
    }
    if (0 != WebRtcNs_Init(pNS_inst, SAMPLES))
    {
        printf("Noise_Suppression WebRtcNs_Init err! \n");
    }
    if (0 != WebRtcNs_set_policy(pNS_inst, nMode))
    {
        printf("Noise_Suppression WebRtcNs_set_policy err! \n");
    }

    while (loops > 0)
    {
        loops--;
        rc = snd_pcm_readi(handle, buffer, frames); /* read one period */
        if (rc == -EPIPE)
        {
            /* EPIPE means overrun: recover and skip this stale period
             * (the original fell through and processed garbage). */
            fprintf(stderr, "overrun occurred\n");
            snd_pcm_prepare(handle);
            continue;
        }
        else if (rc < 0)
        {
            fprintf(stderr, "error from read: %s\n", snd_strerror(rc));
            continue;
        }
        else if (rc != (int)frames)
        {
            fprintf(stderr, "short read, read %d frames\n", rc);
        }

        /* De-interleave: the sample of channel k in frame i is
         * buffer[i*6 + k]. Each channel goes to its own raw file; the
         * channels the processors need are also copied to mono buffers. */
        snd_pcm_uframes_t i;
        for (i = 0; i < frames; i++)
        {
            short *frame = buffer + i * 6;
            fwrite(&frame[0], 1, sizeof(short), out_fd1);
            fwrite(&frame[1], 1, sizeof(short), out_fd2);
            buffertemp4[i] = frame[1];          /* AGC input */
            fwrite(&frame[2], 1, sizeof(short), out_fd3);
            bufferAecMicinData[i] = frame[2];   /* AEC near end (mic) */
            fwrite(&frame[3], 1, sizeof(short), out_fd4);
            fwrite(&frame[4], 1, sizeof(short), out_fd5);
            fwrite(&frame[5], 1, sizeof(short), out_fd6);
            bufferAecSpeakerData[i] = frame[5]; /* AEC far end (speaker) */
        }

        WebRtcAgcProc(agcHandle, buffertemp4, out_fdAgc, frames, bufferAgcOutData);
        WebRtcAecProc(aecmInst, bufferAecMicinData, bufferAecSpeakerData, out_fdAec, frames, bufferAecOutData);
        webRtcNsProc(pNS_inst, bufferAgcOutData, out_fdNs, frames, bufferNsOutData,
                     filter_state1, filter_state12, Synthesis_state1, Synthesis_state12);
    }

    WebRtcNs_Free(pNS_inst);
    WebRtcAgc_Free(agcHandle);
    WebRtcAec_Free(aecmInst);
    snd_pcm_drain(handle);
    snd_pcm_close(handle);

    /* Free only pointers returned by malloc. The original also freed the
     * loop cursors (buffertemp1/2/3, buffertempmicin, buffertempspeaker),
     * which pointed into already-freed blocks — undefined behavior — and
     * leaked buffertemp4. */
    free(buffer);
    free(buffertemp4);
    free(bufferAgcOutData);
    free(bufferNsOutData);
    free(bufferAecOutData);
    free(bufferAecMicinData);
    free(bufferAecSpeakerData);

    fclose(out_fd1);
    fclose(out_fd2);
    fclose(out_fd3);
    fclose(out_fd4);
    fclose(out_fd5);
    fclose(out_fd6);
    fclose(out_fdAgc);
    fclose(out_fdNs);
    fclose(out_fdAec);
    return 0;
}
上面的程序配合之前安装的动态库,就完成了相应的AEC、NS、AGC处理