FFmpeg音频重采样

planar和packed

ffmpeg定义了以下音频数据的存储方式和存储格式：

/*
 * Audio sample formats.
 *
 * Each sample type appears twice: once stored packed (samples of all
 * channels interleaved in a single buffer) and once stored planar (one
 * separate buffer per channel, marked by the trailing 'P').
 */
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,   /* sentinel, precedes the first real format */

    /* packed: LRLRLR... */
    AV_SAMPLE_FMT_U8   = 0,    /* unsigned 8 bits */
    AV_SAMPLE_FMT_S16  = 1,    /* signed 16 bits */
    AV_SAMPLE_FMT_S32  = 2,    /* signed 32 bits */
    AV_SAMPLE_FMT_FLT  = 3,    /* float */
    AV_SAMPLE_FMT_DBL  = 4,    /* double */

    /* planar: LLL... RRR... */
    AV_SAMPLE_FMT_U8P  = 5,    /* unsigned 8 bits, planar */
    AV_SAMPLE_FMT_S16P = 6,    /* signed 16 bits, planar */
    AV_SAMPLE_FMT_S32P = 7,    /* signed 32 bits, planar */
    AV_SAMPLE_FMT_FLTP = 8,    /* float, planar */
    AV_SAMPLE_FMT_DBLP = 9,    /* double, planar */

    /* 64-bit integer samples, listed after the others */
    AV_SAMPLE_FMT_S64  = 10,   /* signed 64 bits */
    AV_SAMPLE_FMT_S64P = 11,   /* signed 64 bits, planar */

    /* Number of sample formats. DO NOT USE if linking dynamically */
    AV_SAMPLE_FMT_NB
};

存储格式就是8位、16位、32位、浮点等采样格式；存储方式则有planar和packed两种类型：planar为平面存储，每个声道的数据各自连续存放在独立的缓冲区中；packed为交错存储，各声道的采样点在同一个缓冲区中交替存放。以左右声道LR为例：

planar的存储方式

LLLLLLLLL···

RRRRRRRRR···

packed的存储方式

LRLRLRLRLRLRLRLRLR···

函数参数详解

swr_alloc_set_opts

/**
 * Allocate SwrContext if needed and set/reset common parameters.
 *
 * This function does not require s to be allocated with swr_alloc(). On the
 * other hand, swr_alloc() can use swr_alloc_set_opts() to set the parameters
 * on the allocated context.
 *
 * Note the argument order: the three output parameters come first, followed
 * by the three input parameters.
 *
 * @param s               existing Swr context if available, or NULL if not
 * @param out_ch_layout   output channel layout (AV_CH_LAYOUT_*)
 * @param out_sample_fmt  output sample format (AV_SAMPLE_FMT_*).
 * @param out_sample_rate output sample rate (frequency in Hz)
 * @param in_ch_layout    input channel layout (AV_CH_LAYOUT_*)
 * @param in_sample_fmt   input sample format (AV_SAMPLE_FMT_*).
 * @param in_sample_rate  input sample rate (frequency in Hz)
 * @param log_offset      logging level offset; this is an int, not a pointer,
 *                        so pass 0 (not NULL) when no offset is wanted
 * @param log_ctx         parent logging context, can be NULL
 *
 * @see swr_init(), swr_free()
 * @return NULL on error, allocated context otherwise
 */
struct SwrContext *swr_alloc_set_opts(struct SwrContext *s,
                                      int64_t out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate,
                                      int64_t  in_ch_layout, enum AVSampleFormat  in_sample_fmt, int  in_sample_rate,
                                      int log_offset, void *log_ctx);

swr_init

/**
 * Initialize context after user parameters have been set.
 * @note The context must be configured using the AVOption API.
 *
 * @see av_opt_set_int()
 * @see av_opt_set_dict()
 *
 * @param[in,out]   s Swr context to initialize
 * @return AVERROR error code in case of failure; callers should check this
 *         value rather than assume initialization succeeded.
 */
int swr_init(struct SwrContext *s);

swr_convert

/** Convert audio.
 *
 * in and in_count can be set to 0 to flush the last few samples out at the
 * end.
 *
 * If more input is provided than output space, then the input will be buffered.
 * You can avoid this buffering by using swr_get_out_samples() to retrieve an
 * upper bound on the required number of output samples for the given number of
 * input samples. Conversion will run directly without copying whenever possible.
 *
 * Both out_count and in_count are expressed in samples per channel, not in
 * bytes and not summed over all channels.
 *
 * @param s         allocated Swr context, with parameters set
 * @param out       output buffers; only the first one needs to be set in case
 *                  of packed audio (for planar audio, one buffer per channel)
 * @param out_count amount of space available for output in samples per channel
 * @param in        input buffers; only the first one needs to be set in case
 *                  of packed audio (for planar audio, one buffer per channel)
 * @param in_count  number of input samples available in one channel
 *
 * @return number of samples output per channel, negative value on error
 */
int swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
                                const uint8_t **in , int in_count);

swr_free

/**
 * @}
 *
 * @name SwrContext destructor functions
 * @{
 */

/**
 * Free the given SwrContext and set the pointer to NULL.
 *
 * The argument is the address of the caller's context pointer (hence the
 * double pointer), which is what allows that pointer to be reset to NULL.
 *
 * @param[in] s a pointer to a pointer to Swr context
 */
void swr_free(struct SwrContext **s);

重采样函数封装

/**
 * Resample one decoded audio frame into a caller-supplied PCM buffer.
 *
 * A SwrContext is created, used for this single frame and freed again before
 * returning, so no resampler state (including any input that swr_convert()
 * had to buffer) is carried over to the next call.
 *
 * Only one output pointer is handed to swr_convert(), so the output must fit
 * in a single plane: any packed format, or a planar format with exactly one
 * channel. Multi-channel planar output is rejected instead of letting
 * swr_convert() read plane pointers that do not exist.
 *
 * @param cc                 codec context describing the input audio
 *                           (channel_layout/channels, sample_fmt, sample_rate)
 * @param avf                decoded frame to convert
 * @param out_pcm            destination buffer; must have room for
 *                           AVENGINE_AUDIO_MAX_FRAME_SIZE samples per channel
 *                           in the requested output format
 * @param out_sample_format  output sample format (AV_SAMPLE_FMT_*)
 * @param out_sample_rate    output sample rate in Hz
 * @param out_channel_layout output channel layout (AV_CH_LAYOUT_*)
 *
 * @return number of bytes written to out_pcm over all channels (0 when the
 *         resampler produced no samples for this frame), or a negative
 *         AVERROR code on failure
 */
int resample(AVCodecContext *cc, AVFrame *avf, uint8_t *out_pcm,
	enum AVSampleFormat out_sample_format, int out_sample_rate, uint64_t out_channel_layout) {

	int ret;

	if (!cc || !avf || !out_pcm)
		return AVERROR(EINVAL);

	int out_channels = av_get_channel_layout_nb_channels(out_channel_layout);
	if (out_channels <= 0)
		return AVERROR(EINVAL);

	// &out_pcm is a one-element pointer array; planar output with more than
	// one channel would need one pointer per channel.
	if (out_channels > 1 && av_sample_fmt_is_planar(out_sample_format))
		return AVERROR(EINVAL);

	// Some decoders leave channel_layout at 0 and only fill in channels;
	// derive a default layout in that case so swr_init() has a valid input.
	int64_t in_channel_layout = cc->channel_layout
		? (int64_t)cc->channel_layout
		: av_get_default_channel_layout(cc->channels);

	// log_offset is an int: pass 0, not NULL.
	SwrContext *swr_ctx = swr_alloc_set_opts(NULL,
		(int64_t)out_channel_layout, out_sample_format, out_sample_rate,
		in_channel_layout, cc->sample_fmt, cc->sample_rate,
		0, NULL);
	if (!swr_ctx)
		return AVERROR(ENOMEM);

	ret = swr_init(swr_ctx);
	if (ret < 0)
		goto end;

	// extended_data lists every input plane; data[] only has a fixed number
	// of slots and is insufficient for planar audio with many channels.
	ret = swr_convert(swr_ctx, &out_pcm, AVENGINE_AUDIO_MAX_FRAME_SIZE,
		(const uint8_t **)avf->extended_data, avf->nb_samples);
	if (ret <= 0)
		goto end;	// error code, or 0 samples produced => 0 bytes

	// Convert "samples per channel" into the total byte count. The zero case
	// is handled above because av_samples_get_buffer_size() rejects a sample
	// count that is not positive.
	ret = av_samples_get_buffer_size(NULL,
		out_channels, ret, out_sample_format, 1);

end:
	swr_free(&swr_ctx);
	return ret;
}

cc：音频编解码器上下文

avf：音频帧的AVFrame

out_pcm：输出重采样后的PCM数据

out_sample_format：重采样数据格式

out_sample_rate：重采样采样率

out_channel_layout：重采样输出的声道布局（AV_CH_LAYOUT_*）

return：重采样后所有通道数据的总字节数

遇到的坑总结

当指定resample输出的格式为packed类型时，用一个一级指针存储连续的LRLRLR数据即可，此时(const uint8_t **)avframe->data也可替换为(const uint8_t **)avframe->extended_data。

// resample
// When the requested output format is packed, a single pointer to one
// contiguous buffer is enough to receive the interleaved LRLRLR data.
// In that case (const uint8_t **)avframe->data may also be replaced by
// avframe->extended_data.
// NOTE(review): out_pcm is only declared in this fragment; it has to point at
// an allocated buffer before swr_convert() is called - allocation not shown.
uint8_t *out_pcm;
int samples_per_chl = swr_convert(swr_ctx, &out_pcm, AVENGINE_AUDIO_MAX_FRAME_SIZE,
	(const uint8_t **)avframe->data, avframe->nb_samples);

当指定resample输出的格式为planar类型时，用一个指针数组的每一个元素指针存储分开的LLL, RRR数据，指针数组的大小根据声道数确定，最大为8。如果输出的格式为planar类型，使用一级指针会崩溃。

// resample
// When the requested output format is planar, use an array of pointers: each
// element receives one channel's separate data (LLL..., RRR...). The array
// size follows the channel count (at most 8 here). With planar output, passing
// a single pointer instead will crash.
// NOTE(review): the elements of out_pcm are not initialized in this fragment;
// each one has to point at an allocated per-channel buffer before
// swr_convert() is called - allocation not shown.
uint8_t *out_pcm[audio_channels];
int samples_per_chl = swr_convert(swr_ctx, out_pcm, AVENGINE_AUDIO_MAX_FRAME_SIZE,
	(const uint8_t **)avframe->data, avframe->nb_samples);

KayChanGeek

发布了131 篇原创文章 · 获赞 195 · 访问量 38万+

他的留言板关注

相关函数