音视频入门基础:FLV专题(19)——FFmpeg源码中,解码Audio Tag的AudioTagHeader,并提取AUDIODATA的实现

一、引言

从《音视频入门基础:FLV专题(18)——Audio Tag简介》可以知道,未加密的情况下,FLV文件中的一个Audio Tag = Tag header + AudioTagHeader + AUDIODATA。本文讲述FFmpeg源码中是怎样解码Audio Tag的AudioTagHeader ,拿到里面的信息的,以及是怎样提取AUDIODATA的(以音频压缩编码格式为AAC为例)。

二、flv_read_packet函数

从《音视频入门基础:FLV专题(8)——FFmpeg源码中,解码Tag header的实现》可以知道,FFmpeg源码中使用flv_read_packet函数来读取每个Tag的信息,该函数的前半部分实现了解码Tag header,获取其TagType属性的功能。然后根据TagType属性的值,判断该Tag为音频Tag、视频Tag还是脚本Tag。根据Tag的类型分别执行不同的解码操作:

    if (type == FLV_TAG_TYPE_AUDIO) {
        //...
    } else if (type == FLV_TAG_TYPE_VIDEO) {
        //...
    }else if (type == FLV_TAG_TYPE_META) {
        //...
    }else{
        //...
    }
    //...

如果在flv_read_packet函数的前半部分判断出该Tag为Audio Tag,flv_read_packet函数中会执行如下逻辑解码Audio Tag的AudioTagHeader:

    if (type == FLV_TAG_TYPE_AUDIO) {
        stream_type = FLV_STREAM_TYPE_AUDIO;
        flags    = avio_r8(s->pb);
        size--;
    } 

//...

    if (stream_type == FLV_STREAM_TYPE_AUDIO) {
        int bits_per_coded_sample;
        channels = (flags & FLV_AUDIO_CHANNEL_MASK) == FLV_STEREO ? 2 : 1;
        sample_rate = 44100 << ((flags & FLV_AUDIO_SAMPLERATE_MASK) >>
                                FLV_AUDIO_SAMPLERATE_OFFSET) >> 3;
        bits_per_coded_sample = (flags & FLV_AUDIO_SAMPLESIZE_MASK) ? 16 : 8;
        if (!av_channel_layout_check(&st->codecpar->ch_layout) ||
            !st->codecpar->sample_rate ||
            !st->codecpar->bits_per_coded_sample) {
            av_channel_layout_default(&st->codecpar->ch_layout, channels);
            st->codecpar->sample_rate           = sample_rate;
            st->codecpar->bits_per_coded_sample = bits_per_coded_sample;
        }
        if (!st->codecpar->codec_id) {
            flv_set_audio_codec(s, st, st->codecpar,
                                flags & FLV_AUDIO_CODECID_MASK);
            flv->last_sample_rate =
            sample_rate           = st->codecpar->sample_rate;
            flv->last_channels    =
            channels              = st->codecpar->ch_layout.nb_channels;
        } else {
            AVCodecParameters *par = avcodec_parameters_alloc();
            if (!par) {
                ret = AVERROR(ENOMEM);
                goto leave;
            }
            par->sample_rate = sample_rate;
            par->bits_per_coded_sample = bits_per_coded_sample;
            flv_set_audio_codec(s, st, par, flags & FLV_AUDIO_CODECID_MASK);
            sample_rate = par->sample_rate;
            avcodec_parameters_free(&par);
        }
    }

//...

    if (st->codecpar->codec_id == AV_CODEC_ID_AAC ||
        st->codecpar->codec_id == AV_CODEC_ID_H264 ||
        st->codecpar->codec_id == AV_CODEC_ID_MPEG4 ||
        st->codecpar->codec_id == AV_CODEC_ID_HEVC ||
        st->codecpar->codec_id == AV_CODEC_ID_AV1 ||
        st->codecpar->codec_id == AV_CODEC_ID_VP9) {
        int type = 0;
        if (enhanced_flv && stream_type == FLV_STREAM_TYPE_VIDEO) {
            type = flags & 0x0F;
        } else {
            type = avio_r8(s->pb);
            size--;
        }

        if (size < 0) {
            ret = AVERROR_INVALIDDATA;
            goto leave;
        }

        if (enhanced_flv && stream_type == FLV_STREAM_TYPE_VIDEO && flv->meta_color_info_flag) {
            flv_update_video_color_info(s, st); // update av packet side data
            flv->meta_color_info_flag = 0;
        }

        if (st->codecpar->codec_id == AV_CODEC_ID_H264 || st->codecpar->codec_id == AV_CODEC_ID_MPEG4 ||
            (st->codecpar->codec_id == AV_CODEC_ID_HEVC && type == PacketTypeCodedFrames)) {
            // sign extension
            int32_t cts = (avio_rb24(s->pb) + 0xff800000) ^ 0xff800000;
            pts = av_sat_add64(dts, cts);
            if (cts < 0) { // dts might be wrong
                if (!flv->wrong_dts)
                    av_log(s, AV_LOG_WARNING,
                        "Negative cts, previous timestamps might be wrong.\n");
                flv->wrong_dts = 1;
            } else if (FFABS(dts - pts) > 1000*60*15) {
                av_log(s, AV_LOG_WARNING,
                       "invalid timestamps %"PRId64" %"PRId64"\n", dts, pts);
                dts = pts = AV_NOPTS_VALUE;
            }
            size -= 3;
        }
        if (type == 0 && (!st->codecpar->extradata || st->codecpar->codec_id == AV_CODEC_ID_AAC ||
            st->codecpar->codec_id == AV_CODEC_ID_H264 || st->codecpar->codec_id == AV_CODEC_ID_HEVC ||
            st->codecpar->codec_id == AV_CODEC_ID_AV1 || st->codecpar->codec_id == AV_CODEC_ID_VP9)) {
            AVDictionaryEntry *t;

            if (st->codecpar->extradata) {
                if ((ret = flv_queue_extradata(flv, s->pb, stream_type, size)) < 0)
                    return ret;
                ret = FFERROR_REDO;
                goto leave;
            }
            if ((ret = flv_get_extradata(s, st, size)) < 0)
                return ret;

            /* Workaround for buggy Omnia A/XE encoder */
            t = av_dict_get(s->metadata, "Encoder", NULL, 0);
            if (st->codecpar->codec_id == AV_CODEC_ID_AAC && t && !strcmp(t->value, "Omnia A/XE"))
                st->codecpar->extradata_size = 2;

            ret = FFERROR_REDO;
            goto leave;
        }
    }

//...


下面我们分析上述代码块中解码Audio Tag的AudioTagHeader的原理。

三、flv_read_packet函数中解码Audio Tag的AudioTagHeader的实现

上述代码块中,首先通过avio_r8函数获取AudioTagHeader的第一个字节,也就是SoundFormat(占4位) + SoundRate(占2位) + SoundSize(占1位) + SoundType(占1位),存贮到局部变量flags中。关于avio_r8函数的用法可以参考:《FFmpeg源码:avio_r8、avio_rl16、avio_rl24、avio_rl32、avio_rl64函数分析》:

    if (type == FLV_TAG_TYPE_AUDIO) {
        stream_type = FLV_STREAM_TYPE_AUDIO;
        flags    = avio_r8(s->pb);
        size--;
    } 

FLV文件相关的宏,定义在libavformat/flv.h中:

/* offsets for packed values */
#define FLV_AUDIO_SAMPLESSIZE_OFFSET 1
#define FLV_AUDIO_SAMPLERATE_OFFSET  2
#define FLV_AUDIO_CODECID_OFFSET     4

#define FLV_VIDEO_FRAMETYPE_OFFSET   4


/* bitmasks to isolate specific values */
#define FLV_AUDIO_CHANNEL_MASK    0x01
#define FLV_AUDIO_SAMPLESIZE_MASK 0x02
#define FLV_AUDIO_SAMPLERATE_MASK 0x0c
#define FLV_AUDIO_CODECID_MASK    0xf0

通过下面语句将AudioTagHeader的SoundType属性提取出来,转换得到音频声道数目。将频声道数目存贮到局部变量channels中:

        channels = (flags & FLV_AUDIO_CHANNEL_MASK) == FLV_STEREO ? 2 : 1;

通过下面语句将AudioTagHeader的SoundRate属性提取出来,转换得到音频采样频率。将音频采样频率存贮到局部变量sample_rate中:

        sample_rate = 44100 << ((flags & FLV_AUDIO_SAMPLERATE_MASK) >>
                                FLV_AUDIO_SAMPLERATE_OFFSET) >> 3;

通过下面语句将AudioTagHeader的SoundSize属性提取出来,转换得到Bit depth。将Bit depth存贮到局部变量bits_per_coded_sample中:

        bits_per_coded_sample = (flags & FLV_AUDIO_SAMPLESIZE_MASK) ? 16 : 8;

将上述得到的音频声道数目赋值给st->codecpar->ch_layout,将音频采样频率赋值给st->codecpar->sample_rate,将Bit depth赋值给st->codecpar->bits_per_coded_sample:

        if (!av_channel_layout_check(&st->codecpar->ch_layout) ||
            !st->codecpar->sample_rate ||
            !st->codecpar->bits_per_coded_sample) {
            av_channel_layout_default(&st->codecpar->ch_layout, channels);
            st->codecpar->sample_rate           = sample_rate;
            st->codecpar->bits_per_coded_sample = bits_per_coded_sample;
        }

将AudioTagHeader的SoundFormat属性提取出来,转换得到音频压缩编码格式。将音频压缩编码格式赋值给st->codecpar->codec_id中:

        if (!st->codecpar->codec_id) {
            flv_set_audio_codec(s, st, st->codecpar,
                                flags & FLV_AUDIO_CODECID_MASK);
        //...
        }

由于st等价于s->streams[stream_index],stream_index为该音频流的流索引,指针s指向AVFormatContext变量。所以通过上面两步的赋值操作后,可以在flv_read_packet函数外部,通过(s->streams[stream_index])->codecpar拿到该音频的音频声道数目、音频采样频率、Bit depth、音频压缩编码格式。

当FLV文件中的音频为AAC格式时,AudioTagHeader包含AACPacketType属性。通过语句:type = avio_r8(s->pb) 获取AACPacketType属性:

    if (st->codecpar->codec_id == AV_CODEC_ID_AAC ||
        st->codecpar->codec_id == AV_CODEC_ID_H264 ||
        st->codecpar->codec_id == AV_CODEC_ID_MPEG4 ||
        st->codecpar->codec_id == AV_CODEC_ID_HEVC ||
        st->codecpar->codec_id == AV_CODEC_ID_AV1 ||
        st->codecpar->codec_id == AV_CODEC_ID_VP9) {
        int type = 0;
        if (enhanced_flv && stream_type == FLV_STREAM_TYPE_VIDEO) {
            type = flags & 0x0F;
        } else {
            type = avio_r8(s->pb);
            size--;
        }
//...
}

至此,AudioTagHeader中的属性已被全部解析出来。然后flv_read_packet函数会继续往下执行,提取Audio Tag的AUDIODATA。

四、提取Audio Tag的AUDIODATA

从《音视频入门基础:FLV专题(18)——Audio Tag简介》可以知道,未加密的情况下,FLV文件中的一个Audio Tag = Tag header + AudioTagHeader + AUDIODATA。AUDIODATA为AudioTagBody。FLV文件的音频压缩编码格式为AAC时,AudioTagBody为AACAUDIODATA,当AACPacketType值为0时,AACAUDIODATA为AudioSpecificConfig;当AACPacketType值为1时,AACAUDIODATA包含一帧AAC音频压缩数据,所以下面得分情况讨论。

(一)AACPacketType的值为0

AACPacketType的值为0时,AACAUDIODATA为AudioSpecificConfig。flv_read_packet函数通过下面代码提取AudioSpecificConfig:

        if (type == 0 && (!st->codecpar->extradata || st->codecpar->codec_id == AV_CODEC_ID_AAC ||
            st->codecpar->codec_id == AV_CODEC_ID_H264 || st->codecpar->codec_id == AV_CODEC_ID_HEVC ||
            st->codecpar->codec_id == AV_CODEC_ID_AV1 || st->codecpar->codec_id == AV_CODEC_ID_VP9)) {
            AVDictionaryEntry *t;
 
            if (st->codecpar->extradata) {
                if ((ret = flv_queue_extradata(flv, s->pb, stream_type, size)) < 0)
                    return ret;
                ret = FFERROR_REDO;
                goto leave;
            }
            if ((ret = flv_get_extradata(s, st, size)) < 0)
                return ret;
 
            /* Workaround for buggy Omnia A/XE encoder */
            t = av_dict_get(s->metadata, "Encoder", NULL, 0);
            if (st->codecpar->codec_id == AV_CODEC_ID_AAC && t && !strcmp(t->value, "Omnia A/XE"))
                st->codecpar->extradata_size = 2;
 
            ret = FFERROR_REDO;
            goto leave;
        }

上面的代码块中,局部变量type存贮AudioTagHeader的AACPacketType属性。当AACPacketType值为0并且音频压缩编码格式为AAC并且还未获取AudioSpecificConfig时,会执行下面的代码块,从而拿到AudioSpecificConfig。下面代码块的作用是:读取该Audio Tag的AudioSpecificConfig,将其存贮到s->streams[stream_index]->codecpar->extradata指向的缓冲区中。其中stream_index为该路音频流在FLV文件中的流索引,size为AudioSpecificConfig所占的存贮空间(以字节为单位):

            if ((ret = flv_get_extradata(s, st, size)) < 0)
                return ret;

然后之后在flv_read_packet函数外部会通过decode_audio_specific_config_gb函数解码上述提取出来的AudioSpecificConfig,具体可以参考:《音视频入门基础:AAC专题(12)——FFmpeg源码中,解码AudioSpecificConfig的实现》。

(二)AACPacketType的值为1

当AACPacketType值为1时,AACAUDIODATA包含一帧AAC音频压缩数据。flv_read_packet函数通过下面代码提取AUDIODATA,即通过av_get_packet函数读取一帧AAC音频压缩数据,保存到pkt->data指向的缓冲区中。关于av_get_packet函数可以参考:《FFmpeg源码:append_packet_chunked、av_get_packet、av_append_packet函数分析》。这样在执行下面的代码块后,pkt->data会得到该帧的实际的压缩后的AAC音频数据;pkt->dts会得到该帧的解码时间戳,解码时间戳来源于Tag header的Timestamp和TimestampExtended属性,具体可以参考:《音视频入门基础:FLV专题(8)——FFmpeg源码中,解码Tag header的实现》;pkt->pts会得到该帧的显示时间戳,对于音频,显示时间戳等于解码时间戳:

    ret = av_get_packet(s->pb, pkt, size);
    if (ret < 0)
        return ret;
    pkt->dts          = dts;
    pkt->pts          = pts == AV_NOPTS_VALUE ? dts : pts;
    pkt->stream_index = st->index;
    pkt->pos          = pos;

猜你喜欢

转载自blog.csdn.net/u014552102/article/details/143322059