一、引言
从《音视频入门基础:RTP专题(3)——SDP简介》可以知道,一个SDP会话描述由若干行文本组成,每行的格式为:<type>=<value>。
当<type>的值为'a',并且a=fmtp时,表示该行允许以 SDP 所不具备的方式传递特定格式的参数。此时该行的格式为:a=fmtp:<format> <format specific parameters>
对于H.264视频,该行格式一般为:a=fmtp:XX packetization-mode=X; sprop-parameter-sets=XXX,XXX; profile-level-id=XXX。
其中,
1.packetization-mode属性表示RTP有效载荷类型的属性或接收器实现的功能。当packetization-mode值等于0或不存在packetization-mode时,表示必须使用单一的NAL模式;当packetization-mode的值等于1时,表示必须使用非交错模式;当 packetization-mode的值等于 2 时,表示必须使用交错模式。
2.profile-level-id属性以base16(十六进制)表示法指定序列参数集NAL单元中的profile_idc、constraint_set0_flag、constraint_set1_flag、constraint_set2_flag、constraint_set3_flag、constraint_set4_flag、constraint_set5_flag、reserved_zero_2bits和level_idc。
3.sprop-parameter-sets属性的'='号之后携带以BASE64编码和逗号分隔的SPS和PPS。
本文讲述FFmpeg源码中,是如何将上述属性中的内容解析出来的。
二、总调用堆栈
由《音视频入门基础:RTP专题(5)——FFmpeg源码中,解析SDP的实现》可以知道,FFmpeg源码中通过sdp_parse_line函数解析SDP中的一行数据。当a=fmtp时,sdp_parse_line函数中会执行下面代码块,通过语句parse_fmtp(s, rt, payload_type, buf)对fmtp进行解析:
else if (av_strstart(p, "fmtp:", &p) ||
av_strstart(p, "framesize:", &p)) {
// let dynamic protocol handlers have a stab at the line.
get_word(buf1, sizeof(buf1), &p);
payload_type = atoi(buf1);
if (s1->seen_rtpmap) {
parse_fmtp(s, rt, payload_type, buf);
} else {
s1->seen_fmtp = 1;
av_strlcpy(s1->delayed_fmtp, buf, sizeof(s1->delayed_fmtp));
}
}
parse_fmtp函数定义如下。可以看到parse_fmtp函数中会执行语句
rtsp_st->dynamic_handler->parse_sdp_a_line(s, rtsp_st->stream_index,rtsp_st->dynamic_protocol_context, line)。parse_sdp_a_line是函数指针,当解析的是a=fmtp,并且该行包含H.264视频信息时,parse_sdp_a_line指向parse_h264_sdp_line函数:
/*
 * Dispatch an "a=fmtp:" SDP line to the dynamic payload handler of every
 * RTSP stream whose SDP payload type matches. For H.264 payloads the
 * handler's parse_sdp_a_line callback points at parse_h264_sdp_line.
 */
static void parse_fmtp(AVFormatContext *s, RTSPState *rt,
int payload_type, const char *line)
{
int i;
/* A payload type may be shared by several streams, so scan them all. */
for (i = 0; i < rt->nb_rtsp_streams; i++) {
RTSPStream *rtsp_st = rt->rtsp_streams[i];
if (rtsp_st->sdp_payload_type == payload_type &&
rtsp_st->dynamic_handler &&
rtsp_st->dynamic_handler->parse_sdp_a_line) {
/* Hand the raw line to the codec-specific parser along with the
 * per-stream private context that will receive the parsed values. */
rtsp_st->dynamic_handler->parse_sdp_a_line(s, rtsp_st->stream_index,
rtsp_st->dynamic_protocol_context, line);
}
}
}
parse_h264_sdp_line函数定义如下。可以看到parse_h264_sdp_line函数中执行了语句:return ff_parse_fmtp(s, stream, h264_data, p, sdp_parse_fmtp_config_h264):
/*
 * Parse one H.264-specific SDP "a=" attribute line.
 * Recognizes "framesize:" and "fmtp:" prefixes; "fmtp:" is forwarded to
 * ff_parse_fmtp with sdp_parse_fmtp_config_h264 as the per-attribute callback.
 * Returns 0, or the (possibly negative) result of ff_parse_fmtp.
 */
static int parse_h264_sdp_line(AVFormatContext *s, int st_index,
PayloadContext *h264_data, const char *line)
{
AVStream *stream;
const char *p = line;
/* st_index < 0 means no AVStream is associated; nothing to fill in. */
if (st_index < 0)
return 0;
stream = s->streams[st_index];
if (av_strstart(p, "framesize:", &p)) {
ff_h264_parse_framesize(stream->codecpar, p);
} else if (av_strstart(p, "fmtp:", &p)) {
/* p now points past "fmtp:", i.e. at "<format> <parameters>". */
return ff_parse_fmtp(s, stream, h264_data, p, sdp_parse_fmtp_config_h264);
} else if (av_strstart(p, "cliprect:", &p)) {
// could use this if we wanted.
}
return 0;
}
ff_parse_fmtp函数定义如下。可以看到ff_parse_fmtp函数中执行了语句:res = parse_fmtp(s, stream, data, attr, value),即执行了形参parse_fmtp指向的回调函数,对于H.264,就是执行了sdp_parse_fmtp_config_h264函数:
/*
 * Generic fmtp parser: skips the leading payload-type token, then splits the
 * remainder into attribute/value pairs and feeds each pair to the supplied
 * codec-specific parse_fmtp callback.
 * Returns 0 on success, AVERROR(ENOMEM) on allocation failure, or the first
 * fatal error reported by the callback (AVERROR_PATCHWELCOME is tolerated).
 */
int ff_parse_fmtp(AVFormatContext *s,
AVStream *stream, PayloadContext *data, const char *p,
int (*parse_fmtp)(AVFormatContext *s,
AVStream *stream,
PayloadContext *data,
const char *attr, const char *value))
{
char attr[256];
char *value;
int res;
/* strlen(p) + 1 is an upper bound on any single value in the line,
 * so one heap buffer of this size can hold each value in turn. */
int value_size = strlen(p) + 1;
if (!(value = av_malloc(value_size))) {
av_log(s, AV_LOG_ERROR, "Failed to allocate data for FMTP.\n");
return AVERROR(ENOMEM);
}
// remove protocol identifier
while (*p && *p == ' ')
p++; // strip spaces
while (*p && *p != ' ')
p++; // eat protocol identifier
while (*p && *p == ' ')
p++; // strip trailing spaces
/* Iterate over "attr=value" pairs; ff_rtsp_next_attr_and_value advances p
 * and returns 0 when the line is exhausted. */
while (ff_rtsp_next_attr_and_value(&p,
attr, sizeof(attr),
value, value_size)) {
res = parse_fmtp(s, stream, data, attr, value);
if (res < 0 && res != AVERROR_PATCHWELCOME) {
av_free(value);
return res;
}
}
av_free(value);
return 0;
}
sdp_parse_fmtp_config_h264函数定义如下:
/*
 * Handle a single attribute=value pair from an H.264 "a=fmtp:" line.
 * Recognized attributes: packetization-mode (stored in the payload context),
 * profile-level-id (parsed into profile_idc/profile_iop/level_idc) and
 * sprop-parameter-sets (base64 SPS/PPS decoded into codecpar->extradata).
 * Unknown attributes are silently ignored. Returns 0 or a negative AVERROR.
 */
static int sdp_parse_fmtp_config_h264(AVFormatContext *s,
AVStream *stream,
PayloadContext *h264_data,
const char *attr, const char *value)
{
AVCodecParameters *par = stream->codecpar;
if (!strcmp(attr, "packetization-mode")) {
av_log(s, AV_LOG_DEBUG, "RTP Packetization Mode: %d\n", atoi(value));
h264_data->packetization_mode = atoi(value);
/*
 * Packetization Mode:
 * 0 or not present: Single NAL mode (Only nals from 1-23 are allowed)
 * 1: Non-interleaved Mode: 1-23, 24 (STAP-A), 28 (FU-A) are allowed.
 * 2: Interleaved Mode: 25 (STAP-B), 26 (MTAP16), 27 (MTAP24), 28 (FU-A),
 * and 29 (FU-B) are allowed.
 */
if (h264_data->packetization_mode > 1)
av_log(s, AV_LOG_ERROR,
"Interleaved RTP mode is not supported yet.\n");
} else if (!strcmp(attr, "profile-level-id")) {
/* Exactly 6 hex digits = 3 bytes; anything else is ignored. */
if (strlen(value) == 6)
parse_profile_level_id(s, h264_data, value);
} else if (!strcmp(attr, "sprop-parameter-sets")) {
int ret;
/* An empty value or a trailing comma means the PPS part is missing. */
if (*value == 0 || value[strlen(value) - 1] == ',') {
av_log(s, AV_LOG_WARNING, "Missing PPS in sprop-parameter-sets, ignoring\n");
return 0;
}
/* Drop any previous extradata before rebuilding it from this line. */
par->extradata_size = 0;
av_freep(&par->extradata);
ret = ff_h264_parse_sprop_parameter_sets(s, &par->extradata,
&par->extradata_size, value);
av_log(s, AV_LOG_DEBUG, "Extradata set to %p (size: %d)\n",
par->extradata, par->extradata_size);
return ret;
}
return 0;
}
三、解析packetization-mode
sdp_parse_fmtp_config_h264函数中,通过下面代码块将SDP的packetization-mode属性解析出来,存放到h264_data->packetization_mode中:
if (!strcmp(attr, "packetization-mode")) {
av_log(s, AV_LOG_DEBUG, "RTP Packetization Mode: %d\n", atoi(value));
h264_data->packetization_mode = atoi(value);
/*
* Packetization Mode:
* 0 or not present: Single NAL mode (Only nals from 1-23 are allowed)
* 1: Non-interleaved Mode: 1-23, 24 (STAP-A), 28 (FU-A) are allowed.
* 2: Interleaved Mode: 25 (STAP-B), 26 (MTAP16), 27 (MTAP24), 28 (FU-A),
* and 29 (FU-B) are allowed.
*/
if (h264_data->packetization_mode > 1)
av_log(s, AV_LOG_ERROR,
"Interleaved RTP mode is not supported yet.\n");
}
回到调用链上层的parse_fmtp函数的源码可以看出来,就是把packetization-mode属性保存到rt->rtsp_streams[i]->dynamic_protocol_context->packetization_mode中:
/*
 * (Quoted again for reference.) Dispatch an "a=fmtp:" SDP line to the
 * dynamic payload handler of every RTSP stream whose SDP payload type
 * matches; the handler receives the stream's dynamic_protocol_context,
 * which is where the parsed fmtp fields end up.
 */
static void parse_fmtp(AVFormatContext *s, RTSPState *rt,
int payload_type, const char *line)
{
int i;
for (i = 0; i < rt->nb_rtsp_streams; i++) {
RTSPStream *rtsp_st = rt->rtsp_streams[i];
if (rtsp_st->sdp_payload_type == payload_type &&
rtsp_st->dynamic_handler &&
rtsp_st->dynamic_handler->parse_sdp_a_line) {
rtsp_st->dynamic_handler->parse_sdp_a_line(s, rtsp_st->stream_index,
rtsp_st->dynamic_protocol_context, line);
}
}
}
而rt = s->priv_data,所以packetization-mode属性会最终保存到((RTSPState *)(s->priv_data))->rtsp_streams[i]->dynamic_protocol_context->packetization_mode中,s指向AVFormatContext结构体变量:
/*
 * Excerpt of the SDP line parser (letter = <type>, buf = <value>), quoted
 * only to show that rt aliases s->priv_data; the rest of the body is
 * elided ("//...").
 */
static void sdp_parse_line(AVFormatContext *s, SDPParseState *s1,
int letter, const char *buf)
{
RTSPState *rt = s->priv_data;
//...
}
四、解析profile-level-id
sdp_parse_fmtp_config_h264函数中,通过parse_profile_level_id函数将SDP的profile-level-id属性解析出来
else if (!strcmp(attr, "profile-level-id")) {
if (strlen(value) == 6)
parse_profile_level_id(s, h264_data, value);
}
parse_profile_level_id函数定义如下。可以看到执行parse_profile_level_id函数后,h264_data->profile_idc(即((RTSPState *)(s->priv_data))->rtsp_streams[i]->dynamic_protocol_context->profile_idc)会得到解析出来的profile_idc;h264_data->profile_iop(即((RTSPState *)(s->priv_data))->rtsp_streams[i]->dynamic_protocol_context->profile_iop)会得到解析出来的
constraint_set0_flag、constraint_set1_flag、constraint_set2_flag、constraint_set3_flag、constraint_set4_flag、constraint_set5_flag 和 reserved_zero_2bits;h264_data->level_idc(即((RTSPState *)(s->priv_data))->rtsp_streams[i]->dynamic_protocol_context->level_idc)会得到解析出来的level_idc:
/*
 * Split the 6-hex-digit profile-level-id value into its three bytes and
 * store them in the payload context: byte 0 = profile_idc, byte 1 =
 * profile_iop (the constraint_setX_flag bits plus reserved bits), byte 2 =
 * level_idc. The caller guarantees strlen(value) == 6.
 */
static void parse_profile_level_id(AVFormatContext *s,
PayloadContext *h264_data,
const char *value)
{
char buffer[3];
// 6 characters=3 bytes, in hex.
uint8_t profile_idc;
uint8_t profile_iop;
uint8_t level_idc;
/* Parse each byte as a 2-character hex string via strtol(base 16). */
buffer[0] = value[0];
buffer[1] = value[1];
buffer[2] = '\0';
profile_idc = strtol(buffer, NULL, 16);
buffer[0] = value[2];
buffer[1] = value[3];
profile_iop = strtol(buffer, NULL, 16);
buffer[0] = value[4];
buffer[1] = value[5];
level_idc = strtol(buffer, NULL, 16);
av_log(s, AV_LOG_DEBUG,
"RTP Profile IDC: %x Profile IOP: %x Level: %x\n",
profile_idc, profile_iop, level_idc);
h264_data->profile_idc = profile_idc;
h264_data->profile_iop = profile_iop;
h264_data->level_idc = level_idc;
}
五、解析sprop-parameter-sets
sdp_parse_fmtp_config_h264函数中,通过ff_h264_parse_sprop_parameter_sets函数将SDP的sprop-parameter-sets属性解析出来:
else if (!strcmp(attr, "sprop-parameter-sets")) {
int ret;
if (*value == 0 || value[strlen(value) - 1] == ',') {
av_log(s, AV_LOG_WARNING, "Missing PPS in sprop-parameter-sets, ignoring\n");
return 0;
}
par->extradata_size = 0;
av_freep(&par->extradata);
ret = ff_h264_parse_sprop_parameter_sets(s, &par->extradata,
&par->extradata_size, value);
av_log(s, AV_LOG_DEBUG, "Extradata set to %p (size: %d)\n",
par->extradata, par->extradata_size);
return ret;
}
ff_h264_parse_sprop_parameter_sets函数定义如下。可以看到ff_h264_parse_sprop_parameter_sets函数中通过av_base64_decode函数(关于该函数用法可以参考:《FFmpeg源码:av_base64_decode函数分析》)将SDP中以BASE64编码、逗号分隔的SPS和PPS进行BASE64解码,解码后的数据先存入数组decoded_packet中,再被拷贝到s->streams[st_index]->codecpar->extradata(AVCodecParameters的extradata)中,s指向AVFormatContext结构体变量:
/*
 * Decode the comma-separated, base64-encoded parameter sets (SPS/PPS) from
 * an sprop-parameter-sets value and append them to *data_ptr/*size_ptr,
 * each prefixed with start_sequence (the Annex B start code, defined
 * elsewhere in the file — presumably 00 00 00 01; TODO confirm).
 * Returns 0 on success, AVERROR(ENOMEM) on allocation failure.
 */
int ff_h264_parse_sprop_parameter_sets(AVFormatContext *s,
uint8_t **data_ptr, int *size_ptr,
const char *value)
{
char base64packet[1024];
uint8_t decoded_packet[1024];
int packet_size;
/* One iteration per comma-separated base64 chunk. */
while (*value) {
char *dst = base64packet;
/* Copy up to the next ',' (or end of string), bounded by the buffer. */
while (*value && *value != ','
&& (dst - base64packet) < sizeof(base64packet) - 1) {
*dst++ = *value++;
}
*dst++ = '\0';
if (*value == ',')
value++;
packet_size = av_base64_decode(decoded_packet, base64packet,
sizeof(decoded_packet));
if (packet_size > 0) {
/* Grow the extradata buffer: old contents + start code + new NAL
 * + FFmpeg's required zero padding. */
uint8_t *dest = av_realloc(*data_ptr,
packet_size + sizeof(start_sequence) +
*size_ptr +
AV_INPUT_BUFFER_PADDING_SIZE);
if (!dest) {
av_log(s, AV_LOG_ERROR,
"Unable to allocate memory for extradata!\n");
return AVERROR(ENOMEM);
}
*data_ptr = dest;
/* Append: start code, then the decoded NAL unit, then zeroed padding. */
memcpy(dest + *size_ptr, start_sequence,
sizeof(start_sequence));
memcpy(dest + *size_ptr + sizeof(start_sequence),
decoded_packet, packet_size);
memset(dest + *size_ptr + sizeof(start_sequence) +
packet_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
*size_ptr += sizeof(start_sequence) + packet_size;
}
}
return 0;
}
然后avformat_open_input中会执行语句:update_stream_avctx(s):
/*
 * Excerpt: avformat_open_input() calls update_stream_avctx() so that codec
 * parameters filled in during demuxer setup (e.g. the extradata built from
 * SDP) propagate to each stream's internal AVCodecContext. Body elided
 * ("//...").
 */
int avformat_open_input(AVFormatContext **ps, const char *filename,
const AVInputFormat *fmt, AVDictionary **options)
{
//...
update_stream_avctx(s);
//...
}
update_stream_avctx函数中又会执行语句:ret = avcodec_parameters_to_context(sti->avctx, st->codecpar):
/*
 * Excerpt: for every stream, copy its public codec parameters (st->codecpar)
 * into the internal decoder context (sti->avctx) via
 * avcodec_parameters_to_context(). Returns 0, or the first error from that
 * call. Parts of the loop body are elided ("//...").
 */
static int update_stream_avctx(AVFormatContext *s)
{
int ret;
for (unsigned i = 0; i < s->nb_streams; i++) {
//...
ret = avcodec_parameters_to_context(sti->avctx, st->codecpar);
if (ret < 0)
return ret;
//...
}
return 0;
}
avcodec_parameters_to_context中会通过语句:memcpy(codec->extradata, par->extradata, par->extradata_size)将AVCodecParameters的extradata拷贝到AVCodecContext的extradata中:
/*
 * Excerpt: the part of avcodec_parameters_to_context() that duplicates
 * par->extradata into a freshly allocated, zero-padded codec->extradata.
 * The rest of the function (including its normal return) is elided
 * ("//...").
 */
int avcodec_parameters_to_context(AVCodecContext *codec,
const AVCodecParameters *par)
{
//...
if (par->extradata) {
/* Allocate with padding and zero-fill, then copy the parameter data. */
codec->extradata = av_mallocz(par->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
if (!codec->extradata)
return AVERROR(ENOMEM);
memcpy(codec->extradata, par->extradata, par->extradata_size);
codec->extradata_size = par->extradata_size;
}
}
然后h264_decode_init中会调用ff_h264_decode_extradata函数解码上述AVCodecContext的extradata中的数据(进行BASE64解码后的SPS和PPS数据)。ff_h264_decode_extradata内部会调用ff_h264_decode_seq_parameter_set函数解码SPS(具体可以参考:《音视频入门基础:H.264专题(10)——FFmpeg源码中,存放SPS属性的结构体和解码SPS的函数分析》),调用ff_h264_decode_picture_parameter_set函数解码PPS:
/*
 * Excerpt: decoder init for H.264. If extradata is present (here, the
 * SPS/PPS assembled from the SDP), parse it with ff_h264_decode_extradata;
 * a parse failure is only fatal when AV_EF_EXPLODE error recognition is
 * set, otherwise it is downgraded to a warning. Other parts of the
 * function are elided ("//...").
 */
static av_cold int h264_decode_init(AVCodecContext *avctx)
{
//...
if (!avctx->internal->is_copy) {
if (avctx->extradata_size > 0 && avctx->extradata) {
ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size,
&h->ps, &h->is_avc, &h->nal_length_size,
avctx->err_recognition, avctx);
if (ret < 0) {
int explode = avctx->err_recognition & AV_EF_EXPLODE;
av_log(avctx, explode ? AV_LOG_ERROR: AV_LOG_WARNING,
"Error decoding the extradata\n");
if (explode) {
return ret;
}
ret = 0;
}
}
}
//...
return 0;
}