FFmpeg的一些关键的数据结构(一)

ffmpeg 定义的数据结构很有特色：有一些是动态与静态的关系，比如，URLProtocol 和 URLContext，AVInputFormat 和 AVFormatContext，AVCodec 和 AVCodecContext。从前面播放器的一般原理我们可知，播放器内部要实现的几大功能是：读文件，识别格式，音视频解码，音视频渲染。其中音视频渲染由 SDL 实现，我们不讨论。ffplay 把其他的每个大功能抽象成一个相当于 C++ 中 COM 接口的数据结构，着重于功能函数，同时这些功能函数指针在编译的时候就能静态确定。每一个大功能都要支持多种类型的广义数据，ffplay 把这多种类型的广义数据的共同部分抽象成对应的 Context 结构，这些对应的 Context 结构着重于动态性，其核心成员只能在程序运行时动态确定其值。并且 COM 接口类的数据结构在程序运行时有很多很多实例，而相应的 Context 类只有一个实例，这里同时体现了数据结构的划分原则：如果有一对多的关系就要分开定义。

有一些是指针表述的排他性包含关系(因为程序运行时同一类型的多种数据只支持一种，所以就有排他性 )。比如，AVCodecContext 用 priv_data包含 MsrleContext 或 TSContext，AVFormatContext 用 priv_data 包含 AVIContext 或其他类 Context，AVStream 用 priv_data 包含 AVIStream 或其他类 Stream。由前面数据结构的动态与静态关系可知，ffplay 把多种类型的广义数据的共同部分抽象成 context 结构，那么广义数据的各个特征不同部分就抽象成各种具体类型的 context，然后用 priv_data 字段表述的指针排他性的关联起来。由于瘦身后的 ffplay 只支持有限类型，所以 AVFormatContext 只能关联包含 AVIContext，AVStream 只能关联包含 AVIStream。

有一些是扩展包含关系，比如，ByteIOContext 包含 URLContext，就是在应用层把没有缓存的 URLContext 扩展成有缓冲区的广义文件 ByteIOContext，改善程序 IO 性能。

有一些是直接包含关系，比如，AVFrame 包含 AVPicture，这两个结构共有的字段，其定义类型、大小、顺序都一模一样，除了更准确地描述各自的意义、便于阅读理解和维护代码外，还可以方便地把 AVFrame 大结构强制转换成 AVPicture 小结构。

我们先来重点分析 AVCodec/AVCodecContext/MsrleContext 这几个数据结构，这几个数据结构定义了编解码器的核心架构，相当于 Directshow 中的各种音视频解码器 decoder。

解协议（http,rtsp,rtmp,mms）→解封装（flv,avi,rmvb,mp4）→解码（h264,mpeg2,aac,mp3）→存数据

4.1 AVCodec结构体

/*
 * Static description of one codec implementation: its identity plus the
 * function pointers that implement it.
 */
typedef struct AVCodec {
    /* Codec name, e.g. "h264", "h263". */
    const char *name;
    /* Codec category: video, audio, and so on. */
    enum CodecType type;
    /* Codec identifier, e.g. CODEC_ID_H264. */
    enum CodecID id;
    /* Size of the codec-specific context structure, e.g. H264Context. */
    int priv_data_size;

    /* Operations exposed by the codec; every decoder implements these. */
    int (*init)(AVCodecContext *);
    int (*encode)(AVCodecContext *, uint8_t *buf, int buf_size, void *data);
    int (*close)(AVCodecContext *);
    int (*decode)(AVCodecContext *, void *outdata, int *outdata_size,
                  uint8_t *buf, int buf_size);

    /* Link to the next entry in the list of supported codecs. */
    struct AVCodec *next;
} AVCodec;

H264的主要结构的初始化如下：

AVCodec ff_h264_decoder = {

"h264",

AVMEDIA_TYPE_VIDEO,

CODEC_ID_H264,

sizeof(H264Context),

ff_h264_decode_init,

NULL,

ff_h264_decode_end,

decode_frame

}

说明：

AVCodec 是类似 COM 接口的数据结构，表示音视频编解码器，着重于功能函数，一种媒体类型对应一个 AVCodec 结构，在程序运行时有多个实例。next 变量用于把所有支持的编解码器连接成链表，便于遍历查找；id 确定了唯一编解码器；priv_data_size 表示具体的 Codec 对应的 Context 结构大小，比如 MsrleContext 或 TSContext，这些具体的结构定义散落于各个 .c 文件中，为避免太多的 if else 类语句判断类型再计算大小，这里就直接指明大小。因为这是一个编译时静态确定的字段，所以放在 AVCodec 而不是 AVCodecContext 中。

4.2 AVCodecContext结构体

/*
 * Runtime context of the codec currently in use: properties shared by all
 * codecs plus links to the codec table and its private state.
 */
typedef struct AVCodecContext {
    int bit_rate;
    int frame_number;

    /* Extra decoder setup data, e.g. the esds payload of an AAC audio track
     * in a mov file. */
    unsigned char *extradata;
    /* Size of extradata. */
    int extradata_size;

    /* Video only: original picture width and height. */
    int width;
    int height;
    /* Video only: pixel format of one frame, e.g. YUV420. */
    enum PixelFormat pix_fmt;

    /* Audio sample rate. */
    int sample_rate;
    /* Number of audio channels. */
    int channels;
    int bits_per_sample;
    int block_align;

    /* The codec in use, e.g. ff_h264_decoder. */
    struct AVCodec *codec;
    /* Codec-specific context, e.g. H264Context. */
    void *priv_data;

    /* Common buffer-management callbacks. */
    int (*get_buffer)(struct AVCodecContext *c, AVFrame *pic);
    void (*release_buffer)(struct AVCodecContext *c, AVFrame *pic);
    int (*reget_buffer)(struct AVCodecContext *c, AVFrame *pic);
} AVCodecContext;

说明：

AVCodecContext 结构表示程序运行的当前Codec 使用的上下文，着重于所有Codec 共有的属性(并且是在程序运行时才能确定其值)和关联其他结构的字段。extradata 和extradata_size 两个字段表述了相应Codec 使用的私有数据；codec 字段关联相应的编解码器；priv_data 字段关联各个具体编解码器独有的属性context，和AVCodec 结构中的priv_data_size 配对使用。

4.3 AVInputFormat结构体

/*
 * Static description of one input container format: its name, the size of
 * its private context, and the demuxing callbacks.
 */
typedef struct AVInputFormat {
    /* Format name, e.g. "mov", "mp4". */
    const char *name;
    /* Size of the format-specific context structure, e.g. MovContext. */
    int priv_data_size;

    /* Format-specific operations. */
    int (*read_probe)(AVProbeData *);
    int (*read_header)(struct AVFormatContext *, AVFormatParameters *ap);
    int (*read_packet)(struct AVFormatContext *, AVPacket *pkt);
    int (*read_close)(struct AVFormatContext *);

    /* Link to the next entry in the list of supported input formats. */
    struct AVInputFormat *next;
} AVInputFormat;

Mov或mp4的主要结构的初始化如下：

AVInputFormat ff_mov_demuxer = {

"mov,mp4,m4a,3gp,3g2,mj2",

NULL_IF_CONFIG_SMALL("QuickTime/MPEG-4/Motion JPEG 2000 format"),

sizeof(MOVContext),

mov_probe,

mov_read_header,

mov_read_packet,

mov_read_close,

mov_read_seek,

}

说明：

AVInputFormat 是类似 COM 接口的数据结构，表示输入文件容器格式，着重于功能函数，一种文件容器格式对应一个 AVInputFormat 结构，在程序运行时有多个实例。next 变量用于把所有支持的输入文件容器格式连接成链表，便于遍历查找。priv_data_size 标示具体的文件容器格式对应的 Context 的大小，在本例中是 MovContext，这些具体的结构定义散落于各个 .c 文件中。

4.4 AVFormatContext结构体

/*
 * Runtime context of the container format currently being read: links to the
 * format table, its private state, the I/O layer and the streams.
 */
typedef struct AVFormatContext {
    /* The input format in use; for mp4 or mov this is ff_mov_demuxer. */
    struct AVInputFormat *iformat;
    /* Format-specific context, e.g. MovContext. */
    void *priv_data;
    /* Unified data-reading I/O context. Note it is embedded by value here,
     * not held through a pointer. */
    ByteIOContext pb;
    /* Number of streams. */
    int nb_streams;
    /* Stream table; the original notes expect at least two entries, one for
     * the video stream and one for the audio stream. */
    AVStream *streams[MAX_STREAMS];
} AVFormatContext;

说明：

AVFormatContext 结构表示程序运行的当前文件容器格式使用的上下文，着重于所有文件容器共有的属性(并且是在程序运行时才能确定其值)和关联其他结构的字段。iformat字段关联相应的文件容器格式；pb 关联广义的输入文件；streams 关联音视频流；priv_data 字段关联各个具体文件容器独有的属性上下文，和priv_data_size 配对使用。

4.5 MovContext结构体

/*
 * Private demuxer state for mov/mp4 files.
 * Note the struct tag is spelled MovContext while the typedef name is
 * MOVContext; both spellings are kept exactly as in the original.
 */
typedef struct MovContext {
    /* Temporarily held pointer to the owning AVFormatContext. */
    AVFormatContext *fc;
    /* Time scale factor. */
    int time_scale;
    /* Duration of the video. */
    int64_t duration;
    /* Whether a "moov" header was found while demuxing. */
    int found_moov;
    /* Whether an "mdat" header was found while demuxing. */
    int found_mdat;
    int isom;
    MOVFragment fragment;
    MOVTrackExt *trex_data;
    unsigned trex_count;
    /* Metadata are iTunes style. */
    int itunes_metadata;
    int chapter_track;
} MOVContext;

说明：

MOVContext定义了mp4 中流的一些属性。

4.6 URLProtocol结构体

/*
 * Function table for one protocol (a "generalized input file" such as file,
 * pipe or tcp). Every protocol is driven through this same set of callback
 * signatures.
 *
 * NOTE(review): the original text had a stray `ffurl_connect` token fused
 * onto the closing `} URLProtocol;` line, which is not valid C. It is kept
 * here as a note instead; presumably it names the routine that invokes these
 * callbacks -- confirm against the FFmpeg sources.
 */
typedef struct URLProtocol {
    const char *name;

    /* Uniform template functions shared by all protocols. */
    int (*url_open)(URLContext *h, const char *filename, int flags);
    int (*url_read)(URLContext *h, unsigned char *buf, int size);
    int (*url_write)(URLContext *h, unsigned char *buf, int size);
    offset_t (*url_seek)(URLContext *h, offset_t pos, int whence);
    int (*url_close)(URLContext *h);

    /* Link to the next entry in the list of supported protocols. */
    struct URLProtocol *next;
} URLProtocol;

file的主要结构的初始化如下：

/*
 * Registration of the "file" protocol.
 *
 * NOTE(review): .url_get_file_handle and .url_check are not declared in the
 * simplified URLProtocol shown earlier in this text; presumably they come
 * from a fuller upstream definition -- confirm before compiling against the
 * trimmed struct.
 */
URLProtocol ff_file_protocol = {
    .name                = "file",
    .url_open            = file_open,
    .url_read            = file_read,
    .url_write           = file_write,
    .url_seek            = file_seek,
    .url_close           = file_close,
    .url_get_file_handle = file_get_handle,
    .url_check           = file_check,
};  /* the terminating ';' was missing in the original snippet */

说明：

URLProtocol 是类似COM 接口的数据结构，表示广义的输入文件，着重于功能函数，一种广义的输入文件对应一个URLProtocol 结构，比如file，pipe，tcp 等等，定义了对file tcp等方式的通用模板函数。next 变量用于把所有支持的广义的输入文件连接成链表，便于遍历查找。

4.7 URLContext结构体

/*
 * Runtime context of one opened generalized input file: which protocol
 * serves it and the protocol's private handle.
 */
typedef struct URLContext {
    /* Protocol serving this context (taken from the registered protocol
     * list), e.g. ff_file_protocol. */
    struct URLProtocol *prot;
    int flags;
    int max_packet_size;
    /* Handle of the underlying transport: an fd for files, a socket for
     * network protocols, and so on. */
    void *priv_data;
    /* Name of the resource; local and network names are not distinguished. */
    char *filename;
} URLContext;  /* the terminating ';' was missing in the original snippet */

说明：

URLContext 结构表示程序运行的当前广义输入文件使用的context，着重于所有广义输入文件共有的属性(并且是在程序运行时才能确定其值)和关联其他结构的字段。prot 字段关联相应的广义输入文件；priv_data 字段关联各个具体广义输入文件的句柄。

4.8 AVIOContext结构体(老版本为：ByteIOContext)

/*
 * Buffered I/O layer placed on top of an unbuffered URLContext.
 */
typedef struct ByteIOContext {
    /* Data buffer. */
    unsigned char *buffer;
    /* Size of the data buffer. */
    int buffer_size;
    /* Read-position markers within the buffer. */
    unsigned char *buf_ptr;
    unsigned char *buf_end;

    /* Points to the associated URLContext. */
    void *opaque;
    int (*read_packet)(void *opaque, uint8_t *buf, int buf_size);
    int (*write_packet)(void *opaque, uint8_t *buf, int buf_size);
    offset_t (*seek)(void *opaque, offset_t offset, int whence);

    /* Position in the file of the current buffer. */
    offset_t pos;
    /* Set when a seek is required and the buffered data must be flushed. */
    int must_flush;
    /* True if end of file has been reached. */
    int eof_reached;
    int write_flag;
    int max_packet_size;
    /* Error code, or 0 if no error happened. */
    int error;
} ByteIOContext;

说明：

ByteIOContext 结构扩展URLProtocol 结构成内部有缓冲机制的广泛意义上的文件，改善广义输入文件的IO性能。由其数据结构定义的字段可知，主要是缓冲区相关字段，标记字段，和一个关联字段opaque 来完成广义文件读写操作。opaque 关联字段用于关联URLContext 结构，间接关联并扩展URLProtocol 结构。

4.9 AVStream结构体

/*
 * Runtime context of one media stream: links to its decoder context, its
 * demuxer-private state, and the key-frame index used for seeking.
 */
typedef struct AVStream {
    /* Decoder context for this stream; ties the stream to its decoder. */
    AVCodecContext *actx;

    /* Codec parser. Encoders wrap the actual payload and add header
     * information (for h264 the NAL units have to be parsed). The original
     * note says the association is made via `avav_find_stream_info()`,
     * presumably a typo for av_find_stream_info() -- confirm. */
    struct AVCodecParserContext *parser;

    /* Demuxer-side stream context, e.g. the MOVStreamContext for mp4. */
    void *priv_data;
    AVRational time_base;

    /* Index used when seeking to locate key frames quickly; the flv
     * keyframes table and the mp4 I-frame table are both stored here. */
    AVIndexEntry *index_entries;
    /* Number of elements in index_entries. */
    int nb_index_entries;
    int index_entries_allocated_size;

    double frame_last_delay;
} AVStream;

说明：

AVStream 结构表示当前媒体流的上下文，着重于所有媒体流共有的属性(并且是在程序运行时才能确定其值)和关联其他结构的字段。actx 字段关联当前音视频媒体使用的编解码器的context；priv_data 字段关联解析各个具体媒体流解复用拆包用的context；还有关键帧的索引表也存于此。

4.10 MOVStreamContext 结构体

/*
 * Per-stream information collected from the header boxes of a mov/mp4 file
 * while demuxing.
 */
typedef struct MOVStreamContext
{
    /* Stream index, 0 or 1. */
    int ffindex;
    /* Scratch value: number of the next chunk. */
    int next_chunk;

    /* Number of chunks (in mp4 this is taken from the stco box). */
    unsigned int chunk_count;
    /* File offsets of the chunks, i.e. the stco table; the samples inside one
     * chunk are stored contiguously in the file. */
    int64_t *chunk_offsets;

    /* Number of stts entries. */
    unsigned int stts_count;
    /* stts timing table. */
    MOVStts *stts_data;

    /* Number of ctts entries (ctts corrects timestamps when B-frames are
     * mixed in). */
    unsigned int ctts_count;
    /* ctts table. */
    MOVStts *ctts_data;

    /* Number of stsc (sample-to-chunk layout) entries. */
    unsigned int stsc_count;
    /* stsc table. */
    MOVStsc *stsc_data;

    /* Scratch value: index of the ctts entry currently in use. */
    int ctts_index;
    /* Sample index the current ctts entry applies to. */
    int ctts_sample;

    /* Common sample size from stsz, used when all samples have the same
     * size. */
    unsigned int sample_size;
    /* Number of stsz entries, i.e. the number of samples. */
    unsigned int sample_count;
    /* stsz table with the size of every sample; populated only when
     * sample_size is 0. */
    int *sample_sizes;

    /* Number of stss (key-frame index) entries. */
    unsigned int keyframe_count;
    /* Key-frame table. */
    int *keyframes;

    /* Number of dref entries, usually 1. */
    unsigned drefs_count;
    /* dref table. */
    MOVDref *drefs;

    /* Width from tkhd. */
    int width;
    /* Height from tkhd. */
    int height;
} MOVStreamContext;

说明：

MOVStreamContext结构用于保存从mov或mp4中进行拆包解复用从头部得到的信息。

by: czc1009 《FFmpeg基础库编程开发》