WebRTC Audio System: An Introduction to the Audio Technology Stack, Part 2

1.8 The ACM Module

ACM is short for Audio Coding Module.

WebRTC's audio coding module handles both audio sending and receiving; the acm2 directory contains the API implementation for both paths. Each outgoing audio frame carries 10 ms of audio data and is fed to the coding module through Add10MsData(). The module encodes the frame with the corresponding encoder and passes the encoded data to a pre-registered packetization callback, which packs it into RTP packets and sends them through the transport layer. WebRTC's built-in audio codecs include G.711, G.722, iLBC, iSAC, Opus, and PCM16B. The audio network adaptor provides additional functionality for the encoder (currently Opus only) so it can adapt to network conditions (bandwidth, packet loss rate, etc.). Incoming packets are delivered through IncomingPacket(); received packets are handled by the jitter buffer (NetEq) module, whose work includes decoding. Decoders are created through a decoder factory class, and decoded data is retrieved via PlayoutData10Ms().
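A minimal usage sketch of this flow (assuming `config`, `payload`, `payload_len_bytes`, `rtp_header`, and the packetization callback object already exist; RegisterTransportCallback() is the registration hook for that callback):

// Sketch only: drive the ACM send and receive paths.
std::unique_ptr<webrtc::AudioCodingModule> acm(
    webrtc::AudioCodingModule::Create(config));

// Send path: each call feeds one 10 ms frame; the ACM encodes it and hands
// the payload to the registered packetization callback for RTP sending.
acm->RegisterTransportCallback(&my_packetization_callback);  // assumed object
webrtc::AudioFrame frame;  // holds 10 ms of captured PCM
acm->Add10MsData(frame);

// Receive path: parsed RTP packets go into the jitter buffer (NetEq)...
acm->IncomingPacket(payload, payload_len_bytes, rtp_header);

// ...and 10 ms of decoded audio is pulled back out for playout.
webrtc::AudioFrame playout_frame;
bool muted = false;
acm->PlayoutData10Ms(/*desired_freq_hz=*/48000, &playout_frame, &muted);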

1.8.1 The Coding Module Interface Class

The interface covers both encoding and decoding.

The core of the audio coding interface class is as follows:

// modules/audio_coding/include/audio_coding_module.h
// forward declarations
class AudioDecoder;
class AudioEncoder;
class AudioFrame;
struct RTPHeader;

class AudioCodingModule {
 public:
  struct Config {
    explicit Config(
        rtc::scoped_refptr<AudioDecoderFactory> decoder_factory = nullptr);
    Config(const Config&);
    ~Config();

    NetEq::Config neteq_config;
    Clock* clock;
    // The factory that creates the decoders.
    rtc::scoped_refptr<AudioDecoderFactory> decoder_factory;
    // The factory that creates the NetEq instance.
    NetEqFactory* neteq_factory = nullptr;
  };

  // The classic creation approach from design patterns: instances are
  // created through this static method.
  static AudioCodingModule* Create(const Config& config);

  // These are pure virtual functions, the usual technique for interface
  // classes: subclasses must implement these methods or compilation fails.
  virtual int32_t Add10MsData(const AudioFrame& audio_frame) = 0;
  virtual int32_t InitializeReceiver() = 0;
  virtual int32_t IncomingPacket(const uint8_t* incoming_payload,
                                 size_t payload_len_bytes,
                                 const RTPHeader& rtp_header) = 0;
  virtual int32_t PlayoutData10Ms(int32_t desired_freq_hz,
                                  AudioFrame* audio_frame,
                                  bool* muted) = 0;
};

AudioCodingModuleImpl implements this coding interface class using the factory pattern. For most real-time audio/video application scenarios, the interface needs no changes. Unlike encoding, the data awaiting decoding arrives over the network, which brings packet loss, jitter, late arrival, and reordering, so a jitter buffer is required. For this reason AudioCodingModuleImpl, while implementing the AudioCodingModule interface methods, also defines an AcmReceiver member (NetEq plus decoding); received data is handed to that module for decoding.

// acm2/audio_coding_module.cc
class AudioCodingModuleImpl final : public AudioCodingModule {
  // `override` marks a function that overrides a virtual function of the
  // base class; if the base class has no such virtual function (e.g.
  // Add10MsData), compilation fails.

  // Add 10 ms of raw (PCM) audio data to the encoder.
  int Add10MsData(const AudioFrame& audio_frame) override;
  // Initialize receiver, resets codec database etc.
  int InitializeReceiver() override;
  // Incoming packet from network parsed and ready for decode.
  int IncomingPacket(const uint8_t* incoming_payload,
                     const size_t payload_length,
                     const RTPHeader& rtp_info) override;
  // Get 10 milliseconds of raw audio data to play out, and
  // automatic resample to the requested frequency if > 0.
  int PlayoutData10Ms(int desired_freq_hz,
                      AudioFrame* audio_frame,
                      bool* muted) override;

 private:
  int Add10MsDataInternal(const AudioFrame& audio_frame, InputData* input_data)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_);

  // TODO(bugs.webrtc.org/10739): change `absolute_capture_timestamp_ms` to
  // int64_t when it always receives a valid value.
  int Encode(const InputData& input_data,
             absl::optional<int64_t> absolute_capture_timestamp_ms);

  int InitializeReceiverSafe() RTC_EXCLUSIVE_LOCKS_REQUIRED(acm_mutex_);

  rtc::Buffer encode_buffer_ RTC_GUARDED_BY(acm_mutex_);
  uint32_t expected_codec_ts_ RTC_GUARDED_BY(acm_mutex_);
  uint32_t expected_in_ts_ RTC_GUARDED_BY(acm_mutex_);
  acm2::ACMResampler resampler_ RTC_GUARDED_BY(acm_mutex_);
  acm2::AcmReceiver receiver_;  // AcmReceiver has its own internal lock.
};
This design separates interface from implementation: objects are created through the Create() method, but the return type is the interface type. When the interface class and the implementation live in different libraries, a change to the implementation only requires recompiling and relinking that library; the library containing the interface class does not need rebuilding, which isolates development.

AudioCodingModuleImpl::AudioCodingModuleImpl(
    const AudioCodingModule::Config& config)
    : expected_codec_ts_(0xD87F3F9F),
      expected_in_ts_(0xD87F3F9F),
      receiver_(config),
      bitrate_logger_("WebRTC.Audio.TargetBitrateInKbps"),
      encoder_stack_(nullptr),
      previous_pltype_(255),
      receiver_initialized_(false),
      first_10ms_data_(false),
      first_frame_(true),
      packetization_callback_(NULL),
      codec_histogram_bins_log_(),
      number_of_consecutive_empty_packets_(0) {
  if (InitializeReceiverSafe() < 0) {
    RTC_LOG(LS_ERROR) << "Cannot initialize receiver";
  }
  RTC_LOG(LS_INFO) << "Created";
}

AudioCodingModule* AudioCodingModule::Create(const Config& config) {
  return new AudioCodingModuleImpl(config);
}

1.8.2 The Encoding Data Flow

Incoming audio frames first go through validity checks and, where needed, remixing, and are then passed to the registered coding module for encoding. This section focuses on the encoding data flow; encoder_stack_ is a smart pointer to an AudioEncoder.

int32_t AudioCodingModuleImpl::Encode(
    const InputData& input_data,
    absl::optional<int64_t> absolute_capture_timestamp_ms) {
  // TODO(bugs.webrtc.org/10739): add dcheck that
  // `audio_frame.absolute_capture_timestamp_ms()` always has a value.
  AudioEncoder::EncodedInfo encoded_info;
  uint8_t previous_pltype;
  // ...
  encoded_info = encoder_stack_->Encode(
      rtp_timestamp,
      rtc::ArrayView<const int16_t>(
          input_data.audio,
          input_data.audio_channel * input_data.length_per_channel),
      &encode_buffer_);
}

// Add 10 ms of raw (PCM) audio data to the encoder.
int AudioCodingModuleImpl::Add10MsData(const AudioFrame& audio_frame) {
  MutexLock lock(&acm_mutex_);
  // Checks channel count, sample rate, etc., and remixes where appropriate.
  int r = Add10MsDataInternal(audio_frame, &input_data_);
  // TODO(bugs.webrtc.org/10739): add dcheck that
  // `audio_frame.absolute_capture_timestamp_ms()` always has a value.
  return r < 0
             ? r
             : Encode(input_data_, audio_frame.absolute_capture_timestamp_ms());
}

Encode() is the API exposed to upper layers; internally it calls the protected EncodeImpl() to perform the actual encoding, so every codec (Opus, PCM16, G.711, ...) must implement that method.
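To make the pattern concrete, here is a minimal sketch of a hypothetical codec encoder (MyCodecEncoder and all its values are illustrative, not a real WebRTC codec): it overrides EncodeImpl() and the other pure virtuals of AudioEncoder, while upper layers keep calling the public, non-virtual Encode():

// Sketch only: MyCodecEncoder is hypothetical.
class MyCodecEncoder : public webrtc::AudioEncoder {
 public:
  int SampleRateHz() const override { return 16000; }
  size_t NumChannels() const override { return 1; }
  size_t Num10MsFramesInNextPacket() const override { return 2; }
  size_t Max10MsFramesInAPacket() const override { return 2; }
  int GetTargetBitrate() const override { return 32000; }
  void Reset() override {}

 protected:
  EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
                         rtc::ArrayView<const int16_t> audio,
                         rtc::Buffer* encoded) override {
    // A real codec would buffer 10 ms blocks here and emit a packet once
    // enough are collected, as the Opus and iLBC implementations below do.
    return EncodedInfo();
  }
};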

1.8.3 The Packet-Receive/Decode Data Flow

Compared with encoding, real-time audio applications generally require network latency under 300 ms, so received packets need jitter handling, and decoding is invoked by the module that handles jitter. The AcmReceiver class therefore defines the common receive-side functionality.

// Incoming packet from network parsed and ready for decode.
int AudioCodingModuleImpl::IncomingPacket(const uint8_t* incoming_payload,
                                          const size_t payload_length,
                                          const RTPHeader& rtp_header) {
  RTC_DCHECK_EQ(payload_length == 0, incoming_payload == nullptr);
  return receiver_.InsertPacket(
      rtp_header,
      rtc::ArrayView<const uint8_t>(incoming_payload, payload_length));
}

1.8.4 Retrieving Decoded Data

The decoded data lives in the NetEq module, which is where the real-time requirements are enforced, so playout simply calls methods on the receiver_ member to fetch the data.

// Get 10 milliseconds of raw audio data to play out.
// Automatic resample to the requested frequency.
int AudioCodingModuleImpl::PlayoutData10Ms(int desired_freq_hz,
                                           AudioFrame* audio_frame,
                                           bool* muted) {
  // GetAudio always returns 10 ms, at the requested sample rate.
  if (receiver_.GetAudio(desired_freq_hz, audio_frame, muted) != 0) {
    RTC_LOG(LS_ERROR) << "PlayoutData failed, RecOut Failed";
    return -1;
  }
  return 0;
}

Next we look at the AudioEncoder and AcmReceiver classes, which implement encoding and receiving respectively.

1.8.5 The AudioEncoder Class

AudioEncoder is the encoder interface class, defined in api/audio_codecs/audio_encoder.h. It is a generic type: concrete implementations such as Opus and G.711 inherit from it, so it defines functionality common to all encoders, such as bitrate, encoding, FEC, and DTX. Because this is a real-time scenario, network conditions influence the choice of optimal encoder parameters; as before, the network-statistics-related parts are omitted here.

// This is the interface class for encoders in AudioCoding module. Each codec
// type must have an implementation of this class.
class AudioEncoder {
 public:
  // Used for UMA logging of codec usage. The same codecs, with the
  // same values, must be listed in
  // src/tools/metrics/histograms/histograms.xml in chromium to log
  // correct values.
  enum class CodecType {
    kOther = 0,  // Codec not specified, and/or not listed in this enum
    kOpus = 1,
    kIsac = 2,
    kPcmA = 3,
    kPcmU = 4,
    kG722 = 5,
    kIlbc = 6,

    // Number of histogram bins in the UMA logging of codec types. The
    // total number of different codecs that are logged cannot exceed this
    // number.
    kMaxLoggedAudioCodecTypes
  };

  // Accepts one 10 ms block of input audio (i.e., SampleRateHz() / 100 *
  // NumChannels() samples). Multi-channel audio must be sample-interleaved.
  // The encoder appends zero or more bytes of output to `encoded` and returns
  // additional encoding information.  Encode() checks some preconditions, calls
  // EncodeImpl() which does the actual work, and then checks some
  // postconditions.
  EncodedInfo Encode(uint32_t rtp_timestamp,
                     rtc::ArrayView<const int16_t> audio,
                     rtc::Buffer* encoded);

  // Resets the encoder to its starting state, discarding any input that has
  // been fed to the encoder but not yet emitted in a packet.
  virtual void Reset() = 0;

  // Enables or disables codec-internal FEC (forward error correction). Returns
  // true if the codec was able to comply. The default implementation returns
  // true when asked to disable FEC and false when asked to enable it (meaning
  // that FEC isn't supported).
  virtual bool SetFec(bool enable);

  // Enables or disables codec-internal VAD/DTX. Returns true if the codec was
  // able to comply. The default implementation returns true when asked to
  // disable DTX and false when asked to enable it (meaning that DTX isn't
  // supported).
  virtual bool SetDtx(bool enable);

  // Returns the status of codec-internal DTX. The default implementation always
  // returns false.
  virtual bool GetDtx() const;

  // Sets the application mode. Returns true if the codec was able to comply.
  // The default implementation just returns false.
  enum class Application { kSpeech, kAudio };
  virtual bool SetApplication(Application application);

  // The interface class dispatches to the subclass's concrete encoding
  // implementation, so different encoders are driven through one interface.
  // Every encoder type must implement this method.
 protected:
  // Subclasses implement this to perform the actual encoding. Called by
  // Encode().
  virtual EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
                                 rtc::ArrayView<const int16_t> audio,
                                 rtc::Buffer* encoded) = 0;
};

The implementation of this interface's encoding API, Encode(), is in api/audio_codecs/audio_encoder.cc:

// The encoding results are stored in EncodedInfo.
AudioEncoder::EncodedInfo AudioEncoder::Encode(
    uint32_t rtp_timestamp,  // RTP timestamp, used for playout and A/V sync.
    rtc::ArrayView<const int16_t> audio,  // PCM data to be encoded.
    rtc::Buffer* encoded) {  // The encoded output.
  TRACE_EVENT0("webrtc", "AudioEncoder::Encode");
  RTC_CHECK_EQ(audio.size(),
               static_cast<size_t>(NumChannels() * SampleRateHz() / 100));

  const size_t old_size = encoded->size();
  EncodedInfo info = EncodeImpl(rtp_timestamp, audio, encoded);
  RTC_CHECK_EQ(encoded->size() - old_size, info.encoded_bytes);
  return info;
}

1.8.6 The Opus Encoder Class Implementation

The open-source Opus library is implemented in C. Third-party open-source libraries all live under src/third_party, which isolates them from the WebRTC implementation: when a third-party library has not changed, it does not need recompiling. For a single library the time saved may be modest, but with many third-party libraries builds get slow, so this decoupled design significantly improves development efficiency.

// modules/audio_coding/codecs/opus/audio_encoder_opus.h

class AudioEncoderOpusImpl final : public AudioEncoder {
 public:
  // ...
 protected:
  EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
                         rtc::ArrayView<const int16_t> audio,
                         rtc::Buffer* encoded) override;
 private:
  static void AppendSupportedEncoders(std::vector<AudioCodecSpec>* specs);
  std::vector<int16_t> input_buffer_;
  OpusEncInst* inst_;
  friend struct AudioEncoderOpus;
};
Its implementation is located in:
// modules/audio_coding/codecs/opus/audio_encoder_opus.cc
AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
    uint32_t rtp_timestamp,
    rtc::ArrayView<const int16_t> audio,
    rtc::Buffer* encoded) {
  MaybeUpdateUplinkBandwidth();

  if (input_buffer_.empty())
    first_timestamp_in_buffer_ = rtp_timestamp;

  input_buffer_.insert(input_buffer_.end(), audio.cbegin(), audio.cend());
  if (input_buffer_.size() <
      (Num10msFramesPerPacket() * SamplesPer10msFrame())) {
    return EncodedInfo();
  }

  // WebRtcOpus_Encode performs the actual encoding. It is implemented in
  // opus_interface.cc, which wraps and calls the third-party Opus library,
  // mainly opus_encode or opus_multistream_encode.
  const size_t max_encoded_bytes = SufficientOutputBufferSize();
  EncodedInfo info;
  info.encoded_bytes = encoded->AppendData(
      max_encoded_bytes, [&](rtc::ArrayView<uint8_t> encoded) {
        int status = WebRtcOpus_Encode(
            inst_, &input_buffer_[0],
            rtc::CheckedDivExact(input_buffer_.size(), config_.num_channels),
            rtc::saturated_cast<int16_t>(max_encoded_bytes), encoded.data());

        RTC_CHECK_GE(status, 0);  // Fails only if fed invalid data.

        return static_cast<size_t>(status);
      });
  input_buffer_.clear();

  bool dtx_frame = (info.encoded_bytes <= 2);

  // Will use new packet size for next encoding.
  config_.frame_size_ms = next_frame_length_ms_;

  if (adjust_bandwidth_ && bitrate_changed_) {
    const auto bandwidth = GetNewBandwidth(config_, inst_);
    if (bandwidth) {
      RTC_CHECK_EQ(0, WebRtcOpus_SetBandwidth(inst_, *bandwidth));
    }
    bitrate_changed_ = false;
  }
  // ...

As noted above, every encoder must implement this method. For example, the iLBC encoder's implementation looks like this:

AudioEncoder::EncodedInfo AudioEncoderIlbcImpl::EncodeImpl(
    uint32_t rtp_timestamp,
    rtc::ArrayView<const int16_t> audio,
    rtc::Buffer* encoded) {
  // Save timestamp if starting a new packet.
  if (num_10ms_frames_buffered_ == 0)
    first_timestamp_in_buffer_ = rtp_timestamp;

  // Buffer input.
  std::copy(audio.cbegin(), audio.cend(),
            input_buffer_ + kSampleRateHz / 100 * num_10ms_frames_buffered_);

  // If we don't yet have enough buffered input for a whole packet, we're done
  // for now.
  if (++num_10ms_frames_buffered_ < num_10ms_frames_per_packet_) {
    return EncodedInfo();
  }

  // Encode buffered input.
  RTC_DCHECK_EQ(num_10ms_frames_buffered_, num_10ms_frames_per_packet_);
  num_10ms_frames_buffered_ = 0;
  // Likewise, this calls WebRtcIlbcfix_Encode from the third-party library
  // to do the actual encoding.
  size_t encoded_bytes = encoded->AppendData(
      RequiredOutputSizeBytes(), [&](rtc::ArrayView<uint8_t> encoded) {
        const int r = WebRtcIlbcfix_Encode(
            encoder_, input_buffer_,
            kSampleRateHz / 100 * num_10ms_frames_per_packet_, encoded.data());
        RTC_CHECK_GE(r, 0);

        return static_cast<size_t>(r);
      });

  RTC_DCHECK_EQ(encoded_bytes, RequiredOutputSizeBytes());

  EncodedInfo info;
  info.encoded_bytes = encoded_bytes;
  info.encoded_timestamp = first_timestamp_in_buffer_;
  info.payload_type = payload_type_;
  info.encoder_type = CodecType::kIlbc;
  return info;
}

We have now seen how concrete encoders derive from the AudioEncoder interface class. One question remains: how do these derived encoder classes ultimately get used by the application? This involves the concepts of call, stream, and channel. Channels come in send and receive flavors and are tied to an SSRC, which identifies a source. In a video call, for example, the audio sent may be captured by the microphone or come from audio playing in a shared PPT; these two sources are distinguished by SSRC. The same applies to receiving: when receiving audio from several conference participants at once, each has a different SSRC, and each SSRC corresponds to one channel, because each stream may be processed differently (microphone capture needs speech enhancement, shared PPT audio does not). A channel mainly encapsulates the codec, RTP packaging, and end-to-end encryption. Streams also come in send and receive flavors and belong to the transport layer: they send and receive the RTP/RTCP packets assembled by the channel layer. A call is a bidirectional session; some implementations name it differently, but whether two-party or multi-party, the notion of a call is indispensable. The Call class is the session class used by the peer connection example.

//pc/peer_connection.h
  // Creates a PeerConnection and initializes it with the given values.
  // If the initialization fails, the function releases the PeerConnection
  // and returns nullptr.
  //
  // Note that the function takes ownership of dependencies, and will
  // either use them or release them, whether it succeeds or fails.
  static RTCErrorOr<rtc::scoped_refptr<PeerConnection>> Create(
      rtc::scoped_refptr<ConnectionContext> context,
      const PeerConnectionFactoryInterface::Options& options,
      std::unique_ptr<RtcEventLog> event_log,
      std::unique_ptr<Call> call,
      const PeerConnectionInterface::RTCConfiguration& configuration,
      PeerConnectionDependencies dependencies);

The implementation details of channel, stream, and call are not expanded here; they will be analyzed at their own layers. This concludes the AudioEncoder implementation and usage flow; next comes the functionality and implementation of the jitter-buffer receive class.

1.8.7 AcmReceiver

Here, once more, are the receive-side methods that AudioCodingModuleImpl (in third_party/webrtc/modules/audio_coding/acm2/audio_coding_module.cc) calls on AcmReceiver:

acm2::AcmReceiver receiver_;  // AcmReceiver has its own internal lock.

receiver_.SetCodecs(codecs);
receiver_.InsertPacket(
    rtp_header,
    rtc::ArrayView<const uint8_t>(incoming_payload, payload_length));
receiver_.GetAudio(desired_freq_hz, audio_frame, muted);
receiver_.GetNetworkStatistics(statistics);

AcmReceiver's main methods are the ones called in the snippet above. InsertPacket and GetAudio, the two receive-path methods, internally call the identically named methods on neteq_. That object is created as follows: the NetEq factory class creates it and associates it with the decoders.

std::unique_ptr<NetEq> CreateNetEq(
    NetEqFactory* neteq_factory,
    const NetEq::Config& config,
    Clock* clock,
    const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) {
  if (neteq_factory) {
    return neteq_factory->CreateNetEq(config, decoder_factory, clock);
  }
  return DefaultNetEqFactory().CreateNetEq(config, decoder_factory, clock);
}

AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
    : last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
      neteq_(CreateNetEq(config.neteq_factory,
                         config.neteq_config,
                         config.clock,
                         config.decoder_factory)),
      clock_(config.clock),
      resampled_last_output_frame_(true) {
  RTC_DCHECK(clock_);
  memset(last_audio_buffer_.get(), 0,
         sizeof(int16_t) * AudioFrame::kMaxDataSizeSamples);
}

neteq_ is likewise created through a factory. There are two NetEqFactory variants, a default one and a custom one. NetEq itself is an interface class, and the factory returns a NetEq object:

// Creates NetEq instances using the settings provided in the config struct.
class NetEqFactory {
 public:
  virtual ~NetEqFactory() = default;

  // Creates a new NetEq object, with parameters set in `config`. The `config`
  // object will only have to be valid for the duration of the call to this
  // method.
  virtual std::unique_ptr<NetEq> CreateNetEq(
      const NetEq::Config& config,
      const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
      Clock* clock) const = 0;
};

On top of this, WebRTC wraps two factory classes that implement NetEqFactory:

class CustomNetEqFactory : public NetEqFactory {
 private:
  std::unique_ptr<NetEqControllerFactory> controller_factory_;
};

and

class DefaultNetEqFactory : public NetEqFactory {
 private:
  const DefaultNetEqControllerFactory controller_factory_;
};

The private fields in these two classes are what they add over NetEqFactory. In most cases the default factory is sufficient; only when your application scenario needs the related behavior tuned do you really need a custom one. For the details of the NetEq algorithms invoked afterwards, see Chapter 11, Section 11.2 of 《实时语音处理实践指南》 (A Practical Guide to Real-Time Speech Processing).
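As a minimal sketch of how a custom factory would be wired in (MyNetEqFactory is a hypothetical name; by default Config::neteq_factory stays null and DefaultNetEqFactory is used, as CreateNetEq() above shows):

// Sketch only: MyNetEqFactory is a hypothetical NetEqFactory subclass.
class MyNetEqFactory : public webrtc::NetEqFactory {
 public:
  std::unique_ptr<webrtc::NetEq> CreateNetEq(
      const webrtc::NetEq::Config& config,
      const rtc::scoped_refptr<webrtc::AudioDecoderFactory>& decoder_factory,
      webrtc::Clock* clock) const override {
    // Tune config or substitute a NetEq variant for your scenario here;
    // this sketch just delegates to the default factory.
    return webrtc::DefaultNetEqFactory().CreateNetEq(config, decoder_factory,
                                                     clock);
  }
};

MyNetEqFactory my_factory;
webrtc::AudioCodingModule::Config acm_config(
    webrtc::CreateBuiltinAudioDecoderFactory());
acm_config.neteq_factory = &my_factory;  // CreateNetEq() will now use it.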

1.9 MediaStream and MediaStreamTrack

"Multimedia" combines the words "multi" and "media": media are carriers of information, such as text, images, video, and audio, and WebRTC communication relies mainly on video and audio as its media. Because WebRTC (Web Real-Time Communication) is real-time, it uses streaming multimedia: compressed audio/video is streamed over the network and played back in real time at the receiving end as it arrives. The streamed content can be a file such as an MP4, video captured live by a camera, or audio captured live by a microphone. The receiver generally plays while receiving (streaming playback), buffering only a very short time, usually under 800 ms. This real-time requirement leads to transporting the media over the unreliable UDP/RTP protocol stack.

MediaStream in WebRTC involves two main APIs: MediaStreamTrack and MediaStream. A MediaStreamTrack (MST) object represents a single type of media originating from the end user; its source can be a physical microphone or camera. When transported over RTP, different media sources are identified by a Synchronization Source (SSRC); within one session, different sources have different SSRCs, so the SSRC distinguishes the sources. A MediaStream aggregates multiple MediaStreamTrack objects so they can be played or captured as a unit, i.e. audio and video are synchronized during playout and rendering. A MediaStreamTrack relies on a MediaSource to supply its media data.

A Track is a multimedia track: when a singer records a song or performs live, the vocals can be one capture track, the guitar another, and the drums another; these tracks are captured simultaneously and mixed for playback. A StreamTrack is a streaming multimedia track. A single MediaStreamTrack can carry multi-channel content such as stereo, 5.1 surround, or 3D video, with the channels time-aligned with one another. Track is a capture-side notion (think of the recording session), while channel is a playback-side notion: you can record with any number of tracks, but the result can be mixed down to 5.1, stereo (2-channel), or similar formats.

The MediaStreamTrack interface class is shown below: one state indicates whether the track is enabled, and the kind indicates whether it is video or audio.

// third_party/webrtc/api/media_stream_interface.h
// C++ version of MediaStreamTrack.
// See: https://www.w3.org/TR/mediacapture-streams/#mediastreamtrack
class RTC_EXPORT MediaStreamTrackInterface : public rtc::RefCountInterface,
                                             public NotifierInterface {
 public:
  enum TrackState {
    kLive,
    kEnded,
  };

  static const char* const kAudioKind;
  static const char* const kVideoKind;

  // kind() returns kAudioKind when this class is an AudioTrackInterface
  // subclass, and kVideoKind when it is a VideoTrackInterface subclass.
  virtual std::string kind() const = 0;

  // Track ID.
  virtual std::string id() const = 0;

  // A disabled audio track generates silence; a disabled video track
  // generates black frames.
  virtual bool enabled() const = 0;
  virtual bool set_enabled(bool enable) = 0;

  // A track has two states: live and ended. A track that has ended never
  // returns to the live state.
  virtual TrackState state() const = 0;

 protected:
  ~MediaStreamTrackInterface() override = default;
};

The video stream track and audio stream track implementations inherit from this class, so the parent-class API can be used to reach the subclass methods. The two classes are implemented as follows:

class RTC_EXPORT AudioTrackInterface : public MediaStreamTrackInterface {
 public:
  virtual AudioSourceInterface* GetSource() const = 0;

  // Adds/removes a sink that receives data from the track.
  virtual void AddSink(AudioTrackSinkInterface* sink) = 0;
  virtual void RemoveSink(AudioTrackSinkInterface* sink) = 0;

  // Gets the signal level from the audio track.
  virtual bool GetSignalLevel(int* level);

  // Gets the audio processor used by the audio track. Returns null if the
  // track has no processor.
  virtual rtc::scoped_refptr<AudioProcessorInterface> GetAudioProcessor();

 protected:
  ~AudioTrackInterface() override = default;
};


class RTC_EXPORT VideoTrackInterface
    : public MediaStreamTrackInterface,
      public rtc::VideoSourceInterface<VideoFrame> {
 public:
  // Video track content hints help video processing and codec algorithms set
  // different parameters or use different methods per content type. Fluid
  // suits movies and games, where motion matters most; Detailed and Text suit
  // presentations, web pages, text, and drawings. Detailed prioritizes static
  // detail, while Text specifically targets text content.
  // Different enhancement and codec techniques are optimal for different
  // content: WebRTC's APM, for instance, is tuned mainly for speech and is
  // not optimal for music; similarly for video, text-heavy content is
  // sensitive to quantization while movies and games are less so.
  //
  // See https://crbug.com/653531 and https://w3c.github.io/mst-content-hint.
  enum class ContentHint { kNone, kFluid, kDetailed, kText };

  // Registers a video sink for this track. Used to connect the track to the
  // video engine.
  void AddOrUpdateSink(rtc::VideoSinkInterface<VideoFrame>* sink,
                       const rtc::VideoSinkWants& wants) override {}
  void RemoveSink(rtc::VideoSinkInterface<VideoFrame>* sink) override {}

  virtual VideoTrackSourceInterface* GetSource() const = 0;

  virtual ContentHint content_hint() const;
  virtual void set_content_hint(ContentHint hint) {}

 protected:
  ~VideoTrackInterface() override = default;
};

Both of these are also interface classes. For thread safety, WebRTC wraps calls into their implementations with proxies. The audio and video track implementations live in webrtc/pc/audio_track.cc and webrtc/pc/video_track.cc.

The MediaStream class is defined as follows:

// third_party/webrtc/api/media_stream_interface.h
// C++ version of https://www.w3.org/TR/mediacapture-streams/#mediastream.
// Remote audio/video tracks received by a PeerConnection/RtpReceiver cannot
// be synchronized simply by adding them to the same MediaStream; the msid
// (MediaStream ID) attribute that describes the session in SDP must be
// propagated to synchronize the media sources, so the MediaStreamInterface
// class only stores tracks.
class MediaStreamInterface : public rtc::RefCountInterface,
                             public NotifierInterface {
 public:
  virtual std::string id() const = 0;

  virtual AudioTrackVector GetAudioTracks() = 0;
  virtual VideoTrackVector GetVideoTracks() = 0;
  virtual rtc::scoped_refptr<AudioTrackInterface> FindAudioTrack(
      const std::string& track_id) = 0;
  virtual rtc::scoped_refptr<VideoTrackInterface> FindVideoTrack(
      const std::string& track_id) = 0;

  // Takes ownership of added tracks.
  // Note: Default implementations are for avoiding link time errors in
  // implementations that mock this API.
  // TODO(bugs.webrtc.org/13980): Remove default implementations.
  virtual bool AddTrack(rtc::scoped_refptr<AudioTrackInterface> track) {
    RTC_CHECK_NOTREACHED();
  }
  virtual bool AddTrack(rtc::scoped_refptr<VideoTrackInterface> track) {
    RTC_CHECK_NOTREACHED();
  }
  virtual bool RemoveTrack(rtc::scoped_refptr<AudioTrackInterface> track) {
    RTC_CHECK_NOTREACHED();
  }
  virtual bool RemoveTrack(rtc::scoped_refptr<VideoTrackInterface> track) {
    RTC_CHECK_NOTREACHED();
  }

 protected:
  ~MediaStreamInterface() override = default;
};

A MediaStream is an aggregation of multiple tracks, which means there can be several video tracks (screen share, camera capture, etc.) and audio tracks. MediaStream stores video and audio in separate vectors, of types VideoTrackInterface and AudioTrackInterface respectively.

//third_party/webrtc/api/media_stream_interface.h
typedef std::vector<rtc::scoped_refptr<AudioTrackInterface> > AudioTrackVector;
typedef std::vector<rtc::scoped_refptr<VideoTrackInterface> > VideoTrackVector;
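As a small usage sketch (the `stream` parameter is assumed to be an existing MediaStreamInterface instance), enumerating the tracks of a stream and muting its audio might look like this:

// Sketch only: `stream` is assumed to reference an existing MediaStream.
void MuteAllAudioTracks(
    rtc::scoped_refptr<webrtc::MediaStreamInterface> stream) {
  for (const auto& track : stream->GetAudioTracks()) {
    // A disabled audio track generates silence (see the interface above).
    track->set_enabled(false);
  }
  // Video tracks live in their own vector.
  for (const auto& track : stream->GetVideoTracks()) {
    RTC_LOG(LS_INFO) << "video track: " << track->id();
  }
}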

1.10 VoiceEngine

The more important member variables of the VoiceEngine class are defined below; as the comments show, they cover the ADM, the APM, and codec-related members. The ACM module is not used here directly because ACM involves network transmission, and with it network jitter, packet loss, delay, and bandwidth limits; the ACM module is therefore driven by the NetEq module, while the NetEq jitter buffer configuration lives in the VoiceEngine.

// The audio device module.
rtc::scoped_refptr<webrtc::AudioDeviceModule> adm_;
rtc::scoped_refptr<webrtc::AudioEncoderFactory> encoder_factory_;
rtc::scoped_refptr<webrtc::AudioDecoderFactory> decoder_factory_;
rtc::scoped_refptr<webrtc::AudioMixer> audio_mixer_;
// The audio processing module.
rtc::scoped_refptr<webrtc::AudioProcessing> apm_;
// Asynchronous audio processing.
webrtc::AudioFrameProcessor* const audio_frame_processor_;
// The primary instance of WebRtc VoiceEngine.
rtc::scoped_refptr<webrtc::AudioState> audio_state_;
std::vector<AudioCodec> send_codecs_;
std::vector<AudioCodec> recv_codecs_;

// Jitter buffer settings for new streams.
size_t audio_jitter_buffer_max_packets_ = 200;
bool audio_jitter_buffer_fast_accelerate_ = false;
int audio_jitter_buffer_min_delay_ms_ = 0;

media_engine_ calls the voice engine's Init() method directly. The core of WebRTC's cricket::WebRtcVoiceEngine::Init() is the code fragment below. The function needs the ADM module, which can either be passed in as a constructor argument or created by default as the platform-specific ADM. It then creates the AudioState object from the configuration, and once AudioState exists, registers an AudioTransport object with the ADM, connecting the ADM to the audio send path. The main flow is:

void WebRtcVoiceEngine::Init() {
  // Load our audio codec lists.
  send_codecs_ = CollectCodecs(encoder_factory_->GetSupportedEncoders());
  recv_codecs_ = CollectCodecs(decoder_factory_->GetSupportedDecoders());

  // No ADM supplied? Create a default one. When peer_connection_factory_ is
  // created the ADM argument is NULL, so the platform-default ADM object is
  // created here. See 2.1.
  if (!adm_) {
    adm_ = webrtc::AudioDeviceModule::Create(
        webrtc::AudioDeviceModule::kPlatformDefaultAudio, task_queue_factory_);
  }

  webrtc::adm_helpers::Init(adm());

  // Set up AudioState.
  {
    webrtc::AudioState::Config config;
    if (audio_mixer_) {
      config.audio_mixer = audio_mixer_;
    } else {
      config.audio_mixer = webrtc::AudioMixerImpl::Create();
    }
    config.audio_processing = apm_;
    config.audio_device_module = adm_;
    if (audio_frame_processor_)
      config.async_audio_processing_factory =
          rtc::make_ref_counted<webrtc::AsyncAudioProcessing::Factory>(
              *audio_frame_processor_, *task_queue_factory_);
    audio_state_ = webrtc::AudioState::Create(config);
  }

  // Connect the ADM to our audio path.
  adm()->RegisterAudioCallback(audio_state()->audio_transport());

  initialized_ = true;
}

Here audio_transport is registered with the ADM module (audio_device_impl.cc), which ultimately registers it with the AudioDeviceBuffer object (audio_device_buffer.cc), since that is where the audio data is kept. When captured data arrives, subsequent processing, such as encoding and sending, is completed through this callback.

adm()->RegisterAudioCallback(audio_state()->audio_transport());

The ADM's RegisterAudioCallback() calls AudioDeviceBuffer::RegisterAudioCallback(). Once the ADM has captured data, it calls the transport layer's RecordedDataIsAvailable() method, handing further processing of the data over to the transport layer.

// webrtc/audio/audio_transport_impl.cc
// Not used in Chromium. Process captured audio and distribute to all sending
// streams, and try to do this at the lowest possible sample rate.
int32_t AudioTransportImpl::RecordedDataIsAvailable(
    const void* audio_data,
    const size_t number_of_frames,
    const size_t bytes_per_sample,
    const size_t number_of_channels,
    const uint32_t sample_rate,
    const uint32_t audio_delay_milliseconds,
    const int32_t /*clock_drift*/,
    const uint32_t /*volume*/,
    const bool key_pressed,
    uint32_t& /*new_mic_volume*/,
    const int64_t
        estimated_capture_time_ns) {  // NOLINT: to avoid changing APIs
  RTC_DCHECK(audio_data);
  RTC_DCHECK_GE(number_of_channels, 1);
  RTC_DCHECK_LE(number_of_channels, 2);
  RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample);
  RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
  // 100 = 1 second / data duration (10 ms).
  RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);
  RTC_DCHECK_LE(bytes_per_sample * number_of_frames * number_of_channels,
                AudioFrame::kMaxDataSizeBytes);

  int send_sample_rate_hz = 0;
  size_t send_num_channels = 0;
  bool swap_stereo_channels = false;
  {
    MutexLock lock(&capture_lock_);
    send_sample_rate_hz = send_sample_rate_hz_;
    send_num_channels = send_num_channels_;
    swap_stereo_channels = swap_stereo_channels_;
  }

  std::unique_ptr<AudioFrame> audio_frame(new AudioFrame());
  InitializeCaptureFrame(sample_rate, send_sample_rate_hz, number_of_channels,
                         send_num_channels, audio_frame.get());
  voe::RemixAndResample(static_cast<const int16_t*>(audio_data),
                        number_of_frames, number_of_channels, sample_rate,
                        &capture_resampler_, audio_frame.get());
  ProcessCaptureFrame(audio_delay_milliseconds, key_pressed,
                      swap_stereo_channels, audio_processing_,
                      audio_frame.get());
  audio_frame->set_absolute_capture_timestamp_ms(estimated_capture_time_ns /
                                                 1000000);

  RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
  if (async_audio_processing_)
    async_audio_processing_->Process(std::move(audio_frame));
  else
    SendProcessedData(std::move(audio_frame));

  return 0;
}

Because mixing may be required, an audio mixer is created. AudioState holds the audio state (APM, ADM, and mixer) shared by the multiple webrtc::Call instances that need audio processing. Before tracing the concrete audio data path, look at the definition of the AudioTransport object:

// webrtc/modules/audio_device/include/audio_device_defines.h
class AudioTransport {
 public:
  // Callback for microphone data captured by the hardware device. keyPressed
  // flags whether a keyboard key is pressed, which helps APM processing.
  virtual int32_t RecordedDataIsAvailable(
      const void* audioSamples,
      size_t nSamples,
      size_t nBytesPerSample,
      size_t nChannels,
      uint32_t samplesPerSec,
      uint32_t totalDelayMS,
      int32_t clockDrift,
      uint32_t currentMicLevel,
      bool keyPressed,
      uint32_t& newMicLevel,
      int64_t estimatedCaptureTimeNS) {  // NOLINT
    return RecordedDataIsAvailable(
        audioSamples, nSamples, nBytesPerSample, nChannels, samplesPerSec,
        totalDelayMS, clockDrift, currentMicLevel, keyPressed, newMicLevel);
  }

  // Pulls the audio stream to be played out.
  virtual int32_t NeedMorePlayData(size_t nSamples,
                                   size_t nBytesPerSample,
                                   size_t nChannels,
                                   uint32_t samplesPerSec,
                                   void* audioSamples,
                                   size_t& nSamplesOut,  // NOLINT
                                   int64_t* elapsed_time_ms,
                                   int64_t* ntp_time_ms) = 0;  // NOLINT

  // This is the Chromium counterpart of NeedMorePlayData.
  virtual void PullRenderData(int bits_per_sample,
                              int sample_rate,
                              size_t number_of_channels,
                              size_t number_of_frames,
                              void* audio_data,
                              int64_t* elapsed_time_ms,
                              int64_t* ntp_time_ms) = 0;

 protected:
  virtual ~AudioTransport() {}
};

1.11 Audio Encoder/Decoder Factory

The VoiceEngine in Section 1.10 does not use the AudioEncoder class directly; instead it wraps encoder creation in an AudioEncoderFactory. The audio encoder factory creates encoder objects for the various audio codecs, and the audio decoder factory creates the corresponding decoder objects.

WebRTC's AudioEncoderFactory interface is defined (in webrtc/src/api/audio_codecs/audio_encoder_factory.h) as follows:

namespace webrtc {

// A factory that creates AudioEncoders.
class AudioEncoderFactory : public rtc::RefCountInterface {
 public:
  // Returns a prioritized list of audio codecs, to use for signaling etc.
  virtual std::vector<AudioCodecSpec> GetSupportedEncoders() = 0;

  // Returns information about how this format would be encoded, provided it's
  // supported. More format and format variations may be supported than those
  // returned by GetSupportedEncoders().
  virtual absl::optional<AudioCodecInfo> QueryAudioEncoder(
      const SdpAudioFormat& format) = 0;

  // Creates an AudioEncoder for the specified format. The encoder will tag its
  // payloads with the specified payload type. The `codec_pair_id` argument is
  // used to link encoders and decoders that talk to the same remote entity: if
  // a AudioEncoderFactory::MakeAudioEncoder() and a
  // AudioDecoderFactory::MakeAudioDecoder() call receive non-null IDs that
  // compare equal, the factory implementations may assume that the encoder and
  // decoder form a pair. (The intended use case for this is to set up
  // communication between the AudioEncoder and AudioDecoder instances, which is
  // needed for some codecs with built-in bandwidth adaptation.)
  //
  // Note: Implementations need to be robust against combinations other than
  // one encoder, one decoder getting the same ID; such encoders must still
  // work.
  //
  // TODO(ossu): Try to avoid audio encoders having to know their payload type.
  virtual std::unique_ptr<AudioEncoder> MakeAudioEncoder(
      int payload_type,
      const SdpAudioFormat& format,
      absl::optional<AudioCodecPairId> codec_pair_id) = 0;
};

}  // namespace webrtc
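Before looking at how WebRTC implements this interface, here is a brief usage sketch (the payload type 111 and the Opus format parameters are illustrative; CreateBuiltinAudioEncoderFactory() is introduced below):

// Sketch only: query and create an Opus encoder through the factory.
rtc::scoped_refptr<webrtc::AudioEncoderFactory> factory =
    webrtc::CreateBuiltinAudioEncoderFactory();
webrtc::SdpAudioFormat opus_format(
    "opus", 48000, 2, {{"minptime", "10"}, {"useinbandfec", "1"}});
if (factory->QueryAudioEncoder(opus_format)) {  // Is the format supported?
  std::unique_ptr<webrtc::AudioEncoder> encoder =
      factory->MakeAudioEncoder(/*payload_type=*/111, opus_format,
                                absl::nullopt);
}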

Intuitively, one might implement AudioEncoderFactory with something like the composite pattern:

  • First, implement one AudioEncoderFactory subclass per audio codec, e.g. one subclass that creates OPUS encoders, another that creates AAC encoders, and so on;
  • Then create a composite AudioEncoderFactory subclass as the container for the encoder factories of all supported audio codecs, implementing its interface by delegating to the per-codec encoder factories;
  • Finally, provide a factory method for AudioEncoderFactory that creates the composite subclass object, creates and registers each supported codec's encoder factory object with it, and returns the composite object.

A possible implementation follows. First, the declaration of the audio encoder factory's factory method:

#ifndef API_AUDIO_CODECS_FAKE_AUDIO_ENCODER_FACTORY_H_
#define API_AUDIO_CODECS_FAKE_AUDIO_ENCODER_FACTORY_H_

#include "api/audio_codecs/audio_encoder_factory.h"
#include "rtc_base/scoped_ref_ptr.h"

namespace webrtc {

class CodecAudioEncoderFactory : public AudioEncoderFactory {
 public:
  virtual bool IsSupported(const SdpAudioFormat& format) = 0;
};

// Creates a new factory that can create the built-in types of audio encoders.
// NOTE: This function is still under development and may change without notice.
rtc::scoped_refptr<AudioEncoderFactory> CreateBuiltinAudioEncoderFactory();

}  // namespace webrtc

#endif /* API_AUDIO_CODECS_FAKE_AUDIO_ENCODER_FACTORY_H_ */

Besides declaring the factory method, this also creates a new AudioEncoderFactory subclass, CodecAudioEncoderFactory, describing the interface of a per-codec encoder factory, and adds one interface function to it, IsSupported(), which reports whether an encoder factory supports a particular SdpFormat; this helps implement the composite AudioEncoderFactory subclass.

Then the implementations of the related classes:

#include "fake_audio_encoder_factory.h"
#include <vector>
#include "rtc_base/refcountedobject.h"

namespace webrtc {
    
    

class OpusEncoderFactory : public CodecAudioEncoderFactory {
    
    
public:
  std::vector<AudioCodecSpec> GetSupportedEncoders() override {
    
    
    std::vector<AudioCodecSpec> specs;

    return specs;
  }

  absl::optional<AudioCodecInfo> QueryAudioEncoder(const SdpAudioFormat &format)
      override {
    
    
    return absl::nullopt;
  }

  std::unique_ptr<AudioEncoder> MakeAudioEncoder(int payload_type,
      const SdpAudioFormat &format,
      absl::optional<AudioCodecPairId> codec_pair_id) override {
    
    

    return nullptr;
  }

  bool IsSupported(const SdpAudioFormat &format) override {
    
    
    return true;
  }
};

class FakeAudioEncoderFactory: public AudioEncoderFactory {
    
    
public:
  std::vector<AudioCodecSpec> GetSupportedEncoders() override {
    
    
    std::vector<AudioCodecSpec> specs;

    for (auto &factory : audio_encoder_factories) {
    
    
      specs.insert(specs.end(), factory->GetSupportedEncoders().begin(), factory->GetSupportedEncoders().end());
    }

    return specs;
  }

  absl::optional<AudioCodecInfo> QueryAudioEncoder(const SdpAudioFormat &format)
      override {
    
    
    for (auto &factory : audio_encoder_factories) {
    
    
      if (factory->IsSupported(format)) {
    
    
        return factory->QueryAudioEncoder(format);
      }
    }

    return absl::nullopt;
  }

  std::unique_ptr<AudioEncoder> MakeAudioEncoder(int payload_type,
      const SdpAudioFormat &format,
      absl::optional<AudioCodecPairId> codec_pair_id) override {
    
    
    for (auto &factory : audio_encoder_factories) {
    
    
      if (factory->IsSupported(format)) {
    
    
        return factory->MakeAudioEncoder(payload_type, format, codec_pair_id);
      }
    }

    return nullptr;
  }

  void AddAudioEncoderFactory(rtc::scoped_refptr<CodecAudioEncoderFactory> factory) {
    
    
    audio_encoder_factories.push_back(factory);
  }

private:
  std::vector<rtc::scoped_refptr<CodecAudioEncoderFactory>> audio_encoder_factories;
};

rtc::scoped_refptr<AudioEncoderFactory> CreateBuiltinAudioEncoderFactory() {
    
    
  rtc::scoped_refptr<FakeAudioEncoderFactory> factory(new rtc::RefCountedObject<FakeAudioEncoderFactory>);

  rtc::scoped_refptr<OpusEncoderFactory> opus_factory(new rtc::RefCountedObject<OpusEncoderFactory>);
  factory->AddAudioEncoderFactory(opus_factory);

  return factory;
}

However, WebRTC's builtin audio encoder factory is not implemented this way.

The file webrtc/src/api/audio_codecs/builtin_audio_encoder_factory.h declares the factory method for creating an AudioEncoderFactory:

namespace webrtc {

// Creates a new factory that can create the built-in types of audio encoders.
// NOTE: This function is still under development and may change without notice.
rtc::scoped_refptr<AudioEncoderFactory> CreateBuiltinAudioEncoderFactory();

}  // namespace webrtc

In webrtc/src/api/audio_codecs/builtin_audio_encoder_factory.cc, the CreateBuiltinAudioEncoderFactory() function is implemented as follows:

namespace webrtc {

namespace {

// Modify an audio encoder to not advertise support for anything.
template <typename T>
struct NotAdvertised {
  using Config = typename T::Config;
  static absl::optional<Config> SdpToConfig(
      const SdpAudioFormat& audio_format) {
    return T::SdpToConfig(audio_format);
  }
  static void AppendSupportedEncoders(std::vector<AudioCodecSpec>* specs) {
    // Don't advertise support for anything.
  }
  static AudioCodecInfo QueryAudioEncoder(const Config& config) {
    return T::QueryAudioEncoder(config);
  }
  static std::unique_ptr<AudioEncoder> MakeAudioEncoder(
      const Config& config,
      int payload_type,
      absl::optional<AudioCodecPairId> codec_pair_id = absl::nullopt) {
    return T::MakeAudioEncoder(config, payload_type, codec_pair_id);
  }
};

}  // namespace

rtc::scoped_refptr<AudioEncoderFactory> CreateBuiltinAudioEncoderFactory() {
  return CreateAudioEncoderFactory<

#if WEBRTC_USE_BUILTIN_OPUS
      AudioEncoderOpus,
#endif

      AudioEncoderIsac, AudioEncoderG722,

#if WEBRTC_USE_BUILTIN_ILBC
      AudioEncoderIlbc,
#endif

      AudioEncoderG711, NotAdvertised<AudioEncoderL16>>();
}

}  // namespace webrtc

The implementation of CreateBuiltinAudioEncoderFactory() is straightforward: it calls a template function, CreateAudioEncoderFactory(), to create the audio encoder factory, passing several classes with "Encoder" in their names as the template type parameters.

The template function CreateAudioEncoderFactory() is defined in webrtc/src/api/audio_codecs/audio_encoder_factory_template.h:

namespace webrtc {

namespace audio_encoder_factory_template_impl {

template <typename... Ts>
struct Helper;

// Base case: 0 template parameters.
template <>
struct Helper<> {
  static void AppendSupportedEncoders(std::vector<AudioCodecSpec>* specs) {}
  static absl::optional<AudioCodecInfo> QueryAudioEncoder(
      const SdpAudioFormat& format) {
    return absl::nullopt;
  }
  static std::unique_ptr<AudioEncoder> MakeAudioEncoder(
      int payload_type,
      const SdpAudioFormat& format,
      absl::optional<AudioCodecPairId> codec_pair_id) {
    return nullptr;
  }
};

// Inductive case: Called with n + 1 template parameters; calls subroutines
// with n template parameters.
template <typename T, typename... Ts>
struct Helper<T, Ts...> {
  static void AppendSupportedEncoders(std::vector<AudioCodecSpec>* specs) {
    T::AppendSupportedEncoders(specs);
    Helper<Ts...>::AppendSupportedEncoders(specs);
  }
  static absl::optional<AudioCodecInfo> QueryAudioEncoder(
      const SdpAudioFormat& format) {
    auto opt_config = T::SdpToConfig(format);
    static_assert(std::is_same<decltype(opt_config),
                               absl::optional<typename T::Config>>::value,
                  "T::SdpToConfig() must return a value of type "
                  "absl::optional<T::Config>");
    return opt_config ? absl::optional<AudioCodecInfo>(
                            T::QueryAudioEncoder(*opt_config))
                      : Helper<Ts...>::QueryAudioEncoder(format);
  }
  static std::unique_ptr<AudioEncoder> MakeAudioEncoder(
      int payload_type,
      const SdpAudioFormat& format,
      absl::optional<AudioCodecPairId> codec_pair_id) {
    auto opt_config = T::SdpToConfig(format);
    if (opt_config) {
      return T::MakeAudioEncoder(*opt_config, payload_type, codec_pair_id);
    } else {
      return Helper<Ts...>::MakeAudioEncoder(payload_type, format,
                                             codec_pair_id);
    }
  }
};

template <typename... Ts>
class AudioEncoderFactoryT : public AudioEncoderFactory {
 public:
  std::vector<AudioCodecSpec> GetSupportedEncoders() override {
    std::vector<AudioCodecSpec> specs;
    Helper<Ts...>::AppendSupportedEncoders(&specs);
    return specs;
  }

  absl::optional<AudioCodecInfo> QueryAudioEncoder(
      const SdpAudioFormat& format) override {
    return Helper<Ts...>::QueryAudioEncoder(format);
  }

  std::unique_ptr<AudioEncoder> MakeAudioEncoder(
      int payload_type,
      const SdpAudioFormat& format,
      absl::optional<AudioCodecPairId> codec_pair_id) override {
    return Helper<Ts...>::MakeAudioEncoder(payload_type, format, codec_pair_id);
  }
};

}  // namespace audio_encoder_factory_template_impl

// Make an AudioEncoderFactory that can create instances of the given encoders.
//
// Each encoder type is given as a template argument to the function; it should
// be a struct with the following static member functions:
//
//   // Converts |audio_format| to a ConfigType instance. Returns an empty
//   // optional if |audio_format| doesn't correctly specify an encoder of our
//   // type.
//   absl::optional<ConfigType> SdpToConfig(const SdpAudioFormat& audio_format);
//
//   // Appends zero or more AudioCodecSpecs to the list that will be returned
//   // by AudioEncoderFactory::GetSupportedEncoders().
//   void AppendSupportedEncoders(std::vector<AudioCodecSpec>* specs);
//
//   // Returns information about how this format would be encoded. Used to
//   // implement AudioEncoderFactory::QueryAudioEncoder().
//   AudioCodecInfo QueryAudioEncoder(const ConfigType& config);
//
//   // Creates an AudioEncoder for the specified format. Used to implement
//   // AudioEncoderFactory::MakeAudioEncoder().
//   std::unique_ptr<AudioEncoder> MakeAudioEncoder(
//       const ConfigType& config,
//       int payload_type,
//       absl::optional<AudioCodecPairId> codec_pair_id);
//
// ConfigType should be a type that encapsulates all the settings needed to
// create an AudioEncoder. T::Config (where T is the encoder struct) should
// either be the config type, or an alias for it.
//
// Whenever it tries to do something, the new factory will try each of the
// encoders in the order they were specified in the template argument list,
// stopping at the first one that claims to be able to do the job.
//
// NOTE: This function is still under development and may change without notice.
//
// TODO(kwiberg): Point at CreateBuiltinAudioEncoderFactory() for an example of
// how it is used.
template <typename... Ts>
rtc::scoped_refptr<AudioEncoderFactory> CreateAudioEncoderFactory() {
  // There's no technical reason we couldn't allow zero template parameters,
  // but such a factory couldn't create any encoders, and callers can do this
  // by mistake by simply forgetting the <> altogether. So we forbid it in
  // order to prevent caller foot-shooting.
  static_assert(sizeof...(Ts) >= 1,
                "Caller must give at least one template parameter");

  return rtc::scoped_refptr<AudioEncoderFactory>(
      new rtc::RefCountedObject<
          audio_encoder_factory_template_impl::AudioEncoderFactoryT<Ts...>>());
}

}  // namespace webrtc

Comparing WebRTC's implementation against the FakeAudioEncoderFactory we implemented above:

  • The list of per-codec encoder factories is not dynamic; it is a static list built through the template mechanism;

  • The template class Helper acts as the traverser and visitor of that list of per-codec encoder factories;

  • The per-codec encoder factory interface does not reuse AudioEncoderFactory; instead an implicit interface is defined, described in the comment of the CreateAudioEncoderFactory() template function, with the following member functions:
  absl::optional<ConfigType> SdpToConfig(const SdpAudioFormat& audio_format);
  void AppendSupportedEncoders(std::vector<AudioCodecSpec>* specs);
  AudioCodecInfo QueryAudioEncoder(const ConfigType& config);
  std::unique_ptr<AudioEncoder> MakeAudioEncoder(
      const ConfigType& config,
      int payload_type,
      absl::optional<AudioCodecPairId> codec_pair_id);

  • The ultimate implementer of the AudioEncoderFactory interface is AudioEncoderFactoryT, whose interface methods are implemented mainly through the Helper template class;

  • The IsSupported() we added to the CodecAudioEncoderFactory interface earlier is roughly equivalent to WebRTC's SdpToConfig(); a usage sketch follows this list.
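As a minimal sketch of how the template list expands (MyCodecEncoder is hypothetical; it only has to satisfy the implicit interface above), a custom factory could be assembled like this:

// Sketch only: MyCodecEncoder is a hypothetical struct satisfying the
// implicit interface expected by CreateAudioEncoderFactory<>().
struct MyCodecEncoder {
  struct Config {};
  static absl::optional<Config> SdpToConfig(
      const webrtc::SdpAudioFormat& audio_format) {
    // Return a Config when `audio_format` describes our codec, else nullopt.
    return absl::nullopt;
  }
  static void AppendSupportedEncoders(std::vector<webrtc::AudioCodecSpec>*) {}
  static webrtc::AudioCodecInfo QueryAudioEncoder(const Config&) {
    return {48000, 1, 64000};  // sample rate, channels, default bitrate
  }
  static std::unique_ptr<webrtc::AudioEncoder> MakeAudioEncoder(
      const Config&, int payload_type,
      absl::optional<webrtc::AudioCodecPairId>) {
    return nullptr;  // A real implementation would construct the encoder.
  }
};

// Helper<MyCodecEncoder, AudioEncoderOpus> tries MyCodecEncoder first, then
// falls through to AudioEncoderOpus, exactly like the fake factory's loop.
auto factory =
    webrtc::CreateAudioEncoderFactory<MyCodecEncoder,
                                      webrtc::AudioEncoderOpus>();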

Let's look at one concrete *Encoder* implementation. AudioEncoderOpus is declared as follows (in webrtc/src/api/audio_codecs/opus/audio_encoder_opus.h):

namespace webrtc {

// Opus encoder API for use as a template parameter to
// CreateAudioEncoderFactory<...>().
//
// NOTE: This struct is still under development and may change without notice.
struct AudioEncoderOpus {
  using Config = AudioEncoderOpusConfig;
  static absl::optional<AudioEncoderOpusConfig> SdpToConfig(
      const SdpAudioFormat& audio_format);
  static void AppendSupportedEncoders(std::vector<AudioCodecSpec>* specs);
  static AudioCodecInfo QueryAudioEncoder(const AudioEncoderOpusConfig& config);
  static std::unique_ptr<AudioEncoder> MakeAudioEncoder(
      const AudioEncoderOpusConfig& config,
      int payload_type,
      absl::optional<AudioCodecPairId> codec_pair_id = absl::nullopt);
};

}  // namespace webrtc

AudioEncoderOpus is implemented as follows (in webrtc/src/api/audio_codecs/opus/audio_encoder_opus.cc):

namespace webrtc {

absl::optional<AudioEncoderOpusConfig> AudioEncoderOpus::SdpToConfig(
    const SdpAudioFormat& format) {
  return AudioEncoderOpusImpl::SdpToConfig(format);
}

void AudioEncoderOpus::AppendSupportedEncoders(
    std::vector<AudioCodecSpec>* specs) {
  AudioEncoderOpusImpl::AppendSupportedEncoders(specs);
}

AudioCodecInfo AudioEncoderOpus::QueryAudioEncoder(
    const AudioEncoderOpusConfig& config) {
  return AudioEncoderOpusImpl::QueryAudioEncoder(config);
}

std::unique_ptr<AudioEncoder> AudioEncoderOpus::MakeAudioEncoder(
    const AudioEncoderOpusConfig& config,
    int payload_type,
    absl::optional<AudioCodecPairId> /*codec_pair_id*/) {
  return AudioEncoderOpusImpl::MakeAudioEncoder(config, payload_type);
}

}  // namespace webrtc

Although AudioEncoderOpus has only "encoder" and not "factory" in its name, it is a genuine encoder factory.

WebRTC's decoder factory implementation closely mirrors its encoder factory implementation.

The AudioDecoderFactory interface is defined in webrtc/src/api/audio_codecs/audio_decoder_factory.h:

namespace webrtc {

// A factory that creates AudioDecoders.
// NOTE: This class is still under development and may change without notice.
class AudioDecoderFactory : public rtc::RefCountInterface {
 public:
  virtual std::vector<AudioCodecSpec> GetSupportedDecoders() = 0;

  virtual bool IsSupportedDecoder(const SdpAudioFormat& format) = 0;

  // Create a new decoder instance. The `codec_pair_id` argument is used to
  // link encoders and decoders that talk to the same remote entity; if a
  // MakeAudioEncoder() and a MakeAudioDecoder() call receive non-null IDs that
  // compare equal, the factory implementations may assume that the encoder and
  // decoder form a pair.
  //
  // Note: Implementations need to be robust against combinations other than
  // one encoder, one decoder getting the same ID; such decoders must still
  // work.
  virtual std::unique_ptr<AudioDecoder> MakeAudioDecoder(
      const SdpAudioFormat& format,
      absl::optional<AudioCodecPairId> codec_pair_id) = 0;
};

}  // namespace webrtc

The file webrtc/src/api/audio_codecs/builtin_audio_decoder_factory.h declares the CreateBuiltinAudioDecoderFactory() factory method:

namespace webrtc {

// Creates a new factory that can create the built-in types of audio decoders.
// NOTE: This function is still under development and may change without notice.
rtc::scoped_refptr<AudioDecoderFactory> CreateBuiltinAudioDecoderFactory();

}  // namespace webrtc

In webrtc/src/api/audio_codecs/builtin_audio_decoder_factory.cc, the CreateBuiltinAudioDecoderFactory() factory method is defined as follows:

namespace webrtc {

namespace {

// Modify an audio decoder to not advertise support for anything.
template <typename T>
struct NotAdvertised {
  using Config = typename T::Config;
  static absl::optional<Config> SdpToConfig(
      const SdpAudioFormat& audio_format) {
    return T::SdpToConfig(audio_format);
  }
  static void AppendSupportedDecoders(std::vector<AudioCodecSpec>* specs) {
    // Don't advertise support for anything.
  }
  static std::unique_ptr<AudioDecoder> MakeAudioDecoder(
      const Config& config,
      absl::optional<AudioCodecPairId> codec_pair_id = absl::nullopt) {
    return T::MakeAudioDecoder(config, codec_pair_id);
  }
};

}  // namespace

rtc::scoped_refptr<AudioDecoderFactory> CreateBuiltinAudioDecoderFactory() {
  return CreateAudioDecoderFactory<

#if WEBRTC_USE_BUILTIN_OPUS
      AudioDecoderOpus,
#endif

      AudioDecoderIsac, AudioDecoderG722,

#if WEBRTC_USE_BUILTIN_ILBC
      AudioDecoderIlbc,
#endif

      AudioDecoderG711, NotAdvertised<AudioDecoderL16>>();
}

}  // namespace webrtc

The CreateAudioDecoderFactory() template function used there is defined in webrtc/src/api/audio_codecs/audio_decoder_factory_template.h:

namespace webrtc {

namespace audio_decoder_factory_template_impl {

template <typename... Ts>
struct Helper;

// Base case: 0 template parameters.
template <>
struct Helper<> {
  static void AppendSupportedDecoders(std::vector<AudioCodecSpec>* specs) {}
  static bool IsSupportedDecoder(const SdpAudioFormat& format) { return false; }
  static std::unique_ptr<AudioDecoder> MakeAudioDecoder(
      const SdpAudioFormat& format,
      absl::optional<AudioCodecPairId> codec_pair_id) {
    return nullptr;
  }
};

// Inductive case: Called with n + 1 template parameters; calls subroutines
// with n template parameters.
template <typename T, typename... Ts>
struct Helper<T, Ts...> {
  static void AppendSupportedDecoders(std::vector<AudioCodecSpec>* specs) {
    T::AppendSupportedDecoders(specs);
    Helper<Ts...>::AppendSupportedDecoders(specs);
  }
  static bool IsSupportedDecoder(const SdpAudioFormat& format) {
    auto opt_config = T::SdpToConfig(format);
    static_assert(std::is_same<decltype(opt_config),
                               absl::optional<typename T::Config>>::value,
                  "T::SdpToConfig() must return a value of type "
                  "absl::optional<T::Config>");
    return opt_config ? true : Helper<Ts...>::IsSupportedDecoder(format);
  }
  static std::unique_ptr<AudioDecoder> MakeAudioDecoder(
      const SdpAudioFormat& format,
      absl::optional<AudioCodecPairId> codec_pair_id) {
    auto opt_config = T::SdpToConfig(format);
    return opt_config ? T::MakeAudioDecoder(*opt_config, codec_pair_id)
                      : Helper<Ts...>::MakeAudioDecoder(format, codec_pair_id);
  }
};

template <typename... Ts>
class AudioDecoderFactoryT : public AudioDecoderFactory {
 public:
  std::vector<AudioCodecSpec> GetSupportedDecoders() override {
    std::vector<AudioCodecSpec> specs;
    Helper<Ts...>::AppendSupportedDecoders(&specs);
    return specs;
  }

  bool IsSupportedDecoder(const SdpAudioFormat& format) override {
    return Helper<Ts...>::IsSupportedDecoder(format);
  }

  std::unique_ptr<AudioDecoder> MakeAudioDecoder(
      const SdpAudioFormat& format,
      absl::optional<AudioCodecPairId> codec_pair_id) override {
    return Helper<Ts...>::MakeAudioDecoder(format, codec_pair_id);
  }
};

}  // namespace audio_decoder_factory_template_impl

// Make an AudioDecoderFactory that can create instances of the given decoders.
//
// Each decoder type is given as a template argument to the function; it should
// be a struct with the following static member functions:
//
//   // Converts |audio_format| to a ConfigType instance. Returns an empty
//   // optional if |audio_format| doesn't correctly specify an decoder of our
//   // type.
//   absl::optional<ConfigType> SdpToConfig(const SdpAudioFormat& audio_format);
//
//   // Appends zero or more AudioCodecSpecs to the list that will be returned
//   // by AudioDecoderFactory::GetSupportedDecoders().
//   void AppendSupportedDecoders(std::vector<AudioCodecSpec>* specs);
//
//   // Creates an AudioDecoder for the specified format. Used to implement
//   // AudioDecoderFactory::MakeAudioDecoder().
//   std::unique_ptr<AudioDecoder> MakeAudioDecoder(
//       const ConfigType& config,
//       absl::optional<AudioCodecPairId> codec_pair_id);
//
// ConfigType should be a type that encapsulates all the settings needed to
// create an AudioDecoder. T::Config (where T is the decoder struct) should
// either be the config type, or an alias for it.
//
// Whenever it tries to do something, the new factory will try each of the
// decoder types in the order they were specified in the template argument
// list, stopping at the first one that claims to be able to do the job.
//
// NOTE: This function is still under development and may change without notice.
//
// TODO(kwiberg): Point at CreateBuiltinAudioDecoderFactory() for an example of
// how it is used.
template <typename... Ts>
rtc::scoped_refptr<AudioDecoderFactory> CreateAudioDecoderFactory() {
  // There's no technical reason we couldn't allow zero template parameters,
  // but such a factory couldn't create any decoders, and callers can do this
  // by mistake by simply forgetting the <> altogether. So we forbid it in
  // order to prevent caller foot-shooting.
  static_assert(sizeof...(Ts) >= 1,
                "Caller must give at least one template parameter");

  return rtc::scoped_refptr<AudioDecoderFactory>(
      new rtc::RefCountedObject<
          audio_decoder_factory_template_impl::AudioDecoderFactoryT<Ts...>>());
}

}  // namespace webrtc

One per-codec decoder factory implementation, AudioDecoderOpus, is declared in webrtc/src/api/audio_codecs/opus/audio_decoder_opus.h:

namespace webrtc {

// Opus decoder API for use as a template parameter to
// CreateAudioDecoderFactory<...>().
//
// NOTE: This struct is still under development and may change without notice.
struct AudioDecoderOpus {
  struct Config {
    int num_channels;
  };
  static absl::optional<Config> SdpToConfig(const SdpAudioFormat& audio_format);
  static void AppendSupportedDecoders(std::vector<AudioCodecSpec>* specs);
  static std::unique_ptr<AudioDecoder> MakeAudioDecoder(
      Config config,
      absl::optional<AudioCodecPairId> codec_pair_id = absl::nullopt);
};

}  // namespace webrtc

AudioDecoderOpus is defined in webrtc/src/api/audio_codecs/opus/audio_decoder_opus.cc:

namespace webrtc {

absl::optional<AudioDecoderOpus::Config> AudioDecoderOpus::SdpToConfig(
    const SdpAudioFormat& format) {
  const auto num_channels = [&]() -> absl::optional<int> {
    auto stereo = format.parameters.find("stereo");
    if (stereo != format.parameters.end()) {
      if (stereo->second == "0") {
        return 1;
      } else if (stereo->second == "1") {
        return 2;
      } else {
        return absl::nullopt;  // Bad stereo parameter.
      }
    }
    return 1;  // Default to mono.
  }();

  if (STR_CASE_CMP(format.name.c_str(), "opus") == 0 &&
      format.clockrate_hz == 16000 && format.num_channels == 1 &&
      num_channels) {
    return Config{static_cast<int>(format.num_channels)};
  } else if (STR_CASE_CMP(format.name.c_str(), "opusswb") == 0 &&
             format.clockrate_hz == 32000 && format.num_channels == 1 &&
             num_channels) {
    return Config{static_cast<int>(format.num_channels)};
  } else if (STR_CASE_CMP(format.name.c_str(), "opusfb") == 0 &&
             format.clockrate_hz == 48000 && format.num_channels == 2 &&
             num_channels) {
    return Config{static_cast<int>(format.num_channels)};
  } else if (STR_CASE_CMP(format.name.c_str(), "opusfb") == 0 &&
             format.clockrate_hz == 48000 && format.num_channels == 1 &&
             num_channels) {
    return Config{static_cast<int>(format.num_channels)};
  } else {
    return absl::nullopt;
  }
}

void AudioDecoderOpus::AppendSupportedDecoders(
    std::vector<AudioCodecSpec>* specs) {
  AudioCodecInfo opus_info{48000, 1, 64000, 6000, 510000};
  opus_info.allow_comfort_noise = false;
  opus_info.supports_network_adaption = true;
  SdpAudioFormat opus_format(
      {"opus", 48000, 2, {{"minptime", "10"}, {"useinbandfec", "1"}}});
  specs->push_back({std::move(opus_format), std::move(opus_info)});
}

std::unique_ptr<AudioDecoder> AudioDecoderOpus::MakeAudioDecoder(
    Config config,
    absl::optional<AudioCodecPairId> /*codec_pair_id*/) {
  return absl::make_unique<AudioDecoderOpusImpl>(config.num_channels);
}

}  // namespace webrtc

The WebRTC builtin audio decoder factory follows almost exactly the same pattern as the builtin audio encoder factory, so it is not repeated here.
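To close the loop, a brief usage sketch (the format values are illustrative): the factory created here is exactly the decoder_factory carried by AudioCodingModule::Config back in Section 1.8.

// Sketch only: create the builtin decoder factory and an Opus decoder.
rtc::scoped_refptr<webrtc::AudioDecoderFactory> decoder_factory =
    webrtc::CreateBuiltinAudioDecoderFactory();
webrtc::SdpAudioFormat opus_format("opus", 48000, 2);
if (decoder_factory->IsSupportedDecoder(opus_format)) {
  std::unique_ptr<webrtc::AudioDecoder> decoder =
      decoder_factory->MakeAudioDecoder(opus_format, absl::nullopt);
}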


Reprinted from blog.csdn.net/shichaog/article/details/128881021