Enabling H.265 Encode/Decode in WebRTC on Android


Unlike H.264, for which WebRTC already implements the relevant logic internally so that only minor changes are needed (hardware codecs just need to be advertised in the SDP, and the software codec just needs a switch flipped; see 《Android平台WebRTC开启H264软编解码》), enabling H.265 requires not only new interfaces but also the logic for packetizing and depacketizing RTP. This article is based on WebRTC M86.

Enabling H.265 encode/decode

Turning on support

  • Add SDP support:
sdk/android/api/org/webrtc/HardwareVideoEncoderFactory.java
    @Override
public VideoCodecInfo[] getSupportedCodecs() {
...
for (VideoCodecMimeType type : new VideoCodecMimeType[] {
VideoCodecMimeType.VP8, VideoCodecMimeType.VP9, VideoCodecMimeType.H264,
VideoCodecMimeType.H265})
...
return supportedCodecInfos.toArray(new VideoCodecInfo[supportedCodecInfos.size()]);
}
  • Add SPS/PPS/VPS support:
sdk/android/src/java/org/webrtc/HardwareVideoEncoder.java
protected void deliverEncodedImage() {
...
final ByteBuffer frameBuffer;
// The config buffer holds SPS/PPS for H.264 and, for H.265, the VPS as
// well; prepend it to key frames here.
if (isKeyFrame && (codecType == VideoCodecMimeType.H264
|| codecType == VideoCodecMimeType.H265)) {
...
}
...
}

Packetization support

  • Add the depacketizer creation entry point:
modules/rtp_rtcp/source/create_video_rtp_depacketizer.cc
...
#ifndef DISABLE_H265
#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h265.h"
#endif

...

#ifndef DISABLE_H265
case kVideoCodecH265:
return std::make_unique<VideoRtpDepacketizerH265>();
#endif

...
  • Add H265 support to the format base class (rtp_format); a usage sketch follows the listing:
modules/rtp_rtcp/source/rtp_format.cc
...
#ifndef DISABLE_H265
#include "modules/rtp_rtcp/source/rtp_format_h265.h"
#endif
...
#ifndef DISABLE_H265
#include "modules/video_coding/codecs/h265/include/h265_globals.h"
#endif
...
std::unique_ptr<RtpPacketizer> RtpPacketizer::Create(
absl::optional<VideoCodecType> type,
rtc::ArrayView<const uint8_t> payload,
PayloadSizeLimits limits,
// Codec-specific details.
const RTPVideoHeader& rtp_video_header) {
if (!type) {
// Use raw packetizer.
return std::make_unique<RtpPacketizerGeneric>(payload, limits);
}

switch (*type) {
case kVideoCodecH264: {
const auto& h264 =
absl::get<RTPVideoHeaderH264>(rtp_video_header.video_type_header);
return std::make_unique<RtpPacketizerH264>(payload, limits,
h264.packetization_mode);
}
#ifndef DISABLE_H265
case kVideoCodecH265: {
const auto& h265 =
absl::get<RTPVideoHeaderH265>(rtp_video_header.video_type_header);
return std::make_unique<RtpPacketizerH265>(
payload, limits, h265.packetization_mode);
}
#endif

...
}
}
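
For orientation, here is a minimal sketch (not part of the patch) of how this factory is exercised on the send path; |encoded_image| is an assumed EncodedImage delivered by the encoder callback:

// Sketch only: packetize one encoded H.265 frame (Annex-B bitstream).
RTPVideoHeader header;
auto& h265 = header.video_type_header.emplace<RTPVideoHeaderH265>();
h265.packetization_mode = H265PacketizationMode::NonInterleaved;

RtpPacketizer::PayloadSizeLimits limits;
limits.max_payload_len = 1200;  // assumed MTU budget

std::unique_ptr<RtpPacketizer> packetizer = RtpPacketizer::Create(
    kVideoCodecH265,
    rtc::MakeArrayView(encoded_image.data(), encoded_image.size()),
    limits, header);
const size_t num_packets = packetizer->NumPackets();
for (size_t i = 0; i < num_packets; ++i) {
  RtpPacketToSend packet(/*extensions=*/nullptr);
  if (!packetizer->NextPacket(&packet))
    break;
  // Hand |packet| to the RTP sender; the frame's last packet carries the
  // RTP marker bit.
}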
  • Add the new H265 format implementation class, rtp_format_h265:
modules/rtp_rtcp/source/rtp_format_h265.cc
#include <string.h>

#include "absl/types/optional.h"
#include "absl/types/variant.h"

#include "common_video/h264/h264_common.h"
#include "common_video/h265/h265_common.h"
#include "common_video/h265/h265_pps_parser.h"
#include "common_video/h265/h265_sps_parser.h"
#include "common_video/h265/h265_vps_parser.h"
#include "modules/include/module_common_types.h"
#include "modules/rtp_rtcp/source/byte_io.h"
#include "modules/rtp_rtcp/source/rtp_format_h265.h"
#include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
#include "rtc_base/logging.h"


namespace webrtc {
namespace {

enum NaluType {
kTrailN = 0,
kTrailR = 1,
kTsaN = 2,
kTsaR = 3,
kStsaN = 4,
kStsaR = 5,
kRadlN = 6,
kRadlR = 7,
kBlaWLp = 16,
kBlaWRadl = 17,
kBlaNLp = 18,
kIdrWRadl = 19,
kIdrNLp = 20,
kCra = 21,
kVps = 32,
kHevcSps = 33,
kHevcPps = 34,
kHevcAud = 35,
kPrefixSei = 39,
kSuffixSei = 40,
kHevcAp = 48,
kHevcFu = 49
};

/*
  0                   1                   2                   3
  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 |    PayloadHdr (Type=49)       |   FU header   | DONL (cond)   |
 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-|
*/
// Unlike H.264, HEVC NAL header is 2-bytes.
static const size_t kHevcNalHeaderSize = 2;
// H.265's FU is constructed of 2-byte payload header, and 1-byte FU header
static const size_t kHevcFuHeaderSize = 1;
static const size_t kHevcLengthFieldSize = 2;

enum HevcNalHdrMasks {
kHevcFBit = 0x80,
kHevcTypeMask = 0x7E,
kHevcLayerIDHMask = 0x1,
kHevcLayerIDLMask = 0xF8,
kHevcTIDMask = 0x7,
kHevcTypeMaskN = 0x81,
kHevcTypeMaskInFuHeader = 0x3F
};

// Bit masks for FU headers.
enum HevcFuDefs { kHevcSBit = 0x80, kHevcEBit = 0x40, kHevcFuTypeBit = 0x3F };
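
// Illustration only (not part of the patch): the 2-byte HEVC NAL unit
// header that the masks above decompose (RFC 7798):
//
//   byte 0: |F| Type (6 bits) |LayerId[5]|
//   byte 1: |LayerId[4:0] | TID (3 bits) |
//
//   F       =  byte0 & kHevcFBit
//   Type    = (byte0 & kHevcTypeMask) >> 1
//   LayerId = ((byte0 & kHevcLayerIDHMask) << 5) |
//             ((byte1 & kHevcLayerIDLMask) >> 3)
//   TID     =  byte1 & kHevcTIDMask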

} // namespace

RtpPacketizerH265::RtpPacketizerH265(
rtc::ArrayView<const uint8_t> payload,
PayloadSizeLimits limits,
H265PacketizationMode packetization_mode)
: limits_(limits),
num_packets_left_(0) {
// Guard against uninitialized memory in packetization_mode.
RTC_CHECK(packetization_mode == H265PacketizationMode::NonInterleaved ||
packetization_mode == H265PacketizationMode::SingleNalUnit);

// HEVC uses the same Annex-B start codes as H.264, so the H.264 NALU
// finder can be reused to split the payload.
for (const auto& nalu :
H264::FindNaluIndices(payload.data(), payload.size())) {
input_fragments_.push_back(
payload.subview(nalu.payload_start_offset, nalu.payload_size));
}

if (!GeneratePackets(packetization_mode)) {
// If failed to generate all the packets, discard already generated
// packets in case the caller would ignore return value and still try to
// call NextPacket().
num_packets_left_ = 0;
while (!packets_.empty()) {
packets_.pop();
}
}
}

RtpPacketizerH265::~RtpPacketizerH265() {}

size_t RtpPacketizerH265::NumPackets() const {
return num_packets_left_;
}

bool RtpPacketizerH265::GeneratePackets(
H265PacketizationMode packetization_mode) {
// For HEVC we follow the non-interleaved packetization mode: NAL units
// that fit are sent as single-NALU packets and larger ones are fragmented
// into FUs; aggregation packets (PacketizeAp) are not produced at present.
for (size_t i = 0; i < input_fragments_.size();) {
int fragment_len = input_fragments_[i].size();
int single_packet_capacity = limits_.max_payload_len;
if (input_fragments_.size() == 1)
single_packet_capacity -= limits_.single_packet_reduction_len;
else if (i == 0)
single_packet_capacity -= limits_.first_packet_reduction_len;
else if (i + 1 == input_fragments_.size()) {
// Pretend that last fragment is larger instead of making last packet
// smaller.
single_packet_capacity -= limits_.last_packet_reduction_len;
}
if (fragment_len > single_packet_capacity) {
PacketizeFu(i);
++i;
} else {
PacketizeSingleNalu(i);
++i;
}
}
return true;
}

bool RtpPacketizerH265::PacketizeFu(size_t fragment_index) {
// Fragment payload into packets (FU).
// Strip out the original header and leave room for the FU header.
rtc::ArrayView<const uint8_t> fragment = input_fragments_[fragment_index];
PayloadSizeLimits limits = limits_;
limits.max_payload_len -= kHevcFuHeaderSize + kHevcNalHeaderSize;

// Update single/first/last packet reductions unless it is single/first/last
// fragment.
if (input_fragments_.size() != 1) {
// if this fragment is put into a single packet, it might still be the
// first or the last packet in the whole sequence of packets.
if (fragment_index == input_fragments_.size() - 1) {
limits.single_packet_reduction_len = limits_.last_packet_reduction_len;
} else if (fragment_index == 0) {
limits.single_packet_reduction_len = limits_.first_packet_reduction_len;
} else {
limits.single_packet_reduction_len = 0;
}
}
if (fragment_index != 0)
limits.first_packet_reduction_len = 0;
if (fragment_index != input_fragments_.size() - 1)
limits.last_packet_reduction_len = 0;

// Strip out the original header.
size_t payload_left = fragment.size() - kHevcNalHeaderSize;
int offset = kHevcNalHeaderSize;

std::vector<int> payload_sizes = SplitAboutEqually(payload_left, limits);
if (payload_sizes.empty())
return false;

for (size_t i = 0; i < payload_sizes.size(); ++i) {
int packet_length = payload_sizes[i];
RTC_CHECK_GT(packet_length, 0);
uint16_t header = (fragment[0] << 8) | fragment[1];
packets_.push(PacketUnit(fragment.subview(offset, packet_length),
/*first_fragment=*/i == 0,
/*last_fragment=*/i == payload_sizes.size() - 1,
false, header));
offset += packet_length;
payload_left -= packet_length;
}
num_packets_left_ += payload_sizes.size();
RTC_CHECK_EQ(0, payload_left);
return true;
}


bool RtpPacketizerH265::PacketizeSingleNalu(size_t fragment_index) {
// Add a single NALU to the queue, no aggregation.
size_t payload_size_left = limits_.max_payload_len;
if (input_fragments_.size() == 1)
payload_size_left -= limits_.single_packet_reduction_len;
else if (fragment_index == 0)
payload_size_left -= limits_.first_packet_reduction_len;
else if (fragment_index + 1 == input_fragments_.size())
payload_size_left -= limits_.last_packet_reduction_len;
rtc::ArrayView<const uint8_t> fragment = input_fragments_[fragment_index];
if (payload_size_left < fragment.size()) {
RTC_LOG(LS_ERROR) << "Failed to fit a fragment to packet in SingleNalu "
"packetization mode. Payload size left "
<< payload_size_left << ", fragment length "
<< fragment.size() << ", packet capacity "
<< limits_.max_payload_len;
return false;
}
RTC_CHECK_GT(fragment.size(), 0u);
packets_.push(PacketUnit(fragment, true /* first */, true /* last */,
false /* aggregated */, fragment[0]));
++num_packets_left_;
return true;
}

int RtpPacketizerH265::PacketizeAp(size_t fragment_index) {
// Aggregate fragments into one packet (AP, the HEVC analogue of STAP-A).
size_t payload_size_left = limits_.max_payload_len;
if (input_fragments_.size() == 1)
payload_size_left -= limits_.single_packet_reduction_len;
else if (fragment_index == 0)
payload_size_left -= limits_.first_packet_reduction_len;
int aggregated_fragments = 0;
size_t fragment_headers_length = 0;
rtc::ArrayView<const uint8_t> fragment = input_fragments_[fragment_index];
RTC_CHECK_GE(payload_size_left, fragment.size());
++num_packets_left_;

auto payload_size_needed = [&] {
size_t fragment_size = fragment.size() + fragment_headers_length;
if (input_fragments_.size() == 1) {
// Single fragment, single packet, payload_size_left already adjusted
// with limits_.single_packet_reduction_len.
return fragment_size;
}
if (fragment_index == input_fragments_.size() - 1) {
// Last fragment, so the AP might be the last packet.
return fragment_size + limits_.last_packet_reduction_len;
}
return fragment_size;
};

while (payload_size_left >= payload_size_needed()) {
RTC_CHECK_GT(fragment.size(), 0);
packets_.push(PacketUnit(fragment, aggregated_fragments == 0, false, true,
fragment[0]));
payload_size_left -= fragment.size();
payload_size_left -= fragment_headers_length;

fragment_headers_length = kHevcLengthFieldSize;
// If we are going to try to aggregate more fragments into this packet
// we need to add the STAP-A NALU header and a length field for the first
// NALU of this packet.
if (aggregated_fragments == 0)
fragment_headers_length += kHevcNalHeaderSize + kHevcLengthFieldSize;
++aggregated_fragments;

// Next fragment.
++fragment_index;
if (fragment_index == input_fragments_.size())
break;
fragment = input_fragments_[fragment_index];
}
RTC_CHECK_GT(aggregated_fragments, 0);
packets_.back().last_fragment = true;
return fragment_index;
}

bool RtpPacketizerH265::NextPacket(RtpPacketToSend* rtp_packet) {
RTC_DCHECK(rtp_packet);

if (packets_.empty()) {
return false;
}

PacketUnit packet = packets_.front();

if (packet.first_fragment && packet.last_fragment) {
// Single NAL unit packet.
size_t bytes_to_send = packet.source_fragment.size();
uint8_t* buffer = rtp_packet->AllocatePayload(bytes_to_send);
memcpy(buffer, packet.source_fragment.data(), bytes_to_send);
packets_.pop();
input_fragments_.pop_front();
} else if (packet.aggregated) {
bool is_last_packet = num_packets_left_ == 1;
NextAggregatePacket(rtp_packet, is_last_packet);
} else {
NextFragmentPacket(rtp_packet);
}
rtp_packet->SetMarker(packets_.empty());
--num_packets_left_;
return true;
}

void RtpPacketizerH265::NextAggregatePacket(RtpPacketToSend* rtp_packet,
bool last) {
size_t payload_capacity = rtp_packet->FreeCapacity();
RTC_CHECK_GE(payload_capacity, kHevcNalHeaderSize);
uint8_t* buffer = rtp_packet->AllocatePayload(payload_capacity);
RTC_CHECK(buffer);
PacketUnit* packet = &packets_.front();
RTC_CHECK(packet->first_fragment);
uint8_t payload_hdr_h = packet->header >> 8;
uint8_t payload_hdr_l = packet->header & 0xFF;
uint8_t layer_id_h = payload_hdr_h & kHevcLayerIDHMask;

payload_hdr_h =
(payload_hdr_h & kHevcTypeMaskN) | (kHevcAp << 1) | layer_id_h;

buffer[0] = payload_hdr_h;
buffer[1] = payload_hdr_l;
int index = kHevcNalHeaderSize;
bool is_last_fragment = packet->last_fragment;
while (packet->aggregated) {
// Add NAL unit length field.
rtc::ArrayView<const uint8_t> fragment = packet->source_fragment;
ByteWriter<uint16_t>::WriteBigEndian(&buffer[index], fragment.size());
index += kHevcLengthFieldSize;
// Add NAL unit.
memcpy(&buffer[index], fragment.data(), fragment.size());
index += fragment.size();
packets_.pop();
input_fragments_.pop_front();
if (is_last_fragment)
break;
packet = &packets_.front();
is_last_fragment = packet->last_fragment;
}
RTC_CHECK(is_last_fragment);
rtp_packet->SetPayloadSize(index);
}

void RtpPacketizerH265::NextFragmentPacket(RtpPacketToSend* rtp_packet) {
PacketUnit* packet = &packets_.front();
// NAL unit fragmented over multiple packets (FU).
// We do not send original NALU header, so it will be replaced by the
// PayloadHdr of the first packet.
uint8_t payload_hdr_h =
packet->header >> 8; // 1-bit F, 6-bit type, 1-bit layerID highest-bit
uint8_t payload_hdr_l = packet->header & 0xFF;
uint8_t layer_id_h = payload_hdr_h & kHevcLayerIDHMask;
uint8_t fu_header = 0;
// S | E |6 bit type.
fu_header |= (packet->first_fragment ? kHevcSBit : 0);
fu_header |= (packet->last_fragment ? kHevcEBit : 0);
uint8_t type = (payload_hdr_h & kHevcTypeMask) >> 1;
fu_header |= type;
// Now update payload_hdr_h with FU type.
payload_hdr_h =
(payload_hdr_h & kHevcTypeMaskN) | (kHevcFu << 1) | layer_id_h;
rtc::ArrayView<const uint8_t> fragment = packet->source_fragment;
uint8_t* buffer = rtp_packet->AllocatePayload(
kHevcFuHeaderSize + kHevcNalHeaderSize + fragment.size());
RTC_CHECK(buffer);
buffer[0] = payload_hdr_h;
buffer[1] = payload_hdr_l;
buffer[2] = fu_header;

memcpy(buffer + kHevcFuHeaderSize + kHevcNalHeaderSize, fragment.data(),
fragment.size());
packets_.pop();
}

} // namespace webrtc

modules/rtp_rtcp/source/rtp_format_h265.h
#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H265_H_
#define WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H265_H_

#include <memory>
#include <queue>
#include <string>
#include "api/array_view.h"
#include "modules/include/module_common_types.h"
#include "modules/rtp_rtcp/source/rtp_format.h"
#include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
#include "modules/rtp_rtcp/source/rtp_format.h"
#include "modules/video_coding/codecs/h265/include/h265_globals.h"
#include "rtc_base/buffer.h"
#include "rtc_base/constructor_magic.h"

namespace webrtc {

class RtpPacketizerH265 : public RtpPacketizer {
public:
// Initialize with payload from encoder.
// The payload_data must be exactly one encoded H.265 frame.
RtpPacketizerH265(rtc::ArrayView<const uint8_t> payload,
PayloadSizeLimits limits,
H265PacketizationMode packetization_mode);

~RtpPacketizerH265() override;

size_t NumPackets() const override;

// Gets the next packet of the frame: writes the H.265 payload header and
// payload into |rtp_packet| and sets the RTP marker bit on the frame's
// last packet.
// Returns true on success or false if there was no payload to packetize.
bool NextPacket(RtpPacketToSend* rtp_packet) override;

private:
struct Packet {
Packet(size_t offset,
size_t size,
bool first_fragment,
bool last_fragment,
bool aggregated,
uint16_t header)
: offset(offset),
size(size),
first_fragment(first_fragment),
last_fragment(last_fragment),
aggregated(aggregated),
header(header) {}

size_t offset;
size_t size;
bool first_fragment;
bool last_fragment;
bool aggregated;
uint16_t header; // Different from H264
};
struct PacketUnit {
PacketUnit(rtc::ArrayView<const uint8_t> source_fragment,
bool first_fragment,
bool last_fragment,
bool aggregated,
uint16_t header)
: source_fragment(source_fragment),
first_fragment(first_fragment),
last_fragment(last_fragment),
aggregated(aggregated),
header(header) {}

rtc::ArrayView<const uint8_t> source_fragment;
bool first_fragment;
bool last_fragment;
bool aggregated;
uint16_t header;
};
typedef std::queue<Packet> PacketQueue;
std::deque<rtc::ArrayView<const uint8_t>> input_fragments_;
std::queue<PacketUnit> packets_;

bool GeneratePackets(H265PacketizationMode packetization_mode);
bool PacketizeFu(size_t fragment_index);
int PacketizeAp(size_t fragment_index);
bool PacketizeSingleNalu(size_t fragment_index);

void NextAggregatePacket(RtpPacketToSend* rtp_packet, bool last);
void NextFragmentPacket(RtpPacketToSend* rtp_packet);

const PayloadSizeLimits limits_;
size_t num_packets_left_;

RTC_DISALLOW_COPY_AND_ASSIGN(RtpPacketizerH265);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H265_H_

  • Video rendering support: the VPx codecs use picture_id, temporal_id and tl0_pic_id to describe the relationship between NAL units and whether decoding can continue, whereas H.26x judges inter-frame decoding continuity from the sequence number and the presence of SPS/PPS. It is therefore enough to set the H265 temporal id to kNoTemporalIdx:
modules/rtp_rtcp/source/rtp_sender_video.cc
uint8_t RTPSenderVideo::GetTemporalId(const RTPVideoHeader& header) {
struct TemporalIdGetter {
uint8_t operator()(const RTPVideoHeaderVP8& vp8) { return vp8.temporalIdx; }
uint8_t operator()(const RTPVideoHeaderVP9& vp9) {
return vp9.temporal_idx;
}
uint8_t operator()(const RTPVideoHeaderH264&) { return kNoTemporalIdx; }
#ifndef DISABLE_H265
uint8_t operator()(const RTPVideoHeaderH265&) { return kNoTemporalIdx; }
#endif
...

return absl::visit(TemporalIdGetter(), header.video_type_header);
}
  • Add an H265 alternative to the RTP video header variant; note the difference between #ifndef and #ifdef below:
modules/rtp_rtcp/source/rtp_video_header.h
#include "modules/video_coding/codecs/h264/include/h264_globals.h"
#ifndef DISABLE_H265
#include "modules/video_coding/codecs/h265/include/h265_globals.h"
#endif
#include "modules/video_coding/codecs/vp8/include/vp8_globals.h"

...

#ifdef DISABLE_H265
using RTPVideoTypeHeader = absl::variant<absl::monostate,
RTPVideoHeaderVP8,
RTPVideoHeaderVP9,
RTPVideoHeaderH264,
RTPVideoHeaderLegacyGeneric>;
#else
using RTPVideoTypeHeader = absl::variant<absl::monostate,
RTPVideoHeaderVP8,
RTPVideoHeaderVP9,
RTPVideoHeaderH264,
RTPVideoHeaderH265,
RTPVideoHeaderLegacyGeneric>;
#endif
  • Add the new H265 depacketization logic, video_rtp_depacketizer_h265 (a usage sketch follows the .cc listing):
modules/rtp_rtcp/source/video_rtp_depacketizer_h265.h
#ifndef MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H265_H_
#define MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H265_H_

#include "absl/types/optional.h"
#include "modules/rtp_rtcp/source/video_rtp_depacketizer.h"
#include "rtc_base/copy_on_write_buffer.h"

namespace webrtc {
class VideoRtpDepacketizerH265 : public VideoRtpDepacketizer {
public:
~VideoRtpDepacketizerH265() override = default;

absl::optional<ParsedRtpPayload> Parse(
rtc::CopyOnWriteBuffer rtp_payload) override;

private:
struct ParsedPayload {
RTPVideoHeader& video_header() { return video; }
const RTPVideoHeader& video_header() const { return video; }

RTPVideoHeader video;

const uint8_t* payload;
size_t payload_length;
};

bool Parse(ParsedPayload* parsed_payload,
const uint8_t* payload_data,
size_t payload_data_length);

bool ParseFuNalu(ParsedPayload* parsed_payload,
const uint8_t* payload_data);
bool ProcessApOrSingleNalu(ParsedPayload* parsed_payload,
const uint8_t* payload_data);

size_t offset_;
size_t length_;
std::unique_ptr<rtc::Buffer> modified_buffer_;

};
} // namespace webrtc

#endif // MODULES_RTP_RTCP_SOURCE_VIDEO_RTP_DEPACKETIZER_H265_H_
modules/rtp_rtcp/source/video_rtp_depacketizer_h265.cc
#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h265.h"

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

#include "absl/base/macros.h"
#include "absl/types/optional.h"
#include "absl/types/variant.h"
#include "common_video/h264/h264_common.h"
#include "common_video/h265/h265_common.h"
#include "common_video/h265/h265_pps_parser.h"
#include "common_video/h265/h265_sps_parser.h"
#include "common_video/h265/h265_vps_parser.h"
#include "modules/rtp_rtcp/source/byte_io.h"
#include "modules/rtp_rtcp/source/video_rtp_depacketizer.h"
#include "rtc_base/checks.h"
#include "rtc_base/copy_on_write_buffer.h"
#include "rtc_base/logging.h"

namespace webrtc {
namespace {

/*
  0                   1                   2                   3
  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 |    PayloadHdr (Type=49)       |   FU header   | DONL (cond)   |
 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-|
*/
// Unlike H.264, HEVC NAL header is 2-bytes.
static const size_t kHevcNalHeaderSize = 2;
// H.265's FU is constructed of 2-byte payload header, and 1-byte FU header
static const size_t kHevcFuHeaderSize = 1;
static const size_t kHevcLengthFieldSize = 2;
static const size_t kHevcApHeaderSize =
kHevcNalHeaderSize + kHevcLengthFieldSize;

enum HevcNalHdrMasks {
kHevcFBit = 0x80,
kHevcTypeMask = 0x7E,
kHevcLayerIDHMask = 0x1,
kHevcLayerIDLMask = 0xF8,
kHevcTIDMask = 0x7,
kHevcTypeMaskN = 0x81,
kHevcTypeMaskInFuHeader = 0x3F
};

// Bit masks for FU headers.
enum HevcFuDefs { kHevcSBit = 0x80, kHevcEBit = 0x40, kHevcFuTypeBit = 0x3F };

// TODO(pbos): Avoid parsing this here as well as inside the jitter buffer.
bool ParseApStartOffsets(const uint8_t* nalu_ptr,
size_t length_remaining,
std::vector<size_t>* offsets) {
size_t offset = 0;
while (length_remaining > 0) {
// Buffer doesn't contain room for additional nalu length.
if (length_remaining < sizeof(uint16_t))
return false;
uint16_t nalu_size = ByteReader<uint16_t>::ReadBigEndian(nalu_ptr);
nalu_ptr += sizeof(uint16_t);
length_remaining -= sizeof(uint16_t);
if (nalu_size > length_remaining)
return false;
nalu_ptr += nalu_size;
length_remaining -= nalu_size;

offsets->push_back(offset + kHevcApHeaderSize);
offset += kHevcLengthFieldSize + nalu_size;
}
return true;
}

} // namespace

bool VideoRtpDepacketizerH265::Parse(ParsedPayload* parsed_payload,
const uint8_t* payload_data,
size_t payload_data_length) {
RTC_CHECK(parsed_payload != nullptr);
if (payload_data_length == 0) {
RTC_LOG(LS_ERROR) << "Empty payload.";
return false;
}

offset_ = 0;
length_ = payload_data_length;
modified_buffer_.reset();

uint8_t nal_type = (payload_data[0] & kHevcTypeMask) >> 1;
parsed_payload->video_header()
.video_type_header.emplace<RTPVideoHeaderH265>();

if (nal_type == H265::NaluType::kFU) {
// Fragmentation unit (FU).
if (!ParseFuNalu(parsed_payload, payload_data))
return false;
} else {
// We handle AP and single-NALU payloads the same way here. The jitter
// buffer will depacketize the AP into NAL units later.
// TODO(sprang): Parse AP offsets here and store in fragmentation vec.
if (!ProcessApOrSingleNalu(parsed_payload, payload_data))
return false;
}

const uint8_t* payload =
modified_buffer_ ? modified_buffer_->data() : payload_data;

parsed_payload->payload = payload + offset_;
parsed_payload->payload_length = length_;
return true;
}

bool VideoRtpDepacketizerH265::ProcessApOrSingleNalu(
ParsedPayload* parsed_payload,
const uint8_t* payload_data) {
parsed_payload->video_header().width = 0;
parsed_payload->video_header().height = 0;
parsed_payload->video_header().codec = kVideoCodecH265;
parsed_payload->video_header().is_first_packet_in_frame = true;
auto& h265_header = absl::get<RTPVideoHeaderH265>(
parsed_payload->video_header().video_type_header);

const uint8_t* nalu_start = payload_data + kHevcNalHeaderSize;
const size_t nalu_length = length_ - kHevcNalHeaderSize;
uint8_t nal_type = (payload_data[0] & kHevcTypeMask) >> 1;
std::vector<size_t> nalu_start_offsets;
if (nal_type == H265::NaluType::kAP) {
// Skip the AP header (AP NAL type + length).
if (length_ <= kHevcApHeaderSize) {
RTC_LOG(LS_ERROR) << "AP header truncated.";
return false;
}

if (!ParseApStartOffsets(nalu_start, nalu_length, &nalu_start_offsets)) {
RTC_LOG(LS_ERROR) << "AP packet with incorrect NALU packet lengths.";
return false;
}

h265_header.packetization_type = kH265AP;
// nal_type = (payload_data[kHevcApHeaderSize] & kHevcTypeMask) >> 1;
} else {
h265_header.packetization_type = kH265SingleNalu;
nalu_start_offsets.push_back(0);
}
h265_header.nalu_type = nal_type;
parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameDelta;

nalu_start_offsets.push_back(length_ + kHevcLengthFieldSize); // End offset.
for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) {
size_t start_offset = nalu_start_offsets[i];
// End offset is actually start offset for next unit, excluding length field
// so remove that from this units length.
size_t end_offset = nalu_start_offsets[i + 1] - kHevcLengthFieldSize;
if (end_offset - start_offset < kHevcNalHeaderSize) { // Same as H.264.
RTC_LOG(LS_ERROR) << "AP packet too short";
return false;
}

H265NaluInfo nalu;
nalu.type = (payload_data[start_offset] & kHevcTypeMask) >> 1;
nalu.vps_id = -1;
nalu.sps_id = -1;
nalu.pps_id = -1;
start_offset += kHevcNalHeaderSize;
switch (nalu.type) {
case H265::NaluType::kVps: {
absl::optional<H265VpsParser::VpsState> vps = H265VpsParser::ParseVps(
&payload_data[start_offset], end_offset - start_offset);
if (vps) {
nalu.vps_id = vps->id;
} else {
RTC_LOG(LS_WARNING) << "Failed to parse VPS id from VPS slice.";
}
break;
}
case H265::NaluType::kSps: {
// Check if VUI is present in SPS and if it needs to be modified to
// avoid excessive decoder latency.

// Copy any previous data first (likely just the first header).
std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer());
if (start_offset)
output_buffer->AppendData(payload_data, start_offset);

absl::optional<H265SpsParser::SpsState> sps = H265SpsParser::ParseSps(
&payload_data[start_offset], end_offset - start_offset);

if (sps) {
parsed_payload->video_header().width = sps->width;
parsed_payload->video_header().height = sps->height;
nalu.sps_id = sps->id;
nalu.vps_id = sps->vps_id;
} else {
RTC_LOG(LS_WARNING)
<< "Failed to parse SPS and VPS id from SPS slice.";
}
parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameKey;
break;
}
case H265::NaluType::kPps: {
uint32_t pps_id;
uint32_t sps_id;
if (H265PpsParser::ParsePpsIds(&payload_data[start_offset],
end_offset - start_offset, &pps_id,
&sps_id)) {
nalu.pps_id = pps_id;
nalu.sps_id = sps_id;
} else {
RTC_LOG(LS_WARNING)
<< "Failed to parse PPS id and SPS id from PPS slice.";
}
break;
}
case H265::NaluType::kIdrWRadl:
case H265::NaluType::kIdrNLp:
case H265::NaluType::kCra:
parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameKey;
ABSL_FALLTHROUGH_INTENDED;
case H265::NaluType::kTrailN:
case H265::NaluType::kTrailR: {
absl::optional<uint32_t> pps_id =
H265PpsParser::ParsePpsIdFromSliceSegmentLayerRbsp(
&payload_data[start_offset], end_offset - start_offset,
nalu.type);
if (pps_id) {
nalu.pps_id = *pps_id;
} else {
RTC_LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: "
<< static_cast<int>(nalu.type);
}
break;
}
// Slices below don't contain SPS or PPS ids.
case H265::NaluType::kAud:
case H265::NaluType::kTsaN:
case H265::NaluType::kTsaR:
case H265::NaluType::kStsaN:
case H265::NaluType::kStsaR:
case H265::NaluType::kRadlN:
case H265::NaluType::kRadlR:
case H265::NaluType::kBlaWLp:
case H265::NaluType::kBlaWRadl:
case H265::NaluType::kPrefixSei:
case H265::NaluType::kSuffixSei:
break;
case H265::NaluType::kAP:
case H265::NaluType::kFU:
RTC_LOG(LS_WARNING) << "Unexpected AP or FU received.";
return false;
}

if (h265_header.nalus_length == kMaxNalusPerPacket) {
RTC_LOG(LS_WARNING)
<< "Received packet containing more than " << kMaxNalusPerPacket
<< " NAL units. Will not keep track sps and pps ids for all of them.";
} else {
h265_header.nalus[h265_header.nalus_length++] = nalu;
}
}
return true;
}

bool VideoRtpDepacketizerH265::ParseFuNalu(
ParsedPayload* parsed_payload,
const uint8_t* payload_data) {
if (length_ < kHevcFuHeaderSize + kHevcNalHeaderSize) {
RTC_LOG(LS_ERROR) << "FU NAL units truncated.";
return false;
}
uint8_t f = payload_data[0] & kHevcFBit;
uint8_t layer_id_h = payload_data[0] & kHevcLayerIDHMask;
uint8_t layer_id_l_unshifted = payload_data[1] & kHevcLayerIDLMask;
uint8_t tid = payload_data[1] & kHevcTIDMask;

uint8_t original_nal_type = payload_data[2] & kHevcTypeMaskInFuHeader;
bool first_fragment = payload_data[2] & kHevcSBit;
H265NaluInfo nalu;
nalu.type = original_nal_type;
nalu.vps_id = -1;
nalu.sps_id = -1;
nalu.pps_id = -1;
if (first_fragment) {
offset_ = 1;
length_ -= 1;
absl::optional<uint32_t> pps_id =
H265PpsParser::ParsePpsIdFromSliceSegmentLayerRbsp(
payload_data + kHevcNalHeaderSize + kHevcFuHeaderSize,
length_ - kHevcFuHeaderSize, nalu.type);
if (pps_id) {
nalu.pps_id = *pps_id;
} else {
RTC_LOG(LS_WARNING)
<< "Failed to parse PPS from first fragment of FU NAL "
"unit with original type: "
<< static_cast<int>(nalu.type);
}
uint8_t* payload = const_cast<uint8_t*>(payload_data + offset_);
payload[0] = f | original_nal_type << 1 | layer_id_h;
payload[1] = layer_id_l_unshifted | tid;
} else {
offset_ = kHevcNalHeaderSize + kHevcFuHeaderSize;
length_ -= (kHevcNalHeaderSize + kHevcFuHeaderSize);
}

if (original_nal_type == H265::NaluType::kIdrWRadl
|| original_nal_type == H265::NaluType::kIdrNLp
|| original_nal_type == H265::NaluType::kCra) {
parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameKey;
} else {
parsed_payload->video_header().frame_type = VideoFrameType::kVideoFrameDelta;
}
parsed_payload->video_header().width = 0;
parsed_payload->video_header().height = 0;
parsed_payload->video_header().codec = kVideoCodecH265;
parsed_payload->video_header().is_first_packet_in_frame = first_fragment;
auto& h265_header = absl::get<RTPVideoHeaderH265>(
parsed_payload->video_header().video_type_header);
h265_header.packetization_type = kH265FU;
h265_header.nalu_type = original_nal_type;
if (first_fragment) {
h265_header.nalus[h265_header.nalus_length] = nalu;
h265_header.nalus_length = 1;
}
return true;
}

absl::optional<VideoRtpDepacketizer::ParsedRtpPayload>
VideoRtpDepacketizerH265::Parse(rtc::CopyOnWriteBuffer rtp_payload) {
// borrowed from https://webrtc.googlesource.com/src/+/
// 07b17df771af20a6dd98b795592acc62a623c56f
// /modules/rtp_rtcp/source/create_video_rtp_depacketizer.cc
ParsedPayload parsed_payload;
if (!Parse(&parsed_payload, rtp_payload.cdata(), rtp_payload.size())) {
return absl::nullopt;
}
absl::optional<ParsedRtpPayload> result(absl::in_place);
result->video_header = parsed_payload.video;
result->video_payload.SetData(parsed_payload.payload,
parsed_payload.payload_length);
return result;
}

} // namespace webrtc
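
On the receive side, create_video_rtp_depacketizer.cc (above) returns this class for kVideoCodecH265 and the receiver invokes it once per packet. A minimal usage sketch (not part of the patch), with the payload buffer assumed:

// Sketch only: |rtp_payload| is assumed to hold the payload of one received
// RTP packet, with the RTP header already stripped.
VideoRtpDepacketizerH265 depacketizer;
absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed =
    depacketizer.Parse(std::move(rtp_payload));
if (parsed) {
  // parsed->video_header carries codec, frame type, per-NALU info and, when
  // an SPS was present, the resolution; parsed->video_payload holds the
  // (possibly rewritten) bitstream.
}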
  • Add the four new files to the GN/ninja build and turn H265 on via the rtc_use_h265 switch (rtc_use_h265 must also be declared as a GN build argument, e.g. in webrtc.gni, defaulting to false):
modules/rtp_rtcp/BUILD.gn
if (rtc_enable_bwe_test_logging) {
defines = [ "BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=1" ]
} else {
defines = [ "BWE_TEST_LOGGING_COMPILE_TIME_ENABLE=0" ]
}

if (rtc_use_h265) {
sources += [
"source/rtp_format_h265.cc",
"source/rtp_format_h265.h",
"source/video_rtp_depacketizer_h265.cc",
"source/video_rtp_depacketizer_h265.h",
]
}

if (!rtc_use_h265) {
defines += ["DISABLE_H265"]
}

Depacketization support

  • Add H265 depacketization support (a usage sketch follows the listing):
modules/video_coding/include/video_codec_interface.h
#include "modules/video_coding/codecs/h264/include/h264_globals.h"
#ifndef DISABLE_H265
#include "modules/video_coding/codecs/h265/include/h265_globals.h"
#endif

...

#ifndef DISABLE_H265
struct CodecSpecificInfoH265 {
H265PacketizationMode packetization_mode;
bool idr_frame;
};
#endif

union CodecSpecificInfoUnion {
CodecSpecificInfoVP8 VP8;
CodecSpecificInfoVP9 VP9;
CodecSpecificInfoH264 H264;
#ifndef DISABLE_H265
CodecSpecificInfoH265 H265;
#endif
};
static_assert(std::is_pod<CodecSpecificInfoUnion>::value, "");
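
An H.265 encoder implementation fills this struct when it delivers an encoded frame. A minimal sketch of the call site (assumed, not part of the patch):

// Sketch only: populate codec-specific info for one encoded H.265 frame,
// then pass it alongside the EncodedImage to the registered callback.
CodecSpecificInfo codec_specific;
codec_specific.codecType = kVideoCodecH265;
codec_specific.codecSpecific.H265.packetization_mode =
    H265PacketizationMode::NonInterleaved;
codec_specific.codecSpecific.H265.idr_frame =
    encoded_image._frameType == VideoFrameType::kVideoFrameKey;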
  • Set the codec type:
modules/video_coding/encoded_frame.cc
void VCMEncodedFrame::CopyCodecSpecific(const RTPVideoHeader* header) {
...
#ifndef DISABLE_H265
case kVideoCodecH265: {
_codecSpecificInfo.codecType = kVideoCodecH265;
break;
}
#endif
default: {
_codecSpecificInfo.codecType = kVideoCodecGeneric;
break;
}
}
  • Add a new tracker that parses VPS/SPS/PPS information, modeled on h264_sps_pps_tracker:
modules/video_coding/h265_vps_sps_pps_tracker.cc
#include "modules/video_coding/h265_vps_sps_pps_tracker.h"

#include <string>
#include <utility>

#include "common_video/h264/h264_common.h"
#include "common_video/h265/h265_common.h"
#include "common_video/h265/h265_pps_parser.h"
#include "common_video/h265/h265_sps_parser.h"
#include "common_video/h265/h265_vps_parser.h"
#include "modules/video_coding/codecs/h264/include/h264_globals.h"
#include "modules/video_coding/codecs/h265/include/h265_globals.h"
#include "modules/video_coding/frame_object.h"
#include "modules/video_coding/packet_buffer.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"

namespace webrtc {
namespace video_coding {

namespace {
const uint8_t start_code_h265[] = {0, 0, 0, 1};
} // namespace

H265VpsSpsPpsTracker::FixedBitstream H265VpsSpsPpsTracker::CopyAndFixBitstream(
rtc::ArrayView<const uint8_t> bitstream,
RTPVideoHeader* video_header) {
RTC_DCHECK(video_header);
RTC_DCHECK(video_header->codec == kVideoCodecH265);

auto& h265_header =
absl::get<RTPVideoHeaderH265>(video_header->video_type_header);

bool append_vps_sps_pps = false;
auto vps = vps_data_.end();
auto sps = sps_data_.end();
auto pps = pps_data_.end();

for (size_t i = 0; i < h265_header.nalus_length; ++i) {
const H265NaluInfo& nalu = h265_header.nalus[i];
switch (nalu.type) {
case H265::NaluType::kVps: {
vps_data_[nalu.vps_id].size = 0;
break;
}
case H265::NaluType::kSps: {
sps_data_[nalu.sps_id].vps_id = nalu.vps_id;
sps_data_[nalu.sps_id].width = video_header->width;
sps_data_[nalu.sps_id].height = video_header->height;
break;
}
case H265::NaluType::kPps: {
pps_data_[nalu.pps_id].sps_id = nalu.sps_id;
break;
}
case H265::NaluType::kIdrWRadl:
case H265::NaluType::kIdrNLp:
case H265::NaluType::kCra: {
// If this is the first packet of an IDR, make sure we have the required
// SPS/PPS and also calculate how much extra space we need in the buffer
// to prepend the SPS/PPS to the bitstream with start codes.
if (video_header->is_first_packet_in_frame) {
if (nalu.pps_id == -1) {
RTC_LOG(LS_WARNING) << "No PPS id in IDR nalu.";
return {kRequestKeyframe};
}

pps = pps_data_.find(nalu.pps_id);
if (pps == pps_data_.end()) {
RTC_LOG(LS_WARNING)
<< "No PPS with id << " << nalu.pps_id << " received";
return {kRequestKeyframe};
}

sps = sps_data_.find(pps->second.sps_id);
if (sps == sps_data_.end()) {
RTC_LOG(LS_WARNING)
<< "No SPS with id << " << pps->second.sps_id << " received";
return {kRequestKeyframe};
}

vps = vps_data_.find(sps->second.vps_id);
if (vps == vps_data_.end()) {
RTC_LOG(LS_WARNING)
<< "No VPS with id << " << sps->second.vps_id << " received";
return {kRequestKeyframe};
}

// Since the first packet of every keyframe should have its width and
// height set we set it here in the case of it being supplied out of
// band.
video_header->width = sps->second.width;
video_header->height = sps->second.height;

// If the VPS/SPS/PPS was supplied out of band then we will have saved
// the actual bitstream in |data|.
// This branch is not verified.
if (vps->second.data && sps->second.data && pps->second.data) {
RTC_DCHECK_GT(vps->second.size, 0);
RTC_DCHECK_GT(sps->second.size, 0);
RTC_DCHECK_GT(pps->second.size, 0);
append_vps_sps_pps = true;
}
}
break;
}
default:
break;
}
}

RTC_CHECK(!append_vps_sps_pps ||
(sps != sps_data_.end() && pps != pps_data_.end()));

// Calculate how much space we need for the rest of the bitstream.
size_t required_size = 0;

if (append_vps_sps_pps) {
required_size += vps->second.size + sizeof(start_code_h265);
required_size += sps->second.size + sizeof(start_code_h265);
required_size += pps->second.size + sizeof(start_code_h265);
}

if (h265_header.packetization_type == kH265AP) {
const uint8_t* nalu_ptr = bitstream.data() + 1;
while (nalu_ptr < bitstream.data() + bitstream.size()) {
RTC_DCHECK(video_header->is_first_packet_in_frame);
required_size += sizeof(start_code_h265);

// The first two bytes describe the length of a segment.
uint16_t segment_length = nalu_ptr[0] << 8 | nalu_ptr[1];
nalu_ptr += 2;

required_size += segment_length;
nalu_ptr += segment_length;
}
} else {
if (video_header->is_first_packet_in_frame)
required_size += sizeof(start_code_h265);
required_size += bitstream.size();
}

// Then we copy to the new buffer.
H265VpsSpsPpsTracker::FixedBitstream fixed;
fixed.bitstream.EnsureCapacity(required_size);

if (append_vps_sps_pps) {
// Insert VPS.
fixed.bitstream.AppendData(start_code_h265);
fixed.bitstream.AppendData(vps->second.data.get(), vps->second.size);

// Insert SPS.
fixed.bitstream.AppendData(start_code_h265);
fixed.bitstream.AppendData(sps->second.data.get(), sps->second.size);

// Insert PPS.
fixed.bitstream.AppendData(start_code_h265);
fixed.bitstream.AppendData(pps->second.data.get(), pps->second.size);

// Update codec header to reflect the newly added SPS and PPS.
H265NaluInfo vps_info;
vps_info.type = H265::NaluType::kVps;
vps_info.vps_id = vps->first;
vps_info.sps_id = -1;
vps_info.pps_id = -1;
H265NaluInfo sps_info;
sps_info.type = H265::NaluType::kSps;
sps_info.vps_id = vps->first;
sps_info.sps_id = sps->first;
sps_info.pps_id = -1;
H265NaluInfo pps_info;
pps_info.type = H265::NaluType::kPps;
pps_info.vps_id = vps->first;
pps_info.sps_id = sps->first;
pps_info.pps_id = pps->first;
if (h265_header.nalus_length + 3 <= kMaxNalusPerPacket) {
h265_header.nalus[h265_header.nalus_length++] = vps_info;
h265_header.nalus[h265_header.nalus_length++] = sps_info;
h265_header.nalus[h265_header.nalus_length++] = pps_info;
} else {
RTC_LOG(LS_WARNING) << "Not enough space in H.265 codec header to insert "
"SPS/PPS provided out-of-band.";
}
}

// Copy the rest of the bitstream and insert start codes.
if (h265_header.packetization_type == kH265AP) {
const uint8_t* nalu_ptr = bitstream.data() + 1;
while (nalu_ptr < bitstream.data() + bitstream.size()) {
fixed.bitstream.AppendData(start_code_h265);

// The first two bytes describe the length of a segment.
uint16_t segment_length = nalu_ptr[0] << 8 | nalu_ptr[1];
nalu_ptr += 2;

size_t copy_end = nalu_ptr - bitstream.data() + segment_length;
if (copy_end > bitstream.size()) {
return {kDrop};
}

fixed.bitstream.AppendData(nalu_ptr, segment_length);
nalu_ptr += segment_length;
}
} else {
if (video_header->is_first_packet_in_frame) {
fixed.bitstream.AppendData(start_code_h265);
}
fixed.bitstream.AppendData(bitstream.data(), bitstream.size());
}

fixed.action = kInsert;
return fixed;
}

void H265VpsSpsPpsTracker::InsertVpsSpsPpsNalus(
const std::vector<uint8_t>& vps,
const std::vector<uint8_t>& sps,
const std::vector<uint8_t>& pps) {
constexpr size_t kNaluHeaderOffset = 1;
if (vps.size() < kNaluHeaderOffset) {
RTC_LOG(LS_WARNING) << "VPS size " << vps.size() << " is smaller than "
<< kNaluHeaderOffset;
return;
}
if ((vps[0] & 0x7e) >> 1 != H265::NaluType::kVps) {
RTC_LOG(LS_WARNING) << "VPS Nalu header missing";
return;
}
if (sps.size() < kNaluHeaderOffset) {
RTC_LOG(LS_WARNING) << "SPS size " << sps.size() << " is smaller than "
<< kNaluHeaderOffset;
return;
}
if ((sps[0] & 0x7e) >> 1 != H265::NaluType::kSps) {
RTC_LOG(LS_WARNING) << "SPS Nalu header missing";
return;
}
if (pps.size() < kNaluHeaderOffset) {
RTC_LOG(LS_WARNING) << "PPS size " << pps.size() << " is smaller than "
<< kNaluHeaderOffset;
return;
}
if ((pps[0] & 0x7e) >> 1 != H265::NaluType::kPps) {
RTC_LOG(LS_WARNING) << "SPS Nalu header missing";
return;
}
absl::optional<H265VpsParser::VpsState> parsed_vps = H265VpsParser::ParseVps(
vps.data() + kNaluHeaderOffset, vps.size() - kNaluHeaderOffset);
absl::optional<H265SpsParser::SpsState> parsed_sps = H265SpsParser::ParseSps(
sps.data() + kNaluHeaderOffset, sps.size() - kNaluHeaderOffset);
absl::optional<H265PpsParser::PpsState> parsed_pps = H265PpsParser::ParsePps(
pps.data() + kNaluHeaderOffset, pps.size() - kNaluHeaderOffset);

if (!parsed_vps) {
RTC_LOG(LS_WARNING) << "Failed to parse VPS.";
}

if (!parsed_sps) {
RTC_LOG(LS_WARNING) << "Failed to parse SPS.";
}

if (!parsed_pps) {
RTC_LOG(LS_WARNING) << "Failed to parse PPS.";
}

if (!parsed_vps || !parsed_pps || !parsed_sps) {
return;
}

VpsInfo vps_info;
vps_info.size = vps.size();
uint8_t* vps_data = new uint8_t[vps_info.size];
memcpy(vps_data, vps.data(), vps_info.size);
vps_info.data.reset(vps_data);
vps_data_[parsed_vps->id] = std::move(vps_info);

SpsInfo sps_info;
sps_info.size = sps.size();
sps_info.width = parsed_sps->width;
sps_info.height = parsed_sps->height;
sps_info.vps_id = parsed_sps->vps_id;
uint8_t* sps_data = new uint8_t[sps_info.size];
memcpy(sps_data, sps.data(), sps_info.size);
sps_info.data.reset(sps_data);
sps_data_[parsed_sps->id] = std::move(sps_info);

PpsInfo pps_info;
pps_info.size = pps.size();
pps_info.sps_id = parsed_pps->sps_id;
uint8_t* pps_data = new uint8_t[pps_info.size];
memcpy(pps_data, pps.data(), pps_info.size);
pps_info.data.reset(pps_data);
pps_data_[parsed_pps->id] = std::move(pps_info);

RTC_LOG(LS_INFO) << "Inserted SPS id " << parsed_sps->id << " and PPS id "
<< parsed_pps->id << " (referencing SPS "
<< parsed_pps->sps_id << ")";
}

} // namespace video_coding
} // namespace webrtc

modules/video_coding/h265_vps_sps_pps_tracker.h
#ifndef MODULES_VIDEO_CODING_H265_VPS_SPS_PPS_TRACKER_H_
#define MODULES_VIDEO_CODING_H265_VPS_SPS_PPS_TRACKER_H_

#include <cstdint>
#include <map>
#include <memory>
#include <vector>

#include "api/array_view.h"
#include "modules/rtp_rtcp/source/rtp_video_header.h"
#include "rtc_base/copy_on_write_buffer.h"

namespace webrtc {
namespace video_coding {

class H265VpsSpsPpsTracker {
public:
enum PacketAction { kInsert, kDrop, kRequestKeyframe };
struct FixedBitstream {
PacketAction action;
rtc::CopyOnWriteBuffer bitstream;
};

// Returns fixed bitstream and modifies |video_header|.
FixedBitstream CopyAndFixBitstream(rtc::ArrayView<const uint8_t> bitstream,
RTPVideoHeader* video_header);

void InsertVpsSpsPpsNalus(const std::vector<uint8_t>& vps,
const std::vector<uint8_t>& sps,
const std::vector<uint8_t>& pps);

private:
struct VpsInfo {
size_t size = 0;
std::unique_ptr<uint8_t[]> data;
};

struct PpsInfo {
int sps_id = -1;
size_t size = 0;
std::unique_ptr<uint8_t[]> data;
};

struct SpsInfo {
int vps_id = -1;
size_t size = 0;
int width = -1;
int height = -1;
std::unique_ptr<uint8_t[]> data;
};

std::map<uint32_t, VpsInfo> vps_data_;
std::map<uint32_t, PpsInfo> pps_data_;
std::map<uint32_t, SpsInfo> sps_data_;
};

} // namespace video_coding
} // namespace webrtc

#endif // MODULES_VIDEO_CODING_H265_VPS_SPS_PPS_TRACKER_H_

  • Add a new enum to the jitter buffer constants:
modules/video_coding/jitter_buffer_common.h
enum { kH264StartCodeLengthBytes = 4 };
#ifndef DISABLE_H265
enum { kH265StartCodeLengthBytes = 4 };
#endif
  • Add H265 support to the PacketBuffer RTP packet buffer:
modules/video_coding/packet_buffer.cc
#include "common_video/h264/h264_common.h"
#ifndef DISABLE_H265
#include "common_video/h265/h265_common.h"
#endif
#include "modules/rtp_rtcp/source/rtp_header_extensions.h"
#include "modules/rtp_rtcp/source/rtp_packet_received.h"
#include "modules/rtp_rtcp/source/rtp_video_header.h"
#include "modules/video_coding/codecs/h264/include/h264_globals.h"
#ifndef DISABLE_H265
#include "modules/video_coding/codecs/h265/include/h265_globals.h"
#endif
#include "rtc_base/checks.h"

...

std::vector<std::unique_ptr<PacketBuffer::Packet>> PacketBuffer::FindFrames(
uint16_t seq_num) {
std::vector<std::unique_ptr<PacketBuffer::Packet>> found_frames;
for (size_t i = 0; i < buffer_.size() && PotentialNewFrame(seq_num); ++i) {
...

bool is_h264_keyframe = false;
bool is_h265 = false;
#ifndef DISABLE_H265
is_h265 = buffer_[start_index]->codec() == kVideoCodecH265;
bool has_h265_sps = false;
bool has_h265_pps = false;
bool has_h265_idr = false;
bool is_h265_keyframe = false;
#endif
int idr_width = -1;
int idr_height = -1;
while (true) {
++tested_packets;

if (!is_h264 && !is_h265 && buffer_[start_index]->is_first_packet_in_frame())
break;

if (is_h264) {
...
}

#ifndef DISABLE_H265
if (is_h265 && !is_h265_keyframe) {
const auto* h265_header = absl::get_if<RTPVideoHeaderH265>(
&buffer_[start_index]->video_header.video_type_header);
if (!h265_header || h265_header->nalus_length >= kMaxNalusPerPacket)
return found_frames;
for (size_t j = 0; j < h265_header->nalus_length; ++j) {
if (h265_header->nalus[j].type == H265::NaluType::kSps) {
has_h265_sps = true;
} else if (h265_header->nalus[j].type == H265::NaluType::kPps) {
has_h265_pps = true;
} else if (h265_header->nalus[j].type == H265::NaluType::kIdrWRadl
|| h265_header->nalus[j].type == H265::NaluType::kIdrNLp
|| h265_header->nalus[j].type == H265::NaluType::kCra) {
has_h265_idr = true;
}
}
if ((has_h265_sps && has_h265_pps) || has_h265_idr) {
is_h265_keyframe = true;
// Store the resolution of the key frame, taken from the packet with the
// smallest index that carries a valid resolution (typically the IDR or
// SPS packet). Any packets preceding it get the IDR's resolution applied
// to them.
if (buffer_[start_index]->width() > 0 &&
buffer_[start_index]->height() > 0) {
idr_width = buffer_[start_index]->width();
idr_height = buffer_[start_index]->height();
}
}
}
#endif

...

if (is_h264) {
...
}

#ifndef DISABLE_H265
if (is_h265) {
// Warn if this is an unsafe frame.
if (has_h265_idr && (!has_h265_sps || !has_h265_pps)) {
RTC_LOG(LS_WARNING)
<< "Received H.265-IDR frame "
<< "(SPS: " << has_h265_sps << ", PPS: " << has_h265_pps
<< "). Treating as delta frame since "
<< "WebRTC-SpsPpsIdrIsH265Keyframe is always enabled.";
}

// Now that we have decided whether to treat this frame as a key frame
// or delta frame in the frame buffer, we update the field that
// determines if the RtpFrameObject is a key frame or delta frame.
const size_t first_packet_index = start_seq_num % buffer_.size();
if (is_h265_keyframe) {
buffer_[first_packet_index]->video_header.frame_type =
VideoFrameType::kVideoFrameKey;
if (idr_width > 0 && idr_height > 0) {
// IDR frame was finalized and we have the correct resolution for
// IDR; update first packet to have same resolution as IDR.
buffer_[first_packet_index]->video_header.width = idr_width;
buffer_[first_packet_index]->video_header.height =
idr_height;
}
} else {
buffer_[first_packet_index]->video_header.frame_type =
VideoFrameType::kVideoFrameDelta;
}

// If this is not a key frame, make sure there are no gaps in the
// packet sequence numbers up until this point.
if (!is_h265_keyframe && missing_packets_.upper_bound(start_seq_num) !=
missing_packets_.begin()) {
return found_frames;
}
}
#endif

...

}
return found_frames;
}
  • Add H265 support to the packet's insertStartCode flag:
modules/video_coding/packet.cc
      completeNALU(kNaluIncomplete),
#ifndef DISABLE_H265
insertStartCode((videoHeader.codec == kVideoCodecH264 || videoHeader.codec == kVideoCodecH265) &&
videoHeader.is_first_packet_in_frame),
#else
insertStartCode(videoHeader.codec == kVideoCodecH264 &&
videoHeader.is_first_packet_in_frame),
#endif
video_header(videoHeader),
  • Support H265 packet reassembly by adding the new function GetH265NaluInfos:
modules/video_coding/session_info.cc
+ #ifndef DISABLE_H265
+ std::vector<H265NaluInfo> VCMSessionInfo::GetH265NaluInfos() const {
+ if (packets_.empty() || packets_.front().video_header.codec != kVideoCodecH265)
+ return std::vector<H265NaluInfo>();
+ std::vector<H265NaluInfo> nalu_infos;
+ for (const VCMPacket& packet : packets_) {
+ const auto& h265 =
+ absl::get<RTPVideoHeaderH265>(packet.video_header.video_type_header);
+ for (size_t i = 0; i < h265.nalus_length; ++i) {
+ nalu_infos.push_back(h265.nalus[i]);
+ }
+ }
+ return nalu_infos;
+ }
+ #endif

...

size_t VCMSessionInfo::InsertBuffer(uint8_t* frame_buffer,
PacketIterator packet_it) {
...
const size_t kH264NALHeaderLengthInBytes = 1;
+ #ifndef DISABLE_H265
+ const size_t kH265NALHeaderLengthInBytes = 2;
+ const auto* h265 =
+ absl::get_if<RTPVideoHeaderH265>(&packet.video_header.video_type_header);
+ #endif
...
return packet.sizeBytes;
+ #ifndef DISABLE_H265
+ } else if (h265 && h265->packetization_type == kH265AP) {
+ // Similar to H264, for H265 aggregation packets, we rely on jitter buffer
+ // to remove the two length bytes between each NAL unit, and potentially add
+ // start codes.
+ size_t required_length = 0;
+ const uint8_t* nalu_ptr =
+ packet_buffer + kH265NALHeaderLengthInBytes; // skip payloadhdr
+ while (nalu_ptr < packet_buffer + packet.sizeBytes) {
+ size_t length = BufferToUWord16(nalu_ptr);
+ required_length +=
+ length + (packet.insertStartCode ? kH265StartCodeLengthBytes : 0);
+ nalu_ptr += kLengthFieldLength + length;
+ }
+ ShiftSubsequentPackets(packet_it, required_length);
+ nalu_ptr = packet_buffer + kH265NALHeaderLengthInBytes;
+ uint8_t* frame_buffer_ptr = frame_buffer + offset;
+ while (nalu_ptr < packet_buffer + packet.sizeBytes) {
+ size_t length = BufferToUWord16(nalu_ptr);
+ nalu_ptr += kLengthFieldLength;
+ // since H265 shares the same start code as H264, use the same Insert
+ // function to handle start code.
+ frame_buffer_ptr += Insert(nalu_ptr, length, packet.insertStartCode,
+ const_cast<uint8_t*>(frame_buffer_ptr));
+ nalu_ptr += length;
+ }
+ packet.sizeBytes = required_length;
+ return packet.sizeBytes;
+ #endif
}
ShiftSubsequentPackets(
packet_it, packet.sizeBytes +
(packet.insertStartCode ? kH264StartCodeLengthBytes : 0));

packet.sizeBytes =
Insert(packet_buffer, packet.sizeBytes, packet.insertStartCode,
const_cast<uint8_t*>(packet.dataPtr));
return packet.sizeBytes;
}

...

int VCMSessionInfo::InsertPacket(const VCMPacket& packet,
uint8_t* frame_buffer,
const FrameData& frame_data) {

...

+ #ifndef DISABLE_H265
+ } else if (packet.codec() == kVideoCodecH265) {
+ frame_type_ = packet.video_header.frame_type;
+ if (packet.is_first_packet_in_frame() &&
+ (first_packet_seq_num_ == -1 ||
+ IsNewerSequenceNumber(first_packet_seq_num_, packet.seqNum))) {
+ first_packet_seq_num_ = packet.seqNum;
+ }
+ if (packet.markerBit &&
+ (last_packet_seq_num_ == -1 ||
+ IsNewerSequenceNumber(packet.seqNum, last_packet_seq_num_))) {
+ last_packet_seq_num_ = packet.seqNum;
+ }
+ #endif

...

return static_cast<int>(returnLength);
}
modules/video_coding/session_info.h
  std::vector<NaluInfo> GetNaluInfos() const;
#ifndef DISABLE_H265
std::vector<H265NaluInfo> GetH265NaluInfos() const;
#endif
  • Add the two new files to the GN/ninja build:
if (rtc_use_h265) {
sources += [
"h265_vps_sps_pps_tracker.cc",
"h265_vps_sps_pps_tracker.h",
]
}

RTP-related support

  • Handle the new codec type in the experimental minimum-bitrate lookup so the switch stays exhaustive and compilation succeeds:
absl::optional<DataRate> GetExperimentalMinVideoBitrate(VideoCodecType type) {
...

#ifndef DISABLE_H265
case kVideoCodecH265:
#endif
case kVideoCodecGeneric:
...
}
  • Add H265 support to the RTP payload path:
video/rtp_video_stream_receiver.cc
void RtpVideoStreamReceiver::OnReceivedPayloadData(
rtc::CopyOnWriteBuffer codec_payload,
const RtpPacketReceived& rtp_packet,
const RTPVideoHeader& video) {
...

#ifndef DISABLE_H265
} else if (packet->codec() == kVideoCodecH265) {
// Only when we start to receive packets will we know what payload type
// that will be used. When we know the payload type insert the correct
// sps/pps into the tracker.
if (packet->payload_type != last_payload_type_) {
last_payload_type_ = packet->payload_type;
InsertSpsPpsIntoTracker(packet->payload_type);
}

video_coding::H265VpsSpsPpsTracker::FixedBitstream fixed =
h265_tracker_.CopyAndFixBitstream(
rtc::MakeArrayView(codec_payload.cdata(), codec_payload.size()),
&packet->video_header);

switch (fixed.action) {
case video_coding::H265VpsSpsPpsTracker::kRequestKeyframe:
rtcp_feedback_buffer_.RequestKeyFrame();
rtcp_feedback_buffer_.SendBufferedRtcpFeedback();
ABSL_FALLTHROUGH_INTENDED;
case video_coding::H265VpsSpsPpsTracker::kDrop:
return;
case video_coding::H265VpsSpsPpsTracker::kInsert:
packet->video_payload = std::move(fixed.bitstream);
break;
}
#endif
} else {
packet->video_payload = std::move(codec_payload);
}
}
video/rtp_video_stream_receiver.h
#ifndef DISABLE_H265
#include "modules/video_coding/h265_vps_sps_pps_tracker.h"
#endif

...

std::map<uint8_t, std::unique_ptr<VideoRtpDepacketizer>> payload_type_map_;

#ifndef DISABLE_H265
video_coding::H265VpsSpsPpsTracker h265_tracker_;
#endif
video/rtp_video_stream_receiver2.cc
void RtpVideoStreamReceiver2::OnReceivedPayloadData(
rtc::CopyOnWriteBuffer codec_payload,
const RtpPacketReceived& rtp_packet,
const RTPVideoHeader& video) {
...

#ifndef DISABLE_H265
} else if (packet->codec() == kVideoCodecH265) {
// Only when we start to receive packets will we know what payload type
// that will be used. When we know the payload type insert the correct
// sps/pps into the tracker.
if (packet->payload_type != last_payload_type_) {
last_payload_type_ = packet->payload_type;
InsertSpsPpsIntoTracker(packet->payload_type);
}

video_coding::H265VpsSpsPpsTracker::FixedBitstream fixed =
h265_tracker_.CopyAndFixBitstream(
rtc::MakeArrayView(codec_payload.cdata(), codec_payload.size()),
&packet->video_header);

switch (fixed.action) {
case video_coding::H265VpsSpsPpsTracker::kRequestKeyframe:
rtcp_feedback_buffer_.RequestKeyFrame();
rtcp_feedback_buffer_.SendBufferedRtcpFeedback();
ABSL_FALLTHROUGH_INTENDED;
case video_coding::H265VpsSpsPpsTracker::kDrop:
return;
case video_coding::H265VpsSpsPpsTracker::kInsert:
packet->video_payload = std::move(fixed.bitstream);
break;
}
#endif
} else {
packet->video_payload = std::move(codec_payload);
}
video/rtp_video_stream_receiver2.h
#ifndef DISABLE_H265
#include "modules/video_coding/h265_vps_sps_pps_tracker.h"
#endif

...

std::map<uint8_t, std::unique_ptr<VideoRtpDepacketizer>> payload_type_map_
RTC_GUARDED_BY(worker_task_checker_);

#ifndef DISABLE_H265
video_coding::H265VpsSpsPpsTracker h265_tracker_;
#endif
  • Statistics support:
video/send_statistics_proxy.cc
enum HistogramCodecType {
kVideoUnknown = 0,
kVideoVp8 = 1,
kVideoVp9 = 2,
kVideoH264 = 3,
#ifndef DISABLE_H265
kVideoH265 = 4,
#endif
kVideoMax = 64,
};

HistogramCodecType PayloadNameToHistogramCodecType(
const std::string& payload_name) {
VideoCodecType codecType = PayloadStringToCodecType(payload_name);
switch (codecType) {
case kVideoCodecVP8:
return kVideoVp8;
case kVideoCodecVP9:
return kVideoVp9;
case kVideoCodecH264:
return kVideoH264;
#ifndef DISABLE_H265
case kVideoCodecH265:
return kVideoH265;
#endif
default:
return kVideoUnknown;
}
}
  • Receive-stream decoder initialization support:
video/video_receive_stream.cc
VideoCodec CreateDecoderVideoCodec(const VideoReceiveStream::Decoder& decoder) {
VideoCodec codec;
memset(&codec, 0, sizeof(codec));

codec.codecType = PayloadStringToCodecType(decoder.video_format.name);

...

return associated_codec;
#ifndef DISABLE_H265
} else if (codec.codecType == kVideoCodecH265) {
*(codec.H265()) = VideoEncoder::GetDefaultH265Settings();
#endif
}
...

return codec;
}
  • Video stream encoder support:
video/video_stream_encoder.cc
bool RequiresEncoderReset(const VideoCodec& prev_send_codec,
const VideoCodec& new_send_codec,
bool was_encode_called_since_last_initialization) {
...
case kVideoCodecH264:
if (new_send_codec.H264() != prev_send_codec.H264()) {
return true;
}
break;
#ifndef DISABLE_H265
case kVideoCodecH265:
if (new_send_codec.H265() != prev_send_codec.H265()) {
return true;
}
break;
#endif
}

Codec support

  • Add the H265 type to the video data definitions:
api/video/encoded_image.h
#include "api/video/video_codec_type.h"
api/video/video_codec_type.h
#ifndef DISABLE_H265
enum VideoCodecType {
// Java_cpp_enum.py does not allow ifdefs inside an enum,
// so we have to keep two versions of VideoCodecType here.
kVideoCodecGeneric = 0,
kVideoCodecVP8,
kVideoCodecVP9,
kVideoCodecAV1,
kVideoCodecH264,
kVideoCodecH265,
kVideoCodecMultiplex,
};
#else
enum VideoCodecType {
// There are various memset(..., 0, ...) calls in the code that rely on
// kVideoCodecGeneric being zero.
kVideoCodecGeneric = 0,
kVideoCodecVP8,
kVideoCodecVP9,
kVideoCodecAV1,
kVideoCodecH264,
kVideoCodecMultiplex,
};
#endif
  • Basic codec information support (a sanity-check sketch follows the video_codec.cc listing):
api/video_codecs/video_codec.cc
constexpr char kPayloadNameH264[] = "H264";
#ifndef DISABLE_H265
constexpr char kPayloadNameH265[] = "H265";
#endif

...

#ifndef DISABLE_H265
bool VideoCodecH265::operator==(const VideoCodecH265& other) const {
return (frameDroppingOn == other.frameDroppingOn &&
keyFrameInterval == other.keyFrameInterval &&
vpsLen == other.vpsLen && spsLen == other.spsLen &&
ppsLen == other.ppsLen &&
(vpsLen == 0 || memcmp(vpsData, other.vpsData, vpsLen) == 0) &&
(spsLen == 0 || memcmp(spsData, other.spsData, spsLen) == 0) &&
(ppsLen == 0 || memcmp(ppsData, other.ppsData, ppsLen) == 0));
}
#endif

...

const VideoCodecH264& VideoCodec::H264() const {
RTC_DCHECK_EQ(codecType, kVideoCodecH264);
return codec_specific_.H264;
}

#ifndef DISABLE_H265
VideoCodecH265* VideoCodec::H265() {
RTC_DCHECK_EQ(codecType, kVideoCodecH265);
return &codec_specific_.H265;
}

const VideoCodecH265& VideoCodec::H265() const {
RTC_DCHECK_EQ(codecType, kVideoCodecH265);
return codec_specific_.H265;
}
#endif

const char* CodecTypeToPayloadString(VideoCodecType type) {
...
case kVideoCodecH264:
return kPayloadNameH264;
#ifndef DISABLE_H265
case kVideoCodecH265:
return kPayloadNameH265;
#endif
case kVideoCodecMultiplex:
return kPayloadNameMultiplex;
case kVideoCodecGeneric:
return kPayloadNameGeneric;
}

VideoCodecType PayloadStringToCodecType(const std::string& name) {
...
if (absl::EqualsIgnoreCase(name, kPayloadNameH264))
return kVideoCodecH264;
#ifndef DISABLE_H265
if (absl::EqualsIgnoreCase(name, kPayloadNameH265))
return kVideoCodecH265;
#endif
if (absl::EqualsIgnoreCase(name, kPayloadNameMultiplex))
return kVideoCodecMultiplex;
return kVideoCodecGeneric;
}

api/video_codecs/video_codec.h
#ifndef DISABLE_H265
struct VideoCodecH265 {
bool operator==(const VideoCodecH265& other) const;
bool operator!=(const VideoCodecH265& other) const {
return !(*this == other);
}
bool frameDroppingOn;
int keyFrameInterval;
const uint8_t* vpsData;
size_t vpsLen;
const uint8_t* spsData;
size_t spsLen;
const uint8_t* ppsData;
size_t ppsLen;
};
#endif

...

union VideoCodecUnion {
VideoCodecVP8 VP8;
VideoCodecVP9 VP9;
VideoCodecH264 H264;
#ifndef DISABLE_H265
VideoCodecH265 H265;
#endif
};

...

class RTC_EXPORT VideoCodec {
public:
...

#ifndef DISABLE_H265
VideoCodecH265* H265();
const VideoCodecH265& H265() const;
#endif
private:
// TODO(hta): Consider replacing the union with a pointer type.
// This will allow removing the VideoCodec* types from this file.
VideoCodecUnion codec_specific_;
};
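
With the mappings above in place, a quick round trip through the payload-name functions and the new H265() accessor is a cheap sanity check. An illustrative sketch, not part of the patch:

#include <cassert>
#include <cstring>

#include "api/video_codecs/video_codec.h"

int main() {
  assert(webrtc::PayloadStringToCodecType("H265") == webrtc::kVideoCodecH265);
  assert(std::strcmp(webrtc::CodecTypeToPayloadString(webrtc::kVideoCodecH265),
                     "H265") == 0);

  webrtc::VideoCodec codec;
  std::memset(&codec, 0, sizeof(codec));
  codec.codecType = webrtc::kVideoCodecH265;
  codec.H265()->keyFrameInterval = 3000;  // exercises the new accessor
  assert(codec.H265()->keyFrameInterval == 3000);
  return 0;
}
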
  • Decoder software-fallback support:
api/video_codecs/video_decoder_software_fallback_wrapper.cc
void VideoDecoderSoftwareFallbackWrapper::UpdateFallbackDecoderHistograms() {
switch (codec_settings_.codecType) {

...

case kVideoCodecH264:
RTC_HISTOGRAM_COUNTS_100000(kFallbackHistogramsUmaPrefix + "H264",
hw_decoded_frames_since_last_fallback_);
break;
#ifndef DISABLE_H265
case kVideoCodecH265:
RTC_HISTOGRAM_COUNTS_100000(kFallbackHistogramsUmaPrefix + "H265",
hw_decoded_frames_since_last_fallback_);
break;
#endif
case kVideoCodecMultiplex:
RTC_HISTOGRAM_COUNTS_100000(kFallbackHistogramsUmaPrefix + "Multiplex",
hw_decoded_frames_since_last_fallback_);
break;
}
}
  • Encoder configuration:
api/video_codecs/video_encoder_config.cc
void VideoEncoderConfig::EncoderSpecificSettings::FillEncoderSpecificSettings(
VideoCodec* codec) const {
if (codec->codecType == kVideoCodecH264) {
FillVideoCodecH264(codec->H264());
} else if (codec->codecType == kVideoCodecVP8) {
FillVideoCodecVp8(codec->VP8());
} else if (codec->codecType == kVideoCodecVP9) {
FillVideoCodecVp9(codec->VP9());
#ifndef DISABLE_H265
} else if (codec->codecType == kVideoCodecH265) {
FillVideoCodecH265(codec->H265());
#endif
} else {
RTC_NOTREACHED() << "Encoder specifics set/used for unknown codec type.";
}
}

#ifndef DISABLE_H265
void VideoEncoderConfig::EncoderSpecificSettings::FillVideoCodecH265(
VideoCodecH265* h265_settings) const {
RTC_NOTREACHED();
}
#endif

#ifndef DISABLE_H265
VideoEncoderConfig::H265EncoderSpecificSettings::H265EncoderSpecificSettings(
const VideoCodecH265& specifics)
: specifics_(specifics) {}

void VideoEncoderConfig::H265EncoderSpecificSettings::FillVideoCodecH265(
VideoCodecH265* h265_settings) const {
*h265_settings = specifics_;
}
#endif
api/video_codecs/video_encoder_config.h
  class EncoderSpecificSettings : public rtc::RefCountInterface {
public:
...
virtual void FillVideoCodecH264(VideoCodecH264* h264_settings) const;
#ifndef DISABLE_H265
virtual void FillVideoCodecH265(VideoCodecH265* h265_settings) const;
#endif

private:
~EncoderSpecificSettings() override {}
friend class VideoEncoderConfig;
};

#ifndef DISABLE_H265
class H265EncoderSpecificSettings : public EncoderSpecificSettings {
public:
explicit H265EncoderSpecificSettings(const VideoCodecH265& specifics);
void FillVideoCodecH265(VideoCodecH265* h265_settings) const override;

private:
VideoCodecH265 specifics_;
};
#endif
api/video_codecs/video_encoder.cc
#ifndef DISABLE_H265
VideoCodecH265 VideoEncoder::GetDefaultH265Settings() {
VideoCodecH265 h265_settings;
memset(&h265_settings, 0, sizeof(h265_settings));

// h265_settings.profile = kProfileBase;
h265_settings.frameDroppingOn = true;
h265_settings.keyFrameInterval = 3000;
h265_settings.spsData = nullptr;
h265_settings.spsLen = 0;
h265_settings.ppsData = nullptr;
h265_settings.ppsLen = 0;

return h265_settings;
}
#endif
api/video_codecs/video_encoder.h
  static VideoCodecH264 GetDefaultH264Settings();
#ifndef DISABLE_H265
static VideoCodecH265 GetDefaultH265Settings();
#endif
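
For completeness, this is roughly how an application would hand H265-specific settings to the encoder, following the same pattern as the existing VP8/VP9/H264 EncoderSpecificSettings. A sketch under that assumption (ApplyH265Defaults is a hypothetical helper):

#include "api/video_codecs/video_encoder.h"
#include "api/video_codecs/video_encoder_config.h"
#include "rtc_base/ref_counted_object.h"

void ApplyH265Defaults(webrtc::VideoEncoderConfig* encoder_config) {
  webrtc::VideoCodecH265 h265 = webrtc::VideoEncoder::GetDefaultH265Settings();
  // encoder_specific_settings is a scoped_refptr, so it takes ownership.
  encoder_config->encoder_specific_settings =
      new rtc::RefCountedObject<
          webrtc::VideoEncoderConfig::H265EncoderSpecificSettings>(h265);
}
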
  • RTP payload configuration:
call/rtp_payload_params.cc
void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
absl::optional<int> spatial_index,
RTPVideoHeader* rtp) {
switch (info.codecType) {
...
case kVideoCodecH264: {
auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>();
h264_header.packetization_mode =
info.codecSpecific.H264.packetization_mode;
rtp->simulcastIdx = spatial_index.value_or(0);
return;
}
#ifndef DISABLE_H265
case kVideoCodecH265: {
// Must be a reference: emplace() returns a reference into |rtp|, and
// writing through a copy would leave the RTP header unmodified.
auto& h265_header = rtp->video_type_header.emplace<RTPVideoHeaderH265>();
h265_header.packetization_mode =
info.codecSpecific.H265.packetization_mode;
return;
}
#endif
...
}
}

void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
int64_t frame_id,
bool is_keyframe,
RTPVideoHeader* rtp_video_header) {

...

switch (rtp_video_header->codec) {
...
case VideoCodecType::kVideoCodecH264:
if (codec_specific_info) {
H264ToGeneric(codec_specific_info->codecSpecific.H264, frame_id,
is_keyframe, rtp_video_header);
}
return;
#ifndef DISABLE_H265
case VideoCodecType::kVideoCodecH265:
#endif
case VideoCodecType::kVideoCodecMultiplex:
return;
}
}
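
Note that SetGeneric() intentionally groups kVideoCodecH265 with kVideoCodecMultiplex and returns early: this patch does not populate the generic frame descriptor for H265, so features that depend on it are unavailable for H265 streams.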

RTP parsing support

  • Add a new h265 directory under common_video:
common_video/h265/h265_bitstream_parser.cc
#include "common_video/h265/h265_bitstream_parser.h"

#include <stdlib.h>

#include <cstdint>
#include <vector>

#include "common_video/h265/h265_common.h"
#include "rtc_base/bit_buffer.h"
#include "rtc_base/logging.h"

namespace {

const int kMaxAbsQpDeltaValue = 51;
const int kMinQpValue = 0;
const int kMaxQpValue = 51;

} // namespace

namespace webrtc {

#define RETURN_ON_FAIL(x, res) \
if (!(x)) { \
RTC_LOG_F(LS_ERROR) << "FAILED: " #x; \
return res; \
}

#define RETURN_INV_ON_FAIL(x) RETURN_ON_FAIL(x, kInvalidStream)

H265BitstreamParser::H265BitstreamParser() {}
H265BitstreamParser::~H265BitstreamParser() {}

H265BitstreamParser::Result H265BitstreamParser::ParseNonParameterSetNalu(
const uint8_t* source,
size_t source_length,
uint8_t nalu_type) {
if (!sps_ || !pps_)
return kInvalidStream;

last_slice_qp_delta_ = absl::nullopt;
const std::vector<uint8_t> slice_rbsp =
H265::ParseRbsp(source, source_length);
if (slice_rbsp.size() < H265::kNaluTypeSize)
return kInvalidStream;

rtc::BitBuffer slice_reader(slice_rbsp.data() + H265::kNaluTypeSize,
slice_rbsp.size() - H265::kNaluTypeSize);
// Check to see if this is an IDR slice, which has an extra field to parse
// out.
//bool is_idr = (source[0] & 0x0F) == H265::NaluType::kIdr;
//uint8_t nal_ref_idc = (source[0] & 0x60) >> 5;
uint32_t golomb_tmp;
uint32_t bits_tmp;

// first_slice_segment_in_pic_flag: u(1)
uint32_t first_slice_segment_in_pic_flag = 0;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&first_slice_segment_in_pic_flag, 1));
if (H265::NaluType::kBlaWLp <= nalu_type &&
nalu_type <= H265::NaluType::kRsvIrapVcl23) {
// no_output_of_prior_pics_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
// slice_pic_parameter_set_id: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
uint32_t dependent_slice_segment_flag = 0;
if (first_slice_segment_in_pic_flag == 0) {
if (pps_->dependent_slice_segments_enabled_flag) {
// dependent_slice_segment_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&dependent_slice_segment_flag, 1));
}

// slice_segment_address: u(v)
int32_t log2_ctb_size_y = sps_->log2_min_luma_coding_block_size_minus3 + 3 + sps_->log2_diff_max_min_luma_coding_block_size;
uint32_t ctb_size_y = 1 << log2_ctb_size_y;
uint32_t pic_width_in_ctbs_y = sps_->pic_width_in_luma_samples / ctb_size_y;
if(sps_->pic_width_in_luma_samples % ctb_size_y)
pic_width_in_ctbs_y++;

uint32_t pic_height_in_ctbs_y = sps_->pic_height_in_luma_samples / ctb_size_y;
if(sps_->pic_height_in_luma_samples % ctb_size_y)
pic_height_in_ctbs_y++;

uint32_t slice_segment_address_bits = H265::Log2(pic_height_in_ctbs_y * pic_width_in_ctbs_y);
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, slice_segment_address_bits));
}

if (dependent_slice_segment_flag == 0) {
for (uint32_t i = 0; i < pps_->num_extra_slice_header_bits; i++) {
// slice_reserved_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
// slice_type: ue(v)
uint32_t slice_type = 0;
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&slice_type));
if (pps_->output_flag_present_flag) {
// pic_output_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
if (sps_->separate_colour_plane_flag) {
// colour_plane_id: u(2)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 2));
}
uint32_t num_long_term_sps = 0;
uint32_t num_long_term_pics = 0;
std::vector<uint32_t> lt_idx_sps;
std::vector<uint32_t> used_by_curr_pic_lt_flag;
uint32_t short_term_ref_pic_set_sps_flag = 0;
uint32_t short_term_ref_pic_set_idx = 0;
H265SpsParser::ShortTermRefPicSet short_term_ref_pic_set;
uint32_t slice_temporal_mvp_enabled_flag = 0;
if (nalu_type != H265::NaluType::kIdrWRadl && nalu_type != H265::NaluType::kIdrNLp) {
// slice_pic_order_cnt_lsb: u(v)
uint32_t slice_pic_order_cnt_lsb_bits = sps_->log2_max_pic_order_cnt_lsb_minus4 + 4;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, slice_pic_order_cnt_lsb_bits));
// short_term_ref_pic_set_sps_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&short_term_ref_pic_set_sps_flag, 1));
if (!short_term_ref_pic_set_sps_flag) {
absl::optional<H265SpsParser::ShortTermRefPicSet> ref_pic_set
= H265SpsParser::ParseShortTermRefPicSet(sps_->num_short_term_ref_pic_sets,
sps_->num_short_term_ref_pic_sets, sps_->short_term_ref_pic_set, *sps_, &slice_reader);
if (ref_pic_set) {
short_term_ref_pic_set = *ref_pic_set;
} else {
return kInvalidStream;
}
} else if (sps_->num_short_term_ref_pic_sets > 1) {
// short_term_ref_pic_set_idx: u(v)
uint32_t short_term_ref_pic_set_idx_bits = H265::Log2(sps_->num_short_term_ref_pic_sets);
if ((uint32_t)(1 << short_term_ref_pic_set_idx_bits) < sps_->num_short_term_ref_pic_sets) {
short_term_ref_pic_set_idx_bits++;
}
if (short_term_ref_pic_set_idx_bits > 0) {
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&short_term_ref_pic_set_idx, short_term_ref_pic_set_idx_bits));
}
}
if (sps_->long_term_ref_pics_present_flag) {
if (sps_->num_long_term_ref_pics_sps > 0) {
// num_long_term_sps: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&num_long_term_sps));
}
// num_long_term_sps: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&num_long_term_pics));
lt_idx_sps.resize(num_long_term_sps + num_long_term_pics, 0);
used_by_curr_pic_lt_flag.resize(num_long_term_sps + num_long_term_pics, 0);
for (uint32_t i = 0; i < num_long_term_sps + num_long_term_pics; i++) {
if (i < num_long_term_sps) {
if (sps_->num_long_term_ref_pics_sps > 1) {
// lt_idx_sps: u(v)
uint32_t lt_idx_sps_bits = H265::Log2(sps_->num_long_term_ref_pics_sps);
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&lt_idx_sps[i], lt_idx_sps_bits));
}
} else {
// poc_lsb_lt: u(v)
uint32_t poc_lsb_lt_bits = sps_->log2_max_pic_order_cnt_lsb_minus4 + 4;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, poc_lsb_lt_bits));
// used_by_curr_pic_lt_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&used_by_curr_pic_lt_flag[i], 1));
}
// delta_poc_msb_present_flag: u(1)
uint32_t delta_poc_msb_present_flag = 0;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&delta_poc_msb_present_flag, 1));
if (delta_poc_msb_present_flag) {
// delta_poc_msb_cycle_lt: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
}
}
}
if (sps_->sps_temporal_mvp_enabled_flag) {
// slice_temporal_mvp_enabled_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&slice_temporal_mvp_enabled_flag, 1));
}
}

if (sps_->sample_adaptive_offset_enabled_flag) {
// slice_sao_luma_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
uint32_t chroma_array_type = sps_->separate_colour_plane_flag == 0 ? sps_->chroma_format_idc : 0;
if (chroma_array_type != 0) {
// slice_sao_chroma_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
}

if (slice_type == H265::SliceType::kP || slice_type == H265::SliceType::kB) {
// num_ref_idx_active_override_flag: u(1)
uint32_t num_ref_idx_active_override_flag = 0;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&num_ref_idx_active_override_flag, 1));
uint32_t num_ref_idx_l0_active_minus1 = pps_->num_ref_idx_l0_default_active_minus1;
uint32_t num_ref_idx_l1_active_minus1 = pps_->num_ref_idx_l1_default_active_minus1;
if (num_ref_idx_active_override_flag) {
// num_ref_idx_l0_active_minus1: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&num_ref_idx_l0_active_minus1));
if (slice_type == H265::SliceType::kB) {
// num_ref_idx_l1_active_minus1: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&num_ref_idx_l1_active_minus1));
}
}
uint32_t num_pic_total_curr = CalcNumPocTotalCurr(
num_long_term_sps, num_long_term_pics, lt_idx_sps,
used_by_curr_pic_lt_flag, short_term_ref_pic_set_sps_flag,
short_term_ref_pic_set_idx, short_term_ref_pic_set);
if (pps_->lists_modification_present_flag && num_pic_total_curr > 1) {
// ref_pic_lists_modification()
uint32_t list_entry_bits = H265::Log2(num_pic_total_curr);
if ((uint32_t)(1 << list_entry_bits) < num_pic_total_curr) {
list_entry_bits++;
}
// ref_pic_list_modification_flag_l0: u(1)
uint32_t ref_pic_list_modification_flag_l0 = 0;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&ref_pic_list_modification_flag_l0, 1));
if (ref_pic_list_modification_flag_l0) {
for (uint32_t i = 0; i < num_ref_idx_l0_active_minus1; i++) {
// list_entry_l0: u(v)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, list_entry_bits));
}
}
if (slice_type == H265::SliceType::kB) {
// ref_pic_list_modification_flag_l1: u(1)
uint32_t ref_pic_list_modification_flag_l1 = 0;
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&ref_pic_list_modification_flag_l1, 1));
if (ref_pic_list_modification_flag_l1) {
for (uint32_t i = 0; i < num_ref_idx_l1_active_minus1; i++) {
// list_entry_l1: u(v)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, list_entry_bits));
}
}
}
}
if (slice_type == H265::SliceType::kB) {
// mvd_l1_zero_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
if (pps_->cabac_init_present_flag) {
// cabac_init_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
if (slice_temporal_mvp_enabled_flag) {
uint32_t collocated_from_l0_flag = 0;
if (slice_type == H265::SliceType::kB) {
// collocated_from_l0_flag: u(1)
RETURN_INV_ON_FAIL(slice_reader.ReadBits(&collocated_from_l0_flag, 1));
}
if ((collocated_from_l0_flag && num_ref_idx_l0_active_minus1 > 0)
|| (!collocated_from_l0_flag && num_ref_idx_l1_active_minus1 > 0)) {
// collocated_ref_idx: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
}
}
if ((pps_->weighted_pred_flag && slice_type == H265::SliceType::kP)
|| (pps_->weighted_bipred_flag && slice_type == H265::SliceType::kB)) {
// pred_weight_table()
// TODO(piasy): Do we need support for pred_weight_table()?
RTC_LOG(LS_ERROR) << "Streams with pred_weight_table unsupported.";
return kUnsupportedStream;
}
// five_minus_max_num_merge_cand: ue(v)
RETURN_INV_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
// TODO(piasy): motion_vector_resolution_control_idc?
}
}

// slice_qp_delta: se(v)
int32_t last_slice_qp_delta;
RETURN_INV_ON_FAIL(
slice_reader.ReadSignedExponentialGolomb(&last_slice_qp_delta));
if (abs(last_slice_qp_delta) > kMaxAbsQpDeltaValue) {
// Something has gone wrong, and the parsed value is invalid.
RTC_LOG(LS_WARNING) << "Parsed QP value out of range.";
return kInvalidStream;
}

last_slice_qp_delta_ = last_slice_qp_delta;

return kOk;
}

uint32_t H265BitstreamParser::CalcNumPocTotalCurr(
uint32_t num_long_term_sps, uint32_t num_long_term_pics,
const std::vector<uint32_t> lt_idx_sps,
const std::vector<uint32_t> used_by_curr_pic_lt_flag,
uint32_t short_term_ref_pic_set_sps_flag,
uint32_t short_term_ref_pic_set_idx,
const H265SpsParser::ShortTermRefPicSet& short_term_ref_pic_set) {
uint32_t num_poc_total_curr = 0;
uint32_t curr_sps_idx;

bool used_by_curr_pic_lt[16];
uint32_t num_long_term = num_long_term_sps + num_long_term_pics;

for (uint32_t i = 0; i < num_long_term; i++) {
if (i < num_long_term_sps) {
used_by_curr_pic_lt[i] = sps_->used_by_curr_pic_lt_sps_flag[lt_idx_sps[i]];
} else {
used_by_curr_pic_lt[i] = used_by_curr_pic_lt_flag[i];
}
}

if (short_term_ref_pic_set_sps_flag) {
curr_sps_idx = short_term_ref_pic_set_idx;
} else {
curr_sps_idx = sps_->num_short_term_ref_pic_sets;
}

if (sps_->short_term_ref_pic_set.size() <= curr_sps_idx) {
if (curr_sps_idx != 0 || short_term_ref_pic_set_sps_flag) {
return 0;
}
}

const H265SpsParser::ShortTermRefPicSet* ref_pic_set;
if (curr_sps_idx < sps_->short_term_ref_pic_set.size()) {
ref_pic_set = &(sps_->short_term_ref_pic_set[curr_sps_idx]);
} else {
ref_pic_set = &short_term_ref_pic_set;
}

for (uint32_t i = 0; i < ref_pic_set->num_negative_pics; i++) {
if (ref_pic_set->used_by_curr_pic_s0_flag[i]) {
num_poc_total_curr++;
}
}

for (uint32_t i = 0; i < ref_pic_set->num_positive_pics; i++) {
if (ref_pic_set->used_by_curr_pic_s1_flag[i]) {
num_poc_total_curr++;
}
}

for (uint32_t i = 0; i < num_long_term_sps + num_long_term_pics; i++) {
if (used_by_curr_pic_lt[i]) {
num_poc_total_curr++;
}
}

return num_poc_total_curr;
}

void H265BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) {
H265::NaluType nalu_type = H265::ParseNaluType(slice[0]);
if (nalu_type == H265::NaluType::kSps) {
sps_ = H265SpsParser::ParseSps(slice + H265::kNaluTypeSize,
length - H265::kNaluTypeSize);
if (!sps_) {
RTC_LOG(LS_WARNING) << "Unable to parse SPS from H265 bitstream.";
}
} else if (nalu_type == H265::NaluType::kPps) {
pps_ = H265PpsParser::ParsePps(slice + H265::kNaluTypeSize,
length - H265::kNaluTypeSize);
if (!pps_) {
RTC_LOG(LS_WARNING) << "Unable to parse PPS from H265 bitstream.";
}
} else if (nalu_type <= H265::NaluType::kRsvIrapVcl23) {
Result res = ParseNonParameterSetNalu(slice, length, nalu_type);
if (res != kOk) {
RTC_LOG(LS_INFO) << "Failed to parse bitstream. Error: " << res;
}
}
}

void H265BitstreamParser::ParseBitstream(const uint8_t* bitstream,
size_t length) {
std::vector<H265::NaluIndex> nalu_indices =
H265::FindNaluIndices(bitstream, length);
for (const H265::NaluIndex& index : nalu_indices)
ParseSlice(&bitstream[index.payload_start_offset], index.payload_size);
}

bool H265BitstreamParser::GetLastSliceQp(int* qp) const {
if (!last_slice_qp_delta_ || !pps_) {
return false;
}
const int parsed_qp = 26 + pps_->pic_init_qp_minus26 + *last_slice_qp_delta_;
if (parsed_qp < kMinQpValue || parsed_qp > kMaxQpValue) {
RTC_LOG(LS_ERROR) << "Parsed invalid QP from bitstream.";
return false;
}
*qp = parsed_qp;
return true;
}

void H265BitstreamParser::ParseBitstream(
rtc::ArrayView<const uint8_t> bitstream) {
ParseBitstream(bitstream.data(), bitstream.size());
}

absl::optional<int> H265BitstreamParser::GetLastSliceQp() const {
int qp;
bool success = GetLastSliceQp(&qp);
return success ? absl::optional<int>(qp) : absl::nullopt;
}

} // namespace webrtc

common_video/h265/h265_bitstream_parser.h
#ifndef COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_
#define COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_
#include <stddef.h>
#include <stdint.h>

#include "absl/types/optional.h"
#include "api/video_codecs/bitstream_parser.h"
#include "common_video/h265/h265_pps_parser.h"
#include "common_video/h265/h265_sps_parser.h"

namespace webrtc {

// Stateful H265 bitstream parser (due to SPS/PPS). Used to parse out QP values
// from the bitstream.
// TODO(pbos): Unify with RTP SPS parsing and only use one H265 parser.
// TODO(pbos): If/when this gets used on the receiver side CHECKs must be
// removed and gracefully abort as we have no control over receive-side
// bitstreams.
class H265BitstreamParser : public BitstreamParser {
public:
H265BitstreamParser();
~H265BitstreamParser() override;

// These are here for backwards-compatibility for the time being.
void ParseBitstream(const uint8_t* bitstream, size_t length);
bool GetLastSliceQp(int* qp) const;

// New interface.
void ParseBitstream(rtc::ArrayView<const uint8_t> bitstream) override;
absl::optional<int> GetLastSliceQp() const override;

protected:
enum Result {
kOk,
kInvalidStream,
kUnsupportedStream,
};
void ParseSlice(const uint8_t* slice, size_t length);
Result ParseNonParameterSetNalu(const uint8_t* source,
size_t source_length,
uint8_t nalu_type);

uint32_t CalcNumPocTotalCurr(uint32_t num_long_term_sps,
uint32_t num_long_term_pics,
const std::vector<uint32_t> lt_idx_sps,
const std::vector<uint32_t> used_by_curr_pic_lt_flag,
uint32_t short_term_ref_pic_set_sps_flag,
uint32_t short_term_ref_pic_set_idx,
const H265SpsParser::ShortTermRefPicSet& short_term_ref_pic_set);

// SPS/PPS state, updated when parsing new SPS/PPS, used to parse slices.
absl::optional<H265SpsParser::SpsState> sps_;
absl::optional<H265PpsParser::PpsState> pps_;

// Last parsed slice QP.
absl::optional<int32_t> last_slice_qp_delta_;
};

} // namespace webrtc

#endif // COMMON_VIDEO_H265_H265_BITSTREAM_PARSER_H_
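
A typical use of the parser (a sketch; LogSliceQp is a hypothetical helper, and annex_b/size are assumed to hold a complete Annex-B access unit, e.g. a keyframe carrying VPS/SPS/PPS followed by a slice). GetLastSliceQp() only succeeds after both a PPS and a slice header have been parsed, since the QP is reconstructed as 26 + pic_init_qp_minus26 + slice_qp_delta:

#include "common_video/h265/h265_bitstream_parser.h"
#include "rtc_base/logging.h"

void LogSliceQp(const uint8_t* annex_b, size_t size) {
  webrtc::H265BitstreamParser parser;
  parser.ParseBitstream(annex_b, size);
  int qp = 0;
  if (parser.GetLastSliceQp(&qp)) {
    RTC_LOG(LS_INFO) << "H265 slice QP: " << qp;
  }
}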

common_video/h265/h265_common.cc
#include "common_video/h265/h265_common.h"
#include "common_video/h264/h264_common.h"

namespace webrtc {
namespace H265 {

const uint8_t kNaluTypeMask = 0x7E;

std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer,
size_t buffer_size) {
std::vector<H264::NaluIndex> indices = H264::FindNaluIndices(buffer, buffer_size);
std::vector<NaluIndex> results;
for (auto& index : indices) {
results.push_back({index.start_offset, index.payload_start_offset, index.payload_size});
}
return results;
}

NaluType ParseNaluType(uint8_t data) {
return static_cast<NaluType>((data & kNaluTypeMask) >> 1);
}

std::vector<uint8_t> ParseRbsp(const uint8_t* data, size_t length) {
return H264::ParseRbsp(data, length);
}

void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination) {
H264::WriteRbsp(bytes, length, destination);
}

// Returns Ceil(Log2(value)), i.e. the number of bits needed for u(v)
// fields such as slice_segment_address.
uint32_t Log2(uint32_t value) {
uint32_t result = 0;
// If value is not a power of two an additional bit is required
// to account for the ceil() of log2() below.
if ((value & (value - 1)) != 0) {
++result;
}
// Loop down to 1 (not 0) so powers of two map to their exact log2.
while (value > 1) {
value >>= 1;
++result;
}

return result;
}

} // namespace H265
} // namespace webrtc

common_video/h265/h265_common.h
#ifndef COMMON_VIDEO_H265_H265_COMMON_H_
#define COMMON_VIDEO_H265_H265_COMMON_H_

#include <memory>
#include <vector>

#include "rtc_base/buffer.h"

namespace webrtc {

namespace H265 {
// The size of a full NALU start sequence {0 0 0 1}, used for the first NALU
// of an access unit, and for SPS and PPS blocks.
const size_t kNaluLongStartSequenceSize = 4;

// The size of a shortened NALU start sequence {0 0 1}, that may be used if
// not the first NALU of an access unit or an SPS or PPS block.
const size_t kNaluShortStartSequenceSize = 3;

// The size of the NALU header in bytes (2 for H.265, vs. 1 for H.264).
const size_t kNaluTypeSize = 2;

enum NaluType : uint8_t {
kTrailN = 0,
kTrailR = 1,
kTsaN = 2,
kTsaR = 3,
kStsaN = 4,
kStsaR = 5,
kRadlN = 6,
kRadlR = 7,
kBlaWLp = 16,
kBlaWRadl = 17,
kBlaNLp = 18,
kIdrWRadl = 19,
kIdrNLp = 20,
kCra = 21,
kRsvIrapVcl23 = 23,
kVps = 32,
kSps = 33,
kPps = 34,
kAud = 35,
kPrefixSei = 39,
kSuffixSei = 40,
kAP = 48,
kFU = 49
};

enum SliceType : uint8_t { kB = 0, kP = 1, kI = 2 };

struct NaluIndex {
// Start index of NALU, including start sequence.
size_t start_offset;
// Start index of NALU payload, typically type header.
size_t payload_start_offset;
// Length of NALU payload, in bytes, counting from payload_start_offset.
size_t payload_size;
};

// Returns a vector of the NALU indices in the given buffer.
std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer,
size_t buffer_size);

// Get the NAL type from the header byte immediately following start sequence.
NaluType ParseNaluType(uint8_t data);

// Methods for parsing and writing RBSP. See section 7.4.2 of the H265 spec.
//
// The following sequences are illegal, and need to be escaped when encoding:
// 00 00 00 -> 00 00 03 00
// 00 00 01 -> 00 00 03 01
// 00 00 02 -> 00 00 03 02
// And things in the source that look like the emulation byte pattern (00 00 03)
// need to have an extra emulation byte added, so it's removed when decoding:
// 00 00 03 -> 00 00 03 03
//
// Decoding is simply a matter of finding any 00 00 03 sequence and removing
// the 03 emulation byte.

// Parse the given data and remove any emulation byte escaping.
std::vector<uint8_t> ParseRbsp(const uint8_t* data, size_t length);

// Write the given data to the destination buffer, inserting emulation
// bytes in order to escape any data that could be interpreted as a start
// sequence.
void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination);

uint32_t Log2(uint32_t value);
} // namespace H265
} // namespace webrtc

#endif // COMMON_VIDEO_H265_H265_COMMON_H_
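
Because the two-byte HEVC NAL header packs forbidden_zero_bit (1), nal_unit_type (6), nuh_layer_id (6) and nuh_temporal_id_plus1 (3), ParseNaluType() only needs the first byte: mask with 0x7E and shift right once. A small worked example (illustrative, not part of the patch):

#include <cassert>

#include "common_video/h265/h265_common.h"

int main() {
  // First header byte 0x40: (0x40 & 0x7E) >> 1 == 32, i.e. a VPS NALU.
  assert(webrtc::H265::ParseNaluType(0x40) == webrtc::H265::NaluType::kVps);
  assert(webrtc::H265::ParseNaluType(0x42) == webrtc::H265::NaluType::kSps);  // 33
  assert(webrtc::H265::ParseNaluType(0x44) == webrtc::H265::NaluType::kPps);  // 34
  assert(webrtc::H265::ParseNaluType(0x26) == webrtc::H265::NaluType::kIdrWRadl);  // 19
  return 0;
}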

common_video/h265/h265_pps_parser.cc
#include "common_video/h265/h265_pps_parser.h"

#include <memory>
#include <vector>

#include "common_video/h265/h265_common.h"
#include "common_video/h265/h265_sps_parser.h"
#include "rtc_base/bit_buffer.h"
#include "rtc_base/logging.h"

#define RETURN_EMPTY_ON_FAIL(x) \
if (!(x)) { \
return absl::nullopt; \
}

namespace {
const int kMaxPicInitQpDeltaValue = 25;
const int kMinPicInitQpDeltaValue = -26;
} // namespace

namespace webrtc {

// General note: this is based off the 06/2019 version of the H.265 standard.
// You can find it on this page:
// http://www.itu.int/rec/T-REC-H.265

absl::optional<H265PpsParser::PpsState> H265PpsParser::ParsePps(
const uint8_t* data,
size_t length) {
// First, parse out rbsp, which is basically the source buffer minus emulation
// bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in
// section 7.3.1.1 of the H.265 standard.
std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp(data, length);
rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size());
return ParseInternal(&bit_buffer);
}

bool H265PpsParser::ParsePpsIds(const uint8_t* data,
size_t length,
uint32_t* pps_id,
uint32_t* sps_id) {
RTC_DCHECK(pps_id);
RTC_DCHECK(sps_id);
// First, parse out rbsp, which is basically the source buffer minus emulation
// bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in
// section 7.3.1.1 of the H.265 standard.
std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp(data, length);
rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size());
return ParsePpsIdsInternal(&bit_buffer, pps_id, sps_id);
}

absl::optional<uint32_t> H265PpsParser::ParsePpsIdFromSliceSegmentLayerRbsp(
const uint8_t* data,
size_t length,
uint8_t nalu_type) {
rtc::BitBuffer slice_reader(data, length);

// first_slice_segment_in_pic_flag: u(1)
uint32_t first_slice_segment_in_pic_flag = 0;
RETURN_EMPTY_ON_FAIL(
slice_reader.ReadBits(&first_slice_segment_in_pic_flag, 1));

if (nalu_type >= H265::NaluType::kBlaWLp &&
nalu_type <= H265::NaluType::kRsvIrapVcl23) {
// no_output_of_prior_pics_flag: u(1)
RETURN_EMPTY_ON_FAIL(slice_reader.ConsumeBits(1));
}

// slice_pic_parameter_set_id: ue(v)
uint32_t slice_pic_parameter_set_id = 0;
if (!slice_reader.ReadExponentialGolomb(&slice_pic_parameter_set_id))
return absl::nullopt;

return slice_pic_parameter_set_id;
}

absl::optional<H265PpsParser::PpsState> H265PpsParser::ParseInternal(
rtc::BitBuffer* bit_buffer) {
PpsState pps;

RETURN_EMPTY_ON_FAIL(ParsePpsIdsInternal(bit_buffer, &pps.id, &pps.sps_id));

uint32_t bits_tmp;
uint32_t golomb_ignored;
int32_t signed_golomb_ignored;
// dependent_slice_segments_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.dependent_slice_segments_enabled_flag, 1));
// output_flag_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.output_flag_present_flag, 1));
// num_extra_slice_header_bits: u(3)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.num_extra_slice_header_bits, 3));
// sign_data_hiding_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// cabac_init_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.cabac_init_present_flag, 1));
// num_ref_idx_l0_default_active_minus1: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&pps.num_ref_idx_l0_default_active_minus1));
// num_ref_idx_l1_default_active_minus1: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&pps.num_ref_idx_l1_default_active_minus1));
// init_qp_minus26: se(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadSignedExponentialGolomb(&pps.pic_init_qp_minus26));
// Sanity-check parsed value
if (pps.pic_init_qp_minus26 > kMaxPicInitQpDeltaValue ||
pps.pic_init_qp_minus26 < kMinPicInitQpDeltaValue) {
RETURN_EMPTY_ON_FAIL(false);
}
// constrained_intra_pred_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// transform_skip_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// cu_qp_delta_enabled_flag: u(1)
uint32_t cu_qp_delta_enabled_flag = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&cu_qp_delta_enabled_flag, 1));
if (cu_qp_delta_enabled_flag) {
// diff_cu_qp_delta_depth: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
}
// pps_cb_qp_offset: se(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadSignedExponentialGolomb(&signed_golomb_ignored));
// pps_cr_qp_offset: se(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadSignedExponentialGolomb(&signed_golomb_ignored));
// pps_slice_chroma_qp_offsets_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// weighted_pred_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.weighted_pred_flag, 1));
// weighted_bipred_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.weighted_bipred_flag, 1));
// transquant_bypass_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// tiles_enabled_flag: u(1)
uint32_t tiles_enabled_flag = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&tiles_enabled_flag, 1));
// entropy_coding_sync_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
if (tiles_enabled_flag) {
// num_tile_columns_minus1: ue(v)
uint32_t num_tile_columns_minus1 = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&num_tile_columns_minus1));
// num_tile_rows_minus1: ue(v)
uint32_t num_tile_rows_minus1 = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&num_tile_rows_minus1));
// uniform_spacing_flag: u(1)
uint32_t uniform_spacing_flag = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&uniform_spacing_flag, 1));
if (!uniform_spacing_flag) {
for (uint32_t i = 0; i < num_tile_columns_minus1; i++) {
// column_width_minus1: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
}
for (uint32_t i = 0; i < num_tile_rows_minus1; i++) {
// row_height_minus1: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
}
// loop_filter_across_tiles_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
}
}
// pps_loop_filter_across_slices_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// deblocking_filter_control_present_flag: u(1)
uint32_t deblocking_filter_control_present_flag = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&deblocking_filter_control_present_flag, 1));
if (deblocking_filter_control_present_flag) {
// deblocking_filter_override_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
// pps_deblocking_filter_disabled_flag: u(1)
uint32_t pps_deblocking_filter_disabled_flag = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps_deblocking_filter_disabled_flag, 1));
if (!pps_deblocking_filter_disabled_flag) {
// pps_beta_offset_div2: se(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadSignedExponentialGolomb(&signed_golomb_ignored));
// pps_tc_offset_div2: se(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadSignedExponentialGolomb(&signed_golomb_ignored));
}
}
// pps_scaling_list_data_present_flag: u(1)
uint32_t pps_scaling_list_data_present_flag = 0;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps_scaling_list_data_present_flag, 1));
if (pps_scaling_list_data_present_flag) {
// scaling_list_data()
if (!H265SpsParser::ParseScalingListData(bit_buffer)) {
return absl::nullopt;
}
}
// lists_modification_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.lists_modification_present_flag, 1));
// log2_parallel_merge_level_minus2: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
// slice_segment_header_extension_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));

return pps;
}

bool H265PpsParser::ParsePpsIdsInternal(rtc::BitBuffer* bit_buffer,
uint32_t* pps_id,
uint32_t* sps_id) {
// pic_parameter_set_id: ue(v)
if (!bit_buffer->ReadExponentialGolomb(pps_id))
return false;
// seq_parameter_set_id: ue(v)
if (!bit_buffer->ReadExponentialGolomb(sps_id))
return false;
return true;
}

} // namespace webrtc

common_video/h265/h265_pps_parser.h
#ifndef COMMON_VIDEO_H265_PPS_PARSER_H_
#define COMMON_VIDEO_H265_PPS_PARSER_H_

#include "absl/types/optional.h"

namespace rtc {
class BitBuffer;
}

namespace webrtc {

// A class for parsing out picture parameter set (PPS) data from an H265 NALU.
class H265PpsParser {
public:
// The parsed state of the PPS. Only some select values are stored.
// Add more as they are actually needed.
struct PpsState {
PpsState() = default;

uint32_t dependent_slice_segments_enabled_flag = 0;
uint32_t cabac_init_present_flag = 0;
uint32_t output_flag_present_flag = 0;
uint32_t num_extra_slice_header_bits = 0;
uint32_t num_ref_idx_l0_default_active_minus1 = 0;
uint32_t num_ref_idx_l1_default_active_minus1 = 0;
int32_t pic_init_qp_minus26 = 0;
uint32_t weighted_pred_flag = 0;
uint32_t weighted_bipred_flag = 0;
uint32_t lists_modification_present_flag = 0;
uint32_t id = 0;
uint32_t sps_id = 0;
};

// Unpack RBSP and parse PPS state from the supplied buffer.
static absl::optional<PpsState> ParsePps(const uint8_t* data, size_t length);

static bool ParsePpsIds(const uint8_t* data,
size_t length,
uint32_t* pps_id,
uint32_t* sps_id);

static absl::optional<uint32_t> ParsePpsIdFromSliceSegmentLayerRbsp(
const uint8_t* data,
size_t length,
uint8_t nalu_type);

protected:
// Parse the PPS state, for a bit buffer where RBSP decoding has already been
// performed.
static absl::optional<PpsState> ParseInternal(rtc::BitBuffer* bit_buffer);
static bool ParsePpsIdsInternal(rtc::BitBuffer* bit_buffer,
uint32_t* pps_id,
uint32_t* sps_id);
};

} // namespace webrtc

#endif // COMMON_VIDEO_H265_PPS_PARSER_H_
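
A minimal caller sketch (ParseInitQp is a hypothetical helper; pps_nalu/length are assumed to cover a whole PPS NALU including its 2-byte header, as in ParseSlice() above):

#include "common_video/h265/h265_common.h"
#include "common_video/h265/h265_pps_parser.h"

// Returns the PPS-level initial QP (26 + init_qp_minus26), or -1 on failure.
int ParseInitQp(const uint8_t* pps_nalu, size_t length) {
  // Skip the 2-byte NAL unit header before handing the payload to the parser.
  absl::optional<webrtc::H265PpsParser::PpsState> pps =
      webrtc::H265PpsParser::ParsePps(pps_nalu + webrtc::H265::kNaluTypeSize,
                                      length - webrtc::H265::kNaluTypeSize);
  return pps ? 26 + pps->pic_init_qp_minus26 : -1;
}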

common_video/h265/h265_sps_parser.cc
#include <memory>
#include <vector>

#include "common_video/h265/h265_common.h"
#include "common_video/h265/h265_sps_parser.h"
#include "rtc_base/bit_buffer.h"
#include "rtc_base/logging.h"

namespace {
typedef absl::optional<webrtc::H265SpsParser::SpsState> OptionalSps;
typedef absl::optional<webrtc::H265SpsParser::ShortTermRefPicSet> OptionalShortTermRefPicSet;

#define RETURN_EMPTY_ON_FAIL(x) \
if (!(x)) { \
return OptionalSps(); \
}

#define RETURN_FALSE_ON_FAIL(x) \
if (!(x)) { \
return false; \
}

#define RETURN_EMPTY2_ON_FAIL(x) \
if (!(x)) { \
return OptionalShortTermRefPicSet(); \
}
} // namespace

namespace webrtc {

H265SpsParser::SpsState::SpsState() = default;

H265SpsParser::ShortTermRefPicSet::ShortTermRefPicSet() = default;

// General note: this is based off the 06/2019 version of the H.265 standard.
// You can find it on this page:
// http://www.itu.int/rec/T-REC-H.265

// Unpack RBSP and parse SPS state from the supplied buffer.
absl::optional<H265SpsParser::SpsState> H265SpsParser::ParseSps(
const uint8_t* data,
size_t length) {
std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp(data, length);
rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size());
return ParseSpsInternal(&bit_buffer);
}

bool H265SpsParser::ParseScalingListData(rtc::BitBuffer* buffer) {
uint32_t scaling_list_pred_mode_flag[4][6];
uint32_t scaling_list_pred_matrix_id_delta[4][6];
int32_t scaling_list_dc_coef_minus8[4][6];
int32_t scaling_list[4][6][64];
for (int size_id = 0; size_id < 4; size_id++) {
for (int matrix_id = 0; matrix_id < 6; matrix_id += (size_id == 3) ? 3 : 1) {
// scaling_list_pred_mode_flag: u(1)
RETURN_FALSE_ON_FAIL(buffer->ReadBits(&scaling_list_pred_mode_flag[size_id][matrix_id], 1));
if (!scaling_list_pred_mode_flag[size_id][matrix_id]) {
// scaling_list_pred_matrix_id_delta: ue(v)
RETURN_FALSE_ON_FAIL(buffer->ReadExponentialGolomb(&scaling_list_pred_matrix_id_delta[size_id][matrix_id]));
} else {
int32_t next_coef = 8;
uint32_t coef_num = std::min(64, 1 << (4 + (size_id << 1)));
if (size_id > 1) {
// scaling_list_dc_coef_minus8: se(v)
RETURN_FALSE_ON_FAIL(buffer->ReadSignedExponentialGolomb(&scaling_list_dc_coef_minus8[size_id - 2][matrix_id]));
next_coef = scaling_list_dc_coef_minus8[size_id - 2][matrix_id];
}
for (uint32_t i = 0; i < coef_num; i++) {
// scaling_list_delta_coef: se(v)
int32_t scaling_list_delta_coef = 0;
RETURN_FALSE_ON_FAIL(buffer->ReadSignedExponentialGolomb(&scaling_list_delta_coef));
next_coef = (next_coef + scaling_list_delta_coef + 256) % 256;
scaling_list[size_id][matrix_id][i] = next_coef;
}
}
}
}
return true;
}

absl::optional<H265SpsParser::ShortTermRefPicSet> H265SpsParser::ParseShortTermRefPicSet(
uint32_t st_rps_idx, uint32_t num_short_term_ref_pic_sets,
const std::vector<H265SpsParser::ShortTermRefPicSet>& short_term_ref_pic_set,
H265SpsParser::SpsState& sps, rtc::BitBuffer* buffer) {
H265SpsParser::ShortTermRefPicSet ref_pic_set;

uint32_t inter_ref_pic_set_prediction_flag = 0;
if (st_rps_idx != 0) {
// inter_ref_pic_set_prediction_flag: u(1)
RETURN_EMPTY2_ON_FAIL(buffer->ReadBits(&inter_ref_pic_set_prediction_flag, 1));
}
if (inter_ref_pic_set_prediction_flag) {
uint32_t delta_idx_minus1 = 0;
if (st_rps_idx == num_short_term_ref_pic_sets) {
// delta_idx_minus1: ue(v)
RETURN_EMPTY2_ON_FAIL(buffer->ReadExponentialGolomb(&delta_idx_minus1));
}
// delta_rps_sign: u(1)
uint32_t delta_rps_sign = 0;
RETURN_EMPTY2_ON_FAIL(buffer->ReadBits(&delta_rps_sign, 1));
// abs_delta_rps_minus1: ue(v)
uint32_t abs_delta_rps_minus1 = 0;
RETURN_EMPTY2_ON_FAIL(buffer->ReadExponentialGolomb(&abs_delta_rps_minus1));
uint32_t ref_rps_idx = st_rps_idx - (delta_idx_minus1 + 1);
uint32_t num_delta_pocs = 0;
if (short_term_ref_pic_set[ref_rps_idx].inter_ref_pic_set_prediction_flag) {
auto& used_by_curr_pic_flag = short_term_ref_pic_set[ref_rps_idx].used_by_curr_pic_flag;
auto& use_delta_flag = short_term_ref_pic_set[ref_rps_idx].use_delta_flag;
if (used_by_curr_pic_flag.size() != use_delta_flag.size()) {
return OptionalShortTermRefPicSet();
}
for (uint32_t i = 0; i < used_by_curr_pic_flag.size(); i++) {
if (used_by_curr_pic_flag[i] || use_delta_flag[i]) {
num_delta_pocs++;
}
}
} else {
num_delta_pocs = short_term_ref_pic_set[ref_rps_idx].num_negative_pics + short_term_ref_pic_set[ref_rps_idx].num_positive_pics;
}
ref_pic_set.used_by_curr_pic_flag.resize(num_delta_pocs + 1, 0);
ref_pic_set.use_delta_flag.resize(num_delta_pocs + 1, 1);
for (uint32_t j = 0; j <= num_delta_pocs; j++) {
// used_by_curr_pic_flag: u(1)
RETURN_EMPTY2_ON_FAIL(buffer->ReadBits(&ref_pic_set.used_by_curr_pic_flag[j], 1));
if (!ref_pic_set.used_by_curr_pic_flag[j]) {
// use_delta_flag: u(1)
RETURN_EMPTY2_ON_FAIL(buffer->ReadBits(&ref_pic_set.use_delta_flag[j], 1));
}
}
} else {
// num_negative_pics: ue(v)
RETURN_EMPTY2_ON_FAIL(buffer->ReadExponentialGolomb(&ref_pic_set.num_negative_pics));
// num_positive_pics: ue(v)
RETURN_EMPTY2_ON_FAIL(buffer->ReadExponentialGolomb(&ref_pic_set.num_positive_pics));

ref_pic_set.delta_poc_s0_minus1.resize(ref_pic_set.num_negative_pics, 0);
ref_pic_set.used_by_curr_pic_s0_flag.resize(ref_pic_set.num_negative_pics, 0);
for (uint32_t i = 0; i < ref_pic_set.num_negative_pics; i++) {
// delta_poc_s0_minus1: ue(v)
RETURN_EMPTY2_ON_FAIL(buffer->ReadExponentialGolomb(&ref_pic_set.delta_poc_s0_minus1[i]));
// used_by_curr_pic_s0_flag: u(1)
RETURN_EMPTY2_ON_FAIL(buffer->ReadBits(&ref_pic_set.used_by_curr_pic_s0_flag[i], 1));
}
ref_pic_set.delta_poc_s1_minus1.resize(ref_pic_set.num_positive_pics, 0);
ref_pic_set.used_by_curr_pic_s1_flag.resize(ref_pic_set.num_positive_pics, 0);
for (uint32_t i = 0; i < ref_pic_set.num_positive_pics; i++) {
// delta_poc_s1_minus1: ue(v)
RETURN_EMPTY2_ON_FAIL(buffer->ReadExponentialGolomb(&ref_pic_set.delta_poc_s1_minus1[i]));
// used_by_curr_pic_s1_flag: u(1)
RETURN_EMPTY2_ON_FAIL(buffer->ReadBits(&ref_pic_set.used_by_curr_pic_s1_flag[i], 1));
}
}

return OptionalShortTermRefPicSet(ref_pic_set);
}

absl::optional<H265SpsParser::SpsState> H265SpsParser::ParseSpsInternal(
rtc::BitBuffer* buffer) {
// Now, we need to use a bit buffer to parse through the actual HEVC SPS
// format. See Section 7.3.2.2.1 ("General sequence parameter set data
// syntax") of the H.265 standard for a complete description.
// Since we only care about resolution, we ignore the majority of fields, but
// we still have to actively parse through a lot of the data, since many of
// the fields have variable size.
// We're particularly interested in:
// chroma_format_idc -> affects crop units
// pic_{width,height}_in_luma_samples -> resolution of the frame.
// conf_win_*_offset -> crop information

SpsState sps;

// The golomb values we have to read, not just consume.
uint32_t golomb_ignored;

// sps_video_parameter_set_id: u(4)
uint32_t sps_video_parameter_set_id = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps_video_parameter_set_id, 4));
// sps_max_sub_layers_minus1: u(3)
uint32_t sps_max_sub_layers_minus1 = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps_max_sub_layers_minus1, 3));
sps.sps_max_sub_layers_minus1 = sps_max_sub_layers_minus1;
sps.sps_max_dec_pic_buffering_minus1.resize(sps_max_sub_layers_minus1 + 1, 0);
// sps_temporal_id_nesting_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
// profile_tier_level(1, sps_max_sub_layers_minus1). We are actually not
// using it, so read/skip over it.
// general_profile_space + general_tier_flag + general_profile_idc: u(8)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
// general_profile_compatibility_flag[32]
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(4));
// general_progressive_source_flag + interlaced_source_flag+
// non-packed_constraint flag + frame_only_constraint_flag: u(4)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(4));
// general_profile_idc decided flags or reserved. u(43)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(43));
// general_inbld_flag or reserved 0: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
// general_level_idc: u(8)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
// if max_sub_layers_minus1 >=1, read the sublayer profile information
std::vector<uint32_t> sub_layer_profile_present_flags;
std::vector<uint32_t> sub_layer_level_present_flags;
uint32_t sub_layer_profile_present = 0;
uint32_t sub_layer_level_present = 0;
for (uint32_t i = 0; i < sps_max_sub_layers_minus1; i++) {
// sub_layer_profile_present_flag and sub_layer_level_present_flag: u(2)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sub_layer_profile_present, 1));
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sub_layer_level_present, 1));
sub_layer_profile_present_flags.push_back(sub_layer_profile_present);
sub_layer_level_present_flags.push_back(sub_layer_level_present);
}
if (sps_max_sub_layers_minus1 > 0) {
for (uint32_t j = sps_max_sub_layers_minus1; j < 8; j++) {
// reserved 2 bits: u(2)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(2));
}
}
for (uint32_t k = 0; k < sps_max_sub_layers_minus1; k++) {
if (sub_layer_profile_present_flags[k]) {
// sub_layer profile_space/tier_flag/profile_idc. ignored. u(8)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
// profile_compatibility_flag: u(32)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(4));
// sub_layer progressive_source_flag/interlaced_source_flag/
// non_packed_constraint_flag/frame_only_constraint_flag: u(4)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(4));
// following 43-bits are profile_idc specific. We simply read/skip it.
// u(43)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(43));
// 1-bit profile_idc specific inbld flag. We simply read/skip it. u(1)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
}
if (sub_layer_level_present_flags[k]) {
// sub_layer_level_idc: u(8)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
}
}
// sps_seq_parameter_set_id: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.id));
// chroma_format_idc: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.chroma_format_idc));
if (sps.chroma_format_idc == 3) {
// separate_colour_plane_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.separate_colour_plane_flag, 1));
}
uint32_t pic_width_in_luma_samples = 0;
uint32_t pic_height_in_luma_samples = 0;
// pic_width_in_luma_samples: ue(v)
RETURN_EMPTY_ON_FAIL(
buffer->ReadExponentialGolomb(&pic_width_in_luma_samples));
// pic_height_in_luma_samples: ue(v)
RETURN_EMPTY_ON_FAIL(
buffer->ReadExponentialGolomb(&pic_height_in_luma_samples));
// conformance_window_flag: u(1)
uint32_t conformance_window_flag = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&conformance_window_flag, 1));

uint32_t conf_win_left_offset = 0;
uint32_t conf_win_right_offset = 0;
uint32_t conf_win_top_offset = 0;
uint32_t conf_win_bottom_offset = 0;
if (conformance_window_flag) {
// conf_win_left_offset: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&conf_win_left_offset));
// conf_win_right_offset: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&conf_win_right_offset));
// conf_win_top_offset: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&conf_win_top_offset));
// conf_win_bottom_offset: ue(v)
RETURN_EMPTY_ON_FAIL(
buffer->ReadExponentialGolomb(&conf_win_bottom_offset));
}

// bit_depth_luma_minus8: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// bit_depth_chroma_minus8: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// log2_max_pic_order_cnt_lsb_minus4: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.log2_max_pic_order_cnt_lsb_minus4));
uint32_t sps_sub_layer_ordering_info_present_flag = 0;
// sps_sub_layer_ordering_info_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps_sub_layer_ordering_info_present_flag, 1));
for (uint32_t i = (sps_sub_layer_ordering_info_present_flag != 0) ? 0 : sps_max_sub_layers_minus1;
i <= sps_max_sub_layers_minus1; i++) {
// sps_max_dec_pic_buffering_minus1: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.sps_max_dec_pic_buffering_minus1[i]));
// sps_max_num_reorder_pics: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// sps_max_latency_increase_plus1: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
}
// log2_min_luma_coding_block_size_minus3: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.log2_min_luma_coding_block_size_minus3));
// log2_diff_max_min_luma_coding_block_size: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.log2_diff_max_min_luma_coding_block_size));
// log2_min_luma_transform_block_size_minus2: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// log2_diff_max_min_luma_transform_block_size: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// max_transform_hierarchy_depth_inter: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// max_transform_hierarchy_depth_intra: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// scaling_list_enabled_flag: u(1)
uint32_t scaling_list_enabled_flag = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&scaling_list_enabled_flag, 1));
if (scaling_list_enabled_flag) {
// sps_scaling_list_data_present_flag: u(1)
uint32_t sps_scaling_list_data_present_flag = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps_scaling_list_data_present_flag, 1));
if (sps_scaling_list_data_present_flag) {
// scaling_list_data()
if (!ParseScalingListData(buffer)) {
return OptionalSps();
}
}
}

// amp_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
// sample_adaptive_offset_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.sample_adaptive_offset_enabled_flag, 1));
// pcm_enabled_flag: u(1)
uint32_t pcm_enabled_flag = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&pcm_enabled_flag, 1));
if (pcm_enabled_flag) {
// pcm_sample_bit_depth_luma_minus1: u(4)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(4));
// pcm_sample_bit_depth_chroma_minus1: u(4)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(4));
// log2_min_pcm_luma_coding_block_size_minus3: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// log2_diff_max_min_pcm_luma_coding_block_size: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// pcm_loop_filter_disabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
}

// num_short_term_ref_pic_sets: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.num_short_term_ref_pic_sets));
sps.short_term_ref_pic_set.resize(sps.num_short_term_ref_pic_sets);
for (uint32_t st_rps_idx = 0; st_rps_idx < sps.num_short_term_ref_pic_sets; st_rps_idx++) {
// st_ref_pic_set()
OptionalShortTermRefPicSet ref_pic_set = ParseShortTermRefPicSet(
st_rps_idx, sps.num_short_term_ref_pic_sets, sps.short_term_ref_pic_set, sps, buffer);
if (ref_pic_set) {
sps.short_term_ref_pic_set[st_rps_idx] = *ref_pic_set;
} else {
return OptionalSps();
}
}

// long_term_ref_pics_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.long_term_ref_pics_present_flag, 1));
if (sps.long_term_ref_pics_present_flag) {
// num_long_term_ref_pics_sps: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.num_long_term_ref_pics_sps));
sps.used_by_curr_pic_lt_sps_flag.resize(sps.num_long_term_ref_pics_sps, 0);
for (uint32_t i = 0; i < sps.num_long_term_ref_pics_sps; i++) {
// lt_ref_pic_poc_lsb_sps: u(v)
uint32_t lt_ref_pic_poc_lsb_sps_bits = sps.log2_max_pic_order_cnt_lsb_minus4 + 4;
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(lt_ref_pic_poc_lsb_sps_bits));
// used_by_curr_pic_lt_sps_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.used_by_curr_pic_lt_sps_flag[i], 1));
}
}

// sps_temporal_mvp_enabled_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.sps_temporal_mvp_enabled_flag, 1));

// Far enough! We don't use the rest of the SPS.

sps.vps_id = sps_video_parameter_set_id;

sps.pic_width_in_luma_samples = pic_width_in_luma_samples;
sps.pic_height_in_luma_samples = pic_height_in_luma_samples;

// Start with the resolution determined by the pic_width/pic_height fields.
sps.width = pic_width_in_luma_samples;
sps.height = pic_height_in_luma_samples;

if (conformance_window_flag) {
int sub_width_c = ((1 == sps.chroma_format_idc) || (2 == sps.chroma_format_idc)) &&
(0 == sps.separate_colour_plane_flag)
? 2
: 1;
int sub_height_c =
(1 == sps.chroma_format_idc) && (0 == sps.separate_colour_plane_flag) ? 2 : 1;
// The offsets count samples inside the conformance window, so no +1 is
// needed (see the spec).
sps.width -= sub_width_c * (conf_win_right_offset + conf_win_left_offset);
sps.height -= sub_height_c * (conf_win_top_offset + conf_win_bottom_offset);
}

return OptionalSps(sps);
}

} // namespace webrtc
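
The conformance-window arithmetic at the end is worth a worked example (illustrative numbers): a 4:2:0 stream (chroma_format_idc = 1, separate_colour_plane_flag = 0, hence SubWidthC = SubHeightC = 2) coded as 1280x736 luma samples with conf_win_bottom_offset = 8 and all other offsets 0 decodes to 1280 x (736 - 2*8) = 1280x720; the window simply trims the padding the encoder added to reach a CTU-size multiple.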

common_video/h265/h265_sps_parser.h
#ifndef COMMON_VIDEO_H265_H265_SPS_PARSER_H_
#define COMMON_VIDEO_H265_H265_SPS_PARSER_H_

#include <vector>

#include "absl/types/optional.h"

namespace rtc {
class BitBuffer;
}

namespace webrtc {

// A class for parsing out sequence parameter set (SPS) data from an H265 NALU.
class H265SpsParser {
public:

struct ShortTermRefPicSet {
ShortTermRefPicSet();

uint32_t inter_ref_pic_set_prediction_flag = 0;
std::vector<uint32_t> used_by_curr_pic_flag;
std::vector<uint32_t> use_delta_flag;
uint32_t num_negative_pics = 0;
uint32_t num_positive_pics = 0;
std::vector<uint32_t> delta_poc_s0_minus1;
std::vector<uint32_t> used_by_curr_pic_s0_flag;
std::vector<uint32_t> delta_poc_s1_minus1;
std::vector<uint32_t> used_by_curr_pic_s1_flag;
};

// The parsed state of the SPS. Only some select values are stored.
// Add more as they are actually needed.
struct SpsState {
SpsState();

uint32_t sps_max_sub_layers_minus1;
uint32_t chroma_format_idc = 0;
uint32_t separate_colour_plane_flag = 0;
uint32_t pic_width_in_luma_samples = 0;
uint32_t pic_height_in_luma_samples = 0;
uint32_t log2_max_pic_order_cnt_lsb_minus4 = 0;
std::vector<uint32_t> sps_max_dec_pic_buffering_minus1;
uint32_t log2_min_luma_coding_block_size_minus3 = 0;
uint32_t log2_diff_max_min_luma_coding_block_size = 0;
uint32_t sample_adaptive_offset_enabled_flag = 0;
uint32_t num_short_term_ref_pic_sets = 0;
std::vector<H265SpsParser::ShortTermRefPicSet> short_term_ref_pic_set;
uint32_t long_term_ref_pics_present_flag = 0;
uint32_t num_long_term_ref_pics_sps = 0;
std::vector<uint32_t> used_by_curr_pic_lt_sps_flag;
uint32_t sps_temporal_mvp_enabled_flag = 0;
uint32_t width = 0;
uint32_t height = 0;
uint32_t id = 0;
uint32_t vps_id = 0;
};

// Unpack RBSP and parse SPS state from the supplied buffer.
static absl::optional<SpsState> ParseSps(const uint8_t* data, size_t length);

static bool ParseScalingListData(rtc::BitBuffer* buffer);

static absl::optional<ShortTermRefPicSet> ParseShortTermRefPicSet(
uint32_t st_rps_idx, uint32_t num_short_term_ref_pic_sets,
const std::vector<ShortTermRefPicSet>& ref_pic_sets,
SpsState& sps, rtc::BitBuffer* buffer);

protected:
// Parse the SPS state, for a bit buffer where RBSP decoding has already been
// performed.
static absl::optional<SpsState> ParseSpsInternal(rtc::BitBuffer* buffer);
};

} // namespace webrtc
#endif // COMMON_VIDEO_H265_H265_SPS_PARSER_H_

common_video/h265/h265_vps_parser.cc
#include <memory>
#include <vector>

#include "common_video/h265/h265_common.h"
#include "common_video/h265/h265_vps_parser.h"
#include "rtc_base/bit_buffer.h"
#include "rtc_base/logging.h"

namespace {
typedef absl::optional<webrtc::H265VpsParser::VpsState> OptionalVps;

#define RETURN_EMPTY_ON_FAIL(x) \
if (!(x)) { \
return OptionalVps(); \
}
} // namespace

namespace webrtc {

H265VpsParser::VpsState::VpsState() = default;

// General note: this is based off the 06/2019 version of the H.265 standard.
// You can find it on this page:
// http://www.itu.int/rec/T-REC-H.265

// Unpack RBSP and parse VPS state from the supplied buffer.
absl::optional<H265VpsParser::VpsState> H265VpsParser::ParseVps(
const uint8_t* data,
size_t length) {
std::vector<uint8_t> unpacked_buffer = H265::ParseRbsp(data, length);
rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size());
return ParseInternal(&bit_buffer);
}

absl::optional<H265VpsParser::VpsState> H265VpsParser::ParseInternal(
rtc::BitBuffer* buffer) {
// Now, we need to use a bit buffer to parse through the actual HEVC VPS
// format. See Section 7.3.2.1 ("Video parameter set RBSP syntax") of the
// H.265 standard for a complete description.

VpsState vps;

// vps_video_parameter_set_id: u(4)
vps.id = 0;
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&vps.id, 4));

return OptionalVps(vps);
}

} // namespace webrtc

common_video/h265/h265_vps_parser.h
#ifndef COMMON_VIDEO_H265_H265_VPS_PARSER_H_
#define COMMON_VIDEO_H265_H265_VPS_PARSER_H_

#include "absl/types/optional.h"

namespace rtc {
class BitBuffer;
}

namespace webrtc {

// A class for parsing out video parameter set (VPS) data from an H265 NALU.
class H265VpsParser {
public:
// The parsed state of the VPS. Only some select values are stored.
// Add more as they are actually needed.
struct VpsState {
VpsState();

uint32_t id = 0;
};

// Unpack RBSP and parse VPS state from the supplied buffer.
static absl::optional<VpsState> ParseVps(const uint8_t* data, size_t length);

protected:
// Parse the VPS state, for a bit buffer where RBSP decoding has already been
// performed.
static absl::optional<VpsState> ParseInternal(rtc::BitBuffer* bit_buffer);
};

} // namespace webrtc
#endif // COMMON_VIDEO_H265_H265_VPS_PARSER_H_

  • Register the new files with the build so ninja compiles them (in common_video's BUILD.gn):
if (rtc_use_h265) {
sources += [
"h265/h265_bitstream_parser.cc",
"h265/h265_bitstream_parser.h",
"h265/h265_common.cc",
"h265/h265_common.h",
"h265/h265_pps_parser.cc",
"h265/h265_pps_parser.h",
"h265/h265_sps_parser.cc",
"h265/h265_sps_parser.h",
"h265/h265_vps_parser.cc",
"h265/h265_vps_parser.h",
]
}

Miscellaneous configuration changes

  • RTC event log configuration:
logging/rtc_event_log/encoder/rtc_event_log_encoder_new_format.cc
rtclog2::FrameDecodedEvents::Codec ConvertToProtoFormat(VideoCodecType codec) {
switch (codec) {
...
case VideoCodecType::kVideoCodecH264:
return rtclog2::FrameDecodedEvents::CODEC_H264;
#ifndef DISABLE_H265
case VideoCodecType::kVideoCodecH265:
return rtclog2::FrameDecodedEvents::CODEC_H265;
#endif
case VideoCodecType::kVideoCodecMultiplex:
// This codec type is afaik not used.
return rtclog2::FrameDecodedEvents::CODEC_UNKNOWN;
}
RTC_NOTREACHED();
return rtclog2::FrameDecodedEvents::CODEC_UNKNOWN;
}
logging/rtc_event_log/rtc_event_log2.proto
message FrameDecodedEvents {
enum Codec {
CODEC_UNKNOWN = 0;
CODEC_GENERIC = 1;
CODEC_VP8 = 2;
CODEC_VP9 = 3;
CODEC_AV1 = 4;
CODEC_H264 = 5;
CODEC_H265 = 6;
}
...
}
  • Related constants:
media/base/media_constants.cc
const char kHEVCCodecName[] = "H265X";
#ifndef DISABLE_H265
const char kH265CodecName[] = "H265";
#endif

// RFC 6184 RTP Payload Format for H.264 video
const char kH264FmtpProfileLevelId[] = "profile-level-id";
const char kH264FmtpLevelAsymmetryAllowed[] = "level-asymmetry-allowed";
const char kH264FmtpPacketizationMode[] = "packetization-mode";
const char kH264FmtpSpropParameterSets[] = "sprop-parameter-sets";
const char kH264FmtpSpsPpsIdrInKeyframe[] = "sps-pps-idr-in-keyframe";
const char kH264ProfileLevelConstrainedBaseline[] = "42e01f";
const char kH264ProfileLevelConstrainedHigh[] = "640c1f";
#ifndef DISABLE_H265
// RFC 7798 RTP Payload Format for H.265 video
const char kH265FmtpProfileSpace[] = "profile-space";
const char kH265FmtpProfileId[] = "profile-id";
const char kH265FmtpTierFlag[] = "tier-flag";
const char kH265FmtpLevelId[] = "level-id";
#endif
media/base/media_constants.h
RTC_EXPORT extern const char kHEVCCodecName[];
#ifndef DISABLE_H265
RTC_EXPORT extern const char kH265CodecName[];
#endif

// RFC 6184 RTP Payload Format for H.264 video
RTC_EXPORT extern const char kH264FmtpProfileLevelId[];
RTC_EXPORT extern const char kH264FmtpLevelAsymmetryAllowed[];
RTC_EXPORT extern const char kH264FmtpPacketizationMode[];
extern const char kH264FmtpSpropParameterSets[];
extern const char kH264FmtpSpsPpsIdrInKeyframe[];
extern const char kH264ProfileLevelConstrainedBaseline[];
extern const char kH264ProfileLevelConstrainedHigh[];

#ifndef DISABLE_H265
// RFC 7798 RTP Payload Format for H.265 video
RTC_EXPORT extern const char kH265FmtpProfileSpace[];
RTC_EXPORT extern const char kH265FmtpProfileId[];
RTC_EXPORT extern const char kH265FmtpTierFlag[];
RTC_EXPORT extern const char kH265FmtpLevelId[];
#endif
extern const int kDefaultVideoMaxFramerate;
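
In the negotiated SDP these constants surface as RFC 7798 format parameters, e.g. a line of the form a=fmtp:98 profile-id=1;tier-flag=0;level-id=93 (illustrative values: Main profile, Main tier, level 3.1).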

Finally, enable the H265 switch

BUILD.gn
config("common_inherited_config") {
...

if (!rtc_use_h265) {
defines += [ "DISABLE_H265" ]
}
}
build_overrides/build.gni
if (is_win || is_ios || is_android) {
rtc_use_h265 = true
} else {
rtc_use_h265 = false
}
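
With these defaults, only Windows, iOS, and Android builds compile the H265 path; every other platform keeps the DISABLE_H265 define from BUILD.gn above.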


After this long series of changes, WebRTC supports H265. While testing the difference between H265 and H264, the author found that the JitterBufferCache dropped by 140 ms — as expected from a more complex algorithm and a higher compression ratio.

Author: 8MilesRD

Published: 2022-09-29

Updated: 2022-09-30