Source: lib/transmuxer/h264.js

  1. /*! @license
  2. * Shaka Player
  3. * Copyright 2016 Google LLC
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. goog.provide('shaka.transmuxer.H264');
  7. goog.require('shaka.util.ExpGolomb');
  8. goog.require('shaka.util.Uint8ArrayUtils');
  9. /**
  10. * H.264 utils
  11. */
  12. shaka.transmuxer.H264 = class {
  13. /**
  14. * Read a sequence parameter set and return some interesting video
  15. * properties. A sequence parameter set is the H264 metadata that
  16. * describes the properties of upcoming video frames.
  17. *
  18. * @param {!Array.<shaka.extern.VideoNalu>} nalus
  19. * @return {?{height: number, width: number, videoConfig: !Uint8Array,
  20. * hSpacing: number, vSpacing: number}}
  21. */
  22. static parseInfo(nalus) {
  23. const H264 = shaka.transmuxer.H264;
  24. if (!nalus.length) {
  25. return null;
  26. }
  27. const spsNalu = nalus.find((nalu) => {
  28. return nalu.type == H264.NALU_TYPE_SPS_;
  29. });
  30. const ppsNalu = nalus.find((nalu) => {
  31. return nalu.type == H264.NALU_TYPE_PPS_;
  32. });
  33. if (!spsNalu || !ppsNalu) {
  34. return null;
  35. }
  36. const expGolombDecoder = new shaka.util.ExpGolomb(spsNalu.data);
  37. // profile_idc
  38. const profileIdc = expGolombDecoder.readUnsignedByte();
  39. // constraint_set[0-5]_flag
  40. expGolombDecoder.readUnsignedByte();
  41. // level_idc u(8)
  42. expGolombDecoder.readUnsignedByte();
  43. // seq_parameter_set_id
  44. expGolombDecoder.skipExpGolomb();
  45. // some profiles have more optional data we don't need
  46. if (H264.PROFILES_WITH_OPTIONAL_SPS_DATA_.includes(profileIdc)) {
  47. const chromaFormatIdc = expGolombDecoder.readUnsignedExpGolomb();
  48. if (chromaFormatIdc === 3) {
  49. // separate_colour_plane_flag
  50. expGolombDecoder.skipBits(1);
  51. }
  52. // bit_depth_luma_minus8
  53. expGolombDecoder.skipExpGolomb();
  54. // bit_depth_chroma_minus8
  55. expGolombDecoder.skipExpGolomb();
  56. // qpprime_y_zero_transform_bypass_flag
  57. expGolombDecoder.skipBits(1);
  58. // seq_scaling_matrix_present_flag
  59. if (expGolombDecoder.readBoolean()) {
  60. const scalingListCount = (chromaFormatIdc !== 3) ? 8 : 12;
  61. for (let i = 0; i < scalingListCount; i++) {
  62. // seq_scaling_list_present_flag[ i ]
  63. if (expGolombDecoder.readBoolean()) {
  64. if (i < 6) {
  65. expGolombDecoder.skipScalingList(16);
  66. } else {
  67. expGolombDecoder.skipScalingList(64);
  68. }
  69. }
  70. }
  71. }
  72. }
  73. // log2_max_frame_num_minus4
  74. expGolombDecoder.skipExpGolomb();
  75. const picOrderCntType = expGolombDecoder.readUnsignedExpGolomb();
  76. if (picOrderCntType === 0) {
  77. // log2_max_pic_order_cnt_lsb_minus4
  78. expGolombDecoder.readUnsignedExpGolomb();
  79. } else if (picOrderCntType === 1) {
  80. // delta_pic_order_always_zero_flag
  81. expGolombDecoder.skipBits(1);
  82. // offset_for_non_ref_pic
  83. expGolombDecoder.skipExpGolomb();
  84. // offset_for_top_to_bottom_field
  85. expGolombDecoder.skipExpGolomb();
  86. const numRefFramesInPicOrderCntCycle =
  87. expGolombDecoder.readUnsignedExpGolomb();
  88. for (let i = 0; i < numRefFramesInPicOrderCntCycle; i++) {
  89. // offset_for_ref_frame[ i ]
  90. expGolombDecoder.skipExpGolomb();
  91. }
  92. }
  93. // max_num_ref_frames
  94. expGolombDecoder.skipExpGolomb();
  95. // gaps_in_frame_num_value_allowed_flag
  96. expGolombDecoder.skipBits(1);
  97. const picWidthInMbsMinus1 =
  98. expGolombDecoder.readUnsignedExpGolomb();
  99. const picHeightInMapUnitsMinus1 =
  100. expGolombDecoder.readUnsignedExpGolomb();
  101. const frameMbsOnlyFlag = expGolombDecoder.readBits(1);
  102. if (frameMbsOnlyFlag === 0) {
  103. // mb_adaptive_frame_field_flag
  104. expGolombDecoder.skipBits(1);
  105. }
  106. // direct_8x8_inference_flag
  107. expGolombDecoder.skipBits(1);
  108. let frameCropLeftOffset = 0;
  109. let frameCropRightOffset = 0;
  110. let frameCropTopOffset = 0;
  111. let frameCropBottomOffset = 0;
  112. // frame_cropping_flag
  113. if (expGolombDecoder.readBoolean()) {
  114. frameCropLeftOffset = expGolombDecoder.readUnsignedExpGolomb();
  115. frameCropRightOffset = expGolombDecoder.readUnsignedExpGolomb();
  116. frameCropTopOffset = expGolombDecoder.readUnsignedExpGolomb();
  117. frameCropBottomOffset = expGolombDecoder.readUnsignedExpGolomb();
  118. }
  119. let hSpacing = 1;
  120. let vSpacing = 1;
  121. // vui_parameters_present_flag
  122. if (expGolombDecoder.readBoolean()) {
  123. // aspect_ratio_info_present_flag
  124. if (expGolombDecoder.readBoolean()) {
  125. const aspectRatioIdc = expGolombDecoder.readUnsignedByte();
  126. const hSpacingTable = [
  127. 1, 12, 10, 16, 40, 24, 20, 32, 80, 18, 15, 64, 160, 4, 3, 2,
  128. ];
  129. const vSpacingTable = [
  130. 1, 11, 11, 11, 33, 11, 11, 11, 33, 11, 11, 33, 99, 3, 2, 1,
  131. ];
  132. if (aspectRatioIdc > 0 && aspectRatioIdc <= 16) {
  133. hSpacing = hSpacingTable[aspectRatioIdc - 1];
  134. vSpacing = vSpacingTable[aspectRatioIdc - 1];
  135. } else if (aspectRatioIdc === 255) {
  136. hSpacing = expGolombDecoder.readBits(16);
  137. vSpacing = expGolombDecoder.readBits(16);
  138. }
  139. }
  140. }
  141. const height = ((2 - frameMbsOnlyFlag) *
  142. (picHeightInMapUnitsMinus1 + 1) * 16) - (frameCropTopOffset * 2) -
  143. (frameCropBottomOffset * 2);
  144. const width = ((picWidthInMbsMinus1 + 1) * 16) -
  145. frameCropLeftOffset * 2 - frameCropRightOffset * 2;
  146. // assemble the SPSs
  147. let sps = [];
  148. const spsData = spsNalu.fullData;
  149. sps.push((spsData.byteLength >>> 8) & 0xff);
  150. sps.push(spsData.byteLength & 0xff);
  151. sps = sps.concat(...spsData);
  152. // assemble the PPSs
  153. let pps = [];
  154. const ppsData = ppsNalu.fullData;
  155. pps.push((ppsData.byteLength >>> 8) & 0xff);
  156. pps.push(ppsData.byteLength & 0xff);
  157. pps = pps.concat(...ppsData);
  158. const videoConfig = new Uint8Array(
  159. [
  160. 0x01, // version
  161. sps[3], // profile
  162. sps[4], // profile compat
  163. sps[5], // level
  164. 0xfc | 3, // lengthSizeMinusOne, hard-coded to 4 bytes
  165. 0xe0 | 1, // 3bit reserved (111) + numOfSequenceParameterSets
  166. ].concat(sps).concat([
  167. 1, // numOfPictureParameterSets
  168. ]).concat(pps));
  169. return {
  170. height,
  171. width,
  172. videoConfig,
  173. hSpacing,
  174. vSpacing,
  175. };
  176. }
  177. /**
  178. * @param {!Array.<shaka.extern.MPEG_PES>} videoData
  179. * @return {!Array.<shaka.extern.VideoSample>}
  180. */
  181. static getVideoSamples(videoData) {
  182. const H264 = shaka.transmuxer.H264;
  183. /** @type {!Array.<shaka.extern.VideoSample>} */
  184. const videoSamples = [];
  185. /** @type {?shaka.extern.VideoSample} */
  186. let lastVideoSample = null;
  187. /** @type {boolean} */
  188. let audFound = false;
  189. const addLastVideoSample = () => {
  190. if (!lastVideoSample) {
  191. return;
  192. }
  193. if (!lastVideoSample.nalus.length || !lastVideoSample.frame) {
  194. return;
  195. }
  196. const nalusData = [];
  197. for (const nalu of lastVideoSample.nalus) {
  198. const size = nalu.fullData.byteLength;
  199. const naluLength = new Uint8Array(4);
  200. naluLength[0] = (size >> 24) & 0xff;
  201. naluLength[1] = (size >> 16) & 0xff;
  202. naluLength[2] = (size >> 8) & 0xff;
  203. naluLength[3] = size & 0xff;
  204. nalusData.push(naluLength);
  205. nalusData.push(nalu.fullData);
  206. }
  207. lastVideoSample.data = shaka.util.Uint8ArrayUtils.concat(...nalusData);
  208. videoSamples.push(lastVideoSample);
  209. };
  210. const createLastVideoSample = (pes) => {
  211. lastVideoSample = {
  212. data: new Uint8Array([]),
  213. frame: false,
  214. isKeyframe: false,
  215. pts: pes.pts,
  216. dts: pes.dts,
  217. nalus: [],
  218. };
  219. };
  220. for (let i = 0; i < videoData.length; i++) {
  221. const pes = videoData[i];
  222. const nalus = pes.nalus;
  223. let spsFound = false;
  224. // If new NAL units found and last sample still there, let's push ...
  225. // This helps parsing streams with missing AUD
  226. // (only do this if AUD never found)
  227. if (lastVideoSample && nalus.length && !audFound) {
  228. addLastVideoSample();
  229. createLastVideoSample(pes);
  230. }
  231. for (const nalu of pes.nalus) {
  232. let push = false;
  233. switch (nalu.type) {
  234. case H264.NALU_TYPE_NDR_: {
  235. let isKeyframe = false;
  236. push = true;
  237. const data = nalu.data;
  238. // Only check slice type to detect KF in case SPS found in same
  239. // packet (any keyframe is preceded by SPS ...)
  240. if (spsFound && data.length > 4) {
  241. // retrieve slice type by parsing beginning of NAL unit (follow
  242. // H264 spec,slice_header definition) to detect keyframe embedded
  243. // in NDR
  244. const sliceType = new shaka.util.ExpGolomb(data).readSliceType();
  245. // 2 : I slice, 4 : SI slice, 7 : I slice, 9: SI slice
  246. // SI slice : A slice that is coded using intra prediction only
  247. // and using quantisation of the prediction samples.
  248. // An SI slice can be coded such that its decoded samples can be
  249. // constructed identically to an SP slice.
  250. // I slice: A slice that is not an SI slice that is decoded using
  251. // intra prediction only.
  252. if (sliceType === 2 || sliceType === 4 ||
  253. sliceType === 7 || sliceType === 9) {
  254. isKeyframe = true;
  255. }
  256. }
  257. if (isKeyframe) {
  258. // If we have non-keyframe data already, that cannot belong to
  259. // the same frame as a keyframe, so force a push
  260. if (lastVideoSample &&
  261. lastVideoSample.frame && !lastVideoSample.isKeyframe) {
  262. addLastVideoSample();
  263. lastVideoSample = null;
  264. }
  265. }
  266. if (!lastVideoSample) {
  267. createLastVideoSample(pes);
  268. }
  269. lastVideoSample.frame = true;
  270. lastVideoSample.isKeyframe = isKeyframe;
  271. break;
  272. }
  273. case H264.NALU_TYPE_IDR_: {
  274. push = true;
  275. // Handle PES not starting with AUD
  276. // If we have frame data already, that cannot belong to the same
  277. // frame, so force a push
  278. if (lastVideoSample &&
  279. lastVideoSample.frame && !lastVideoSample.isKeyframe) {
  280. addLastVideoSample();
  281. lastVideoSample = null;
  282. }
  283. if (!lastVideoSample) {
  284. createLastVideoSample(pes);
  285. }
  286. lastVideoSample.frame = true;
  287. lastVideoSample.isKeyframe = true;
  288. break;
  289. }
  290. case H264.NALU_TYPE_SEI_:
  291. push = true;
  292. break;
  293. case H264.NALU_TYPE_SPS_:
  294. push = true;
  295. spsFound = true;
  296. break;
  297. case H264.NALU_TYPE_PPS_:
  298. push = true;
  299. break;
  300. case H264.NALU_TYPE_AUD_:
  301. push = true;
  302. audFound = true;
  303. if (lastVideoSample && lastVideoSample.frame) {
  304. addLastVideoSample();
  305. lastVideoSample = null;
  306. }
  307. if (!lastVideoSample) {
  308. createLastVideoSample(pes);
  309. }
  310. break;
  311. case H264.NALU_TYPE_FILLER_DATA_:
  312. push = true;
  313. break;
  314. default:
  315. push = false;
  316. break;
  317. }
  318. if (lastVideoSample && push) {
  319. lastVideoSample.nalus.push(nalu);
  320. }
  321. }
  322. }
  323. // If last PES packet, push samples
  324. addLastVideoSample();
  325. return videoSamples;
  326. }
  327. };
  328. /**
  329. * NALU type for NDR for H.264.
  330. * @const {number}
  331. * @private
  332. */
  333. shaka.transmuxer.H264.NALU_TYPE_NDR_ = 0x01;
  334. /**
  335. * NALU type for Instantaneous Decoder Refresh (IDR) for H.264.
  336. * @const {number}
  337. * @private
  338. */
  339. shaka.transmuxer.H264.NALU_TYPE_IDR_ = 0x05;
  340. /**
  341. * NALU type for Supplemental Enhancement Information (SEI) for H.264.
  342. * @const {number}
  343. * @private
  344. */
  345. shaka.transmuxer.H264.NALU_TYPE_SEI_ = 0x06;
  346. /**
  347. * NALU type for Sequence Parameter Set (SPS) for H.264.
  348. * @const {number}
  349. * @private
  350. */
  351. shaka.transmuxer.H264.NALU_TYPE_SPS_ = 0x07;
  352. /**
  353. * NALU type for Picture Parameter Set (PPS) for H.264.
  354. * @const {number}
  355. * @private
  356. */
  357. shaka.transmuxer.H264.NALU_TYPE_PPS_ = 0x08;
  358. /**
  359. * NALU type for Access Unit Delimiter (AUD) for H.264.
  360. * @const {number}
  361. * @private
  362. */
  363. shaka.transmuxer.H264.NALU_TYPE_AUD_ = 0x09;
  364. /**
  365. * NALU type for Filler Data for H.264.
  366. * @const {number}
  367. * @private
  368. */
  369. shaka.transmuxer.H264.NALU_TYPE_FILLER_DATA_ = 0x0c;
  370. /**
  371. * Values of profile_idc that indicate additional fields are included in the
  372. * SPS.
  373. * see Recommendation ITU-T H.264 (4/2013)
  374. * 7.3.2.1.1 Sequence parameter set data syntax
  375. *
  376. * @const {!Array.<number>}
  377. * @private
  378. */
  379. shaka.transmuxer.H264.PROFILES_WITH_OPTIONAL_SPS_DATA_ =
  380. [100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134];