From ffaed6432cefa5ab2ea2e6ba1095f67b1c81e451 Mon Sep 17 00:00:00 2001 From: Muhammad Haris Date: Thu, 11 Jun 2020 18:42:03 -0400 Subject: [PATCH] Created MP4 box parsers to parse data for common box types (#2648) --- build/types/core | 1 + externs/shaka/mp4_boxes.js | 95 +++++++++++++++ lib/hls/hls_parser.js | 16 +-- lib/text/mp4_vtt_parser.js | 150 ++++------------------- lib/util/mp4_box_parsers.js | 171 +++++++++++++++++++++++++++ test/media/transmuxer_integration.js | 6 +- 6 files changed, 298 insertions(+), 141 deletions(-) create mode 100644 externs/shaka/mp4_boxes.js create mode 100644 lib/util/mp4_box_parsers.js diff --git a/build/types/core b/build/types/core index c6d8a0c3dc..b14a78152c 100644 --- a/build/types/core +++ b/build/types/core @@ -77,6 +77,7 @@ +../../lib/util/map_utils.js +../../lib/util/media_ready_state_utils.js +../../lib/util/mime_utils.js ++../../lib/util/mp4_box_parsers.js +../../lib/util/mp4_parser.js +../../lib/util/multi_map.js +../../lib/util/networking.js diff --git a/externs/shaka/mp4_boxes.js b/externs/shaka/mp4_boxes.js new file mode 100644 index 0000000000..3016cfebe6 --- /dev/null +++ b/externs/shaka/mp4_boxes.js @@ -0,0 +1,95 @@ +/*! 
@license + * Shaka Player + * Copyright 2016 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + + +/** + * @externs + */ + +/** + * @typedef {{ + * defaultSampleDuration: ?number + * }} + * + * @property {?number} defaultSampleDuration + * If specified via flags, this overrides the default sample + * duration in the Track Extends Box for this fragment + * + * @exportDoc + */ +shaka.extern.ParsedTFHDBox; + +/** + * @typedef {{ + * baseMediaDecodeTime: !number + * }} + * + * @property {!number} baseMediaDecodeTime + * As per the spec: the absolute decode time, measured on the media + * timeline, of the first sample in decode order in the track fragment + * + * @exportDoc + */ +shaka.extern.ParsedTFDTBox; + +/** + * @typedef {{ + * timescale: !number + * }} + * + * @property {!number} timescale + * As per the spec: an integer that specifies the time-scale for this media; + * this is the number of time units that pass in one second + * + * @exportDoc + */ +shaka.extern.ParsedMDHDBox; + +/** + * @typedef {{ + * defaultSampleDuration: !number + * }} + * + * @property {!number} defaultSampleDuration + * The default sample duration to be used in track fragments + * + * @exportDoc + */ +shaka.extern.ParsedTREXBox; + +/** + * @typedef {{ + * sampleCount: !number, + * sampleData: !Array.<shaka.extern.TRUNSample> + * }} + * + * @property {!number} sampleCount + * As per the spec: the number of samples being added in this run; + * @property {!Array.<shaka.extern.TRUNSample>} sampleData + * An array of size <sampleCount> containing data for each sample + * + * @exportDoc + */ +shaka.extern.ParsedTRUNBox; + +/** + * @typedef {{ + * sampleDuration: ?number, + * sampleSize: ?number, + * sampleCompositionTimeOffset: ?number + * }} + * + * @property {?number} sampleDuration + * The length of the sample in timescale units. + * @property {?number} sampleSize + * The size of the sample in bytes. + * @property {?number} sampleCompositionTimeOffset + * The time since the start of the sample in timescale units. 
Time + * offset is based off the start of the sample. If this value is + * missing, the accumulated durations preceding this time sample will + * be used to create the start time. + */ +shaka.extern.TRUNSample; diff --git a/lib/hls/hls_parser.js b/lib/hls/hls_parser.js index 040b4fef92..7cbfe05625 100644 --- a/lib/hls/hls_parser.js +++ b/lib/hls/hls_parser.js @@ -32,6 +32,7 @@ goog.require('shaka.util.LanguageUtils'); goog.require('shaka.util.ManifestParserUtils'); goog.require('shaka.util.MimeUtils'); goog.require('shaka.util.Mp4Parser'); +goog.require('shaka.util.Mp4BoxParsers'); goog.require('shaka.util.Networking'); goog.require('shaka.util.OperationManager'); goog.require('shaka.util.Timer'); @@ -1864,12 +1865,10 @@ shaka.hls.HlsParser = class { box.version == 0 || box.version == 1, 'MDHD version can only be 0 or 1'); - // Skip "creation_time" and "modification_time". - // They are 4 bytes each if the mdhd box is version 0, 8 bytes each - // if it is version 1. - box.reader.skip(box.version == 0 ? 8 : 16); + const parsedMDHDBox = shaka.util.Mp4BoxParsers.parseMDHD( + box.reader, box.version); - timescale = box.reader.readUint32(); + timescale = parsedMDHDBox.timescale; box.parser.stop(); }).parse(initData, /* partialOkay= */ true); @@ -1891,9 +1890,10 @@ shaka.hls.HlsParser = class { goog.asserts.assert( box.version == 0 || box.version == 1, 'TFDT version can only be 0 or 1'); - const baseTime = (box.version == 0) ? 
- box.reader.readUint32() : - box.reader.readUint64(); + + const parsedTFDTBox = shaka.util.Mp4BoxParsers.parseTFDT( + box.reader, box.version); + const baseTime = parsedTFDTBox.baseMediaDecodeTime; startTime = baseTime / timescale; parsedMedia = true; box.parser.stop(); diff --git a/lib/text/mp4_vtt_parser.js b/lib/text/mp4_vtt_parser.js index 8d1e8c4805..46debbdbe1 100644 --- a/lib/text/mp4_vtt_parser.js +++ b/lib/text/mp4_vtt_parser.js @@ -14,8 +14,8 @@ goog.require('shaka.text.VttTextParser'); goog.require('shaka.util.DataViewReader'); goog.require('shaka.util.Error'); goog.require('shaka.util.Functional'); -goog.require('shaka.util.Iterables'); goog.require('shaka.util.Mp4Parser'); +goog.require('shaka.util.Mp4BoxParsers'); goog.require('shaka.util.StringUtils'); goog.require('shaka.util.TextParser'); @@ -52,18 +52,10 @@ shaka.text.Mp4VttParser = class { goog.asserts.assert( box.version == 0 || box.version == 1, 'MDHD version can only be 0 or 1'); - if (box.version == 0) { - box.reader.skip(4); // Skip "creation_time". - box.reader.skip(4); // Skip "modification_time". - this.timescale_ = box.reader.readUint32(); - box.reader.skip(4); // Skip "duration". - } else { - box.reader.skip(8); // Skip "creation_time". - box.reader.skip(8); // Skip "modification_time". - this.timescale_ = box.reader.readUint32(); - box.reader.skip(8); // Skip "duration". - } - box.reader.skip(4); // Skip "pad", "language", and "pre-defined". 
+ + const parsedMDHDBox = shaka.util.Mp4BoxParsers.parseMDHD( + box.reader, box.version); + this.timescale_ = parsedMDHDBox.timescale; }) .box('minf', Mp4Parser.children) .box('stbl', Mp4Parser.children) @@ -106,11 +98,10 @@ shaka.text.Mp4VttParser = class { shaka.util.Error.Code.INVALID_MP4_VTT); } - const Mp4VttParser = shaka.text.Mp4VttParser; const Mp4Parser = shaka.util.Mp4Parser; let baseTime = 0; - /** @type {!Array.<shaka.text.Mp4VttParser.TimeSegment>} */ + /** @type {!Array.<shaka.extern.TRUNSample>} */ let presentations = []; /** @type {!Uint8Array} */ let rawPayload; @@ -130,14 +121,18 @@ shaka.text.Mp4VttParser = class { goog.asserts.assert( box.version == 0 || box.version == 1, 'TFDT version can only be 0 or 1'); - baseTime = (box.version == 0) ? box.reader.readUint32() : - box.reader.readUint64(); + + const parsedTFDTBox = shaka.util.Mp4BoxParsers.parseTFDT( + box.reader, box.version); + baseTime = parsedTFDTBox.baseMediaDecodeTime; }) .fullBox('tfhd', (box) => { goog.asserts.assert( box.flags != null, 'A TFHD box should have a valid flags value'); - defaultDuration = Mp4VttParser.parseTFHD_(box.flags, box.reader); + const parsedTFHDBox = shaka.util.Mp4BoxParsers.parseTFHD( + box.reader, box.flags); + defaultDuration = parsedTFHDBox.defaultSampleDuration; }) .fullBox('trun', (box) => { sawTRUN = true; @@ -147,8 +142,10 @@ shaka.text.Mp4VttParser = class { goog.asserts.assert( box.flags != null, 'A TRUN box should have a valid flags value'); - presentations = - Mp4VttParser.parseTRUN_(box.version, box.flags, box.reader); + + const parsedTRUNBox = shaka.util.Mp4BoxParsers.parseTRUN( + box.reader, box.version, box.flags); + presentations = parsedTRUNBox.sampleData; }) .box('mdat', Mp4Parser.allData((data) => { goog.asserts.assert( @@ -176,9 +173,9 @@ shaka.text.Mp4VttParser = class { for (const presentation of presentations) { // If one presentation corresponds to multiple payloads, it is assumed // that all of those payloads have the same start time and duration. 
- const duration = presentation.duration || defaultDuration; - const startTime = presentation.timeOffset ? - baseTime + presentation.timeOffset : + const duration = presentation.sampleDuration || defaultDuration; + const startTime = presentation.sampleCompositionTimeOffset ? + baseTime + presentation.sampleCompositionTimeOffset : currentTime; currentTime = startTime + (duration || 0); @@ -244,94 +241,6 @@ shaka.text.Mp4VttParser = class { cues.filter(shaka.util.Functional.isNotNull)); } - /** - * @param {number} flags - * @param {!shaka.util.DataViewReader} reader - * @return {?number} The default_sample_duration field, if present. - * @private - */ - static parseTFHD_(flags, reader) { - // Skip "track_ID". - reader.skip(4); - - // Skip "base_data_offset" if present. - if (flags & 0x000001) { - reader.skip(8); - } - - // Skip "sample_description_index" if present. - if (flags & 0x000002) { - reader.skip(4); - } - - // Read and return "default_sample_duration" if present. - if (flags & 0x000008) { - return reader.readUint32(); - } - - // There is no "default_sample_duration". - return null; - } - - /** - * @param {number} version - * @param {number} flags - * @param {!shaka.util.DataViewReader} reader - * @return {!Array.} - * @private - */ - static parseTRUN_(version, flags, reader) { - const sampleCount = reader.readUint32(); - - // Skip "data_offset" if present. - if (flags & 0x000001) { - reader.skip(4); - } - - // Skip "first_sample_flags" if present. - if (flags & 0x000004) { - reader.skip(4); - } - - const samples = []; - - for (const _ of shaka.util.Iterables.range(sampleCount)) { - shaka.util.Functional.ignored(_); - /** @type {shaka.text.Mp4VttParser.TimeSegment} */ - const sample = { - duration: null, - sampleSize: null, - timeOffset: null, - }; - - // Read "sample duration" if present. - if (flags & 0x000100) { - sample.duration = reader.readUint32(); - } - - // Read "sample_size" if present. 
- if (flags & 0x000200) { - sample.sampleSize = reader.readUint32(); - } - - // Skip "sample_flags" if present. - if (flags & 0x000400) { - reader.skip(4); - } - - // Read "sample_time_offset" if present. - if (flags & 0x000800) { - sample.timeOffset = version == 0 ? - reader.readUint32() : - reader.readInt32(); - } - - samples.push(sample); - } - - return samples; - } - /** * Parses a vttc box into a cue. * @@ -407,24 +316,5 @@ shaka.text.Mp4VttParser = class { } }; -/** - * @typedef {{ - * duration: ?number, - * sampleSize: ?number, - * timeOffset: ?number - * }} - * - * @property {?number} duration - * The length of the segment in timescale units. - * @property {?number} sampleSize - * The size of the segment in bytes. - * @property {?number} timeOffset - * The time since the start of the segment in timescale units. Time - * offset is based of the start of the segment. If this value is - * missing, the accumated durations preceeding this time segment will - * be used to create the start time. - */ -shaka.text.Mp4VttParser.TimeSegment; - shaka.text.TextEngine.registerParser( 'application/mp4; codecs="wvtt"', () => new shaka.text.Mp4VttParser()); diff --git a/lib/util/mp4_box_parsers.js b/lib/util/mp4_box_parsers.js new file mode 100644 index 0000000000..0eba371920 --- /dev/null +++ b/lib/util/mp4_box_parsers.js @@ -0,0 +1,171 @@ +/*! @license + * Shaka Player + * Copyright 2016 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +goog.provide('shaka.util.Mp4BoxParsers'); + +goog.require('shaka.util.DataViewReader'); +goog.require('shaka.util.Iterables'); + +shaka.util.Mp4BoxParsers = class { + /** + * Parses a TFHD Box. + * @param {!shaka.util.DataViewReader} reader + * @param {!number} flags + * @return {!shaka.extern.ParsedTFHDBox} + */ + static parseTFHD(reader, flags) { + let defaultSampleDuration = null; + + reader.skip(4); // Skip "track_ID" + + // Skip "base_data_offset" if present. 
+ if (flags & 0x000001) { + reader.skip(8); + } + + // Skip "sample_description_index" if present. + if (flags & 0x000002) { + reader.skip(4); + } + + // Read "default_sample_duration" if present. + if (flags & 0x000008) { + defaultSampleDuration = reader.readUint32(); + } + + /** @type {!shaka.extern.ParsedTFHDBox} */ + const parsedTFHD = { + defaultSampleDuration: defaultSampleDuration, + }; + + return parsedTFHD; + } + + /** + * Parses a TFDT Box. + * @param {!shaka.util.DataViewReader} reader + * @param {!number} version + * @return {!shaka.extern.ParsedTFDTBox} + */ + static parseTFDT(reader, version) { + const baseMediaDecodeTime = version == 1 ? + reader.readUint64() : reader.readUint32(); + + /** @type {!shaka.extern.ParsedTFDTBox} */ + const parsedTFDT = { + baseMediaDecodeTime: baseMediaDecodeTime, + }; + + return parsedTFDT; + } + + /** + * Parses a MDHD Box. + * @param {!shaka.util.DataViewReader} reader + * @param {!number} version + * @return {!shaka.extern.ParsedMDHDBox} + */ + static parseMDHD(reader, version) { + if (version == 1) { + reader.skip(8); // Skip "creation_time" + reader.skip(8); // Skip "modification_time" + } else { + reader.skip(4); // Skip "creation_time" + reader.skip(4); // Skip "modification_time" + } + + const timescale = reader.readUint32(); + + /** @type {!shaka.extern.ParsedMDHDBox} */ + const parsedMDHD = { + timescale: timescale, + }; + + return parsedMDHD; + } + + /** + * Parses a TREX Box. + * @param {!shaka.util.DataViewReader} reader + * @return {!shaka.extern.ParsedTREXBox} + */ + static parseTREX(reader) { + reader.skip(4); // Skip "track_ID" + reader.skip(4); // Skip "default_sample_description_index" + const defaultSampleDuration = reader.readUint32(); + + /** @type {!shaka.extern.ParsedTREXBox} */ + const parsedTREX = { + defaultSampleDuration: defaultSampleDuration, + }; + + return parsedTREX; + } + + /** + * Parses a TRUN Box. 
+ * @param {!shaka.util.DataViewReader} reader + * @param {!number} version + * @param {!number} flags + * @return {!shaka.extern.ParsedTRUNBox} + */ + static parseTRUN(reader, version, flags) { + const sampleCount = reader.readUint32(); + const sampleData = []; + + // Skip "data_offset" if present. + if (flags & 0x000001) { + reader.skip(4); + } + + // Skip "first_sample_flags" if present. + if (flags & 0x000004) { + reader.skip(4); + } + + for (const _ of shaka.util.Iterables.range(sampleCount)) { + shaka.util.Functional.ignored(_); + /** @type {shaka.extern.TRUNSample} */ + const sample = { + sampleDuration: null, + sampleSize: null, + sampleCompositionTimeOffset: null, + }; + + // Read "sample duration" if present. + if (flags & 0x000100) { + sample.sampleDuration = reader.readUint32(); + } + + // Read "sample_size" if present. + if (flags & 0x000200) { + sample.sampleSize = reader.readUint32(); + } + + // Skip "sample_flags" if present. + if (flags & 0x000400) { + reader.skip(4); + } + + // Read "sample_time_offset" if present. + if (flags & 0x000800) { + sample.sampleCompositionTimeOffset = version == 0 ? + reader.readUint32() : + reader.readInt32(); + } + + sampleData.push(sample); + } + + /** @type {!shaka.extern.ParsedTRUNBox} */ + const parsedTRUN = { + sampleCount: sampleCount, + sampleData: sampleData, + }; + + return parsedTRUN; + } +}; diff --git a/test/media/transmuxer_integration.js b/test/media/transmuxer_integration.js index 10c0c7190d..fb7af1b836 100644 --- a/test/media/transmuxer_integration.js +++ b/test/media/transmuxer_integration.js @@ -154,9 +154,9 @@ describe('Transmuxer', () => { goog.asserts.assert( box.version == 0 || box.version == 1, 'TFDT version can only be 0 or 1'); - mp4Timestamp = (box.version == 0) ? 
- box.reader.readUint32() : - box.reader.readUint64(); + const parsedTFDTBox = shaka.util.Mp4BoxParsers.parseTFDT( + box.reader, box.version); + mp4Timestamp = parsedTFDTBox.baseMediaDecodeTime; parsed = true; }) .parse(transmuxedData.data);