diff --git a/build/types/cea b/build/types/cea new file mode 100644 index 0000000000..6d0d0eb326 --- /dev/null +++ b/build/types/cea @@ -0,0 +1,5 @@ +# Inband closed caption support. + ++../../lib/cea/mp4_cea_parser.js ++../../lib/cea/i_cea_parser.js ++../../lib/cea/sei_processor.js diff --git a/build/types/core b/build/types/core index b14a78152c..940b93d0cb 100644 --- a/build/types/core +++ b/build/types/core @@ -100,3 +100,5 @@ +../../third_party/closure-uri/uri.js +../../third_party/closure-uri/utils.js + ++@cea diff --git a/lib/cea/i_cea_parser.js b/lib/cea/i_cea_parser.js new file mode 100644 index 0000000000..4a29affe4a --- /dev/null +++ b/lib/cea/i_cea_parser.js @@ -0,0 +1,56 @@ +/*! @license + * Shaka Player + * Copyright 2016 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +goog.provide('shaka.cea.ICeaParser'); + +/** + * Interface for parsing inband closed caption data from MP4 streams. + * @interface + */ +shaka.cea.ICeaParser = class { + /** + * Initializes the parser with init segment data. + * @param {!BufferSource} initSegment init segment to parse. + */ + init(initSegment) {} + + /** + * Parses the stream and extracts closed caption packets. + * @param {!BufferSource} mediaSegment media segment to parse. + * @return {!Array<!shaka.cea.ICeaParser.CaptionPacket>} + */ + parse(mediaSegment) {} +}; + +/** + * NALU type for Supplemental Enhancement Information (SEI). + * @const {number} + */ +shaka.cea.ICeaParser.NALU_TYPE_SEI = 0x06; + +/** + * Default timescale value for a track. + */ +shaka.cea.ICeaParser.DEFAULT_TIMESCALE_VALUE = 90000; + +/** + * @typedef {{ + * packet: !Uint8Array, + * pts: !number + * }} + * + * @description Parsed Caption Packet. + * @property {!Uint8Array} packet + * Caption packet. More specifically, it contains a "User data + * registered by Recommendation ITU-T T.35 SEI message", from section D.1.6 + * and section D.2.6 of Rec. ITU-T H.264 (06/2019). 
+ * @property {!number} pts + * The presentation timestamp (pts) at which the ITU-T T.35 data shows up, + * in seconds. + * @exportDoc + */ +shaka.cea.ICeaParser.CaptionPacket; + diff --git a/lib/cea/mp4_cea_parser.js b/lib/cea/mp4_cea_parser.js new file mode 100644 index 0000000000..42e2cdfd43 --- /dev/null +++ b/lib/cea/mp4_cea_parser.js @@ -0,0 +1,258 @@ +/*! @license + * Shaka Player + * Copyright 2016 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +goog.provide('shaka.cea.Mp4CeaParser'); + +goog.require('goog.asserts'); +goog.require('shaka.cea.ICeaParser'); +goog.require('shaka.cea.SeiProcessor'); +goog.require('shaka.util.Mp4Parser'); +goog.require('shaka.util.Mp4BoxParsers'); +goog.require('shaka.util.DataViewReader'); + +/** + * MPEG4 stream parser used for extracting 708 closed captions data. + * @implements {shaka.cea.ICeaParser} + */ +shaka.cea.Mp4CeaParser = class { + constructor() { + /** + * SEI data processor. + * @private + * @const {!shaka.cea.SeiProcessor} + */ + this.seiProcessor_ = new shaka.cea.SeiProcessor(); + + /** + * Map of track id to corresponding timescale. + * @private {!Map} + */ + this.trackIdToTimescale_ = new Map(); + + /** + * Default sample duration, as specified by the TREX box. + * @private {!number} + */ + this.defaultSampleDuration_ = 0; + + /** + * Default sample size, as specified by the TREX box. + * @private {!number} + */ + this.defaultSampleSize_ = 0; + } + + /** + * Parses the init segment. Gets Default Sample Duration and Size from the + * TREX box, and constructs a map of Track IDs to timescales. 
Each TRAK box + * contains a track header (TKHD) containing track ID, and a media header box + * (MDHD) containing the timescale for the track + * @override + */ + init(initSegment) { + const Mp4Parser = shaka.util.Mp4Parser; + const trackIds = []; + const timescales = []; + + new Mp4Parser() + .box('moov', Mp4Parser.children) + .box('mvex', Mp4Parser.children) + .fullBox('trex', (box) => { + const parsedTREXBox = shaka.util.Mp4BoxParsers.parseTREX( + box.reader); + + this.defaultSampleDuration_ = parsedTREXBox.defaultSampleDuration; + this.defaultSampleSize_ = parsedTREXBox.defaultSampleSize; + }) + .box('trak', Mp4Parser.children) + .fullBox('tkhd', (box) => { + goog.asserts.assert( + box.version != null, + 'TKHD is a full box and should have a valid version.'); + const parsedTKHDBox = shaka.util.Mp4BoxParsers.parseTKHD( + box.reader, box.version); + trackIds.push(parsedTKHDBox.trackId); + }) + .box('mdia', Mp4Parser.children) + .fullBox('mdhd', (box) => { + goog.asserts.assert( + box.version != null, + 'MDHD is a full box and should have a valid version.'); + const parsedMDHDBox = shaka.util.Mp4BoxParsers.parseMDHD( + box.reader, box.version); + timescales.push(parsedMDHDBox.timescale); + }) + .parse(initSegment, /* partialOkay= */ true); + + // At least one track should exist, and each track should have a + // corresponding Id in TKHD box, and timescale in its MDHD box + if (!trackIds.length|| !timescales.length || + trackIds.length != timescales.length) { + throw new shaka.util.Error( + shaka.util.Error.Severity.CRITICAL, + shaka.util.Error.Category.TEXT, + shaka.util.Error.Code.INVALID_MP4_CEA); + } + + // Populate the map from track Id to timescale + trackIds.forEach((trackId, idx) => { + this.trackIdToTimescale_.set(trackId, timescales[idx]); + }); + } + + /** + * Parses each video segment. In fragmented MP4s, MOOF and MDAT come in + * pairs. 
The following logic gets the necessary info from MOOFs to parse + * MDATs (base media decode time, sample sizes/offsets/durations, etc), + * and then parses the MDAT boxes for CEA-708 packets using this information. + * CEA-708 packets are returned in the callback. + * @override + */ + parse(mediaSegment) { + const Mp4Parser = shaka.util.Mp4Parser; + + /** @type {!Array} **/ + const captionPackets = []; + + // Fields that are found in MOOF boxes + let defaultSampleDuration = this.defaultSampleDuration_; + let defaultSampleSize = this.defaultSampleSize_; + let sampleData = []; + let baseMediaDecodeTime = null; + let timescale = shaka.cea.ICeaParser.DEFAULT_TIMESCALE_VALUE; + + new Mp4Parser() + .box('moof', Mp4Parser.children) + .box('traf', Mp4Parser.children) + .fullBox('trun', (box) => { + goog.asserts.assert( + box.version != null && box.flags!=null, + 'TRUN is a full box and should have a valid version & flags.'); + + const parsedTRUN = shaka.util.Mp4BoxParsers.parseTRUN( + box.reader, box.version, box.flags); + + sampleData = parsedTRUN.sampleData; + }) + + .fullBox('tfhd', (box) => { + goog.asserts.assert( + box.flags != null, + 'TFHD is a full box and should have valid flags.'); + + const parsedTFHD = shaka.util.Mp4BoxParsers.parseTFHD( + box.reader, box.flags); + + // If specified, defaultSampleDuration and defaultSampleSize + // override the ones specified in the TREX box + defaultSampleDuration = parsedTFHD.defaultSampleDuration + || this.defaultSampleDuration_; + + defaultSampleSize = parsedTFHD.defaultSampleSize + || this.defaultSampleSize_; + + const trackId = parsedTFHD.trackId; + + // Get the timescale from the track Id + if (this.trackIdToTimescale_.has(trackId)) { + timescale = this.trackIdToTimescale_.get(trackId); + } + }) + + .fullBox('tfdt', (box) => { + goog.asserts.assert( + box.version != null, + 'TFDT is a full box and should have a valid version.'); + + const parsedTFDT = shaka.util.Mp4BoxParsers.parseTFDT( + box.reader, box.version); + + 
baseMediaDecodeTime = parsedTFDT.baseMediaDecodeTime; + }) + .box('mdat', (box) => { + if (baseMediaDecodeTime === null) { + // This field should have been populated by + // the Base Media Decode time in the TFDT box + throw new shaka.util.Error( + shaka.util.Error.Severity.CRITICAL, + shaka.util.Error.Category.TEXT, + shaka.util.Error.Code.INVALID_MP4_CEA); + } + this.parseMdat_(box.reader, baseMediaDecodeTime, timescale, + defaultSampleDuration, defaultSampleSize, sampleData, + captionPackets); + }) + .parse(mediaSegment, /* partialOkay= */ false); + + return captionPackets; + } + + /** + * Parse MDAT box. + * @param {!shaka.util.DataViewReader} reader + * @param {!number} time + * @param {!number} timescale + * @param {!number} defaultSampleDuration + * @param {!number} defaultSampleSize + * @param {!Array} sampleData + * @param {!Array} captionPackets + * @private + */ + parseMdat_(reader, time, timescale, defaultSampleDuration, + defaultSampleSize, sampleData, captionPackets) { + let sampleIndex = 0; + + // The fields in each ParsedTRUNSample contained in the sampleData + // array are nullable. In the case of sample data and sample duration, + // we use the defaults provided by the TREX/TFHD boxes. For sample + // composition time offset, we default to 0. 
+ let sampleSize = defaultSampleSize; + + if (sampleData.length) { + sampleSize = sampleData[0].sampleSize || defaultSampleSize; + } + + while (reader.hasMoreData()) { + const naluSize = reader.readUint32(); + const naluType = reader.readUint8() & 0x1F; + if (naluType == shaka.cea.ICeaParser.NALU_TYPE_SEI) { + let timeOffset = 0; + + if (sampleData.length > sampleIndex) { + timeOffset = sampleData[sampleIndex].sampleCompositionTimeOffset || 0; + } + + const pts = (time + timeOffset)/timescale; + for (const packet of this.seiProcessor_ + .process(reader.readBytes(naluSize - 1))) { + captionPackets.push({ + packet, + pts, + }); + } + } else { + reader.skip(naluSize - 1); + } + sampleSize -= (naluSize + 4); + if (sampleSize == 0) { + if (sampleData.length > sampleIndex) { + time += sampleData[sampleIndex].sampleDuration || + defaultSampleDuration; + } else { + time += defaultSampleDuration; + } + + sampleIndex++; + + if (sampleData.length > sampleIndex) { + sampleSize = sampleData[sampleIndex].sampleSize || defaultSampleSize; + } else { + sampleSize = defaultSampleSize; + } + } + } + } +}; diff --git a/lib/cea/sei_processor.js b/lib/cea/sei_processor.js new file mode 100644 index 0000000000..6fc0e8e839 --- /dev/null +++ b/lib/cea/sei_processor.js @@ -0,0 +1,77 @@ +/*! @license + * Shaka Player + * Copyright 2016 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +goog.provide('shaka.cea.SeiProcessor'); + + +/** + * H.264 SEI NALU Parser used for extracting 708 closed caption packets. + */ +shaka.cea.SeiProcessor = class { + /** + * Processes supplemental enhancement information data. + * @param {!Uint8Array} naluData NALU from which SEI data is to be processed. + * @return {!Iterable.} + */ + * process(naluData) { + const emuCount = this.removeEmu_(naluData); + + // The following is an implementation of section 7.3.2.3.1 + // in Rec. ITU-T H.264 (06/2019), the H.264 spec. 
+ let offset = 0; + + while (offset + emuCount < naluData.length) { + let payloadType = 0; // SEI payload type as defined by H.264 spec + while (naluData[offset] == 0xFF) { + payloadType += 255; + offset++; + } + payloadType += naluData[offset++]; + + let payloadSize = 0; // SEI payload size as defined by H.264 spec + while (naluData[offset] == 0xFF) { + payloadSize += 255; + offset++; + } + payloadSize += naluData[offset++]; + + // Payload type 4 is user_data_registered_itu_t_t35, as per the H.264 + // spec. This payload type contains caption data. + if (payloadType == 0x04) { + yield naluData.subarray(offset, offset + payloadSize); + } + offset += payloadSize; + } + } + + + /** + * Removes H.264 emulation prevention bytes from the byte array. + * @param {!Uint8Array} naluData NALU from which EMUs should be removed. + * @return {number} The number of removed emulation prevention bytes. + * @private + */ + removeEmu_(naluData) { + let zeroCount = 0; + let src = 0; + let dst = 0; + while (src < naluData.length) { + if (zeroCount == 2 && naluData[src] == 0x03) { + zeroCount = 0; + } else { + if (naluData[src] == 0x00) { + zeroCount++; + } else { + zeroCount = 0; + } + naluData[dst] = naluData[src]; + dst++; + } + src++; + } + return (src - dst); + } +}; diff --git a/lib/media/closed_caption_parser.js b/lib/media/closed_caption_parser.js index 1f04024d61..1ad5496af2 100644 --- a/lib/media/closed_caption_parser.js +++ b/lib/media/closed_caption_parser.js @@ -7,6 +7,7 @@ goog.provide('shaka.media.IClosedCaptionParser'); goog.provide('shaka.media.MuxJSClosedCaptionParser'); goog.provide('shaka.media.NoopCaptionParser'); +goog.provide('shaka.media.ClosedCaptionParser'); goog.require('shaka.util.BufferUtils'); @@ -131,3 +132,43 @@ shaka.media.NoopCaptionParser = class { */ reset() {} }; + +/** Closed Caption Parser provides all operations + * for parsing the closed captions + * embedded in Dash videos streams. 
+ * + * @implements {shaka.media.IClosedCaptionParser} + * @final + */ +shaka.media.ClosedCaptionParser = class { + constructor() { + /** + * MP4 Parser for MDAT, TREX, TRUN, and MDHD boxes + * @private {!shaka.cea.ICeaParser} + */ + this.ceaParser_ = new shaka.cea.Mp4CeaParser(); + } + + /** + * @override + */ + init(data) { + this.ceaParser_.init(data); + } + + /** + * @override + */ + parseFrom(data, onCaptions) { + const captionPackets = this.ceaParser_.parse(data); + shaka.util.Functional.ignored(captionPackets); + // Todo: This is where the parsed data will be passed + // to the decoder to decode + } + + /** + * @override + */ + reset() { + } +}; diff --git a/lib/util/error.js b/lib/util/error.js index b89c90cfa8..51c04f861b 100644 --- a/lib/util/error.js +++ b/lib/util/error.js @@ -319,6 +319,10 @@ shaka.util.Error.Code = { */ 'UNABLE_TO_EXTRACT_CUE_START_TIME': 2009, + /** + * MP4 segment for CEA data is invalid. + */ + 'INVALID_MP4_CEA': 2010, /** * Some component tried to read past the end of a buffer. The segment index, diff --git a/lib/util/mp4_box_parsers.js b/lib/util/mp4_box_parsers.js index a7148b5cab..8c440ff11b 100644 --- a/lib/util/mp4_box_parsers.js +++ b/lib/util/mp4_box_parsers.js @@ -18,8 +18,9 @@ shaka.util.Mp4BoxParsers = class { */ static parseTFHD(reader, flags) { let defaultSampleDuration = null; + let defaultSampleSize = null; - reader.skip(4); // Skip "track_ID" + const trackId = reader.readUint32(); // Read "track_ID" // Skip "base_data_offset" if present. if (flags & 0x000001) { @@ -36,8 +37,15 @@ shaka.util.Mp4BoxParsers = class { defaultSampleDuration = reader.readUint32(); } + // Read "default_sample_size" if present. 
+ if (flags & 0x000010) { + defaultSampleSize = reader.readUint32(); + } + return { + trackId, defaultSampleDuration, + defaultSampleSize, }; } @@ -87,9 +95,11 @@ shaka.util.Mp4BoxParsers = class { reader.skip(4); // Skip "track_ID" reader.skip(4); // Skip "default_sample_description_index" const defaultSampleDuration = reader.readUint32(); + const defaultSampleSize = reader.readUint32(); return { defaultSampleDuration, + defaultSampleSize, }; } @@ -181,12 +191,20 @@ shaka.util.Mp4BoxParsers = class { /** * @typedef {{ - * defaultSampleDuration: ?number + * trackId: !number, + * defaultSampleDuration: ?number, + * defaultSampleSize: ?number * }} * + * @property {!number} trackId + * As per the spec: an integer that uniquely identifies this + * track over the entire life‐time of this presentation * @property {?number} defaultSampleDuration * If specified via flags, this overrides the default sample * duration in the Track Extends Box for this fragment + * @property {?number} defaultSampleSize + * If specified via flags, this overrides the default sample + * size in the Track Extends Box for this fragment * * @exportDoc */ @@ -198,8 +216,8 @@ shaka.util.ParsedTFHDBox; * }} * * @property {!number} baseMediaDecodeTime - * As per the spec: the absolute decode time, measured on the media - * timeline, of the first sample in decode order in the track fragment + * As per the spec: the absolute decode time, measured on the media + * timeline, of the first sample in decode order in the track fragment * * @exportDoc */ @@ -211,8 +229,8 @@ shaka.util.ParsedTFDTBox; * }} * * @property {!number} timescale - * As per the spec: an integer that specifies the time‐scale for this media; - * this is the number of time units that pass in one second + * As per the spec: an integer that specifies the time‐scale for this media; + * this is the number of time units that pass in one second * * @exportDoc */ @@ -220,11 +238,14 @@ shaka.util.ParsedMDHDBox; /** * @typedef {{ - * 
defaultSampleDuration: !number + * defaultSampleDuration: !number, + * defaultSampleSize: !number * }} * * @property {!number} defaultSampleDuration - * The default sample duration to be used in track fragments + * The default sample duration to be used in track fragments + * @property {!number} defaultSampleSize + * The default sample size to be used in track fragments * * @exportDoc */ @@ -237,9 +258,9 @@ shaka.util.ParsedTREXBox; * }} * * @property {!number} sampleCount - * As per the spec: the number of samples being added in this run; + * As per the spec: the number of samples being added in this run; * @property {!Array.} sampleData - * An array of size containing data for each sample + * An array of size sampleCount containing data for each sample * * @exportDoc */ @@ -253,14 +274,14 @@ shaka.util.ParsedTRUNBox; * }} * * @property {?number} sampleDuration - * The length of the sample in timescale units. + * The length of the sample in timescale units. * @property {?number} sampleSize - * The size of the sample in bytes. + * The size of the sample in bytes. * @property {?number} sampleCompositionTimeOffset - * The time since the start of the sample in timescale units. Time - * offset is based of the start of the sample. If this value is - * missing, the accumated durations preceeding this time sample will - * be used to create the start time. + * The time since the start of the sample in timescale units. Time + * offset is based off the start of the sample. If this value is + * missing, the accumulated durations preceding this time sample will + * be used to create the start time. 
+ * * @exportDoc */ @@ -272,7 +293,7 @@ shaka.util.ParsedTRUNSample; * }} * * @property {!number} trackId - * Unique ID indicative of this track + * Unique ID indicative of this track * * @exportDoc */ diff --git a/shaka-player.uncompiled.js b/shaka-player.uncompiled.js index 718b8f21ea..ce09e88e4b 100644 --- a/shaka-player.uncompiled.js +++ b/shaka-player.uncompiled.js @@ -15,6 +15,7 @@ goog.require('shaka.abr.SimpleAbrManager'); goog.require('shaka.ads.AdManager'); goog.require('shaka.cast.CastProxy'); goog.require('shaka.cast.CastReceiver'); +goog.require('shaka.cea.Mp4CeaParser'); goog.require('shaka.dash.DashParser'); goog.require('shaka.hls.HlsParser'); goog.require('shaka.log'); diff --git a/test/cea/mp4_cea_parser_unit.js b/test/cea/mp4_cea_parser_unit.js new file mode 100644 index 0000000000..de18fd68f0 --- /dev/null +++ b/test/cea/mp4_cea_parser_unit.js @@ -0,0 +1,58 @@ +/*! @license + * Shaka Player + * Copyright 2016 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +describe('Mp4CeaParser', () => { + const ceaInitSegmentUri = '/base/test/test/assets/cea-init.mp4'; + const ceaSegmentUri = '/base/test/test/assets/cea-segment.mp4'; + + /** @type {!ArrayBuffer} */ + let ceaInitSegment; + /** @type {!ArrayBuffer} */ + let ceaSegment; + + beforeAll(async () => { + const responses = await Promise.all([ + shaka.test.Util.fetch(ceaInitSegmentUri), + shaka.test.Util.fetch(ceaSegmentUri), + ]); + ceaInitSegment = responses[0]; + ceaSegment = responses[1]; + }); + + it('parses cea data from mp4 stream', () => { + const cea708Parser = new shaka.cea.Mp4CeaParser(); + + const expectedCea708Packet = new Uint8Array([ + 0xb5, 0x00, 0x31, 0x47, 0x41, 0x39, 0x34, 0x03, + 0xce, 0xff, 0xfd, 0x94, 0x20, 0xfd, 0x94, 0xae, + 0xfd, 0x91, 0x62, 0xfd, 0x73, 0xf7, 0xfd, 0xe5, + 0xba, 0xfd, 0x91, 0xb9, 0xfd, 0xb0, 0xb0, 0xfd, + 0xba, 0xb0, 0xfd, 0xb0, 0xba, 0xfd, 0xb0, 0x31, + 0xfd, 0xba, 0xb0, 0xfd, 0xb0, 0x80, 0xfd, 0x94, + 0x2c, 0xfd, 0x94, 0x2f, 0xff, + ]); + + 
cea708Parser.init(ceaInitSegment); + const cea708Packets = cea708Parser.parse(ceaSegment); + expect(cea708Packets).toBeDefined(); + expect(cea708Packets.length).toBe(4); + expect(cea708Packets[cea708Packets.length - 1].packet) + .toEqual(expectedCea708Packet); + }); + + it('parses an invalid init segment', () => { + const cea708Parser = new shaka.cea.Mp4CeaParser(); + + const expected = Util.jasmineError(new shaka.util.Error( + shaka.util.Error.Severity.CRITICAL, + shaka.util.Error.Category.TEXT, + shaka.util.Error.Code.INVALID_MP4_CEA)); + + expect(() => { + cea708Parser.init(ceaSegment); + }).toThrow(expected); + }); +}); diff --git a/test/util/mp4_box_parsers_unit.js b/test/util/mp4_box_parsers_unit.js index 308d495791..f0b46701f4 100644 --- a/test/util/mp4_box_parsers_unit.js +++ b/test/util/mp4_box_parsers_unit.js @@ -1,4 +1,5 @@ -/** @license +/*! @license + * Shaka Player * Copyright 2016 Google LLC * SPDX-License-Identifier: Apache-2.0 */ @@ -26,10 +27,12 @@ describe('Mp4BoxParsers', () => { let tkhdParsed = false; let mdhdParsed = false; let defaultSampleDuration; + let defaultSampleSize; let trackId; let timescale; const expectedDefaultSampleDuration = 512; + const expectedDefaultSampleSize = 0; const expectedTrackId = 1; const expectedTimescale = 12288; @@ -42,6 +45,7 @@ describe('Mp4BoxParsers', () => { box.reader); defaultSampleDuration = parsedTREXBox.defaultSampleDuration; + defaultSampleSize = parsedTREXBox.defaultSampleSize; trexParsed = true; }) .box('trak', Mp4Parser.children) @@ -70,6 +74,7 @@ describe('Mp4BoxParsers', () => { expect(tkhdParsed).toBe(true); expect(mdhdParsed).toBe(true); expect(defaultSampleDuration).toBe(expectedDefaultSampleDuration); + expect(defaultSampleSize).toBe(expectedDefaultSampleSize); expect(trackId).toBe(expectedTrackId); expect(timescale).toBe(expectedTimescale); });