Skip to content

Commit

Permalink
Parse CEA-708 Packets from Fragmented MP4 streams. (#2660)
Browse files Browse the repository at this point in the history
This is an MP4 Parser which extracts CEA-708 packets from Fragmented MP4 streams.

The Closed Caption Parser (shaka.media.ClosedCaptionParser) will own this MP4 Parser, and will initialize it and call it as shown. As data comes in, the parser will parse it, and the caption packet data will then be returned in a callback (on708Data), as shown. Here, a theoretical decoder (to be added in a future pull request, and mentioned in a TODO comment) will decode and extract the parsed captions from these packets.

Issue #2648
  • Loading branch information
muhammadharis authored Jul 5, 2020
1 parent fb43b1f commit 480d4a8
Show file tree
Hide file tree
Showing 11 changed files with 546 additions and 18 deletions.
5 changes: 5 additions & 0 deletions build/types/cea
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Inband closed caption support.

+../../lib/cea/mp4_cea_parser.js
+../../lib/cea/i_cea_parser.js
+../../lib/cea/sei_processor.js
2 changes: 2 additions & 0 deletions build/types/core
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,5 @@

+../../third_party/closure-uri/uri.js
+../../third_party/closure-uri/utils.js

+@cea
56 changes: 56 additions & 0 deletions lib/cea/i_cea_parser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*! @license
* Shaka Player
* Copyright 2016 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/

goog.provide('shaka.cea.ICeaParser');

/**
* Interface for parsing inband closed caption data from MP4 streams.
*
* Usage is two-step: call init() with an init segment, then call parse()
* with each media segment to obtain the caption packets it carries.
* @interface
*/
shaka.cea.ICeaParser = class {
/**
* Initializes the parser with init segment data.
* @param {!BufferSource} initSegment init segment to parse.
*/
init(initSegment) {}

/**
* Parses the stream and extracts closed captions packets.
* @param {!BufferSource} mediaSegment media segment to parse.
* @return {!Array<!shaka.cea.ICeaParser.CaptionPacket>} The caption packets
*   found in the media segment.
*/
parse(mediaSegment) {}
};

/**
* NALU type for Supplemental Enhancement Information (SEI).
* Parsers compare this against the low 5 bits of a NAL unit's first byte.
* @const {number}
*/
shaka.cea.ICeaParser.NALU_TYPE_SEI = 0x06;

/**
* Default timescale value for a track, in time units per second.
* Used as the fallback when no timescale is known for a fragment's track.
* @const {number}
*/
shaka.cea.ICeaParser.DEFAULT_TIMESCALE_VALUE = 90000;

/**
* @typedef {{
* packet: !Uint8Array,
* pts: number
* }}
*
* @description Parsed Caption Packet.
* @property {!Uint8Array} packet
* Caption packet. More specifically, it contains a "User data
* registered by Recommendation ITU-T T.35 SEI message", from section D.1.6
* and section D.2.6 of Rec. ITU-T H.264 (06/2019).
* @property {number} pts
* The presentation timestamp (pts) at which the ITU-T T.35 data shows up,
* in seconds.
* @exportDoc
*/
shaka.cea.ICeaParser.CaptionPacket;

258 changes: 258 additions & 0 deletions lib/cea/mp4_cea_parser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
/*! @license
* Shaka Player
* Copyright 2016 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/

goog.provide('shaka.cea.Mp4CeaParser');

goog.require('goog.asserts');
goog.require('shaka.cea.ICeaParser');
goog.require('shaka.cea.SeiProcessor');
goog.require('shaka.util.DataViewReader');
goog.require('shaka.util.Error');
goog.require('shaka.util.Mp4BoxParsers');
goog.require('shaka.util.Mp4Parser');

/**
 * MPEG4 stream parser used for extracting 708 closed captions data.
 *
 * Call init() once with the init segment (to learn per-track timescales and
 * the TREX sample defaults), then call parse() with each media segment to
 * get the caption packets carried in its SEI NAL units.
 * @implements {shaka.cea.ICeaParser}
 */
shaka.cea.Mp4CeaParser = class {
  constructor() {
    /**
     * SEI data processor.
     * @private
     * @const {!shaka.cea.SeiProcessor}
     */
    this.seiProcessor_ = new shaka.cea.SeiProcessor();

    /**
     * Map of track id to corresponding timescale.
     * @private {!Map<number, number>}
     */
    this.trackIdToTimescale_ = new Map();

    /**
     * Default sample duration, as specified by the TREX box.
     * @private {number}
     */
    this.defaultSampleDuration_ = 0;

    /**
     * Default sample size, as specified by the TREX box.
     * @private {number}
     */
    this.defaultSampleSize_ = 0;
  }

  /**
   * Parses the init segment. Gets Default Sample Duration and Size from the
   * TREX box, and constructs a map of Track IDs to timescales. Each TRAK box
   * contains a track header (TKHD) containing track ID, and a media header box
   * (MDHD) containing the timescale for the track.
   *
   * Throws a CRITICAL shaka.util.Error (category TEXT, code INVALID_MP4_CEA)
   * when no track is found, or when the number of track ids does not match
   * the number of timescales.
   * @override
   */
  init(initSegment) {
    const Mp4Parser = shaka.util.Mp4Parser;
    const trackIds = [];
    const timescales = [];

    new Mp4Parser()
        .box('moov', Mp4Parser.children)
        .box('mvex', Mp4Parser.children)
        .fullBox('trex', (box) => {
          const parsedTREXBox = shaka.util.Mp4BoxParsers.parseTREX(
              box.reader);

          this.defaultSampleDuration_ = parsedTREXBox.defaultSampleDuration;
          this.defaultSampleSize_ = parsedTREXBox.defaultSampleSize;
        })
        .box('trak', Mp4Parser.children)
        .fullBox('tkhd', (box) => {
          goog.asserts.assert(
              box.version != null,
              'TKHD is a full box and should have a valid version.');
          const parsedTKHDBox = shaka.util.Mp4BoxParsers.parseTKHD(
              box.reader, box.version);
          trackIds.push(parsedTKHDBox.trackId);
        })
        .box('mdia', Mp4Parser.children)
        .fullBox('mdhd', (box) => {
          goog.asserts.assert(
              box.version != null,
              'MDHD is a full box and should have a valid version.');
          const parsedMDHDBox = shaka.util.Mp4BoxParsers.parseMDHD(
              box.reader, box.version);
          timescales.push(parsedMDHDBox.timescale);
        })
        .parse(initSegment, /* partialOkay= */ true);

    // At least one track should exist, and each track should have a
    // corresponding Id in TKHD box, and timescale in its MDHD box
    if (!trackIds.length || !timescales.length ||
        trackIds.length != timescales.length) {
      throw new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.TEXT,
          shaka.util.Error.Code.INVALID_MP4_CEA);
    }

    // Populate the map from track Id to timescale. The two arrays are filled
    // in box order, so entries at the same index are paired together.
    trackIds.forEach((trackId, idx) => {
      this.trackIdToTimescale_.set(trackId, timescales[idx]);
    });
  }

  /**
   * Parses each video segment. In fragmented MP4s, MOOF and MDAT come in
   * pairs. The following logic gets the necessary info from MOOFs to parse
   * MDATs (base media decode time, sample sizes/offsets/durations, etc),
   * and then parses the MDAT boxes for CEA-708 packets using this information.
   * The CEA-708 packets found are returned as an array.
   *
   * Throws a CRITICAL shaka.util.Error (category TEXT, code INVALID_MP4_CEA)
   * if an MDAT box is reached before any TFDT box has supplied a base media
   * decode time.
   * @override
   */
  parse(mediaSegment) {
    const Mp4Parser = shaka.util.Mp4Parser;

    /** @type {!Array<!shaka.cea.ICeaParser.CaptionPacket>} */
    const captionPackets = [];

    // Fields that are found in MOOF boxes. They start from the TREX defaults
    // captured by init() and are overridden as the boxes below are visited.
    let defaultSampleDuration = this.defaultSampleDuration_;
    let defaultSampleSize = this.defaultSampleSize_;
    let sampleData = [];
    let baseMediaDecodeTime = null;
    let timescale = shaka.cea.ICeaParser.DEFAULT_TIMESCALE_VALUE;

    new Mp4Parser()
        .box('moof', Mp4Parser.children)
        .box('traf', Mp4Parser.children)
        .fullBox('trun', (box) => {
          goog.asserts.assert(
              box.version != null && box.flags != null,
              'TRUN is a full box and should have a valid version & flags.');

          const parsedTRUN = shaka.util.Mp4BoxParsers.parseTRUN(
              box.reader, box.version, box.flags);

          sampleData = parsedTRUN.sampleData;
        })

        .fullBox('tfhd', (box) => {
          goog.asserts.assert(
              box.flags != null,
              'TFHD is a full box and should have valid flags.');

          const parsedTFHD = shaka.util.Mp4BoxParsers.parseTFHD(
              box.reader, box.flags);

          // If specified, defaultSampleDuration and defaultSampleSize
          // override the ones specified in the TREX box
          defaultSampleDuration = parsedTFHD.defaultSampleDuration ||
              this.defaultSampleDuration_;

          defaultSampleSize = parsedTFHD.defaultSampleSize ||
              this.defaultSampleSize_;

          const trackId = parsedTFHD.trackId;

          // Get the timescale from the track Id. Unknown track ids keep the
          // current value (initially DEFAULT_TIMESCALE_VALUE).
          if (this.trackIdToTimescale_.has(trackId)) {
            timescale = this.trackIdToTimescale_.get(trackId);
          }
        })

        .fullBox('tfdt', (box) => {
          goog.asserts.assert(
              box.version != null,
              'TFDT is a full box and should have a valid version.');

          const parsedTFDT = shaka.util.Mp4BoxParsers.parseTFDT(
              box.reader, box.version);

          baseMediaDecodeTime = parsedTFDT.baseMediaDecodeTime;
        })
        .box('mdat', (box) => {
          if (baseMediaDecodeTime === null) {
            // This field should have been populated by
            // the Base Media Decode time in the TFDT box
            throw new shaka.util.Error(
                shaka.util.Error.Severity.CRITICAL,
                shaka.util.Error.Category.TEXT,
                shaka.util.Error.Code.INVALID_MP4_CEA);
          }
          this.parseMdat_(box.reader, baseMediaDecodeTime, timescale,
              defaultSampleDuration, defaultSampleSize, sampleData,
              captionPackets);
        })
        .parse(mediaSegment, /* partialOkay= */ false);

    return captionPackets;
  }

  /**
   * Parse MDAT box. Walks the length-prefixed NAL units in the box, passes
   * each SEI NAL unit's payload to the SEI processor, and appends every
   * packet it yields to captionPackets together with its presentation time.
   * @param {!shaka.util.DataViewReader} reader Reader over the MDAT payload.
   * @param {number} time Decode time of the first sample, in timescale units.
   * @param {number} timescale Time units per second for this track.
   * @param {number} defaultSampleDuration Duration used for samples that do
   *   not specify their own.
   * @param {number} defaultSampleSize Size used for samples that do not
   *   specify their own.
   * @param {!Array<shaka.util.ParsedTRUNSample>} sampleData Per-sample data
   *   from the TRUN box.
   * @param {!Array<!shaka.cea.ICeaParser.CaptionPacket>} captionPackets
   *   Output array; parsed packets are pushed onto it.
   * @private
   */
  parseMdat_(reader, time, timescale, defaultSampleDuration,
      defaultSampleSize, sampleData, captionPackets) {
    let sampleIndex = 0;

    // The fields in each ParsedTRUNSample contained in the sampleData
    // array are nullable. In the case of sample data and sample duration,
    // we use the defaults provided by the TREX/TFHD boxes. For sample
    // composition time offset, we default to 0.
    let sampleSize = defaultSampleSize;

    if (sampleData.length) {
      sampleSize = sampleData[0].sampleSize || defaultSampleSize;
    }

    while (reader.hasMoreData()) {
      // Each NAL unit is preceded by a 4-byte length.
      const naluSize = reader.readUint32();

      // A zero-length NAL unit has neither a header byte nor a payload, so
      // only its length prefix is consumed. Reading a type byte here would
      // take a byte from the next length prefix and then ask the reader to
      // read or skip -1 bytes.
      if (naluSize > 0) {
        // The NAL unit type is the low 5 bits of the first byte.
        const naluType = reader.readUint8() & 0x1F;
        if (naluType == shaka.cea.ICeaParser.NALU_TYPE_SEI) {
          let timeOffset = 0;

          if (sampleData.length > sampleIndex) {
            timeOffset =
                sampleData[sampleIndex].sampleCompositionTimeOffset || 0;
          }

          const pts = (time + timeOffset) / timescale;
          // The type byte has already been consumed, hence naluSize - 1.
          for (const packet of this.seiProcessor_.process(
              reader.readBytes(naluSize - 1))) {
            captionPackets.push({
              packet,
              pts,
            });
          }
        } else {
          reader.skip(naluSize - 1);
        }
      }

      // Account for the NAL unit and its 4-byte length prefix. When the
      // remaining size reaches zero the current sample is complete, so
      // advance the clock and move on to the next sample.
      sampleSize -= (naluSize + 4);
      if (sampleSize == 0) {
        if (sampleData.length > sampleIndex) {
          time += sampleData[sampleIndex].sampleDuration ||
              defaultSampleDuration;
        } else {
          time += defaultSampleDuration;
        }

        sampleIndex++;

        if (sampleData.length > sampleIndex) {
          sampleSize = sampleData[sampleIndex].sampleSize || defaultSampleSize;
        } else {
          sampleSize = defaultSampleSize;
        }
      }
    }
  }
};
Loading

0 comments on commit 480d4a8

Please sign in to comment.