diff --git a/packager/media/base/audio_stream_info.h b/packager/media/base/audio_stream_info.h index 7e1bfadc93a..ed6af816739 100644 --- a/packager/media/base/audio_stream_info.h +++ b/packager/media/base/audio_stream_info.h @@ -60,6 +60,10 @@ class AudioStreamInfo : public StreamInfo { sampling_frequency_ = sampling_frequency; } + void set_max_bitrate(const uint32_t max_bitrate) { + max_bitrate_ = max_bitrate; + } + /// @param audio_object_type is only used by AAC Codec, ignored otherwise. /// @return The codec string. static std::string GetCodecString(Codec codec, uint8_t audio_object_type); diff --git a/packager/media/formats/mp2t/CMakeLists.txt b/packager/media/formats/mp2t/CMakeLists.txt index 6c4d5b7b5a4..8aa9fb95ccf 100644 --- a/packager/media/formats/mp2t/CMakeLists.txt +++ b/packager/media/formats/mp2t/CMakeLists.txt @@ -33,6 +33,7 @@ add_library(mp2t STATIC pes_packet_generator.h program_map_table_writer.cc program_map_table_writer.h + ts_audio_type.h ts_muxer.cc ts_muxer.h ts_packet.cc diff --git a/packager/media/formats/mp2t/mp2t_media_parser.cc b/packager/media/formats/mp2t/mp2t_media_parser.cc index a83c95ad27d..a7e4ed5adce 100644 --- a/packager/media/formats/mp2t/mp2t_media_parser.cc +++ b/packager/media/formats/mp2t/mp2t_media_parser.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -274,7 +275,8 @@ void Mp2tMediaParser::RegisterPmt(int program_number, int pmt_pid) { DVLOG(1) << "Create a new PMT parser"; std::unique_ptr pmt_section_parser(new TsSectionPmt(std::bind( &Mp2tMediaParser::RegisterPes, this, pmt_pid, std::placeholders::_1, - std::placeholders::_2, std::placeholders::_3, std::placeholders::_4))); + std::placeholders::_2, std::placeholders::_3, std::placeholders::_4, + std::placeholders::_5, std::placeholders::_6, std::placeholders::_7))); std::unique_ptr pmt_pid_state( new PidState(pmt_pid, PidState::kPidPmt, std::move(pmt_section_parser))); pmt_pid_state->Enable(); @@ -284,13 +286,19 @@ void 
Mp2tMediaParser::RegisterPmt(int program_number, int pmt_pid) { void Mp2tMediaParser::RegisterPes(int pmt_pid, int pes_pid, TsStreamType stream_type, + uint32_t max_bitrate, + const std::string& lang, + TsAudioType audio_type, const uint8_t* descriptor, size_t descriptor_length) { if (pids_.count(pes_pid) != 0) return; DVLOG(1) << "RegisterPes:" << " pes_pid=" << pes_pid << " stream_type=" << std::hex - << static_cast(stream_type) << std::dec; + << static_cast(stream_type) << std::dec + << " max_bitrate=" << max_bitrate << " lang=" << lang + << " audio_type=" << std::hex << static_cast(audio_type) + << std::dec; // Create a stream parser corresponding to the stream type. PidState::PidType pid_type = PidState::kPidVideoPes; @@ -340,6 +348,10 @@ void Mp2tMediaParser::RegisterPes(int pmt_pid, new PidState(pes_pid, pid_type, std::move(pes_section_parser))); pes_pid_state->Enable(); pids_.emplace(pes_pid, std::move(pes_pid_state)); + + // Keep the PMT-provided metadata so OnNewStreamInfo() can apply it. + pes_metadata_.insert( + std::make_pair(pes_pid, PesMetadata{max_bitrate, lang, audio_type})); } void Mp2tMediaParser::OnNewStreamInfo( @@ -358,6 +370,17 @@ void Mp2tMediaParser::OnNewStreamInfo( if (new_stream_info) { // Set the stream configuration information for the PID. 
+ auto pes_metadata = pes_metadata_.find(pes_pid); + DCHECK(pes_metadata != pes_metadata_.end()); + if (!pes_metadata->second.language.empty()) + new_stream_info->set_language(pes_metadata->second.language); + if (new_stream_info->stream_type() == kStreamAudio) { + auto* audio_info = static_cast(new_stream_info.get()); + audio_info->set_max_bitrate(pes_metadata->second.max_bitrate); + // TODO(modernletter): Add a field for the audio type to AudioStreamInfo + // and set it here from audio_type. + } + pid_state->second->set_config(new_stream_info); } else { LOG(WARNING) << "Ignoring unsupported stream with pid=" << pes_pid; diff --git a/packager/media/formats/mp2t/mp2t_media_parser.h b/packager/media/formats/mp2t/mp2t_media_parser.h index 5ddcc74ac50..fdce93e2d5c 100644 --- a/packager/media/formats/mp2t/mp2t_media_parser.h +++ b/packager/media/formats/mp2t/mp2t_media_parser.h @@ -14,6 +14,7 @@ #include #include #include +#include #include namespace shaka { @@ -27,6 +28,12 @@ class PidState; class TsPacket; class TsSection; +struct PesMetadata { + uint32_t max_bitrate; // Bits per second; 0 if the PMT gives none. + std::string language; // ISO 639 code from the PMT; empty if absent. + TsAudioType audio_type; // kUndefined if the PMT gives none. +}; + class Mp2tMediaParser : public MediaParser { public: Mp2tMediaParser(); @@ -50,10 +57,15 @@ class Mp2tMediaParser : public MediaParser { // Callback invoked to register a PES pid. // Possible values for |media_type| are defined in: // ISO-13818.1 / ITU H.222 Table 2.34 "Media type assignments". + // Possible values for |audio_type| are defined in: + // ISO-13818.1 / ITU H.222 Table 2-60 "Audio type values". // |pes_pid| is part of the Program Map Table refered by |pmt_pid|. void RegisterPes(int pmt_pid, int pes_pid, TsStreamType media_type, + uint32_t max_bitrate, + const std::string& lang, + TsAudioType audio_type, const uint8_t* descriptor, size_t descriptor_length); @@ -94,6 +106,9 @@ class Mp2tMediaParser : public MediaParser { // has a deterministic order. std::map> pids_; + // Map of PIDs and their metadata. 
+ std::map pes_metadata_; + // Whether |init_cb_| has been invoked. bool is_initialized_; diff --git a/packager/media/formats/mp2t/mp2t_media_parser_unittest.cc b/packager/media/formats/mp2t/mp2t_media_parser_unittest.cc index aeb44b248bb..60808daa3b1 100644 --- a/packager/media/formats/mp2t/mp2t_media_parser_unittest.cc +++ b/packager/media/formats/mp2t/mp2t_media_parser_unittest.cc @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -190,6 +191,19 @@ TEST_F(Mp2tMediaParserTest, PtsZeroDtsWrapAround) { EXPECT_GT(video_max_pts_, static_cast(1) << 33); } +TEST_F(Mp2tMediaParserTest, PmtEsDescriptors) { + // "bear-visualy-impaired-eng-audio.ts" consists of an audio stream marked + // as English audio with commentary for visually impaired viewers, with the + // max bitrate set to ~128kbps (the test expects 131600 bps). + + ParseMpeg2TsFile("bear-visualy-impaired-eng-audio.ts", 188); + EXPECT_TRUE(parser_->Flush()); + EXPECT_STREQ("eng", stream_map_[257]->language().c_str()); + + auto* audio_info = static_cast(stream_map_[257].get()); + EXPECT_EQ(131600, audio_info->max_bitrate()); +} + } // namespace mp2t } // namespace media } // namespace shaka diff --git a/packager/media/formats/mp2t/ts_audio_type.h b/packager/media/formats/mp2t/ts_audio_type.h new file mode 100644 index 00000000000..a8347dcad05 --- /dev/null +++ b/packager/media/formats/mp2t/ts_audio_type.h @@ -0,0 +1,30 @@ +// Copyright 2023 Google LLC. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef PACKAGER_MEDIA_FORMATS_MP2T_TS_AUDIO_TYPE_H +#define PACKAGER_MEDIA_FORMATS_MP2T_TS_AUDIO_TYPE_H + +#include + +namespace shaka { +namespace media { +namespace mp2t { + +enum class TsAudioType : uint8_t { + // ISO-13818.1 / ITU H.222 Table 2-60 "Audio type values" + kUndefined = 0x00, + kCleanEffects = 0x01, + kHearingImpaired = 0x02, + kVisualyImpairedCommentary = 0x03, + // 0x04-0x7F - user private + // 0x80-0xFF - reserved +}; + +} // namespace mp2t +} // namespace media +} // namespace shaka + +#endif // PACKAGER_MEDIA_FORMATS_MP2T_TS_AUDIO_TYPE_H diff --git a/packager/media/formats/mp2t/ts_section_pmt.cc b/packager/media/formats/mp2t/ts_section_pmt.cc index 07aef734138..f8748017e09 100644 --- a/packager/media/formats/mp2t/ts_section_pmt.cc +++ b/packager/media/formats/mp2t/ts_section_pmt.cc @@ -10,12 +10,21 @@ #include #include +#include #include namespace shaka { namespace media { namespace mp2t { +namespace { + +const int kISO639LanguageDescriptor = 0x0A; // ISO 639 language descriptor +const int kMaximumBitrateDescriptor = 0x0E; // maximum bitrate descriptor +const int kSubtitlingDescriptor = 0x59; // DVB subtitling descriptor + +} // namespace + TsSectionPmt::TsSectionPmt(const RegisterPesCb& register_pes_cb) : register_pes_cb_(register_pes_cb) { } @@ -82,6 +91,9 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) { TsStreamType stream_type; const uint8_t* descriptor; size_t descriptor_length; + std::string lang; + uint32_t max_bitrate; + TsAudioType audio_type; }; std::vector pid_info; while (static_cast(bit_reader->bits_available()) > @@ -99,22 +111,59 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) { // Do not register the PID right away. // Wait for the end of the section to be fully parsed // to make sure there is no error. 
- pid_info.push_back({pid_es, stream_type, descriptor, es_info_length}); + pid_info.push_back({pid_es, stream_type, descriptor, es_info_length, "", 0, + TsAudioType::kUndefined}); // Read the ES info descriptors. // Defined in section 2.6 of ISO-13818. - if (es_info_length > 0) { - uint8_t descriptor_tag; + uint8_t descriptor_tag; + uint8_t descriptor_length; + + while (es_info_length) { + // Every descriptor starts with a two-byte header (tag and length). + RCHECK(es_info_length >= 2); RCHECK(bit_reader->ReadBits(8, &descriptor_tag)); - es_info_length--; + RCHECK(bit_reader->ReadBits(8, &descriptor_length)); + es_info_length -= 2; + // Reject a descriptor body that overruns the remaining ES info. + RCHECK(es_info_length >= descriptor_length); // See ETSI EN 300 468 Section 6.1 if (stream_type == TsStreamType::kPesPrivateData && - descriptor_tag == 0x59) { // subtitling_descriptor + descriptor_tag == kSubtitlingDescriptor) { pid_info.back().stream_type = TsStreamType::kDvbSubtitles; + } else if (descriptor_tag == kISO639LanguageDescriptor && + descriptor_length >= 4) { + // See section 2.6.19 of ISO-13818 + // Descriptor can contain 0..N language definitions, + // we process only the first one + char lang[3]; + RCHECK(bit_reader->ReadBits(8, &lang[0])); // ISO_639_language_code + RCHECK(bit_reader->ReadBits(8, &lang[1])); + RCHECK(bit_reader->ReadBits(8, &lang[2])); + RCHECK(bit_reader->ReadBits(8, &pid_info.back().audio_type)); + pid_info.back().lang = std::string(lang, 3); + + es_info_length -= 4; + descriptor_length -= 4; + } else if (descriptor_tag == kMaximumBitrateDescriptor && + descriptor_length >= 3) { + // See section 2.6.25 of ISO-13818 + uint32_t max_bitrate; + RCHECK(bit_reader->SkipBits(2)); // reserved + RCHECK(bit_reader->ReadBits(22, &max_bitrate)); + // maximum bitrate is stored in units of 50 bytes per second + pid_info.back().max_bitrate = 50 * 8 * max_bitrate; + + es_info_length -= 3; + descriptor_length -= 3; } + + RCHECK(bit_reader->SkipBits(8 * descriptor_length)); + es_info_length -= descriptor_length; } - RCHECK(bit_reader->SkipBits(8 * es_info_length)); + + 
RCHECK(bit_reader->SkipBytes(es_info_length)); } // Read the CRC. @@ -123,8 +172,8 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) { // Once the PMT has been proved to be correct, register the PIDs. for (auto& info : pid_info) { - register_pes_cb_(info.pid_es, info.stream_type, info.descriptor, - info.descriptor_length); + register_pes_cb_(info.pid_es, info.stream_type, info.max_bitrate, info.lang, + info.audio_type, info.descriptor, info.descriptor_length); } return true; diff --git a/packager/media/formats/mp2t/ts_section_pmt.h b/packager/media/formats/mp2t/ts_section_pmt.h index 96075281bb0..c3d3a4cab63 100644 --- a/packager/media/formats/mp2t/ts_section_pmt.h +++ b/packager/media/formats/mp2t/ts_section_pmt.h @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -17,10 +18,20 @@ namespace mp2t { class TsSectionPmt : public TsSectionPsi { public: - // RegisterPesCb::Run(int pes_pid, int stream_type); + // RegisterPesCb::Run(int pes_pid, int stream_type, uint32_t max_bitrate, + // const string& lang, TsAudioType audio_type, const uint8_t* descriptor, + // size_t descriptor_length); // Stream type is defined in // "Table 2-34 – Stream type assignments" in H.222 - typedef std::function + // Audio type is defined in + // "Table 2-60 - Audio type values" in H.222 + typedef std::function RegisterPesCb; explicit TsSectionPmt(const RegisterPesCb& register_pes_cb); diff --git a/packager/media/test/data/README b/packager/media/test/data/README index a113480e24b..f18e601b34b 100644 --- a/packager/media/test/data/README +++ b/packager/media/test/data/README @@ -29,6 +29,12 @@ bear-640x360.ts - AVC + AAC encode, multiplexed into an MPEG2-TS container. 
bear-640x360_ptswraparound.ts - Same as bear-640x360.ts, with a timestamp wrap-around in the middle, created with the below command: ffmpeg -itsoffset 95442 -i bear-640x360.ts -c:v copy -c:a copy -muxdelay 0 bear-640x360_ptswraparound.ts bear-640x360-hevc.ts - HEVC + AAC encode, multiplexed into an MPEG2-TS container. +bear-visualy-impaired-eng-audio.ts - Audio stream from bear-640x360.ts marked as English with commentary for visually impaired viewers, created with the below commands: + tsp -I file bear-640x360.ts \ + -P filter --video --negate \ + -P inject --replace --pid 4096 --xml bear-visualy-impaired-eng-audio-pmt.xml \ + -O file bear-visualy-impaired-eng-audio.ts + (The XML template can be obtained with the command "tsp -I file bear-640x360.ts -P tables --pid 4096 --tid 2 --max 1 --xml pmt.xml -O drop") // ISO-BMFF streams. bear-1280x720.mp4 - AVC + AAC encode, mulitplexed into an ISOBMFF container. diff --git a/packager/media/test/data/bear-visualy-impaired-eng-audio-pmt.xml b/packager/media/test/data/bear-visualy-impaired-eng-audio-pmt.xml new file mode 100644 index 00000000000..bc8c273b37b --- /dev/null +++ b/packager/media/test/data/bear-visualy-impaired-eng-audio-pmt.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/packager/media/test/data/bear-visualy-impaired-eng-audio.ts b/packager/media/test/data/bear-visualy-impaired-eng-audio.ts new file mode 100644 index 00000000000..94ba1b6479a Binary files /dev/null and b/packager/media/test/data/bear-visualy-impaired-eng-audio.ts differ