From 27831ced0c04e7f1e08ea662d90b0477448e5a54 Mon Sep 17 00:00:00 2001 From: aler9 <46489434+aler9@users.noreply.github.com> Date: Sat, 18 May 2024 12:28:41 +0200 Subject: [PATCH] support multichannel Opus (https://github.com/bluenviron/mediamtx/issues/3355) --- README.md | 1 + pkg/description/session_test.go | 10 +- pkg/format/format.go | 2 +- pkg/format/format_test.go | 35 ++++- pkg/format/opus.go | 141 ++++++++++++++---- .../FuzzUnmarshalOpusMulti/771e938e4458e983 | 2 + .../FuzzUnmarshalOpusMulti/9cd9b70960b4a733 | 2 + 7 files changed, 158 insertions(+), 35 deletions(-) create mode 100644 pkg/format/testdata/fuzz/FuzzUnmarshalOpusMulti/771e938e4458e983 create mode 100644 pkg/format/testdata/fuzz/FuzzUnmarshalOpusMulti/9cd9b70960b4a733 diff --git a/README.md b/README.md index c03f7cfc..00e74d4d 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,7 @@ In RTSP, media streams are routed between server and clients by using RTP packet |[RFC2250, RTP Payload Format for MPEG1/MPEG2 Video](https://datatracker.ietf.org/doc/html/rfc2250)|MPEG-1 video, MPEG-2 audio, MPEG-TS payload formats| |[RFC2435, RTP Payload Format for JPEG-compressed Video](https://datatracker.ietf.org/doc/html/rfc2435)|M-JPEG payload format| |[RFC7587, RTP Payload Format for the Opus Speech and Audio Codec](https://datatracker.ietf.org/doc/html/rfc7587)|Opus payload format| +|[Multiopus in libwebrtc](https://webrtc-review.googlesource.com/c/src/+/129768)|Opus payload format| |[RFC5215, RTP Payload Format for Vorbis Encoded Audio](https://datatracker.ietf.org/doc/html/rfc5215)|Vorbis payload format| |[RFC4184, RTP Payload Format for AC-3 Audio](https://datatracker.ietf.org/doc/html/rfc4184)|AC-3 payload format| |[RFC6416, RTP Payload Format for MPEG-4 Audio/Visual Streams](https://datatracker.ietf.org/doc/html/rfc6416)|MPEG-4 audio payload format| diff --git a/pkg/description/session_test.go b/pkg/description/session_test.go index b5ec7887..3774ea88 100644 --- a/pkg/description/session_test.go +++ 
b/pkg/description/session_test.go @@ -310,8 +310,9 @@ var casesSession = []struct { IsBackChannel: true, Formats: []format.Format{ &format.Opus{ - PayloadTyp: 111, - IsStereo: false, + PayloadTyp: 111, + IsStereo: false, + ChannelCount: 1, }, &format.Generic{ PayloadTyp: 103, @@ -820,8 +821,9 @@ func TestSessionFindFormat(t *testing.T) { Type: MediaTypeAudio, Formats: []format.Format{ &format.Opus{ - PayloadTyp: 111, - IsStereo: true, + PayloadTyp: 111, + IsStereo: true, + ChannelCount: 2, }, }, }, diff --git a/pkg/format/format.go b/pkg/format/format.go index d586785e..657c43ae 100644 --- a/pkg/format/format.go +++ b/pkg/format/format.go @@ -112,7 +112,7 @@ func Unmarshal(mediaType string, payloadType uint8, rtpMap string, fmtp map[stri // audio - case codec == "opus": + case codec == "opus", codec == "multiopus": return &Opus{} case codec == "vorbis": diff --git a/pkg/format/format_test.go b/pkg/format/format_test.go index 3504f8fd..e1f221d0 100644 --- a/pkg/format/format_test.go +++ b/pkg/format/format_test.go @@ -645,14 +645,37 @@ var casesFormat = []struct { "sprop-stereo": "1", }, &Opus{ - PayloadTyp: 96, - IsStereo: true, + PayloadTyp: 96, + IsStereo: true, + ChannelCount: 2, }, "opus/48000/2", map[string]string{ "sprop-stereo": "1", }, }, + { + "audio opus 5.1", + "audio", + 96, + "multiopus/48000/6", + map[string]string{ + "num_streams": "4", + "coupled_streams": "2", + "channel_mapping": "0,4,1,2,3,5", + }, + &Opus{ + PayloadTyp: 96, + ChannelCount: 6, + }, + "multiopus/48000/6", + map[string]string{ + "channel_mapping": "0,4,1,2,3,5", + "coupled_streams": "2", + "num_streams": "4", + "sprop-maxcapturerate": "48000", + }, + }, { "audio ac3", "audio", @@ -1250,6 +1273,14 @@ func FuzzUnmarshalOpus(f *testing.F) { }) } +func FuzzUnmarshalOpusMulti(f *testing.F) { + f.Add("48000/a") + + f.Fuzz(func(_ *testing.T, a string) { + Unmarshal("audio", 96, "multiopus/"+a, nil) //nolint:errcheck + }) +} + func FuzzUnmarshalVorbis(f *testing.F) { f.Fuzz(func(_ 
*testing.T, a, b string) { Unmarshal("audio", 96, "Vorbis/"+a, map[string]string{ //nolint:errcheck diff --git a/pkg/format/opus.go b/pkg/format/opus.go index 77bbb770..a4030375 100644 --- a/pkg/format/opus.go +++ b/pkg/format/opus.go @@ -12,33 +12,63 @@ import ( // Opus is the RTP format for the Opus codec. // Specification: https://datatracker.ietf.org/doc/html/rfc7587 +// Specification: https://webrtc-review.googlesource.com/c/src/+/129768 type Opus struct { - PayloadTyp uint8 - IsStereo bool + PayloadTyp uint8 + ChannelCount int + + // Deprecated: replaced by ChannelCount. + IsStereo bool } func (f *Opus) unmarshal(ctx *unmarshalContext) error { f.PayloadTyp = ctx.payloadType - tmp := strings.SplitN(ctx.clock, "/", 2) - if len(tmp) != 2 { - return fmt.Errorf("invalid clock (%v)", ctx.clock) - } + if ctx.codec == "opus" { + tmp := strings.SplitN(ctx.clock, "/", 2) + if len(tmp) != 2 { + return fmt.Errorf("invalid clock (%v)", ctx.clock) + } - sampleRate, err := strconv.ParseUint(tmp[0], 10, 31) - if err != nil || sampleRate != 48000 { - return fmt.Errorf("invalid sample rate: %d", sampleRate) - } + sampleRate, err := strconv.ParseUint(tmp[0], 10, 31) + if err != nil || sampleRate != 48000 { + return fmt.Errorf("invalid sample rate: '%s'", tmp[0]) + } - channelCount, err := strconv.ParseUint(tmp[1], 10, 31) - if err != nil || channelCount != 2 { - return fmt.Errorf("invalid channel count: %d", channelCount) - } + channelCount, err := strconv.ParseUint(tmp[1], 10, 31) + if err != nil || channelCount != 2 { + return fmt.Errorf("invalid channel count: '%s'", tmp[1]) + } + + // assume mono + f.ChannelCount = 1 + f.IsStereo = false - for key, val := range ctx.fmtp { - if key == "sprop-stereo" { - f.IsStereo = (val == "1") + for key, val := range ctx.fmtp { + if key == "sprop-stereo" { + if val == "1" { + f.ChannelCount = 2 + f.IsStereo = true + } + } + } + } else { + tmp := strings.SplitN(ctx.clock, "/", 2) + if len(tmp) != 2 { + return fmt.Errorf("invalid clock (%v)", 
ctx.clock) + } + + sampleRate, err := strconv.ParseUint(tmp[0], 10, 31) + if err != nil || sampleRate != 48000 { + return fmt.Errorf("invalid sample rate: '%s'", tmp[0]) } + + channelCount, err := strconv.ParseUint(tmp[1], 10, 31) + if err != nil { + return fmt.Errorf("invalid channel count: '%s'", tmp[1]) + } + + f.ChannelCount = int(channelCount) } return nil @@ -63,22 +93,77 @@ func (f *Opus) PayloadType() uint8 { // RTPMap implements Format. func (f *Opus) RTPMap() string { - // RFC7587: The RTP clock rate in "a=rtpmap" MUST be 48000, and the - // number of channels MUST be 2. - return "opus/48000/2" + if f.ChannelCount <= 2 { + // RFC7587: The RTP clock rate in "a=rtpmap" MUST be 48000, and the + // number of channels MUST be 2. + return "opus/48000/2" + } + + return "multiopus/48000/" + strconv.FormatUint(uint64(f.ChannelCount), 10) } // FMTP implements Format. func (f *Opus) FMTP() map[string]string { - fmtp := map[string]string{ - "sprop-stereo": func() string { - if f.IsStereo { - return "1" - } - return "0" - }(), + if f.ChannelCount <= 2 { + return map[string]string{ + "sprop-stereo": func() string { + if f.ChannelCount == 2 || (f.ChannelCount == 0 && f.IsStereo) { + return "1" + } + return "0" + }(), + } + } + + switch f.ChannelCount { + case 3: + return map[string]string{ + "num_streams": "2", + "coupled_streams": "1", + "channel_mapping": "0,2,1", + "sprop-maxcapturerate": "48000", + } + + case 4: + return map[string]string{ + "num_streams": "2", + "coupled_streams": "2", + "channel_mapping": "0,1,2,3", + "sprop-maxcapturerate": "48000", + } + + case 5: + return map[string]string{ + "num_streams": "3", + "coupled_streams": "2", + "channel_mapping": "0,4,1,2,3", + "sprop-maxcapturerate": "48000", + } + + case 6: + return map[string]string{ + "num_streams": "4", + "coupled_streams": "2", + "channel_mapping": "0,4,1,2,3,5", + "sprop-maxcapturerate": "48000", + } + + case 7: + return map[string]string{ + "num_streams": "4", + "coupled_streams": "3", + 
"channel_mapping": "0,4,1,2,3,5,6", + "sprop-maxcapturerate": "48000", + } + + default: // assume 8 + return map[string]string{ + "num_streams": "5", + "coupled_streams": "3", + "channel_mapping": "0,6,1,4,5,2,3,7", + "sprop-maxcapturerate": "48000", + } } - return fmtp } // PTSEqualsDTS implements Format. diff --git a/pkg/format/testdata/fuzz/FuzzUnmarshalOpusMulti/771e938e4458e983 b/pkg/format/testdata/fuzz/FuzzUnmarshalOpusMulti/771e938e4458e983 new file mode 100644 index 00000000..ee3f3399 --- /dev/null +++ b/pkg/format/testdata/fuzz/FuzzUnmarshalOpusMulti/771e938e4458e983 @@ -0,0 +1,2 @@ +go test fuzz v1 +string("0") diff --git a/pkg/format/testdata/fuzz/FuzzUnmarshalOpusMulti/9cd9b70960b4a733 b/pkg/format/testdata/fuzz/FuzzUnmarshalOpusMulti/9cd9b70960b4a733 new file mode 100644 index 00000000..e4946606 --- /dev/null +++ b/pkg/format/testdata/fuzz/FuzzUnmarshalOpusMulti/9cd9b70960b4a733 @@ -0,0 +1,2 @@ +go test fuzz v1 +string("/")