From ea9aa356acc549631407004e55d877bcac54caad Mon Sep 17 00:00:00 2001 From: boks1971 Date: Mon, 31 Jan 2022 12:31:15 +0530 Subject: [PATCH] Firefox case when screen and camera offer together Firefox camera tracks do not fire onTrack when screen share and camera is negotiated in the same offer. There were two issues 1. When updateRemoteDescription runs, only the first media description of a specific kind was considered. So, when screen was before camera, screen did not $ave RID in extmap. It is in the camera section as video is using simulcast. All the extra ones in the camera section were ignored. Parse all descriptions and collect extensions to fix it. 2. This one sounds like a Firefox issue. When sending answer back, if the screen share section has the RID, Firefox seems to get stuck. It does not send screen or camera media and after 15 seconds, the peer connection fails. To address that, store extensions from SDP in transceiver and while constructing answer SDP, cross check against transceiver's negotiated extensions before adding it to the answer SDP. The reason I think this is a Firefox issue is because how it behaves before this change when camera is negotiated first and screen later in a separate offer. When screen is negotiated later, answer does send back RID and it works. Looks like Firefox has an issue seeing RID before it expects it. Testing -------- Used LiveKit JS SDK sample to negotiate screen and camera together. This was not an issue with Chrome as I could not get Chrome to negotiate screen and camera in the same offer with screen ahead of camera. --- api.go | 31 +++++++++++++------------------ api_test.go | 15 +++++++++++++++ mediaengine.go | 30 ++++++++++++++++++------------ peerconnection.go | 10 +++++++++- rtptransceiver.go | 26 +++++++++++++++++++++++++- sdp.go | 17 ++++++++++++++++- 6 files changed, 96 insertions(+), 33 deletions(-) diff --git a/api.go b/api.go index b85ac62c25e..85424df4d9b 100644 --- a/api.go +++ b/api.go @@ -23,28 +23,21 @@ type API struct { // NewAPI Creates a new API object for keeping semi-global settings to WebRTC objects func NewAPI(options ...func(*API)) *API { - a := &API{interceptor: &interceptor.NoOp{}} + a := &API{ + interceptor: &interceptor.NoOp{}, + settingEngine: &SettingEngine{}, + mediaEngine: &MediaEngine{}, + interceptorRegistry: &interceptor.Registry{}, + } for _, o := range options { o(a) } - if a.settingEngine == nil { - a.settingEngine = &SettingEngine{} - } - if a.settingEngine.LoggerFactory == nil { a.settingEngine.LoggerFactory = logging.NewDefaultLoggerFactory() } - if a.mediaEngine == nil { - a.mediaEngine = &MediaEngine{} - } - - if a.interceptorRegistry == nil { - a.interceptorRegistry = &interceptor.Registry{} - } - return a } @@ -52,9 +45,8 @@ func NewAPI(options ...func(*API)) *API { // Settings can be changed after passing the engine to an API. func WithMediaEngine(m *MediaEngine) func(a *API) { return func(a *API) { - if m != nil { - a.mediaEngine = m - } else { + a.mediaEngine = m + if a.mediaEngine == nil { a.mediaEngine = &MediaEngine{} } } @@ -70,8 +62,11 @@ func WithSettingEngine(s SettingEngine) func(a *API) { // WithInterceptorRegistry allows providing Interceptors to the API. // Settings should not be changed after passing the registry to an API. -func WithInterceptorRegistry(interceptorRegistry *interceptor.Registry) func(a *API) { +func WithInterceptorRegistry(ir *interceptor.Registry) func(a *API) { return func(a *API) { - a.interceptorRegistry = interceptorRegistry + a.interceptorRegistry = ir + if a.interceptorRegistry == nil { + a.interceptorRegistry = &interceptor.Registry{} + } } } diff --git a/api_test.go b/api_test.go index d9f90be435a..d2de28094bf 100644 --- a/api_test.go +++ b/api_test.go @@ -19,6 +19,10 @@ func TestNewAPI(t *testing.T) { if api.mediaEngine == nil { t.Error("Failed to init media engine") } + + if api.interceptorRegistry == nil { + t.Error("Failed to init interceptor registry") + } } func TestNewAPI_Options(t *testing.T) { @@ -40,3 +44,14 @@ func TestNewAPI_Options(t *testing.T) { t.Error("Failed to set media engine") } } + +func TestNewAPI_OptionsDefaultize(t *testing.T) { + api := NewAPI( + WithMediaEngine(nil), + WithInterceptorRegistry(nil), + ) + + assert.NotNil(t, api.settingEngine) + assert.NotNil(t, api.mediaEngine) + assert.NotNil(t, api.interceptorRegistry) +} diff --git a/mediaengine.go b/mediaengine.go index bdab6b9fb60..281652303e3 100644 --- a/mediaengine.go +++ b/mediaengine.go @@ -476,10 +476,10 @@ func (m *MediaEngine) updateFromRemoteDescription(desc sdp.SessionDescription) e for _, media := range desc.MediaDescriptions { var typ RTPCodecType switch { - case !m.negotiatedAudio && strings.EqualFold(media.MediaName.Media, "audio"): + case strings.EqualFold(media.MediaName.Media, "audio"): m.negotiatedAudio = true typ = RTPCodecTypeAudio - case !m.negotiatedVideo && strings.EqualFold(media.MediaName.Media, "video"): + case strings.EqualFold(media.MediaName.Media, "video"): m.negotiatedVideo = true typ = RTPCodecTypeVideo default: @@ -561,18 +561,24 @@ func (m *MediaEngine) getRTPParametersByKind(typ RTPCodecType, directions []RTPT m.mu.RLock() defer m.mu.RUnlock() - if m.negotiatedVideo && typ == RTPCodecTypeVideo || - m.negotiatedAudio && typ == RTPCodecTypeAudio { - for id, e := range m.negotiatedHeaderExtensions { - if haveRTPTransceiverDirectionIntersection(e.allowedDirections, directions) && (e.isAudio && typ == RTPCodecTypeAudio || e.isVideo && typ == RTPCodecTypeVideo) { - headerExtensions = append(headerExtensions, RTPHeaderExtensionParameter{ID: id, URI: e.uri}) - } - } + + var mediaHeaderExtensions map[int]mediaEngineHeaderExtension + if (m.negotiatedVideo && typ == RTPCodecTypeVideo) || (m.negotiatedAudio && typ == RTPCodecTypeAudio) { + mediaHeaderExtensions = m.negotiatedHeaderExtensions } else { + mediaHeaderExtensions = make(map[int]mediaEngineHeaderExtension, len(m.headerExtensions)) for id, e := range m.headerExtensions { - if haveRTPTransceiverDirectionIntersection(e.allowedDirections, directions) && (e.isAudio && typ == RTPCodecTypeAudio || e.isVideo && typ == RTPCodecTypeVideo) { - headerExtensions = append(headerExtensions, RTPHeaderExtensionParameter{ID: id + 1, URI: e.uri}) - } + mediaHeaderExtensions[id+1] = e + } + } + + for id, e := range mediaHeaderExtensions { + if (typ == RTPCodecTypeVideo && !e.isVideo) || (typ == RTPCodecTypeAudio && !e.isAudio) { + continue + } + + if haveRTPTransceiverDirectionIntersection(e.allowedDirections, directions) { + headerExtensions = append(headerExtensions, RTPHeaderExtensionParameter{ID: id, URI: e.uri}) } } diff --git a/peerconnection.go b/peerconnection.go index 9bb5e1be17e..60f2fb1940a 100644 --- a/peerconnection.go +++ b/peerconnection.go @@ -1083,6 +1083,14 @@ func (pc *PeerConnection) SetRemoteDescription(desc SessionDescription) error { _ = t.SetCodecPreferences(filteredCodecs) } + if extensions, err := rtpExtensionsFromMediaDescription(media); err == nil { + headerExtensions := []RTPHeaderExtensionParameter{} + for uri, id := range extensions { + headerExtensions = append(headerExtensions, RTPHeaderExtensionParameter{URI: uri, ID: id}) + } + _ = t.SetHeaderExtensions(headerExtensions) + } + case direction == RTPTransceiverDirectionRecvonly: if t.Direction() == RTPTransceiverDirectionSendrecv { t.setDirection(RTPTransceiverDirectionSendonly) @@ -1200,7 +1208,7 @@ func (pc *PeerConnection) startReceiver(incoming trackDetails, receiver *RTPRece for _, t := range receiver.Tracks() { if t.SSRC() == 0 || t.RID() != "" { - return + continue } go func(track *TrackRemote) { diff --git a/rtptransceiver.go b/rtptransceiver.go index 4ff4ead7d64..632f8e587b2 100644 --- a/rtptransceiver.go +++ b/rtptransceiver.go @@ -18,7 +18,8 @@ type RTPTransceiver struct { receiver atomic.Value // *RTPReceiver direction atomic.Value // RTPTransceiverDirection - codecs []RTPCodecParameters // User provided codecs via SetCodecPreferences + codecs []RTPCodecParameters // User provided codecs via SetCodecPreferences + headerExtensions []RTPHeaderExtensionParameter stopped bool kind RTPCodecType @@ -80,6 +81,29 @@ func (t *RTPTransceiver) getCodecs() []RTPCodecParameters { return filteredCodecs } +// SetHeaderExtensions stores header extension from SDP +func (t *RTPTransceiver) SetHeaderExtensions(headerExtensions []RTPHeaderExtensionParameter) error { + t.mu.Lock() + defer t.mu.Unlock() + + t.headerExtensions = headerExtensions + return nil +} + +func (t *RTPTransceiver) hasHeaderExtension(uri string) bool { + t.mu.RLock() + defer t.mu.RUnlock() + + for _, he := range t.headerExtensions { + if he.URI == uri { + return true + } + } + + return false +} + +// Codecs returns list of supported codecs // Sender returns the RTPTransceiver's RTPSender if it has one func (t *RTPTransceiver) Sender() *RTPSender { if v := t.sender.Load(); v != nil { diff --git a/sdp.go b/sdp.go index 1e1a96fc476..dcc12037fff 100644 --- a/sdp.go +++ b/sdp.go @@ -337,7 +337,18 @@ func populateLocalCandidates(sessionDescription *SessionDescription, i *ICEGathe } } -func addTransceiverSDP(d *sdp.SessionDescription, isPlanB, shouldAddCandidates bool, dtlsFingerprints []DTLSFingerprint, mediaEngine *MediaEngine, midValue string, iceParams ICEParameters, candidates []ICECandidate, dtlsRole sdp.ConnectionRole, iceGatheringState ICEGatheringState, mediaSection mediaSection) (bool, error) { +func addTransceiverSDP( + d *sdp.SessionDescription, + isPlanB, shouldAddCandidates bool, + dtlsFingerprints []DTLSFingerprint, + mediaEngine *MediaEngine, + midValue string, + iceParams ICEParameters, + candidates []ICECandidate, + dtlsRole sdp.ConnectionRole, + iceGatheringState ICEGatheringState, + mediaSection mediaSection, +) (bool, error) { transceivers := mediaSection.transceivers if len(transceivers) < 1 { return false, errSDPZeroTransceivers @@ -389,6 +400,10 @@ func addTransceiverSDP(d *sdp.SessionDescription, isPlanB, shouldAddCandidates b parameters := mediaEngine.getRTPParametersByKind(t.kind, directions) for _, rtpExtension := range parameters.HeaderExtensions { + if !t.hasHeaderExtension(rtpExtension.URI) { + continue + } + extURL, err := url.Parse(rtpExtension.URI) if err != nil { return false, err