From d1369d494d9403ac4f0a1c621132594c5915a1bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20V=C3=A1rady?= Date: Tue, 22 Nov 2022 22:40:12 +0100 Subject: [PATCH 1/7] VoiceRecordings: honor advanced audio processing settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audio processing settings introduced in #8759 are now taken into account when recording a voice message. Signed-off-by: László Várady --- src/audio/VoiceRecording.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/audio/VoiceRecording.ts b/src/audio/VoiceRecording.ts index 99f878868d5..682360c33aa 100644 --- a/src/audio/VoiceRecording.ts +++ b/src/audio/VoiceRecording.ts @@ -93,8 +93,10 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { this.recorderStream = await navigator.mediaDevices.getUserMedia({ audio: { channelCount: CHANNELS, - noiseSuppression: true, // browsers ignore constraints they can't honour deviceId: MediaDeviceHandler.getAudioInput(), + autoGainControl: { ideal: MediaDeviceHandler.getAudioAutoGainControl() }, + echoCancellation: { ideal: MediaDeviceHandler.getAudioEchoCancellation() }, + noiseSuppression: { ideal: MediaDeviceHandler.getAudioNoiseSuppression() }, }, }); this.recorderContext = createAudioContext({ From b37174af006b3ccca85671e81626efc18969ff27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20V=C3=A1rady?= Date: Tue, 22 Nov 2022 22:51:53 +0100 Subject: [PATCH 2/7] VoiceRecordings: add higher-quality audio recording MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When recording non-voice audio (e.g. music, FX), a different Opus encoder application should be specified. It is also recommended to increase the bitrate to 64-96 kb/s for musical use. Note: the HQ mode is currently activated when noise suppression is turned off. This is a very arbitrary condition. 
Signed-off-by: László Várady --- src/audio/VoiceRecording.ts | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/audio/VoiceRecording.ts b/src/audio/VoiceRecording.ts index 682360c33aa..e429bd2035f 100644 --- a/src/audio/VoiceRecording.ts +++ b/src/audio/VoiceRecording.ts @@ -32,12 +32,26 @@ import mxRecorderWorkletPath from "./RecorderWorklet"; const CHANNELS = 1; // stereo isn't important export const SAMPLE_RATE = 48000; // 48khz is what WebRTC uses. 12khz is where we lose quality. -const BITRATE = 24000; // 24kbps is pretty high quality for our use case in opus. const TARGET_MAX_LENGTH = 900; // 15 minutes in seconds. Somewhat arbitrary, though longer == larger files. const TARGET_WARN_TIME_LEFT = 10; // 10 seconds, also somewhat arbitrary. export const RECORDING_PLAYBACK_SAMPLES = 44; +interface RecorderOptions { + bitrate: number; + encoderApplication: number; +} + +export const voiceRecorderOptions: RecorderOptions = { + bitrate: 24000, + encoderApplication: 2048, +}; + +export const higQualityRecorderOptions: RecorderOptions = { + bitrate: 96000, + encoderApplication: 2049, +}; + export interface IRecordingUpdate { waveform: number[]; // floating points between 0 (low) and 1 (high). 
timeSeconds: number; // float @@ -88,6 +102,10 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { this.targetMaxLength = null; } + private shouldRecordInHighQuality(): boolean { + return !MediaDeviceHandler.getAudioNoiseSuppression(); + } + private async makeRecorder() { try { this.recorderStream = await navigator.mediaDevices.getUserMedia({ @@ -137,15 +155,17 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { this.recorderProcessor.addEventListener("audioprocess", this.onAudioProcess); } + const hqRecording = this.shouldRecordInHighQuality(); this.recorder = new Recorder({ encoderPath, // magic from webpack encoderSampleRate: SAMPLE_RATE, - encoderApplication: 2048, // voice (default is "audio") + encoderApplication: hqRecording ? higQualityRecorderOptions.encoderApplication + : voiceRecorderOptions.encoderApplication, streamPages: true, // this speeds up the encoding process by using CPU over time encoderFrameSize: 20, // ms, arbitrary frame size we send to the encoder numberOfChannels: CHANNELS, sourceNode: this.recorderSource, - encoderBitRate: BITRATE, + encoderBitRate: hqRecording ? higQualityRecorderOptions.bitrate : voiceRecorderOptions.bitrate, // We use low values for the following to ease CPU usage - the resulting waveform // is indistinguishable for a voice message. Note that the underlying library will From 38fb60c14c43b745e1c39f88e67d4f07b5a5985c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20V=C3=A1rady?= Date: Sun, 27 Nov 2022 14:46:34 +0100 Subject: [PATCH 3/7] RecorderWorklet: fix type mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit src/audio/VoiceRecording.ts:129:67 - Argument of type 'null' is not assignable to parameter of type 'string | URL'. 
Signed-off-by: László Várady --- src/audio/RecorderWorklet.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/audio/RecorderWorklet.ts b/src/audio/RecorderWorklet.ts index 73b053db936..58348a2cd57 100644 --- a/src/audio/RecorderWorklet.ts +++ b/src/audio/RecorderWorklet.ts @@ -85,4 +85,4 @@ class MxVoiceWorklet extends AudioWorkletProcessor { registerProcessor(WORKLET_NAME, MxVoiceWorklet); -export default null; // to appease module loaders (we never use the export) +export default ""; // to appease module loaders (we never use the export) From d3b6064d8ab691e931f611458851fa31365dbfa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20V=C3=A1rady?= Date: Sun, 27 Nov 2022 17:56:37 +0100 Subject: [PATCH 4/7] VoiceRecording: test audio settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: László Várady --- src/audio/VoiceRecording.ts | 3 +- test/audio/VoiceRecording-test.ts | 70 ++++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/src/audio/VoiceRecording.ts b/src/audio/VoiceRecording.ts index e429bd2035f..29217d0b0c8 100644 --- a/src/audio/VoiceRecording.ts +++ b/src/audio/VoiceRecording.ts @@ -14,7 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. */ -import * as Recorder from 'opus-recorder'; +// @ts-ignore +import Recorder from 'opus-recorder/dist/recorder.min.js'; import encoderPath from 'opus-recorder/dist/encoderWorker.min.js'; import { SimpleObservable } from "matrix-widget-api"; import EventEmitter from "events"; diff --git a/test/audio/VoiceRecording-test.ts b/test/audio/VoiceRecording-test.ts index ac4f52eabe2..dabb30f33ad 100644 --- a/test/audio/VoiceRecording-test.ts +++ b/test/audio/VoiceRecording-test.ts @@ -14,7 +14,24 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -import { VoiceRecording } from "../../src/audio/VoiceRecording"; +import { mocked } from 'jest-mock'; +// @ts-ignore +import Recorder from 'opus-recorder/dist/recorder.min.js'; + +import { VoiceRecording, voiceRecorderOptions, higQualityRecorderOptions } from "../../src/audio/VoiceRecording"; +import { createAudioContext } from '../..//src/audio/compat'; +import MediaDeviceHandler from "../../src/MediaDeviceHandler"; + +jest.mock('opus-recorder/dist/recorder.min.js'); +const RecorderMock = mocked(Recorder); + +jest.mock('../../src/audio/compat', () => ({ + createAudioContext: jest.fn(), +})); +const createAudioContextMock = mocked(createAudioContext); + +jest.mock("../../src/MediaDeviceHandler"); +const MediaDeviceHandlerMock = mocked(MediaDeviceHandler); /** * The tests here are heavily using access to private props. @@ -43,6 +60,7 @@ describe("VoiceRecording", () => { // @ts-ignore recording.observable = { update: jest.fn(), + close: jest.fn(), }; jest.spyOn(recording, "stop").mockImplementation(); recorderSecondsSpy = jest.spyOn(recording, "recorderSeconds", "get"); @@ -52,6 +70,56 @@ describe("VoiceRecording", () => { jest.resetAllMocks(); }); + describe("when starting a recording", () => { + beforeEach(() => { + const mockAudioContext = { + createMediaStreamSource: jest.fn().mockReturnValue({ + connect: jest.fn(), + disconnect: jest.fn(), + }), + createScriptProcessor: jest.fn().mockReturnValue({ + connect: jest.fn(), + disconnect: jest.fn(), + addEventListener: jest.fn(), + removeEventListener: jest.fn(), + }), + destination: {}, + close: jest.fn(), + }; + createAudioContextMock.mockReturnValue(mockAudioContext as unknown as AudioContext); + }); + + afterEach(async () => { + await recording.stop(); + }); + + it("should record high-quality audio if voice processing is disabled", async () => { + MediaDeviceHandlerMock.getAudioNoiseSuppression.mockReturnValue(false); + await recording.start(); + + 
expect(navigator.mediaDevices.getUserMedia).toHaveBeenCalledWith(expect.objectContaining({ + audio: expect.objectContaining({ noiseSuppression: { ideal: false } }), + })); + expect(RecorderMock).toHaveBeenCalledWith(expect.objectContaining({ + encoderBitRate: higQualityRecorderOptions.bitrate, + encoderApplication: higQualityRecorderOptions.encoderApplication, + })); + }); + + it("should record normal-quality voice if voice processing is enabled", async () => { + MediaDeviceHandlerMock.getAudioNoiseSuppression.mockReturnValue(true); + await recording.start(); + + expect(navigator.mediaDevices.getUserMedia).toHaveBeenCalledWith(expect.objectContaining({ + audio: expect.objectContaining({ noiseSuppression: { ideal: true } }), + })); + expect(RecorderMock).toHaveBeenCalledWith(expect.objectContaining({ + encoderBitRate: voiceRecorderOptions.bitrate, + encoderApplication: voiceRecorderOptions.encoderApplication, + })); + }); + }); + describe("when recording", () => { beforeEach(() => { // @ts-ignore From 6de57947badf43a8c3b0fc4383be302e7d16f2c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20V=C3=A1rady?= Date: Sat, 3 Dec 2022 16:08:54 +0100 Subject: [PATCH 5/7] Fix typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: László Várady --- src/audio/VoiceRecording.ts | 6 +++--- test/audio/VoiceRecording-test.ts | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/audio/VoiceRecording.ts b/src/audio/VoiceRecording.ts index 29217d0b0c8..0f504ba5627 100644 --- a/src/audio/VoiceRecording.ts +++ b/src/audio/VoiceRecording.ts @@ -48,7 +48,7 @@ export const voiceRecorderOptions: RecorderOptions = { encoderApplication: 2048, }; -export const higQualityRecorderOptions: RecorderOptions = { +export const highQualityRecorderOptions: RecorderOptions = { bitrate: 96000, encoderApplication: 2049, }; @@ -160,13 +160,13 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { 
this.recorder = new Recorder({ encoderPath, // magic from webpack encoderSampleRate: SAMPLE_RATE, - encoderApplication: hqRecording ? higQualityRecorderOptions.encoderApplication + encoderApplication: hqRecording ? highQualityRecorderOptions.encoderApplication : voiceRecorderOptions.encoderApplication, streamPages: true, // this speeds up the encoding process by using CPU over time encoderFrameSize: 20, // ms, arbitrary frame size we send to the encoder numberOfChannels: CHANNELS, sourceNode: this.recorderSource, - encoderBitRate: hqRecording ? higQualityRecorderOptions.bitrate : voiceRecorderOptions.bitrate, + encoderBitRate: hqRecording ? highQualityRecorderOptions.bitrate : voiceRecorderOptions.bitrate, // We use low values for the following to ease CPU usage - the resulting waveform // is indistinguishable for a voice message. Note that the underlying library will diff --git a/test/audio/VoiceRecording-test.ts b/test/audio/VoiceRecording-test.ts index dabb30f33ad..3a194af0600 100644 --- a/test/audio/VoiceRecording-test.ts +++ b/test/audio/VoiceRecording-test.ts @@ -18,7 +18,7 @@ import { mocked } from 'jest-mock'; // @ts-ignore import Recorder from 'opus-recorder/dist/recorder.min.js'; -import { VoiceRecording, voiceRecorderOptions, higQualityRecorderOptions } from "../../src/audio/VoiceRecording"; +import { VoiceRecording, voiceRecorderOptions, highQualityRecorderOptions } from "../../src/audio/VoiceRecording"; import { createAudioContext } from '../..//src/audio/compat'; import MediaDeviceHandler from "../../src/MediaDeviceHandler"; @@ -101,8 +101,8 @@ describe("VoiceRecording", () => { audio: expect.objectContaining({ noiseSuppression: { ideal: false } }), })); expect(RecorderMock).toHaveBeenCalledWith(expect.objectContaining({ - encoderBitRate: higQualityRecorderOptions.bitrate, - encoderApplication: higQualityRecorderOptions.encoderApplication, + encoderBitRate: highQualityRecorderOptions.bitrate, + encoderApplication: 
highQualityRecorderOptions.encoderApplication, })); }); From 9a12cfbb6710db79b7c276588d3427ef805e3836 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20V=C3=A1rady?= Date: Sat, 3 Dec 2022 16:10:36 +0100 Subject: [PATCH 6/7] VoiceRecording: refactor using destructuring assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: László Várady --- src/audio/VoiceRecording.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/audio/VoiceRecording.ts b/src/audio/VoiceRecording.ts index 0f504ba5627..64a415e6e66 100644 --- a/src/audio/VoiceRecording.ts +++ b/src/audio/VoiceRecording.ts @@ -156,17 +156,19 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { this.recorderProcessor.addEventListener("audioprocess", this.onAudioProcess); } - const hqRecording = this.shouldRecordInHighQuality(); + const recorderOptions = this.shouldRecordInHighQuality() ? + highQualityRecorderOptions : voiceRecorderOptions; + const { encoderApplication, bitrate } = recorderOptions; + this.recorder = new Recorder({ encoderPath, // magic from webpack encoderSampleRate: SAMPLE_RATE, - encoderApplication: hqRecording ? highQualityRecorderOptions.encoderApplication - : voiceRecorderOptions.encoderApplication, + encoderApplication: encoderApplication, streamPages: true, // this speeds up the encoding process by using CPU over time encoderFrameSize: 20, // ms, arbitrary frame size we send to the encoder numberOfChannels: CHANNELS, sourceNode: this.recorderSource, - encoderBitRate: hqRecording ? highQualityRecorderOptions.bitrate : voiceRecorderOptions.bitrate, + encoderBitRate: bitrate, // We use low values for the following to ease CPU usage - the resulting waveform // is indistinguishable for a voice message. 
Note that the underlying library will From c389aa9e866997ceabfc2753b981be689f06aa23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20V=C3=A1rady?= Date: Sat, 3 Dec 2022 16:24:07 +0100 Subject: [PATCH 7/7] VoiceRecording: add comments about constants and non-trivial conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: László Várady --- src/audio/VoiceRecording.ts | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/audio/VoiceRecording.ts b/src/audio/VoiceRecording.ts index 64a415e6e66..52b43ee3b51 100644 --- a/src/audio/VoiceRecording.ts +++ b/src/audio/VoiceRecording.ts @@ -44,13 +44,13 @@ interface RecorderOptions { } export const voiceRecorderOptions: RecorderOptions = { - bitrate: 24000, - encoderApplication: 2048, + bitrate: 24000, // recommended Opus bitrate for high-quality VoIP + encoderApplication: 2048, // voice }; export const highQualityRecorderOptions: RecorderOptions = { - bitrate: 96000, - encoderApplication: 2049, + bitrate: 96000, // recommended Opus bitrate for high-quality music/audio streaming + encoderApplication: 2049, // full band audio }; export interface IRecordingUpdate { @@ -104,6 +104,9 @@ export class VoiceRecording extends EventEmitter implements IDestroyable { } private shouldRecordInHighQuality(): boolean { + // Non-voice use case is suspected when noise suppression is disabled by the user. + // When recording complex audio, higher quality is required to avoid audio artifacts. + // This is a really arbitrary decision, but it can be refined/replaced at any time. return !MediaDeviceHandler.getAudioNoiseSuppression(); }