Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try to bypass audio encoder if pre-encoded is set #120

Draft
wants to merge 4 commits into
base: m114_release
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/audio_codecs/L16/audio_encoder_L16.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ absl::optional<AudioEncoderL16::Config> AudioEncoderL16::SdpToConfig(
}
Config config;
config.sample_rate_hz = format.clockrate_hz;
config.pre_encoded = format.pre_encoded;
config.num_channels = rtc::dchecked_cast<int>(format.num_channels);
auto ptime_iter = format.parameters.find("ptime");
if (ptime_iter != format.parameters.end()) {
Expand Down Expand Up @@ -66,6 +67,7 @@ std::unique_ptr<AudioEncoder> AudioEncoderL16::MakeAudioEncoder(
c.num_channels = config.num_channels;
c.frame_size_ms = config.frame_size_ms;
c.payload_type = payload_type;
c.pre_encoded = config.pre_encoded;
if (!config.IsOk()) {
RTC_DCHECK_NOTREACHED();
return nullptr;
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/L16/audio_encoder_L16.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ struct RTC_EXPORT AudioEncoderL16 {
int sample_rate_hz = 8000;
int num_channels = 1;
int frame_size_ms = 10;
bool pre_encoded = false;
};
static absl::optional<Config> SdpToConfig(const SdpAudioFormat& audio_format);
static void AppendSupportedEncoders(std::vector<AudioCodecSpec>* specs);
Expand Down
19 changes: 19 additions & 0 deletions api/audio_codecs/audio_encoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,5 +110,24 @@ ANAStats AudioEncoder::GetANAStats() const {
return ANAStats();
}

// Appends the 16-bit samples in `audio` to the end of `encoded` without
// running any codec on them.
//
// The samples are copied byte-for-byte exactly as they are laid out in memory
// (i.e. in the host's byte order), which is the same output the previous
// per-sample copy produced.
//
// Parameters:
//   audio   - samples to append; may be empty.
//   encoded - destination buffer; must not be null. Existing contents are kept.
//
// Returns the number of bytes appended to `encoded`.
size_t AudioEncoder::AppendPreEncodeData(rtc::ArrayView<const int16_t> audio,
                                         rtc::Buffer* encoded) {
  // Nothing to copy; also avoids handing a possibly-null data pointer to the
  // buffer's copy routine.
  if (audio.empty()) {
    return 0;
  }

  const size_t old_size = encoded->size();

  // View the samples as raw bytes. Accessing object storage through a
  // `uint8_t*` is permitted by the aliasing rules, whereas reading the
  // inactive member of a union (the previous approach) is undefined behaviour
  // in C++. Appending the whole span at once also replaces one AppendData
  // call per sample with a single call.
  encoded->AppendData(reinterpret_cast<const uint8_t*>(audio.data()),
                      audio.size() * sizeof(int16_t));

  return encoded->size() - old_size;
}

constexpr int AudioEncoder::kMaxNumberOfChannels;
} // namespace webrtc
4 changes: 4 additions & 0 deletions api/audio_codecs/audio_encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,10 @@ class AudioEncoder {
virtual EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
rtc::ArrayView<const int16_t> audio,
rtc::Buffer* encoded) = 0;

// The AppendPreEncodeData function adds raw audio data to the end of the encoded buffer.
virtual size_t AppendPreEncodeData(rtc::ArrayView<const int16_t> audio,
rtc::Buffer* encoded);
};
} // namespace webrtc
#endif // API_AUDIO_CODECS_AUDIO_ENCODER_H_
11 changes: 8 additions & 3 deletions api/audio_codecs/audio_format.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ SdpAudioFormat::SdpAudioFormat(SdpAudioFormat&&) = default;
SdpAudioFormat::SdpAudioFormat(absl::string_view name,
int clockrate_hz,
size_t num_channels)
: name(name), clockrate_hz(clockrate_hz), num_channels(num_channels) {}
: name(name),
clockrate_hz(clockrate_hz),
num_channels(num_channels),
pre_encoded(false) {}

SdpAudioFormat::SdpAudioFormat(absl::string_view name,
int clockrate_hz,
Expand All @@ -31,7 +34,8 @@ SdpAudioFormat::SdpAudioFormat(absl::string_view name,
: name(name),
clockrate_hz(clockrate_hz),
num_channels(num_channels),
parameters(param) {}
parameters(param),
pre_encoded(false) {}

SdpAudioFormat::SdpAudioFormat(absl::string_view name,
int clockrate_hz,
Expand All @@ -40,7 +44,8 @@ SdpAudioFormat::SdpAudioFormat(absl::string_view name,
: name(name),
clockrate_hz(clockrate_hz),
num_channels(num_channels),
parameters(std::move(param)) {}
parameters(std::move(param)),
pre_encoded(false) {}

bool SdpAudioFormat::Matches(const SdpAudioFormat& o) const {
return absl::EqualsIgnoreCase(name, o.name) &&
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/audio_format.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ struct RTC_EXPORT SdpAudioFormat {
int clockrate_hz;
size_t num_channels;
Parameters parameters;
bool pre_encoded;
};

// Information about how an audio format is treated by the codec implementation.
Expand Down
3 changes: 3 additions & 0 deletions api/audio_codecs/g711/audio_encoder_g711.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ absl::optional<AudioEncoderG711::Config> AudioEncoderG711::SdpToConfig(
config.type = is_pcmu ? Config::Type::kPcmU : Config::Type::kPcmA;
config.num_channels = rtc::dchecked_cast<int>(format.num_channels);
config.frame_size_ms = 20;
config.pre_encoded = format.pre_encoded;
auto ptime_iter = format.parameters.find("ptime");
if (ptime_iter != format.parameters.end()) {
const auto ptime = rtc::StringToNumber<int>(ptime_iter->second);
Expand Down Expand Up @@ -75,13 +76,15 @@ std::unique_ptr<AudioEncoder> AudioEncoderG711::MakeAudioEncoder(
AudioEncoderPcmU::Config impl_config;
impl_config.num_channels = config.num_channels;
impl_config.frame_size_ms = config.frame_size_ms;
impl_config.pre_encoded = config.pre_encoded;
impl_config.payload_type = payload_type;
return std::make_unique<AudioEncoderPcmU>(impl_config);
}
case Config::Type::kPcmA: {
AudioEncoderPcmA::Config impl_config;
impl_config.num_channels = config.num_channels;
impl_config.frame_size_ms = config.frame_size_ms;
impl_config.pre_encoded = config.pre_encoded;
impl_config.payload_type = payload_type;
return std::make_unique<AudioEncoderPcmA>(impl_config);
}
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/g711/audio_encoder_g711.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ struct RTC_EXPORT AudioEncoderG711 {
Type type = Type::kPcmU;
int num_channels = 1;
int frame_size_ms = 20;
bool pre_encoded = false;
};
static absl::optional<AudioEncoderG711::Config> SdpToConfig(
const SdpAudioFormat& audio_format);
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/g722/audio_encoder_g722.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ absl::optional<AudioEncoderG722Config> AudioEncoderG722::SdpToConfig(

AudioEncoderG722Config config;
config.num_channels = rtc::checked_cast<int>(format.num_channels);
config.pre_encoded = format.pre_encoded;
auto ptime_iter = format.parameters.find("ptime");
if (ptime_iter != format.parameters.end()) {
auto ptime = rtc::StringToNumber<int>(ptime_iter->second);
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/g722/audio_encoder_g722_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ struct AudioEncoderG722Config {
}
int frame_size_ms = 20;
int num_channels = 1;
bool pre_encoded = false;
};

} // namespace webrtc
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/ilbc/audio_encoder_ilbc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ absl::optional<AudioEncoderIlbcConfig> AudioEncoderIlbc::SdpToConfig(
}

AudioEncoderIlbcConfig config;
config.pre_encoded = format.pre_encoded;
auto ptime_iter = format.parameters.find("ptime");
if (ptime_iter != format.parameters.end()) {
auto ptime = rtc::StringToNumber<int>(ptime_iter->second);
Expand Down
1 change: 1 addition & 0 deletions api/audio_codecs/ilbc/audio_encoder_ilbc_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ struct AudioEncoderIlbcConfig {
int frame_size_ms = 30; // Valid values are 20, 30, 40, and 60 ms.
// Note that frame size 40 ms produces encodings with two 20 ms frames in
// them, and frame size 60 ms consists of two 30 ms frames.
bool pre_encoded = false;
};

} // namespace webrtc
Expand Down
3 changes: 2 additions & 1 deletion api/audio_codecs/opus/audio_encoder_opus_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ AudioEncoderOpusConfig::AudioEncoderOpusConfig()
complexity_threshold_window_bps(1500),
dtx_enabled(false),
uplink_bandwidth_update_interval_ms(200),
payload_type(-1) {}
payload_type(-1),
pre_encoded(false) {}
AudioEncoderOpusConfig::AudioEncoderOpusConfig(const AudioEncoderOpusConfig&) =
default;
AudioEncoderOpusConfig::~AudioEncoderOpusConfig() = default;
Expand Down
2 changes: 2 additions & 0 deletions api/audio_codecs/opus/audio_encoder_opus_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ struct RTC_EXPORT AudioEncoderOpusConfig {
// NOTE: This member isn't necessary, and will soon go away. See
// https://bugs.chromium.org/p/webrtc/issues/detail?id=7847
int payload_type;

bool pre_encoded;
};

} // namespace webrtc
Expand Down
5 changes: 4 additions & 1 deletion api/audio_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ void AudioOptions::SetAll(const AudioOptions& change) {
SetFrom(&audio_network_adaptor, change.audio_network_adaptor);
SetFrom(&audio_network_adaptor_config, change.audio_network_adaptor_config);
SetFrom(&init_recording_on_send, change.init_recording_on_send);
SetFrom(&pre_encoded, change.pre_encoded);
}

bool AudioOptions::operator==(const AudioOptions& o) const {
Expand All @@ -75,7 +76,8 @@ bool AudioOptions::operator==(const AudioOptions& o) const {
combined_audio_video_bwe == o.combined_audio_video_bwe &&
audio_network_adaptor == o.audio_network_adaptor &&
audio_network_adaptor_config == o.audio_network_adaptor_config &&
init_recording_on_send == o.init_recording_on_send;
init_recording_on_send == o.init_recording_on_send &&
pre_encoded == o.pre_encoded;
}

std::string AudioOptions::ToString() const {
Expand All @@ -100,6 +102,7 @@ std::string AudioOptions::ToString() const {
ToStringIfSet(&result, "combined_audio_video_bwe", combined_audio_video_bwe);
ToStringIfSet(&result, "audio_network_adaptor", audio_network_adaptor);
ToStringIfSet(&result, "init_recording_on_send", init_recording_on_send);
ToStringIfSet(&result, "pre_encoded", pre_encoded);
result << "}";
return result.str();
}
Expand Down
2 changes: 2 additions & 0 deletions api/audio_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ struct RTC_EXPORT AudioOptions {
// true.
// TODO(webrtc:13566): Remove this option. See issue for details.
absl::optional<bool> init_recording_on_send;
// Audio is already pre-encoded, so the encoding step can be bypassed.
absl::optional<bool> pre_encoded;
};

} // namespace cricket
Expand Down
2 changes: 1 addition & 1 deletion audio/audio_transport_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples,
size_t& nSamplesOut,
int64_t* elapsed_time_ms,
int64_t* ntp_time_ms) {
TRACE_EVENT0("webrtc", "AudioTransportImpl::SendProcessedData");
TRACE_EVENT0("webrtc", "AudioTransportImpl::NeedMorePlayData");
RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample);
RTC_DCHECK_GE(nChannels, 1);
RTC_DCHECK_LE(nChannels, 2);
Expand Down
2 changes: 2 additions & 0 deletions media/engine/webrtc_voice_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ void WebRtcVoiceEngine::Init() {
options.audio_jitter_buffer_max_packets = 200;
options.audio_jitter_buffer_fast_accelerate = false;
options.audio_jitter_buffer_min_delay_ms = 0;
options.pre_encoded = false;
ApplyOptions(options);
}
initialized_ = true;
Expand Down Expand Up @@ -1650,6 +1651,7 @@ bool WebRtcVoiceMediaChannel::SetSendCodecs(
IsCodec(voice_codec, kRedCodecName))) {
webrtc::SdpAudioFormat format(voice_codec.name, voice_codec.clockrate,
voice_codec.channels, voice_codec.params);
format.pre_encoded = options_.pre_encoded.value_or(false);

voice_codec_info = engine()->encoder_factory_->QueryAudioEncoder(format);
if (!voice_codec_info) {
Expand Down
20 changes: 12 additions & 8 deletions modules/audio_coding/codecs/g711/audio_encoder_pcm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ AudioEncoderPcm::AudioEncoderPcm(const Config& config, int sample_rate_hz)
static_cast<size_t>(config.frame_size_ms / 10)),
full_frame_samples_(config.num_channels * config.frame_size_ms *
sample_rate_hz / 1000),
first_timestamp_in_buffer_(0) {
first_timestamp_in_buffer_(0),
pre_encoded_(config.pre_encoded) {
RTC_CHECK_GT(sample_rate_hz, 0) << "Sample rate must be larger than 0 Hz";
RTC_CHECK_EQ(config.frame_size_ms % 10, 0)
<< "Frame size must be an integer multiple of 10 ms.";
Expand Down Expand Up @@ -74,13 +75,16 @@ AudioEncoder::EncodedInfo AudioEncoderPcm::EncodeImpl(
EncodedInfo info;
info.encoded_timestamp = first_timestamp_in_buffer_;
info.payload_type = payload_type_;
info.encoded_bytes = encoded->AppendData(
full_frame_samples_ * BytesPerSample(),
[&](rtc::ArrayView<uint8_t> encoded) {
return EncodeCall(&speech_buffer_[0], full_frame_samples_,
encoded.data());
});
speech_buffer_.clear();
if (pre_encoded_) {
info.encoded_bytes = AppendPreEncodeData(audio, encoded);
} else {
info.encoded_bytes = encoded->AppendData(
full_frame_samples_ * BytesPerSample(),
[&](rtc::ArrayView<uint8_t> encoded) {
return EncodeCall(&speech_buffer_[0], full_frame_samples_,
encoded.data());
});
} speech_buffer_.clear();
info.encoder_type = GetCodecType();
return info;
}
Expand Down
4 changes: 3 additions & 1 deletion modules/audio_coding/codecs/g711/audio_encoder_pcm.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ class AudioEncoderPcm : public AudioEncoder {
int frame_size_ms;
size_t num_channels;
int payload_type;
bool pre_encoded;

protected:
explicit Config(int pt)
: frame_size_ms(20), num_channels(1), payload_type(pt) {}
: frame_size_ms(20), num_channels(1), payload_type(pt), pre_encoded(false) {}
};

~AudioEncoderPcm() override;
Expand Down Expand Up @@ -67,6 +68,7 @@ class AudioEncoderPcm : public AudioEncoder {
const int sample_rate_hz_;
const size_t num_channels_;
const int payload_type_;
bool pre_encoded_;
const size_t num_10ms_frames_per_packet_;
const size_t full_frame_samples_;
std::vector<int16_t> speech_buffer_;
Expand Down
69 changes: 38 additions & 31 deletions modules/audio_coding/codecs/g722/audio_encoder_g722.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ AudioEncoderG722Impl::AudioEncoderG722Impl(const AudioEncoderG722Config& config,
num_10ms_frames_buffered_(0),
first_timestamp_in_buffer_(0),
encoders_(new EncoderState[num_channels_]),
interleave_buffer_(2 * num_channels_) {
interleave_buffer_(2 * num_channels_),
pre_encoded_(config.pre_encoded) {
RTC_CHECK(config.IsOk());
const size_t samples_per_channel =
kSampleRateHz / 100 * num_10ms_frames_per_packet_;
Expand Down Expand Up @@ -103,38 +104,44 @@ AudioEncoder::EncodedInfo AudioEncoderG722Impl::EncodeImpl(
return EncodedInfo();
}

// Encode each channel separately.
RTC_CHECK_EQ(num_10ms_frames_buffered_, num_10ms_frames_per_packet_);
num_10ms_frames_buffered_ = 0;
const size_t samples_per_channel = SamplesPerChannel();
for (size_t i = 0; i < num_channels_; ++i) {
const size_t bytes_encoded = WebRtcG722_Encode(
encoders_[i].encoder, encoders_[i].speech_buffer.get(),
samples_per_channel, encoders_[i].encoded_buffer.data());
RTC_CHECK_EQ(bytes_encoded, samples_per_channel / 2);
}

const size_t bytes_to_encode = samples_per_channel / 2 * num_channels_;
EncodedInfo info;
info.encoded_bytes = encoded->AppendData(
bytes_to_encode, [&](rtc::ArrayView<uint8_t> encoded) {
// Interleave the encoded bytes of the different channels. Each separate
// channel and the interleaved stream encodes two samples per byte, most
// significant half first.
for (size_t i = 0; i < samples_per_channel / 2; ++i) {
for (size_t j = 0; j < num_channels_; ++j) {
uint8_t two_samples = encoders_[j].encoded_buffer.data()[i];
interleave_buffer_.data()[j] = two_samples >> 4;
interleave_buffer_.data()[num_channels_ + j] = two_samples & 0xf;
if (pre_encoded_) {
info.encoded_bytes = AppendPreEncodeData(audio, encoded);
} else {
// Encode each channel separately.
RTC_CHECK_EQ(num_10ms_frames_buffered_, num_10ms_frames_per_packet_);
num_10ms_frames_buffered_ = 0;
const size_t samples_per_channel = SamplesPerChannel();
for (size_t i = 0; i < num_channels_; ++i) {
const size_t bytes_encoded = WebRtcG722_Encode(
encoders_[i].encoder, encoders_[i].speech_buffer.get(),
samples_per_channel, encoders_[i].encoded_buffer.data());
RTC_CHECK_EQ(bytes_encoded, samples_per_channel / 2);
}

const size_t bytes_to_encode = samples_per_channel / 2 * num_channels_;

info.encoded_bytes = encoded->AppendData(
bytes_to_encode, [&](rtc::ArrayView<uint8_t> encoded) {
// Interleave the encoded bytes of the different channels. Each separate
// channel and the interleaved stream encodes two samples per byte, most
// significant half first.
for (size_t i = 0; i < samples_per_channel / 2; ++i) {
for (size_t j = 0; j < num_channels_; ++j) {
uint8_t two_samples = encoders_[j].encoded_buffer.data()[i];
interleave_buffer_.data()[j] = two_samples >> 4;
interleave_buffer_.data()[num_channels_ + j] = two_samples & 0xf;
}
for (size_t j = 0; j < num_channels_; ++j)
encoded[i * num_channels_ + j] =
interleave_buffer_.data()[2 * j] << 4 |
interleave_buffer_.data()[2 * j + 1];
}
for (size_t j = 0; j < num_channels_; ++j)
encoded[i * num_channels_ + j] =
interleave_buffer_.data()[2 * j] << 4 |
interleave_buffer_.data()[2 * j + 1];
}

return bytes_to_encode;
});

return bytes_to_encode;
});
}

info.encoded_timestamp = first_timestamp_in_buffer_;
info.payload_type = payload_type_;
info.encoder_type = CodecType::kG722;
Expand Down
1 change: 1 addition & 0 deletions modules/audio_coding/codecs/g722/audio_encoder_g722.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class AudioEncoderG722Impl final : public AudioEncoder {
uint32_t first_timestamp_in_buffer_;
const std::unique_ptr<EncoderState[]> encoders_;
rtc::Buffer interleave_buffer_;
bool pre_encoded_;
};

} // namespace webrtc
Expand Down
Loading