Skip to content

Commit

Permalink
Auto merge of #80115 - tgnottingham:specialize_opaque_u8_sequences, r…
Browse files Browse the repository at this point in the history
…=oli-obk

rustc_serialize: specialize opaque encoding and decoding of some u8 sequences

This specializes encoding and decoding of some contiguous u8 sequences to use a more efficient implementation. The default implementations process each u8 individually, but that isn't necessary for the opaque encoder and decoder. The opaque encoding for u8s is a no-op, so we can just copy entire sequences as-is, rather than process them byte by byte.

This also changes some encode and decode implementations for contiguous sequences to forward to the slice and vector implementations, so that they can take advantage of the new specialization when applicable.
  • Loading branch information
bors committed Jan 2, 2021
2 parents 5986dd8 + be79f49 commit 929f66a
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 47 deletions.
4 changes: 2 additions & 2 deletions compiler/rustc_data_structures/src/fingerprint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use rustc_serialize::{
Decodable, Encodable,
};
use std::hash::{Hash, Hasher};
use std::mem;
use std::mem::{self, MaybeUninit};

#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy)]
pub struct Fingerprint(u64, u64);
Expand Down Expand Up @@ -61,7 +61,7 @@ impl Fingerprint {
}

pub fn decode_opaque(decoder: &mut opaque::Decoder<'_>) -> Result<Fingerprint, String> {
let mut bytes = [0; 16];
let mut bytes: [MaybeUninit<u8>; 16] = MaybeUninit::uninit_array();

decoder.read_raw_bytes(&mut bytes)?;

Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_macros/src/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ fn encodable_body(
#field_name,
#field_idx,
|__encoder|
::rustc_serialize::Encodable::encode(#bind_ident, __encoder),
::rustc_serialize::Encodable::<#encoder_ty>::encode(#bind_ident, __encoder),
) {
::std::result::Result::Ok(()) => (),
::std::result::Result::Err(__err)
Expand Down Expand Up @@ -237,7 +237,7 @@ fn encodable_body(
__encoder,
#field_idx,
|__encoder|
::rustc_serialize::Encodable::encode(#bind_ident, __encoder),
::rustc_serialize::Encodable::<#encoder_ty>::encode(#bind_ident, __encoder),
) {
::std::result::Result::Ok(()) => (),
::std::result::Result::Err(__err)
Expand Down
19 changes: 19 additions & 0 deletions compiler/rustc_middle/src/ty/query/on_disk_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,15 @@ impl<'a, 'tcx> TyDecoder<'tcx> for CacheDecoder<'a, 'tcx> {

crate::implement_ty_decoder!(CacheDecoder<'a, 'tcx>);

// `Decodable::decode` called with a `CacheDecoder` does not reach the
// `Decodable<opaque::Decoder>::decode` specialization for `Vec<u8>` by itself. Given how
// `CacheDecoder` and the decoding traits currently work, the opt-in has to be written out by
// hand: this impl forwards the decode to the decoder's `opaque` field.
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Vec<u8> {
    fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Result<Self, String> {
        // Decode from the inner field rather than from `d` itself, so the impl selected is the
        // one for that field's decoder type.
        let opaque_decoder = &mut d.opaque;
        Decodable::decode(opaque_decoder)
    }
}

impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for SyntaxContext {
fn decode(decoder: &mut CacheDecoder<'a, 'tcx>) -> Result<Self, String> {
let syntax_contexts = decoder.syntax_contexts;
Expand Down Expand Up @@ -1149,6 +1158,16 @@ where
}
}

// Manual opt-in to the `Encodable<opaque::Encoder>::encode` specialization for byte slices.
// When a `CacheEncoder` wrapping an `opaque::Encoder` is handed to `Encodable::encode`, the
// specialization is not picked up automatically because of how `CacheEncoder` and the encoding
// traits currently work, so this impl forwards the slice to the inner encoder explicitly.
impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx, opaque::Encoder>> for [u8] {
    fn encode(&self, e: &mut CacheEncoder<'a, 'tcx, opaque::Encoder>) -> opaque::EncodeResult {
        // Reborrow the inner encoder so the byte-slice impl for that encoder type does the work.
        let inner_encoder = &mut *e.encoder;
        self.encode(inner_encoder)
    }
}

// An integer that will always encode to 8 bytes.
struct IntEncodedWithFixedSize(u64);

Expand Down
42 changes: 10 additions & 32 deletions compiler/rustc_serialize/src/collection_impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,8 @@ use smallvec::{Array, SmallVec};

impl<S: Encoder, A: Array<Item: Encodable<S>>> Encodable<S> for SmallVec<A> {
fn encode(&self, s: &mut S) -> Result<(), S::Error> {
s.emit_seq(self.len(), |s| {
for (i, e) in self.iter().enumerate() {
s.emit_seq_elt(i, |s| e.encode(s))?;
}
Ok(())
})
let slice: &[A::Item] = self;
slice.encode(s)
}
}

Expand Down Expand Up @@ -292,46 +288,28 @@ where

impl<E: Encoder, T: Encodable<E>> Encodable<E> for Rc<[T]> {
fn encode(&self, s: &mut E) -> Result<(), E::Error> {
s.emit_seq(self.len(), |s| {
for (index, e) in self.iter().enumerate() {
s.emit_seq_elt(index, |s| e.encode(s))?;
}
Ok(())
})
let slice: &[T] = self;
slice.encode(s)
}
}

impl<D: Decoder, T: Decodable<D>> Decodable<D> for Rc<[T]> {
fn decode(d: &mut D) -> Result<Rc<[T]>, D::Error> {
d.read_seq(|d, len| {
let mut vec = Vec::with_capacity(len);
for index in 0..len {
vec.push(d.read_seq_elt(index, |d| Decodable::decode(d))?);
}
Ok(vec.into())
})
let vec: Vec<T> = Decodable::decode(d)?;
Ok(vec.into())
}
}

impl<E: Encoder, T: Encodable<E>> Encodable<E> for Arc<[T]> {
fn encode(&self, s: &mut E) -> Result<(), E::Error> {
s.emit_seq(self.len(), |s| {
for (index, e) in self.iter().enumerate() {
s.emit_seq_elt(index, |s| e.encode(s))?;
}
Ok(())
})
let slice: &[T] = self;
slice.encode(s)
}
}

impl<D: Decoder, T: Decodable<D>> Decodable<D> for Arc<[T]> {
fn decode(d: &mut D) -> Result<Arc<[T]>, D::Error> {
d.read_seq(|d, len| {
let mut vec = Vec::with_capacity(len);
for index in 0..len {
vec.push(d.read_seq_elt(index, |d| Decodable::decode(d))?);
}
Ok(vec.into())
})
let vec: Vec<T> = Decodable::decode(d)?;
Ok(vec.into())
}
}
2 changes: 2 additions & 0 deletions compiler/rustc_serialize/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ Core encoding and decoding interfaces.
#![feature(nll)]
#![feature(associated_type_bounds)]
#![cfg_attr(bootstrap, feature(min_const_generics))]
#![feature(min_specialization)]
#![feature(vec_spare_capacity)]
#![cfg_attr(test, feature(test))]
#![allow(rustc::internal)]

Expand Down
47 changes: 45 additions & 2 deletions compiler/rustc_serialize/src/opaque.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use crate::leb128::{self, read_signed_leb128, write_signed_leb128};
use crate::serialize;
use std::borrow::Cow;
use std::mem::MaybeUninit;
use std::ptr;

// -----------------------------------------------------------------------------
// Encoder
Expand Down Expand Up @@ -179,11 +181,19 @@ impl<'a> Decoder<'a> {
}

#[inline]
pub fn read_raw_bytes(&mut self, s: &mut [u8]) -> Result<(), String> {
pub fn read_raw_bytes(&mut self, s: &mut [MaybeUninit<u8>]) -> Result<(), String> {
let start = self.position;
let end = start + s.len();
assert!(end <= self.data.len());

s.copy_from_slice(&self.data[start..end]);
// SAFETY: Both `src` and `dst` point to at least `s.len()` elements:
// `src` points to at least `s.len()` elements by above assert, and
// `dst` points to `s.len()` elements by derivation from `s`.
unsafe {
let src = self.data.as_ptr().add(start);
let dst = s.as_mut_ptr() as *mut u8;
ptr::copy_nonoverlapping(src, dst, s.len());
}

self.position = end;

Expand Down Expand Up @@ -316,3 +326,36 @@ impl<'a> serialize::Decoder for Decoder<'a> {
err.to_string()
}
}

// Specializations for contiguous byte sequences follow. The default implementations for slices
// encode and decode each element individually. This isn't necessary for `u8` slices when using
// opaque encoders and decoders, because each `u8` is unchanged by encoding and decoding.
// Therefore, we can use more efficient implementations that process the entire sequence at once.

// Byte-slice specialization for the opaque `Encoder`: the length is emitted with `emit_usize`,
// then the whole slice is handed to `emit_raw_bytes` in one call instead of element by element.
// `Vec<u8>` and similar containers benefit as well, because their default `encode`
// implementations call `encode` on their slices internally.
impl serialize::Encodable<Encoder> for [u8] {
    fn encode(&self, e: &mut Encoder) -> EncodeResult {
        // Length prefix first, so the decoder knows how many raw bytes follow.
        let len = self.len();
        serialize::Encoder::emit_usize(e, len)?;

        // The payload is written as-is.
        e.emit_raw_bytes(self);
        Ok(())
    }
}

// Specialize decoding `Vec<u8>`. This specialization also applies to decoding `Box<[u8]>`s, etc.,
// since the default implementations call `decode` to produce a `Vec<u8>` internally.
//
// The expected layout is a `usize` length (read with `read_usize`) followed by that many raw
// bytes, which are copied into the vector's buffer in a single `read_raw_bytes` call.
impl<'a> serialize::Decodable<Decoder<'a>> for Vec<u8> {
// Returns the decoded bytes, or propagates the `String` error from `read_usize` /
// `read_raw_bytes`.
fn decode(d: &mut Decoder<'a>) -> Result<Self, String> {
// Length prefix: number of raw bytes that follow.
let len = serialize::Decoder::read_usize(d)?;

// Allocate room for `len` bytes without initializing them, and view exactly the first `len`
// spare slots as the destination buffer for the raw copy.
let mut v = Vec::with_capacity(len);
let buf = &mut v.spare_capacity_mut()[..len];
d.read_raw_bytes(buf)?;

// SAFETY: `v` was created with capacity for `len` elements, and `read_raw_bytes` copies
// `buf.len()` (== `len`) bytes into `buf`, i.e. into the first `len` slots of `v`'s buffer.
// This point is only reached if that call did not return an error, so those `len` elements
// are initialized.
unsafe {
v.set_len(len);
}

Ok(v)
}
}
13 changes: 4 additions & 9 deletions compiler/rustc_serialize/src/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@ impl<D: Decoder, T: Decodable<D>> Decodable<D> for Rc<T> {
}

impl<S: Encoder, T: Encodable<S>> Encodable<S> for [T] {
fn encode(&self, s: &mut S) -> Result<(), S::Error> {
default fn encode(&self, s: &mut S) -> Result<(), S::Error> {
s.emit_seq(self.len(), |s| {
for (i, e) in self.iter().enumerate() {
s.emit_seq_elt(i, |s| e.encode(s))?
Expand All @@ -545,7 +545,7 @@ impl<S: Encoder, T: Encodable<S>> Encodable<S> for Vec<T> {
}

impl<D: Decoder, T: Decodable<D>> Decodable<D> for Vec<T> {
fn decode(d: &mut D) -> Result<Vec<T>, D::Error> {
default fn decode(d: &mut D) -> Result<Vec<T>, D::Error> {
d.read_seq(|d, len| {
let mut v = Vec::with_capacity(len);
for i in 0..len {
Expand Down Expand Up @@ -591,13 +591,8 @@ where
[T]: ToOwned<Owned = Vec<T>>,
{
fn decode(d: &mut D) -> Result<Cow<'static, [T]>, D::Error> {
d.read_seq(|d, len| {
let mut v = Vec::with_capacity(len);
for i in 0..len {
v.push(d.read_seq_elt(i, |d| Decodable::decode(d))?);
}
Ok(Cow::Owned(v))
})
let v: Vec<T> = Decodable::decode(d)?;
Ok(Cow::Owned(v))
}
}

Expand Down

0 comments on commit 929f66a

Please sign in to comment.