Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement most of RFC 2930, providing the ReadBuf abstraction #81156

Merged
merged 12 commits into from
Dec 9, 2021
22 changes: 9 additions & 13 deletions library/std/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ mod tests;

use crate::ffi::OsString;
use crate::fmt;
use crate::io::{self, Initializer, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write};
use crate::io::{self, IoSlice, IoSliceMut, Read, ReadBuf, Seek, SeekFrom, Write};
use crate::path::{Path, PathBuf};
use crate::sys::fs as fs_imp;
use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner};
Expand Down Expand Up @@ -624,15 +624,13 @@ impl Read for File {
self.inner.read_vectored(bufs)
}

#[inline]
fn is_read_vectored(&self) -> bool {
self.inner.is_read_vectored()
fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
self.inner.read_buf(buf)
}

#[inline]
unsafe fn initializer(&self) -> Initializer {
// SAFETY: Read is guaranteed to work on uninitialized memory
unsafe { Initializer::nop() }
fn is_read_vectored(&self) -> bool {
self.inner.is_read_vectored()
}

// Reserves space in the buffer based on the file size when available.
Expand Down Expand Up @@ -678,6 +676,10 @@ impl Read for &File {
self.inner.read(buf)
}

fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
self.inner.read_buf(buf)
}

fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
self.inner.read_vectored(bufs)
}
Expand All @@ -687,12 +689,6 @@ impl Read for &File {
self.inner.is_read_vectored()
}

#[inline]
unsafe fn initializer(&self) -> Initializer {
// SAFETY: Read is guaranteed to work on uninitialized memory
unsafe { Initializer::nop() }
}

// Reserves space in the buffer based on the file size when available.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
buf.reserve(buffer_capacity_required(self));
Expand Down
59 changes: 43 additions & 16 deletions library/std/src/io/buffered/bufreader.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use crate::cmp;
use crate::fmt;
use crate::io::{
self, BufRead, Initializer, IoSliceMut, Read, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
self, BufRead, IoSliceMut, Read, ReadBuf, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
};
use crate::mem::MaybeUninit;

/// The `BufReader<R>` struct adds buffering to any reader.
///
Expand Down Expand Up @@ -47,9 +48,10 @@ use crate::io::{
#[stable(feature = "rust1", since = "1.0.0")]
pub struct BufReader<R> {
inner: R,
buf: Box<[u8]>,
buf: Box<[MaybeUninit<u8>]>,
pos: usize,
cap: usize,
init: usize,
}

impl<R: Read> BufReader<R> {
Expand Down Expand Up @@ -91,11 +93,8 @@ impl<R: Read> BufReader<R> {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn with_capacity(capacity: usize, inner: R) -> BufReader<R> {
unsafe {
let mut buf = Box::new_uninit_slice(capacity).assume_init();
inner.initializer().initialize(&mut buf);
BufReader { inner, buf, pos: 0, cap: 0 }
}
let buf = Box::new_uninit_slice(capacity);
BufReader { inner, buf, pos: 0, cap: 0, init: 0 }
}
}

Expand Down Expand Up @@ -171,7 +170,8 @@ impl<R> BufReader<R> {
/// ```
#[stable(feature = "bufreader_buffer", since = "1.37.0")]
pub fn buffer(&self) -> &[u8] {
&self.buf[self.pos..self.cap]
// SAFETY: self.cap is always <= self.init, so self.buf[self.pos..self.cap] is always init
unsafe { MaybeUninit::slice_assume_init_ref(&self.buf[self.pos..self.cap]) }
}

/// Returns the number of bytes the internal buffer can hold at once.
Expand Down Expand Up @@ -271,6 +271,25 @@ impl<R: Read> Read for BufReader<R> {
Ok(nread)
}

fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
// If we don't have any buffered data and we're doing a massive read
// (larger than our internal buffer), bypass our internal buffer
// entirely.
if self.pos == self.cap && buf.remaining() >= self.buf.len() {
self.discard_buffer();
return self.inner.read_buf(buf);
}

let prev = buf.filled_len();

let mut rem = self.fill_buf()?;
rem.read_buf(buf)?;

self.consume(buf.filled_len() - prev); //slice impl of read_buf known to never unfill buf

Ok(())
}

// Small read_exacts from a BufReader are extremely common when used with a deserializer.
// The default implementation calls read in a loop, which results in surprisingly poor code
// generation for the common path where the buffer has enough bytes to fill the passed-in
Expand Down Expand Up @@ -303,16 +322,11 @@ impl<R: Read> Read for BufReader<R> {
self.inner.is_read_vectored()
}

// we can't skip unconditionally because of the large buffer case in read.
unsafe fn initializer(&self) -> Initializer {
self.inner.initializer()
}

// The inner reader might have an optimized `read_to_end`. Drain our buffer and then
// delegate to the inner implementation.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
let nread = self.cap - self.pos;
buf.extend_from_slice(&self.buf[self.pos..self.cap]);
buf.extend_from_slice(&self.buffer());
self.discard_buffer();
Ok(nread + self.inner.read_to_end(buf)?)
}
Expand Down Expand Up @@ -363,10 +377,23 @@ impl<R: Read> BufRead for BufReader<R> {
// to tell the compiler that the pos..cap slice is always valid.
if self.pos >= self.cap {
debug_assert!(self.pos == self.cap);
self.cap = self.inner.read(&mut self.buf)?;

let mut readbuf = ReadBuf::uninit(&mut self.buf);

// SAFETY: `self.init` is either 0 or set to `readbuf.initialized_len()`
// from the last time this function was called
unsafe {
readbuf.assume_init(self.init);
}

self.inner.read_buf(&mut readbuf)?;

self.cap = readbuf.filled_len();
self.init = readbuf.initialized_len();

self.pos = 0;
}
Ok(&self.buf[self.pos..self.cap])
Ok(self.buffer())
}

fn consume(&mut self, amt: usize) {
Expand Down
52 changes: 51 additions & 1 deletion library/std/src/io/buffered/tests.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::io::prelude::*;
use crate::io::{self, BufReader, BufWriter, ErrorKind, IoSlice, LineWriter, SeekFrom};
use crate::io::{self, BufReader, BufWriter, ErrorKind, IoSlice, LineWriter, ReadBuf, SeekFrom};
use crate::mem::MaybeUninit;
use crate::panic;
use crate::sync::atomic::{AtomicUsize, Ordering};
use crate::thread;
Expand Down Expand Up @@ -55,6 +56,55 @@ fn test_buffered_reader() {
assert_eq!(reader.read(&mut buf).unwrap(), 0);
}

#[test]
fn test_buffered_reader_read_buf() {
let inner: &[u8] = &[5, 6, 7, 0, 1, 2, 3, 4];
let mut reader = BufReader::with_capacity(2, inner);

let mut buf = [MaybeUninit::uninit(); 3];
let mut buf = ReadBuf::uninit(&mut buf);

reader.read_buf(&mut buf).unwrap();

assert_eq!(buf.filled(), [5, 6, 7]);
assert_eq!(reader.buffer(), []);

let mut buf = [MaybeUninit::uninit(); 2];
let mut buf = ReadBuf::uninit(&mut buf);

reader.read_buf(&mut buf).unwrap();

assert_eq!(buf.filled(), [0, 1]);
assert_eq!(reader.buffer(), []);

let mut buf = [MaybeUninit::uninit(); 1];
let mut buf = ReadBuf::uninit(&mut buf);

reader.read_buf(&mut buf).unwrap();

assert_eq!(buf.filled(), [2]);
assert_eq!(reader.buffer(), [3]);

let mut buf = [MaybeUninit::uninit(); 3];
let mut buf = ReadBuf::uninit(&mut buf);

reader.read_buf(&mut buf).unwrap();

assert_eq!(buf.filled(), [3]);
assert_eq!(reader.buffer(), []);

reader.read_buf(&mut buf).unwrap();

assert_eq!(buf.filled(), [3, 4]);
assert_eq!(reader.buffer(), []);

buf.clear();

reader.read_buf(&mut buf).unwrap();

assert_eq!(buf.filled_len(), 0);
}

#[test]
fn test_buffered_reader_seek() {
let inner: &[u8] = &[5, 6, 7, 0, 1, 2, 3, 4];
Expand Down
81 changes: 38 additions & 43 deletions library/std/src/io/copy.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::{BufWriter, ErrorKind, Read, Result, Write, DEFAULT_BUF_SIZE};
use super::{BufWriter, ErrorKind, Read, ReadBuf, Result, Write, DEFAULT_BUF_SIZE};
use crate::mem::MaybeUninit;

/// Copies the entire contents of a reader into a writer.
Expand Down Expand Up @@ -82,33 +82,30 @@ impl<I: Write> BufferedCopySpec for BufWriter<I> {
return stack_buffer_copy(reader, writer);
}

// FIXME: #42788
//
// - This creates a (mut) reference to a slice of
// _uninitialized_ integers, which is **undefined behavior**
//
// - Only the standard library gets to soundly "ignore" this,
// based on its privileged knowledge of unstable rustc
// internals;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I am so happy to see this comment disappear. :)

unsafe {
let spare_cap = writer.buffer_mut().spare_capacity_mut();
reader.initializer().initialize(MaybeUninit::slice_assume_init_mut(spare_cap));
}

let mut len = 0;
let mut init = 0;

loop {
let buf = writer.buffer_mut();
let spare_cap = buf.spare_capacity_mut();

if spare_cap.len() >= DEFAULT_BUF_SIZE {
match reader.read(unsafe { MaybeUninit::slice_assume_init_mut(spare_cap) }) {
Ok(0) => return Ok(len), // EOF reached
Ok(bytes_read) => {
assert!(bytes_read <= spare_cap.len());
// SAFETY: The initializer contract guarantees that either it or `read`
// will have initialized these bytes. And we just checked that the number
// of bytes is within the buffer capacity.
let mut read_buf = ReadBuf::uninit(buf.spare_capacity_mut());

// SAFETY: init is either 0 or the initialized_len of the previous iteration
unsafe {
read_buf.assume_init(init);
}

if read_buf.capacity() >= DEFAULT_BUF_SIZE {
match reader.read_buf(&mut read_buf) {
Ok(()) => {
let bytes_read = read_buf.filled_len();

if bytes_read == 0 {
return Ok(len);
}

init = read_buf.initialized_len() - bytes_read;

// SAFETY: ReadBuf guarantees all of its filled bytes are init
unsafe { buf.set_len(buf.len() + bytes_read) };
len += bytes_read as u64;
// Read again if the buffer still has enough capacity, as BufWriter itself would do
Expand All @@ -129,28 +126,26 @@ fn stack_buffer_copy<R: Read + ?Sized, W: Write + ?Sized>(
reader: &mut R,
writer: &mut W,
) -> Result<u64> {
let mut buf = MaybeUninit::<[u8; DEFAULT_BUF_SIZE]>::uninit();
// FIXME: #42788
//
// - This creates a (mut) reference to a slice of
// _uninitialized_ integers, which is **undefined behavior**
//
// - Only the standard library gets to soundly "ignore" this,
// based on its privileged knowledge of unstable rustc
// internals;
unsafe {
reader.initializer().initialize(buf.assume_init_mut());
}
let mut buf = [MaybeUninit::uninit(); DEFAULT_BUF_SIZE];
let mut buf = ReadBuf::uninit(&mut buf);

let mut len = 0;

let mut written = 0;
loop {
let len = match reader.read(unsafe { buf.assume_init_mut() }) {
Ok(0) => return Ok(written),
Ok(len) => len,
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
match reader.read_buf(&mut buf) {
Ok(()) => {}
Err(e) if e.kind() == ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
};
writer.write_all(unsafe { &buf.assume_init_ref()[..len] })?;
written += len as u64;

if buf.filled().is_empty() {
break;
}

len += buf.filled().len() as u64;
writer.write_all(buf.filled())?;
buf.clear();
}

Ok(len)
}
17 changes: 11 additions & 6 deletions library/std/src/io/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod tests;
use crate::io::prelude::*;

use crate::cmp;
use crate::io::{self, Error, ErrorKind, Initializer, IoSlice, IoSliceMut, SeekFrom};
use crate::io::{self, Error, ErrorKind, IoSlice, IoSliceMut, ReadBuf, SeekFrom};

use core::convert::TryInto;

Expand Down Expand Up @@ -324,6 +324,16 @@ where
Ok(n)
}

fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
let prev_filled = buf.filled_len();

Read::read_buf(&mut self.fill_buf()?, buf)?;

self.pos += (buf.filled_len() - prev_filled) as u64;

Ok(())
}

fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
let mut nread = 0;
for buf in bufs {
Expand All @@ -346,11 +356,6 @@ where
self.pos += n as u64;
Ok(())
}

#[inline]
unsafe fn initializer(&self) -> Initializer {
Initializer::nop()
}
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand Down
Loading