From 9d090e229454305e711ee90c0c6b778dd37820c7 Mon Sep 17 00:00:00 2001 From: "Taylor C. Richberger" Date: Tue, 14 Nov 2023 11:57:56 -0700 Subject: [PATCH] implement BufOutputReader and write rand traits around it. --- Cargo.toml | 4 +- src/lib.rs | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++-- src/test.rs | 36 +++++++++--- 3 files changed, 191 insertions(+), 14 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8e146471c..9a74ea318 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -92,8 +92,8 @@ no_avx512 = [] no_neon = [] [package.metadata.docs.rs] -# Document the rayon/mmap methods and the Serialize/Deserialize/Zeroize impls on docs.rs. -features = ["mmap", "rayon", "serde", "zeroize"] +# Document the rayon/mmap methods and the Serialize/Deserialize/Zeroize/RngCore impls on docs.rs. +features = ["mmap", "rayon", "serde", "zeroize", "rand"] [dependencies] arrayref = "0.3.5" diff --git a/src/lib.rs b/src/lib.rs index facd735fb..6f2a288f0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1644,20 +1644,132 @@ impl std::io::Seek for OutputReader { } } +/// A buffering wrapper around [`OutputReader`]. +/// +/// This fills some of the simpler niches of a [`std::io::BufReader`] for no_std +/// and rng use-cases that don't need a full [`std::io::BufReader`]. If you +/// need the [`std::io`] traits with buffering, you're probably better off with +/// a full [`std::io::BufReader`] wrapper around [`OutputReader`]. +/// +/// With the `rand` feature, this struct implements [`rand_core::RngCore`], +/// [`rand_core::SeedableRng`], and [`rand_core::CryptoRng`], allowing this +/// type to be used as a full [`rand::Rng`]. A [`Rng`] type alias is given as a +/// convenient suggested buffer size for Rng use. +/// +/// [`std::io`]: https://doc.rust-lang.org/std/io/index.html +/// [`std::io::BufReader`]: https://doc.rust-lang.org/std/io/struct.BufReader.html +/// [`OutputReader`]: struct.OutputReader.html +/// [`rand_core::RngCore`]: https://rust-random.github.io/rand/rand_core/trait.RngCore.html +/// [`rand_core::SeedableRng`]: https://rust-random.github.io/rand/rand_core/trait.SeedableRng.html +/// [`rand_core::CryptoRng`]: https://rust-random.github.io/rand/rand_core/trait.CryptoRng.html +/// [`rand::Rng`]: https://docs.rs/rand/latest/rand/trait.Rng.html +/// [`Rng`]: type.Rng.html +#[derive(Clone, Debug)] +pub struct BufOutputReader { + reader: OutputReader, + buffer: [u8; N], + + /// The amount of buffer that has been read already. + offset: usize, +} + +impl BufOutputReader { + #[inline] + pub fn new(reader: OutputReader) -> Self { + reader.into() + } + + /// The position in the output stream, minus the remaining characters in + /// the buffer. + #[inline] + pub fn position(&self) -> u64 { + let buffered = (N - self.offset) as u64; + self.reader.position() - buffered + } + + /// Drop what's remaining in the buffer and give a mutable reference to the + /// inner reader, so it can be seeked or otherwise manipulated. + #[inline] + pub fn output_reader(&mut self) -> &mut OutputReader { + self.offset = N; + &mut self.reader + } + + /// Efficiently fill the destination buffer, calling the underlying + /// [`OutputReader::fill`] as few times as possible. + /// + /// [`OutputReader::fill`]: struct.OutputReader.html#method.fill + pub fn fill(&mut self, mut dest: &mut [u8]) { + if dest.is_empty() { + return; + } + + let buffer_remaining = N - self.offset; + + if dest.len() <= buffer_remaining { + // There are enough bytes left in the buffer to consume without + // reading. + let end = self.offset + dest.len(); + dest.copy_from_slice(&self.buffer[self.offset..end]); + self.offset = end; + } else { + // First empty the buffer. + if buffer_remaining > 0 { + dest[..buffer_remaining].copy_from_slice(&self.buffer[self.offset..N]); + let copied = N - self.offset; + dest = &mut dest[copied..]; + } + + let buffers = dest.len() / N; + let remainder = dest.len() % N; + + // Copy full-sized chunks directly to the destination, bypassing + // the buffer. + if buffers > 0 { + let buffers_bytes = buffers * N; + self.reader.fill(&mut dest[..buffers_bytes]); + dest = &mut dest[buffers_bytes..]; + } + + // Fill the buffer for the remainder, if there is any. + if remainder > 0 { + self.reader.fill(&mut self.buffer); + dest.copy_from_slice(&self.buffer[..remainder]); + self.offset = remainder; + } else { + // We have emptied the remaining buffer, so mark this empty. + self.offset = N; + } + } + } +} + +impl From for BufOutputReader { + fn from(value: OutputReader) -> Self { + Self { + reader: value, + buffer: [0u8; N], + + // Start buffer unfilled. + offset: N, + } + } +} + #[cfg(feature = "rand")] -impl rand_core::SeedableRng for OutputReader { +impl rand_core::SeedableRng for BufOutputReader { type Seed = [u8; 32]; #[inline] fn from_seed(seed: Self::Seed) -> Self { let mut hasher = Hasher::new(); hasher.update(&seed); - hasher.finalize_xof() + hasher.finalize_xof().into() } } #[cfg(feature = "rand")] -impl rand_core::RngCore for OutputReader { +impl rand_core::RngCore for BufOutputReader { #[inline] fn next_u32(&mut self) -> u32 { rand_core::impls::next_u32_via_fill(self) @@ -1681,4 +1793,49 @@ impl rand_core::RngCore for OutputReader { } #[cfg(feature = "rand")] -impl rand_core::CryptoRng for OutputReader {} +impl rand_core::block::BlockRngCore for BufOutputReader +where + [u8; N]: Default, +{ + type Item = u8; + type Results = [u8; N]; + + fn generate(&mut self, results: &mut Self::Results) { + self.fill(results); + } +} + +#[cfg(feature = "rand")] +impl rand_core::CryptoRng for BufOutputReader {} + +#[cfg(feature = "rand")] +/// A convenience type alias for the recommended Rng buffer size. +/// +/// # Examples +/// +/// ``` +/// # use rand::{Rng as _, SeedableRng as _}; +/// # fn main() { +/// // Hash input and convert the output stream to an rng. +/// let mut hasher = blake3::Hasher::new(); +/// hasher.update(b"foo"); +/// hasher.update(b"bar"); +/// hasher.update(b"baz"); +/// let mut rng: blake3::Rng = hasher.finalize_xof().into(); +/// let output: u64 = rng.gen(); +/// assert_eq!(output, 0xfb61f3c9e0fe9ac0u64); +/// +/// // Alternately, seed it as a rand::SeedableRng. +/// let mut rng = blake3::Rng::from_seed(*b"0123456789abcdefghijklmnopqrstuv"); +/// let output: u64 = rng.gen(); +/// assert_eq!(output, 0x4ffa224b59a182a0u64); +/// +/// // In the real world, you will probably not use a static seed, but seed from +/// // OsRng or something of the sort. +/// let mut seed = [0u8; 32]; +/// rand::rngs::OsRng.fill(&mut seed); +/// let mut rng = blake3::Rng::from_seed(seed); +/// let _output: u64 = rng.gen(); +/// # } +/// ``` +pub type Rng = BufOutputReader<64>; diff --git a/src/test.rs b/src/test.rs index bb3c42102..bdad79ff3 100644 --- a/src/test.rs +++ b/src/test.rs @@ -824,21 +824,41 @@ fn test_serde() { #[test] #[cfg(feature = "rand")] fn test_rand_core() { - let mut seeded = crate::OutputReader::from_seed([b'0'; 32]); + let mut seeded = crate::Rng::from_seed(*b"0123456789abcdefghijklmnopqrstuv"); let mut buf = [0u8; 64]; seeded.fill_bytes(&mut buf); - // Verified using: printf 00000000000000000000000000000000 | b3sum -l 76 + // Verified using: printf 0123456789abcdefghijklmnopqrstuv | b3sum -l 76 assert_eq!( &buf, b"\ - \x9a\x91\x3b\xc3\x24\xb1\x7e\x97\x31\x3a\x3e\x6b\x1d\x24\x05\x44\ - \xbd\xab\xb7\x0e\xe2\xd0\xdd\x0f\x80\x25\x8c\x95\x70\x43\x1e\xb1\ - \x43\x9a\x91\x99\xca\x39\xbe\xae\x7f\x16\xe7\x0a\x96\xc4\x60\xba\ - \x11\x57\xb6\xc9\xd7\x85\x07\xd7\x37\xef\xae\x55\x23\x1f\x08\x6f\ + \xa0\x82\xa1\x59\x4b\x22\xfa\x4f\x83\x8f\xc8\x19\xe1\x91\x8b\x45\ + \xa4\xf0\x72\x7b\xad\xaa\x70\x1b\x6d\x52\x12\x11\xec\x99\x2e\x03\ + \x12\x0a\xb6\x70\x1f\x37\x96\xaa\xb8\xb1\xc5\x9d\xd1\x4c\x19\x77\ + \xf1\xc6\xbb\x53\x1c\x5e\x85\x4b\x08\xc8\xf9\x0a\x68\xfb\x8c\x69\ ", ); // defers to rand_core::impls, which interpret bytes little-endian. - assert_eq!(seeded.gen::(), 0x91bd7fa7u32); - assert_eq!(seeded.gen::(), 0x81f88d825bee930fu64); + assert_eq!(seeded.gen::(), 0x1e8b7a2a); + assert_eq!(seeded.gen::(), 0x30deb2349cce4029); + + // Test partial consumption, to be sure buffering doesn't cause problems + + let mut seeded = crate::Rng::from_seed(*b"0123456789abcdefghijklmnopqrstuv"); + let mut buf = [0u8; 63]; + seeded.fill_bytes(&mut buf); + // Verified using: printf 0123456789abcdefghijklmnopqrstuv | b3sum -l 76 + assert_eq!( + &buf, + b"\ + \xa0\x82\xa1\x59\x4b\x22\xfa\x4f\x83\x8f\xc8\x19\xe1\x91\x8b\x45\ + \xa4\xf0\x72\x7b\xad\xaa\x70\x1b\x6d\x52\x12\x11\xec\x99\x2e\x03\ + \x12\x0a\xb6\x70\x1f\x37\x96\xaa\xb8\xb1\xc5\x9d\xd1\x4c\x19\x77\ + \xf1\xc6\xbb\x53\x1c\x5e\x85\x4b\x08\xc8\xf9\x0a\x68\xfb\x8c\ + ", + ); + + // defers to rand_core::impls, which interpret bytes little-endian. + assert_eq!(seeded.gen::(), 0x8b7a2a69); + assert_eq!(seeded.gen::(), 0xdeb2349cce40291e); }