Skip to content

Commit

Permalink
Merge branch 'main' of github.com:SoftbearStudios/bitcode into main
Browse files Browse the repository at this point in the history
  • Loading branch information
finnbear committed Jul 29, 2023
2 parents 59b1cad + e089863 commit 5b49b1f
Show file tree
Hide file tree
Showing 22 changed files with 501 additions and 302 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@ jobs:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
toolchain: nightly
# Nightly toolchain must ship the `rust-std` component for
# `i686-unknown-linux-gnu` and `mips64-unknown-linux-gnuabi64`.
# In practice, `rust-std` almost always ships for
# `i686-unknown-linux-gnu` so we just need to check this page for a
# compatible nightly:
# https://rust-lang.github.io/rustup-components-history/mips64-unknown-linux-gnuabi64.html
toolchain: nightly-2023-07-04
override: true
components: rustfmt, miri
- name: Lint
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/target
/Cargo.lock
/bitcode_derive/Cargo.lock
.idea
perf.data
perf.data.old
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ bytemuck = { version = "1.13", features = [ "extern_crate_alloc" ] }
from_bytes_or_zeroed = "0.1"
residua-zigzag = "0.1.0"
serde = { version = "1.0", optional = true }
simdutf8 = { version = "0.1.4", optional = true }

[dev-dependencies]
arrayvec = { version = "0.7.2", features = [ "serde" ] }
Expand All @@ -36,8 +37,7 @@ serde = { version = "1.0.159", features = [ "derive" ] }

[features]
derive = [ "bitcode_derive" ]
serde = [ "dep:serde" ]
default = [ "derive" ]
default = [ "derive", "simdutf8" ]

[package.metadata.docs.rs]
features = ["serde"]
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,11 @@ The format may change between major versions, so we are free to optimize it.
| usize/isize | 64 | 64 | 64 | 8-72 | 8-80 |
| f32 | 32 | 32 | 32 | 32 | 32 |
| f64 | 64 | 64 | 64 | 64 | 64 |
| char | 8-32 | 8-32 | 8-32 | 8-32 | 16-40 |
| char | 21 | 21 | 8-32 | 8-32 | 16-40 |
| Option<()> | 1 | 1 | 8 | 8 | 8 |
| Result<(), ()> | 1 | 1-3 | 32 | 8 | 8 |
| enum { A, B, C, D } | 2 | 1-5 | 32 | 8 | 8 |
| Duration | 94 | 96 | 96 | 16-112 | 16-120 |

### Values (size in bits)

Expand Down
57 changes: 0 additions & 57 deletions bitcode_derive/Cargo.lock

This file was deleted.

1 change: 1 addition & 0 deletions bitcode_derive/src/attribute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ impl VariantEncoding {
let variants = variants?;

quote! {
#[allow(clippy::verbose_bit_mask)]
Ok(match dec_variant_peek!() {
#variants,
})
Expand Down
5 changes: 4 additions & 1 deletion fuzz/fuzz_targets/fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ extern crate bitcode;
use bitcode::{Decode, Encode};
use bitvec::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use std::ffi::CString;
use std::time::Duration;

fuzz_target!(|data: &[u8]| {
if data.len() < 3 {
Expand Down Expand Up @@ -114,6 +115,8 @@ fuzz_target!(|data: &[u8]| {
M(#[bitcode_hint(gamma)] u64),
N(#[bitcode_hint(ascii)] String),
O(#[bitcode_hint(ascii_lowercase)] String),
P(BTreeMap<u16, u8>),
Q(Duration),
}

#[derive(Serialize, Deserialize, Encode, Decode, Debug)]
Expand Down
16 changes: 8 additions & 8 deletions src/benches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ fn bench_bitcode_buffer_deserialize(b: &mut Bencher) {

#[bench]
fn bench_bitcode_long_string_serialize(b: &mut Bencher) {
let data = "abcde12345".repeat(1000);
let data = "abcde1234☺".repeat(1000);
let mut buf = Buffer::new();
buf.serialize(&data).unwrap();
b.iter(|| {
Expand All @@ -220,7 +220,7 @@ fn bench_bitcode_long_string_serialize(b: &mut Bencher) {

#[bench]
fn bench_bitcode_long_string_deserialize(b: &mut Bencher) {
let data = "abcde12345".repeat(1000);
let data = "abcde1234☺".repeat(1000);
let mut buf = Buffer::new();
let bytes = buf.serialize(&data).unwrap().to_vec();
assert_eq!(buf.deserialize::<String>(&bytes).unwrap(), data);
Expand Down Expand Up @@ -266,16 +266,12 @@ bench!(

#[cfg(test)]
mod tests {
use std::time::{Duration, Instant};

use super::*;
use std::time::{Duration, Instant};

// cargo test comparison1 --release -- --nocapture --include-ignored
#[test]
#[cfg_attr(
not(debug_assertions),
ignore = "don't run in parallel with other benchmarks"
)]
#[ignore = "don't run unless --include-ignored"]
fn comparison1() {
let data = &random_data(10000);
let print_results =
Expand Down Expand Up @@ -436,6 +432,10 @@ mod tests {
compare("Option<()>", None..=Some(()));
compare("Result<(), ()>", Ok(())..=Err(()));
compare("enum { A, B, C, D }", Enum::A..=Enum::D);
compare(
"Duration",
Duration::ZERO..=Duration::new(u64::MAX, 999_999_999),
);

println!();
println!("| Value | Bitcode (derive) | Bitcode (serde) | Bincode | Bincode (varint) | Postcard |");
Expand Down
10 changes: 8 additions & 2 deletions src/bit_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::write::Write;
use crate::{Result, E};
use bitvec::domain::Domain;
use bitvec::prelude::*;
use std::num::NonZeroUsize;

/// A slow proof of concept [`Buffer`] that uses [`BitVec`]. Useful for comparison.
#[derive(Debug, Default)]
Expand Down Expand Up @@ -171,7 +172,9 @@ impl Read for BitReader<'_> {
Ok(Word::from_le_bytes(v))
}

fn read_bytes(&mut self, len: usize) -> Result<&[u8]> {
fn read_bytes(&mut self, len: NonZeroUsize) -> Result<&[u8]> {
let len = len.get();

// Take to avoid borrowing issue.
let mut tmp = std::mem::take(self.read_bytes_buf);

Expand All @@ -190,7 +193,10 @@ impl Read for BitReader<'_> {
Ok(&self.read_bytes_buf[..len])
}

fn read_encoded_bytes<C: ByteEncoding>(&mut self, len: usize) -> Result<&[u8]> {
fn read_encoded_bytes<C: ByteEncoding>(&mut self, len: NonZeroUsize) -> Result<&[u8]> {
let len = len.get();

// Take to avoid borrowing issue.
let mut tmp = std::mem::take(self.read_bytes_buf);

let bits = len
Expand Down
86 changes: 80 additions & 6 deletions src/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ macro_rules! optimized_enc {
// Call once done encoding.
macro_rules! end_enc {
() => {
flush!();
let _ = flush!();
let _ = i;
#[allow(clippy::drop_non_drop)]
drop(buf);
Expand All @@ -171,6 +171,7 @@ pub use optimized_enc;
// These benchmarks ensure that optimized_enc is working. They all run about 8 times faster with optimized_enc.
#[cfg(all(test, not(miri)))]
mod optimized_enc_tests {
use std::collections::{BinaryHeap, VecDeque};
use test::{black_box, Bencher};

type A = u8;
Expand Down Expand Up @@ -224,9 +225,9 @@ mod optimized_enc_tests {
}

#[bench]
fn bench_byte_slice(b: &mut Bencher) {
fn bench_bool_slice(b: &mut Bencher) {
let mut buffer = crate::Buffer::new();
let foo = vec![0u8; 8 * 1000];
let foo = vec![false; 8 * 1000];

b.iter(|| {
let foo = black_box(foo.as_slice());
Expand All @@ -236,16 +237,46 @@ mod optimized_enc_tests {
}

#[bench]
fn bench_bool_slice(b: &mut Bencher) {
fn bench_vec(b: &mut Bencher) {
let mut buffer = crate::Buffer::new();
let foo = vec![false; 8 * 1000];
let foo = vec![0u8; 8 * 1000];

b.iter(|| {
let foo = black_box(foo.as_slice());
let bytes = buffer.encode(foo).unwrap();
black_box(bytes);
})
}

/// Benchmarks encoding an 8000-element `VecDeque<u8>` that has been cycled
/// (pop from the front, push to the back) so that it is no longer contiguous.
#[bench]
fn bench_vec_deque(b: &mut Bencher) {
    let mut deque = VecDeque::from(vec![0u8; 8000]);
    // Cycle half of the elements through the deque to make it not contiguous.
    (0..4000).for_each(|_| {
        deque.pop_front().unwrap();
        deque.push_back(1u8);
    });
    let mut buffer = crate::Buffer::new();

    b.iter(|| {
        // `black_box` on both the input and the output keeps the optimizer
        // from eliding the encode call.
        let input = black_box(&deque);
        black_box(buffer.encode(input).unwrap());
    })
}

/// Benchmarks encoding a `BinaryHeap<u8>` holding 8000 elements.
///
/// BinaryHeap::encode isn't optimized yet.
#[bench]
fn bench_binary_heap(b: &mut Bencher) {
    // `as u8` deliberately wraps the 0..8000 counter into the `u8` range.
    let heap: BinaryHeap<u8> = (0u16..8000).map(|v| v as u8).collect();
    let mut buffer = crate::Buffer::new();

    b.iter(|| {
        let input = black_box(&heap);
        black_box(buffer.encode(input).unwrap());
    })
}
}

/// A macro that facilitates reading from a RegisterReader when decoding multiple values less than 64 bits.
Expand Down Expand Up @@ -331,7 +362,7 @@ macro_rules! optimized_dec {
// Call once done decoding.
macro_rules! end_dec {
() => {
flush!();
let _ = flush!();
let _ = i;
#[allow(clippy::drop_non_drop)]
drop(buf);
Expand All @@ -344,6 +375,7 @@ pub use optimized_dec;
// These benchmarks ensure that optimized_dec is working. They run 4-8 times faster with optimized_dec.
#[cfg(all(test, not(miri)))]
mod optimized_dec_tests {
use std::collections::{BTreeSet, BinaryHeap, VecDeque};
use test::{black_box, Bencher};

type A = u8;
Expand Down Expand Up @@ -435,4 +467,46 @@ mod optimized_dec_tests {
black_box(buffer.decode::<T>(bytes).unwrap())
})
}

/// Benchmarks decoding an 8000-element `VecDeque<u8>`.
///
/// The input is encoded from a deque that was cycled so it is no longer
/// contiguous, and a round trip is asserted once before timing starts.
#[bench]
fn bench_vec_deque(b: &mut Bencher) {
    type Deque = VecDeque<u8>;

    let mut original = Deque::from(vec![0u8; 8000]);
    // Cycle half of the elements through the deque to make it not contiguous.
    (0..4000).for_each(|_| {
        original.pop_front().unwrap();
        original.push_back(1u8);
    });

    let mut buffer = crate::Buffer::new();
    let encoded = buffer.encode(&original).unwrap().to_vec();

    // Sanity check: decoding must reproduce the value that was encoded.
    let round_tripped: Deque = buffer.decode(&encoded).unwrap();
    assert_eq!(original, round_tripped);

    b.iter(|| {
        let input = black_box(encoded.as_slice());
        black_box(buffer.decode::<Deque>(input).unwrap())
    })
}

/// Benchmarks decoding a `BinaryHeap<u8>` holding 8000 elements.
///
/// A round trip is asserted once before timing starts.
#[bench]
fn bench_binary_heap(b: &mut Bencher) {
    type Heap = BinaryHeap<u8>;

    // `as u8` deliberately wraps the 0..8000 counter into the `u8` range.
    let original: Heap = (0u16..8000).map(|v| v as u8).collect();

    let mut buffer = crate::Buffer::new();
    let encoded = buffer.encode(&original).unwrap().to_vec();
    let round_tripped: Heap = buffer.decode(&encoded).unwrap();

    // Binary heaps can't be compared directly, so compare the sets of values
    // they contain instead.
    // NOTE(review): a set comparison ignores how many times each value occurs.
    let expected_values: BTreeSet<u8> = original.iter().copied().collect();
    let decoded_values: BTreeSet<u8> = round_tripped.iter().copied().collect();
    assert_eq!(expected_values, decoded_values);

    b.iter(|| {
        let input = black_box(encoded.as_slice());
        black_box(buffer.decode::<Heap>(input).unwrap())
    })
}
}
Loading

0 comments on commit 5b49b1f

Please sign in to comment.