Skip to content

Commit

Permalink
Bitcode rewrite (#19)
Browse files Browse the repository at this point in the history
  • Loading branch information
caibear authored Mar 16, 2024
1 parent 3140043 commit 431b88f
Show file tree
Hide file tree
Showing 73 changed files with 7,977 additions and 6,952 deletions.
2 changes: 1 addition & 1 deletion .cargo/config.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[build]
rustflags = ["-C", "target-cpu=native"]
rustflags = ["-C", "target-cpu=native"]
18 changes: 11 additions & 7 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,26 @@ jobs:
# `i686-unknown-linux-gnu` so we just need to check this page for a
# compatible nightly:
# https://rust-lang.github.io/rustup-components-history/mips64-unknown-linux-gnuabi64.html
toolchain: nightly-2023-07-04
toolchain: nightly-2023-04-25
override: true
components: rustfmt, miri
- name: Lint
run: cargo fmt --check
- name: Test (debug)
- name: Check (no-default-features)
run: cargo check --no-default-features
- name: Test
run: cargo test
- name: Test (all-features)
run: cargo test --all-features
- name: Install i686 and GCC multilib
run: rustup target add i686-unknown-linux-gnu && sudo apt update && sudo apt install -y gcc-multilib
- name: Test (32-bit)
run: cargo test --target i686-unknown-linux-gnu
- name: Test (32-bit all-features)
run: cargo test --target i686-unknown-linux-gnu --all-features
- name: Setup Miri
run: cargo miri setup
- name: Test (miri)
run: MIRIFLAGS="-Zmiri-permissive-provenance" cargo miri test
- name: Test (miri all-features)
run: cargo miri test --all-features
- name: Setup Miri (big-endian)
run: rustup target add mips64-unknown-linux-gnuabi64 && cargo miri setup --target mips64-unknown-linux-gnuabi64
- name: Test (miri big-endian)
run: MIRIFLAGS="-Zmiri-permissive-provenance" cargo miri test --target mips64-unknown-linux-gnuabi64
run: cargo miri test --target mips64-unknown-linux-gnuabi64
8 changes: 3 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
/target
/Cargo.lock
/bitcode_derive/Cargo.lock
target/
Cargo.lock
perf.*
.idea
perf.data
perf.data.old
39 changes: 20 additions & 19 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,41 @@ members = [
[package]
name = "bitcode"
authors = [ "Cai Bear", "Finn Bear" ]
version = "0.5.1"
version = "0.6.0-beta.1"
edition = "2021"
license = "MIT OR Apache-2.0"
repository = "https://github.com/SoftbearStudios/bitcode"
description = "bitcode is a bitwise binary serializer"
exclude = ["fuzz/"]

[dependencies]
bitcode_derive = { version = "0.5.0", path="./bitcode_derive", optional = true }
bytemuck = { version = "1.13", features = [ "extern_crate_alloc" ] }
from_bytes_or_zeroed = "0.1"
residua-zigzag = "0.1.0"
arrayvec = { version = "0.7", default-features = false, optional = true }
bitcode_derive = { version = "0.6.0-beta.1", path = "./bitcode_derive", optional = true }
bytemuck = { version = "1.14", features = [ "min_const_generics", "must_cast" ] }
glam = { version = "0.22", default-features = false, features = [ "std" ], optional = true }
serde = { version = "1.0", optional = true }
simdutf8 = { version = "0.1.4", optional = true }

[dev-dependencies]
arrayvec = { version = "0.7.2", features = [ "serde" ] }
arrayvec = { version = "0.7", features = [ "serde" ] }
bincode = "1.3.3"
bitvec = { version = "1.0.1" }
flate2 = "1.0.25"
lz4_flex = "0.10.0"
musli = "0.0.42"
paste = "1.0.12"
postcard = { version = "1.0", features = ["alloc"] }
rand = { version = "0.8.5", default-features = false }
flate2 = "1.0.28"
lz4_flex = { version = "0.11.2", default-features = false }
paste = "1.0.14"
rand = "0.8.5"
rand_chacha = "0.3.1"
serde = { version = "1.0.159", features = [ "derive" ] }
serde = { version = "1.0", features = [ "derive" ] }

# zstd doesn't compile with miri big-endian.
[target.'cfg(not(miri))'.dev-dependencies]
zstd = "0.13.0"

[features]
derive = [ "bitcode_derive" ]
default = [ "derive", "simdutf8" ]
default = [ "derive" ]

[package.metadata.docs.rs]
features = ["serde"]
features = [ "derive", "serde" ]

[profile.bench]
lto = true
# TODO: enabling LTO halves the speed of benches_borrowed::bench_bitcode_decode
#[profile.bench]
#lto = true
161 changes: 63 additions & 98 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,111 +2,76 @@
[![Documentation](https://docs.rs/bitcode/badge.svg)](https://docs.rs/bitcode)
[![crates.io](https://img.shields.io/crates/v/bitcode.svg)](https://crates.io/crates/bitcode)
[![Build](https://github.com/SoftbearStudios/bitcode/actions/workflows/build.yml/badge.svg)](https://github.com/SoftbearStudios/bitcode/actions/workflows/build.yml)
[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)

A bitwise encoder/decoder similar to [bincode](https://github.com/bincode-org/bincode), which attempts to shrink the serialized size without sacrificing speed (as would be the case with compression).

The format may change between major versions, so we are free to optimize it.

## Comparison with [bincode](https://github.com/bincode-org/bincode)

### Features

- Bitwise serialization
- [Gamma](https://en.wikipedia.org/wiki/Elias_gamma_coding) encoded lengths and enum variant indices

### Additional features with `#[derive(bitcode::Encode, bitcode::Decode)]`

- Enums use the fewest possible bits, e.g. an enum with 4 variants uses 2 bits
- Apply attributes to fields/enum variants:

| Attribute | Type | Result |
|-----------------------------------------------|---------------|------------------------------------------------------------------------------------------------------------|
| `#[bitcode_hint(ascii)]` | String | Uses 7 bits per character |
| `#[bitcode_hint(ascii_lowercase)]` | String | Uses 5 bits per character |
| `#[bitcode_hint(expected_range = "50..100")]` | u8-u64 | Uses log2(range.end - range.start) bits |
| `#[bitcode_hint(expected_range = "0.0..1.0")]` | f32/f64 | Uses ~25 bits for `f32` and ~54 bits for `f64` |
| `#[bitcode_hint(frequency = 123)]` | enum variant | Frequent variants use fewer bits (see [Huffman coding](https://en.wikipedia.org/wiki/Huffman_coding)) |
| `#[bitcode_hint(gamma)]` | i8-i64/u8-u64 | Small integers use fewer bits (see [Elias gamma coding](https://en.wikipedia.org/wiki/Elias_gamma_coding)) |
| `#[bitcode(with_serde)]` | T: Serialize | Uses `serde::Serialize` instead of `bitcode::Encode` |

### Limitations

- Doesn't support streaming APIs
- Format may change between major versions
- With `feature = "derive"`, types containing themselves must use `#[bitcode(recursive)]` to compile

## Benchmarks vs. [bincode](https://github.com/bincode-org/bincode) and [postcard](https://github.com/jamesmunns/postcard)

### Primitives (size in bits)

| Type | Bitcode (derive) | Bitcode (serde) | Bincode | Bincode (varint) | Postcard |
|---------------------|------------------|-----------------|---------|------------------|----------|
| bool | 1 | 1 | 8 | 8 | 8 |
| u8/i8 | 8 | 8 | 8 | 8 | 8 |
| u16/i16 | 16 | 16 | 16 | 8-24 | 8-24 |
| u32/i32 | 32 | 32 | 32 | 8-40 | 8-40 |
| u64/i64 | 64 | 64 | 64 | 8-72 | 8-80 |
| u128/i128 | 128 | 128 | 128 | 8-136 | 8-152 |
| usize/isize | 64 | 64 | 64 | 8-72 | 8-80 |
| f32 | 32 | 32 | 32 | 32 | 32 |
| f64 | 64 | 64 | 64 | 64 | 64 |
| char | 21 | 21 | 8-32 | 8-32 | 16-40 |
| Option<()> | 1 | 1 | 8 | 8 | 8 |
| Result<(), ()> | 1 | 1-3 | 32 | 8 | 8 |
| enum { A, B, C, D } | 2 | 1-5 | 32 | 8 | 8 |
| Duration | 94 | 96 | 96 | 16-112 | 16-120 |

<sup>Note: These are defaults, and can be optimized with hints in the case of Bitcode (derive) or custom `impl Serialize` in the case of `serde` serializers.</sup>

### Values (size in bits)

| Value | Bitcode (derive) | Bitcode (serde) | Bincode | Bincode (varint) | Postcard |
|---------------------|------------------|-----------------|---------|------------------|----------|
| [true; 4] | 4 | 4 | 32 | 32 | 32 |
| vec![(); 0] | 1 | 1 | 64 | 8 | 8 |
| vec![(); 1] | 3 | 3 | 64 | 8 | 8 |
| vec![(); 256] | 17 | 17 | 64 | 24 | 16 |
| vec![(); 65536] | 33 | 33 | 64 | 40 | 24 |
| "" | 1 | 1 | 64 | 8 | 8 |
| "abcd" | 37 | 37 | 96 | 40 | 40 |
| "abcd1234" | 71 | 71 | 128 | 72 | 72 |


### Random [Structs and Enums](https://github.com/SoftbearStudios/bitcode/blob/2a47235eee64f4a7c49ad1841a5b509abd2d0e99/src/benches.rs#L16-L88) (average size and speed)

| Format | Size (bytes) | Serialize (ns) | Deserialize (ns) |
|------------------------|--------------|----------------|------------------|
| Bitcode (derive) | 6.2 | 14 | 50 |
| Bitcode (serde) | 6.7 | 18 | 59 |
| Bincode | 20.3 | 17 | 61 |
| Bincode (varint) | 10.9 | 26 | 68 |
| Bincode (LZ4) | 9.9 | 58 | 73 |
| Bincode (Deflate Fast) | 8.4 | 336 | 279 |
| Bincode (Deflate Best) | 7.8 | 1990 | 275 |
| Postcard | 10.7 | 21 | 57 |

### More benchmarks

[rust_serialization_benchmark](https://david.kolo.ski/rust_serialization_benchmark/)

## Acknowledgement

Some test cases were derived from [bincode](https://github.com/bincode-org/bincode) (see comment in `tests.rs`).
A binary encoder/decoder with the following goals:
- 🔥 Blazingly fast
- 🐁 Tiny serialized size
- 💎 Highly compressible by Deflate/LZ4/Zstd

In contrast, these are non-goals:
- Stable format across major versions
- Self-describing format
- Compatibility with languages other than Rust

See [rust_serialization_benchmark](https://github.com/djkoloski/rust_serialization_benchmark) for benchmarks.

## Example
```rust
use bitcode::{Encode, Decode};

#[derive(Encode, Decode, PartialEq, Debug)]
struct Foo<'a> {
x: u32,
y: &'a str,
}

let original = Foo {
x: 10,
y: "abc",
};

let encoded: Vec<u8> = bitcode::encode(&original); // No error
let decoded: Foo<'_> = bitcode::decode(&encoded).unwrap();
assert_eq!(original, decoded);
```

## Library Example

Add bitcode to libraries without specifying the major version, so that binary crates can pick the version.
The derive-based usage shown below is a minimal stable subset of the bitcode API, so avoid using any other functionality in libraries.
```toml
bitcode = { version = "0", features = ["derive"], default-features = false, optional = true }
```
```rust
#[cfg_attr(feature = "bitcode", derive(bitcode::Encode, bitcode::Decode))]
pub struct Vec2 {
x: f32,
y: f32,
}
```

## Tuple vs Array
If you have multiple values of the same type:
- Use a tuple or struct when the values are semantically different: `x: u32, y: u32`
- Use an array when all values are semantically similar: `pixels: [u8; 16]`

## Implementation Details
- Heavily inspired by <https://github.com/That3Percent/tree-buf>
- All instances of each field are grouped together, making compression easier
- Uses smaller integers where possible, all the way down to 1 bit
- Validation is performed up front on typed vectors before deserialization
- Code is designed to be auto-vectorized by LLVM

## License

Licensed under either of

* Apache License, Version 2.0
([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
* MIT license
([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
* Apache License, Version 2.0
([LICENSE-APACHE](LICENSE-APACHE) or <http://www.apache.org/licenses/LICENSE-2.0>)
* MIT license
([LICENSE-MIT](LICENSE-MIT) or <http://opensource.org/licenses/MIT>)

at your option.

## Contribution

Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in the work by you, as defined in the Apache-2.0 license, shall be
dual licensed as above, without any additional terms or conditions.
dual licensed as above, without any additional terms or conditions.
5 changes: 2 additions & 3 deletions bitcode_derive/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "bitcode_derive"
authors = [ "Cai Bear", "Finn Bear" ]
version = "0.5.0"
version = "0.6.0-beta.1"
edition = "2021"
license = "MIT OR Apache-2.0"
repository = "https://github.com/SoftbearStudios/bitcode/"
Expand All @@ -11,7 +11,6 @@ description = "Implementation of #[derive(Encode, Decode)] for bitcode"
proc-macro = true

[dependencies]
packagemerge = "0.1"
proc-macro2 = "1.0"
quote = "1.0"
syn = { version = "2.0.3", features = [ "extra-traits" ] }
syn = { version = "2.0.3", features = [ "extra-traits", "visit-mut" ] }
Loading

0 comments on commit 431b88f

Please sign in to comment.