Skip to content

Commit

Permalink
Create two different Env for master3 and the normal branch
Browse files Browse the repository at this point in the history
  • Loading branch information
Kerollmops committed Aug 21, 2024
1 parent e3a82b5 commit bccccaf
Show file tree
Hide file tree
Showing 6 changed files with 835 additions and 816 deletions.
2 changes: 1 addition & 1 deletion examples/all-types-heed3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use heed3::{Database, EnvOpenOptions};
use serde::{Deserialize, Serialize};

fn main() -> Result<(), Box<dyn Error>> {
let path = Path::new("target").join("heed.mdb");
let path = Path::new("target").join("heed3.mdb");

fs::create_dir_all(&path)?;

Expand Down
75 changes: 34 additions & 41 deletions heed/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,29 +1,36 @@
[package]
name = "heed"
name = "heed3"
version = "0.20.5"
authors = ["Kerollmops <[email protected]>"]
description = "A fully typed LMDB wrapper with minimum overhead"
description = "A fully typed LMDB wrapper with minimum overhead and optional support for encryption"
license = "MIT"
repository = "https://github.com/Kerollmops/heed"
keywords = ["lmdb", "database", "storage", "typed"]
keywords = ["lmdb", "database", "storage", "typed", "encryption"]
categories = ["database", "data-structures"]
readme = "../README.md"
edition = "2021"

[dependencies]
# TODO update dependencies
aead = { version = "0.5.1", default-features = false, optional = true }
bitflags = { version = "2.6.0", features = ["serde"] }
byteorder = { version = "1.5.0", default-features = false }
generic-array = { version = "0.14.6", features = ["serde"], optional = true }
heed-master3-proc-macro = { path = "../heed-master3-proc-macro", optional = true }
heed-traits = { version = "0.20.0", path = "../heed-traits" }
heed-types = { version = "0.20.1", default-features = false, path = "../heed-types" }
libc = "0.2.155"
lmdb-master-sys = { version = "0.2.4", path = "../lmdb-master-sys" }
lmdb-master3-sys = { version = "0.2.4", path = "../lmdb-master3-sys" }
once_cell = "1.19.0"
page_size = "0.6.0"
serde = { version = "1.0.203", features = ["derive"], optional = true }
synchronoise = "1.0.1"

[dev-dependencies]
# TODO update dependencies
argon2 = { version = "0.4.1", features = ["std"] }
serde = { version = "1.0.203", features = ["derive"] }
chacha20poly1305 = "0.10.1"
tempfile = "3.10.1"

[target.'cfg(windows)'.dependencies]
Expand All @@ -32,7 +39,13 @@ url = "2.5.2"
[features]
# The `serde` feature makes some types serializable,
# like the `EnvOpenOptions` struct.
default = ["serde", "serde-bincode", "serde-json"]
# TODO remove encryption from defaults
default = ["serde", "serde-bincode", "serde-json", "encryption"]

# Enable the LMDB encryption feature
# TODO add more information here
encryption = ["dep:heed-master3-proc-macro", "dep:aead", "dep:generic-array"]

serde = ["bitflags/serde", "dep:serde"]

# The #MDB_NOTLS flag is automatically set on Env opening,
Expand Down Expand Up @@ -68,7 +81,7 @@ unbounded_depth = ["heed-types/unbounded_depth"]
# There are tradeoffs for both POSIX and SysV semaphores; which you
# should look into before enabling this feature. Also, see here:
# <https://github.com/LMDB/lmdb/blob/3947014aed7ffe39a79991fa7fb5b234da47ad1a/libraries/liblmdb/lmdb.h#L46-L69>
posix-sem = ["lmdb-master-sys/posix-sem"]
posix-sem = ["lmdb-master3-sys/posix-sem"]

# These features configure the MDB_IDL_LOGN macro, which determines
# the size of the free and dirty page lists (and thus the amount of memory
Expand All @@ -80,15 +93,15 @@ posix-sem = ["lmdb-master-sys/posix-sem"]
#
# For more information on the motivation for these features (and their effect),
# see https://github.com/mozilla/lmdb/pull/2.
mdb_idl_logn_8 = ["lmdb-master-sys/mdb_idl_logn_8"]
mdb_idl_logn_9 = ["lmdb-master-sys/mdb_idl_logn_9"]
mdb_idl_logn_10 = ["lmdb-master-sys/mdb_idl_logn_10"]
mdb_idl_logn_11 = ["lmdb-master-sys/mdb_idl_logn_11"]
mdb_idl_logn_12 = ["lmdb-master-sys/mdb_idl_logn_12"]
mdb_idl_logn_13 = ["lmdb-master-sys/mdb_idl_logn_13"]
mdb_idl_logn_14 = ["lmdb-master-sys/mdb_idl_logn_14"]
mdb_idl_logn_15 = ["lmdb-master-sys/mdb_idl_logn_15"]
mdb_idl_logn_16 = ["lmdb-master-sys/mdb_idl_logn_16"]
mdb_idl_logn_8 = ["lmdb-master3-sys/mdb_idl_logn_8"]
mdb_idl_logn_9 = ["lmdb-master3-sys/mdb_idl_logn_9"]
mdb_idl_logn_10 = ["lmdb-master3-sys/mdb_idl_logn_10"]
mdb_idl_logn_11 = ["lmdb-master3-sys/mdb_idl_logn_11"]
mdb_idl_logn_12 = ["lmdb-master3-sys/mdb_idl_logn_12"]
mdb_idl_logn_13 = ["lmdb-master3-sys/mdb_idl_logn_13"]
mdb_idl_logn_14 = ["lmdb-master3-sys/mdb_idl_logn_14"]
mdb_idl_logn_15 = ["lmdb-master3-sys/mdb_idl_logn_15"]
mdb_idl_logn_16 = ["lmdb-master3-sys/mdb_idl_logn_16"]

# Setting this enables you to use keys longer than 511 bytes. The exact limit
# is computed by LMDB at compile time. You can find the exact value by calling
Expand All @@ -108,35 +121,15 @@ mdb_idl_logn_16 = ["lmdb-master-sys/mdb_idl_logn_16"]
# stored key must fit within the smallest limit of all architectures used. For
# example, if you are moving databases between Apple M1 and Apple Intel
# computers then you need to keep your keys within the smaller 1982 byte limit.
longer-keys = ["lmdb-master-sys/longer-keys"]
longer-keys = ["lmdb-master3-sys/longer-keys"]

# Examples are located outside the standard heed/examples directory to prevent
# conflicts between heed3 and heed examples when working on both crates.
[[example]]
name = "all-types"
path = "../examples/all-types.rs"

[[example]]
name = "clear-database"
path = "../examples/clear-database.rs"

[[example]]
name = "cursor-append"
path = "../examples/cursor-append.rs"

[[example]]
name = "custom-comparator"
path = "../examples/custom-comparator.rs"

[[example]]
name = "multi-env"
path = "../examples/multi-env.rs"

[[example]]
name = "nested"
path = "../examples/nested.rs"
name = "all-types-heed3"
path = "../examples/all-types-heed3.rs"

[[example]]
name = "rmp-serde"
path = "../examples/rmp-serde.rs"
required-features = ["serde-rmp"]
name = "encrypt-heed3"
path = "../examples/encrypt-heed3.rs"
required-features = ["encryption"]
247 changes: 247 additions & 0 deletions heed/src/env/clear.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
use std::collections::hash_map::Entry;
use std::ffi::CString;
#[cfg(windows)]
use std::ffi::OsStr;
use std::io::ErrorKind::NotFound;
#[cfg(unix)]
use std::os::unix::ffi::OsStrExt;
use std::path::Path;
use std::sync::Arc;
use std::{io, ptr};

use synchronoise::SignalEvent;

use crate::env::{canonicalize_path, Env, EnvFlags, EnvInner, OPENED_ENV};
use crate::mdb::ffi;
use crate::mdb::lmdb_error::mdb_result;
use crate::{Error, Result};

/// Record stored in `OPENED_ENV`, keyed by the canonicalized environment path,
/// for every environment opened through `EnvOpenOptions::open`.
pub struct EnvEntry {
/// Handle to the opened environment. When this is `None`,
/// `EnvOpenOptions::open` fails with `Error::DatabaseClosing`.
pub(super) env: Option<Env>,
/// Event built with `SignalEvent::manual(false)` when the entry is inserted.
// NOTE(review): presumably signaled once the environment has been fully
// closed; the signaling side is not in this file, confirm against it.
pub(super) signal_event: Arc<SignalEvent>,
/// Options the environment was opened with. A later `open` on the same path
/// must supply equal options or it fails with `Error::BadOpenOptions`.
pub(super) options: EnvOpenOptions,
}

/// Options and flags which can be used to configure how an environment is opened.
///
/// Every limit is optional: when a field is `None` the matching LMDB setter is
/// simply not called by [`EnvOpenOptions::open`].
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct EnvOpenOptions {
/// Value forwarded to `mdb_env_set_mapsize`; `open` rejects it unless it is
/// a multiple of the system page size.
map_size: Option<usize>,
/// Value forwarded to `mdb_env_set_maxreaders`.
max_readers: Option<u32>,
/// Value forwarded to `mdb_env_set_maxdbs`.
max_dbs: Option<u32>,
/// Flags handed to `mdb_env_open`; `NO_TLS` is added on top of them when the
/// `read-txn-no-tls` feature is enabled.
flags: EnvFlags,
}

impl Default for EnvOpenOptions {
fn default() -> Self {
Self::new()
}
}

impl EnvOpenOptions {
    /// Builds an option set with nothing configured yet: no map size, no
    /// reader or database limit, and an empty set of environment flags.
    pub fn new() -> Self {
        let flags = EnvFlags::empty();
        Self { flags, max_dbs: None, max_readers: None, map_size: None }
    }
}

impl EnvOpenOptions {
/// Set the size of the memory map to use for this environment.
pub fn map_size(&mut self, size: usize) -> &mut Self {
self.map_size = Some(size);
self
}

/// Set the maximum number of threads/reader slots for the environment.
pub fn max_readers(&mut self, readers: u32) -> &mut Self {
self.max_readers = Some(readers);
self
}

/// Set the maximum number of named databases for the environment.
pub fn max_dbs(&mut self, dbs: u32) -> &mut Self {
self.max_dbs = Some(dbs);
self
}

/// Set one or [more LMDB flags](http://www.lmdb.tech/doc/group__mdb__env.html).
///
/// ```
/// use std::fs;
/// use std::path::Path;
/// use heed::{EnvOpenOptions, Database, EnvFlags};
/// use heed::types::*;
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// fs::create_dir_all(Path::new("target").join("database.mdb"))?;
/// let mut env_builder = EnvOpenOptions::new();
/// unsafe { env_builder.flags(EnvFlags::NO_TLS | EnvFlags::NO_META_SYNC); }
/// let dir = tempfile::tempdir().unwrap();
/// let env = unsafe { env_builder.open(dir.path())? };
///
/// // we will open the default unamed database
/// let mut wtxn = env.write_txn()?;
/// let db: Database<Str, U32<byteorder::NativeEndian>> = env.create_database(&mut wtxn, None)?;
///
/// // opening a write transaction
/// db.put(&mut wtxn, "seven", &7)?;
/// db.put(&mut wtxn, "zero", &0)?;
/// db.put(&mut wtxn, "five", &5)?;
/// db.put(&mut wtxn, "three", &3)?;
/// wtxn.commit()?;
///
/// // Force the OS to flush the buffers (see Flag::NoSync and Flag::NoMetaSync).
/// env.force_sync();
///
/// // opening a read transaction
/// // to check if those values are now available
/// let mut rtxn = env.read_txn()?;
///
/// let ret = db.get(&rtxn, "zero")?;
/// assert_eq!(ret, Some(0));
///
/// let ret = db.get(&rtxn, "five")?;
/// assert_eq!(ret, Some(5));
/// # Ok(()) }
/// ```
///
/// # Safety
///
/// It is unsafe to use unsafe LMDB flags such as `NO_SYNC`, `NO_META_SYNC`, or `NO_LOCK`.
pub unsafe fn flags(&mut self, flags: EnvFlags) -> &mut Self {
self.flags |= flags;
self
}

/// Open an environment that will be located at the specified path.
///
/// # Safety
/// LMDB is backed by a memory map [^1] which comes with some safety precautions.
///
/// Memory map constructors are marked `unsafe` because of the potential
/// for Undefined Behavior (UB) using the map if the underlying file is
/// subsequently modified, in or out of process.
///
/// LMDB itself has a locking system that solves this problem,
/// but it will not save you from making mistakes yourself.
///
/// These are some things to take note of:
///
/// - Avoid long-lived transactions, they will cause the database to grow quickly [^2]
/// - Avoid aborting your process with an active transaction [^3]
/// - Do not use LMDB on remote filesystems, even between processes on the same host [^4]
/// - You must manage concurrent accesses yourself if using [`EnvFlags::NO_LOCK`] [^5]
/// - Anything that causes LMDB's lock file to be broken will cause synchronization issues and may introduce UB [^6]
///
/// `heed` itself upholds some safety invariants, including but not limited to:
/// - Calling [`EnvOpenOptions::open`] twice in the same process, at the same time is OK [^7]
///
/// For more details, it is highly recommended to read LMDB's official documentation. [^8]
///
/// [^1]: <https://en.wikipedia.org/wiki/Memory_map>
/// [^2]: <https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/lmdb.h#L107-L114>
/// [^3]: <https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/lmdb.h#L118-L121>
/// [^4]: <https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/lmdb.h#L129>
/// [^5]: <https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/lmdb.h#L129>
/// [^6]: <https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/lmdb.h#L49-L52>
/// [^7]: <https://github.com/LMDB/lmdb/blob/b8e54b4c31378932b69f1298972de54a565185b1/libraries/liblmdb/lmdb.h#L102-L105>
/// [^8]: <http://www.lmdb.tech/doc/index.html>
pub unsafe fn open<P: AsRef<Path>>(&self, path: P) -> Result<Env> {
let mut lock = OPENED_ENV.write().unwrap();

let path = match canonicalize_path(path.as_ref()) {
Err(err) => {
if err.kind() == NotFound && self.flags.contains(EnvFlags::NO_SUB_DIR) {
let path = path.as_ref();
match path.parent().zip(path.file_name()) {
Some((dir, file_name)) => canonicalize_path(dir)?.join(file_name),
None => return Err(err.into()),
}
} else {
return Err(err.into());
}
}
Ok(path) => path,
};

match lock.entry(path) {
Entry::Occupied(entry) => {
let env = entry.get().env.clone().ok_or(Error::DatabaseClosing)?;
let options = entry.get().options.clone();
if &options == self {
Ok(env)
} else {
Err(Error::BadOpenOptions { env, options })
}
}
Entry::Vacant(entry) => {
let path = entry.key();
let path_str = CString::new(path.as_os_str().as_bytes()).unwrap();

unsafe {
let mut env: *mut ffi::MDB_env = ptr::null_mut();
mdb_result(ffi::mdb_env_create(&mut env))?;

if let Some(size) = self.map_size {
if size % page_size::get() != 0 {
let msg = format!(
"map size ({}) must be a multiple of the system page size ({})",
size,
page_size::get()
);
return Err(Error::Io(io::Error::new(
io::ErrorKind::InvalidInput,
msg,
)));
}
mdb_result(ffi::mdb_env_set_mapsize(env, size))?;
}

if let Some(readers) = self.max_readers {
mdb_result(ffi::mdb_env_set_maxreaders(env, readers))?;
}

if let Some(dbs) = self.max_dbs {
mdb_result(ffi::mdb_env_set_maxdbs(env, dbs))?;
}

// When the `read-txn-no-tls` feature is enabled, we must force LMDB
// to avoid using the thread local storage, this way we allow users
// to use references of RoTxn between threads safely.
let flags = if cfg!(feature = "read-txn-no-tls") {
self.flags | EnvFlags::NO_TLS
} else {
self.flags
};

let result =
mdb_result(ffi::mdb_env_open(env, path_str.as_ptr(), flags.bits(), 0o600));

match result {
Ok(()) => {
let signal_event = Arc::new(SignalEvent::manual(false));
let inner = EnvInner { env, path: path.clone() };
let env = Env(Arc::new(inner));
let cache_entry = EnvEntry {
env: Some(env.clone()),
options: self.clone(),
signal_event,
};
entry.insert(cache_entry);
Ok(env)
}
Err(e) => {
ffi::mdb_env_close(env);
Err(e.into())
}
}
}
}
}
}
}
Loading

0 comments on commit bccccaf

Please sign in to comment.