
Commit

Remove BlobSchemaV1 (partial progress on #5603)
Manishearth committed Sep 27, 2024
1 parent 775f83c commit f9888f8
Showing 2 changed files with 0 additions and 163 deletions.
118 changes: 0 additions & 118 deletions provider/blob/src/blob_schema.rs
@@ -18,8 +18,6 @@ use zerovec::vecs::{Index16, Index32, VarZeroSlice, VarZeroVec, VarZeroVecFormat
#[cfg_attr(feature = "export", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub(crate) enum BlobSchema<'data> {
    #[serde(borrow)]
    V001(BlobSchemaV1<'data>),
    #[serde(borrow)]
    V002(BlobSchemaV2<'data, Index16>),
    #[serde(borrow)]
@@ -41,7 +39,6 @@ impl<'data> BlobSchema<'data> {

    pub fn load(&self, marker: DataMarkerInfo, req: DataRequest) -> Result<&'data [u8], DataError> {
        match self {
            BlobSchema::V001(s) => s.load(marker, req),
            BlobSchema::V002(s) => s.load(marker, req),
            BlobSchema::V002Bigger(s) => s.load(marker, req),
        }
@@ -52,7 +49,6 @@ impl<'data> BlobSchema<'data> {
        marker: DataMarkerInfo,
    ) -> Result<BTreeSet<DataIdentifierCow>, DataError> {
        match self {
            BlobSchema::V001(s) => s.iter_ids(marker),
            BlobSchema::V002(s) => s.iter_ids(marker),
            BlobSchema::V002Bigger(s) => s.iter_ids(marker),
        }
@@ -61,126 +57,12 @@ impl<'data> BlobSchema<'data> {
    #[cfg(debug_assertions)]
    fn check_invariants(&self) {
        match self {
            BlobSchema::V001(s) => s.check_invariants(),
            BlobSchema::V002(s) => s.check_invariants(),
            BlobSchema::V002Bigger(s) => s.check_invariants(),
        }
    }
}

/// Version 1 of the ICU4X data blob schema.
#[derive(Clone, Copy, Debug, serde::Deserialize, yoke::Yokeable)]
#[yoke(prove_covariance_manually)]
#[cfg_attr(feature = "export", derive(serde::Serialize))]
pub(crate) struct BlobSchemaV1<'data> {
    /// Map from marker hash and locale to buffer index.
    /// Weak invariant: the `usize` values are valid indices into `self.buffers`
    /// Weak invariant: there is at least one value for every integer in 0..self.buffers.len()
    #[serde(borrow)]
    pub markers: ZeroMap2dBorrowed<'data, DataMarkerPathHash, Index32U8, usize>,
    /// Vector of buffers
    #[serde(borrow)]
    pub buffers: &'data VarZeroSlice<[u8], Index32>,
}

impl Default for BlobSchemaV1<'_> {
    fn default() -> Self {
        Self {
            markers: ZeroMap2dBorrowed::new(),
            buffers: VarZeroSlice::new_empty(),
        }
    }
}

impl<'data> BlobSchemaV1<'data> {
    pub fn load(&self, marker: DataMarkerInfo, req: DataRequest) -> Result<&'data [u8], DataError> {
        let idx = self
            .markers
            .get0(&marker.path.hashed())
            .ok_or(DataErrorKind::MarkerNotFound)
            .and_then(|cursor| {
                if marker.is_singleton && !req.id.locale.is_default() {
                    return Err(DataErrorKind::InvalidRequest);
                }
                cursor
                    .get1_copied_by(|k| {
                        struct Comparator<'a>(&'a DataLocale, &'a DataMarkerAttributes);
                        impl writeable::Writeable for Comparator<'_> {
                            fn write_to<W: core::fmt::Write + ?Sized>(
                                &self,
                                sink: &mut W,
                            ) -> core::fmt::Result {
                                self.0.write_to(sink)?;
                                if !self.1.is_empty() {
                                    sink.write_char(REQUEST_SEPARATOR)?;
                                    sink.write_str(self.1)?;
                                }
                                Ok(())
                            }
                        }
                        Comparator(req.id.locale, req.id.marker_attributes)
                            .writeable_cmp_bytes(&k.0)
                            .reverse()
                    })
                    .ok_or(DataErrorKind::IdentifierNotFound)
            })
            .map_err(|kind| kind.with_req(marker, req))?;
        self.buffers
            .get(idx)
            .ok_or_else(|| DataError::custom("Invalid blob bytes").with_req(marker, req))
    }

    pub fn iter_ids(
        &self,
        marker: DataMarkerInfo,
    ) -> Result<BTreeSet<DataIdentifierCow>, DataError> {
        Ok(self
            .markers
            .get0(&marker.path.hashed())
            .ok_or_else(|| DataErrorKind::MarkerNotFound.with_marker(marker))?
            .iter1_copied()
            .filter_map(|(s, _)| core::str::from_utf8(&s.0).ok())
            .filter_map(|s| {
                #[allow(unused_imports)]
                use alloc::borrow::ToOwned;
                if let Some((locale, attrs)) = s.split_once(REQUEST_SEPARATOR) {
                    Some(DataIdentifierCow::from_owned(
                        DataMarkerAttributes::try_from_str(attrs).ok()?.to_owned(),
                        locale.parse().ok()?,
                    ))
                } else {
                    Some(DataIdentifierCow::from_locale(s.parse().ok()?))
                }
            })
            .collect())
    }

    /// Verifies the weak invariants using debug assertions
    #[cfg(debug_assertions)]
    fn check_invariants(&self) {
        if self.markers.is_empty() && self.buffers.is_empty() {
            return;
        }
        // Note: We could check that every index occurs at least once, but that's a more expensive
        // operation, so we will just check for the min and max index.
        let mut seen_min = false;
        let mut seen_max = self.buffers.is_empty();
        for cursor in self.markers.iter0() {
            for (locale, idx) in cursor.iter1_copied() {
                debug_assert!(idx < self.buffers.len() || locale == Index32U8::SENTINEL);
                if idx == 0 {
                    seen_min = true;
                }
                if idx + 1 == self.buffers.len() {
                    seen_max = true;
                }
            }
        }
        debug_assert!(seen_min);
        debug_assert!(seen_max);
    }
}

/// Version 2 of the ICU4X data blob schema.
///
/// This itself has two modes, using [`Index16`] or [`Index32`] buffers for the locales array.
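For context on what is being deleted: the removed V1 schema keys each payload by a single byte string built from the locale plus, when marker attributes are present, a REQUEST_SEPARATOR followed by the attributes, and maps that key to an index into a shared list of buffers. The following is a minimal sketch of that encode/split round trip using only standard-library types; the separator value and every helper name here are illustrative assumptions, not ICU4X APIs.

use std::collections::BTreeMap;

// Assumed stand-in for the crate's REQUEST_SEPARATOR constant (actual value not shown in this diff).
const REQUEST_SEPARATOR: char = '\u{1e}';

/// Build a V1-style key: locale, then separator + attributes when attributes are non-empty.
fn encode_key(locale: &str, attrs: &str) -> String {
    if attrs.is_empty() {
        locale.to_string()
    } else {
        format!("{locale}{REQUEST_SEPARATOR}{attrs}")
    }
}

/// Split a stored key back into (locale, attributes), mirroring the split_once in the removed iter_ids.
fn decode_key(key: &str) -> (&str, &str) {
    match key.split_once(REQUEST_SEPARATOR) {
        Some((locale, attrs)) => (locale, attrs),
        None => (key, ""),
    }
}

fn main() {
    // Keys map to indices into a shared buffer list, as in BlobSchemaV1::buffers.
    let buffers: Vec<&[u8]> = vec![b"payload for en".as_slice(), b"payload for de/short".as_slice()];
    let mut index: BTreeMap<String, usize> = BTreeMap::new();
    index.insert(encode_key("en", ""), 0);
    index.insert(encode_key("de", "short"), 1);

    let idx = index[&encode_key("de", "short")];
    assert_eq!(buffers[idx], b"payload for de/short".as_slice());
    assert_eq!(decode_key(&encode_key("de", "short")), ("de", "short"));
    println!("lookups OK");
}

V2 keeps the same buffer-index idea but, per its doc comment, stores the locale keys in either Index16 or Index32 VarZeroVec buffers rather than a ZeroMap2d.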
45 changes: 0 additions & 45 deletions provider/blob/src/export/blob_exporter.rs
@@ -21,7 +21,6 @@ use zerovec::ZeroVec;
use postcard::ser_flavors::{AllocVec, Flavor};

enum VersionConfig {
    V001,
    V002,
}

@@ -129,7 +128,6 @@

    fn close(&mut self) -> Result<(), DataError> {
        match self.version {
            VersionConfig::V001 => self.close_v1(),
            VersionConfig::V002 => self.close_v2(),
        }
    }
@@ -171,49 +169,6 @@ impl BlobExporter<'_> {
        FinalizedBuffers { vzv, remap }
    }

    fn close_v1(&mut self) -> Result<(), DataError> {
        let FinalizedBuffers { vzv, remap } = self.finalize_buffers();

        // Now build up the ZeroMap2d, changing old ID to new ID
        let mut zm = self
            .resources
            .get_mut()
            .expect("poison")
            .iter()
            .flat_map(|(hash, sub_map)| {
                sub_map
                    .iter()
                    .map(|(locale, old_id)| (*hash, locale, old_id))
            })
            .map(|(hash, locale, old_id)| {
                (
                    hash,
                    Index32U8::parse_byte_slice(locale)
                        .expect("[u8] to IndexU32U8 should never fail"),
                    remap.get(old_id).expect("in-bound index"),
                )
            })
            .collect::<ZeroMap2d<DataMarkerPathHash, Index32U8, usize>>();

        for marker in self.all_markers.lock().expect("poison").iter() {
            if zm.get0(marker).is_none() {
                zm.insert(marker, Index32U8::SENTINEL, &vzv.len());
            }
        }

        if !zm.is_empty() {
            let blob = BlobSchema::V001(BlobSchemaV1 {
                markers: zm.as_borrowed(),
                buffers: &vzv,
            });
            log::info!("Serializing blob to output stream...");

            let output = postcard::to_allocvec(&blob)?;
            self.sink.write_all(&output)?;
        }
        Ok(())
    }

    fn close_v2(&mut self) -> Result<(), DataError> {
        let FinalizedBuffers { vzv, remap } = self.finalize_buffers();

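The removed close_v1 path (like the surviving close_v2) first calls finalize_buffers, whose body is not shown in this diff but which, judging from the remap it returns and the "changing old ID to new ID" comment, hands back the final buffer list plus a mapping from each resource's original payload ID to its index in that list; markers that exported no payloads then get a sentinel entry pointing at vzv.len(), one past the last valid index. Below is a minimal sketch of that dedup-and-remap idea using standard-library maps; the function signature, types, and the assumption that identical payloads are deduplicated are illustrative stand-ins, not the ICU4X exporter API.

use std::collections::{BTreeMap, HashMap};

/// Collapse identical payloads into one buffer slot and record, for each
/// original payload ID, the index of its buffer in the final list.
fn finalize_buffers(payloads: &[(usize, Vec<u8>)]) -> (Vec<Vec<u8>>, HashMap<usize, usize>) {
    let mut buffers: Vec<Vec<u8>> = Vec::new();
    let mut seen: BTreeMap<Vec<u8>, usize> = BTreeMap::new();
    let mut remap: HashMap<usize, usize> = HashMap::new();
    for (old_id, bytes) in payloads {
        let idx = *seen.entry(bytes.clone()).or_insert_with(|| {
            buffers.push(bytes.clone());
            buffers.len() - 1
        });
        remap.insert(*old_id, idx);
    }
    (buffers, remap)
}

fn main() {
    // Two of the three payloads are byte-identical, so they collapse to one buffer.
    let payloads = vec![
        (0, b"shared payload".to_vec()),
        (1, b"unique payload".to_vec()),
        (2, b"shared payload".to_vec()),
    ];
    let (buffers, remap) = finalize_buffers(&payloads);
    assert_eq!(buffers.len(), 2);
    assert_eq!(remap[&0], remap[&2]);
    // A marker with no data would be recorded under a sentinel key mapped to
    // buffers.len(), one past the last valid index, as in the removed close_v1.
    let sentinel_index = buffers.len();
    println!("buffers: {}, sentinel index: {}", buffers.len(), sentinel_index);
}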
