diff --git a/components/collator/src/elements.rs b/components/collator/src/elements.rs index 932679afd33..88088d92568 100644 --- a/components/collator/src/elements.rs +++ b/components/collator/src/elements.rs @@ -33,7 +33,19 @@ use crate::provider::CollationDataV1; /// Marker that a complex decomposition isn't round-trippable /// under re-composition. -const NON_ROUND_TRIP_MARKER: u16 = 1; +/// +/// TODO: When taking a data format break, swap this and +/// `BACKWARD_COMBINING_STARTER_DECOMPOSITION_MARKER` around +/// to make backward-combiningness use the same bit in all +/// cases. +const NON_ROUND_TRIP_MARKER: u16 = 0b1; + +/// Marker that a complex decomposition starts with a starter +/// that can combine backwards. +const BACKWARD_COMBINING_STARTER_DECOMPOSITION_MARKER: u16 = 0b10; + +/// Values above this are treated as a BMP character. +const HIGHEST_MARKER: u16 = NON_ROUND_TRIP_MARKER | BACKWARD_COMBINING_STARTER_DECOMPOSITION_MARKER; /// Marker value for U+FDFA in NFKD const FDFA_MARKER: u16 = 3; @@ -1038,7 +1050,7 @@ where } else { let trail_or_complex = (decomposition >> 16) as u16; let lead = decomposition as u16; - if lead > NON_ROUND_TRIP_MARKER && trail_or_complex != 0 { + if lead > HIGHEST_MARKER && trail_or_complex != 0 { // Decomposition into two BMP characters: starter and non-starter self.upcoming.push( CharacterAndClassAndTrieValue::new_with_non_decomposing_starter(char_from_u16( @@ -1052,7 +1064,7 @@ where low_c, trie_value, ), ); - } else if lead > NON_ROUND_TRIP_MARKER { + } else if trail_or_complex == 0 { debug_assert_ne!( lead, FDFA_MARKER, "How come U+FDFA NFKD marker seen in NFD?" @@ -1334,7 +1346,7 @@ where } else { let trail_or_complex = (decomposition >> 16) as u16; let lead = decomposition as u16; - if lead > NON_ROUND_TRIP_MARKER && trail_or_complex != 0 { + if lead > HIGHEST_MARKER && trail_or_complex != 0 { // Decomposition into two BMP characters: starter and non-starter c = char_from_u16(lead); ce32 = data.ce32_for_char(c); @@ -1397,9 +1409,11 @@ where } combining_characters .push(CharacterAndClass::new_with_placeholder(combining)); - } else if lead > NON_ROUND_TRIP_MARKER { - debug_assert_ne!(lead, 1, "How come U+FDFA NFKD marker seen in NFD?"); - debug_assert_ne!(lead, 2, "How come non-starter marker seen here?"); + } else if trail_or_complex == 0 { + debug_assert_ne!( + lead, FDFA_MARKER, + "How come U+FDFA NFKD marker seen in NFD?" + ); // Decomposition into one BMP character c = char_from_u16(lead); ce32 = data.ce32_for_char(c);