unicode-org · sffc · Nov 15, 2024 · Nov 15, 2024 · Nov 15, 2024
@@ -63,7 +63,7 @@ where
             let mut source_variants = source.variants.iter();
             'outer: for raw_variant in raw_variants {
                 for source_variant in source_variants.by_ref() {
-                    match source_variant.strict_cmp(raw_variant.as_bytes()) {
+                    match source_variant.as_str().cmp(raw_variant) {
                         Ordering::Equal => {
                             // The source_variant is equal, move to next raw_variant
                             continue 'outer;
@@ -122,13 +122,11 @@ fn uts35_replacement<'a, I>(
         loop {
             match (sources.peek(), skips.peek(), replacements.peek()) {
                 (Some(&source), Some(skip), _)
-                    if source.strict_cmp(skip.as_bytes()) == Ordering::Greater =>
+                    if source.as_str().cmp(skip) == Ordering::Greater =>
                 {
                     skips.next();
                 }
-                (Some(&source), Some(skip), _)
-                    if source.strict_cmp(skip.as_bytes()) == Ordering::Equal =>
-                {
+                (Some(&source), Some(skip), _) if source.as_str().cmp(skip) == Ordering::Equal => {
                     skips.next();
                     sources.next();
                 }

@@ -311,7 +311,7 @@ impl Keywords {
     /// }
     /// ```
     pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
-        writeable::cmp_bytes(self, other)
+        writeable::cmp_utf8(self, other)
     }
 
     pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> {

@@ -305,7 +305,7 @@ impl FromStr for Value {
 
 impl PartialEq<&str> for Value {
     fn eq(&self, other: &&str) -> bool {
-        writeable::cmp_bytes(self, other.as_bytes()).is_eq()
+        writeable::cmp_utf8(self, other.as_bytes()).is_eq()
     }
 }
 

@@ -14,6 +14,16 @@ use alloc::borrow::Cow;
 
 /// A core struct representing a [`Unicode BCP47 Language Identifier`].
 ///
+/// # Ordering
+///
+/// This type deliberately does not implement `Ord` or `PartialOrd` because there are
+/// multiple possible orderings. Depending on your use case, two orderings are available:
+///
+/// 1. A string ordering, suitable for stable serialization: [`LanguageIdentifier::strict_cmp`]
+/// 2. A struct ordering, suitable for use with a BTreeSet: [`LanguageIdentifier::total_cmp`]
+///
+/// See issue: <https://github.com/unicode-org/icu4x/issues/1215>
+///
 /// # Parsing
 ///
 /// Unicode recognizes three levels of standard conformance for any language identifier:
@@ -30,18 +40,6 @@ use alloc::borrow::Cow;
 /// This operation normalizes syntax to be well-formed. No legacy subtag replacements is performed.
 /// For validation and canonicalization, see `LocaleCanonicalizer`.
 ///
-/// # Ordering
-///
-/// This type deliberately does not implement `Ord` or `PartialOrd` because there are
-/// multiple possible orderings, and the team did not want to favor one over any other.
-///
-/// Instead, there are functions available that return these different orderings:
-///
-/// - [`LanguageIdentifier::strict_cmp`]
-/// - [`LanguageIdentifier::total_cmp`]
-///
-/// See issue: <https://github.com/unicode-org/icu4x/issues/1215>
-///
 /// # Examples
 ///
 /// Simple example:
@@ -214,31 +212,44 @@ impl LanguageIdentifier {
     ///
     /// # Examples
     ///
+    /// Sorting a list of langids with this method requires converting one of them to a string:
+    ///
     /// ```
     /// use icu::locale::LanguageIdentifier;
     /// use std::cmp::Ordering;
+    /// use writeable::Writeable;
     ///
+    /// // Random input order:
     /// let bcp47_strings: &[&str] = &[
-    ///     "pl-Latn-PL",
-    ///     "und",
-    ///     "und-Adlm",
-    ///     "und-GB",
-    ///     "und-ZA",
+    ///     "ar-Latn",
+    ///     "zh-Hant-TW",
+    ///     "zh-TW",
     ///     "und-fonipa",
-    ///     "zh",
+    ///     "zh-Hant",
+    ///     "ar-SA",
     /// ];
     ///
-    /// for ab in bcp47_strings.windows(2) {
-    ///     let a = ab[0];
-    ///     let b = ab[1];
-    ///     assert!(a.cmp(b) == Ordering::Less);
-    ///     let a_langid = a.parse::<LanguageIdentifier>().unwrap();
-    ///     assert!(a_langid.strict_cmp(a.as_bytes()) == Ordering::Equal);
-    ///     assert!(a_langid.strict_cmp(b.as_bytes()) == Ordering::Less);
-    /// }
+    /// let mut langids = bcp47_strings.iter().map(|s| s.parse().unwrap()).collect::<Vec<LanguageIdentifier>>();
+    /// langids.sort_by(|a, b| {
+    ///     let b = b.write_to_string();
+    ///     a.strict_cmp(b.as_bytes())
+    /// });
+    /// let strict_cmp_strings = langids.iter().map(|l| l.to_string()).collect::<Vec<String>>();
+    ///
+    /// // Output ordering, sorted alphabetically
+    /// let expected_ordering: &[&str] = &[
+    ///     "ar-Latn",
+    ///     "ar-SA",
+    ///     "und-fonipa",
+    ///     "zh-Hant",
+    ///     "zh-Hant-TW",
+    ///     "zh-TW",
+    /// ];
+    ///
+    /// assert_eq!(expected_ordering, strict_cmp_strings);
     /// ```
     pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
-        writeable::cmp_bytes(self, other)
+        writeable::cmp_utf8(self, other)
     }
 
     pub(crate) fn as_tuple(
@@ -260,7 +271,41 @@ impl LanguageIdentifier {
     ///
     /// # Examples
     ///
-    /// Using a wrapper to add one of these to a [`BTreeSet`]:
+    /// This method returns a nonsensical ordering derived from the fields of the struct:
+    ///
+    /// ```
+    /// use icu::locale::LanguageIdentifier;
+    /// use std::cmp::Ordering;
+    ///
+    /// // Input strings, sorted alphabetically
+    /// let bcp47_strings: &[&str] = &[
+    ///     "ar-Latn",
+    ///     "ar-SA",
+    ///     "und-fonipa",
+    ///     "zh-Hant",
+    ///     "zh-Hant-TW",
+    ///     "zh-TW",
+    /// ];
+    /// assert!(bcp47_strings.windows(2).all(|w| w[0] < w[1]));
+    ///
+    /// let mut langids = bcp47_strings.iter().map(|s| s.parse().unwrap()).collect::<Vec<LanguageIdentifier>>();
+    /// langids.sort_by(LanguageIdentifier::total_cmp);
+    /// let total_cmp_strings = langids.iter().map(|l| l.to_string()).collect::<Vec<String>>();
+    ///
+    /// // Output ordering, sorted arbitrarily
+    /// let expected_ordering: &[&str] = &[
+    ///     "ar-SA",
+    ///     "ar-Latn",
+    ///     "und-fonipa",
+    ///     "zh-TW",
+    ///     "zh-Hant",
+    ///     "zh-Hant-TW",
+    /// ];
+    ///
+    /// assert_eq!(expected_ordering, total_cmp_strings);
+    /// ```
+    ///
+    /// Use a wrapper to add a [`LanguageIdentifier`] to a [`BTreeSet`]:
     ///
     /// ```no_run
     /// use icu::locale::LanguageIdentifier;

@@ -21,6 +21,16 @@ use core::str::FromStr;
 /// [`Locale`] exposes all of the same fields and methods as [`LanguageIdentifier`], and
 /// on top of that is able to parse, manipulate and serialize unicode extension fields.
 ///
+/// # Ordering
+///
+/// This type deliberately does not implement `Ord` or `PartialOrd` because there are
+/// multiple possible orderings. Depending on your use case, two orderings are available:
+///
+/// 1. A string ordering, suitable for stable serialization: [`Locale::strict_cmp`]
+/// 2. A struct ordering, suitable for use with a BTreeSet: [`Locale::total_cmp`]
+///
+/// See issue: <https://github.com/unicode-org/icu4x/issues/1215>
+///
 /// # Parsing
 ///
 /// Unicode recognizes three levels of standard conformance for a locale:
@@ -37,18 +47,6 @@ use core::str::FromStr;
 /// This operation normalizes syntax to be well-formed. No legacy subtag replacements is performed.
 /// For validation and canonicalization, see `LocaleCanonicalizer`.
 ///
-/// # Ordering
-///
-/// This type deliberately does not implement `Ord` or `PartialOrd` because there are
-/// multiple possible orderings, and the team did not want to favor one over any other.
-///
-/// Instead, there are functions available that return these different orderings:
-///
-/// - [`Locale::strict_cmp`]
-/// - [`Locale::total_cmp`]
-///
-/// See issue: <https://github.com/unicode-org/icu4x/issues/1215>
-///
 /// # Examples
 ///
 /// Simple example:
@@ -198,31 +196,46 @@ impl Locale {
     ///
     /// # Examples
     ///
+    /// Sorting a list of locales with this method requires converting one of them to a string:
+    ///
     /// ```
     /// use icu::locale::Locale;
     /// use std::cmp::Ordering;
+    /// use writeable::Writeable;
     ///
+    /// // Random input order:
     /// let bcp47_strings: &[&str] = &[
-    ///     "pl-Latn-PL",
-    ///     "und",
+    ///     "und-u-ca-hebrew",
+    ///     "ar-Latn",
+    ///     "zh-Hant-TW",
+    ///     "zh-TW",
+    ///     "und-fonipa",
+    ///     "zh-Hant",
+    ///     "ar-SA",
+    /// ];
+    ///
+    /// let mut locales = bcp47_strings.iter().map(|s| s.parse().unwrap()).collect::<Vec<Locale>>();
+    /// locales.sort_by(|a, b| {
+    ///     let b = b.write_to_string();
+    ///     a.strict_cmp(b.as_bytes())
+    /// });
+    /// let strict_cmp_strings = locales.iter().map(|l| l.to_string()).collect::<Vec<String>>();
+    ///
+    /// // Output ordering, sorted alphabetically
+    /// let expected_ordering: &[&str] = &[
+    ///     "ar-Latn",
+    ///     "ar-SA",
     ///     "und-fonipa",
-    ///     "und-t-m0-true",
     ///     "und-u-ca-hebrew",
-    ///     "und-u-ca-japanese",
-    ///     "zh",
+    ///     "zh-Hant",
+    ///     "zh-Hant-TW",
+    ///     "zh-TW",
     /// ];
     ///
-    /// for ab in bcp47_strings.windows(2) {
-    ///     let a = ab[0];
-    ///     let b = ab[1];
-    ///     assert!(a.cmp(b) == Ordering::Less);
-    ///     let a_loc = a.parse::<Locale>().unwrap();
-    ///     assert!(a_loc.strict_cmp(a.as_bytes()) == Ordering::Equal);
-    ///     assert!(a_loc.strict_cmp(b.as_bytes()) == Ordering::Less);
-    /// }
+    /// assert_eq!(expected_ordering, strict_cmp_strings);
     /// ```
     pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
-        writeable::cmp_bytes(self, other)
+        writeable::cmp_utf8(self, other)
     }
 
     #[allow(clippy::type_complexity)]
@@ -263,7 +276,43 @@ impl Locale {
     ///
     /// # Examples
     ///
-    /// Using a wrapper to add one of these to a [`BTreeSet`]:
+    /// This method returns a nonsensical ordering derived from the fields of the struct:
+    ///
+    /// ```
+    /// use icu::locale::Locale;
+    /// use std::cmp::Ordering;
+    ///
+    /// // Input strings, sorted alphabetically
+    /// let bcp47_strings: &[&str] = &[
+    ///     "ar-Latn",
+    ///     "ar-SA",
+    ///     "und-fonipa",
+    ///     "und-u-ca-hebrew",
+    ///     "zh-Hant",
+    ///     "zh-Hant-TW",
+    ///     "zh-TW",
+    /// ];
+    /// assert!(bcp47_strings.windows(2).all(|w| w[0] < w[1]));
+    ///
+    /// let mut locales = bcp47_strings.iter().map(|s| s.parse().unwrap()).collect::<Vec<Locale>>();
+    /// locales.sort_by(Locale::total_cmp);
+    /// let total_cmp_strings = locales.iter().map(|l| l.to_string()).collect::<Vec<String>>();
+    ///
+    /// // Output ordering, sorted arbitrarily
+    /// let expected_ordering: &[&str] = &[
+    ///     "ar-SA",
+    ///     "ar-Latn",
+    ///     "und-u-ca-hebrew",
+    ///     "und-fonipa",
+    ///     "zh-TW",
+    ///     "zh-Hant",
+    ///     "zh-Hant-TW",
+    /// ];
+    ///
+    /// assert_eq!(expected_ordering, total_cmp_strings);
+    /// ```
+    ///
+    /// Use a wrapper to add a [`Locale`] to a [`BTreeSet`]:
     ///
     /// ```no_run
     /// use icu::locale::Locale;

@@ -552,7 +552,7 @@ impl DataLocale {
     /// }
     /// ```
     pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
-        writeable::cmp_bytes(self, other)
+        writeable::cmp_utf8(self, other)
     }
 
     /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion.

@@ -558,7 +558,7 @@ impl DataProvider<MetazoneGenericNamesLongV1Marker> for SourceDataProvider {
                     let Some(location) = locations.get(tz) else {
                         return true;
                     };
-                    writeable::cmp_bytes(
+                    writeable::cmp_utf8(
                         &time_zone_names_resource
                             .region_format
                             .interpolate([location]),
@@ -627,12 +627,12 @@ impl DataProvider<MetazoneSpecificNamesLongV1Marker> for SourceDataProvider {
                         return true;
                     };
                     if zv == ZoneVariant::Daylight {
-                        writeable::cmp_bytes(
+                        writeable::cmp_utf8(
                             &locations.get().pattern_daylight.interpolate([location]),
                             v.as_bytes(),
                         ) != Ordering::Equal
                     } else if zv == ZoneVariant::Standard {
-                        writeable::cmp_bytes(
+                        writeable::cmp_utf8(
                             &locations.get().pattern_standard.interpolate([location]),
                             v.as_bytes(),
                         ) != Ordering::Equal

@@ -134,7 +134,7 @@ fn writeable_benches(c: &mut Criterion) {
             .into_owned()
         });
     });
-    c.bench_function("writeable/cmp_bytes", |b| {
+    c.bench_function("writeable/cmp_str", |b| {
         b.iter(|| {
             let short = black_box(SHORT_STR);
             let medium = black_box(MEDIUM_STR);
@@ -143,7 +143,7 @@ fn writeable_benches(c: &mut Criterion) {
             [short, medium, long, long_overlap].map(|s1| {
                 [short, medium, long, long_overlap].map(|s2| {
                     let message = WriteableMessage { message: s1 };
-                    writeable::cmp_bytes(&message, s2.as_bytes())
+                    writeable::cmp_str(&message, s2)
                 })
             })
         });
@@ -238,10 +238,10 @@ fn complex_benches(c: &mut Criterion) {
         MEDIUM_STR,
         LONG_STR,
     ];
-    c.bench_function("complex/cmp_bytes", |b| {
+    c.bench_function("complex/cmp_str", |b| {
         b.iter(|| {
             black_box(REFERENCE_STRS)
-                .map(|s| writeable::cmp_bytes(black_box(&COMPLEX_WRITEABLE_MEDIUM), s.as_bytes()))
+                .map(|s| writeable::cmp_str(black_box(&COMPLEX_WRITEABLE_MEDIUM), s))
         });
     });
 }