-
-
Notifications
You must be signed in to change notification settings - Fork 21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use Intl.Segmenter
#35
Comments
Is there a formal specification of how this feature works? If implemented simply, I would expect the following:

const segmenter = new Intl.Segmenter('en');
const text = 'Slice a string with ANSI escape codes';
console.log(sliceAnsi(text, 0, 10, { segmenter })); // 'Slice a'
console.log(sliceAnsi(text, 0, 11, { segmenter })); // 'Slice a'
console.log(sliceAnsi(text, 0, 12, { segmenter })); // 'Slice a'
console.log(sliceAnsi(text, 0, 13, { segmenter })); // 'Slice a'
console.log(sliceAnsi(text, 0, 14, { segmenter })); // 'Slice a string'

So far, the output is as expected. But what about the following?

console.log(sliceAnsi(text, 1, 10, { segmenter })); // 'lice a'
console.log(sliceAnsi(text, 1, 11, { segmenter })); // 'lice a'
console.log(sliceAnsi(text, 1, 12, { segmenter })); // 'lice a'
console.log(sliceAnsi(text, 1, 13, { segmenter })); // 'lice a string'
console.log(sliceAnsi(text, 1, 14, { segmenter })); // 'lice a string'

The second argument of `sliceAnsi` […] My gut feeling is that this behavior is confusing to the user and this feature should not be implemented in `slice-ansi`.

Thanks for a nice library!
@mizdra I think you are confused. |
Intl.Segmenter
when targeting Node.js 16
Intl.Segmenter
@sindresorhus Yes, adopting Intl.Segmenter by default would stop country flags from being split apart. Here is an excerpt from version 5, which got into my repository via https://www.npmjs.com/package/cli-truncate (I checked that the latest version behaves the same way, though likely in a more performant way):
but
However, for Intl.Segmenter a flag is a single unsplittable grapheme.

Here is the spike for slice-ansi@5 that solved my problem:

diff --git a/node_modules/slice-ansi/index.js b/node_modules/slice-ansi/index.js
index e10af34..f6bbf20 100755
--- a/node_modules/slice-ansi/index.js
+++ b/node_modules/slice-ansi/index.js
@@ -50,8 +50,26 @@ const checkAnsi = (ansiCodes, isEscapes, endAnsiCode) => {
return output.join('');
};
+function isRegionalIndicator(string) {
+ for (const point of string) {
+ let number = point.codePointAt(0)
+ // U+1F1E6 🇦 REGIONAL INDICATOR SYMBOL LETTER A
+ // U+1F1FF 🇿 REGIONAL INDICATOR SYMBOL LETTER Z
+ if (!(0x1F1E6 <= number && number <= 0x1F1FF)) {
+ return false
+ }
+ }
+ return true
+}
+
export default function sliceAnsi(string, begin, end) {
- const characters = [...string];
+ const characters = Array.from(
+ (function* () {
+ for (let grapheme of new Intl.Segmenter('en', { granularity: 'grapheme' }).segment(string)) {
+ yield grapheme.segment
+ }
+ })()
+ )
const ansiCodes = [];
let stringEnd = typeof end === 'number' ? end : characters.length;
@@ -83,7 +101,7 @@ export default function sliceAnsi(string, begin, end) {
visible++;
}
- if (!astralRegex.test(character) && isFullwidthCodePoint(character.codePointAt())) {
+ if (!astralRegex.test(character) && isFullwidthCodePoint(character.codePointAt()) || isRegionalIndicator(character)) {
visible++;
if (typeof end !== 'number') { |
Related issue: sindresorhus/string-length#14
The text was updated successfully, but these errors were encountered: