diff --git a/Sharprompt.Tests/EastAsianWidthTests.cs b/Sharprompt.Tests/EastAsianWidthTests.cs index f4cbe35..d8a15cd 100644 --- a/Sharprompt.Tests/EastAsianWidthTests.cs +++ b/Sharprompt.Tests/EastAsianWidthTests.cs @@ -16,6 +16,13 @@ public class EastAsianWidthTests [InlineData("🍣", 2)] [InlineData("🍣🍖🥂", 6)] [InlineData("aあ𩸽🍣", 7)] + [InlineData("\u200D", 0)] + [InlineData("❤️", 2)] + [InlineData("👍🏻", 2)] + [InlineData("1️⃣", 2)] + [InlineData("🇯🇵", 2)] + [InlineData("👩‍💻", 2)] + [InlineData("👨‍👩‍👧‍👦", 2)] public void GetWidth(string value, int width) { Assert.Equal(width, value.GetWidth()); diff --git a/Sharprompt.Tests/TextInputBufferTests.cs b/Sharprompt.Tests/TextInputBufferTests.cs index d5fcb4a..40bb001 100644 --- a/Sharprompt.Tests/TextInputBufferTests.cs +++ b/Sharprompt.Tests/TextInputBufferTests.cs @@ -35,6 +35,8 @@ public void Insert(string value, int length) [InlineData("𩸽𠈻𠮷", "𩸽𠈻")] [InlineData("🍣🍖🥂", "🍣🍖")] [InlineData("aあ𩸽🍣", "aあ𩸽")] + [InlineData("👩‍💻", "")] + [InlineData("a👩‍💻", "a")] public void Backspace(string value, string substring) { var textInputBuffer = new TextInputBuffer(); @@ -57,6 +59,8 @@ public void Backspace(string value, string substring) [InlineData("𩸽𠈻𠮷", "𠈻𠮷")] [InlineData("🍣🍖🥂", "🍖🥂")] [InlineData("aあ𩸽🍣", "あ𩸽🍣")] + [InlineData("👩‍💻", "")] + [InlineData("👩‍💻a", "a")] public void Delete(string value, string substring) { var textInputBuffer = new TextInputBuffer(); @@ -81,6 +85,7 @@ public void Delete(string value, string substring) [InlineData("𩸽𠈻𠮷", "𩸽𠈻", "𠮷")] [InlineData("🍣🍖🥂", "🍣🍖", "🥂")] [InlineData("aあ𩸽🍣", "aあ𩸽", "🍣")] + [InlineData("a👩‍💻", "a", "👩‍💻")] public void MoveBackward(string value, string backward, string forward) { var textInputBuffer = new TextInputBuffer(); @@ -102,6 +107,7 @@ public void MoveBackward(string value, string backward, string forward) [InlineData("𩸽𠈻𠮷", "𩸽", "𠈻𠮷")] [InlineData("🍣🍖🥂", "🍣", "🍖🥂")] [InlineData("aあ𩸽🍣", "a", "あ𩸽🍣")] + [InlineData("👩‍💻a", "👩‍💻", "a")] public void MoveForward(string value, string backward, string forward) { var textInputBuffer = new TextInputBuffer(); @@ -209,6 +215,7 @@ public void MoveToNextWord(string value, string backward, string forward) [InlineData("aあ_𩸽🍣", "", "𩸽🍣")] [InlineData("aあ𩸽_🍣", "", "🍣")] [InlineData("aあ𩸽🍣_", "", "")] + [InlineData("a👩‍💻_", "", "")] [InlineData("_ abc def ", "", " abc def ")] [InlineData(" _abc def ", "", "abc def ")] [InlineData(" a_bc def ", " ", "bc def ")] diff --git a/Sharprompt/Internal/EastAsianWidth.cs b/Sharprompt/Internal/EastAsianWidth.cs index a2b6991..fe636c4 100644 --- a/Sharprompt/Internal/EastAsianWidth.cs +++ b/Sharprompt/Internal/EastAsianWidth.cs @@ -1,10 +1,31 @@ -namespace Sharprompt.Internal; +using System.Globalization; +using System.Text; + +namespace Sharprompt.Internal; internal static class EastAsianWidth { + private const char ZeroWidthJoiner = '\u200D'; + public static int GetWidth(this string value) { var width = 0; + var textElementEnumerator = StringInfo.GetTextElementEnumerator(value); + + while (textElementEnumerator.MoveNext()) + { + width += GetTextElementWidth(textElementEnumerator.GetTextElement()); + } + + return width; + } + + private static int GetTextElementWidth(string value) + { + var width = 0; + var hasVisibleCodePoint = false; + var hasEmojiSequence = false; + var regionalIndicatorCount = 0; for (var i = 0; i < value.Length; i++) { @@ -21,13 +42,63 @@ public static int GetWidth(this string value) codePoint = value[i]; } - width += GetWidth(codePoint); + if (IsEmojiSequenceCodePoint(codePoint)) + { + hasEmojiSequence = true; + } + + if (IsRegionalIndicator(codePoint)) + { + regionalIndicatorCount++; + } + + var codePointWidth = GetWidth(codePoint); + + if (codePointWidth > 0) + { + hasVisibleCodePoint = true; + width += codePointWidth; + } + } + + if (!hasVisibleCodePoint) + { + return 0; + } + + if (hasEmojiSequence || regionalIndicatorCount == 2) + { + return 2; } return width; } - private static int GetWidth(uint codePoint) => IsFullWidth(codePoint) ? 2 : 1; + private static int GetWidth(uint codePoint) => IsZeroWidth(codePoint) ? 0 : IsFullWidth(codePoint) ? 2 : 1; + + private static bool IsZeroWidth(uint codePoint) + { + if (!Rune.TryCreate((int)codePoint, out var rune)) + { + return false; + } + + return Rune.GetUnicodeCategory(rune) is UnicodeCategory.Control or UnicodeCategory.Format or UnicodeCategory.NonSpacingMark or UnicodeCategory.EnclosingMark; + } + + private static bool IsEmojiSequenceCodePoint(uint codePoint) => + codePoint == ZeroWidthJoiner || + IsVariationSelector(codePoint) || + IsEmojiModifier(codePoint) || + codePoint == 0x20E3; + + private static bool IsVariationSelector(uint codePoint) => + (codePoint >= 0xFE00 && codePoint <= 0xFE0F) || + (codePoint >= 0xE0100 && codePoint <= 0xE01EF); + + private static bool IsEmojiModifier(uint codePoint) => codePoint is >= 0x1F3FB and <= 0x1F3FF; + + private static bool IsRegionalIndicator(uint codePoint) => codePoint is >= 0x1F1E6 and <= 0x1F1FF; private static bool IsFullWidth(uint codePoint) { diff --git a/Sharprompt/Internal/TextInputBuffer.cs b/Sharprompt/Internal/TextInputBuffer.cs index bac9888..951e2d2 100644 --- a/Sharprompt/Internal/TextInputBuffer.cs +++ b/Sharprompt/Internal/TextInputBuffer.cs @@ -1,12 +1,15 @@ -using System.Text; +using System.Globalization; +using System.Text; namespace Sharprompt.Internal; internal class TextInputBuffer { private readonly StringBuilder _inputBuffer = new(); + private int[] _textElementStarts = []; private int _position; + private bool _isTextElementStartsDirty = true; public int Length => _inputBuffer.Length; @@ -14,114 +17,164 @@ internal class TextInputBuffer public bool IsEnd => _position == _inputBuffer.Length; - public void Insert(char value) => _inputBuffer.Insert(_position++, value); + public void Insert(char value) + { + _inputBuffer.Insert(_position++, value); + _isTextElementStartsDirty = true; + } public void Backspace() { - var count = 1; - - if (char.IsLowSurrogate(_inputBuffer[--_position])) - { - count++; - _position--; - } + var start = GetPreviousTextElementStart(_position); + var count = _position - start; + _position = start; _inputBuffer.Remove(_position, count); + _isTextElementStartsDirty = true; } public void Delete() { - var count = 1; - - if (char.IsHighSurrogate(_inputBuffer[_position])) - { - count++; - } + var end = GetNextTextElementEnd(_position); - _inputBuffer.Remove(_position, count); + _inputBuffer.Remove(_position, end - _position); + _isTextElementStartsDirty = true; } public void BackspaceWord() { var count = 0; - while (_position > 0 && char.IsWhiteSpace(_inputBuffer[_position - 1])) + while (_position > 0) { - _position--; - count++; + var start = GetPreviousTextElementStart(_position); + + if (!IsWhiteSpace(start, _position - start)) + { + break; + } + + count += _position - start; + _position = start; } - while (_position > 0 && !char.IsWhiteSpace(_inputBuffer[_position - 1])) + while (_position > 0) { - _position--; - count++; + var start = GetPreviousTextElementStart(_position); + + if (IsWhiteSpace(start, _position - start)) + { + break; + } + + count += _position - start; + _position = start; } _inputBuffer.Remove(_position, count); + _isTextElementStartsDirty = true; } public void DeleteWord() { var count = 0; - while (_position + count < _inputBuffer.Length && !char.IsWhiteSpace(_inputBuffer[_position + count])) + while (_position + count < _inputBuffer.Length) { - count++; + var start = _position + count; + var end = GetNextTextElementEnd(start); + + if (IsWhiteSpace(start, end - start)) + { + break; + } + + count += end - start; } - while (_position + count < _inputBuffer.Length && char.IsWhiteSpace(_inputBuffer[_position + count])) + while (_position + count < _inputBuffer.Length) { - count++; + var start = _position + count; + var end = GetNextTextElementEnd(start); + + if (!IsWhiteSpace(start, end - start)) + { + break; + } + + count += end - start; } _inputBuffer.Remove(_position, count); + _isTextElementStartsDirty = true; } public void Clear() { _position = 0; _inputBuffer.Clear(); + _textElementStarts = []; + _isTextElementStartsDirty = false; } public void MoveBackward() { - if (char.IsLowSurrogate(_inputBuffer[--_position])) - { - _position--; - } + _position = GetPreviousTextElementStart(_position); } - public void MoveForward() - { - if (char.IsHighSurrogate(_inputBuffer[_position++])) - { - _position++; - } - } + public void MoveForward() => _position = GetNextTextElementEnd(_position); public void MoveToPreviousWord() { - while (_position > 0 && char.IsWhiteSpace(_inputBuffer[_position - 1])) + while (_position > 0) { - _position--; + var start = GetPreviousTextElementStart(_position); + + if (!IsWhiteSpace(start, _position - start)) + { + break; + } + + _position = start; } - while (_position > 0 && !char.IsWhiteSpace(_inputBuffer[_position - 1])) + while (_position > 0) { - _position--; + var start = GetPreviousTextElementStart(_position); + + if (IsWhiteSpace(start, _position - start)) + { + break; + } + + _position = start; } } public void MoveToNextWord() { - while (_position < _inputBuffer.Length && !char.IsWhiteSpace(_inputBuffer[_position])) + while (_position < _inputBuffer.Length) { - _position++; + var end = GetNextTextElementEnd(_position); + + if (IsWhiteSpace(_position, end - _position)) + { + break; + } + + _position = end; } - while (_position < _inputBuffer.Length && char.IsWhiteSpace(_inputBuffer[_position])) + while (_position < _inputBuffer.Length) { - _position++; + var end = GetNextTextElementEnd(_position); + + if (!IsWhiteSpace(_position, end - _position)) + { + break; + } + + _position = end; } } @@ -134,4 +187,58 @@ public void MoveToNextWord() public string ToForwardString() => _inputBuffer.ToString(_position, _inputBuffer.Length - _position); public override string ToString() => _inputBuffer.ToString(); + + private int GetPreviousTextElementStart(int position) + { + var indices = GetTextElementStarts(); + var index = System.Array.BinarySearch(indices, position); + + if (index >= 0) + { + return index > 0 ? indices[index - 1] : 0; + } + + index = ~index; + + return index > 0 ? indices[index - 1] : 0; + } + + private int GetNextTextElementEnd(int position) + { + var indices = GetTextElementStarts(); + var index = System.Array.BinarySearch(indices, position); + + if (index >= 0) + { + return index + 1 < indices.Length ? indices[index + 1] : _inputBuffer.Length; + } + + index = ~index; + + return index < indices.Length ? indices[index] : _inputBuffer.Length; + } + + private bool IsWhiteSpace(int start, int count) + { + for (var i = start; i < start + count; i++) + { + if (!char.IsWhiteSpace(_inputBuffer[i])) + { + return false; + } + } + + return true; + } + + private int[] GetTextElementStarts() + { + if (_isTextElementStartsDirty) + { + _textElementStarts = StringInfo.ParseCombiningCharacters(_inputBuffer.ToString()); + _isTextElementStartsDirty = false; + } + + return _textElementStarts; + } }