From b290a364f1bd5e1cf18c56ef6bd64bb788638d74 Mon Sep 17 00:00:00 2001 From: Sorin Sarca Date: Fri, 17 Oct 2025 15:37:20 +0300 Subject: [PATCH] Added static getStringLength --- .github/workflows/tests.yml | 4 +- CHANGELOG.md | 4 ++ README.md | 4 +- composer.json | 3 ++ src/UnicodeString.php | 105 ++++++++++++++++++++++++++++++++++++ tests/UnicodeStringTest.php | 11 ++++ 6 files changed, 127 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f89014b..a7c1473 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: true matrix: - php: [7.4, 8.0, 8.1, 8.2, 8.3, 8.4] + php: [7.4, 8.0, 8.1, 8.2, 8.3, 8.4, 8.5] name: PHP ${{ matrix.php }} @@ -32,4 +32,4 @@ jobs: run: composer update --no-interaction --no-progress - name: Execute tests - run: vendor/bin/phpunit --verbose + run: composer run tests diff --git a/CHANGELOG.md b/CHANGELOG.md index 32fc721..88a33dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ CHANGELOG ------------- +### v2.1.0, 2025.10.17 + +* Added static `getStringLength()` + ### v2.0.0, 2021.04.13 * The library was fully refactored diff --git a/README.md b/README.md index f6e97d5..4059919 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ The full documentation for this library can be found [here][documentation]. ## Installation -**Opis String** is available on [Packagist] and it can be installed from a +**Opis String** is available on [Packagist], and it can be installed from a command line interface by using [Composer]. 
/**
 * Counts the Unicode code points in a UTF-8 encoded string.
 *
 * The bytes are validated while they are scanned, so a count is only returned
 * for well-formed UTF-8. Rejected input includes:
 *  - stray continuation bytes and the overlong 2-byte leads 0xC0/0xC1;
 *  - overlong 3/4-byte forms (0xE0 followed by a byte below 0xA0,
 *    0xF0 followed by a byte below 0x90);
 *  - UTF-16 surrogates (0xED followed by a byte above 0x9F);
 *  - values above U+10FFFF (0xF4 followed by a byte above 0x8F, or any
 *    lead byte above 0xF4);
 *  - sequences cut short by the end of the string.
 *
 * @param string $str Raw bytes, expected to be UTF-8
 * @return int Number of code points (not bytes) in $str; 0 for an empty string
 * @throws InvalidStringException If $str is not well-formed UTF-8. The second
 *                                constructor argument is the byte offset of the
 *                                offending byte, or of the last byte of $str
 *                                when a multi-byte sequence is truncated.
 */
public static function getStringLength(string $str): int
{
    $length = strlen($str);
    $count = 0;
    $i = 0;

    while ($i < $length) {
        $lead = ord($str[$i]);

        // Fast path: an ASCII byte is a complete code point.
        if ($lead < 0x80) {
            $i++;
            $count++;
            continue;
        }

        // 0x80..0xC1 cannot start a sequence (continuation bytes or overlong
        // 2-byte leads); 0xF5..0xFF never appear in UTF-8.
        if ($lead < 0xC2 || $lead > 0xF4) {
            throw new InvalidStringException($str, $i);
        }

        // Number of continuation bytes announced by the lead byte.
        $extra = $lead < 0xE0 ? 1 : ($lead < 0xF0 ? 2 : 3);

        // Truncation is checked before any continuation byte is inspected;
        // the reported offset is the last byte of the input.
        if ($i + $extra >= $length) {
            throw new InvalidStringException($str, $length - 1);
        }

        // Only the first continuation byte has a lead-dependent range; it is
        // what excludes overlong forms, surrogates and values > U+10FFFF.
        $min = 0x80;
        $max = 0xBF;
        if ($lead === 0xE0) {
            $min = 0xA0;
        } elseif ($lead === 0xED) {
            $max = 0x9F;
        } elseif ($lead === 0xF0) {
            $min = 0x90;
        } elseif ($lead === 0xF4) {
            $max = 0x8F;
        }

        for ($k = 1; $k <= $extra; $k++) {
            $byte = ord($str[$i + $k]);

            if ($byte < $min || $byte > $max) {
                throw new InvalidStringException($str, $i + $k);
            }

            // Every following continuation byte uses the generic range.
            $min = 0x80;
            $max = 0xBF;
        }

        $i += $extra + 1;
        $count++;
    }

    return $count;
}
$this->assertTrue(wstr::from('')->isEmpty());