diff --git a/CHANGELOG.md b/CHANGELOG.md index a650dd5..4da9047 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ CHANGELOG ========= +### 1.7.0 (unreleased) ### + +* Add `SpaceBeforePunctuation` fixer for locale-aware punctuation spacing +* Add `LocaleConfig` class for centralized locale configuration +* Extend `SmartQuotes` to support 45+ languages via `LocaleConfig` +* Deprecate `FrenchNoBreakSpace` fixer in favor of `SpaceBeforePunctuation` + ### 1.6.0 (2025-12-15) ### * Reduced the package size diff --git a/README.md b/README.md index a323f5f..d85f94e 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ Just tell the Fixer class [which Fixer](#available-fixers) you want to run on yo ```php use JoliTypo\Fixer; -$fixer = new Fixer(['SmartQuotes', 'FrenchNoBreakSpace']); +$fixer = new Fixer(['SmartQuotes', 'SpaceBeforePunctuation']); $fixer->setLocale('fr_FR'); $fixedContent = $fixer->fix('

Je suis "très content" de t\'avoir invité sur Jolicode.com !

'); @@ -136,8 +136,22 @@ and do not forget to specify a locale on the Fixer instance. This Fixer replaces legacy `EnglishQuotes`, `FrenchQuotes` and `GermanQuotes`. -FrenchNoBreakSpace ------------------- +SpaceBeforePunctuation +---------------------- + +Locale-aware fixer for spacing before punctuation marks. Handles: +- **French** (`fr`, `fr_FR`, `fr_BE`, `fr_CH`): Adds non-breaking space before `:` and thin non-breaking space before `;`, `!`, `?` +- **Canadian French** (`fr_CA`): No space before punctuation (follows English conventions) +- **Swiss German** (`de_CH`): Uses French-style guillemets with thin spaces +- **All other locales**: Removes any incorrect space before punctuation + +This fixer requires a locale to be set on the Fixer with `$fixer->setLocale('fr_FR');`. + +FrenchNoBreakSpace (deprecated) +------------------------------- + +> [!WARNING] +> This fixer is deprecated. Use `SpaceBeforePunctuation` instead. Replaces some classic spaces by non-breaking spaces following the French typographic code. No break space are placed before `:`, thin no break space before `;`, `!` and `?`. @@ -200,7 +214,7 @@ fr_FR Those rules apply for most of the recommendations of "Abrégé du code typographique à l'usage de la presse", ISBN: 9782351130667. ```php -$fixer = new Fixer(['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'FrenchNoBreakSpace', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark']); +$fixer = new Fixer(['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark']); $fixer->setLocale('fr_FR'); ``` @@ -226,6 +240,26 @@ $fixer->setLocale('de_DE'); More to come (contributions welcome!). +Locale support for spacing and quotes +------------------------------------- + +JoliTypo supports locale-specific rules for spacing before punctuation and quotation marks: + +| Locale | Space Before `: ; ! 
?` | Quote Style | +|--------|------------------------|-------------| +| fr_FR, fr_BE, fr_CH | YES (nbsp/nnbsp) | « text » | +| fr_CA | NO | « text » | +| de_DE, de_AT | NO | „text“ | +| de_CH | NO | «text» | +| en_*, pt_BR | NO | “text” | +| es_*, it_*, pt_* (except pt_BR) | NO | «text» | +| pl_*, cs_*, sk_*, hu_*, ro_*, bg_* | NO | „text“ | +| ru_*, uk_*, be_* | NO | «text» | +| sv_*, fi_* | NO | ”text” | +| nl_*, tr_* | NO | “text” | + +See `LocaleConfig::QUOTE_STYLES_BY_LOCALE` for the complete list of supported languages. + Documentation ============= @@ -351,6 +385,13 @@ Thanks to theses online resources for helping a developer understand typography: - [FR] "Abrégé du code typographique à l'usage de la presse", ISBN: 9782351130667 - https://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks +Typography rules by language: + +- https://type.today/en/journal/spaces - Comprehensive guide on spacing in typography +- https://type.today/en/journal/quotes - Comprehensive guide on quotation marks by language +- https://www.mancko.com/typography-punctuation/en/ - Multi-language typography reference +- [FR] https://fr.wikipedia.org/wiki/Ponctuation#Espaces_et_ponctuation - French punctuation spacing rules +

JoliCode is sponsoring this project diff --git a/src/JoliTypo/Bridge/Symfony/DependencyInjection/JoliTypoExtension.php b/src/JoliTypo/Bridge/Symfony/DependencyInjection/JoliTypoExtension.php index 061c28b..1b65238 100644 --- a/src/JoliTypo/Bridge/Symfony/DependencyInjection/JoliTypoExtension.php +++ b/src/JoliTypo/Bridge/Symfony/DependencyInjection/JoliTypoExtension.php @@ -50,9 +50,9 @@ private function createPresetDefinition(ContainerBuilder $container, array $conf } $definition->addArgument($fixers); - $container->setDefinition(sprintf('joli_typo.fixer.%s', $name), $definition); + $container->setDefinition(\sprintf('joli_typo.fixer.%s', $name), $definition); - $presets[$name] = new Reference(sprintf('joli_typo.fixer.%s', $name)); + $presets[$name] = new Reference(\sprintf('joli_typo.fixer.%s', $name)); } return $presets; diff --git a/src/JoliTypo/Bridge/Twig/JoliTypoExtension.php b/src/JoliTypo/Bridge/Twig/JoliTypoExtension.php index f5012f8..bd197ea 100644 --- a/src/JoliTypo/Bridge/Twig/JoliTypoExtension.php +++ b/src/JoliTypo/Bridge/Twig/JoliTypoExtension.php @@ -41,7 +41,7 @@ public function getFilters(): array public function translate($text, $preset = 'default'): string { if (!isset($this->presets[$preset])) { - throw new InvalidConfigurationException(sprintf('There is no "%s" preset configured.', $preset)); + throw new InvalidConfigurationException(\sprintf('There is no "%s" preset configured.', $preset)); } return $this->presets[$preset]->fix($text); diff --git a/src/JoliTypo/Exception/InvalidMarkupException.php b/src/JoliTypo/Exception/InvalidMarkupException.php index 41ddc78..3ea1316 100644 --- a/src/JoliTypo/Exception/InvalidMarkupException.php +++ b/src/JoliTypo/Exception/InvalidMarkupException.php @@ -11,5 +11,5 @@ class InvalidMarkupException extends \RuntimeException { - protected $message = 'An error happened when trying to read your HTML with \\DOMDocument.'; + protected $message = 'An error happened when trying to read your HTML with 
\DOMDocument.'; } diff --git a/src/JoliTypo/Fixer.php b/src/JoliTypo/Fixer.php index 583113a..56b4f98 100644 --- a/src/JoliTypo/Fixer.php +++ b/src/JoliTypo/Fixer.php @@ -36,12 +36,11 @@ class Fixer public const COPY = '©'; // © public const ALL_SPACES = "\xE2\x80\xAF|\xC2\xAD|\xC2\xA0|\\s"; // All supported spaces, used in regexps. Better than \s - public const RECOMMENDED_RULES_BY_LOCALE = [ - 'en_GB' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], - 'fr_FR' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'FrenchNoBreakSpace', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], - 'fr_CA' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], - 'de_DE' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], - ]; + /** + * @deprecated since 1.7.0, use LocaleConfig::RECOMMENDED_RULES_BY_LOCALE instead + * @see LocaleConfig::RECOMMENDED_RULES_BY_LOCALE + */ + public const RECOMMENDED_RULES_BY_LOCALE = LocaleConfig::RECOMMENDED_RULES_BY_LOCALE; private array $protectedTags = ['head', 'link', 'pre', 'code', 'script', 'style']; @@ -193,17 +192,17 @@ private function compileRules(array $rules): void $className = $rule::class; } else { $className = class_exists($rule) ? $rule : (class_exists( - 'JoliTypo\\Fixer\\' . $rule - ) ? 'JoliTypo\\Fixer\\' . $rule : false); + 'JoliTypo\Fixer\\' . $rule + ) ? 'JoliTypo\Fixer\\' . 
$rule : false); if (!$className) { - throw new BadRuleSetException(sprintf('Fixer %s not found', $rule)); + throw new BadRuleSetException(\sprintf('Fixer %s not found', $rule)); } $fixer = new $className($this->getLocale()); } if (!$fixer instanceof FixerInterface) { - throw new BadRuleSetException(sprintf('%s must implement FixerInterface', $className)); + throw new BadRuleSetException(\sprintf('%s must implement FixerInterface', $className)); } $this->_rules[$className] = $fixer; @@ -351,7 +350,7 @@ private function exportDOMDocument(\DOMDocument $dom): string // Remove added body & doctype $content = preg_replace( [ - '/^\\<\\!DOCTYPE.*?.*?/si', + '/^\<\!DOCTYPE.*?.*?/si', '!\n?$!si', ], '', diff --git a/src/JoliTypo/Fixer/FrenchNoBreakSpace.php b/src/JoliTypo/Fixer/FrenchNoBreakSpace.php index d3ed863..923afc4 100644 --- a/src/JoliTypo/Fixer/FrenchNoBreakSpace.php +++ b/src/JoliTypo/Fixer/FrenchNoBreakSpace.php @@ -9,7 +9,6 @@ namespace JoliTypo\Fixer; -use JoliTypo\Fixer; use JoliTypo\FixerInterface; use JoliTypo\StateBag; @@ -19,16 +18,20 @@ * NO_BREAK_SPACE inside « ». * * As recommended by "Abrégé du code typographique à l'usage de la presse", ISBN: 978-2351130667 + * + * @deprecated since 1.7.0, use SpaceBeforePunctuation instead */ class FrenchNoBreakSpace implements FixerInterface { - public function fix(string $content, ?StateBag $stateBag = null) - { - $content = preg_replace('@[' . Fixer::ALL_SPACES . ']+(:)@mu', Fixer::NO_BREAK_SPACE . '$1', $content); - $content = preg_replace('@[' . Fixer::ALL_SPACES . ']+([;!\?])@mu', Fixer::NO_BREAK_THIN_SPACE . '$1', $content); + private SpaceBeforePunctuation $delegate; - $content = preg_replace('@' . Fixer::LAQUO . '[' . Fixer::ALL_SPACES . ']?@mu', Fixer::LAQUO . Fixer::NO_BREAK_SPACE, $content); + public function __construct() + { + $this->delegate = new SpaceBeforePunctuation('fr_FR'); + } - return preg_replace('@[' . Fixer::ALL_SPACES . ']?' . Fixer::RAQUO . '@mu', Fixer::NO_BREAK_SPACE . 
Fixer::RAQUO, $content); + public function fix(string $content, ?StateBag $stateBag = null): string + { + return $this->delegate->fix($content, $stateBag); } } diff --git a/src/JoliTypo/Fixer/SmartQuotes.php b/src/JoliTypo/Fixer/SmartQuotes.php index 2a9ca0d..3420483 100644 --- a/src/JoliTypo/Fixer/SmartQuotes.php +++ b/src/JoliTypo/Fixer/SmartQuotes.php @@ -10,32 +10,32 @@ namespace JoliTypo\Fixer; use JoliTypo\Exception\BadFixerConfigurationException; -use JoliTypo\Fixer; use JoliTypo\FixerInterface; use JoliTypo\LocaleAwareFixerInterface; +use JoliTypo\LocaleConfig; use JoliTypo\StateBag; +/** + * Replaces straight double quotes with typographic quotation marks. + * + * The style of quotation marks depends on the locale: + * - French: « … » (guillemets with non-breaking spaces) + * - German: „…“ (low-high double quotes) + * - English: “…” (curly double quotes) + * - Finnish/Swedish: ”…” (same closing quote on both sides) + * - And many more... + * + * @see LocaleConfig::QUOTE_STYLES_BY_LOCALE for the full list + */ class SmartQuotes extends BaseOpenClosePair implements FixerInterface, LocaleAwareFixerInterface { - /** - * @var string - */ - protected $opening = ''; + protected string $opening = ''; - /** - * @var string - */ - protected $openingSuffix = ''; + protected string $openingSuffix = ''; - /** - * @var string - */ - protected $closing = ''; + protected string $closing = ''; - /** - * @var string - */ - protected $closingPrefix = ''; + protected string $closingPrefix = ''; public function __construct(string $locale) { @@ -70,131 +70,36 @@ public function fix(string $content, ?StateBag $stateBag = null) } /** - * Default configuration for supported lang. + * Set locale and configure quotation marks accordingly.
*/ - public function setLocale(string $locale) + public function setLocale(string $locale): void { - // Handle from locale + country - switch (strtolower($locale)) { - // “…” - case 'pt-br': - $this->opening = Fixer::LDQUO; - $this->openingSuffix = ''; - $this->closing = Fixer::RDQUO; - $this->closingPrefix = ''; - - return; - // «…» - case 'de-ch': - $this->opening = Fixer::LAQUO; - $this->openingSuffix = ''; - $this->closing = Fixer::RAQUO; - $this->closingPrefix = ''; - - return; - } + $style = LocaleConfig::getQuotationStyle($locale); - // Handle from locale only - $short = Fixer::getLanguageFromLocale($locale); - - switch ($short) { - // « … » - case 'fr': - $this->opening = Fixer::LAQUO; - $this->openingSuffix = Fixer::NO_BREAK_SPACE; - $this->closing = Fixer::RAQUO; - $this->closingPrefix = Fixer::NO_BREAK_SPACE; - - break; - // «…» - case 'hy': - case 'az': - case 'hz': - case 'eu': - case 'be': - case 'ca': - case 'el': - case 'it': - case 'no': - case 'fa': - case 'lv': - case 'pt': - case 'ru': - case 'es': - case 'uk': - $this->opening = Fixer::LAQUO; - $this->openingSuffix = ''; - $this->closing = Fixer::RAQUO; - $this->closingPrefix = ''; - - break; - // „…“ - case 'de': - case 'ka': - case 'cs': - case 'et': - case 'is': - case 'lt': - case 'mk': - case 'ro': - case 'sk': - case 'sl': - case 'wen': - $this->opening = Fixer::BDQUO; - $this->openingSuffix = ''; - $this->closing = Fixer::LDQUO; - $this->closingPrefix = ''; - - break; - // “…” - case 'en': - case 'us': - case 'gb': - case 'af': - case 'ar': - case 'eo': - case 'id': - case 'ga': - case 'ko': - case 'br': - case 'th': - case 'tr': - case 'vi': - $this->opening = Fixer::LDQUO; - $this->openingSuffix = ''; - $this->closing = Fixer::RDQUO; - $this->closingPrefix = ''; - - break; - // ”…” - case 'fi': - case 'sv': - case 'bs': - $this->opening = Fixer::RDQUO; - $this->openingSuffix = ''; - $this->closing = Fixer::RDQUO; - $this->closingPrefix = ''; - - break; + if (null !== $style) { + 
$this->opening = $style['opening']; + $this->openingSuffix = $style['openingSuffix']; + $this->closing = $style['closing']; + $this->closingPrefix = $style['closingPrefix']; } } - public function setOpening(string $opening) + public function setOpening(string $opening): void { $this->opening = $opening; } - public function setOpeningSuffix(string $openingSuffix) + public function setOpeningSuffix(string $openingSuffix): void { $this->openingSuffix = $openingSuffix; } - public function setClosing(string $closing) + public function setClosing(string $closing): void { $this->closing = $closing; } - public function setClosingPrefix(string $closingPrefix) + public function setClosingPrefix(string $closingPrefix): void { $this->closingPrefix = $closingPrefix; } diff --git a/src/JoliTypo/Fixer/SpaceBeforePunctuation.php b/src/JoliTypo/Fixer/SpaceBeforePunctuation.php new file mode 100644 index 0000000..22ae2c7 --- /dev/null +++ b/src/JoliTypo/Fixer/SpaceBeforePunctuation.php @@ -0,0 +1,97 @@ +setLocale($locale); + } + } + + public function setLocale(string $locale): void + { + $this->currentRule = LocaleConfig::getSpacingRule($locale); + } + + public function fix(string $content, ?StateBag $stateBag = null): string + { + return match ($this->currentRule) { + LocaleConfig::SPACING_RULE_FRENCH => $this->applyFrenchRules($content), + LocaleConfig::SPACING_RULE_SWISS_GERMAN => $this->applySwissGermanRules($content), + default => $this->removeSpacesBeforePunctuation($content), + }; + } + + private function applyFrenchRules(string $content): string + { + // NO_BREAK_SPACE before colon (only when there's already a space) + $content = preg_replace('@[' . Fixer::ALL_SPACES . ']+(:)@mu', Fixer::NO_BREAK_SPACE . '$1', $content); + + // NO_BREAK_THIN_SPACE before ; ! ? + $content = preg_replace('@[' . Fixer::ALL_SPACES . ']+([;!\?])@mu', Fixer::NO_BREAK_THIN_SPACE . '$1', $content); + + // Handle French guillemets « » + $content = preg_replace('@' . Fixer::LAQUO . '[' . 
Fixer::ALL_SPACES . ']?@mu', Fixer::LAQUO . Fixer::NO_BREAK_SPACE, $content); + + return preg_replace('@[' . Fixer::ALL_SPACES . ']?' . Fixer::RAQUO . '@mu', Fixer::NO_BREAK_SPACE . Fixer::RAQUO, $content); + } + + private function applySwissGermanRules(string $content): string + { + // Swiss German uses French-style guillemets « » with thin non-breaking spaces + $content = preg_replace('@' . Fixer::LAQUO . '[' . Fixer::ALL_SPACES . ']?@mu', Fixer::LAQUO . Fixer::NO_BREAK_THIN_SPACE, $content); + $content = preg_replace('@[' . Fixer::ALL_SPACES . ']?' . Fixer::RAQUO . '@mu', Fixer::NO_BREAK_THIN_SPACE . Fixer::RAQUO, $content); + + // But still remove spaces before punctuation (like German) + return $this->removeSpacesBeforePunctuation($content); + } + + private function removeSpacesBeforePunctuation(string $content): string + { + // Remove all types of spaces (including nbsp, thin spaces) before : ; ! ? + // This ensures consistent behavior regardless of input spacing + // Excludes URLs (://), time formats, IPv6, etc. + $content = preg_replace('@([^' . Fixer::ALL_SPACES . ':])[' . Fixer::ALL_SPACES . ']+(:)(?![/\d])@mu', '$1$2', $content); + + return preg_replace('@([^' . Fixer::ALL_SPACES . '])[' . Fixer::ALL_SPACES . 
']+([;!\?])@mu', '$1$2', $content); + } +} diff --git a/src/JoliTypo/LocaleConfig.php b/src/JoliTypo/LocaleConfig.php new file mode 100644 index 0000000..d0e7911 --- /dev/null +++ b/src/JoliTypo/LocaleConfig.php @@ -0,0 +1,294 @@ + self::QUOTE_STYLE_FRENCH, + + // ===================================================================== + // Guillemets without spaces: «…» + // ===================================================================== + 'hy' => self::QUOTE_STYLE_GUILLEMETS, // Armenian + 'az' => self::QUOTE_STYLE_GUILLEMETS, // Azerbaijani + 'eu' => self::QUOTE_STYLE_GUILLEMETS, // Basque + 'be' => self::QUOTE_STYLE_GUILLEMETS, // Belarusian + 'ca' => self::QUOTE_STYLE_GUILLEMETS, // Catalan + 'el' => self::QUOTE_STYLE_GUILLEMETS, // Greek + 'it' => self::QUOTE_STYLE_GUILLEMETS, // Italian + 'no' => self::QUOTE_STYLE_GUILLEMETS, // Norwegian + 'nb' => self::QUOTE_STYLE_GUILLEMETS, // Norwegian Bokmål + 'nn' => self::QUOTE_STYLE_GUILLEMETS, // Norwegian Nynorsk + 'fa' => self::QUOTE_STYLE_GUILLEMETS, // Persian + 'lv' => self::QUOTE_STYLE_GUILLEMETS, // Latvian + 'pt' => self::QUOTE_STYLE_GUILLEMETS, // Portuguese + 'ru' => self::QUOTE_STYLE_GUILLEMETS, // Russian + 'es' => self::QUOTE_STYLE_GUILLEMETS, // Spanish + 'uk' => self::QUOTE_STYLE_GUILLEMETS, // Ukrainian + 'da' => self::QUOTE_STYLE_GUILLEMETS, // Danish (also uses »…«) + + // Specific locale overrides (lowercase for normalization) + 'de_ch' => self::QUOTE_STYLE_GUILLEMETS, // Swiss German + 'pt_br' => self::QUOTE_STYLE_ENGLISH, // Brazilian Portuguese + + // ===================================================================== + // German style: „…" (low-high) + // ===================================================================== + 'de' => self::QUOTE_STYLE_GERMAN, // German + 'ka' => self::QUOTE_STYLE_GERMAN, // Georgian + 'cs' => self::QUOTE_STYLE_GERMAN, // Czech + 'et' => self::QUOTE_STYLE_GERMAN, // Estonian + 'is' => self::QUOTE_STYLE_GERMAN, // Icelandic + 'lt' => 
self::QUOTE_STYLE_GERMAN, // Lithuanian + 'mk' => self::QUOTE_STYLE_GERMAN, // Macedonian + 'ro' => self::QUOTE_STYLE_GERMAN, // Romanian + 'sk' => self::QUOTE_STYLE_GERMAN, // Slovak + 'sl' => self::QUOTE_STYLE_GERMAN, // Slovenian + 'pl' => self::QUOTE_STYLE_GERMAN, // Polish + 'hr' => self::QUOTE_STYLE_GERMAN, // Croatian + 'sr' => self::QUOTE_STYLE_GERMAN, // Serbian + 'bg' => self::QUOTE_STYLE_GERMAN, // Bulgarian + 'hu' => self::QUOTE_STYLE_GERMAN, // Hungarian + + // ===================================================================== + // English style: "…" + // ===================================================================== + 'en' => self::QUOTE_STYLE_ENGLISH, + 'af' => self::QUOTE_STYLE_ENGLISH, // Afrikaans + 'ar' => self::QUOTE_STYLE_ENGLISH, // Arabic + 'eo' => self::QUOTE_STYLE_ENGLISH, // Esperanto + 'id' => self::QUOTE_STYLE_ENGLISH, // Indonesian + 'ga' => self::QUOTE_STYLE_ENGLISH, // Irish + 'ko' => self::QUOTE_STYLE_ENGLISH, // Korean + 'br' => self::QUOTE_STYLE_ENGLISH, // Breton + 'th' => self::QUOTE_STYLE_ENGLISH, // Thai + 'tr' => self::QUOTE_STYLE_ENGLISH, // Turkish + 'vi' => self::QUOTE_STYLE_ENGLISH, // Vietnamese + 'nl' => self::QUOTE_STYLE_ENGLISH, // Dutch + + // ===================================================================== + // Finnish/Swedish style: "…" (same quote on both sides) + // ===================================================================== + 'fi' => self::QUOTE_STYLE_FINNISH, // Finnish + 'sv' => self::QUOTE_STYLE_FINNISH, // Swedish + 'bs' => self::QUOTE_STYLE_FINNISH, // Bosnian + ]; + + /** + * Recommended fixer rules by locale. + * + * These are the default sets of fixers recommended for each locale. + * You can customize this list when instantiating the Fixer class. 
+ */ + public const RECOMMENDED_RULES_BY_LOCALE = [ + // English + 'en_GB' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'en_US' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + + // French + 'fr_FR' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'fr_CA' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'fr_BE' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'fr_CH' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + + // German + 'de_DE' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'de_AT' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'de_CH' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + + // Other Western European + 'es_ES' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'it_IT' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'pt_PT' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 
'pt_BR' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'nl_NL' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'nl_BE' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'ca_ES' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + + // Nordic + 'sv_SE' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'da_DK' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'nb_NO' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'nn_NO' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'fi_FI' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + + // Central/Eastern European + 'pl_PL' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'cs_CZ' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'sk_SK' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'hu_HU' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 
'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'ro_RO' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + + // Slavic + 'ru_RU' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'uk_UA' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'be_BY' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'bg_BG' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'sr_RS' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'hr_HR' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + + // Other + 'el_GR' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + 'tr_TR' => ['Ellipsis', 'Dimension', 'Unit', 'Dash', 'SmartQuotes', 'SpaceBeforePunctuation', 'NoSpaceBeforeComma', 'CurlyQuote', 'Hyphen', 'Trademark'], + ]; + + /** + * Get the spacing rule for a given locale. + * + * SPACING_RULE_FRENCH: Add non-breaking spaces before double punctuation (: ; ! ?) 
+ * SPACING_RULE_SWISS_GERMAN: French-style guillemets with thin spaces, no space before punctuation + * SPACING_RULE_NONE: Remove any space before punctuation (default for most languages) + */ + public static function getSpacingRule(string $locale): string + { + // Normalize locale (handle both fr_FR and fr-FR formats) + $normalizedLocale = strtolower(str_replace('-', '_', $locale)); + + // Check exact match first for locale exceptions + return match ($normalizedLocale) { + // Canadian French follows English conventions (no space) + 'fr_ca' => self::SPACING_RULE_NONE, + // Swiss German uses French-style guillemets with thin spaces + 'de_ch' => self::SPACING_RULE_SWISS_GERMAN, + // French locales use non-breaking spaces before double punctuation + 'fr', 'fr_fr', 'fr_be', 'fr_ch' => self::SPACING_RULE_FRENCH, + // All other locales: check language fallback or default to none + default => match (Fixer::getLanguageFromLocale($locale)) { + 'fr' => self::SPACING_RULE_FRENCH, + default => self::SPACING_RULE_NONE, + }, + }; + } + + /** + * Get recommended rules for a given locale. + * + * @return array|null Returns null if no specific rules are defined for this locale + */ + public static function getRecommendedRules(string $locale): ?array + { + // Check exact match first (e.g., fr_CA) + if (isset(self::RECOMMENDED_RULES_BY_LOCALE[$locale])) { + return self::RECOMMENDED_RULES_BY_LOCALE[$locale]; + } + + // For locales not explicitly defined, return null + // The caller can then decide to use a default or throw an exception + return null; + } + + /** + * Get quotation style for a given locale. 
+ * + * @return array{opening: string, openingSuffix: string, closing: string, closingPrefix: string}|null + */ + public static function getQuotationStyle(string $locale): ?array + { + $style = self::getQuotationStyleType($locale); + + if (null === $style) { + return null; + } + + return match ($style) { + self::QUOTE_STYLE_FRENCH => [ + 'opening' => Fixer::LAQUO, + 'openingSuffix' => Fixer::NO_BREAK_SPACE, + 'closing' => Fixer::RAQUO, + 'closingPrefix' => Fixer::NO_BREAK_SPACE, + ], + self::QUOTE_STYLE_GUILLEMETS => [ + 'opening' => Fixer::LAQUO, + 'openingSuffix' => '', + 'closing' => Fixer::RAQUO, + 'closingPrefix' => '', + ], + self::QUOTE_STYLE_GERMAN => [ + 'opening' => Fixer::BDQUO, + 'openingSuffix' => '', + 'closing' => Fixer::LDQUO, + 'closingPrefix' => '', + ], + self::QUOTE_STYLE_ENGLISH => [ + 'opening' => Fixer::LDQUO, + 'openingSuffix' => '', + 'closing' => Fixer::RDQUO, + 'closingPrefix' => '', + ], + self::QUOTE_STYLE_FINNISH => [ + 'opening' => Fixer::RDQUO, + 'openingSuffix' => '', + 'closing' => Fixer::RDQUO, + 'closingPrefix' => '', + ], + default => null, + }; + } + + /** + * Get quotation style type for a given locale. 
+ */ + public static function getQuotationStyleType(string $locale): ?string + { + // Normalize locale (handle both fr_FR and fr-FR formats) + $normalizedLocale = strtolower(str_replace('-', '_', $locale)); + + // Check exact match first (e.g., pt_br, de_ch) + if (isset(self::QUOTE_STYLES_BY_LOCALE[$normalizedLocale])) { + return self::QUOTE_STYLES_BY_LOCALE[$normalizedLocale]; + } + + // Check language part (e.g., fr from fr_FR) + $language = Fixer::getLanguageFromLocale($locale); + if (isset(self::QUOTE_STYLES_BY_LOCALE[$language])) { + return self::QUOTE_STYLES_BY_LOCALE[$language]; + } + + // No style defined for this locale + return null; + } +} diff --git a/tests/JoliTypo/Tests/Bridge/app/AppKernel.php b/tests/JoliTypo/Tests/Bridge/app/AppKernel.php index 539124f..517f8a0 100644 --- a/tests/JoliTypo/Tests/Bridge/app/AppKernel.php +++ b/tests/JoliTypo/Tests/Bridge/app/AppKernel.php @@ -34,7 +34,7 @@ public function registerContainerConfiguration(LoaderInterface $loader): void // Set framework.router.utf8 to avoid deprecated error on SF 5.1 if (version_compare(self::VERSION, '5.0', 'gt')) { - $loader->load(function (ContainerBuilder $container) { + $loader->load(static function (ContainerBuilder $container) { $container->loadFromExtension('framework', [ 'router' => [ 'utf8' => true, @@ -44,7 +44,7 @@ public function registerContainerConfiguration(LoaderInterface $loader): void } if (trait_exists(MailerAssertionsTrait::class)) { - $loader->load(function (ContainerBuilder $container) { + $loader->load(static function (ContainerBuilder $container) { $container->loadFromExtension('twig', [ ]); }); diff --git a/tests/JoliTypo/Tests/Fixer/SmartQuotesTest.php b/tests/JoliTypo/Tests/Fixer/SmartQuotesTest.php index 1f9ae2e..f5f91c6 100644 --- a/tests/JoliTypo/Tests/Fixer/SmartQuotesTest.php +++ b/tests/JoliTypo/Tests/Fixer/SmartQuotesTest.php @@ -20,7 +20,7 @@ public function testSimpleString(): void $fixer = new Fixer\SmartQuotes('de'); 
$this->assertInstanceOf('JoliTypo\Fixer\SmartQuotes', $fixer); - $this->assertSame('„I am smart“', $fixer->fix('"I am smart"')); + $this->assertSame(Fixer::BDQUO . 'I am smart' . Fixer::LDQUO, $fixer->fix('"I am smart"')); $fixer->setOpening('«'); $fixer->setClosing('»'); @@ -40,4 +40,207 @@ public function testBadConfig(): void $fixer = new Fixer\SmartQuotes('unknown'); $fixer->fix('nope'); } + + // ========================================================================= + // French style: « … » (with non-breaking spaces) + // ========================================================================= + + public function testFrenchQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('fr'); + + $this->assertSame( + Fixer::LAQUO . Fixer::NO_BREAK_SPACE . 'Bonjour' . Fixer::NO_BREAK_SPACE . Fixer::RAQUO, + $fixer->fix('"Bonjour"') + ); + } + + public function testFrenchFranceQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('fr_FR'); + + $this->assertSame( + Fixer::LAQUO . Fixer::NO_BREAK_SPACE . 'Bonjour' . Fixer::NO_BREAK_SPACE . Fixer::RAQUO, + $fixer->fix('"Bonjour"') + ); + } + + // ========================================================================= + // Guillemets without spaces: «…» + // ========================================================================= + + public function testRussianQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('ru'); + + $this->assertSame(Fixer::LAQUO . 'Привет' . Fixer::RAQUO, $fixer->fix('"Привет"')); + } + + public function testSpanishQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('es'); + + $this->assertSame(Fixer::LAQUO . 'Hola' . Fixer::RAQUO, $fixer->fix('"Hola"')); + } + + public function testItalianQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('it'); + + $this->assertSame(Fixer::LAQUO . 'Ciao' . Fixer::RAQUO, $fixer->fix('"Ciao"')); + } + + public function testGreekQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('el'); + + $this->assertSame(Fixer::LAQUO . 'Γεια' . 
Fixer::RAQUO, $fixer->fix('"Γεια"')); + } + + public function testPortugueseQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('pt'); + + $this->assertSame(Fixer::LAQUO . 'Olá' . Fixer::RAQUO, $fixer->fix('"Olá"')); + } + + public function testUkrainianQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('uk'); + + $this->assertSame(Fixer::LAQUO . 'Привіт' . Fixer::RAQUO, $fixer->fix('"Привіт"')); + } + + public function testNorwegianQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('no'); + + $this->assertSame(Fixer::LAQUO . 'Hei' . Fixer::RAQUO, $fixer->fix('"Hei"')); + } + + public function testSwissGermanQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('de_CH'); + + // Swiss German uses guillemets without spaces + $this->assertSame(Fixer::LAQUO . 'Hallo' . Fixer::RAQUO, $fixer->fix('"Hallo"')); + } + + // ========================================================================= + // German style: „…" (low-high) + // ========================================================================= + + public function testGermanQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('de'); + + $this->assertSame(Fixer::BDQUO . 'Hallo' . Fixer::LDQUO, $fixer->fix('"Hallo"')); + } + + public function testPolishQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('pl'); + + $this->assertSame(Fixer::BDQUO . 'Cześć' . Fixer::LDQUO, $fixer->fix('"Cześć"')); + } + + public function testCzechQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('cs'); + + $this->assertSame(Fixer::BDQUO . 'Ahoj' . Fixer::LDQUO, $fixer->fix('"Ahoj"')); + } + + public function testRomanianQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('ro'); + + $this->assertSame(Fixer::BDQUO . 'Bună' . Fixer::LDQUO, $fixer->fix('"Bună"')); + } + + public function testHungarianQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('hu'); + + $this->assertSame(Fixer::BDQUO . 'Szia' . 
Fixer::LDQUO, $fixer->fix('"Szia"')); + } + + public function testBulgarianQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('bg'); + + $this->assertSame(Fixer::BDQUO . 'Здравей' . Fixer::LDQUO, $fixer->fix('"Здравей"')); + } + + // ========================================================================= + // English style: "…" + // ========================================================================= + + public function testEnglishQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('en'); + + $this->assertSame(Fixer::LDQUO . 'Hello' . Fixer::RDQUO, $fixer->fix('"Hello"')); + } + + public function testDutchQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('nl'); + + $this->assertSame(Fixer::LDQUO . 'Hallo' . Fixer::RDQUO, $fixer->fix('"Hallo"')); + } + + public function testTurkishQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('tr'); + + $this->assertSame(Fixer::LDQUO . 'Merhaba' . Fixer::RDQUO, $fixer->fix('"Merhaba"')); + } + + public function testBrazilianPortugueseQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('pt_BR'); + + // Brazilian Portuguese uses English-style quotes + $this->assertSame(Fixer::LDQUO . 'Olá' . Fixer::RDQUO, $fixer->fix('"Olá"')); + } + + // ========================================================================= + // Finnish/Swedish style: "…" (same quote on both sides) + // ========================================================================= + + public function testFinnishQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('fi'); + + // Finnish uses the same closing quote on both sides + $this->assertSame(Fixer::RDQUO . 'Hei' . Fixer::RDQUO, $fixer->fix('"Hei"')); + } + + public function testSwedishQuoteStyle(): void + { + $fixer = new Fixer\SmartQuotes('sv'); + + // Swedish uses the same closing quote on both sides + $this->assertSame(Fixer::RDQUO . 'Hej' . 
Fixer::RDQUO, $fixer->fix('"Hej"')); + } + + // ========================================================================= + // Locale can be changed + // ========================================================================= + + public function testLocaleCanBeChanged(): void + { + $fixer = new Fixer\SmartQuotes('en'); + $this->assertSame(Fixer::LDQUO . 'Hi' . Fixer::RDQUO, $fixer->fix('"Hi"')); + + $fixer->setLocale('de'); + $this->assertSame(Fixer::BDQUO . 'Hi' . Fixer::LDQUO, $fixer->fix('"Hi"')); + + $fixer->setLocale('fr'); + $this->assertSame( + Fixer::LAQUO . Fixer::NO_BREAK_SPACE . 'Hi' . Fixer::NO_BREAK_SPACE . Fixer::RAQUO, + $fixer->fix('"Hi"') + ); + } } diff --git a/tests/JoliTypo/Tests/Fixer/SpaceBeforePunctuationTest.php b/tests/JoliTypo/Tests/Fixer/SpaceBeforePunctuationTest.php new file mode 100644 index 0000000..8daae8b --- /dev/null +++ b/tests/JoliTypo/Tests/Fixer/SpaceBeforePunctuationTest.php @@ -0,0 +1,324 @@ +assertInstanceOf(Fixer\SpaceBeforePunctuation::class, $fixer); + + // Exclamation mark: should use NO_BREAK_THIN_SPACE + $this->assertSame('Superman' . Fixer::NO_BREAK_THIN_SPACE . '!', $fixer->fix('Superman !')); + + // Question mark: should use NO_BREAK_THIN_SPACE + $this->assertSame('Superman' . Fixer::NO_BREAK_THIN_SPACE . '?', $fixer->fix('Superman ?')); + + // Multiple punctuation marks + $this->assertSame('Superman' . Fixer::NO_BREAK_THIN_SPACE . '!?', $fixer->fix('Superman !?')); + $this->assertSame('Superman' . Fixer::NO_BREAK_THIN_SPACE . '? Nope.', $fixer->fix('Superman ? Nope.')); + + // Colon: should use NO_BREAK_SPACE + $this->assertSame('Superman' . Fixer::NO_BREAK_SPACE . ': the movie', $fixer->fix('Superman : the movie')); + + // Colon without space before: should not be modified (preserves URLs, times, etc.) + $this->assertSame('Superman: the movie', $fixer->fix('Superman: the movie')); + + // Semicolon: should use NO_BREAK_THIN_SPACE + $this->assertSame('Superman' . Fixer::NO_BREAK_THIN_SPACE . 
'; the movie', $fixer->fix('Superman ; the movie')); + + // Replace existing nbsp with correct space + $this->assertSame('Superman' . Fixer::NO_BREAK_THIN_SPACE . '; the movie', $fixer->fix("Superman\u{a0}; the movie")); + + // French guillemets + $this->assertSame(Fixer::LAQUO . Fixer::NO_BREAK_SPACE . 'test' . Fixer::NO_BREAK_SPACE . Fixer::RAQUO, $fixer->fix('« test »')); + $this->assertSame(Fixer::LAQUO . Fixer::NO_BREAK_SPACE . 'test' . Fixer::NO_BREAK_SPACE . Fixer::RAQUO, $fixer->fix('«test»')); + } + + public function testFrenchLocaleEdgeCases(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('fr_FR'); + + // IPv6 addresses should not be modified + $this->assertSame('fdda:5cc1:23:4::1f', $fixer->fix('fdda:5cc1:23:4::1f')); + + // Brand names with exclamation should not add space if none exists + $this->assertSame('Here is a brand name: Yahoo!', $fixer->fix('Here is a brand name: Yahoo!')); + } + + public function testEnglishLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('en_GB'); + + // English should remove spaces before punctuation + $this->assertSame('Hello!', $fixer->fix('Hello !')); + $this->assertSame('Hello?', $fixer->fix('Hello ?')); + $this->assertSame('Hello;', $fixer->fix('Hello ;')); + $this->assertSame('Hello:', $fixer->fix('Hello :')); + + // No space should remain unchanged + $this->assertSame('Hello!', $fixer->fix('Hello!')); + $this->assertSame('Hello?', $fixer->fix('Hello?')); + } + + public function testEnglishLocaleEdgeCases(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('en_GB'); + + // URLs should not be modified + $this->assertSame('http://example.com', $fixer->fix('http://example.com')); + $this->assertSame('https://example.com', $fixer->fix('https://example.com')); + + // Time format should not be modified (no space before colon) + $this->assertSame('10:30', $fixer->fix('10:30')); + + // IPv6 should not be modified (no space before colons) + $this->assertSame('fdda:5cc1::1f', 
$fixer->fix('fdda:5cc1::1f')); + } + + public function testCanadianFrenchLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('fr_CA'); + + // Canadian French behaves like English: no space before punctuation + $this->assertSame('Bonjour!', $fixer->fix('Bonjour !')); + $this->assertSame('Bonjour?', $fixer->fix('Bonjour ?')); + $this->assertSame('Bonjour;', $fixer->fix('Bonjour ;')); + $this->assertSame('Bonjour:', $fixer->fix('Bonjour :')); + } + + public function testGermanLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('de_DE'); + + // German: no space before punctuation + $this->assertSame('Hallo!', $fixer->fix('Hallo !')); + $this->assertSame('Hallo?', $fixer->fix('Hallo ?')); + $this->assertSame('Hallo;', $fixer->fix('Hallo ;')); + $this->assertSame('Hallo:', $fixer->fix('Hallo :')); + } + + public function testSwissGermanLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('de_CH'); + + // Swiss German: no space before punctuation (like German) + $this->assertSame('Hallo!', $fixer->fix('Hallo !')); + $this->assertSame('Hallo?', $fixer->fix('Hallo ?')); + + // But Swiss German uses French-style guillemets with thin spaces + $this->assertSame(Fixer::LAQUO . Fixer::NO_BREAK_THIN_SPACE . 'test' . Fixer::NO_BREAK_THIN_SPACE . Fixer::RAQUO, $fixer->fix('« test »')); + $this->assertSame(Fixer::LAQUO . Fixer::NO_BREAK_THIN_SPACE . 'test' . Fixer::NO_BREAK_THIN_SPACE . 
Fixer::RAQUO, $fixer->fix('«test»')); + } + + public function testSpanishLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('es_ES'); + + // Spanish: no space before punctuation + $this->assertSame('Hola!', $fixer->fix('Hola !')); + $this->assertSame('Hola?', $fixer->fix('Hola ?')); + } + + public function testItalianLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('it_IT'); + + // Italian: no space before punctuation + $this->assertSame('Ciao!', $fixer->fix('Ciao !')); + $this->assertSame('Ciao?', $fixer->fix('Ciao ?')); + } + + public function testLocaleCanBeChanged(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('en_GB'); + + // English: remove space + $this->assertSame('Hello!', $fixer->fix('Hello !')); + + // Change to French + $fixer->setLocale('fr_FR'); + $this->assertSame('Bonjour' . Fixer::NO_BREAK_THIN_SPACE . '!', $fixer->fix('Bonjour !')); + + // Change back to English + $fixer->setLocale('en_GB'); + $this->assertSame('Hello!', $fixer->fix('Hello !')); + } + + public function testBelgianFrenchLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('fr_BE'); + + // Belgian French follows French rules + $this->assertSame('Bonjour' . Fixer::NO_BREAK_THIN_SPACE . '!', $fixer->fix('Bonjour !')); + $this->assertSame('Bonjour' . Fixer::NO_BREAK_SPACE . ': test', $fixer->fix('Bonjour : test')); + } + + public function testSwissFrenchLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('fr_CH'); + + // Swiss French follows French rules + $this->assertSame('Bonjour' . Fixer::NO_BREAK_THIN_SPACE . '!', $fixer->fix('Bonjour !')); + $this->assertSame('Bonjour' . Fixer::NO_BREAK_SPACE . 
': test', $fixer->fix('Bonjour : test')); + } + + public function testUnknownLocaleFallsBackToDefault(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('xx_XX'); + + // Unknown locale: should remove spaces (default behavior) + $this->assertSame('Hello!', $fixer->fix('Hello !')); + $this->assertSame('Hello?', $fixer->fix('Hello ?')); + } + + // ========================================================================= + // Nordic languages + // ========================================================================= + + public function testSwedishLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('sv_SE'); + + $this->assertSame('Hej!', $fixer->fix('Hej !')); + $this->assertSame('Hej?', $fixer->fix('Hej ?')); + $this->assertSame('Hej;', $fixer->fix('Hej ;')); + $this->assertSame('Hej:', $fixer->fix('Hej :')); + } + + public function testDanishLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('da_DK'); + + $this->assertSame('Hej!', $fixer->fix('Hej !')); + $this->assertSame('Hej?', $fixer->fix('Hej ?')); + } + + public function testNorwegianLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('nb_NO'); + + $this->assertSame('Hei!', $fixer->fix('Hei !')); + $this->assertSame('Hei?', $fixer->fix('Hei ?')); + } + + public function testFinnishLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('fi_FI'); + + $this->assertSame('Hei!', $fixer->fix('Hei !')); + $this->assertSame('Hei?', $fixer->fix('Hei ?')); + } + + // ========================================================================= + // Slavic languages + // ========================================================================= + + public function testPolishLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('pl_PL'); + + $this->assertSame('Cześć!', $fixer->fix('Cześć !')); + $this->assertSame('Cześć?', $fixer->fix('Cześć ?')); + $this->assertSame('Cześć;', $fixer->fix('Cześć ;')); + $this->assertSame('Cześć:', $fixer->fix('Cześć :')); + } + + public 
function testRussianLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('ru_RU'); + + $this->assertSame('Привет!', $fixer->fix('Привет !')); + $this->assertSame('Привет?', $fixer->fix('Привет ?')); + $this->assertSame('Привет;', $fixer->fix('Привет ;')); + $this->assertSame('Привет:', $fixer->fix('Привет :')); + } + + public function testUkrainianLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('uk_UA'); + + $this->assertSame('Привіт!', $fixer->fix('Привіт !')); + $this->assertSame('Привіт?', $fixer->fix('Привіт ?')); + } + + public function testCzechLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('cs_CZ'); + + $this->assertSame('Ahoj!', $fixer->fix('Ahoj !')); + $this->assertSame('Ahoj?', $fixer->fix('Ahoj ?')); + } + + // ========================================================================= + // Other European languages + // ========================================================================= + + public function testPortugueseLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('pt_PT'); + + $this->assertSame('Olá!', $fixer->fix('Olá !')); + $this->assertSame('Olá?', $fixer->fix('Olá ?')); + } + + public function testDutchLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('nl_NL'); + + $this->assertSame('Hallo!', $fixer->fix('Hallo !')); + $this->assertSame('Hallo?', $fixer->fix('Hallo ?')); + } + + public function testGreekLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('el_GR'); + + $this->assertSame('Γεια!', $fixer->fix('Γεια !')); + $this->assertSame('Γεια?', $fixer->fix('Γεια ?')); + } + + public function testTurkishLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('tr_TR'); + + $this->assertSame('Merhaba!', $fixer->fix('Merhaba !')); + $this->assertSame('Merhaba?', $fixer->fix('Merhaba ?')); + } + + public function testHungarianLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('hu_HU'); + + $this->assertSame('Szia!', $fixer->fix('Szia !')); + 
$this->assertSame('Szia?', $fixer->fix('Szia ?')); + } + + public function testRomanianLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('ro_RO'); + + $this->assertSame('Bună!', $fixer->fix('Bună !')); + $this->assertSame('Bună?', $fixer->fix('Bună ?')); + } + + public function testCatalanLocale(): void + { + $fixer = new Fixer\SpaceBeforePunctuation('ca_ES'); + + // Catalan explicitly does NOT use French spacing rules + $this->assertSame('Hola!', $fixer->fix('Hola !')); + $this->assertSame('Hola?', $fixer->fix('Hola ?')); + } +} diff --git a/tests/JoliTypo/Tests/JoliTypoTest.php b/tests/JoliTypo/Tests/JoliTypoTest.php index 6330c54..6534bff 100644 --- a/tests/JoliTypo/Tests/JoliTypoTest.php +++ b/tests/JoliTypo/Tests/JoliTypoTest.php @@ -88,7 +88,7 @@ public function testBadClassName(): void { $this->expectException(BadRuleSetException::class); - new Fixer(['Ellipsis', 'Acme\\Demo\\Fixer']); + new Fixer(['Ellipsis', 'Acme\Demo\Fixer']); } public function testBadLocale(): void diff --git a/tests/JoliTypo/Tests/LocaleConfigTest.php b/tests/JoliTypo/Tests/LocaleConfigTest.php new file mode 100644 index 0000000..f9843d5 --- /dev/null +++ b/tests/JoliTypo/Tests/LocaleConfigTest.php @@ -0,0 +1,177 @@ +assertSame(LocaleConfig::SPACING_RULE_FRENCH, LocaleConfig::getSpacingRule('fr')); + $this->assertSame(LocaleConfig::SPACING_RULE_FRENCH, LocaleConfig::getSpacingRule('fr_FR')); + $this->assertSame(LocaleConfig::SPACING_RULE_FRENCH, LocaleConfig::getSpacingRule('fr_BE')); + $this->assertSame(LocaleConfig::SPACING_RULE_FRENCH, LocaleConfig::getSpacingRule('fr_CH')); + } + + public function testGetSpacingRuleCanadianFrench(): void + { + // Canadian French uses no space before punctuation + $this->assertSame(LocaleConfig::SPACING_RULE_NONE, LocaleConfig::getSpacingRule('fr_CA')); + } + + public function testGetSpacingRuleSwissGerman(): void + { + $this->assertSame(LocaleConfig::SPACING_RULE_SWISS_GERMAN, LocaleConfig::getSpacingRule('de_CH')); + } + + public 
function testGetSpacingRuleNone(): void + { + // Test various languages that should return SPACING_RULE_NONE + $locales = ['en', 'en_GB', 'en_US', 'de', 'de_DE', 'es', 'it', 'pt', 'nl', 'pl', 'ru', 'cs']; + + foreach ($locales as $locale) { + $this->assertSame( + LocaleConfig::SPACING_RULE_NONE, + LocaleConfig::getSpacingRule($locale), + "Expected SPACING_RULE_NONE for locale: {$locale}" + ); + } + } + + public function testGetSpacingRuleUnknownLocaleFallsBackToNone(): void + { + $this->assertSame(LocaleConfig::SPACING_RULE_NONE, LocaleConfig::getSpacingRule('xx_XX')); + $this->assertSame(LocaleConfig::SPACING_RULE_NONE, LocaleConfig::getSpacingRule('unknown')); + } + + public function testGetSpacingRuleFallsBackToLanguage(): void + { + // fr_LU (Luxembourg French) is not explicitly defined, should fall back to 'fr' + $this->assertSame(LocaleConfig::SPACING_RULE_FRENCH, LocaleConfig::getSpacingRule('fr_LU')); + } + + // ========================================================================= + // Quotation Styles + // ========================================================================= + + public function testGetQuotationStyleFrench(): void + { + $style = LocaleConfig::getQuotationStyle('fr'); + + $this->assertSame(Fixer::LAQUO, $style['opening']); + $this->assertSame(Fixer::NO_BREAK_SPACE, $style['openingSuffix']); + $this->assertSame(Fixer::RAQUO, $style['closing']); + $this->assertSame(Fixer::NO_BREAK_SPACE, $style['closingPrefix']); + } + + public function testGetQuotationStyleGuillemets(): void + { + $style = LocaleConfig::getQuotationStyle('ru'); + + $this->assertSame(Fixer::LAQUO, $style['opening']); + $this->assertSame('', $style['openingSuffix']); + $this->assertSame(Fixer::RAQUO, $style['closing']); + $this->assertSame('', $style['closingPrefix']); + } + + public function testGetQuotationStyleGerman(): void + { + $style = LocaleConfig::getQuotationStyle('de'); + + $this->assertSame(Fixer::BDQUO, $style['opening']); + $this->assertSame('', 
$style['openingSuffix']); + $this->assertSame(Fixer::LDQUO, $style['closing']); + $this->assertSame('', $style['closingPrefix']); + } + + public function testGetQuotationStyleEnglish(): void + { + $style = LocaleConfig::getQuotationStyle('en'); + + $this->assertSame(Fixer::LDQUO, $style['opening']); + $this->assertSame('', $style['openingSuffix']); + $this->assertSame(Fixer::RDQUO, $style['closing']); + $this->assertSame('', $style['closingPrefix']); + } + + public function testGetQuotationStyleFinnish(): void + { + $style = LocaleConfig::getQuotationStyle('fi'); + + // Finnish uses the same closing quote on both sides + $this->assertSame(Fixer::RDQUO, $style['opening']); + $this->assertSame('', $style['openingSuffix']); + $this->assertSame(Fixer::RDQUO, $style['closing']); + $this->assertSame('', $style['closingPrefix']); + } + + public function testGetQuotationStyleUnknownReturnsNull(): void + { + $this->assertNull(LocaleConfig::getQuotationStyle('unknown')); + $this->assertNull(LocaleConfig::getQuotationStyle('xx_XX')); + } + + public function testGetQuotationStyleTypeReturnsCorrectTypes(): void + { + $this->assertSame(LocaleConfig::QUOTE_STYLE_FRENCH, LocaleConfig::getQuotationStyleType('fr')); + $this->assertSame(LocaleConfig::QUOTE_STYLE_GUILLEMETS, LocaleConfig::getQuotationStyleType('ru')); + $this->assertSame(LocaleConfig::QUOTE_STYLE_GERMAN, LocaleConfig::getQuotationStyleType('de')); + $this->assertSame(LocaleConfig::QUOTE_STYLE_ENGLISH, LocaleConfig::getQuotationStyleType('en')); + $this->assertSame(LocaleConfig::QUOTE_STYLE_FINNISH, LocaleConfig::getQuotationStyleType('fi')); + } + + public function testGetQuotationStyleHandlesDashLocale(): void + { + // Should handle both fr_FR and fr-FR formats + $style = LocaleConfig::getQuotationStyle('pt-BR'); + + $this->assertSame(Fixer::LDQUO, $style['opening']); + $this->assertSame(Fixer::RDQUO, $style['closing']); + } + + // ========================================================================= + // 
Recommended Rules + // ========================================================================= + + public function testGetRecommendedRulesReturnsArrayForKnownLocale(): void + { + $rules = LocaleConfig::getRecommendedRules('en_GB'); + + $this->assertIsArray($rules); + $this->assertContains('Ellipsis', $rules); + $this->assertContains('SmartQuotes', $rules); + $this->assertContains('SpaceBeforePunctuation', $rules); + } + + public function testGetRecommendedRulesReturnsNullForUnknownLocale(): void + { + $this->assertNull(LocaleConfig::getRecommendedRules('unknown')); + $this->assertNull(LocaleConfig::getRecommendedRules('xx_XX')); + } + + public function testRecommendedRulesIncludeSpaceBeforePunctuation(): void + { + // All recommended rules should include SpaceBeforePunctuation + foreach (LocaleConfig::RECOMMENDED_RULES_BY_LOCALE as $locale => $rules) { + $this->assertContains( + 'SpaceBeforePunctuation', + $rules, + "SpaceBeforePunctuation should be in recommended rules for {$locale}" + ); + } + } +} diff --git a/tools/cli/bin/jolitypo b/tools/cli/bin/jolitypo index 56d57f2..d250f4b 100755 --- a/tools/cli/bin/jolitypo +++ b/tools/cli/bin/jolitypo @@ -2,6 +2,7 @@ getOption('rule'); if (!$rules) { - if (!array_key_exists($locale, Fixer::RECOMMENDED_RULES_BY_LOCALE)) { + $rules = LocaleConfig::getRecommendedRules($locale); + if (null === $rules) { throw new \InvalidArgumentException(sprintf('There is no recommended rules for "%s" locale. Please specify manually the rules to apply.', $locale)); } - - $rules = Fixer::RECOMMENDED_RULES_BY_LOCALE[$locale]; } diff --git a/tools/phar/castor.php b/tools/phar/castor.php index 2323380..a561d67 100644 --- a/tools/phar/castor.php +++ b/tools/phar/castor.php @@ -35,7 +35,7 @@ function update(): void function compile() { with( - function () { + static function () { if (!is_dir(__DIR__ . '/../cli/vendor')) { \cli\install(); }