diff --git a/core/lib/Drupal/Component/Transliteration/PHPTransliteration.php b/core/lib/Drupal/Component/Transliteration/PHPTransliteration.php
index 80982d2..88dd64c 100644
--- a/core/lib/Drupal/Component/Transliteration/PHPTransliteration.php
+++ b/core/lib/Drupal/Component/Transliteration/PHPTransliteration.php
@@ -76,19 +76,33 @@ public function __construct($data_directory = NULL) {
   }
 
   /**
-   * Implements TransliterationInterface::transliterate().
+   * {@inheritdoc}
    */
-  public function transliterate($string, $langcode = 'en', $unknown_character = '?') {
+  public function transliterate($string, $langcode = 'en', $unknown_character = '?', $max_length = NULL) {
     $result = '';
+    $length = 0;
     // Split into Unicode characters and transliterate each one.
     foreach (preg_split('//u', $string, 0, PREG_SPLIT_NO_EMPTY) as $character) {
       $code = self::ordUTF8($character);
       if ($code == -1) {
-        $result .= $unknown_character;
+        $to_add = $unknown_character;
       }
       else {
-        $result .= $this->replace($code, $langcode, $unknown_character);
+        $to_add = $this->replace($code, $langcode, $unknown_character);
       }
+
+      // Check if this exceeds the maximum allowed length. A replacement is
+      // either appended whole or not at all, so it is never split.
+      if (isset($max_length)) {
+        // strlen() counts bytes, which equals characters for ASCII output.
+        $length += strlen($to_add);
+        if ($length > $max_length) {
+          // There is no more space.
+          return $result;
+        }
+      }
+
+      $result .= $to_add;
     }
 
     return $result;
diff --git a/core/lib/Drupal/Component/Transliteration/TransliterationInterface.php b/core/lib/Drupal/Component/Transliteration/TransliterationInterface.php
index b88a55d..8eda816 100644
--- a/core/lib/Drupal/Component/Transliteration/TransliterationInterface.php
+++ b/core/lib/Drupal/Component/Transliteration/TransliterationInterface.php
@@ -25,10 +25,14 @@
    * @param string $unknown_character
    *   (optional) The character to substitute for characters in $string without
    *   transliterated equivalents. Defaults to '?'.
+   * @param int $max_length
+   *   (optional) If provided, return at most this many characters, ensuring
+   *   that the transliteration does not split in the middle of an input
+   *   character's transliteration. Defaults to NULL, meaning no length limit.
    *
    * @return string
    *   $string with non-US-ASCII characters transliterated to US-ASCII
    *   characters, and unknown characters replaced with $unknown_character.
    */
-  public function transliterate($string, $langcode = 'en', $unknown_character = '?');
+  public function transliterate($string, $langcode = 'en', $unknown_character = '?', $max_length = NULL);
 }
diff --git a/core/modules/system/lib/Drupal/system/Tests/Transliteration/TransliterationTest.php b/core/modules/system/lib/Drupal/system/Tests/Transliteration/TransliterationTest.php
index 7630b0b..2e5ed60 100644
--- a/core/modules/system/lib/Drupal/system/Tests/Transliteration/TransliterationTest.php
+++ b/core/modules/system/lib/Drupal/system/Tests/Transliteration/TransliterationTest.php
@@ -106,5 +106,14 @@ public function testPHPTransliteration() {
         '@actual' => $actual,
       )));
     }
+
+    // Test with max length, using German. It should never split up the
+    // transliteration of a single character.
+    $input = 'Ä Ö Ü Å Ø äöüåøhello';
+    $trunc_output = 'Ae Oe Ue A O aeoe';
+    $this->assertIdentical($trunc_output, $transliterator_service->transliterate($input, 'de', '?', 17), 'Truncating to 17 characters works');
+    $this->assertIdentical($trunc_output, $transliterator_service->transliterate($input, 'de', '?', 18), 'Truncating to 18 characters works');
+    // 'Ä' becomes 'Ae', which cannot fit into a limit of one character.
+    $this->assertIdentical('', $transliterator_service->transliterate($input, 'de', '?', 1), 'Truncating to 1 character returns an empty string');
   }
 }