diff --git a/core/composer.json b/core/composer.json index 3970b28..08bad4f 100644 --- a/core/composer.json +++ b/core/composer.json @@ -18,6 +18,7 @@ "symfony/validator": "~2.8", "symfony/process": "~2.8", "symfony/polyfill-iconv": "~1.0", + "symfony/polyfill-mbstring": "~1.0", "symfony/yaml": "~2.8", "twig/twig": "^1.23.1", "doctrine/common": "2.5.*", diff --git a/core/includes/theme.maintenance.inc b/core/includes/theme.maintenance.inc index 80cc52c..310c9cd 100644 --- a/core/includes/theme.maintenance.inc +++ b/core/includes/theme.maintenance.inc @@ -5,7 +5,6 @@ * Theming for maintenance pages. */ -use Drupal\Component\Utility\Unicode; use Drupal\Core\Site\Settings; /** @@ -29,7 +28,6 @@ function _drupal_maintenance_theme() { require_once __DIR__ . '/file.inc'; require_once __DIR__ . '/module.inc'; require_once __DIR__ . '/database.inc'; - Unicode::check(); // Install and update pages are treated differently to prevent theming overrides. if (defined('MAINTENANCE_MODE') && (MAINTENANCE_MODE == 'install' || MAINTENANCE_MODE == 'update')) { diff --git a/core/lib/Drupal/Component/Utility/Unicode.php b/core/lib/Drupal/Component/Utility/Unicode.php index f6b4668..02479df 100644 --- a/core/lib/Drupal/Component/Utility/Unicode.php +++ b/core/lib/Drupal/Component/Utility/Unicode.php @@ -88,13 +88,6 @@ class Unicode { const STATUS_ERROR = -1; /** - * Holds the multibyte capabilities of the current environment. - * - * @var int - */ - protected static $status = 0; - - /** * Gets the current status of unicode/multibyte support on this environment. * * @return int @@ -107,7 +100,13 @@ class Unicode { * An error occurred. No unicode support. */ public static function getStatus() { - return static::$status; + switch (static::check()) { + case 'mb_strlen': + return Unicode::STATUS_SINGLEBYTE; + case '': + return Unicode::STATUS_MULTIBYTE; + } + return Unicode::STATUS_ERROR; } /** @@ -123,12 +122,13 @@ public static function getStatus() { * * @param int $status * The new status of multibyte support. + * + * @deprecated in Drupal 8.3.0, will be removed before Drupal 9.0.0. In Drupal + * 9 there will be no way to set the status and in Drupal 8 this ability has + * been removed. mb_*() functions are supplied using Symfony's polyfill. */ public static function setStatus($status) { - if (!in_array($status, array(static::STATUS_SINGLEBYTE, static::STATUS_MULTIBYTE, static::STATUS_ERROR))) { - throw new \InvalidArgumentException('Invalid status value for unicode support.'); - } - static::$status = $status; + @trigger_error('\Drupal\Component\Utility\Unicode::setStatus() is deprecated and will be removed in Drupal 9.0.0.', E_USER_DEPRECATED); } /** @@ -143,38 +143,33 @@ public static function setStatus($status) { * Otherwise, an empty string. */ public static function check() { + // Set appropriate configuration. + mb_internal_encoding('utf-8'); + mb_language('uni'); + // Check for mbstring extension. - if (!function_exists('mb_strlen')) { - static::$status = static::STATUS_SINGLEBYTE; + if (!extension_loaded('mbstring')) { return 'mb_strlen'; } // Check mbstring configuration. if (ini_get('mbstring.func_overload') != 0) { - static::$status = static::STATUS_ERROR; return 'mbstring.func_overload'; } if (ini_get('mbstring.encoding_translation') != 0) { - static::$status = static::STATUS_ERROR; return 'mbstring.encoding_translation'; } // mbstring.http_input and mbstring.http_output are deprecated and empty by // default in PHP 5.6. if (version_compare(PHP_VERSION, '5.6.0') == -1) { if (ini_get('mbstring.http_input') != 'pass') { - static::$status = static::STATUS_ERROR; return 'mbstring.http_input'; } if (ini_get('mbstring.http_output') != 'pass') { - static::$status = static::STATUS_ERROR; return 'mbstring.http_output'; } } - // Set appropriate configuration. - mb_internal_encoding('utf-8'); - mb_language('uni'); - static::$status = static::STATUS_MULTIBYTE; return ''; } @@ -224,17 +219,7 @@ public static function encodingFromBOM($data) { * Converted data or FALSE. */ public static function convertToUtf8($data, $encoding) { - if (function_exists('iconv')) { - return @iconv($encoding, 'utf-8', $data); - } - elseif (function_exists('mb_convert_encoding')) { - return @mb_convert_encoding($data, 'utf-8', $encoding); - } - elseif (function_exists('recode_string')) { - return @recode_string($encoding . '..utf-8', $data); - } - // Cannot convert. - return FALSE; + return @iconv($encoding, 'utf-8', $data); } /** @@ -279,15 +264,12 @@ public static function truncateBytes($string, $len) { * * @return int * The length of the string. + * + * @deprecated in Drupal 8.3.0, will be removed before Drupal 9.0.0. Use + * mb_strlen() instead. */ public static function strlen($text) { - if (static::getStatus() == static::STATUS_MULTIBYTE) { - return mb_strlen($text); - } - else { - // Do not count UTF-8 continuation bytes. - return strlen(preg_replace("/[\x80-\xBF]/", '', $text)); - } + return mb_strlen($text); } /** @@ -298,18 +280,12 @@ public static function strlen($text) { * * @return string * The string in uppercase. + * + * @deprecated in Drupal 8.3.0, will be removed before Drupal 9.0.0. Use + * mb_strtoupper() instead. */ public static function strtoupper($text) { - if (static::getStatus() == static::STATUS_MULTIBYTE) { - return mb_strtoupper($text); - } - else { - // Use C-locale for ASCII-only uppercase. - $text = strtoupper($text); - // Case flip Latin-1 accented letters. - $text = preg_replace_callback('/\xC3[\xA0-\xB6\xB8-\xBE]/', '\Drupal\Component\Utility\Unicode::caseFlip', $text); - return $text; - } + return mb_strtoupper($text); } /** @@ -320,18 +296,12 @@ public static function strtoupper($text) { * * @return string * The string in lowercase. + * + * @deprecated in Drupal 8.3.0, will be removed before Drupal 9.0.0. Use + * mb_strtolower() instead. */ public static function strtolower($text) { - if (static::getStatus() == static::STATUS_MULTIBYTE) { - return mb_strtolower($text); - } - else { - // Use C-locale for ASCII-only lowercase. - $text = strtolower($text); - // Case flip Latin-1 accented letters. - $text = preg_replace_callback('/\xC3[\x80-\x96\x98-\x9E]/', '\Drupal\Component\Utility\Unicode::caseFlip', $text); - return $text; - } + return mb_strtolower($text); } /** @@ -397,92 +367,12 @@ public static function ucwords($text) { * * @return string * The shortened string. + * + * @deprecated in Drupal 8.3.0, will be removed before Drupal 9.0.0. Use + * mb_substr() instead. */ public static function substr($text, $start, $length = NULL) { - if (static::getStatus() == static::STATUS_MULTIBYTE) { - return $length === NULL ? mb_substr($text, $start) : mb_substr($text, $start, $length); - } - else { - $strlen = strlen($text); - // Find the starting byte offset. - $bytes = 0; - if ($start > 0) { - // Count all the characters except continuation bytes from the start - // until we have found $start characters or the end of the string. - $bytes = -1; $chars = -1; - while ($bytes < $strlen - 1 && $chars < $start) { - $bytes++; - $c = ord($text[$bytes]); - if ($c < 0x80 || $c >= 0xC0) { - $chars++; - } - } - } - elseif ($start < 0) { - // Count all the characters except continuation bytes from the end - // until we have found abs($start) characters. - $start = abs($start); - $bytes = $strlen; $chars = 0; - while ($bytes > 0 && $chars < $start) { - $bytes--; - $c = ord($text[$bytes]); - if ($c < 0x80 || $c >= 0xC0) { - $chars++; - } - } - } - $istart = $bytes; - - // Find the ending byte offset. - if ($length === NULL) { - $iend = $strlen; - } - elseif ($length > 0) { - // Count all the characters except continuation bytes from the starting - // index until we have found $length characters or reached the end of - // the string, then backtrace one byte. - $iend = $istart - 1; - $chars = -1; - $last_real = FALSE; - while ($iend < $strlen - 1 && $chars < $length) { - $iend++; - $c = ord($text[$iend]); - $last_real = FALSE; - if ($c < 0x80 || $c >= 0xC0) { - $chars++; - $last_real = TRUE; - } - } - // Backtrace one byte if the last character we found was a real - // character and we don't need it. - if ($last_real && $chars >= $length) { - $iend--; - } - } - elseif ($length < 0) { - // Count all the characters except continuation bytes from the end - // until we have found abs($start) characters, then backtrace one byte. - $length = abs($length); - $iend = $strlen; $chars = 0; - while ($iend > 0 && $chars < $length) { - $iend--; - $c = ord($text[$iend]); - if ($c < 0x80 || $c >= 0xC0) { - $chars++; - } - } - // Backtrace one byte if we are not at the beginning of the string. - if ($iend > 0) { - $iend--; - } - } - else { - // $length == 0, return an empty string. - return ''; - } - - return substr($text, $istart, max(0, $iend - $istart + 1)); - } + return mb_substr($text, $start, $length); } /** @@ -707,18 +597,12 @@ public static function validateUtf8($text) { * The position where $needle occurs in $haystack, always relative to the * beginning (independent of $offset), or FALSE if not found. Note that * a return value of 0 is not the same as FALSE. + * + * @deprecated in Drupal 8.3.0, will be removed before Drupal 9.0.0. Use + * mb_strpos() instead. */ public static function strpos($haystack, $needle, $offset = 0) { - if (static::getStatus() == static::STATUS_MULTIBYTE) { - return mb_strpos($haystack, $needle, $offset); - } - else { - // Remove Unicode continuation characters, to be compatible with - // Unicode::strlen() and Unicode::substr(). - $haystack = preg_replace("/[\x80-\xBF]/", '', $haystack); - $needle = preg_replace("/[\x80-\xBF]/", '', $needle); - return strpos($haystack, $needle, $offset); - } + return mb_strpos($haystack, $needle, $offset); } } diff --git a/core/lib/Drupal/Component/Utility/composer.json b/core/lib/Drupal/Component/Utility/composer.json index 13671ef..3f489ea 100644 --- a/core/lib/Drupal/Component/Utility/composer.json +++ b/core/lib/Drupal/Component/Utility/composer.json @@ -7,7 +7,9 @@ "require": { "php": ">=5.5.9", "paragonie/random_compat": "^1.0|^2.0", - "drupal/core-render": "~8.2" + "drupal/core-render": "~8.2", + "symfony/polyfill-iconv": "~1.0", + "symfony/polyfill-mbstring": "~1.0" }, "autoload": { "psr-4": { diff --git a/core/lib/Drupal/Core/DrupalKernel.php b/core/lib/Drupal/Core/DrupalKernel.php index b8130b3..a0a89df 100644 --- a/core/lib/Drupal/Core/DrupalKernel.php +++ b/core/lib/Drupal/Core/DrupalKernel.php @@ -5,7 +5,6 @@ use Composer\Autoload\ClassLoader; use Drupal\Component\Assertion\Handle; use Drupal\Component\FileCache\FileCacheFactory; -use Drupal\Component\Utility\Unicode; use Drupal\Component\Utility\UrlHelper; use Drupal\Core\Config\BootstrapConfigStorageFactory; use Drupal\Core\Config\NullStorage; @@ -966,8 +965,9 @@ public static function bootEnvironment($app_root = NULL) { // numbers handling. setlocale(LC_ALL, 'C'); - // Detect string handling method. - Unicode::check(); + // Set appropriate configuration for multi-byte strings. + mb_internal_encoding('utf-8'); + mb_language('uni'); // Indicate that code is operating in a test child site. if (!defined('DRUPAL_TEST_IN_CHILD_SITE')) { diff --git a/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php b/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php index 39759b5..2534e74 100644 --- a/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php +++ b/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php @@ -15,54 +15,6 @@ class UnicodeTest extends UnitTestCase { /** - * {@inheritdoc} - * - * @covers ::check - */ - protected function setUp() { - // Initialize unicode component. - Unicode::check(); - } - - /** - * Getting and settings the multibyte environment status. - * - * @dataProvider providerTestStatus - * @covers ::getStatus - * @covers ::setStatus - */ - public function testStatus($value, $expected, $invalid = FALSE) { - if ($invalid) { - $this->setExpectedException('InvalidArgumentException'); - } - Unicode::setStatus($value); - $this->assertEquals($expected, Unicode::getStatus()); - } - - /** - * Data provider for testStatus(). - * - * @see testStatus() - * - * @return array - * An array containing: - * - The status value to set. - * - The status value to expect after setting the new value. - * - (optional) Boolean indicating invalid status. Defaults to FALSE. - */ - public function providerTestStatus() { - return array( - array(Unicode::STATUS_SINGLEBYTE, Unicode::STATUS_SINGLEBYTE), - array(rand(10, 100), Unicode::STATUS_SINGLEBYTE, TRUE), - array(rand(10, 100), Unicode::STATUS_SINGLEBYTE, TRUE), - array(Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE), - array(rand(10, 100), Unicode::STATUS_MULTIBYTE, TRUE), - array(Unicode::STATUS_ERROR, Unicode::STATUS_ERROR), - array(Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE), - ); - } - - /** * Tests multibyte encoding and decoding. * * @dataProvider providerTestMimeHeader @@ -97,9 +49,7 @@ public function providerTestMimeHeader() { * @covers ::strtolower * @covers ::caseFlip */ - public function testStrtolower($text, $expected, $multibyte = FALSE) { - $status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE; - Unicode::setStatus($status); + public function testStrtolower($text, $expected) { $this->assertEquals($expected, Unicode::strtolower($text)); } @@ -109,22 +59,14 @@ public function testStrtolower($text, $expected, $multibyte = FALSE) { * @see testStrtolower() * * @return array - * An array containing a string, its lowercase version and whether it should - * be processed as multibyte. + * An array containing a string and its lowercase version. */ public function providerStrtolower() { - $cases = array( + return array( array('tHe QUIcK bRoWn', 'the quick brown'), array('FrançAIS is ÜBER-åwesome', 'français is über-åwesome'), + array('ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αβγδεζηθικλμνξοσὠ'), ); - foreach ($cases as $case) { - // Test the same string both in multibyte and singlebyte conditions. - array_push($case, TRUE); - $cases[] = $case; - } - // Add a multibyte string. - $cases[] = array('ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αβγδεζηθικλμνξοσὠ', TRUE); - return $cases; } /** @@ -134,9 +76,7 @@ public function providerStrtolower() { * @covers ::strtoupper * @covers ::caseFlip */ - public function testStrtoupper($text, $expected, $multibyte = FALSE) { - $status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE; - Unicode::setStatus($status); + public function testStrtoupper($text, $expected) { $this->assertEquals($expected, Unicode::strtoupper($text)); } @@ -146,22 +86,14 @@ public function testStrtoupper($text, $expected, $multibyte = FALSE) { * @see testStrtoupper() * * @return array - * An array containing a string, its uppercase version and whether it should - * be processed as multibyte. + * An array containing a string and its uppercase version. */ public function providerStrtoupper() { - $cases = array( + return array( array('tHe QUIcK bRoWn', 'THE QUICK BROWN'), array('FrançAIS is ÜBER-åwesome', 'FRANÇAIS IS ÜBER-ÅWESOME'), + array('αβγδεζηθικλμνξοσὠ', 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ'), ); - foreach ($cases as $case) { - // Test the same string both in multibyte and singlebyte conditions. - array_push($case, TRUE); - $cases[] = $case; - } - // Add a multibyte string. - $cases[] = array('αβγδεζηθικλμνξοσὠ', 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', TRUE); - return $cases; } /** @@ -199,9 +131,7 @@ public function providerUcfirst() { * @dataProvider providerLcfirst * @covers ::lcfirst */ - public function testLcfirst($text, $expected, $multibyte = FALSE) { - $status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE; - Unicode::setStatus($status); + public function testLcfirst($text, $expected) { $this->assertEquals($expected, Unicode::lcfirst($text)); } @@ -211,8 +141,7 @@ public function testLcfirst($text, $expected, $multibyte = FALSE) { * @see testLcfirst() * * @return array - * An array containing a string, its lowercase version and whether it should - * be processed as multibyte. + * An array containing a string and its lowercase version. */ public function providerLcfirst() { return array( @@ -221,7 +150,7 @@ public function providerLcfirst() { array('Über', 'über'), array('Åwesome', 'åwesome'), // Add a multibyte string. - array('ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', TRUE), + array('ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ'), ); } @@ -231,9 +160,7 @@ public function providerLcfirst() { * @dataProvider providerUcwords * @covers ::ucwords */ - public function testUcwords($text, $expected, $multibyte = FALSE) { - $status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE; - Unicode::setStatus($status); + public function testUcwords($text, $expected) { $this->assertEquals($expected, Unicode::ucwords($text)); } @@ -243,8 +170,7 @@ public function testUcwords($text, $expected, $multibyte = FALSE) { * @see testUcwords() * * @return array - * An array containing a string, its capitalized version and whether it should - * be processed as multibyte. + * An array containing a string and its capitalized version. */ public function providerUcwords() { return array( @@ -255,7 +181,7 @@ public function providerUcwords() { // Make sure we don't mangle extra spaces. array('frànçAIS is über-åwesome', 'FrànçAIS Is Über-Åwesome'), // Add a multibyte string. - array('σion', 'Σion', TRUE), + array('σion', 'Σion'), ); } @@ -266,11 +192,6 @@ public function providerUcwords() { * @covers ::strlen */ public function testStrlen($text, $expected) { - // Run through multibyte code path. - Unicode::setStatus(Unicode::STATUS_MULTIBYTE); - $this->assertEquals($expected, Unicode::strlen($text)); - // Run through singlebyte code path. - Unicode::setStatus(Unicode::STATUS_SINGLEBYTE); $this->assertEquals($expected, Unicode::strlen($text)); } @@ -297,11 +218,6 @@ public function providerStrlen() { * @covers ::substr */ public function testSubstr($text, $start, $length, $expected) { - // Run through multibyte code path. - Unicode::setStatus(Unicode::STATUS_MULTIBYTE); - $this->assertEquals($expected, Unicode::substr($text, $start, $length)); - // Run through singlebyte code path. - Unicode::setStatus(Unicode::STATUS_SINGLEBYTE); $this->assertEquals($expected, Unicode::substr($text, $start, $length)); } @@ -532,11 +448,6 @@ public function providerTestConvertToUtf8() { * @covers ::strpos */ public function testStrpos($haystack, $needle, $offset, $expected) { - // Run through multibyte code path. - Unicode::setStatus(Unicode::STATUS_MULTIBYTE); - $this->assertEquals($expected, Unicode::strpos($haystack, $needle, $offset)); - // Run through singlebyte code path. - Unicode::setStatus(Unicode::STATUS_SINGLEBYTE); $this->assertEquals($expected, Unicode::strpos($haystack, $needle, $offset)); } diff --git a/core/tests/bootstrap.php b/core/tests/bootstrap.php index a2b4921..41f6181 100644 --- a/core/tests/bootstrap.php +++ b/core/tests/bootstrap.php @@ -154,6 +154,10 @@ function drupal_phpunit_populate_class_loader() { // @see \Drupal\Core\DrupalKernel::bootEnvironment() setlocale(LC_ALL, 'C'); +// Set appropriate configuration for multi-byte strings. +mb_internal_encoding('utf-8'); +mb_language('uni'); + // Set the default timezone. While this doesn't cause any tests to fail, PHP // complains if 'date.timezone' is not set in php.ini. The Australia/Sydney // timezone is chosen so all tests are run using an edge case scenario (UTC+10