diff --git a/core/composer.json b/core/composer.json index 3970b28..08bad4f 100644 --- a/core/composer.json +++ b/core/composer.json @@ -18,6 +18,7 @@ "symfony/validator": "~2.8", "symfony/process": "~2.8", "symfony/polyfill-iconv": "~1.0", + "symfony/polyfill-mbstring": "~1.0", "symfony/yaml": "~2.8", "twig/twig": "^1.23.1", "doctrine/common": "2.5.*", diff --git a/core/includes/theme.maintenance.inc b/core/includes/theme.maintenance.inc index 80cc52c..310c9cd 100644 --- a/core/includes/theme.maintenance.inc +++ b/core/includes/theme.maintenance.inc @@ -5,7 +5,6 @@ * Theming for maintenance pages. */ -use Drupal\Component\Utility\Unicode; use Drupal\Core\Site\Settings; /** @@ -29,7 +28,6 @@ function _drupal_maintenance_theme() { require_once __DIR__ . '/file.inc'; require_once __DIR__ . '/module.inc'; require_once __DIR__ . '/database.inc'; - Unicode::check(); // Install and update pages are treated differently to prevent theming overrides. if (defined('MAINTENANCE_MODE') && (MAINTENANCE_MODE == 'install' || MAINTENANCE_MODE == 'update')) { diff --git a/core/lib/Drupal/Component/Utility/Unicode.php b/core/lib/Drupal/Component/Utility/Unicode.php index f6b4668..2adda9c 100644 --- a/core/lib/Drupal/Component/Utility/Unicode.php +++ b/core/lib/Drupal/Component/Utility/Unicode.php @@ -88,13 +88,6 @@ class Unicode { const STATUS_ERROR = -1; /** - * Holds the multibyte capabilities of the current environment. - * - * @var int - */ - protected static $status = 0; - - /** * Gets the current status of unicode/multibyte support on this environment. * * @return int @@ -107,7 +100,13 @@ class Unicode { * An error occurred. No unicode support. 
*/ public static function getStatus() { - return static::$status; + switch (static::check()) { + case 'mb_strlen': + return Unicode::STATUS_SINGLEBYTE; + case '': + return Unicode::STATUS_MULTIBYTE; + } + return Unicode::STATUS_ERROR; } /** @@ -123,12 +122,13 @@ public static function getStatus() { * * @param int $status * The new status of multibyte support. + * + * @deprecated in Drupal 8.0.0, will be removed before Drupal 9.0.0. In Drupal + * 9 there will be no way to set the status and in Drupal 8 this ability has + * been removed. */ public static function setStatus($status) { - if (!in_array($status, array(static::STATUS_SINGLEBYTE, static::STATUS_MULTIBYTE, static::STATUS_ERROR))) { - throw new \InvalidArgumentException('Invalid status value for unicode support.'); - } - static::$status = $status; + @trigger_error('\Drupal\Component\Utility\Unicode::setStatus() is deprecated and will be removed in Drupal 9.0.0.', E_USER_DEPRECATED); } /** @@ -143,38 +143,33 @@ public static function setStatus($status) { * Otherwise, an empty string. */ public static function check() { + // Set appropriate configuration. + mb_internal_encoding('utf-8'); + mb_language('uni'); + // Check for mbstring extension. if (!function_exists('mb_strlen')) { - static::$status = static::STATUS_SINGLEBYTE; return 'mb_strlen'; } // Check mbstring configuration. if (ini_get('mbstring.func_overload') != 0) { - static::$status = static::STATUS_ERROR; return 'mbstring.func_overload'; } if (ini_get('mbstring.encoding_translation') != 0) { - static::$status = static::STATUS_ERROR; return 'mbstring.encoding_translation'; } // mbstring.http_input and mbstring.http_output are deprecated and empty by // default in PHP 5.6. 
if (version_compare(PHP_VERSION, '5.6.0') == -1) { if (ini_get('mbstring.http_input') != 'pass') { - static::$status = static::STATUS_ERROR; return 'mbstring.http_input'; } if (ini_get('mbstring.http_output') != 'pass') { - static::$status = static::STATUS_ERROR; return 'mbstring.http_output'; } } - // Set appropriate configuration. - mb_internal_encoding('utf-8'); - mb_language('uni'); - static::$status = static::STATUS_MULTIBYTE; return ''; } @@ -224,17 +219,7 @@ public static function encodingFromBOM($data) { * Converted data or FALSE. */ public static function convertToUtf8($data, $encoding) { - if (function_exists('iconv')) { - return @iconv($encoding, 'utf-8', $data); - } - elseif (function_exists('mb_convert_encoding')) { - return @mb_convert_encoding($data, 'utf-8', $encoding); - } - elseif (function_exists('recode_string')) { - return @recode_string($encoding . '..utf-8', $data); - } - // Cannot convert. - return FALSE; + return @iconv($encoding, 'utf-8', $data); } /** @@ -281,13 +266,7 @@ public static function truncateBytes($string, $len) { * The length of the string. */ public static function strlen($text) { - if (static::getStatus() == static::STATUS_MULTIBYTE) { - return mb_strlen($text); - } - else { - // Do not count UTF-8 continuation bytes. - return strlen(preg_replace("/[\x80-\xBF]/", '', $text)); - } + return mb_strlen($text); } /** @@ -300,16 +279,7 @@ public static function strlen($text) { * The string in uppercase. */ public static function strtoupper($text) { - if (static::getStatus() == static::STATUS_MULTIBYTE) { - return mb_strtoupper($text); - } - else { - // Use C-locale for ASCII-only uppercase. - $text = strtoupper($text); - // Case flip Latin-1 accented letters. - $text = preg_replace_callback('/\xC3[\xA0-\xB6\xB8-\xBE]/', '\Drupal\Component\Utility\Unicode::caseFlip', $text); - return $text; - } + return mb_strtoupper($text); } /** @@ -322,16 +292,7 @@ public static function strtoupper($text) { * The string in lowercase. 
*/ public static function strtolower($text) { - if (static::getStatus() == static::STATUS_MULTIBYTE) { - return mb_strtolower($text); - } - else { - // Use C-locale for ASCII-only lowercase. - $text = strtolower($text); - // Case flip Latin-1 accented letters. - $text = preg_replace_callback('/\xC3[\x80-\x96\x98-\x9E]/', '\Drupal\Component\Utility\Unicode::caseFlip', $text); - return $text; - } + return mb_strtolower($text); } /** @@ -399,90 +360,7 @@ public static function ucwords($text) { * The shortened string. */ public static function substr($text, $start, $length = NULL) { - if (static::getStatus() == static::STATUS_MULTIBYTE) { - return $length === NULL ? mb_substr($text, $start) : mb_substr($text, $start, $length); - } - else { - $strlen = strlen($text); - // Find the starting byte offset. - $bytes = 0; - if ($start > 0) { - // Count all the characters except continuation bytes from the start - // until we have found $start characters or the end of the string. - $bytes = -1; $chars = -1; - while ($bytes < $strlen - 1 && $chars < $start) { - $bytes++; - $c = ord($text[$bytes]); - if ($c < 0x80 || $c >= 0xC0) { - $chars++; - } - } - } - elseif ($start < 0) { - // Count all the characters except continuation bytes from the end - // until we have found abs($start) characters. - $start = abs($start); - $bytes = $strlen; $chars = 0; - while ($bytes > 0 && $chars < $start) { - $bytes--; - $c = ord($text[$bytes]); - if ($c < 0x80 || $c >= 0xC0) { - $chars++; - } - } - } - $istart = $bytes; - - // Find the ending byte offset. - if ($length === NULL) { - $iend = $strlen; - } - elseif ($length > 0) { - // Count all the characters except continuation bytes from the starting - // index until we have found $length characters or reached the end of - // the string, then backtrace one byte. 
- $iend = $istart - 1; - $chars = -1; - $last_real = FALSE; - while ($iend < $strlen - 1 && $chars < $length) { - $iend++; - $c = ord($text[$iend]); - $last_real = FALSE; - if ($c < 0x80 || $c >= 0xC0) { - $chars++; - $last_real = TRUE; - } - } - // Backtrace one byte if the last character we found was a real - // character and we don't need it. - if ($last_real && $chars >= $length) { - $iend--; - } - } - elseif ($length < 0) { - // Count all the characters except continuation bytes from the end - // until we have found abs($start) characters, then backtrace one byte. - $length = abs($length); - $iend = $strlen; $chars = 0; - while ($iend > 0 && $chars < $length) { - $iend--; - $c = ord($text[$iend]); - if ($c < 0x80 || $c >= 0xC0) { - $chars++; - } - } - // Backtrace one byte if we are not at the beginning of the string. - if ($iend > 0) { - $iend--; - } - } - else { - // $length == 0, return an empty string. - return ''; - } - - return substr($text, $istart, max(0, $iend - $istart + 1)); - } + return $length === NULL ? mb_substr($text, $start) : mb_substr($text, $start, $length); } /** @@ -709,16 +587,7 @@ public static function validateUtf8($text) { * a return value of 0 is not the same as FALSE. */ public static function strpos($haystack, $needle, $offset = 0) { - if (static::getStatus() == static::STATUS_MULTIBYTE) { - return mb_strpos($haystack, $needle, $offset); - } - else { - // Remove Unicode continuation characters, to be compatible with - // Unicode::strlen() and Unicode::substr(). 
- $haystack = preg_replace("/[\x80-\xBF]/", '', $haystack); - $needle = preg_replace("/[\x80-\xBF]/", '', $needle); - return strpos($haystack, $needle, $offset); - } + return mb_strpos($haystack, $needle, $offset); } } diff --git a/core/lib/Drupal/Component/Utility/composer.json b/core/lib/Drupal/Component/Utility/composer.json index 13671ef..3f489ea 100644 --- a/core/lib/Drupal/Component/Utility/composer.json +++ b/core/lib/Drupal/Component/Utility/composer.json @@ -7,7 +7,9 @@ "require": { "php": ">=5.5.9", "paragonie/random_compat": "^1.0|^2.0", - "drupal/core-render": "~8.2" + "drupal/core-render": "~8.2", + "symfony/polyfill-iconv": "~1.0", + "symfony/polyfill-mbstring": "~1.0" }, "autoload": { "psr-4": { diff --git a/core/lib/Drupal/Core/DrupalKernel.php b/core/lib/Drupal/Core/DrupalKernel.php index b8130b3..a0a89df 100644 --- a/core/lib/Drupal/Core/DrupalKernel.php +++ b/core/lib/Drupal/Core/DrupalKernel.php @@ -5,7 +5,6 @@ use Composer\Autoload\ClassLoader; use Drupal\Component\Assertion\Handle; use Drupal\Component\FileCache\FileCacheFactory; -use Drupal\Component\Utility\Unicode; use Drupal\Component\Utility\UrlHelper; use Drupal\Core\Config\BootstrapConfigStorageFactory; use Drupal\Core\Config\NullStorage; @@ -966,8 +965,9 @@ public static function bootEnvironment($app_root = NULL) { // numbers handling. setlocale(LC_ALL, 'C'); - // Detect string handling method. - Unicode::check(); + // Set appropriate configuration for multi-byte strings. + mb_internal_encoding('utf-8'); + mb_language('uni'); // Indicate that code is operating in a test child site. 
if (!defined('DRUPAL_TEST_IN_CHILD_SITE')) { diff --git a/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php b/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php index 39759b5..fc3c3c4 100644 --- a/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php +++ b/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php @@ -15,54 +15,6 @@ class UnicodeTest extends UnitTestCase { /** - * {@inheritdoc} - * - * @covers ::check - */ - protected function setUp() { - // Initialize unicode component. - Unicode::check(); - } - - /** - * Getting and settings the multibyte environment status. - * - * @dataProvider providerTestStatus - * @covers ::getStatus - * @covers ::setStatus - */ - public function testStatus($value, $expected, $invalid = FALSE) { - if ($invalid) { - $this->setExpectedException('InvalidArgumentException'); - } - Unicode::setStatus($value); - $this->assertEquals($expected, Unicode::getStatus()); - } - - /** - * Data provider for testStatus(). - * - * @see testStatus() - * - * @return array - * An array containing: - * - The status value to set. - * - The status value to expect after setting the new value. - * - (optional) Boolean indicating invalid status. Defaults to FALSE. - */ - public function providerTestStatus() { - return array( - array(Unicode::STATUS_SINGLEBYTE, Unicode::STATUS_SINGLEBYTE), - array(rand(10, 100), Unicode::STATUS_SINGLEBYTE, TRUE), - array(rand(10, 100), Unicode::STATUS_SINGLEBYTE, TRUE), - array(Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE), - array(rand(10, 100), Unicode::STATUS_MULTIBYTE, TRUE), - array(Unicode::STATUS_ERROR, Unicode::STATUS_ERROR), - array(Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE), - ); - } - - /** * Tests multibyte encoding and decoding. 
* * @dataProvider providerTestMimeHeader diff --git a/core/tests/bootstrap.php b/core/tests/bootstrap.php index a2b4921..41f6181 100644 --- a/core/tests/bootstrap.php +++ b/core/tests/bootstrap.php @@ -154,6 +154,10 @@ function drupal_phpunit_populate_class_loader() { // @see \Drupal\Core\DrupalKernel::bootEnvironment() setlocale(LC_ALL, 'C'); +// Set appropriate configuration for multi-byte strings. +mb_internal_encoding('utf-8'); +mb_language('uni'); + // Set the default timezone. While this doesn't cause any tests to fail, PHP // complains if 'date.timezone' is not set in php.ini. The Australia/Sydney // timezone is chosen so all tests are run using an edge case scenario (UTC+10