diff --git a/core/includes/common.inc b/core/includes/common.inc index 88d6fd3..515a2fc 100644 --- a/core/includes/common.inc +++ b/core/includes/common.inc @@ -11,6 +11,7 @@ use Drupal\Core\Database\Database; use Drupal\Core\SystemListingInfo; use Drupal\Core\Template\Attribute; +use Drupal\Component\Utility\Unicode; /** * @file @@ -1548,11 +1549,7 @@ function filter_xss_bad_protocol($string, $decode = TRUE) { // @todo Remove the $decode parameter in Drupal 8, and always assume an HTML // string that needs decoding. if ($decode) { - if (!function_exists('decode_entities')) { - require_once DRUPAL_ROOT . '/core/includes/unicode.inc'; - } - - $string = decode_entities($string); + $string = Unicode::decodeEntities($string); } return check_plain(drupal_strip_dangerous_protocols($string)); } @@ -1582,7 +1579,7 @@ function format_rss_channel($title, $link, $description, $items, $langcode = NUL // The RSS 2.0 "spec" doesn't indicate HTML can be used in the description. // We strip all HTML tags, but need to prevent double encoding from properly // escaped source data (such as & becoming &amp;). - $output .= ' ' . check_plain(decode_entities(strip_tags($description))) . "\n"; + $output .= ' ' . check_plain(Unicode::decodeEntities(strip_tags($description))) . "\n"; $output .= ' ' . check_plain($langcode) . "\n"; $output .= format_xml_elements($args); $output .= $items; @@ -3414,7 +3411,7 @@ function drupal_clean_css_identifier($identifier, $filter = array(' ' => '-', '_ * The cleaned class name. 
*/ function drupal_html_class($class) { - return drupal_clean_css_identifier(drupal_strtolower($class)); + return drupal_clean_css_identifier(Unicode::strtolower($class)); } /** @@ -3488,7 +3485,7 @@ function drupal_html_id($id) { } $seen_ids = &drupal_static(__FUNCTION__, $seen_ids_init); - $id = strtr(drupal_strtolower($id), array(' ' => '-', '_' => '-', '[' => '-', ']' => '')); + $id = strtr(Unicode::strtolower($id), array(' ' => '-', '_' => '-', '[' => '-', ']' => '')); // As defined in http://www.w3.org/TR/html4/types.html#type-name, HTML IDs can // only contain letters, digits ([0-9]), hyphens ("-"), underscores ("_"), @@ -4866,7 +4863,6 @@ function _drupal_bootstrap_code() { require_once DRUPAL_ROOT . '/' . settings()->get('menu_inc', 'core/includes/menu.inc'); require_once DRUPAL_ROOT . '/core/includes/tablesort.inc'; require_once DRUPAL_ROOT . '/core/includes/file.inc'; - require_once DRUPAL_ROOT . '/core/includes/unicode.inc'; require_once DRUPAL_ROOT . '/core/includes/image.inc'; require_once DRUPAL_ROOT . '/core/includes/form.inc'; require_once DRUPAL_ROOT . '/core/includes/mail.inc'; diff --git a/core/includes/file.inc b/core/includes/file.inc index 5f54190..ad9c062 100644 --- a/core/includes/file.inc +++ b/core/includes/file.inc @@ -6,6 +6,7 @@ */ use Drupal\Core\StreamWrapper\LocalStream; +use Drupal\Component\Utility\Unicode; use Drupal\Component\PhpStorage\MTimeProtectedFastFileStorage; use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException; use Symfony\Component\HttpKernel\Exception\NotFoundHttpException; @@ -445,7 +446,7 @@ function file_create_url($uri) { // HTTP and to https://example.com/bar.jpg when viewing a HTTPS page) // Both types of relative URIs are characterized by a leading slash, hence // we can use a single check. 
- if (drupal_substr($uri, 0, 1) == '/') { + if (Unicode::substr($uri, 0, 1) == '/') { return $uri; } else { diff --git a/core/includes/form.inc b/core/includes/form.inc index 3ebaca1..69ef376 100644 --- a/core/includes/form.inc +++ b/core/includes/form.inc @@ -6,6 +6,7 @@ */ use Drupal\Component\Utility\NestedArray; +use Drupal\Component\Utility\Unicode; use Drupal\Core\Form\FormInterface; use Drupal\Core\Database\Database; use Drupal\Core\Template\Attribute; @@ -1366,8 +1367,8 @@ function _form_validate(&$elements, &$form_state, $form_id = NULL) { // The following errors are always shown. if (isset($elements['#needs_validation'])) { // Verify that the value is not longer than #maxlength. - if (isset($elements['#maxlength']) && drupal_strlen($elements['#value']) > $elements['#maxlength']) { - form_error($elements, $t('!name cannot be longer than %max characters but is currently %length characters long.', array('!name' => empty($elements['#title']) ? $elements['#parents'][0] : $elements['#title'], '%max' => $elements['#maxlength'], '%length' => drupal_strlen($elements['#value'])))); + if (isset($elements['#maxlength']) && Unicode::strlen($elements['#value']) > $elements['#maxlength']) { + form_error($elements, $t('!name cannot be longer than %max characters but is currently %length characters long.', array('!name' => empty($elements['#title']) ? $elements['#parents'][0] : $elements['#title'], '%max' => $elements['#maxlength'], '%length' => Unicode::strlen($elements['#value'])))); } if (isset($elements['#options']) && isset($elements['#value'])) { @@ -1447,7 +1448,7 @@ function _form_validate(&$elements, &$form_state, $form_id = NULL) { // An unchecked checkbox has a #value of integer 0, different than string // '0', which could be a valid value. 
$is_empty_multiple = (!count($elements['#value'])); - $is_empty_string = (is_string($elements['#value']) && drupal_strlen(trim($elements['#value'])) == 0); + $is_empty_string = (is_string($elements['#value']) && Unicode::strlen(trim($elements['#value'])) == 0); $is_empty_value = ($elements['#value'] === 0); if ($is_empty_multiple || $is_empty_string || $is_empty_value) { // Flag this element as #required_but_empty to allow #element_validate diff --git a/core/includes/install.inc b/core/includes/install.inc index 088413a..63a3010 100644 --- a/core/includes/install.inc +++ b/core/includes/install.inc @@ -5,6 +5,7 @@ * API functions for installing modules and themes. */ +use Drupal\Component\Utility\Unicode; use Drupal\Core\Database\Database; use Drupal\Core\DrupalKernel; use Drupal\locale\Gettext; @@ -388,7 +389,7 @@ function drupal_verify_profile($install_state) { if (count($missing_modules)) { $modules = array(); foreach ($missing_modules as $module) { - $modules[] = '' . drupal_ucfirst($module) . ''; + $modules[] = '' . Unicode::ucfirst($module) . ''; } $requirements['required_modules'] = array( 'title' => st('Required modules'), diff --git a/core/includes/mail.inc b/core/includes/mail.inc index c9ff601..1147625 100644 --- a/core/includes/mail.inc +++ b/core/includes/mail.inc @@ -5,6 +5,8 @@ * API functions for processing and sending e-mail. */ +use Drupal\Component\Utility\Unicode; + /** * Auto-detect appropriate line endings for e-mails. 
* @@ -463,11 +465,11 @@ function drupal_html_to_text($string, $allowed_tags = NULL) { // Fancy headers case 'h1': $indent[] = '======== '; - $casing = 'drupal_strtoupper'; + $casing = array('Drupal\Component\Utility\Unicode', 'strtoupper'); break; case 'h2': $indent[] = '-------- '; - $casing = 'drupal_strtoupper'; + $casing = array('Drupal\Component\Utility\Unicode', 'strtoupper'); break; case '/h1': case '/h2': @@ -496,8 +498,8 @@ function drupal_html_to_text($string, $allowed_tags = NULL) { else { // Convert inline HTML text to plain text; not removing line-breaks or // white-space, since that breaks newlines when sanitizing plain-text. - $value = trim(decode_entities($value)); - if (drupal_strlen($value)) { + $value = trim(Unicode::decodeEntities($value)); + if (Unicode::strlen($value)) { $chunk = $value; } } diff --git a/core/includes/menu.inc b/core/includes/menu.inc index d719571..13fd258 100644 --- a/core/includes/menu.inc +++ b/core/includes/menu.inc @@ -8,6 +8,7 @@ use Symfony\Component\HttpFoundation\Request; use Drupal\Component\Utility\NestedArray; +use Drupal\Component\Utility\Unicode; use Drupal\Core\Cache\CacheBackendInterface; use Drupal\Core\Template\Attribute; use Drupal\menu_link\Plugin\Core\Entity\MenuLink; @@ -2141,11 +2142,11 @@ function menu_contextual_links($module, $parent_path, $args) { ->execute() ->fetchAllAssoc('path', PDO::FETCH_ASSOC); } - $parent_length = drupal_strlen($root_path) + 1; + $parent_length = Unicode::strlen($root_path) + 1; $map = $router_item['original_map']; foreach ($data[$root_path] as $item) { // Extract the actual "task" string from the path argument. - $key = drupal_substr($item['path'], $parent_length); + $key = Unicode::substr($item['path'], $parent_length); // Denormalize and translate the contextual link. 
_menu_translate($item, $map, TRUE); diff --git a/core/includes/theme.maintenance.inc b/core/includes/theme.maintenance.inc index 37775e6..a9b7226 100644 --- a/core/includes/theme.maintenance.inc +++ b/core/includes/theme.maintenance.inc @@ -25,7 +25,6 @@ function _drupal_maintenance_theme() { require_once DRUPAL_ROOT . '/' . settings()->get('path_inc', 'core/includes/path.inc'); require_once DRUPAL_ROOT . '/core/includes/theme.inc'; require_once DRUPAL_ROOT . '/core/includes/common.inc'; - require_once DRUPAL_ROOT . '/core/includes/unicode.inc'; require_once DRUPAL_ROOT . '/core/includes/file.inc'; require_once DRUPAL_ROOT . '/core/includes/module.inc'; unicode_check(); diff --git a/core/includes/unicode.inc b/core/includes/unicode.inc deleted file mode 100644 index 70a8fde..0000000 --- a/core/includes/unicode.inc +++ /dev/null @@ -1,631 +0,0 @@ - $t('Standard PHP'), - UNICODE_MULTIBYTE => $t('PHP Mbstring Extension'), - UNICODE_ERROR => $t('Error'), - ); - $severities = array( - UNICODE_SINGLEBYTE => REQUIREMENT_WARNING, - UNICODE_MULTIBYTE => NULL, - UNICODE_ERROR => REQUIREMENT_ERROR, - ); - $failed_check = unicode_check(); - $library = $GLOBALS['multibyte']; - - $requirements['unicode'] = array( - 'title' => $t('Unicode library'), - 'value' => $libraries[$library], - 'severity' => $severities[$library], - ); - $t_args = array('@url' => 'http://www.php.net/mbstring'); - switch ($failed_check) { - case 'mb_strlen': - $requirements['unicode']['description'] = $t('Operations on Unicode strings are emulated on a best-effort basis. Install the PHP mbstring extension for improved Unicode support.', $t_args); - break; - - case 'mbstring.func_overload': - $requirements['unicode']['description'] = $t('Multibyte string function overloading in PHP is active and must be disabled. Check the php.ini mbstring.func_overload setting. 
Please refer to the PHP mbstring documentation for more information.', $t_args); - break; - - case 'mbstring.encoding_translation': - $requirements['unicode']['description'] = $t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini mbstring.encoding_translation setting. Please refer to the PHP mbstring documentation for more information.', $t_args); - break; - - case 'mbstring.http_input': - $requirements['unicode']['description'] = $t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini mbstring.http_input setting. Please refer to the PHP mbstring documentation for more information.', $t_args); - break; - - case 'mbstring.http_output': - $requirements['unicode']['description'] = $t('Multibyte string output conversion in PHP is active and must be disabled. Check the php.ini mbstring.http_output setting. Please refer to the PHP mbstring documentation for more information.', $t_args); - break; - } - - return $requirements; -} - -/** - * Prepares a new XML parser. - * - * This is a wrapper around xml_parser_create() which extracts the encoding - * from the XML data first and sets the output encoding to UTF-8. This function - * should be used instead of xml_parser_create(), because PHP 4's XML parser - * doesn't check the input encoding itself. "Starting from PHP 5, the input - * encoding is automatically detected, so that the encoding parameter specifies - * only the output encoding." - * - * This is also where unsupported encodings will be converted. Callers should - * take this into account: $data might have been changed after the call. - * - * @param $data - * The XML data which will be parsed later. - * - * @return - * An XML parser object or FALSE on error. - * - * @ingroup php_wrappers - */ -function drupal_xml_parser_create(&$data) { - // Default XML encoding is UTF-8 - $encoding = 'utf-8'; - $bom = FALSE; - - // Check for UTF-8 byte order mark (PHP5's XML parser doesn't handle it). 
- if (!strncmp($data, "\xEF\xBB\xBF", 3)) { - $bom = TRUE; - $data = substr($data, 3); - } - - // Check for an encoding declaration in the XML prolog if no BOM was found. - if (!$bom && preg_match('/^<\?xml[^>]+encoding="(.+?)"/', $data, $match)) { - $encoding = $match[1]; - } - - // Unsupported encodings are converted here into UTF-8. - $php_supported = array('utf-8', 'iso-8859-1', 'us-ascii'); - if (!in_array(strtolower($encoding), $php_supported)) { - $out = drupal_convert_to_utf8($data, $encoding); - if ($out !== FALSE) { - $encoding = 'utf-8'; - $data = preg_replace('/^(<\?xml[^>]+encoding)="(.+?)"/', '\\1="utf-8"', $out); - } - else { - watchdog('php', 'Could not convert XML encoding %s to UTF-8.', array('%s' => $encoding), WATCHDOG_WARNING); - return FALSE; - } - } - - $xml_parser = xml_parser_create($encoding); - xml_parser_set_option($xml_parser, XML_OPTION_TARGET_ENCODING, 'utf-8'); - return $xml_parser; -} - -/** - * Converts data to UTF-8. - * - * Requires the iconv, GNU recode or mbstring PHP extension. - * - * @param $data - * The data to be converted. - * @param $encoding - * The encoding that the data is in. - * - * @return - * Converted data or FALSE. - */ -function drupal_convert_to_utf8($data, $encoding) { - if (function_exists('iconv')) { - $out = @iconv($encoding, 'utf-8', $data); - } - elseif (function_exists('mb_convert_encoding')) { - $out = @mb_convert_encoding($data, 'utf-8', $encoding); - } - elseif (function_exists('recode_string')) { - $out = @recode_string($encoding . '..utf-8', $data); - } - else { - watchdog('php', 'Unsupported encoding %s. Please install iconv, GNU recode or mbstring for PHP.', array('%s' => $encoding), WATCHDOG_ERROR); - return FALSE; - } - - return $out; -} - -/** - * Truncates a UTF-8-encoded string safely to a number of bytes. - * - * If the end position is in the middle of a UTF-8 sequence, it scans backwards - * until the beginning of the byte sequence. 
- * - * Use this function whenever you want to chop off a string at an unsure - * location. On the other hand, if you're sure that you're splitting on a - * character boundary (e.g. after using strpos() or similar), you can safely - * use substr() instead. - * - * @param $string - * The string to truncate. - * @param $len - * An upper limit on the returned string length. - * - * @return - * The truncated string. - */ -function drupal_truncate_bytes($string, $len) { - if (strlen($string) <= $len) { - return $string; - } - if ((ord($string[$len]) < 0x80) || (ord($string[$len]) >= 0xC0)) { - return substr($string, 0, $len); - } - // Scan backwards to beginning of the byte sequence. - while (--$len >= 0 && ord($string[$len]) >= 0x80 && ord($string[$len]) < 0xC0); - - return substr($string, 0, $len); -} - -/** - * Truncates a UTF-8-encoded string safely to a number of characters. - * - * @param $string - * The string to truncate. - * @param $max_length - * An upper limit on the returned string length, including trailing ellipsis - * if $add_ellipsis is TRUE. - * @param $wordsafe - * If TRUE, attempt to truncate on a word boundary. Word boundaries are - * spaces, punctuation, and Unicode characters used as word boundaries in - * non-Latin languages; see PREG_CLASS_UNICODE_WORD_BOUNDARY for more - * information. If a word boundary cannot be found that would make the length - * of the returned string fall within length guidelines (see parameters - * $max_length and $min_wordsafe_length), word boundaries are ignored. - * @param $add_ellipsis - * If TRUE, add t('...') to the end of the truncated string (defaults to - * FALSE). The string length will still fall within $max_length. - * @param $min_wordsafe_length - * If $wordsafe is TRUE, the minimum acceptable length for truncation (before - * adding an ellipsis, if $add_ellipsis is TRUE). Has no effect if $wordsafe - * is FALSE. 
This can be used to prevent having a very short resulting string - * that will not be understandable. For instance, if you are truncating the - * string "See myverylongurlexample.com for more information" to a word-safe - * return length of 20, the only available word boundary within 20 characters - * is after the word "See", which wouldn't leave a very informative string. If - * you had set $min_wordsafe_length to 10, though, the function would realise - * that "See" alone is too short, and would then just truncate ignoring word - * boundaries, giving you "See myverylongurl..." (assuming you had set - * $add_ellipses to TRUE). - * - * @return string - * The truncated string. - */ -function truncate_utf8($string, $max_length, $wordsafe = FALSE, $add_ellipsis = FALSE, $min_wordsafe_length = 1) { - $ellipsis = ''; - $max_length = max($max_length, 0); - $min_wordsafe_length = max($min_wordsafe_length, 0); - - if (drupal_strlen($string) <= $max_length) { - // No truncation needed, so don't add ellipsis, just return. - return $string; - } - - if ($add_ellipsis) { - // Truncate ellipsis in case $max_length is small. - $ellipsis = drupal_substr(t('…'), 0, $max_length); - $max_length -= drupal_strlen($ellipsis); - $max_length = max($max_length, 0); - } - - if ($max_length <= $min_wordsafe_length) { - // Do not attempt word-safe if lengths are bad. - $wordsafe = FALSE; - } - - if ($wordsafe) { - $matches = array(); - // Find the last word boundary, if there is one within $min_wordsafe_length - // to $max_length characters. preg_match() is always greedy, so it will - // find the longest string possible. - $found = preg_match('/^(.{' . $min_wordsafe_length . ',' . $max_length . '})[' . PREG_CLASS_UNICODE_WORD_BOUNDARY . 
']/u', $string, $matches); - if ($found) { - $string = $matches[1]; - } - else { - $string = drupal_substr($string, 0, $max_length); - } - } - else { - $string = drupal_substr($string, 0, $max_length); - } - - if ($add_ellipsis) { - // If we're adding an ellipsis, remove any trailing periods. - $string = rtrim($string, '.'); - - $string .= $ellipsis; - } - - return $string; -} - -/** - * Encodes MIME/HTTP header values that contain incorrectly encoded characters. - * - * For example, mime_header_encode('tést.txt') returns "=?UTF-8?B?dMOpc3QudHh0?=". - * - * See http://www.rfc-editor.org/rfc/rfc2047.txt for more information. - * - * Notes: - * - Only encode strings that contain non-ASCII characters. - * - We progressively cut-off a chunk with truncate_utf8(). This is to ensure - * each chunk starts and ends on a character boundary. - * - Using \n as the chunk separator may cause problems on some systems and may - * have to be changed to \r\n or \r. - * - * @param $string - * The header to encode. - * - * @return string - * The mime-encoded header. - * - * @see mime_header_decode() - */ -function mime_header_encode($string) { - if (preg_match('/[^\x20-\x7E]/', $string)) { - $chunk_size = 47; // floor((75 - strlen("=?UTF-8?B??=")) * 0.75); - $len = strlen($string); - $output = ''; - while ($len > 0) { - $chunk = drupal_truncate_bytes($string, $chunk_size); - $output .= ' =?UTF-8?B?' . base64_encode($chunk) . "?=\n"; - $c = strlen($chunk); - $string = substr($string, $c); - $len -= $c; - } - return trim($output); - } - return $string; -} - -/** - * Decodes MIME/HTTP encoded header values. - * - * @param $header - * The header to decode. - * - * @return string - * The mime-decoded header. 
- * - * @see mime_header_encode() - */ -function mime_header_decode($header) { - // First step: encoded chunks followed by other encoded chunks (need to collapse whitespace) - $header = preg_replace_callback('/=\?([^?]+)\?(Q|B)\?([^?]+|\?(?!=))\?=\s+(?==\?)/', '_mime_header_decode', $header); - // Second step: remaining chunks (do not collapse whitespace) - return preg_replace_callback('/=\?([^?]+)\?(Q|B)\?([^?]+|\?(?!=))\?=/', '_mime_header_decode', $header); -} - -/** - * Decodes encoded header data passed from mime_header_decode(). - * - * Callback for preg_replace_callback() within mime_header_decode(). - * - * @param $matches - * The array of matches from preg_replace_callback(). - * - * @return string - * The mime-decoded string. - * - * @see mime_header_decode() - */ -function _mime_header_decode($matches) { - // Regexp groups: - // 1: Character set name - // 2: Escaping method (Q or B) - // 3: Encoded data - $data = ($matches[2] == 'B') ? base64_decode($matches[3]) : str_replace('_', ' ', quoted_printable_decode($matches[3])); - if (strtolower($matches[1]) != 'utf-8') { - $data = drupal_convert_to_utf8($data, $matches[1]); - } - return $data; -} - -/** - * Decodes all HTML entities (including numerical ones) to regular UTF-8 bytes. - * - * Double-escaped entities will only be decoded once ("&lt;" becomes "<" - * , not "<"). Be careful when using this function, as decode_entities can - * revert previous sanitization efforts (<script> will become