diff --git a/core/includes/common.inc b/core/includes/common.inc
index 88d6fd3..515a2fc 100644
--- a/core/includes/common.inc
+++ b/core/includes/common.inc
@@ -11,6 +11,7 @@
use Drupal\Core\Database\Database;
use Drupal\Core\SystemListingInfo;
use Drupal\Core\Template\Attribute;
+use Drupal\Component\Utility\Unicode;
/**
* @file
@@ -1548,11 +1549,7 @@ function filter_xss_bad_protocol($string, $decode = TRUE) {
// @todo Remove the $decode parameter in Drupal 8, and always assume an HTML
// string that needs decoding.
if ($decode) {
- if (!function_exists('decode_entities')) {
- require_once DRUPAL_ROOT . '/core/includes/unicode.inc';
- }
-
- $string = decode_entities($string);
+ $string = Unicode::decodeEntities($string);
}
return check_plain(drupal_strip_dangerous_protocols($string));
}
@@ -1582,7 +1579,7 @@ function format_rss_channel($title, $link, $description, $items, $langcode = NUL
// The RSS 2.0 "spec" doesn't indicate HTML can be used in the description.
// We strip all HTML tags, but need to prevent double encoding from properly
// escaped source data (such as &amp; becoming &amp;amp;).
- $output .= ' ' . check_plain(decode_entities(strip_tags($description))) . "\n";
+ $output .= ' ' . check_plain(Unicode::decodeEntities(strip_tags($description))) . "\n";
$output .= ' ' . check_plain($langcode) . "\n";
$output .= format_xml_elements($args);
$output .= $items;
@@ -3414,7 +3411,7 @@ function drupal_clean_css_identifier($identifier, $filter = array(' ' => '-', '_
* The cleaned class name.
*/
function drupal_html_class($class) {
- return drupal_clean_css_identifier(drupal_strtolower($class));
+ return drupal_clean_css_identifier(Unicode::strtolower($class));
}
/**
@@ -3488,7 +3485,7 @@ function drupal_html_id($id) {
}
$seen_ids = &drupal_static(__FUNCTION__, $seen_ids_init);
- $id = strtr(drupal_strtolower($id), array(' ' => '-', '_' => '-', '[' => '-', ']' => ''));
+ $id = strtr(Unicode::strtolower($id), array(' ' => '-', '_' => '-', '[' => '-', ']' => ''));
// As defined in http://www.w3.org/TR/html4/types.html#type-name, HTML IDs can
// only contain letters, digits ([0-9]), hyphens ("-"), underscores ("_"),
@@ -4866,7 +4863,6 @@ function _drupal_bootstrap_code() {
require_once DRUPAL_ROOT . '/' . settings()->get('menu_inc', 'core/includes/menu.inc');
require_once DRUPAL_ROOT . '/core/includes/tablesort.inc';
require_once DRUPAL_ROOT . '/core/includes/file.inc';
- require_once DRUPAL_ROOT . '/core/includes/unicode.inc';
require_once DRUPAL_ROOT . '/core/includes/image.inc';
require_once DRUPAL_ROOT . '/core/includes/form.inc';
require_once DRUPAL_ROOT . '/core/includes/mail.inc';
diff --git a/core/includes/file.inc b/core/includes/file.inc
index 5f54190..ad9c062 100644
--- a/core/includes/file.inc
+++ b/core/includes/file.inc
@@ -6,6 +6,7 @@
*/
use Drupal\Core\StreamWrapper\LocalStream;
+use Drupal\Component\Utility\Unicode;
use Drupal\Component\PhpStorage\MTimeProtectedFastFileStorage;
use Symfony\Component\HttpKernel\Exception\AccessDeniedHttpException;
use Symfony\Component\HttpKernel\Exception\NotFoundHttpException;
@@ -445,7 +446,7 @@ function file_create_url($uri) {
// HTTP and to https://example.com/bar.jpg when viewing a HTTPS page)
// Both types of relative URIs are characterized by a leading slash, hence
// we can use a single check.
- if (drupal_substr($uri, 0, 1) == '/') {
+ if (Unicode::substr($uri, 0, 1) == '/') {
return $uri;
}
else {
diff --git a/core/includes/form.inc b/core/includes/form.inc
index 3ebaca1..69ef376 100644
--- a/core/includes/form.inc
+++ b/core/includes/form.inc
@@ -6,6 +6,7 @@
*/
use Drupal\Component\Utility\NestedArray;
+use Drupal\Component\Utility\Unicode;
use Drupal\Core\Form\FormInterface;
use Drupal\Core\Database\Database;
use Drupal\Core\Template\Attribute;
@@ -1366,8 +1367,8 @@ function _form_validate(&$elements, &$form_state, $form_id = NULL) {
// The following errors are always shown.
if (isset($elements['#needs_validation'])) {
// Verify that the value is not longer than #maxlength.
- if (isset($elements['#maxlength']) && drupal_strlen($elements['#value']) > $elements['#maxlength']) {
- form_error($elements, $t('!name cannot be longer than %max characters but is currently %length characters long.', array('!name' => empty($elements['#title']) ? $elements['#parents'][0] : $elements['#title'], '%max' => $elements['#maxlength'], '%length' => drupal_strlen($elements['#value']))));
+ if (isset($elements['#maxlength']) && Unicode::strlen($elements['#value']) > $elements['#maxlength']) {
+ form_error($elements, $t('!name cannot be longer than %max characters but is currently %length characters long.', array('!name' => empty($elements['#title']) ? $elements['#parents'][0] : $elements['#title'], '%max' => $elements['#maxlength'], '%length' => Unicode::strlen($elements['#value']))));
}
if (isset($elements['#options']) && isset($elements['#value'])) {
@@ -1447,7 +1448,7 @@ function _form_validate(&$elements, &$form_state, $form_id = NULL) {
// An unchecked checkbox has a #value of integer 0, different than string
// '0', which could be a valid value.
$is_empty_multiple = (!count($elements['#value']));
- $is_empty_string = (is_string($elements['#value']) && drupal_strlen(trim($elements['#value'])) == 0);
+ $is_empty_string = (is_string($elements['#value']) && Unicode::strlen(trim($elements['#value'])) == 0);
$is_empty_value = ($elements['#value'] === 0);
if ($is_empty_multiple || $is_empty_string || $is_empty_value) {
// Flag this element as #required_but_empty to allow #element_validate
diff --git a/core/includes/install.inc b/core/includes/install.inc
index 3fdc48c..258f971 100644
--- a/core/includes/install.inc
+++ b/core/includes/install.inc
@@ -5,6 +5,7 @@
* API functions for installing modules and themes.
*/
+use Drupal\Component\Utility\Unicode;
use Drupal\Core\Database\Database;
use Drupal\Core\DrupalKernel;
use Drupal\locale\Gettext;
@@ -594,7 +595,7 @@ function drupal_verify_profile($install_state) {
if (count($missing_modules)) {
$modules = array();
foreach ($missing_modules as $module) {
- $modules[] = '' . drupal_ucfirst($module) . '';
+ $modules[] = '' . Unicode::ucfirst($module) . '';
}
$requirements['required_modules'] = array(
'title' => st('Required modules'),
diff --git a/core/includes/mail.inc b/core/includes/mail.inc
index c9ff601..1147625 100644
--- a/core/includes/mail.inc
+++ b/core/includes/mail.inc
@@ -5,6 +5,8 @@
* API functions for processing and sending e-mail.
*/
+use Drupal\Component\Utility\Unicode;
+
/**
* Auto-detect appropriate line endings for e-mails.
*
@@ -463,11 +465,11 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
// Fancy headers
case 'h1':
$indent[] = '======== ';
- $casing = 'drupal_strtoupper';
+ $casing = 'Drupal\Component\Utility\Unicode::strtoupper';
break;
case 'h2':
$indent[] = '-------- ';
- $casing = 'drupal_strtoupper';
+ $casing = 'Drupal\Component\Utility\Unicode::strtoupper';
break;
case '/h1':
case '/h2':
@@ -496,8 +498,8 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
else {
// Convert inline HTML text to plain text; not removing line-breaks or
// white-space, since that breaks newlines when sanitizing plain-text.
- $value = trim(decode_entities($value));
- if (drupal_strlen($value)) {
+ $value = trim(Unicode::decodeEntities($value));
+ if (Unicode::strlen($value)) {
$chunk = $value;
}
}
diff --git a/core/includes/menu.inc b/core/includes/menu.inc
index d719571..13fd258 100644
--- a/core/includes/menu.inc
+++ b/core/includes/menu.inc
@@ -8,6 +8,7 @@
use Symfony\Component\HttpFoundation\Request;
use Drupal\Component\Utility\NestedArray;
+use Drupal\Component\Utility\Unicode;
use Drupal\Core\Cache\CacheBackendInterface;
use Drupal\Core\Template\Attribute;
use Drupal\menu_link\Plugin\Core\Entity\MenuLink;
@@ -2141,11 +2142,11 @@ function menu_contextual_links($module, $parent_path, $args) {
->execute()
->fetchAllAssoc('path', PDO::FETCH_ASSOC);
}
- $parent_length = drupal_strlen($root_path) + 1;
+ $parent_length = Unicode::strlen($root_path) + 1;
$map = $router_item['original_map'];
foreach ($data[$root_path] as $item) {
// Extract the actual "task" string from the path argument.
- $key = drupal_substr($item['path'], $parent_length);
+ $key = Unicode::substr($item['path'], $parent_length);
// Denormalize and translate the contextual link.
_menu_translate($item, $map, TRUE);
diff --git a/core/includes/theme.maintenance.inc b/core/includes/theme.maintenance.inc
index 37775e6..a9b7226 100644
--- a/core/includes/theme.maintenance.inc
+++ b/core/includes/theme.maintenance.inc
@@ -25,7 +25,6 @@ function _drupal_maintenance_theme() {
require_once DRUPAL_ROOT . '/' . settings()->get('path_inc', 'core/includes/path.inc');
require_once DRUPAL_ROOT . '/core/includes/theme.inc';
require_once DRUPAL_ROOT . '/core/includes/common.inc';
- require_once DRUPAL_ROOT . '/core/includes/unicode.inc';
require_once DRUPAL_ROOT . '/core/includes/file.inc';
require_once DRUPAL_ROOT . '/core/includes/module.inc';
unicode_check();
diff --git a/core/lib/Drupal/Component/Diff/DiffEngine.php b/core/lib/Drupal/Component/Diff/DiffEngine.php
index f426b96..fd4d63b 100644
--- a/core/lib/Drupal/Component/Diff/DiffEngine.php
+++ b/core/lib/Drupal/Component/Diff/DiffEngine.php
@@ -8,6 +8,8 @@
* You may copy this code freely under the conditions of the GPL.
*/
+use Drupal\Component\Utility\Unicode;
+
define('USE_ASSERTS', FALSE);
/**
@@ -238,7 +240,7 @@ function diff($from_lines, $to_lines) {
* Returns the whole line if it's small enough, or the MD5 hash otherwise.
*/
function _line_hash($line) {
- if (drupal_strlen($line) > $this->MAX_XREF_LENGTH()) {
+ if (Unicode::strlen($line) > $this->MAX_XREF_LENGTH()) {
return md5($line);
}
else {
@@ -993,7 +995,7 @@ function addWords($words, $tag = '') {
}
if ($word[0] == "\n") {
$this->_flushLine($tag);
- $word = drupal_substr($word, 1);
+ $word = Unicode::substr($word, 1);
}
assert(!strstr($word, "\n"));
$this->_group .= $word;
@@ -1037,7 +1039,7 @@ function _split($lines) {
$words[] = "\n";
$stripped[] = "\n";
}
- if ( drupal_strlen( $line ) > $this->MAX_LINE_LENGTH() ) {
+ if ( Unicode::strlen( $line ) > $this->MAX_LINE_LENGTH() ) {
$words[] = $line;
$stripped[] = $line;
}
@@ -1256,7 +1258,7 @@ function render() {
break;
case 'delete':
foreach ($chunk->orig as $i => $piece) {
- if (strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>') {
+ if (strpos($piece, '<') === 0 && Unicode::substr($piece, Unicode::strlen($piece) - 1) === '>') {
$output .= $piece;
}
else {
@@ -1267,7 +1269,7 @@ function render() {
default:
$chunk->closing = $this->process_chunk($chunk->closing);
foreach ($chunk->closing as $i => $piece) {
- if ($piece === ' ' || (strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>' && drupal_strtolower(drupal_substr($piece, 1, 3)) != 'img')) {
+ if ($piece === ' ' || (strpos($piece, '<') === 0 && Unicode::substr($piece, Unicode::strlen($piece) - 1) === '>' && Unicode::strtolower(Unicode::substr($piece, 1, 3)) != 'img')) {
$output .= $piece;
}
else {
@@ -1291,11 +1293,11 @@ function process_chunk($chunk) {
if (!isset($processed[$j])) {
$processed[$j] = '';
}
- if (strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>') {
+ if (strpos($piece, '<') === 0 && Unicode::substr($piece, Unicode::strlen($piece) - 1) === '>') {
$processed[$j] = $piece;
$j++;
}
- elseif (isset($next) && strpos($next, '<') === 0 && drupal_substr($next, drupal_strlen($next) - 1) === '>') {
+ elseif (isset($next) && strpos($next, '<') === 0 && Unicode::substr($next, Unicode::strlen($next) - 1) === '>') {
$processed[$j] .= $piece;
$j++;
}
diff --git a/core/lib/Drupal/Component/Utility/Unicode.php b/core/lib/Drupal/Component/Utility/Unicode.php
new file mode 100644
index 0000000..3e241ad
--- /dev/null
+++ b/core/lib/Drupal/Component/Utility/Unicode.php
@@ -0,0 +1,644 @@
+ $t('Standard PHP'),
+ UNICODE_MULTIBYTE => $t('PHP Mbstring Extension'),
+ UNICODE_ERROR => $t('Error'),
+ );
+ $severities = array(
+ UNICODE_SINGLEBYTE => REQUIREMENT_WARNING,
+ UNICODE_MULTIBYTE => NULL,
+ UNICODE_ERROR => REQUIREMENT_ERROR,
+ );
+ $failed_check = unicode_check();
+ $library = $GLOBALS['multibyte'];
+
+ $requirements['unicode'] = array(
+ 'title' => $t('Unicode library'),
+ 'value' => $libraries[$library],
+ 'severity' => $severities[$library],
+ );
+ $t_args = array('@url' => 'http://www.php.net/mbstring');
+ switch ($failed_check) {
+ case 'mb_strlen':
+ $requirements['unicode']['description'] = $t('Operations on Unicode strings are emulated on a best-effort basis. Install the PHP mbstring extension for improved Unicode support.', $t_args);
+ break;
+
+ case 'mbstring.func_overload':
+ $requirements['unicode']['description'] = $t('Multibyte string function overloading in PHP is active and must be disabled. Check the php.ini mbstring.func_overload setting. Please refer to the PHP mbstring documentation for more information.', $t_args);
+ break;
+
+ case 'mbstring.encoding_translation':
+ $requirements['unicode']['description'] = $t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini mbstring.encoding_translation setting. Please refer to the PHP mbstring documentation for more information.', $t_args);
+ break;
+
+ case 'mbstring.http_input':
+ $requirements['unicode']['description'] = $t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini mbstring.http_input setting. Please refer to the PHP mbstring documentation for more information.', $t_args);
+ break;
+
+ case 'mbstring.http_output':
+ $requirements['unicode']['description'] = $t('Multibyte string output conversion in PHP is active and must be disabled. Check the php.ini mbstring.http_output setting. Please refer to the PHP mbstring documentation for more information.', $t_args);
+ break;
+ }
+
+ return $requirements;
+ }
+
+ /**
+  * Prepares a new XML parser.
+  *
+  * This is a wrapper around xml_parser_create() which extracts the encoding
+  * from the XML data first and sets the output encoding to UTF-8. This method
+  * should be used instead of xml_parser_create(), because PHP 4's XML parser
+  * doesn't check the input encoding itself. "Starting from PHP 5, the input
+  * encoding is automatically detected, so that the encoding parameter specifies
+  * only the output encoding."
+  *
+  * A leading UTF-8 byte order mark is removed from $data. When no byte order
+  * mark is present, the encoding declaration of the XML prolog is honoured,
+  * whether its value is wrapped in double or in single quotes.
+  *
+  * This is also where unsupported encodings will be converted. Callers should
+  * take this into account: $data might have been changed after the call.
+  *
+  * @param string $data
+  *   The XML data which will be parsed later. Passed by reference; it is
+  *   modified when a byte order mark is stripped or the data is re-encoded.
+  *
+  * @return resource|bool
+  *   The XML parser returned by xml_parser_create(), or FALSE if the data is
+  *   in an unsupported encoding that could not be converted to UTF-8.
+  *
+  * @ingroup php_wrappers
+  */
+ public static function createXMLParser(&$data) {
+   // Default XML encoding is UTF-8.
+   $encoding = 'utf-8';
+   $bom = FALSE;
+
+   // Check for UTF-8 byte order mark (PHP5's XML parser doesn't handle it).
+   if (!strncmp($data, "\xEF\xBB\xBF", 3)) {
+     $bom = TRUE;
+     $data = substr($data, 3);
+   }
+
+   // Check for an encoding declaration in the XML prolog if no BOM was found.
+   // XML permits the value to be quoted with either " or ', so capture the
+   // opening quote and require the same character to close the value.
+   if (!$bom && preg_match('/^<\?xml[^>]+encoding=(["\'])(.+?)\1/', $data, $match)) {
+     $encoding = $match[2];
+   }
+
+   // Unsupported encodings are converted here into UTF-8.
+   $php_supported = array('utf-8', 'iso-8859-1', 'us-ascii');
+   if (!in_array(strtolower($encoding), $php_supported)) {
+     $out = self::convertToUTF8($data, $encoding);
+     if ($out !== FALSE) {
+       $encoding = 'utf-8';
+       // Rewrite the declaration so it matches the converted payload.
+       $data = preg_replace('/^(<\?xml[^>]+encoding)=(["\'])(.+?)\2/', '\\1="utf-8"', $out);
+     }
+     else {
+       watchdog('php', 'Could not convert XML encoding %s to UTF-8.', array('%s' => $encoding), WATCHDOG_WARNING);
+       return FALSE;
+     }
+   }
+
+   $xml_parser = xml_parser_create($encoding);
+   xml_parser_set_option($xml_parser, XML_OPTION_TARGET_ENCODING, 'utf-8');
+   return $xml_parser;
+ }
+
+ /**
+  * Re-encodes a string as UTF-8.
+  *
+  * The first available of the iconv, mbstring and GNU recode PHP extensions
+  * performs the conversion; only one of them is tried. Warnings raised by the
+  * conversion call are suppressed and its return value is handed back as is.
+  *
+  * @param string $data
+  *   The data to be converted.
+  * @param string $encoding
+  *   The encoding that the data is in.
+  *
+  * @return bool|string
+  *   Converted data or FALSE. FALSE is returned, and an error is logged, when
+  *   none of the three extensions is installed.
+  */
+ public static function convertToUTF8($data, $encoding) {
+   if (function_exists('iconv')) {
+     return @iconv($encoding, 'utf-8', $data);
+   }
+
+   if (function_exists('mb_convert_encoding')) {
+     return @mb_convert_encoding($data, 'utf-8', $encoding);
+   }
+
+   if (function_exists('recode_string')) {
+     return @recode_string($encoding . '..utf-8', $data);
+   }
+
+   // No conversion backend is available at all.
+   watchdog('php', 'Unsupported encoding %s. Please install iconv, GNU recode or mbstring for PHP.', array('%s' => $encoding), WATCHDOG_ERROR);
+   return FALSE;
+ }
+
+ /**
+  * Cuts a UTF-8 string to a byte budget without splitting a character.
+  *
+  * When the byte at the cut position is a continuation byte (0x80-0xBF), the
+  * cut position is moved backwards until it sits on the first byte of that
+  * sequence, so the partial character is dropped entirely.
+  *
+  * Use this whenever a string has to be chopped at an arbitrary offset. If the
+  * offset is already known to be a character boundary (e.g. it came from
+  * strpos() or similar), plain substr() is sufficient.
+  *
+  * Note: the backward scan does not validate the input. If every byte before
+  * the cut position is a continuation byte, the scan runs past the start of
+  * the string and substr() is called with a negative length.
+  *
+  * @param string $string
+  *   The string to truncate.
+  * @param int $len
+  *   An upper limit, in bytes, on the returned string length.
+  *
+  * @return string
+  *   The truncated string.
+  */
+ public static function truncateBytes($string, $len) {
+   // Nothing to cut.
+   if (strlen($string) <= $len) {
+     return $string;
+   }
+
+   // A cut position on an ASCII byte or on a lead byte is already a boundary.
+   $byte = ord($string[$len]);
+   if ($byte < 0x80 || $byte >= 0xC0) {
+     return substr($string, 0, $len);
+   }
+
+   // Walk backwards until a byte that is not a continuation byte is found.
+   for ($len--; $len >= 0; $len--) {
+     $byte = ord($string[$len]);
+     if ($byte < 0x80 || $byte >= 0xC0) {
+       break;
+     }
+   }
+
+   return substr($string, 0, $len);
+ }
+
+ /**
+  * Truncates a UTF-8-encoded string safely to a number of characters.
+  *
+  * @param string $string
+  *   The string to truncate.
+  * @param int $max_length
+  *   An upper limit on the returned string length, including trailing ellipsis
+  *   if $add_ellipsis is TRUE. Negative values are treated as 0.
+  * @param bool $wordsafe
+  *   If TRUE, attempt to truncate on a word boundary. Word boundaries are
+  *   spaces, punctuation, and Unicode characters used as word boundaries in
+  *   non-Latin languages; see self::PREG_CLASS_UNICODE_WORD_BOUNDARY for more
+  *   information. If a word boundary cannot be found that would make the length
+  *   of the returned string fall within length guidelines (see parameters
+  *   $max_length and $min_wordsafe_length), word boundaries are ignored.
+  * @param bool $add_ellipsis
+  *   If TRUE, append the translated ellipsis character to the truncated string
+  *   (defaults to FALSE). The string length will still fall within $max_length.
+  * @param int $min_wordsafe_length
+  *   If $wordsafe is TRUE, the minimum acceptable length for truncation (before
+  *   adding an ellipsis, if $add_ellipsis is TRUE). Has no effect if $wordsafe
+  *   is FALSE. This can be used to prevent having a very short resulting string
+  *   that will not be understandable. For instance, if you are truncating the
+  *   string "See myverylongurlexample.com for more information" to a word-safe
+  *   return length of 20, the only available word boundary within 20 characters
+  *   is after the word "See", which wouldn't leave a very informative string. If
+  *   you had set $min_wordsafe_length to 10, though, the function would realise
+  *   that "See" alone is too short, and would then just truncate ignoring word
+  *   boundaries, giving you "See myverylongurl..." (assuming you had set
+  *   $add_ellipsis to TRUE). Negative values are treated as 0.
+  *
+  * @return string
+  *   The truncated string.
+  */
+ public static function truncateUTF8($string, $max_length, $wordsafe = FALSE, $add_ellipsis = FALSE, $min_wordsafe_length = 1) {
+   $max_length = max($max_length, 0);
+   $min_wordsafe_length = max($min_wordsafe_length, 0);
+
+   // Short enough already: hand the input back untouched, without ellipsis.
+   if (self::strlen($string) <= $max_length) {
+     return $string;
+   }
+
+   $ellipsis = '';
+   if ($add_ellipsis) {
+     // The ellipsis itself is cut down when $max_length is tiny, and the room
+     // it occupies is taken out of the budget left for the text.
+     $ellipsis = self::substr(t('…'), 0, $max_length);
+     $max_length = max($max_length - self::strlen($ellipsis), 0);
+   }
+
+   // Word-safe truncation is only attempted when the allowed window between
+   // $min_wordsafe_length and $max_length is not empty.
+   $try_wordsafe = $wordsafe && $max_length > $min_wordsafe_length;
+
+   // preg_match() is greedy, so a successful match captures the longest prefix
+   // within the window that is directly followed by a word boundary.
+   $matches = array();
+   if ($try_wordsafe && preg_match('/^(.{' . $min_wordsafe_length . ',' . $max_length . '})[' . self::PREG_CLASS_UNICODE_WORD_BOUNDARY . ']/u', $string, $matches)) {
+     $string = $matches[1];
+   }
+   else {
+     // Either word-safety was not wanted or no boundary exists in the window.
+     $string = self::substr($string, 0, $max_length);
+   }
+
+   if ($add_ellipsis) {
+     // Trailing periods are dropped so they do not run into the ellipsis.
+     $string = rtrim($string, '.') . $ellipsis;
+   }
+
+   return $string;
+ }
+
+ /**
+  * Encodes MIME/HTTP header values that contain incorrectly encoded characters.
+  *
+  * For example, self::MIMEHeaderEncode('tést.txt') returns "=?UTF-8?B?dMOpc3QudHh0?=".
+  *
+  * See http://www.rfc-editor.org/rfc/rfc2047.txt for more information.
+  *
+  * Notes:
+  * - Only strings containing a byte outside the printable ASCII range
+  *   (0x20-0x7E) are encoded; anything else is returned unchanged.
+  * - We progressively cut-off a chunk with self::truncateBytes(). This is to
+  *   ensure each chunk starts and ends on a character boundary.
+  * - If self::truncateBytes() cannot produce a non-empty chunk (malformed
+  *   UTF-8), the chunk is cut at the raw byte limit instead, so that the loop
+  *   always consumes input and terminates.
+  * - Using \n as the chunk separator may cause problems on some systems and may
+  *   have to be changed to \r\n or \r.
+  *
+  * @param string $string
+  *   The header to encode.
+  *
+  * @return string
+  *   The mime-encoded header.
+  *
+  * @see self::MIMEHeaderDecode()
+  */
+ public static function MIMEHeaderEncode($string) {
+   if (preg_match('/[^\x20-\x7E]/', $string)) {
+     $chunk_size = 47; // floor((75 - strlen("=?UTF-8?B??=")) * 0.75);
+     $len = strlen($string);
+     $output = '';
+     while ($len > 0) {
+       $chunk = self::truncateBytes($string, $chunk_size);
+       $c = strlen($chunk);
+       if ($c === 0) {
+         // An empty chunk would leave $string and $len unchanged and spin
+         // forever. Fall back to a plain byte cut; $string is non-empty here,
+         // so this always makes progress.
+         $chunk = substr($string, 0, $chunk_size);
+         $c = strlen($chunk);
+       }
+       $output .= ' =?UTF-8?B?' . base64_encode($chunk) . "?=\n";
+       $string = substr($string, $c);
+       $len -= $c;
+     }
+     return trim($output);
+   }
+   return $string;
+ }
+
+ /**
+  * Decodes MIME/HTTP encoded header values.
+  *
+  * Decoding runs in two passes over the header, both of which delegate the
+  * actual decoding of each encoded chunk to self::MIMEHeaderDecodeCallback().
+  *
+  * @param string $header
+  *   The header to decode.
+  *
+  * @return string
+  *   The mime-decoded header.
+  *
+  * @see self::MIMEHeaderEncode()
+  */
+ public static function MIMEHeaderDecode($header) {
+   // One encoded chunk: "=?" charset "?" (Q or B) "?" data "?=".
+   $encoded_chunk = '=\?([^?]+)\?(Q|B)\?([^?]+|\?(?!=))\?=';
+
+   // First pass: a chunk that is followed by whitespace and another chunk.
+   // The whitespace is part of the match, so it is collapsed away.
+   $collapsed = preg_replace_callback('/' . $encoded_chunk . '\s+(?==\?)/', 'self::MIMEHeaderDecodeCallback', $header);
+
+   // Second pass: every remaining chunk; surrounding whitespace is kept.
+   return preg_replace_callback('/' . $encoded_chunk . '/', 'self::MIMEHeaderDecodeCallback', $collapsed);
+ }
+
+ /**
+  * Decodes encoded header data passed from self::MIMEHeaderDecode().
+  *
+  * Callback for preg_replace_callback() within self::MIMEHeaderDecode().
+  *
+  * @param array $matches
+  *   The array of matches from preg_replace_callback():
+  *   - 1: Character set name.
+  *   - 2: Escaping method (Q or B).
+  *   - 3: Encoded data.
+  *
+  * @return string
+  *   The mime-decoded string, converted to UTF-8 when the chunk declared a
+  *   different character set. The return value of self::convertToUTF8() is
+  *   passed through unchanged, including FALSE on conversion failure.
+  *
+  * @see self::MIMEHeaderDecode()
+  */
+ protected static function MIMEHeaderDecodeCallback(array $matches) {
+   if ($matches[2] == 'B') {
+     $data = base64_decode($matches[3]);
+   }
+   else {
+     // In the "Q" encoding a raw "_" stands for a space, while a literal
+     // underscore is transmitted as "=5F". The underscores therefore have to
+     // be mapped before the hex escapes are decoded; doing it afterwards would
+     // also turn every decoded "=5F" into a space.
+     $data = quoted_printable_decode(str_replace('_', ' ', $matches[3]));
+   }
+   if (strtolower($matches[1]) != 'utf-8') {
+     $data = self::convertToUTF8($data, $matches[1]);
+   }
+   return $data;
+ }
+
+ /**
+ * Decodes all HTML entities (including numerical ones) to regular UTF-8 bytes.
+ *
+ * Double-escaped entities will only be decoded once ("&amp;lt;" becomes
+ * "&lt;", not "<"). Be careful when using this function, as self::decodeEntities()
+ * can revert previous sanitization efforts (<script> will become
+ *