diff --git a/core/lib/Drupal/Component/Utility/Unicode.php b/core/lib/Drupal/Component/Utility/Unicode.php index 039c27c..e6ffcee 100644 --- a/core/lib/Drupal/Component/Utility/Unicode.php +++ b/core/lib/Drupal/Component/Utility/Unicode.php @@ -590,7 +590,7 @@ public static function strcasecmp($str1 , $str2) { } /** - * Encodes MIME/HTTP headers that contain incorrectly encoded characters. + * Encodes MIME/HTTP headers that contain unencoded characters. * * For example, Unicode::mimeHeaderEncode('tést.txt') returns * "=?UTF-8?B?dMOpc3QudHh0?=". @@ -601,7 +601,7 @@ public static function strcasecmp($str1 , $str2) { * - Only encode strings that contain non-ASCII characters. * - We progressively cut-off a chunk with self::truncateBytes(). This ensures * each chunk starts and ends on a character boundary. - * - According to the RFC 2047, we use \r\n as the chunk separator. + * - According to RFC 2047, we split long lines using CRLF SPACE as separator. * * @param string $string * The header to encode. @@ -611,7 +611,11 @@ public static function strcasecmp($str1 , $str2) { */ public static function mimeHeaderEncode($string) { if (preg_match('/[^\x20-\x7E]/', $string)) { - $chunk_size = 45; // floor((75 - strlen("=?UTF-8?B??=\r\n ")) * 0.75); + // Encoded lines must be 75 characters or less; base64 expands to 4/3 + // times the line size (in bytes), rounded up to a multiple of 4, so: + // ceil($chunk_size * 4/3) must be <= 75 - strlen("=?UTF-8?B??=") + // => floor(($chunk_size + 2/3) * 4/3) must be <= 63 + $chunk_size = 45; // floor((63 * 3/4) - 3/2) $len = strlen($string); $output = ''; while ($len > 0) { @@ -621,6 +625,7 @@ public static function mimeHeaderEncode($string) { $string = substr($string, $c); $len -= $c; } + // Remove \r\n from the end of the encoded string. 
return trim($output); } return $string; diff --git a/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php b/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php index 2f8c7e7..6f730db 100644 --- a/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php +++ b/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php @@ -89,15 +89,16 @@ public function testMimeHeader($value, $encoded) { */ public function providerTestMimeHeader() { return array( - array('tést.txt', '=?UTF-8?B?dMOpc3QudHh0?='), - // String longer than 47 characters including special characters. The - // result is an encoded chunk splitted in two parts separated by a CRLF - // and a space. - array('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaà', "=?UTF-8?B?YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh?=\r\n =?UTF-8?B?YWHDoA==?="), // Simple ASCII characters. array('ASCII', 'ASCII'), - // Long ASCII string (more than 47 characters). - array('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'), + // Long ASCII string (more than 45 bytes). + array('1234567890abcdefghij1234567890abcdefghij123456', '1234567890abcdefghij1234567890abcdefghij123456'), + // Strings containing non-ASCII characters: + // length is 44 characters, 45 bytes: converts into one encoded chunk. + array('1234567890abcdefghij1234567890abcdefghij123à', '=?UTF-8?B?MTIzNDU2Nzg5MGFiY2RlZmdoaWoxMjM0NTY3ODkwYWJjZGVmZ2hpajEyM8Og?='), + // longer than 45 bytes: converts into two encoded chunks separated by + // CRLF + space, according to http://www.rfc-editor.org/rfc/rfc2047.txt + array('1234567890abcdefghij1234567890abcdefghij1234à', "=?UTF-8?B?MTIzNDU2Nzg5MGFiY2RlZmdoaWoxMjM0NTY3ODkwYWJjZGVmZ2hpajEyMzQ=?=\r\n =?UTF-8?B?w6A=?="), ); }