diff --git a/core/lib/Drupal/Component/Utility/Unicode.php b/core/lib/Drupal/Component/Utility/Unicode.php index f6b4668..df5bb39 100644 --- a/core/lib/Drupal/Component/Utility/Unicode.php +++ b/core/lib/Drupal/Component/Utility/Unicode.php @@ -585,7 +585,7 @@ public static function strcasecmp($str1 , $str2) { } /** - * Encodes MIME/HTTP headers that contain incorrectly encoded characters. + * Encodes MIME/HTTP headers containing characters that should be encoded. * * For example, Unicode::mimeHeaderEncode('tést.txt') returns * "=?UTF-8?B?dMOpc3QudHh0?=". @@ -596,8 +596,13 @@ public static function strcasecmp($str1 , $str2) { * - Only encode strings that contain non-ASCII characters. * - We progressively cut-off a chunk with self::truncateBytes(). This ensures * each chunk starts and ends on a character boundary. - * - Using \n as the chunk separator may cause problems on some systems and - * may have to be changed to \r\n or \r. + * - According to RFC 2047, we split long lines using CRLF SPACE as separator. + * - PHP's mail() function (http://php.net/manual/en/function.mail.php) used + * to have an explicit restriction that subject lines could not contain LF. + * This restriction has been lifted since November 2009 or earlier, but + * occasional problem reports have still popped up about a system not being + * able to deal with "\n" in subject lines, whose source (PHP or mailer) was + * not confirmed. * * @param string $string * The header to encode. 
@@ -607,16 +612,22 @@ public static function strcasecmp($str1 , $str2) { */ public static function mimeHeaderEncode($string) { if (preg_match('/[^\x20-\x7E]/', $string)) { - $chunk_size = 47; // floor((75 - strlen("=?UTF-8?B??=")) * 0.75); + // Encoded lines must be 75 characters or less; base64 expands to 4/3 + // times the line size (in bytes), rounded up to a multiple of 4, so: + // ceil($chunk_size / 3) * 4 must be <= 75 - strlen("=?UTF-8?B??=") + // => ceil($chunk_size / 3) must be <= 63 / 4 + // => $chunk_size must be <= floor(63 / 4) * 3 + $chunk_size = 45; $len = strlen($string); $output = ''; while ($len > 0) { $chunk = static::truncateBytes($string, $chunk_size); - $output .= ' =?UTF-8?B?' . base64_encode($chunk) . "?=\n"; + $output .= '=?UTF-8?B?' . base64_encode($chunk) . "?=\r\n "; $c = strlen($chunk); $string = substr($string, $c); $len -= $c; } + // Remove the trailing \r\n and space from the encoded string. return trim($output); } return $string; diff --git a/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php b/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php index 39759b5..17c6fa2 100644 --- a/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php +++ b/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php @@ -87,6 +87,16 @@ public function providerTestMimeHeader() { array('tést.txt', '=?UTF-8?B?dMOpc3QudHh0?='), // Simple ASCII characters. array('ASCII', 'ASCII'), + // Long ASCII string (more than 45 bytes). + array('1234567890abcdefghij1234567890abcdefghij123456', '1234567890abcdefghij1234567890abcdefghij123456'), + // Strings containing non-ASCII characters: + // length is 44 characters, 45 bytes: converts into one encoded chunk. 
+ array('1234567890abcdefghij1234567890abcdefghij123à', '=?UTF-8?B?MTIzNDU2Nzg5MGFiY2RlZmdoaWoxMjM0NTY3ODkwYWJjZGVmZ2hpajEyM8Og?='), + // longer than 45 bytes: converts into two encoded chunks separated by + // CRLF + space, according to http://www.rfc-editor.org/rfc/rfc2047.txt + array('1234567890abcdefghij1234567890abcdefghij1234à', "=?UTF-8?B?MTIzNDU2Nzg5MGFiY2RlZmdoaWoxMjM0NTY3ODkwYWJjZGVmZ2hpajEyMzQ=?=\r\n =?UTF-8?B?w6A=?="), + // ASCII string including a LF. + array("ASCII\nASCII", '=?UTF-8?B?QVNDSUkKQVNDSUk=?='), ); }