diff --git a/core/lib/Drupal/Component/Utility/Unicode.php b/core/lib/Drupal/Component/Utility/Unicode.php index a33250f935..42ceb5411a 100644 --- a/core/lib/Drupal/Component/Utility/Unicode.php +++ b/core/lib/Drupal/Component/Utility/Unicode.php @@ -372,7 +372,7 @@ public static function strcasecmp($str1, $str2) { } /** - * Encodes MIME/HTTP headers that contain incorrectly encoded characters. + * Encodes MIME/HTTP headers containing characters that should be encoded. * * For example, Unicode::mimeHeaderEncode('tést.txt') returns * "=?UTF-8?B?dMOpc3QudHh0?=". @@ -383,8 +383,12 @@ public static function strcasecmp($str1, $str2) { * - Only encode strings that contain non-ASCII characters. * - We progressively cut-off a chunk with self::truncateBytes(). This ensures * each chunk starts and ends on a character boundary. - * - Using \n as the chunk separator may cause problems on some systems and - * may have to be changed to \r\n or \r. + * - According to RFC 2047, we split long lines using CRLF SPACE as separator. + * - PHP's mail() function (http://php.net/manual/en/function.mail.php) used + * to have an explicit restriction that subject lines could not contain LF. + * This restriction has been lifted since November 2009 or earlier, so we + * assume that systems having problems with them are extinct by now. + * See https://www.drupal.org/project/drupal/issues/84883. * * @param string $string * The header to encode. 
@@ -396,13 +400,17 @@ public static function strcasecmp($str1, $str2) { */ public static function mimeHeaderEncode($string, $shorten = FALSE) { if (preg_match('/[^\x20-\x7E]/', $string)) { - // floor((75 - strlen("=?UTF-8?B??=")) * 0.75); - $chunk_size = 47; + // Encoded lines must be 75 characters or less; base64 expands to 4/3 + // times the line size (in bytes), rounded up to a multiple of 4, so: + // ceil($chunk_size / 3) * 4 must be <= 75 - strlen("=?UTF-8?B??=") + // => ceil($chunk_size / 3) must be <= 63 / 4 + // => $chunk_size must be <= floor(63 / 4) * 3 + $chunk_size = 45; $len = strlen($string); $output = ''; while ($len > 0) { $chunk = static::truncateBytes($string, $chunk_size); - $output .= ' =?UTF-8?B?' . base64_encode($chunk) . "?=\n"; + $output .= '=?UTF-8?B?' . base64_encode($chunk) . "?=\r\n "; if ($shorten) { break; } @@ -410,6 +418,7 @@ public static function mimeHeaderEncode($string, $shorten = FALSE) { $string = substr($string, $c); $len -= $c; } + // Remove the trailing \r\n and space left after the last encoded chunk. return trim($output); } return $string; diff --git a/core/modules/system/tests/src/Kernel/Mail/MailTest.php b/core/modules/system/tests/src/Kernel/Mail/MailTest.php index af593f3f13..5ddf856018 100644 --- a/core/modules/system/tests/src/Kernel/Mail/MailTest.php +++ b/core/modules/system/tests/src/Kernel/Mail/MailTest.php @@ -147,9 +147,9 @@ public function testFromAndReplyToHeader() { $captured_emails = \Drupal::state()->get('system.test_mail_collector'); $sent_message = end($captured_emails); // From header is correctly encoded. - $this->assertEquals('=?UTF-8?B?RHLDqXBhbCB0aGlzIGlzIGEgdmVyeSBsb25nIHRlc3Qgc2VudGVuY2UgdG8gdGU=?= ', $sent_message['headers']['From']); + $this->assertEquals('=?UTF-8?B?RHLDqXBhbCB0aGlzIGlzIGEgdmVyeSBsb25nIHRlc3Qgc2VudGVuY2UgdG8g?= ', $sent_message['headers']['From']); // From header is correctly encoded. 
- $this->assertEquals('Drépal this is a very long test sentence to te ', Unicode::mimeHeaderDecode($sent_message['headers']['From'])); + $this->assertEquals('Drépal this is a very long test sentence to ', Unicode::mimeHeaderDecode($sent_message['headers']['From'])); $this->assertFalse(isset($sent_message['headers']['Reply-to']), 'Message reply-to is not set if not specified.'); // Errors-to header must not be set, it is deprecated. $this->assertFalse(isset($sent_message['headers']['Errors-To'])); diff --git a/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php b/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php index 714078241a..c3adc023d2 100644 --- a/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php +++ b/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php @@ -21,21 +21,89 @@ class UnicodeTest extends TestCase { * @covers ::mimeHeaderEncode */ public function testMimeHeaderEncode($value, $encoded) { - $this->assertEquals($encoded, Unicode::mimeHeaderEncode($value)); + // The second parameter ($shorten) is set to FALSE, which is the default, + // to ensure we do not truncate the result. + $this->assertEquals($encoded, Unicode::mimeHeaderEncode($value, FALSE)); } /** - * Data provider for testMimeHeader(). + * Data provider for testMimeHeaderEncode(). * - * @see testMimeHeader() + * @see testMimeHeaderEncode() * * @return array * An array containing a string and its encoded value. 
*/ public function providerTestMimeHeader() { return [ - "Base64 encoding" => ['tést.txt', '=?UTF-8?B?dMOpc3QudHh0?='], - "ASCII characters only" => ['test.txt', 'test.txt'], + 'Base64 encoding' => [ + 'tést.txt', + '=?UTF-8?B?dMOpc3QudHh0?=', + ], + 'ASCII characters only' => [ + 'test.txt', + 'test.txt', + ], + 'ASCII string including a LF' => [ + "ASCII\nASCII", + '=?UTF-8?B?QVNDSUkKQVNDSUk=?=', + ], + 'Long ASCII string (more than 45 bytes)' => [ + '1234567890abcdefghij1234567890abcdefghij123456', + '1234567890abcdefghij1234567890abcdefghij123456', + ], + // Strings containing non-ASCII characters. + 'Non-ASCII string, 44 characters, 45 bytes -> one encoded chunk' => [ + '1234567890abcdefghij1234567890abcdefghij123à', + '=?UTF-8?B?MTIzNDU2Nzg5MGFiY2RlZmdoaWoxMjM0NTY3ODkwYWJjZGVmZ2hpajEyM8Og?=', + ], + // Encoded chunks are separated by CRLF + space, + // @see http://www.rfc-editor.org/rfc/rfc2047.txt + 'Non-ASCII string, >45 bytes -> two encoded chunks' => [ + '1234567890abcdefghij1234567890abcdefghij1234à', + "=?UTF-8?B?MTIzNDU2Nzg5MGFiY2RlZmdoaWoxMjM0NTY3ODkwYWJjZGVmZ2hpajEyMzQ=?=\r\n =?UTF-8?B?w6A=?=", + ], + ]; + } + + /** + * Tests multibyte encoding with shortening. + * + * @dataProvider providerTestMimeHeaderShorten + * @covers ::mimeHeaderEncode + */ + public function testMimeHeaderEncodeShorten($value, $encoded) { + // The second parameter ($shorten) is set to TRUE to ensure we return only + // the first chunk. + $this->assertEquals($encoded, Unicode::mimeHeaderEncode($value, TRUE)); + } + + /** + * Data provider for testMimeHeaderEncodeShorten(). + * + * @see testMimeHeaderEncodeShorten() + * + * @return array + * An array containing a string and its encoded value. 
+ */ + public function providerTestMimeHeaderShorten() { + return [ + 'Long ASCII string that should not be shortened' => [ + '1234567890abcdefghij1234567890abcdefghij123456', + '1234567890abcdefghij1234567890abcdefghij123456', + ], + // Strings containing non-ASCII characters should only be shortened + // if they result in more than one chunk. + 'Non-ASCII string, 44 characters, 45 bytes -> one encoded chunk (not shortened)' => [ + '1234567890abcdefghij1234567890abcdefghij123à', + '=?UTF-8?B?MTIzNDU2Nzg5MGFiY2RlZmdoaWoxMjM0NTY3ODkwYWJjZGVmZ2hpajEyM8Og?=', + ], + // Encoded chunks are separated by CRLF + space, + // @see http://www.rfc-editor.org/rfc/rfc2047.txt + 'Non-ASCII string, >45 bytes -> two encoded chunks (shortened)' => [ + '1234567890abcdefghij1234567890abcdefghij1234à', + "=?UTF-8?B?MTIzNDU2Nzg5MGFiY2RlZmdoaWoxMjM0NTY3ODkwYWJjZGVmZ2hpajEyMzQ=?=", + ], ]; }