From e3d8e1382a403926ff3f79d32bd246ee933243a4 Mon Sep 17 00:00:00 2001 From: Bob Vincent Date: Sat, 23 Apr 2011 14:44:21 -0400 Subject: [PATCH 1/2] Issue #299138 by catch, Kevin Hankens, drewish, arjenk, jrglasgow, stella, sun, kscheirer, lilou, pillarsdotnet: Fix the broken formatting in drupal_html_to_text() and also add tests. (test-only patch) --- modules/simpletest/tests/mail.test | 163 +++++++++++++++++++++++++++++++++++- 1 files changed, 162 insertions(+), 1 deletions(-) diff --git a/modules/simpletest/tests/mail.test b/modules/simpletest/tests/mail.test index 8a7b152d9d32eee7ae47c9ef8b5fb9c77f4e0cf1..458636adaa9919d95e3191ca470eb822a6c8fb5a 100644 --- a/modules/simpletest/tests/mail.test +++ b/modules/simpletest/tests/mail.test @@ -1,6 +1,7 @@ 'HTML to text conversion', + 'description' => 'Tests drupal_html_to_text().', + 'group' => 'Mail', + ); + } + + /** + * Test all supported tags of drupal_html_to_text(). + */ + function testTags() { + $tests = array( + 'Drupal.org' => "Drupal.org [1]\n[1] http://drupal.org\n", + '
Drupal
' => "Drupal\n", + 'Drupal' => "*Drupal*", + '
Drupal
' => "> Drupal", + '
Drupal
Drupal' => "Drupal\nDrupal", + 'Drupal' => "Drupal\n", + '
Drupal
' => "Drupal\n", + '
Drupal
' => "Drupal\n", + '
Drupal
Drupal
' => "Drupal\n Drupal\n", + '
Drupal
Drupal
' => "Drupal\n Drupal\n", + '
Drupal
' => "Drupal\n", + '
Drupal
' => "Drupal", + 'Drupal' => "/Drupal/", + '

Drupal

' => "======== DRUPAL " . str_repeat('=', 62) . "\n", + '

Drupal

' => "-------- DRUPAL " . str_repeat('-', 62) . "\n", + '

Drupal

' => ".... Drupal\n", + '

Drupal

' => ".. Drupal\n", + '
Drupal
' => "Drupal\n", + '
Drupal
' => "Drupal\n", + '
Drupal
' => str_repeat('-', 78) . "\nDrupal\n" . str_repeat('-', 78), + 'Drupal' => "Drupal\n", + 'Drupal' => "/Drupal/", + '
  1. Drupal
' => " 1) Drupal\n", + '
  1. Drupal
    • Drupal
    • Drupal
' => " 1) Drupal\n 2) * Drupal\n * Drupal\n", + '
  1. Drupal
  2. Drupal
' => " 1) Drupal\n 2) Drupal\n", + '
    Drupal
' => "Drupal\n", + '

Drupal

' => "Drupal\n", + '
Drupal
' => "Drupal\n", + 'Drupal' => "*Drupal*", + '
DrupalDrupal
DrupalDrupal
' => "Drupal Drupal\nDrupal Drupal\n", + '' => " * Drupal\n", + '' => " * Drupal /Drupal/ Drupal\n", + '' => " * Drupal\n * 1) Drupal\n 2) Drupal\n", + '' => " * Drupal\n * Drupal\n", + // Tests malformed HTML tags. + '
Drupal
Drupal' => "Drupal\nDrupal", + '
Drupal
Drupal' => str_repeat('-', 78) . "\nDrupal\n" . str_repeat('-', 78) . "\nDrupal", + '
  1. Drupal
  2. Drupal
' => " 1) Drupal\n 2) Drupal\n", + '' => " * Drupal /Drupal/ Drupal\n", + '
  • Drupal' => "Drupal\n * Drupal", + // Tests some unsupported HTML tags. + 'Drupal' => "Drupal", + '' => "Drupal", + ); + + foreach ($tests as $html => $text) { + $result = drupal_html_to_text($html); + $this->assertEqual($result, $text, + var_export($html, TRUE) + . '
    ' + . str_replace("\n", '\n', check_plain(var_export($result, TRUE))) + . '
    is equal to
    ' + . str_replace("\n", '\n', check_plain(var_export($text, TRUE))) + ); + } + } + + /** + * Test $allowed_tags argument of drupal_html_to_text(). + */ + function testDrupalHtmlToTextArgs() { + // The second parameter of drupal_html_to_text() overrules the allowed tags. + $result = drupal_html_to_text('Drupal Drupal Drupal', array('b')); + $this->assertEqual($result, 'Drupal *Drupal* Drupal', 'Allowed <b> tag found.'); + + $result = drupal_html_to_text('Drupal

    Drupal

    Drupal', array('b')); + $this->assertEqual($result, 'Drupal Drupal Drupal', 'Disallowed <h1> tag not found.'); + + $result = drupal_html_to_text('Drupal

    Drupal

    Drupal', array('a', 'br', 'h1')); + $this->assertEqual($result, 'Drupal Drupal Drupal', 'Disallowed <p>, <em>, and <b> tags not found.'); + + $result = drupal_html_to_text('Drupal', array('html', 'body')); + $this->assertEqual($result, 'Drupal', 'Unsupported <html> and <body> tags not found.'); + } + + /** + * Test that internal whitespace in plaintext input is preserved. + */ + function testDrupalHtmltoTextPreservesWhitespace() { + $input = "\n \n \nDrupal\n Drupal\n Drupal\n \n \n"; + $expected = "Drupal\n Drupal\n Drupal"; + $result = drupal_html_to_text($input, NULL); + $this->assertEqual($result, $expected, + 'Internal Whitespace is preserved:
    ' + . "

    $input

    becomes
    $result
    " + ); + } + + /** + * Test that text separated by block-level tags in HTML gets separated by + * (at least) a newline in the plaintext version. + */ + function testDrupalHtmlToTextBlockTagToNewline() { + $input = '[text]' + . '
    [address]
    ' + . '
    [blockquote]
    ' + . '
    [br]' + . '[del]' + . '
    [div]
    ' + . '
    [dl-dt]
    ' + . '
    [dt]
    ' + . '
    [dd]
    ' + . '
    [dd]
    ' + . '

    [h1]

    ' + . '

    [h2]

    ' + . '

    [h3]

    ' + . '

    [h4]

    ' + . '
    [h5]
    ' + . '
    [h6]
    ' + . '
    [hr]' + . '[ins]' + . '
    1. [ol-li]
    2. ' + . '
    3. [li]
    ' + . '

    [p]

    ' + . '
    [pre]
    ' + . '' + . '' + . '
    [table-thead--tr-td]
    [tbody-tr-td]
    [tr-td]
    ' + . '' + . '[text]'; + $output = drupal_html_to_text($input); + $this->assertFalse( + preg_match('/\][^\n]*\[/s', $output), + 'Block-level HTML tags should force newlines: ' + . nl2br(check_plain($output)) + ); + $output_upper = drupal_strtoupper($output); + $upper_input = drupal_strtoupper($input); + $upper_output = drupal_html_to_text($upper_input); + $this->assertEqual( + $upper_output, + $output_upper, + 'Tag recognition should be case-insensitive:
    ' + . $upper_output + . '
    should be equal to
    ' + . $output_upper + ); + } +} -- 1.7.1 From fd354aad1259d8f6637d6dab93e842921095b1b9 Mon Sep 17 00:00:00 2001 From: Bob Vincent Date: Sat, 23 Apr 2011 15:19:50 -0400 Subject: [PATCH 2/2] Issue #299138 by catch, Kevin Hankens, drewish, arjenk, jrglasgow, stella, sun, kscheirer, lilou, pillarsdotnet: Fix the broken formatting in drupal_html_to_text() and also add tests. (tests+fix patch) --- includes/mail.inc | 128 +++++++++++++++++++++++++++++++++++++---------------- 1 files changed, 90 insertions(+), 38 deletions(-) diff --git a/includes/mail.inc b/includes/mail.inc index d2febed39686c9bf3f6f7a2bf99fa1377d09f4de..a6f7fc464f8e9118c015263fde8f01a7a1324627 100644 --- a/includes/mail.inc +++ b/includes/mail.inc @@ -267,7 +267,7 @@ interface MailSystemInterface { * @return * The formatted $message. */ - public function format(array $message); + public function format(array $message); /** * Send a message composed by drupal_mail(). @@ -294,7 +294,7 @@ interface MailSystemInterface { * @return * TRUE if the mail was successfully accepted for delivery, otherwise FALSE. */ - public function mail(array $message); + public function mail(array $message); } /** @@ -303,39 +303,41 @@ interface MailSystemInterface { * We use delsp=yes wrapping, but only break non-spaced languages when * absolutely necessary to avoid compatibility issues. * - * We deliberately use LF rather than CRLF, see drupal_mail(). + * We deliberately use MAIL_LINE_ENDINGS rather than CRLF. * * @param $text * The plain text to process. * @param $indent (optional) * A string to indent the text with. Only '>' characters are repeated on * subsequent wrapped lines. Others are replaced by spaces. + * + * @see drupal_mail() */ function drupal_wrap_mail($text, $indent = '') { - // Convert CRLF into LF. - $text = str_replace("\r", '', $text); + // Convert CRLF into MAIL_LINE_ENDINGS. + $text = preg_replace('/\r?\n/', MAIL_LINE_ENDINGS, $text); // See if soft-wrapping is allowed. 
$clean_indent = _drupal_html_to_text_clean($indent); $soft = strpos($clean_indent, ' ') === FALSE; // Check if the string has line breaks. - if (strpos($text, "\n") !== FALSE) { + if (strpos($text, MAIL_LINE_ENDINGS) !== FALSE) { // Remove trailing spaces to make existing breaks hard. - $text = preg_replace('/ +\n/m', "\n", $text); + $text = preg_replace('/ +\r?\n/m', MAIL_LINE_ENDINGS, $text); // Wrap each line at the needed width. - $lines = explode("\n", $text); - array_walk($lines, '_drupal_wrap_mail_line', array('soft' => $soft, 'length' => strlen($indent))); - $text = implode("\n", $lines); + $lines = explode(MAIL_LINE_ENDINGS, $text); + array_walk($lines, '_drupal_wrap_mail_line', array('soft' => $soft, 'length' => drupal_strlen($indent))); + $text = implode(MAIL_LINE_ENDINGS, $lines); } else { // Wrap this line. - _drupal_wrap_mail_line($text, 0, array('soft' => $soft, 'length' => strlen($indent))); + _drupal_wrap_mail_line($text, 0, array('soft' => $soft, 'length' => drupal_strlen($indent))); } // Empty lines with nothing but spaces. - $text = preg_replace('/^ +\n/m', "\n", $text); + $text = preg_replace('/^ +\r?\n/m', MAIL_LINE_ENDINGS, $text); // Space-stuff special lines. - $text = preg_replace('/^(>| |From)/m', ' $1', $text); + $text = preg_replace('/^(>|From)/m', ' $1', $text); // Apply indentation. We only include non-'>' indentation on the first line. - $text = $indent . substr(preg_replace('/^/m', $clean_indent, $text), strlen($indent)); + $text = $indent . drupal_substr(preg_replace('/^/m', $clean_indent, $text), drupal_strlen($indent)); return $text; } @@ -347,11 +349,18 @@ function drupal_wrap_mail($text, $indent = '') { * The output will be suitable for use as 'format=flowed; delsp=yes' text * (RFC 3676) and can be passed directly to drupal_mail() for sending. * - * We deliberately use LF rather than CRLF, see drupal_mail(). + * We deliberately use MAIL_LINE_ENDINGS rather than CRLF. 
* * This function provides suitable alternatives for the following tags: - *