From 3d530f3c6a500600cc3abbd958766165dfa80fd3 Mon Sep 17 00:00:00 2001
From: Bob Vincent <bobvin@pillars.net>
Date: Sun, 17 Apr 2011 13:24:36 -0400
Subject: [PATCH] Issue #299138 by catch, Kevin Hankens, drewish, arjenk, jrglasgow, stella, sun, kscheirer, lilou, pillarsdotnet: drupal_html_to_text() formatting is broken and does not have tests.

---
 includes/mail.inc                  |    3 +
 modules/simpletest/tests/mail.test |  336 ++++++++++++++++++++++++++++++++++++
 2 files changed, 339 insertions(+), 0 deletions(-)

diff --git a/includes/mail.inc b/includes/mail.inc
index d2febed39686c9bf3f6f7a2bf99fa1377d09f4de..3633f23dfa9e19546b26dd89bd7fc463d0ec2722 100644
--- a/includes/mail.inc
+++ b/includes/mail.inc
@@ -509,6 +509,9 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
         $chunk = $casing($chunk);
       }
       // Format it and apply the current indentation.
+      if ($output) {
+        $output .= "\n";
+      }
       $output .= drupal_wrap_mail($chunk, implode('', $indent));
       // Remove non-quotation markers from indentation.
       $indent = array_map('_drupal_html_to_text_clean', $indent);
diff --git a/modules/simpletest/tests/mail.test b/modules/simpletest/tests/mail.test
index 8a7b152d9d32eee7ae47c9ef8b5fb9c77f4e0cf1..f563a70dd80121a68354554bab2868d89e2cd0ac 100644
--- a/modules/simpletest/tests/mail.test
+++ b/modules/simpletest/tests/mail.test
@@ -63,3 +63,339 @@ class MailTestCase extends DrupalWebTestCase implements MailSystemInterface {
   }
 }
 
+/**
+ * Unit tests for drupal_html_to_text().
+ */
+class DrupalHtmlToTextTestCase extends DrupalUnitTestCase {
+  public static function getInfo() { // Returns name, description and group.
+    return array(
+      'name'  => 'HTML to text conversion',
+      'description' => 'Tests drupal_html_to_text().',
+      'group' => 'Mail',
+    );
+  }
+
+  /**
+   * Tests that each supported tag is converted to its expected plain text.
+   */
+  function testTags() {
+    $tests = array(
+      '<a href = "http://drupal.org">Drupal.org</a>' => 'Drupal.org [1]
+
+[1] http://drupal.org
+',
+      '<em>Drupal</em>' => '/Drupal/',
+      '<i>Drupal</i>' => '/Drupal/',
+      '<strong>Drupal</strong>' => '*Drupal*',
+      '<b>Drupal</b>' => '*Drupal*',
+      'Drupal<br />Drupal' => 'Drupal
+Drupal',
+      '<p>Drupal</p>' => 'Drupal
+
+',
+      '<blockquote>Drupal</blockquote>' => '> Drupal
+',
+      '<ul>Drupal</ul>' => 'Drupal
+
+',
+      '<ul><li>Drupal</li></ul>' => ' * Drupal
+
+',
+      '<ul><li>Drupal</li><li>Drupal</li></ul>' => ' * Drupal
+ * Drupal
+
+',
+      '<ol>Drupal</ol>' => 'Drupal
+
+',
+      '<ol><li>Drupal</li></ol>' => ' 1) Drupal
+
+',
+      '<ol><li>Drupal</li><li>Drupal</li></ol>' => ' 1) Drupal
+ 2) Drupal
+
+',
+      '<ul><li>Drupal</li><li><ol><li>Drupal</li><li>Drupal</li></ol></li></ul>' => ' * Drupal
+ *  1) Drupal
+    2) Drupal
+
+
+',
+      '<ol><li>Drupal</li><li><ul><li>Drupal</li><li>Drupal</li></ul></li></ol>' => ' 1) Drupal
+ 2)  * Drupal
+     * Drupal
+
+
+',
+      '<dl>Drupal</dl>' => 'Drupal
+
+',
+      '<dt>Drupal</dt>' => 'Drupal
+',
+      '<dl><dt>Drupal</dl>' => 'Drupal
+
+
+',
+      '<dl><dt>Drupal<dd>Drupal</dl>' => 'Drupal
+    Drupal
+
+',
+      '<dl><dt>Drupal</dt><dd>Drupal</dd></dl>' => 'Drupal
+    Drupal
+
+',
+      '<h1>Drupal</h1>' => '======== DRUPAL ' . str_repeat('=', 62) . "\n\n",
+      '<h2>Drupal</h2>' => '-------- DRUPAL ' . str_repeat('-', 62) . "\n\n",
+      '<h3>Drupal</h3>' => '.... Drupal
+
+',
+      '<h4>Drupal</h4>' => '.. Drupal
+
+',
+      '<h5>Drupal</h5>' => 'Drupal
+
+',
+      '<h6>Drupal</h6>' => 'Drupal
+
+',
+      'Drupal<hr />' => "Drupal\n" . str_repeat('-', 78) . "\n",
+      'Drupal<hr>Drupal' => "Drupal\n" . str_repeat('-', 78) . "\nDrupal",
+      // Malformed HTML: unclosed <br> and <li>, and a <ul> closed by </ol>.
+      'Drupal<br>Drupal' => 'Drupal
+Drupal',
+      '<ul><li>Drupal<li>Drupal</ul>' => ' * Drupal
+ * Drupal
+
+',
+      '<ol><li>Drupal<li>Drupal</ol>' => ' 1) Drupal
+ 2) Drupal
+
+',
+      '<ul><li>Drupal<li>Drupal</ol>' => ' * Drupal
+ * Drupal
+
+',
+      // Unsupported tags: only the enclosed text is expected in the output.
+      '<div>Drupal</div>' => 'Drupal',
+      '<html>Drupal</html>' => 'Drupal',
+      '<script type="text/javascript">Drupal</script>' => 'Drupal',
+    );
+
+    foreach ($tests as $html => $text) {
+      $result = drupal_html_to_text($html);
+      $this->assertEqual($result, $text, t('@html:<br />!first<br />is equal to<br />!second', array(
+        '@html' => var_export($html, TRUE),
+        '!first' => nl2br(check_plain(var_export($result, TRUE))),
+        '!second' => nl2br(check_plain(var_export($text, TRUE))),
+      )));
+    }
+  }
+
+  /**
+   * Tests drupal_html_to_text() against a complete, nested HTML document.
+   */
+  function testDrupalHtmlToTextFull() {
+    // HTML input: headings, paragraphs, nested blockquotes, lists and links.
+    $html = <<<EOS
+<h1>Testing headline</h1>
+
+<h2>Sub-headline</h2>
+
+<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, <a href="http://drupal.org">quis nostrud exercitation</a> ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
+
+<p>
+A second paragraph.
+</p>
+
+<blockquote>
+  <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
+
+  <ul>
+    <li>Unordered List item</li>
+    <li>List item</li>
+    <li>List item that is really long. Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+    </li>
+    <li>List item</li>
+  </ul>
+
+  <blockquote>
+    <ol>
+      <li>Ordered List item</li>
+      <li>List item</li>
+      <li><p>List item that is really long. Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.</p>
+        <blockquote>
+        <p>Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure</p>
+        <hr />
+        <p>Dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
+      </blockquote>
+      </li>
+      <li>List item</li>
+    </ol>
+  </blockquote>
+</blockquote>
+
+<h2><a href="http://drupal.org/user">Testing linked headline</a></h2>
+
+<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.</p>
+
+<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
+
+<dl>
+  <dt>Definition list item</dt>
+  <dd>Definition list body Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut.</dd>
+  <dt>Definition list item</dt>
+  <dd>Definition list body Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut.</dd>
+</dl>
+
+<h2>Really really long sub-headline lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam.</h2>
+
+<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
+EOS;
+
+    // Expected plain text, with link URLs as numbered footnotes at the end.
+    $expected_result = <<<EOS
+======== TESTING HEADLINE ====================================================
+
+-------- SUB-HEADLINE --------------------------------------------------------
+
+Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
+tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
+quis nostrud exercitation [1] ullamco laboris nisi ut aliquip ex ea commodo
+consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
+cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
+proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+
+A second paragraph.
+
+> Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
+> tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
+> quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+> consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
+> cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat
+> non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+>
+> * Unordered List item
+> * List item
+> * List item that is really long. Lorem ipsum dolor sit amet, consectetur
+>   adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore
+>   magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco
+>   laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in
+>   reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
+>   pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa
+>   qui officia deserunt mollit anim id est laborum.
+> * List item
+>
+>> 1) Ordered List item
+>> 2) List item
+>> 3) List item that is really long. Lorem ipsum dolor sit amet, consectetur
+>>    adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore
+>>    magna aliqua.
+>>
+>>     "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris
+>>      nisi ut aliquip ex ea commodo consequat. Duis aute irure
+>>
+>>      ----------------------------------------------------------------------
+>>      Dolor in reprehenderit in voluptate velit esse cillum dolore eu
+>>      fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
+>>      proident, sunt in culpa qui officia deserunt mollit anim id est
+>>      laborum."
+>>
+>> 4) List item
+>>
+-------- TESTING LINKED HEADLINE [2] -----------------------------------------
+
+Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
+tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
+quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+consequat.
+
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore
+eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident,
+sunt in culpa qui officia deserunt mollit anim id est laborum.
+
+Definition list item
+    Definition list body Lorem ipsum dolor sit amet, consectetur adipisicing
+    elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+    Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
+    ut.
+Definition list item
+    Definition list body Lorem ipsum dolor sit amet, consectetur adipisicing
+    elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
+    Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi
+    ut.
+
+-------- REALLY REALLY LONG SUB-HEADLINE LOREM IPSUM DOLOR SIT AMET,
+         CONSECTETUR ADIPISICING ELIT, SED DO EIUSMOD TEMPOR INCIDIDUNT UT
+         LABORE ET DOLORE MAGNA ALIQUA. UT ENIM AD MINIM VENIAM. -------------
+
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore
+eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident,
+sunt in culpa qui officia deserunt mollit anim id est laborum.
+
+
+[1] http://drupal.org
+[2] http://drupal.org/user
+
+EOS;
+
+    $result = drupal_html_to_text($html);
+    $this->assertEqual($result, $expected_result, t('HTML document properly converted to text.'));
+  }
+
+  /**
+   * Tests the $allowed_tags argument of drupal_html_to_text().
+   */
+  function testDrupalHtmlToTextArgs() {
+    // Tags missing from the second argument are stripped, not converted.
+    $result = drupal_html_to_text('<b>Drupal</b>', array('b'));
+    $this->assertEqual($result, '*Drupal*', t('Allowed tag found.'));
+
+    $result = drupal_html_to_text('<h1>Drupal</h1>', array('b'));
+    $this->assertEqual($result, 'Drupal', t('Disallowed tag not found.'));
+
+    $result = drupal_html_to_text('<b>Drupal</b>', array('a', 'br', 'h1'));
+    $this->assertEqual($result, 'Drupal', t('Disallowed tags not found.'));
+  }
+
+  /**
+   * Tests that block-level tags yield at least one newline between their text,
+   * by asserting that no output line contains the word "Drupal" twice.
+   */
+  function testDrupalHtmlToTextBlockTagToNewline() {
+    $input = 'Drupal'
+      . '<address>Drupal</address>'
+      . '<blockquote>Drupal</blockquote>'
+      . '<br />Drupal'
+      . '<del>Drupal</del>'
+      . '<div>Drupal</div>'
+      . '<dl><dt>Drupal</dt>'
+      . '<dt>Drupal</dt>'
+      . '<dd>Drupal</dd>'
+      . '<dd>Drupal</dd></dl>'
+      . '<h1>Drupal</h1>'
+      . '<h2>Drupal</h2>'
+      . '<h3>Drupal</h3>'
+      . '<h4>Drupal</h4>'
+      . '<h5>Drupal</h5>'
+      . '<h6>Drupal</h6>'
+      . '<hr />Drupal'
+      . '<ins>Drupal</ins>'
+      . '<ol><li>Drupal</li>'
+      . '<li>Drupal</li></ol>'
+      . '<p>Drupal</p>'
+      . '<pre>Drupal</pre>'
+      . '<table><thead><tr><td>Drupal</td></tr></thead>'
+      . '<tbody><tr><td>Drupal</td></tr>'
+      . '<tr><td>Drupal</td></tr></tbody></table>'
+      . '<ul><li>Drupal</li>'
+      . '<li>Drupal</li></ul>'
+      . 'Drupal';
+    $output = drupal_html_to_text($input);
+    $this->assertFalse(
+      preg_match('/Drupal[^\n]*Drupal/s', $output),
+      t('Block-level HTML tags should force newlines: @output',
+        array('@output' => $output)
+      )
+    );
+  }
+}
-- 
1.7.1

