From d09963547e8a335d1a15db393080908171667070 Mon Sep 17 00:00:00 2001
From: Bob Vincent <bobvin@pillars.net>
Date: Sun, 24 Apr 2011 01:31:59 -0400
Subject: [PATCH] Issue #299138 by catch, Kevin Hankens, drewish, arjenk, jrglasgow, stella, sun, kscheirer, lilou, pillarsdotnet: Fix the broken formatting in drupal_html_to_text() and also add tests. (test-only patch)

---
 modules/simpletest/tests/mail.test |  169 +++++++++++++++++++++++++++++++++++-
 1 files changed, 168 insertions(+), 1 deletions(-)

diff --git a/modules/simpletest/tests/mail.test b/modules/simpletest/tests/mail.test
index 8a7b152d9d32eee7ae47c9ef8b5fb9c77f4e0cf1..9f8367597d0b7a6ec142cc7ff0c6f2377f0fa9c6 100644
--- a/modules/simpletest/tests/mail.test
+++ b/modules/simpletest/tests/mail.test
@@ -1,6 +1,7 @@
 <?php
 
 /**
+ * @file
  * Test the Drupal mailing system.
  */
 class MailTestCase extends DrupalWebTestCase implements MailSystemInterface {
@@ -43,7 +44,7 @@ class MailTestCase extends DrupalWebTestCase implements MailSystemInterface {
   /**
    * Concatenate and wrap the e-mail body for plain-text mails.
    *
-   * @see DefaultMailSystem
+   * @see DefaultMailSystem::format()
    */
   public function format(array $message) {
     // Join the body array into one string.
@@ -63,3 +64,169 @@ class MailTestCase extends DrupalWebTestCase implements MailSystemInterface {
   }
 }
 
+/**
+ * Unit tests for drupal_html_to_text().
+ */
+class DrupalHtmlToTextTestCase extends DrupalUnitTestCase {
+  public static function getInfo() {
+    return array(
+      'name' => 'HTML to text conversion',
+      'description' => 'Tests drupal_html_to_text().',
+      'group' => 'Mail',
+    );
+  }
+
+  /**
+   * Test all supported tags of drupal_html_to_text().
+   */
+  function testTags() {
+    $tests = array(
+      '<a href = "http://drupal.org">Drupal.org</a>' => "Drupal.org [1]\n[1] http://drupal.org",
+      '<address>Drupal</address>' => "Drupal",
+      '<b>Drupal</b>' => "*Drupal*",
+      '<blockquote>Drupal</blockquote>' => "> Drupal",
+      '<br />Drupal<br />Drupal<br /><br />Drupal' => "Drupal\nDrupal\n\nDrupal",
+      '<del>Drupal</del>' => "Drupal",
+      '<div>Drupal</div>' => "Drupal",
+      '<dl><dt>Drupal</dl>' => "Drupal",
+      '<dl><dt>Drupal</dt><dd>Drupal</dd></dl>' => "Drupal\n    Drupal",
+      '<dl><dt>Drupal<dd>Drupal</dl>' => "Drupal\n    Drupal",
+      '<dl>Drupal</dl>' => "Drupal",
+      '<dt>Drupal</dt>' => "Drupal",
+      '<em>Drupal</em>' => "/Drupal/",
+      '<h1>Drupal</h1>' => "======== DRUPAL " . str_repeat('=', 62),
+      '<h2>Drupal</h2>' => "-------- DRUPAL " . str_repeat('-', 62),
+      '<h3>Drupal</h3>' => ".... Drupal",
+      '<h4>Drupal</h4>' => ".. Drupal",
+      '<h5>Drupal</h5>' => "Drupal",
+      '<h6>Drupal</h6>' => "Drupal",
+      '<hr />Drupal<hr />' => str_repeat('-', 78) . "\nDrupal\n" . str_repeat('-', 78),
+      '<ins>Drupal</ins>' => "Drupal",
+      '<i>Drupal</i>' => "/Drupal/",
+      '<ol><li>Drupal</li></ol>' => " 1) Drupal",
+      '<ol><li>Drupal</li><li><ul><li>Drupal</li><li>Drupal</li></ul></li></ol>' => " 1) Drupal\n 2)  * Drupal\n     * Drupal",
+      '<ol><li>Drupal</li><li>Drupal</li></ol>' => " 1) Drupal\n 2) Drupal",
+      '<ol>Drupal</ol>' => "Drupal",
+      '<p>Drupal</p>' => "Drupal",
+      '<pre>Drupal</pre>' => "Drupal",
+      '<strong>Drupal</strong>' => "*Drupal*",
+      '<table><tr><td>Drupal</td><td>Drupal</td></tr><tr><td>Drupal</td><td>Drupal</td></tr></table>' => "Drupal Drupal\nDrupal Drupal",
+      '<ul><li>Drupal</li></ul>' => " * Drupal",
+      '<ul><li>Drupal <em>Drupal</em> Drupal</li></ul>' => " * Drupal /Drupal/ Drupal",
+      '<ul><li>Drupal</li><li><ol><li>Drupal</li><li>Drupal</li></ol></li></ul>' => " * Drupal\n *  1) Drupal\n    2) Drupal",
+      '<ul><li>Drupal</li><li>Drupal</li></ul>' => " * Drupal\n * Drupal",
+      // Tests malformed HTML tags.
+      '<br>Drupal<br>Drupal' => "Drupal\nDrupal",
+      '<hr>Drupal<hr>Drupal' => str_repeat('-', 78) . "\nDrupal\n" . str_repeat('-', 78) . "\nDrupal",
+      '<ol><li>Drupal<li>Drupal</ol>' => " 1) Drupal\n 2) Drupal",
+      '<ul><li>Drupal <em>Drupal</em> Drupal</ul></ul>' => " * Drupal /Drupal/ Drupal",
+      '<ul><li>Drupal<li>Drupal</ol>' => " * Drupal\n * Drupal",
+      '<ul><li>Drupal<li>Drupal</ul>' => " * Drupal\n * Drupal",
+      '<ul>Drupal</ul>' => "Drupal",
+      'Drupal</ul></ol></dl><li>Drupal' => "Drupal\n * Drupal",
+      // Tests some unsupported HTML tags.
+      '<html>Drupal</html>' => "Drupal",
+      '<script type="text/javascript">Drupal</script>' => "Drupal",
+    );
+
+    foreach ($tests as $html => $text) {
+      $result = drupal_html_to_text($html);
+      // The message is built as HTML (<br /> separators): escape every value.
+      $this->assertEqual($result, $text,
+        check_plain(var_export($html, TRUE)) . '<br />'
+        . str_replace("\n", '\n', check_plain(var_export($result, TRUE)))
+        . '<br />is equal to<br />'
+        . str_replace("\n", '\n', check_plain(var_export($text, TRUE)))
+      );
+    }
+  }
+
+  /**
+   * Test $allowed_tags argument of drupal_html_to_text().
+   */
+  function testDrupalHtmlToTextArgs() {
+    // The second parameter of drupal_html_to_text() overrules the allowed tags.
+    $result = drupal_html_to_text('Drupal <b>Drupal</b> Drupal', array('b'));
+    $this->assertEqual($result, 'Drupal *Drupal* Drupal', 'Allowed &lt;b&gt; tag found.');
+
+    $result = drupal_html_to_text('Drupal <h1>Drupal</h1> Drupal', array('b'));
+    $this->assertEqual($result, 'Drupal Drupal Drupal', 'Disallowed &lt;h1&gt; tag not found.');
+
+    $result = drupal_html_to_text('Drupal <p><em><b>Drupal</b></em><p> Drupal', array('a', 'br', 'h1'));
+    $this->assertEqual($result, 'Drupal Drupal Drupal', 'Disallowed &lt;p&gt;, &lt;em&gt;, and &lt;b&gt; tags not found.');
+
+    $result = drupal_html_to_text('<html><body>Drupal</body></html>', array('html', 'body'));
+    $this->assertEqual($result, 'Drupal', 'Unsupported &lt;html&gt; and &lt;body&gt; tags not found.');
+  }
+
+  /**
+   * Test that whitespace is collapsed, except within <pre> tags.
+   */
+  function testDrupalHtmlToTextCollapsesWhitespace() {
+    $input = "\n\n \n  \n<pre>Drupal\n\n Drupal\n\n  Drupal</pre>\n \n  \n\n";
+    $collapsed = "Drupal Drupal Drupal";
+    $preserved = "Drupal\n\n Drupal\n\n  Drupal";
+    $result = drupal_html_to_text($input, array('p'));
+    $this->assertEqual($result, $collapsed,
+      'Whitespace inside &lt;pre&gt; tags is collapsed if &lt;pre&gt; is disallowed:<br />'
+      . '<pre>' . check_plain($input) . '</pre><br />becomes<br /><pre>' . check_plain($result) . '</pre>'
+    );
+    $result = drupal_html_to_text($input);
+    $this->assertEqual($result, $preserved,
+      'Whitespace inside &lt;pre&gt; tags is preserved if &lt;pre&gt; is allowed:<br />'
+      . '<pre>' . check_plain($input) . '</pre><br />becomes<br /><pre>' . check_plain($result) . '</pre>'
+    );
+  }
+
+  /**
+   * Test that block-level tags yield (at least) a newline in the plain-text
+   * version, and that tag recognition is case-insensitive.
+   */
+  function testDrupalHtmlToTextBlockTagToNewline() {
+    $input = '[text]'
+      . '<address>[address]</address>'
+      . '<blockquote>[blockquote]</blockquote>'
+      . '<br />[br]'
+      . '<del>[del]</del>'
+      . '<div>[div]</div>'
+      . '<dl><dt>[dl-dt]</dt>'
+      . '<dt>[dt]</dt>'
+      . '<dd>[dd]</dd>'
+      . '<dd>[dd]</dd></dl>'
+      . '<h1>[h1]</h1>'
+      . '<h2>[h2]</h2>'
+      . '<h3>[h3]</h3>'
+      . '<h4>[h4]</h4>'
+      . '<h5>[h5]</h5>'
+      . '<h6>[h6]</h6>'
+      . '<hr />[hr]'
+      . '<ins>[ins]</ins>'
+      . '<ol><li>[ol-li]</li>'
+      . '<li>[li]</li></ol>'
+      . '<p>[p]</p>'
+      . '<pre>[pre]</pre>'
+      . '<table><thead><tr><td>[table-thead--tr-td]</td></tr></thead>'
+      . '<tbody><tr><td>[tbody-tr-td]</td></tr>'
+      . '<tr><td>[tr-td]</td></tr></tbody></table>'
+      . '<ul><li>[ul-li]</li>'
+      . '<li>[li]</li></ul>'
+      . '[text]';
+    $output = drupal_html_to_text($input);
+    $this->assertFalse(
+      preg_match('/\][^\n]*\[/s', $output),
+      'Block-level HTML tags should force newlines: '
+      . nl2br(check_plain($output))
+    );
+    $expected_upper = drupal_strtoupper($output);
+    $upper_input = drupal_strtoupper($input);
+    $actual_upper = drupal_html_to_text($upper_input);
+    $this->assertEqual(
+      $actual_upper,
+      $expected_upper,
+      'Tag recognition should be case-insensitive:<br />'
+      . nl2br(check_plain($actual_upper))
+      . '<br />should  be equal to <br />'
+      . nl2br(check_plain($expected_upper))
+    );
+  }
+}
-- 
1.7.1

