From 071f32eb31713c64be1b5fa0f07e1b62eaea6a29 Mon Sep 17 00:00:00 2001
From: Bob Vincent <bobvin@pillars.net>
Date: Mon, 30 May 2011 05:50:33 -0400
Subject: [PATCH] Issue #299138 by catch, Kevin Hankens, drewish, arjenk, jrglasgow, stella, sun, kscheirer, lilou, pillarsdotnet, stephandale, salvis: Improve drupal_html_to_text().

---
 includes/mail.inc                  |  826 ++++++++++++++++++++++++++----------
 modules/simpletest/tests/mail.test |  388 +++++++++++++++++
 2 files changed, 994 insertions(+), 220 deletions(-)

diff --git a/includes/mail.inc b/includes/mail.inc
index be2df923427ec363f671132771e9c97ee490c090..3b0d5ab9da7653249aba60858a89f4a8bc713bf8 100644
--- a/includes/mail.inc
+++ b/includes/mail.inc
@@ -55,7 +55,7 @@ define('MAIL_LINE_ENDINGS', isset($_SERVER['WINDIR']) || strpos($_SERVER['SERVER
  *     $data['user'] = $params['account'];
  *     $options['language'] = $message['language'];
  *     user_mail_tokens($variables, $data, $options);
- *     switch($key) {
+ *     switch ($key) {
  *       case 'notice':
  *         $langcode = $message['language']->language;
  *         $message['subject'] = t('Notification from !site', $variables, array('langcode' => $langcode));
@@ -267,7 +267,7 @@ interface MailSystemInterface {
    * @return
    *   The formatted $message.
    */
-   public function format(array $message);
+  public function format(array $message);
 
   /**
    * Send a message composed by drupal_mail().
@@ -294,7 +294,7 @@ interface MailSystemInterface {
    * @return
    *   TRUE if the mail was successfully accepted for delivery, otherwise FALSE.
    */
-   public function mail(array $message);
+  public function mail(array $message);
 }
 
 /**
@@ -303,41 +303,64 @@ interface MailSystemInterface {
  * We use delsp=yes wrapping, but only break non-spaced languages when
  * absolutely necessary to avoid compatibility issues.
  *
- * We deliberately use LF rather than CRLF, see drupal_mail().
+ * We deliberately use variable_get('mail_line_endings', MAIL_LINE_ENDINGS)
+ * rather than "\r\n".
  *
  * @param $text
  *   The plain text to process.
- * @param $indent (optional)
- *   A string to indent the text with. Only '>' characters are repeated on
- *   subsequent wrapped lines. Others are replaced by spaces.
+ * @param array $options
+ *   (optional) An array containing one or more of the following keys:
+ *   - indent: A string to indent the text with. Only '>' characters are
+ *     repeated on subsequent wrapped lines. Others are replaced by spaces.
+ *   - max: The maximum length at which to wrap each line. Defaults to 80.
+ *   - stuff: Whether to space-stuff special lines.  Defaults to TRUE.
+ *   - hard: Whether to enforce the maximum line length even if no convenient
+ *     space character is available.  Defaults to FALSE.
+ *   - pad: A string to use for padding short lines to 'max' characters.  If
+ *     more than one character, only the last will be repeated.
+ *   - break: The line break sequence to insert.  The default is one of the
+ *     following:
+ *     - "\r\n": Windows, when $text does not contain a space character.
+ *     - "\n": Non-Windows, when $text does not contain a space character.
+ *     - " \r\n": On Windows, when $text contains at least one space.
+ *     - " \n": Non-Windows, when $text contains at least one space.
+ *
+ * @see drupal_mail()
  */
-function drupal_wrap_mail($text, $indent = '') {
-  // Convert CRLF into LF.
-  $text = str_replace("\r", '', $text);
-  // See if soft-wrapping is allowed.
-  $clean_indent = _drupal_html_to_text_clean($indent);
-  $soft = strpos($clean_indent, ' ') === FALSE;
-  // Check if the string has line breaks.
-  if (strpos($text, "\n") !== FALSE) {
-    // Remove trailing spaces to make existing breaks hard.
-    $text = preg_replace('/ +\n/m', "\n", $text);
-    // Wrap each line at the needed width.
-    $lines = explode("\n", $text);
-    array_walk($lines, '_drupal_wrap_mail_line', array('soft' => $soft, 'length' => strlen($indent)));
-    $text = implode("\n", $lines);
+function drupal_wrap_mail($text, array $options = array()) {
+  static $defaults;
+  if (!isset($defaults)) {
+    $defaults = array(
+      'indent' => '',
+      'pad' => '',
+      'pad_repeat' => '',
+      'max' => 80,
+      'stuff' => TRUE,
+      'hard' => FALSE,
+      'eol' => variable_get('mail_line_endings', MAIL_LINE_ENDINGS),
+    );
   }
-  else {
-    // Wrap this line.
-    _drupal_wrap_mail_line($text, 0, array('soft' => $soft, 'length' => strlen($indent)));
+  $options += $defaults;
+  if (!isset($options['break'])) {
+    // Allow soft-wrap spaces only when $text contains at least one space.
+    $options['break'] = (strpos($text, ' ') === FALSE ? '' : ' ') . $defaults['eol'];
   }
-  // Empty lines with nothing but spaces.
-  $text = preg_replace('/^ +\n/m', "\n", $text);
-  // Space-stuff special lines.
-  $text = preg_replace('/^(>| |From)/m', ' $1', $text);
-  // Apply indentation. We only include non-'>' indentation on the first line.
-  $text = $indent . substr(preg_replace('/^/m', $clean_indent, $text), strlen($indent));
-
-  return $text;
+  if ($options['pad']) {
+    $options['pad_repeat'] = drupal_substr($options['pad'], -1, 1);
+  }
+  $options['length'] = drupal_strlen($options['indent'] . $options['break']);
+  // The 'clean' indent is applied to all lines after the first one.
+  $options['clean'] = _drupal_html_to_text_clean($options['indent']);
+  // Replace line breaks with platform-dependent text.
+  $text = preg_replace('/\r?\n/', $defaults['eol'], $text);
+  // Wrap lines according to RFC 3676.
+  $lines = explode($defaults['eol'], $text);
+  array_walk($lines, '_drupal_wrap_mail_line', $options);
+  // Expand the lines array on newly-inserted line breaks.
+  $lines = explode($defaults['eol'], implode($defaults['eol'], $lines));
+  // Apply indentation, space-stuffing, and padding.
+  array_walk($lines, '_drupal_indent_mail_line', $options);
+  return implode($defaults['eol'], $lines);
 }
 
 /**
@@ -347,240 +370,603 @@ function drupal_wrap_mail($text, $indent = '') {
  * The output will be suitable for use as 'format=flowed; delsp=yes' text
  * (RFC 3676) and can be passed directly to drupal_mail() for sending.
  *
- * We deliberately use LF rather than CRLF, see drupal_mail().
+ * We deliberately use variable_get('mail_line_endings', MAIL_LINE_ENDINGS)
+ * rather than "\r\n".
  *
  * This function provides suitable alternatives for the following tags:
- * <a> <em> <i> <strong> <b> <br> <p> <blockquote> <ul> <ol> <li> <dl> <dt>
- * <dd> <h1> <h2> <h3> <h4> <h5> <h6> <hr>
+ *
+ * <a> <address> <b> <blockquote> <br /> <caption> <cite> <dd> <div> <dl> <dt>
+ * <em> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <ol> <p> <pre> <strong>
+ * <table> <tbody> <td> <tfoot> <thead> <tr> <u> <ul>
+ *
+ * The following tag attributes are supported:
+ * - <a href=...>: Hyperlink destination urls.
+ * - <li value=...>: Ordered list item numbers.
+ * - <ol start=...>: Ordered list start number.
  *
  * @param $string
  *   The string to be transformed.
- * @param $allowed_tags (optional)
- *   If supplied, a list of tags that will be transformed. If omitted, all
- *   all supported tags are transformed.
+ * @param $allowed_tags
+ *   (optional) If supplied, a list of tags that will be transformed. If
+ *   omitted, all supported tags are transformed.
  *
  * @return
  *   The transformed string.
+ *
+ * @see drupal_mail()
  */
 function drupal_html_to_text($string, $allowed_tags = NULL) {
+  $eol = variable_get('mail_line_endings', MAIL_LINE_ENDINGS);
   // Cache list of supported tags.
   static $supported_tags;
-  if (empty($supported_tags)) {
-    $supported_tags = array('a', 'em', 'i', 'strong', 'b', 'br', 'p', 'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr');
+  if (!isset($supported_tags)) {
+    $supported_tags = array(
+      'a', 'address', 'b', 'blockquote', 'br', 'cite', 'dd', 'div', 'dl',
+      'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'li',
+      'ol', 'p', 'pre', 'strong', 'table', 'td', 'tr', 'u', 'ul',
+    );
   }
 
   // Make sure only supported tags are kept.
   $allowed_tags = isset($allowed_tags) ? array_intersect($supported_tags, $allowed_tags) : $supported_tags;
 
-  // Make sure tags, entities and attributes are well-formed and properly nested.
-  $string = _filter_htmlcorrector(filter_xss($string, $allowed_tags));
-
-  // Apply inline styles.
-  $string = preg_replace('!</?(em|i)((?> +)[^>]*)?>!i', '/', $string);
-  $string = preg_replace('!</?(strong|b)((?> +)[^>]*)?>!i', '*', $string);
-
-  // Replace inline <a> tags with the text of link and a footnote.
-  // 'See <a href="http://drupal.org">the Drupal site</a>' becomes
-  // 'See the Drupal site [1]' with the URL included as a footnote.
-  _drupal_html_to_mail_urls(NULL, TRUE);
-  $pattern = '@(<a[^>]+?href="([^"]*)"[^>]*?>(.+?)</a>)@i';
-  $string = preg_replace_callback($pattern, '_drupal_html_to_mail_urls', $string);
-  $urls = _drupal_html_to_mail_urls();
-  $footnotes = '';
-  if (count($urls)) {
-    $footnotes .= "\n";
-    for ($i = 0, $max = count($urls); $i < $max; $i++) {
-      $footnotes .= '[' . ($i + 1) . '] ' . $urls[$i] . "\n";
+  // Parse $string into a DOM tree.
+  $dom = filter_dom_load($string);
+  $notes = array();
+  $text = _drupal_html_to_text($dom->documentElement, $allowed_tags, $notes);
+  // Hard-wrap at 1000 characters and space-stuff.
+  $text = drupal_wrap_mail($text, array('max' => 1000, 'hard' => TRUE));
+  // Change non-breaking spaces back to regular spaces, and trim line breaks.
+  // chr(160) is the non-breaking space character.
+  $text = trim(str_replace(chr(160), ' ', $text), $eol);
+  // Add footnotes.
+  if ($notes) {
+    // Add a blank line before the footnote list.
+    $text .= $eol;
+    foreach ($notes as $url => $note) {
+      $text .= $eol . '[' . $note . '] ' . $url;
     }
   }
-
-  // Split tags from text.
-  $split = preg_split('/<([^>]+?)>/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
-  // Note: PHP ensures the array consists of alternating delimiters and literals
-  // and begins and ends with a literal (inserting $null as required).
-
-  $tag = FALSE; // Odd/even counter (tag or no tag)
-  $casing = NULL; // Case conversion function
-  $output = '';
-  $indent = array(); // All current indentation string chunks
-  $lists = array(); // Array of counters for opened lists
-  foreach ($split as $value) {
-    $chunk = NULL; // Holds a string ready to be formatted and output.
-
-    // Process HTML tags (but don't output any literally).
-    if ($tag) {
-      list($tagname) = explode(' ', strtolower($value), 2);
-      switch ($tagname) {
-        // List counters
-        case 'ul':
-          array_unshift($lists, '*');
-          break;
-        case 'ol':
-          array_unshift($lists, 1);
-          break;
-        case '/ul':
-        case '/ol':
-          array_shift($lists);
-          $chunk = ''; // Ensure blank new-line.
-          break;
-
-        // Quotation/list markers, non-fancy headers
-        case 'blockquote':
-          // Format=flowed indentation cannot be mixed with lists.
-          $indent[] = count($lists) ? ' "' : '>';
-          break;
-        case 'li':
-          $indent[] = is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * ';
-          break;
-        case 'dd':
-          $indent[] = '    ';
-          break;
-        case 'h3':
-          $indent[] = '.... ';
-          break;
-        case 'h4':
-          $indent[] = '.. ';
-          break;
-        case '/blockquote':
-          if (count($lists)) {
-            // Append closing quote for inline quotes (immediately).
-            $output = rtrim($output, "> \n") . "\"\n";
-            $chunk = ''; // Ensure blank new-line.
-          }
-          // Fall-through
-        case '/li':
-        case '/dd':
-          array_pop($indent);
-          break;
-        case '/h3':
-        case '/h4':
-          array_pop($indent);
-        case '/h5':
-        case '/h6':
-          $chunk = ''; // Ensure blank new-line.
-          break;
-
-        // Fancy headers
-        case 'h1':
-          $indent[] = '======== ';
-          $casing = 'drupal_strtoupper';
-          break;
-        case 'h2':
-          $indent[] = '-------- ';
-          $casing = 'drupal_strtoupper';
-          break;
-        case '/h1':
-        case '/h2':
-          $casing = NULL;
-          // Pad the line with dashes.
-          $output = _drupal_html_to_text_pad($output, ($tagname == '/h1') ? '=' : '-', ' ');
-          array_pop($indent);
-          $chunk = ''; // Ensure blank new-line.
-          break;
-
-        // Horizontal rulers
-        case 'hr':
-          // Insert immediately.
-          $output .= drupal_wrap_mail('', implode('', $indent)) . "\n";
-          $output = _drupal_html_to_text_pad($output, '-');
-          break;
-
-        // Paragraphs and definition lists
-        case '/p':
-        case '/dl':
-          $chunk = ''; // Ensure blank new-line.
-          break;
-      }
+  return $text;
+}
+
+/**
+ * Helper function for drupal_html_to_text().
+ *
+ * Recursively converts $node to text, wrapping and indenting as necessary.
+ *
+ * @param $node
+ *   The source DOMNode.
+ * @param $allowed_tags
+ *   A list of tags that will be transformed.
+ * @param array &$notes
+ *   A writeable array of footnote reference numbers, keyed by their
+ *   respective hyperlink destination urls.
+ * @param $line_length
+ *   The maximum length of a line, for wrapping.  Defaults to 80 characters.
+ * @param array $parents
+ *   The list of ancestor tags, from nearest to most distant.  Defaults to an
+ *   empty array().
+ * @param &$count
+ *   The number to use for the next list item within an ordered list.  Defaults
+ *   to 1.
+ */
+function _drupal_html_to_text(DOMNode $node, array $allowed_tags, array &$notes, $line_length = 80, array $parents = array(), &$count = NULL) {
+  if (!isset($count)) {
+    $count = 1;
+  }
+  $eol = variable_get('mail_line_endings', MAIL_LINE_ENDINGS);
+  if ($node->nodeType === XML_TEXT_NODE) {
+    // For text nodes, we just copy the text content.
+    $text = $node->textContent;
+    if (in_array('pre', $parents)) {
+      // Within <pre> tags, all spaces become non-breaking.
+      // chr(160) is the non-breaking space character.
+      $text = str_replace(' ', chr(160), $text);
     }
-    // Process blocks of text.
     else {
-      // Convert inline HTML text to plain text; not removing line-breaks or
-      // white-space, since that breaks newlines when sanitizing plain-text.
-      $value = trim(decode_entities($value));
-      if (drupal_strlen($value)) {
-        $chunk = $value;
-      }
+      // Outside <pre> tags, collapse whitespace.
+      $text = preg_replace('/[[:space:]]+/', ' ', $text);
     }
+    return $text;
+  }
+  // Non-text node.
+  $tag = '';
+  $text = '';
+  $child_text = '';
+  $child_count = 1;
+  $indent = '';
+  $prefix = '';
+  $suffix = '';
+  $pad = '';
+  if (isset($node->tagName) && in_array($node->tagName, $allowed_tags)) {
+    $tag = $node->tagName;
+    switch ($tag) {
+      // Turn links with valid hrefs into footnotes.
+      case 'a':
+        $test = !empty($node->attributes);
+        $test = $test && ($href = $node->attributes->getNamedItem('href'));
+        $test = $test && ($url = url(ltrim($href->nodeValue, '/'), array('absolute' => TRUE)));
+        $test = $test && valid_url($url);
+        if ($test) {
+          // Only add links that have not already been added.
+          if (isset($notes[$url])) {
+            $note = $notes[$url];
+          }
+          else {
+            $note = count($notes) + 1;
+            $notes[$url] = $note;
+          }
+          $suffix = ' [' . $note . ']';
+        }
+        break;
 
-    // See if there is something waiting to be output.
-    if (isset($chunk)) {
-      // Apply any necessary case conversion.
-      if (isset($casing)) {
-        $chunk = $casing($chunk);
-      }
-      // Format it and apply the current indentation.
-      $output .= drupal_wrap_mail($chunk, implode('', $indent));
-      // Remove non-quotation markers from indentation.
-      $indent = array_map('_drupal_html_to_text_clean', $indent);
-    }
+      // Generic block-level tags.
+      case 'address':
+      case 'caption':
+      case 'div':
+      case 'p':
+      case 'pre':
+        $text = $eol;
+        $suffix = $eol;
+        break;
+
+      // Forced line break.
+      case 'br':
+        $text = $eol;
+        break;
+
+      // Boldface by wrapping with "*" characters.
+      case 'b':
+      case 'strong':
+        $prefix = '*';
+        $suffix = '*';
+        break;
+
+      // Italicize by wrapping with "/" characters.
+      case 'cite':
+      case 'em':
+      case 'i':
+        $prefix = '/';
+        $suffix = '/';
+        break;
+
+      // Underline by wrapping with "_" characters.
+      case 'u':
+        $prefix = '_';
+        $suffix = '_';
+        break;
+
+      // Blockquotes are indented by "> " at each level.
+      case 'blockquote':
+        $text = $eol;
+        // chr(160) is the non-breaking space character.
+        $indent = '>' . chr(160);
+        $suffix = $eol;
+        break;
+
+      // Dictionary definitions are indented by four spaces.
+      case 'dd':
+        // chr(160) is the non-breaking space character.
+        $indent = chr(160) . chr(160) . chr(160) . chr(160);
+        $suffix = $eol;
+        break;
+
+      // Dictionary list.
+      case 'dl':
+        // Start on a newline except inside other lists.
+        if (!in_array('li', $parents)) {
+          $text = $eol;
+        }
+        $suffix = $eol;
+        break;
+
+      // Dictionary term.
+      case 'dt':
+        $suffix = $eol;
+        break;
+
+      // Header level 1 is prefixed by eight "=" characters.
+      case 'h1':
+        $text = "$eol$eol";
+        // chr(160) is the non-breaking space character.
+        $indent = '========' . chr(160);
+        $pad = chr(160) . '=';
+        $suffix = $eol;
+        break;
+
+      // Header level 2 is prefixed by six "-" characters.
+      case 'h2':
+        $text = "$eol$eol";
+        // chr(160) is the non-breaking space character.
+        $indent = '------' . chr(160);
+        $pad = chr(160) . '-';
+        $suffix = $eol;
+        break;
+
+      // Header level 3 is prefixed by four "." characters and a space.
+      case 'h3':
+        $text = "$eol$eol";
+        // chr(160) is the non-breaking space character.
+        $indent = '....' . chr(160);
+        $suffix = $eol;
+        break;
+
+      // Header level 4 is prefixed by three "." characters and a space.
+      case 'h4':
+        $text = "$eol$eol";
+        // chr(160) is the non-breaking space character.
+        $indent = '...' . chr(160);
+        $suffix = $eol;
+        break;
+
+      // Header level 5 is prefixed by two "." characters and a space.
+      case 'h5':
+        $text = "$eol$eol";
+        // chr(160) is the non-breaking space character.
+        $indent = '..' . chr(160);
+        $suffix = $eol;
+        break;
+
+      // Header level 6 is prefixed by one "." character and a space.
+      case 'h6':
+        $text = "$eol$eol";
+        // chr(160) is the non-breaking space character.
+        $indent = '.' . chr(160);
+        $suffix = $eol;
+        break;
+
+      // Horizontal rulers become a line of "-" characters.
+      case 'hr':
+        $text = $eol;
+        $child_text = '-';
+        $pad = '-';
+        $suffix = $eol;
+        break;
+
+      // List items are treated differently depending on the parent tag.
+      case 'li':
+        // Ordered list item.
+        if (reset($parents) === 'ol') {
+          // Check the value attribute.
+          $test = !empty($node->attributes);
+          $test = $test && ($value = $node->attributes->getNamedItem('value'));
+          if ($test) {
+            $count = $value->nodeValue;
+          }
+          // chr(160) is the non-breaking space character.
+          $indent = ($count < 10 ? chr(160) : '') . chr(160) . "$count)" . chr(160);
+          $count++;
+        }
+        // Unordered list item.
+        else {
+          // chr(160) is the non-breaking space character.
+          $indent = chr(160) . '*' . chr(160);
+        }
+        $suffix = $eol;
+        break;
+
+      // Ordered lists.
+      case 'ol':
+        // Start on a newline except inside other lists.
+        if (!in_array('li', $parents)) {
+          $text = $eol;
+        }
+        // Check the start attribute.
+        $test = !empty($node->attributes);
+        $test = $test && ($value = $node->attributes->getNamedItem('start'));
+        if ($test) {
+          $child_count = $value->nodeValue;
+        }
+        break;
+
+      // Tables require special handling.
+      case 'table':
+        return _drupal_html_to_text_table($node, $allowed_tags, $notes, $line_length);
+
+      // Separate adjacent table cells by two non-breaking spaces.
+      case 'td':
+        if (!empty($node->nextSibling)) {
+          // chr(160) is the non-breaking space character.
+          $suffix = chr(160) . chr(160);
+        }
+        break;
+
+      // End each table row with a newline.
+      case 'tr':
+        $suffix = $eol;
+        break;
 
-    $tag = !$tag;
+      // Unordered lists.
+      case 'ul':
+        // Start on a newline except inside other lists.
+        if (!in_array('li', $parents)) {
+          $text = $eol;
+        }
+        break;
+
+      default:
+        // Coder review complains if there is no default case.
+        break;
+    }
+    // Only add allowed tags to the $parents array.
+    array_unshift($parents, $tag);
+  }
+  // Copy each child node to output.
+  if ($node->hasChildNodes()) {
+    foreach ($node->childNodes as $child) {
+      $child_text .= _drupal_html_to_text($child, $allowed_tags, $notes, $line_length - drupal_strlen($indent), $parents, $child_count);
+    }
+  }
+  // We only add prefix and suffix if the child nodes were non-empty.
+  if ($child_text > '') {
+    // We capitalize the contents of h1 and h2 tags.
+    if ($tag === 'h1' || $tag === 'h2') {
+      $child_text = drupal_strtoupper($child_text);
+    }
+    // Don't add a newline to an existing newline.
+    if ($suffix === $eol && drupal_substr($child_text, - drupal_strlen($eol)) === $eol) {
+      $suffix = '';
+    }
+    // Trim spaces around newlines except with <pre> or inline tags.
+    if (!in_array($tag, array('a', 'b', 'cite', 'em', 'i', 'pre', 'strong', 'u'))) {
+      $child_text = preg_replace('/ *\n */', "\n", $child_text);
+    }
+    // Soft-wrap at effective line length, but don't space-stuff.
+    $child_text = drupal_wrap_mail(
+      $prefix . $child_text,
+      array(
+        'break' => chr(160) . $eol,
+        'indent' => $indent,
+        'max' => $line_length,
+        'pad' => $pad,
+        'stuff' => FALSE,
+      )
+    ) . $suffix;
+    if ($tag === 'pre') {
+      // Perform RFC-3676 soft-wrapping.
+      $child_text = str_replace(chr(160), ' ', $child_text);
+      $child_text = drupal_wrap_mail(
+        $child_text,
+        array('max' => $line_length, 'stuff' => FALSE)
+      );
+      $child_text = str_replace(' ', chr(160), $child_text);
+    }
+    $text .= $child_text;
   }
+  return $text;
+}
+
+/**
+ * Helper function for _drupal_html_to_text().
+ *
+ * Renders a <table> DOM Node into plain text.  Attributes such as rowspan,
+ * colspan, padding, border, etc. are ignored.
+ *
+ * @param DOMNode $node
+ *   The DOMNode corresponding to the <table> tag and its contents.
+ * @param $allowed_tags
+ *   The list of allowed tags passed to _drupal_html_to_text().
+ * @param array &$notes
+ *   A writeable array of footnote reference numbers, keyed by their
+ *   respective hyperlink destination urls.
+ * @param $line_length
+ *   The desired maximum line length, after word-wrapping each table cell.
+ *
+ * @return
+ *   A plain text representation of the table.
+ *
+ * @see _drupal_html_to_text()
+ */
+function _drupal_html_to_text_table(DOMNode $node, $allowed_tags = NULL, array &$notes = array(), $line_length = 80) {
+  $eol = variable_get('mail_line_endings', MAIL_LINE_ENDINGS);
+  $header = array();
+  $footer = array();
+  $body = array();
+  $text = '';
+  // Walk the <table> subtree iteratively, collecting the caption and rows.
+  $current = $node;
+  while (TRUE) {
+    if (isset($current->tagName)) {
+      switch ($current->tagName) {
+        case 'caption':  // The table caption is added first.
+          $text = _drupal_html_to_text($current, $allowed_tags, $notes, $line_length);
+          break;
+
+        case 'tr':
+          switch ($current->parentNode->tagName) {
+            case 'thead':
+              $header[] = $current;
+              break;
+
+            case 'tfoot':
+              $footer[] = $current;
+              break;
 
-  return $output . $footnotes;
+            default: // Either 'tbody' or 'table'
+              $body[] = $current;
+              break;
+          }
+          break;
+
+        default:
+          if ($current->hasChildNodes()) {
+            $current = $current->firstChild;
+            continue 2;
+          }
+      }
+    }
+    do {
+      if ($current->nextSibling) {
+        $current = $current->nextSibling;
+        continue 2;
+      }
+      $current = $current->parentNode;
+    } while (!$current->isSameNode($node));
+    break;
+  }
+  // Merge the thead, tbody, and tfoot sections together.
+  if ($rows = array_merge($header, $body, $footer)) {
+    // First just count the number of columns.
+    $columns = 0;
+    foreach ($rows as $row) {
+      if ($row->childNodes) {
+        $columns = max($columns, $row->childNodes->length);
+      }
+    }
+    if ($columns) {
+      // If any columns were found, calculate each column height and width.
+      $width = $line_length - $columns + 1;
+      $table_width = PHP_INT_MAX;
+      $table = array();
+      // Find an appropriate width to wrap each cell.
+      do {
+        // Start with zero for each width and height, and an empty cell array.
+        $widths = array_fill(0, $columns, 0);
+        $heights = array_fill(0, count($rows), 0);
+        $cells = array();
+        $breaks = array();
+        foreach ($rows as $i => $row) {
+          foreach ($row->childNodes as $j => $cell) {
+            $z = 0;
+            // Render the cell contents.
+            $cells[$i][$j] = _drupal_html_to_text($cell, $allowed_tags, $notes, $width);
+            // Split the cell into lines.
+            $lines = explode($eol, $cells[$i][$j]);
+            // The row height is the maximum number of lines among all the
+            // cells in that row.
+            $heights[$i] = max($heights[$i], count($lines));
+            foreach ($lines as $line) {
+              $this_width = drupal_strlen($line);
+              // The column width is the maximum line width among all the
+              // lines in that column.
+              if ($this_width > $widths[$j]) {
+                $widths[$j] = $this_width;
+                // If the longest line in a column contains at least one
+                // space character, then the table can be made narrower.
+                $breaks[$j] = strpos($line, ' ') !== FALSE;
+              }
+            }
+          }
+        }
+        // Calculate the total table width.
+        $this_width = array_sum($widths) + $columns + 1;
+        if ($this_width < $table_width) {
+          $table_width = $this_width;
+          // Save this configuration in case it turns out to be the best one.
+          $table = array($widths, $heights, $cells);
+          if ($table_width <= $line_length) {
+            // Table width is within limits; we're done.
+            break;
+          }
+        }
+        elseif (!in_array(TRUE, $breaks, TRUE)) {
+          // If there are no more break points, then the table is already as
+          // narrow as it can get, so we're done.
+          break;
+        }
+        // Try wrapping each column at a smaller width.
+        $width--;
+      } while ($width > 1);
+      // Retrieve the best configuration that was found.
+      list($widths, $heights, $cells) = $table;
+      // Pad each cell to column width and line height.
+      foreach ($cells as $i => $row) {
+        foreach ($row as $j => $cell) {
+          // Pad each cell to the maximum number of lines in that row.
+          $lines = array_pad(explode($eol, $cell), $heights[$i], '');
+          foreach ($lines as $k => $line) {
+            // Pad each line to the maximum width in that column.
+            $count = $widths[$j] - drupal_strlen($line);
+            if ($count > 0) {
+              $lines[$k] .= str_repeat(' ', $count);
+            }
+          }
+          $cells[$i][$j] = $lines;
+        }
+      }
+      // Generate the row separator line.
+      $row_line = '+';
+      foreach ($widths as $width) {
+        $row_line .= str_repeat('-', $width) . '+';
+      }
+      $row_line .= $eol;
+      // Final output starts with a row separator.
+      $text .= $row_line;
+      foreach ($cells as $i => $row) {
+        // For each row, iterate first by lines within the row.
+        for ($k = 0; $k < $heights[$i]; $k++) {
+          // Add a vertical-bar at the beginning of each row line.
+          $text .= '|';
+          // Within each row line, iterate by cells within that line.
+          foreach ($row as $j => $lines) {
+            // Add a vertical bar at the end of each cell line.
+            $text .= $lines[$k] . '|';
+          }
+          // Add a line break at the end of each row line.
+          $text .= $eol;
+        }
+        // Add a row separator at the end of each row.
+        $text .= $row_line;
+      }
+    }
+  }
+  return $text;
 }
 
 /**
  * Helper function for array_walk in drupal_wrap_mail().
  *
- * Wraps words on a single line.
+ * Inserts $values['break'] sequences to break up $line into parts of no more
+ * than $values['max'] - $values['length'] characters.  Only breaks at space
+ * characters, unless $values['hard'] is TRUE.
  */
 function _drupal_wrap_mail_line(&$line, $key, $values) {
-  // Use soft-breaks only for purely quoted or unindented text.
-  $line = wordwrap($line, 77 - $values['length'], $values['soft'] ? "  \n" : "\n");
-  // Break really long words at the maximum width allowed.
-  $line = wordwrap($line, 996 - $values['length'], $values['soft'] ? " \n" : "\n");
+  $line = wordwrap(rtrim($line), max(1, $values['max'] - $values['length']), $values['break'], $values['hard']);
 }
 
 /**
- * Helper function for drupal_html_to_text().
+ * Helper function for array_walk in drupal_wrap_mail().
  *
- * Keeps track of URLs and replaces them with placeholder tokens.
+ * If $values['pad'] is non-empty, $values['indent'] will be added at the start
+ * of each line, and $values['pad'] at the end, repeating the last character of
+ * $values['pad'] until the line length equals $values['max'].
+ *
+ * If $values['pad'] is empty, $values['indent'] will be added at the start of
+ * the first line, and $values['clean'] at the start of subsequent lines.
+ *
+ * If $values['stuff'] is true, then an extra space character will be added at
+ * the start of any line beginning with a space, a '>', or the word 'From'.
+ *
+ * @see http://www.ietf.org/rfc/rfc3676.txt
  */
-function _drupal_html_to_mail_urls($match = NULL, $reset = FALSE) {
-  global $base_url, $base_path;
-  static $urls = array(), $regexp;
-
-  if ($reset) {
-    // Reset internal URL list.
-    $urls = array();
+function _drupal_indent_mail_line(&$line, $key, $values) {
+  if ($line == '') {
+    return;
+  }
+  if ($values['pad']) {
+    $line = $values['indent'] . $line;
+    $count = $values['max'] - drupal_strlen($line) - drupal_strlen($values['pad']);
+    if ($count >= 0) {
+      $line .= $values['pad'] . str_repeat($values['pad_repeat'], $count);
+    }
   }
   else {
-    if (empty($regexp)) {
-      $regexp = '@^' . preg_quote($base_path, '@') . '@';
-    }
-    if ($match) {
-      list(, , $url, $label) = $match;
-      // Ensure all URLs are absolute.
-      $urls[] = strpos($url, '://') ? $url : preg_replace($regexp, $base_url . '/', $url);
-      return $label . ' [' . count($urls) . ']';
-    }
+    $line = $values[$key === 0 ? 'indent' : 'clean'] . $line;
+  }
+  if ($values['stuff']) {
+    // chr(160) is the non-breaking space character.
+    $line = preg_replace('/^(' . chr(160) . '| |>|From)/', ' $1', $line);
   }
-  return $urls;
 }
 
 /**
  * Helper function for drupal_wrap_mail() and drupal_html_to_text().
  *
- * Replace all non-quotation markers from a given piece of indentation with spaces.
+ * Replace all non-quotation markers from a given piece of indentation with
+ * non-breaking space characters.
  */
 function _drupal_html_to_text_clean($indent) {
-  return preg_replace('/[^>]/', ' ', $indent);
-}
-
-/**
- * Helper function for drupal_html_to_text().
- *
- * Pad the last line with the given character.
- */
-function _drupal_html_to_text_pad($text, $pad, $prefix = '') {
-  // Remove last line break.
-  $text = substr($text, 0, -1);
-  // Calculate needed padding space and add it.
-  if (($p = strrpos($text, "\n")) === FALSE) {
-    $p = -1;
-  }
-  $n = max(0, 79 - (strlen($text) - $p) - strlen($prefix));
-  // Add prefix and padding, and restore linebreak.
-  return $text . $prefix . str_repeat($pad, $n) . "\n";
+  // chr(160) is the non-breaking space character.
+  return preg_replace('/[^>]/', chr(160), $indent);
 }
diff --git a/modules/simpletest/tests/mail.test b/modules/simpletest/tests/mail.test
index 8a7b152d9d32eee7ae47c9ef8b5fb9c77f4e0cf1..1e012b8100038d51e0185f556426fbba2c01b8fa 100644
--- a/modules/simpletest/tests/mail.test
+++ b/modules/simpletest/tests/mail.test
@@ -1,6 +1,7 @@
 <?php
 
 /**
+ * @file
  * Test the Drupal mailing system.
  */
 class MailTestCase extends DrupalWebTestCase implements MailSystemInterface {
@@ -63,3 +64,390 @@ class MailTestCase extends DrupalWebTestCase implements MailSystemInterface {
   }
 }
 
+/**
+ * Unit tests for drupal_html_to_text().
+ */
+class DrupalHtmlToTextTestCase extends DrupalUnitTestCase {
+  public static function getInfo() {
+    return array(
+      'name' => 'HTML to text conversion',
+      'description' => 'Tests drupal_html_to_text().',
+      'group' => 'Mail',
+    );
+  }
+
+  /**
+   * Renders a string as a PHP double-quoted literal for test messages.
+   *
+   * @param $text
+   *   The text string to render.
+   *
+   * @return
+   *   HTML markup that, when displayed in a browser, shows $text escaped and
+   *   double-quoted, with newlines as \n and spaces as non-breaking spaces.
+   */
+  function stringToHtml($text) {
+    $escaped = check_plain($text);
+    $search = array("\n", ' ');
+    $replace = array('\n', '&nbsp;');
+    $visible = str_replace($search, $replace, $escaped);
+    // Wrap in double quotes so the result reads like a PHP string literal.
+    return '"' . $visible . '"';
+  }
+
+  /**
+   * Asserts that drupal_html_to_text() converts $html into $text.
+   *
+   * @param $html
+   *   The source HTML string to be converted.
+   * @param $text
+   *   The expected result of converting $html to text.
+   * @param $message
+   *   The assertion message; the tag names found in $html are appended to it.
+   * @param $allowed_tags
+   *   (optional) An array of allowed tags, or NULL to default to the full
+   *   set of tags supported by drupal_html_to_text().
+   */
+  function assertHtmlToText($html, $text, $message, $allowed_tags = NULL) {
+    // Append the distinct tag names found in the input to the message.
+    preg_match_all('/<([a-z1-6]+)/', drupal_strtolower($html), $matches);
+    $message .= ' (' . implode(', ', array_unique($matches[1])) . ')';
+    $result = drupal_html_to_text($html, $allowed_tags);
+    if (!$this->assertEqual($result, $text, check_plain($message))) {
+      $dump = array('html' => $html, 'result' => $result, 'expected' => $text);
+      foreach ($dump as $label => $value) {
+        $dump[$label] = $label . ' = <pre>' . $this->stringToHtml($value) . '</pre>';
+      }
+      $this->verbose(implode('<br />', $dump));
+    }
+  }
+
+  /**
+   * Tests the conversion of each tag supported by drupal_html_to_text().
+   */
+  function testTags() {
+    $tests = array(
+      '<a href = "http://drupal.org">Drupal.org</a>' => "Drupal.org [1]\n\n[1] http://drupal.org",
+      '<a href = "/">Homepage</a>' => "Homepage [1]\n\n[1] " . url('', array('absolute' => TRUE)),
+      '<address>Drupal</address>' => "Drupal",
+      '<address>Drupal</address><address>Drupal</address>' => "Drupal\n\nDrupal",
+      '<b>Drupal</b>' => "*Drupal*",
+      '<blockquote>Drupal</blockquote>' => " > Drupal",
+      '<blockquote>Drupal</blockquote><blockquote>Drupal</blockquote>' => " > Drupal\n\n > Drupal",
+      '<br />Drupal<br />Drupal<br /><br />Drupal' => "Drupal\nDrupal\n\nDrupal",
+      '<br/>Drupal<br/>Drupal<br/><br/>Drupal' => "Drupal\nDrupal\n\nDrupal",
+      '<br/>Drupal<br/>Drupal<br/><br/>Drupal<p>Drupal</p>' => "Drupal\nDrupal\n\nDrupal\nDrupal",
+      '<div>Drupal</div>' => "Drupal",
+      '<div>Drupal</div><div>Drupal</div>' => "Drupal\n\nDrupal",
+      '<em>Drupal</em>' => "/Drupal/",
+      '<h1>Drupal</h1>' => "======== DRUPAL " . str_repeat('=', 64),
+      '<h1>Drupal</h1><p>Drupal</p>' => "======== DRUPAL " . str_repeat('=', 64) . "\n\nDrupal",
+      '<h2>Drupal</h2>' => "------ DRUPAL " . str_repeat('-', 66),
+      '<h2>Drupal</h2><p>Drupal</p>' => "------ DRUPAL " . str_repeat('-', 66) . "\n\nDrupal",
+      '<h3>Drupal</h3>' => ".... Drupal",
+      '<h3>Drupal</h3><p>Drupal</p>' => ".... Drupal\n\nDrupal",
+      '<h4>Drupal</h4>' => "... Drupal",
+      '<h4>Drupal</h4><p>Drupal</p>' => "... Drupal\n\nDrupal",
+      '<h5>Drupal</h5>' => ".. Drupal",
+      '<h5>Drupal</h5><p>Drupal</p>' => ".. Drupal\n\nDrupal",
+      '<h6>Drupal</h6>' => ". Drupal",
+      '<h6>Drupal</h6><p>Drupal</p>' => ". Drupal\n\nDrupal",
+      '<hr />Drupal<hr />' => str_repeat('-', 80) . "\nDrupal\n" . str_repeat('-', 80),
+      '<hr/>Drupal<hr/>' => str_repeat('-', 80) . "\nDrupal\n" . str_repeat('-', 80),
+      '<hr/>Drupal<hr/><p>Drupal</p>' => str_repeat('-', 80) . "\nDrupal\n" . str_repeat('-', 80) . "\n\nDrupal",
+      '<i>Drupal</i>' => "/Drupal/",
+      '<p>Drupal</p>' => "Drupal",
+      '<p>Drupal</p><p>Drupal</p>' => "Drupal\n\nDrupal",
+      '<pre>Drupal</pre>' => "Drupal",
+      '<pre>Drupal</pre>Drupal' => "Drupal\nDrupal",
+      '<pre>Drupal</pre><p>Drupal</p>' => "Drupal\n\nDrupal",
+      '<strong>Drupal</strong>' => "*Drupal*",
+      '<table><tr><td>Drupal</td><td>Drupal</td></tr><tr><td>Drupal</td><td>Drupal</td></tr></table>' => "+--------+------+\n|Drupal  |Drupal|\n+--------+------+\n|Drupal  |Drupal|\n+--------+------+",
+      '<table><tr><td>Drupal</td></tr></table><p>Drupal</p>' => "+------+\n|Drupal|\n+------+\n\nDrupal",
+      '<u>Drupal</u>' => "_Drupal_",
+      '<ul><li>Drupal</li></ul>' => "  * Drupal",
+      '<ul><li>Drupal <em>Drupal</em> Drupal</li></ul>' => "  * Drupal /Drupal/ Drupal",
+      '<ul><li>Drupal</li><li><ol><li>Drupal</li><li>Drupal</li></ol></li></ul>' => "  * Drupal\n  *   1) Drupal\n      2) Drupal",
+      '<ul><li>Drupal</li><li><ol><li>Drupal</li></ol></li><li>Drupal</li></ul>' => "  * Drupal\n  *   1) Drupal\n  * Drupal",
+      '<ul><li>Drupal</li><li>Drupal</li></ul>' => "  * Drupal\n  * Drupal",
+      '<ul><li>Drupal</li></ul><p>Drupal</p>' => "  * Drupal\n\nDrupal",
+      '<ol><li>Drupal</li></ol>' => "   1) Drupal",
+      '<ol><li>Drupal</li><li><ul><li>Drupal</li><li>Drupal</li></ul></li></ol>' => "   1) Drupal\n   2)  * Drupal\n       * Drupal",
+      '<ol><li>Drupal</li><li>Drupal</li></ol>' => "   1) Drupal\n   2) Drupal",
+      '<ol>Drupal</ol>' => "Drupal",
+      '<ol><li>Drupal</li></ol><p>Drupal</p>' => "   1) Drupal\n\nDrupal",
+      '<dl><dt>Drupal</dt></dl>' => "Drupal",
+      '<dl><dt>Drupal</dt><dd>Drupal</dd></dl>' => "Drupal\n     Drupal",
+      '<dl><dt>Drupal</dt><dd>Drupal</dd><dt>Drupal</dt><dd>Drupal</dd></dl>' => "Drupal\n     Drupal\nDrupal\n     Drupal",
+      '<dl><dt>Drupal</dt><dd>Drupal</dd></dl><p>Drupal</p>' => "Drupal\n     Drupal\n\nDrupal",
+      '<dl><dt>Drupal<dd>Drupal</dl>' => "Drupal\n     Drupal",
+      '<dl><dt>Drupal</dt></dl><p>Drupal</p>' => "Drupal\n\nDrupal",
+      '<ul><li>Drupal</li><li><dl><dt>Drupal</dt><dd>Drupal</dd><dt>Drupal</dt><dd>Drupal</dd></dl></li><li>Drupal</li></ul>' => "  * Drupal\n  * Drupal\n        Drupal\n    Drupal\n        Drupal\n  * Drupal",
+      // Malformed or non-XHTML markup: unclosed, mismatched, and stray tags.
+      '<br>Drupal<br>Drupal' => "Drupal\nDrupal",
+      '<hr>Drupal<hr>Drupal' => str_repeat('-', 80) . "\nDrupal\n" . str_repeat('-', 80) . "\nDrupal",
+      '<ol><li>Drupal<li>Drupal</ol>' => "   1) Drupal\n   2) Drupal",
+      '<ul><li>Drupal <em>Drupal</em> Drupal</ul></ul>' => "  * Drupal /Drupal/ Drupal",
+      '<ul><li>Drupal<li>Drupal</ol>' => "  * Drupal\n  * Drupal",
+      '<ul><li>Drupal<li>Drupal</ul>' => "  * Drupal\n  * Drupal",
+      '<ul>Drupal</ul>' => "Drupal",
+      'Drupal</ul></ol></dl><li>Drupal' => "Drupal * Drupal",
+      '<dl>Drupal</dl>' => "Drupal",
+      '<dl>Drupal</dl><p>Drupal</p>' => "Drupal\n\nDrupal",
+      '<dt>Drupal</dt>' => "Drupal",
+      // Tags that drupal_html_to_text() does not support.
+      '<html>Drupal</html>' => "Drupal",
+      '<script type="text/javascript">Drupal</script>' => "",
+    );
+
+    foreach ($tests as $html => $text) {
+      $this->assertHtmlToText($html, $text, 'Supported tags');
+    }
+  }
+
+  /**
+   * Tests the $allowed_tags argument of drupal_html_to_text().
+   */
+  function testDrupalHtmlToTextArgs() {
+    // Keys are assertion messages; values are HTML, expected text, and tags.
+    $cases = array(
+      'Allowed <b> tag found' => array(
+        'Drupal <b>Drupal</b> Drupal',
+        'Drupal *Drupal* Drupal',
+        array('b'),
+      ),
+      'Disallowed <h1> tag not found' => array(
+        'Drupal <h1>Drupal</h1> Drupal',
+        'Drupal Drupal Drupal',
+        array('b'),
+      ),
+      'Disallowed <p>, <em>, and <b> tags not found' => array(
+        'Drupal <p><em><b>Drupal</b></em><p> Drupal',
+        'Drupal Drupal Drupal',
+        array('a', 'br', 'h1'),
+      ),
+      'Unsupported <html> and <body> tags not found' => array(
+        '<html><body>Drupal</body></html>',
+        'Drupal',
+        array('html', 'body'),
+      ),
+    );
+    foreach ($cases as $message => $case) {
+      list($html, $text, $allowed_tags) = $case;
+      $this->assertHtmlToText($html, $text, $message, $allowed_tags);
+    }
+  }
+
+  /**
+   * Tests that whitespace is collapsed, except within <pre> tags.
+   */
+  function testDrupalHtmltoTextCollapsesWhitespace() {
+    $chunk = "Drupal  Drupal\n\nDrupal";
+    $input = '<pre>' . $chunk . '<pre>' . $chunk . '</pre>' . $chunk . '</pre>';
+    $this->assertHtmlToText(
+      $input,
+      "Drupal Drupal DrupalDrupal Drupal DrupalDrupal Drupal Drupal",
+      'Whitespace inside disallowed <pre> tags is collapsed',
+      array('p')
+    );
+    // No $allowed_tags argument, so the default tag set applies.
+    $this->assertHtmlToText(
+      $input,
+      implode("\n", array($chunk, $chunk, $chunk)),
+      'Whitespace inside allowed <pre> tags is preserved'
+    );
+  }
+
+  /**
+   * Tests that text separated by block-level tags in HTML gets separated by
+   * (at least) a newline in the plaintext version, regardless of tag case.
+   */
+  function testDrupalHtmlToTextBlockTagToNewline() {
+    $input = '[text]'
+      . '<address>[address]</address>'
+      . '<blockquote>[blockquote]</blockquote>'
+      . '<br />[br]'
+      . '<div>[div]</div>'
+      . '<dl><dt>[dl-dt]</dt>'
+      . '<dt>[dt]</dt>'
+      . '<dd>[dd]</dd>'
+      . '<dd>[dd-dl]</dd></dl>'
+      . '<h1>[h1]</h1>'
+      . '<h2>[h2]</h2>'
+      . '<h3>[h3]</h3>'
+      . '<h4>[h4]</h4>'
+      . '<h5>[h5]</h5>'
+      . '<h6>[h6]</h6>'
+      . '<hr />[hr]'
+      . '<ol><li>[ol-li]</li>'
+      . '<li>[li]</li>'
+      . '<li>[li-ol]</li></ol>'
+      . '<p>[p]</p>'
+      . '<pre>[pre]</pre>'
+      . '<table><thead><tr><td>[table-thead--tr-td]</td></tr></thead>'
+      . '<tbody><tr><td>[tbody-tr-td]</td></tr>'
+      . '<tr><td>[tr-td]</td></tr></tbody></table>'
+      . '<ul><li>[ul-li]</li>'
+      . '<li>[li-ul]</li></ul>'
+      . '[text]';
+    $output = drupal_html_to_text($input);
+    $pass = $this->assertFalse(
+      preg_match('/\][^\n]*\[/s', $output),
+      'Block-level HTML tags should force newlines'
+    );
+    if (!$pass) {
+      $this->verbose($this->stringToHtml($output));
+    }
+    // Converting upper-cased markup must equal upper-casing the conversion.
+    $expected_upper = drupal_strtoupper($output);
+    $actual_upper = drupal_html_to_text(drupal_strtoupper($input));
+    $pass = $this->assertEqual(
+      $actual_upper,
+      $expected_upper,
+      'Tag recognition should be case-insensitive'
+    );
+    if (!$pass) {
+      $this->verbose(
+        $this->stringToHtml($actual_upper)
+        . '<br />should be equal to <br />'
+        . $this->stringToHtml($expected_upper)
+      );
+    }
+  }
+
+  /**
+   * Tests that headers are properly separated from surrounding text.
+   */
+  function testHeaderSeparation() {
+    // Every expectation below contains this same <h1> rendering.
+    $h1 = '======== DRUPAL ' . str_repeat('=', 64);
+    $this->assertHtmlToText('Drupal<h1>Drupal</h1>Drupal',
+      "Drupal\n\n" . $h1 . "\nDrupal",
+      'Text before and after <h1> tag');
+    // A paragraph before the heading yields one extra newline before it.
+    $this->assertHtmlToText('<p>Drupal</p><h1>Drupal</h1>Drupal',
+      "Drupal\n\n\n" . $h1 . "\nDrupal",
+      'Paragraph before and text after <h1> tag');
+    // A paragraph after the heading yields one extra newline after it.
+    $this->assertHtmlToText('Drupal<h1>Drupal</h1><p>Drupal</p>',
+      "Drupal\n\n" . $h1 . "\n\nDrupal",
+      'Text before and paragraph after <h1> tag');
+    $this->assertHtmlToText('<p>Drupal</p><h1>Drupal</h1><p>Drupal</p>',
+      "Drupal\n\n\n" . $h1 . "\n\nDrupal",
+      'Paragraph before and after <h1> tag');
+  }
+
+  /**
+   * Tests that footnote references are properly generated.
+   */
+  function testFootnoteReferences() {
+    $links = array(
+      '<a href="http://www.example.com/node/1">Host and path</a>',
+      '<a href="http://www.example.com">Host, no path</a>',
+      '<a href="/node/1">Path, no host</a>',
+      '<a href="node/1">Relative path</a>',
+    );
+    // Both local links are expected to share a single footnote, [3].
+    $expected = "Host and path [1]\nHost, no path [2]\nPath, no host [3]"
+      . "\nRelative path [3]\n"
+      . "\n[1] http://www.example.com/node/1"
+      . "\n[2] http://www.example.com"
+      . "\n[3] " . url('node/1', array('absolute' => TRUE));
+    $this->assertHtmlToText(implode('<br />', $links), $expected, 'Footnotes');
+  }
+
+  /**
+   * Tests that combinations of paragraph breaks, line breaks, linefeeds,
+   * and spaces are properly handled.
+   */
+  function testDrupalHtmlToTextParagraphs() {
+    // Keys are the source HTML; values are the expected plain text.
+    $cases = array(
+      // Line breaks next to linefeeds inside a paragraph.
+      "<p>line 1<br />\nline 2<br />line 3\n<br />line 4</p><p>paragraph</p>"
+        => "line 1\nline 2\nline 3\nline 4\n\nparagraph",
+      // Spaces after <br /> and between paragraphs; last paragraph is "0".
+      "<p>line 1<br /> line 2</p> <p>line 4<br /> line 5</p> <p>0</p>"
+        => "line 1\nline 2\n\nline 4\nline 5\n\n0",
+    );
+    foreach ($cases as $html => $expected) {
+      $this->assertHtmlToText($html, $expected, 'Paragraph breaks');
+    }
+  }
+
+  /**
+   * Tests that drupal_html_to_text() wraps before 1000 characters.
+   *
+   * RFC 3676 says, "The Text/Plain media type is the lowest common
+   * denominator of Internet email, with lines of no more than 998 characters."
+   *
+   * RFC 2046 says, "SMTP [RFC-821] allows a maximum of 998 octets before the
+   * next CRLF sequence."
+   *
+   * RFC 821 says, "The maximum total length of a text line including the
+   * <CRLF> is 1000 characters."
+   */
+  function testVeryLongLineWrap() {
+    $input = 'Drupal<br /><pre>' . str_repeat('x', 2100) . '</pre><br />Drupal';
+    $output = drupal_html_to_text($input);
+    // This awkward construct comes from includes/mail.inc lines 8-13.
+    $eol = variable_get('mail_line_endings', MAIL_LINE_ENDINGS);
+    // We must use strlen() rather than drupal_strlen() in order to count
+    // octets rather than characters.
+    $line_length_limit = 1000 - strlen($eol);
+    $maximum_line_length = 0;
+    foreach (explode($eol, $output) as $line) {
+      // We must use strlen() rather than drupal_strlen() in order to count
+      // octets rather than characters.
+      $maximum_line_length = max($maximum_line_length, strlen($line));
+    }
+    if (!$this->assertFalse($maximum_line_length > $line_length_limit, 'Mail lines are wrapped before 1000 octets.')) {
+      $this->verbose('Maximum line length found was ' . $maximum_line_length . ' octets.');
+    }
+  }
+
+  /**
+   * Tests that text within <pre> tags is only soft-wrapped and space-stuffed.
+   */
+  function testNoWrapWithinPre() {
+    // This awkward construct comes from includes/mail.inc lines 8-13.
+    $eol = variable_get('mail_line_endings', MAIL_LINE_ENDINGS);
+    $html = '<pre>'
+      // Single space.
+      . str_repeat('a', 30) . ' ' . str_repeat('a', 30) . ' '
+      // Two spaces.
+      . str_repeat('b', 30) . ' ' . str_repeat('b', 30) . '  '
+      // Single newline.
+      . str_repeat('c', 30) . ' ' . str_repeat('c', 30) . "$eol"
+      // Double newline.
+      . str_repeat('d', 30) . ' ' . str_repeat('d', 30) . "$eol$eol"
+      // Newline and space.
+      . str_repeat('e', 30) . ' ' . str_repeat('e', 30) . "$eol "
+      // Newline and two spaces.
+      . str_repeat('f', 30) . ' ' . str_repeat('f', 30) . "$eol  "
+      // Space and newline.
+      . str_repeat('g', 30) . ' ' . str_repeat('g', 30) . " $eol"
+      // Two spaces and newline.
+      . str_repeat('h', 30) . ' ' . str_repeat('h', 30) . "  $eol"
+      . str_repeat('i', 30) . ' ' . str_repeat('i', 30) . '</pre>';
+    $text = ''
+      // One space and newline.
+      . str_repeat('a', 30) . ' ' . str_repeat('a', 30) . " $eol"
+      // Two spaces and newline.
+      . str_repeat('b', 30) . ' ' . str_repeat('b', 30) . "  $eol"
+      // Single newline.
+      . str_repeat('c', 30) . ' ' . str_repeat('c', 30) . "$eol"
+      // Double newline.
+      . str_repeat('d', 30) . ' ' . str_repeat('d', 30) . "$eol$eol"
+      // Newline and two spaces.
+      . str_repeat('e', 30) . ' ' . str_repeat('e', 30) . "$eol  "
+      // Newline and three spaces.
+      . str_repeat('f', 30) . ' ' . str_repeat('f', 30) . "$eol   "
+      // Newline only.
+      . str_repeat('g', 30) . ' ' . str_repeat('g', 30) . "$eol"
+      // Newline only.
+      . str_repeat('h', 30) . ' ' . str_repeat('h', 30) . "$eol"
+      . str_repeat('i', 30) . ' ' . str_repeat('i', 30);
+    $this->assertHtmlToText($html, $text, 'Soft-wrap and space-stuff text within <pre> according to RFC-3676');
+  }
+}
-- 
1.7.4.1

