diff --git a/html_to_text.inc b/html_to_text.inc
index 39f06eb..26284d2 100644
--- a/html_to_text.inc
+++ b/html_to_text.inc
@@ -124,8 +124,8 @@ function mailsystem_html_to_text($string, $allowed_tags = NULL) {
   // and space-stuff special lines.
   $text = mailsystem_wrap_mail($text, array('max' => 1000 - strlen($eol), 'hard' => TRUE));
   // Change non-breaking spaces back to regular spaces, and trim line breaks.
-  // chr(160) is the non-breaking space character.
-  $text = str_replace(chr(160), ' ', trim($text, $eol));
+  // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+  $text = str_replace(mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'), ' ', trim($text, $eol));
   // Add footnotes;
   if ($notes) {
     // Add a blank line before the footnote list.
@@ -170,8 +170,8 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no
     $text = preg_replace('/ *\r?\n/', $eol, $text);
     if (in_array('pre', $parents)) {
       // Within <pre> tags, all spaces become non-breaking.
-      // chr(160) is the non-breaking space character.
-      $text = str_replace(' ', chr(160), $text);
+      // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+      $text = str_replace(' ', mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'), $text);
     }
     else {
       // Outside <pre> tags, collapse whitespace.
@@ -252,15 +252,15 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no
       // Blockquotes are indented by "> " at each level.
       case 'blockquote':
         $text = $eol;
-        // chr(160) is the non-breaking space character.
-        $indent = '>' . chr(160);
+        // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+        $indent = '>' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES');
         $suffix = $eol;
         break;
 
       // Dictionary definitions are indented by four spaces.
       case 'dd':
-        // chr(160) is the non-breaking space character.
-        $indent = chr(160) . chr(160) . chr(160) . chr(160);
+        // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+        $indent = mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES');
         $suffix = $eol;
         break;
 
@@ -281,50 +281,50 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no
       // Header level 1 is prefixed by eight "=" characters.
       case 'h1':
         $text = "$eol$eol";
-        // chr(160) is the non-breaking space character.
-        $indent = '========' . chr(160);
-        $pad = chr(160) . '=';
+        // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+        $indent = '========' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES');
+        $pad = mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . '=';
         $suffix = $eol;
         break;
 
       // Header level 2 is prefixed by six "-" characters.
       case 'h2':
         $text = "$eol$eol";
-        // chr(160) is the non-breaking space character.
-        $indent = '------' . chr(160);
-        $pad = chr(160) . '-';
+        // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+        $indent = '------' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES');
+        $pad = mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . '-';
         $suffix = $eol;
         break;
 
       // Header level 3 is prefixed by four "." characters and a space.
       case 'h3':
         $text = "$eol$eol";
-        // chr(160) is the non-breaking space character.
-        $indent = '....' . chr(160);
+        // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+        $indent = '....' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES');
         $suffix = $eol;
         break;
 
       // Header level 4 is prefixed by three "." characters and a space.
       case 'h4':
         $text = "$eol$eol";
-        // chr(160) is the non-breaking space character.
-        $indent = '...' . chr(160);
+        // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+        $indent = '...' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES');
         $suffix = $eol;
         break;
 
       // Header level 5 is prefixed by two "." character and a space.
       case 'h5':
         $text = "$eol$eol";
-        // chr(160) is the non-breaking space character.
-        $indent = '..' . chr(160);
+        // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+        $indent = '..' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES');
         $suffix = $eol;
         break;
 
       // Header level 6 is prefixed by one "." character and a space.
       case 'h6':
         $text = "$eol$eol";
-        // chr(160) is the non-breaking space character.
-        $indent = '.' . chr(160);
+        // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+        $indent = '.' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES');
         $suffix = $eol;
         break;
 
@@ -346,14 +346,14 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no
           if ($test) {
             $count = $value->nodeValue;
           }
-          // chr(160) is the non-breaking space character.
-          $indent = ($count < 10 ? chr(160) : '') . chr(160) . "$count)" . chr(160);
+          // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+          $indent = ($count < 10 ? mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') : '') . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . "$count)" . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES');
           $count++;
         }
         // Unordered list item.
         else {
-          // chr(160) is the non-breaking space character.
-          $indent = chr(160) . '*' . chr(160);
+          // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+          $indent = mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . '*' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES');
         }
         $suffix = $eol;
         break;
@@ -379,8 +379,8 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no
       // Separate adjacent table cells by two non-breaking spaces.
       case 'td':
         if (!empty($node->nextSibling)) {
-          // chr(160) is the non-breaking space character.
-          $suffix = chr(160) . chr(160);
+          // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+          $suffix = mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES');
         }
         break;
 
@@ -428,8 +428,8 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no
     $child_text = mailsystem_wrap_mail(
       $prefix . $child_text,
       array(
-        // chr(160) is the non-breaking space character.
-        'break' => chr(160) . $eol,
+        // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+        'break' => mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . $eol,
         'indent' => $indent,
         'max' => $line_length,
         'pad' => $pad,
@@ -438,14 +438,14 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no
     ) . $suffix;
     if ($tag === 'pre') {
       // Perform RFC-3676 soft-wrapping.
-      // chr(160) is the non-breaking space character.
-      $child_text = str_replace(chr(160), ' ', $child_text);
+      // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+      $child_text = str_replace(mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'), ' ', $child_text);
       $child_text = mailsystem_wrap_mail(
         $child_text,
         array('max' => $line_length, 'stuff' => FALSE)
       );
-      // chr(160) is the non-breaking space character.
-      $child_text = str_replace(' ', chr(160), $child_text);
+      // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+      $child_text = str_replace(' ', mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'), $child_text);
     }
     $text .= $child_text;
   }
@@ -559,8 +559,8 @@ function _mailsystem_html_to_text_table(DOMNode $node, $allowed_tags = NULL, arr
             // Render the cell contents.
             $cell = _mailsystem_html_to_text($cell, $allowed_tags, $notes, $try_wraps[$j]);
             // Trim leading line-breaks and trailing whitespace.
-            // chr(160) is the non-breaking space character.
-            $cell = rtrim(ltrim($cell, $eol), ' ' . $eol . chr(160));
+            // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+            $cell = rtrim(ltrim($cell, $eol), ' ' . $eol . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'));
             $table[$i][$j] = $cell;
             if ($cell > '') {
               // Split the cell into lines.
@@ -638,8 +638,8 @@ function _mailsystem_html_to_text_table(DOMNode $node, $allowed_tags = NULL, arr
               // Pad each line to the maximum width in that column.
               $repeat = $widths[$j] - drupal_strlen($line);
               if ($repeat > 0) {
-                // chr(160) is the non-breaking space character.
-                $lines[$k] .= str_repeat(chr(160), $repeat);
+                // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+                $lines[$k] .= str_repeat(mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'), $repeat);
               }
             }
             $table[$i][$j] = $lines;
@@ -667,8 +667,8 @@ function _mailsystem_html_to_text_table(DOMNode $node, $allowed_tags = NULL, arr
           for ($j = 0; $j < $num_cols; $j++) {
             // Add a vertical bar at the end of each cell line.
             $row_line .= $row[$j][$k] . '|';
-            // chr(160) is the non-breaking space character.
-            $trimmed .= trim($row[$j][$k], ' ' . $eol . chr(160));
+            // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+            $trimmed .= trim($row[$j][$k], ' ' . $eol . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'));
           }
           if ($trimmed > '') {
             // Only print rows that are non-empty.
@@ -681,8 +681,8 @@ function _mailsystem_html_to_text_table(DOMNode $node, $allowed_tags = NULL, arr
     }
   }
   // Make sure formatted table content doesn't line-wrap.
-  // chr(160) is the non-breaking space character.
-  return str_replace(' ', chr(160), $text);
+  // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+  return str_replace(' ', mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'), $text);
 }
 
 /**
@@ -726,8 +726,8 @@ function _mailsystem_indent_mail_line(&$line, $key, $values) {
     $line = $values[$key === 0 ? 'indent' : 'clean'] . $line;
   }
   if ($values['stuff']) {
-    // chr(160) is the non-breaking space character.
-    $line = preg_replace('/^(' . chr(160) . '| |>|From)/', ' $1', $line);
+    // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character.
+    $line = preg_replace('/^(' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . '| |>|From)/', ' $1', $line);
   }
 }
 
@@ -738,6 +738,6 @@ function _mailsystem_indent_mail_line(&$line, $key, $values) {
  * non-breaking space characters.
  */
 function _mailsystem_html_to_text_clean($indent) {
-  // chr(160) is the non-breaking space character.
-  return preg_replace('/[^>]/', chr(160), $indent);
+  // Swap every character except ">" for one UTF-8 non-breaking space; the /u modifier matches whole characters, not bytes, so a multi-byte non-breaking space already in $indent is not doubled.
+  return preg_replace('/[^>]/u', mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'), $indent);
 }