diff --git a/html_to_text.inc b/html_to_text.inc index 39f06eb..26284d2 100644 --- a/html_to_text.inc +++ b/html_to_text.inc @@ -124,8 +124,8 @@ function mailsystem_html_to_text($string, $allowed_tags = NULL) { // and space-stuff special lines. $text = mailsystem_wrap_mail($text, array('max' => 1000 - strlen($eol), 'hard' => TRUE)); // Change non-breaking spaces back to regular spaces, and trim line breaks. - // chr(160) is the non-breaking space character. - $text = str_replace(chr(160), ' ', trim($text, $eol)); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $text = str_replace(mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'), ' ', trim($text, $eol)); // Add footnotes; if ($notes) { // Add a blank line before the footnote list. @@ -170,8 +170,8 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no $text = preg_replace('/ *\r?\n/', $eol, $text); if (in_array('pre', $parents)) { // Within
tags, all spaces become non-breaking. - // chr(160) is the non-breaking space character. - $text = str_replace(' ', chr(160), $text); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $text = str_replace(' ', mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'), $text); } else { // Outsidetags, collapse whitespace. @@ -252,15 +252,15 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no // Blockquotes are indented by "> " at each level. case 'blockquote': $text = $eol; - // chr(160) is the non-breaking space character. - $indent = '>' . chr(160); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $indent = '>' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'); $suffix = $eol; break; // Dictionary definitions are indented by four spaces. case 'dd': - // chr(160) is the non-breaking space character. - $indent = chr(160) . chr(160) . chr(160) . chr(160); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $indent = mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'); $suffix = $eol; break; @@ -281,50 +281,50 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no // Header level 1 is prefixed by eight "=" characters. case 'h1': $text = "$eol$eol"; - // chr(160) is the non-breaking space character. - $indent = '========' . chr(160); - $pad = chr(160) . '='; + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $indent = '========' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'); + $pad = mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . '='; $suffix = $eol; break; // Header level 2 is prefixed by six "-" characters. 
case 'h2': $text = "$eol$eol"; - // chr(160) is the non-breaking space character. - $indent = '------' . chr(160); - $pad = chr(160) . '-'; + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $indent = '------' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'); + $pad = mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . '-'; $suffix = $eol; break; // Header level 3 is prefixed by four "." characters and a space. case 'h3': $text = "$eol$eol"; - // chr(160) is the non-breaking space character. - $indent = '....' . chr(160); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $indent = '....' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'); $suffix = $eol; break; // Header level 4 is prefixed by three "." characters and a space. case 'h4': $text = "$eol$eol"; - // chr(160) is the non-breaking space character. - $indent = '...' . chr(160); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $indent = '...' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'); $suffix = $eol; break; // Header level 5 is prefixed by two "." character and a space. case 'h5': $text = "$eol$eol"; - // chr(160) is the non-breaking space character. - $indent = '..' . chr(160); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $indent = '..' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'); $suffix = $eol; break; // Header level 6 is prefixed by one "." character and a space. case 'h6': $text = "$eol$eol"; - // chr(160) is the non-breaking space character. - $indent = '.' . chr(160); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $indent = '.' . 
mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'); $suffix = $eol; break; @@ -346,14 +346,14 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no if ($test) { $count = $value->nodeValue; } - // chr(160) is the non-breaking space character. - $indent = ($count < 10 ? chr(160) : '') . chr(160) . "$count)" . chr(160); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $indent = ($count < 10 ? mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') : '') . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . "$count)" . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'); $count++; } // Unordered list item. else { - // chr(160) is the non-breaking space character. - $indent = chr(160) . '*' . chr(160); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $indent = mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . '*' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'); } $suffix = $eol; break; @@ -379,8 +379,8 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no // Separate adjacent table cells by two non-breaking spaces. case 'td': if (!empty($node->nextSibling)) { - // chr(160) is the non-breaking space character. - $suffix = chr(160) . chr(160); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $suffix = mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'); } break; @@ -428,8 +428,8 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no $child_text = mailsystem_wrap_mail( $prefix . $child_text, array( - // chr(160) is the non-breaking space character. - 'break' => chr(160) . $eol, + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. 
+ 'break' => mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . $eol, 'indent' => $indent, 'max' => $line_length, 'pad' => $pad, @@ -438,14 +438,14 @@ function _mailsystem_html_to_text(DOMNode $node, array $allowed_tags, array &$no ) . $suffix; if ($tag === 'pre') { // Perform RFC-3676 soft-wrapping. - // chr(160) is the non-breaking space character. - $child_text = str_replace(chr(160), ' ', $child_text); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $child_text = str_replace(mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'), ' ', $child_text); $child_text = mailsystem_wrap_mail( $child_text, array('max' => $line_length, 'stuff' => FALSE) ); - // chr(160) is the non-breaking space character. - $child_text = str_replace(' ', chr(160), $child_text); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $child_text = str_replace(' ', mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'), $child_text); } $text .= $child_text; } @@ -559,8 +559,8 @@ function _mailsystem_html_to_text_table(DOMNode $node, $allowed_tags = NULL, arr // Render the cell contents. $cell = _mailsystem_html_to_text($cell, $allowed_tags, $notes, $try_wraps[$j]); // Trim leading line-breaks and trailing whitespace. - // chr(160) is the non-breaking space character. - $cell = rtrim(ltrim($cell, $eol), ' ' . $eol . chr(160)); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $cell = rtrim(ltrim($cell, $eol), ' ' . $eol . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES')); $table[$i][$j] = $cell; if ($cell > '') { // Split the cell into lines. @@ -638,8 +638,8 @@ function _mailsystem_html_to_text_table(DOMNode $node, $allowed_tags = NULL, arr // Pad each line to the maximum width in that column. $repeat = $widths[$j] - drupal_strlen($line); if ($repeat > 0) { - // chr(160) is the non-breaking space character. 
- $lines[$k] .= str_repeat(chr(160), $repeat); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $lines[$k] .= str_repeat(mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'), $repeat); } } $table[$i][$j] = $lines; @@ -667,8 +667,8 @@ function _mailsystem_html_to_text_table(DOMNode $node, $allowed_tags = NULL, arr for ($j = 0; $j < $num_cols; $j++) { // Add a vertical bar at the end of each cell line. $row_line .= $row[$j][$k] . '|'; - // chr(160) is the non-breaking space character. - $trimmed .= trim($row[$j][$k], ' ' . $eol . chr(160)); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $trimmed .= trim($row[$j][$k], ' ' . $eol . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES')); } if ($trimmed > '') { // Only print rows that are non-empty. @@ -681,8 +681,8 @@ function _mailsystem_html_to_text_table(DOMNode $node, $allowed_tags = NULL, arr } } // Make sure formatted table content doesn't line-wrap. - // chr(160) is the non-breaking space character. - return str_replace(' ', chr(160), $text); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + return str_replace(' ', mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES'), $text); } /** @@ -726,8 +726,8 @@ function _mailsystem_indent_mail_line(&$line, $key, $values) { $line = $values[$key === 0 ? 'indent' : 'clean'] . $line; } if ($values['stuff']) { - // chr(160) is the non-breaking space character. - $line = preg_replace('/^(' . chr(160) . '| |>|From)/', ' $1', $line); + // mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') is the non-breaking space character. + $line = preg_replace('/^(' . mb_convert_encoding(chr(160), 'UTF-8', 'HTML-ENTITIES') . '| |>|From)/', ' $1', $line); } } @@ -738,6 +738,6 @@ function _mailsystem_indent_mail_line(&$line, $key, $values) { * non-breaking space characters. 
*/
 function _mailsystem_html_to_text_clean($indent) {
-  // chr(160) is the non-breaking space character.
-  return preg_replace('/[^>]/', chr(160), $indent);
+  // "\xC2\xA0" is the UTF-8 non-breaking space; /u matches characters, not bytes.
+  return preg_replace('/[^>]/u', "\xC2\xA0", $indent);
 }