Index: includes/unicode.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/unicode.inc,v
retrieving revision 1.27
diff -u -r1.27 unicode.inc
--- includes/unicode.inc	21 Oct 2007 18:59:01 -0000	1.27
+++ includes/unicode.inc	18 Dec 2007 05:31:13 -0000
@@ -203,28 +203,61 @@
  *   The string to truncate.
  * @param $len
  *   An upper limit on the returned string length.
+ * @return
+ *   The truncated string.
+ */
+function drupal_truncate_bytes($string, $len) {
+  if (strlen($string) <= $len) {
+    return $string;
+  }
+  // Back up over UTF-8 continuation bytes (10xxxxxx) so no character is split.
+  while ($len > 0 && (ord($string[$len]) & 0xC0) == 0x80) {
+    $len--;
+  }
+  return substr($string, 0, max($len, 0));
+}
+
+
+/**
+ * Truncate a UTF-8-encoded string safely to a number of characters.
+ *
+ * @param $string
+ *   The string to truncate.
+ * @param $len
+ *   An upper limit on the returned string length, in characters.
  * @param $wordsafe
  *   Flag to truncate at nearest space. Defaults to FALSE.
+ * @param $dots
+ *   Flag to add trailing dots. Defaults to FALSE.
  * @return
  *   The truncated string.
  */
-function truncate_utf8($string, $len, $wordsafe = FALSE, $dots = FALSE) {
-  $slen = strlen($string);
-  if ($slen <= $len) {
+function drupal_truncate_chars($string, $len, $wordsafe = FALSE, $dots = FALSE) {
+  if (drupal_strlen($string) <= $len) {
     return $string;
   }
+  if ($dots) {
+    $len = max($len - 4, 0); // reserve room for ' ...' without going negative
+  }
   if ($wordsafe) {
-    $end = $len;
-    while (($string[--$len] != ' ') && ($len > 0)) {};
-    if ($len == 0) {
-      $len = $end;
+    $string = drupal_substr($string, 0, $len + 1); // keep one extra character to spot a space at the cut
+    if ($last_space = strrpos($string, ' ')) { // byte offset of the last space; FALSE or 0 means none usable
+      $string = substr($string, 0, $last_space);
+    } else {
+      $string = drupal_substr($string, 0, $len);
     }
+  } else {
+    $string = drupal_substr($string, 0, $len);
   }
-  if ((ord($string[$len]) < 0x80) || (ord($string[$len]) >= 0xC0)) {
-    return substr($string, 0, $len) . ($dots ? ' ...' : '');
-  }
-  while (--$len >= 0 && ord($string[$len]) >= 0x80 && ord($string[$len]) < 0xC0) {};
-  return substr($string, 0, $len) . ($dots ? ' ...' : '');
+  if ($dots) {
+    $string .= ' ...';
+  }
+  return $string;
+}
+
+// Temporary wrapper, for testing only: forwards to drupal_truncate_chars().
+function truncate_utf8($string, $len, $wordsafe = FALSE, $dots = FALSE) {   
+  return drupal_truncate_chars($string, $len, $wordsafe, $dots);
 }
 
 /**
@@ -248,7 +281,7 @@
     $len = strlen($string);
     $output = '';
     while ($len > 0) {
-      $chunk = truncate_utf8($string, $chunk_size);
+      $chunk = drupal_truncate_bytes($string, $chunk_size);
       $output .= ' =?UTF-8?B?'. base64_encode($chunk) ."?=\n";
       $c = strlen($chunk);
       $string = substr($string, $c);
