SPACE U+00A0 NO BREAK SPACE U+1680 <9A><80> OGHAM SPACE MARK U+180E <8E> MONGOLIAN VOWEL SEPARATOR U+2000-U+200A <80><80>-<80><8A> U+2028 <80> LINE SEPARATOR U+2029 <80> PARAGRAPH SEPARATOR U+202F <80> NARROW NO-BREAK SPACE U+205F <81><9F> MEDIUM MATHEMATICAL SPACE U+3000 <80><80> IDEOGRAPHIC SPACE */
function truncate_to_words($string, $len, $wordsafe = FALSE, $dots = FALSE) {
  // Returns at most $len characters from the start of the UTF-8 encoded
  // $string, decoding it byte by byte (no mbstring dependency).
  //
  // - $string:   UTF-8 encoded input.
  // - $len:      maximum number of characters (not bytes) to keep.
  // - $wordsafe: when TRUE and the input really had to be shortened, the
  //              result is cut back to the text preceding the last separator
  //              seen within the first $len characters. If no separator was
  //              seen (or nothing precedes it), the plain cut is returned.
  // - $dots:     accepted for signature compatibility; this implementation
  //              does not read it.
  //
  // A byte that cannot start a UTF-8 sequence (0x80-0xC1, 0xF5-0xFF) is
  // skipped but still counts towards $len.

  // Three-byte separators, keyed by lead byte, then by each continuation byte.
  // NOTE(review): U+00A0, U+180E and U+202F are not treated as break points
  // here -- presumably on purpose, since they are non-breaking; confirm.
  static $separators;
  if (!isset($separators)) {
    $separators = array(
      0xE1 => array(
        0x9A => array(
          0x80 => TRUE, // Ogham Space Mark
        ),
      ),
      0xE2 => array(
        0x80 => array(
          0x80 => TRUE, // en quad
          0x81 => TRUE, // em quad
          0x82 => TRUE, // en space
          0x83 => TRUE, // em space
          0x84 => TRUE, // three-per-em space
          0x85 => TRUE, // four-per-em space
          0x86 => TRUE, // six-per-em space
          0x87 => TRUE, // figure space
          0x88 => TRUE, // punctuation space
          0x89 => TRUE, // thin space
          0x8A => TRUE, // hair space
          0xA8 => TRUE, // line separator
          0xA9 => TRUE, // paragraph separator
        ),
        0x81 => array(
          0x9F => TRUE, // medium mathematical space
        ),
      ),
      0xE3 => array(
        0x80 => array(
          0x80 => TRUE, // ideographic space
        ),
      ),
    );
  }

  $string = (string) $string;
  $size = strlen($string);
  $to_space = $return = '';
  $count = $index = 0;

  // Stop at the end of the input as well as at $len characters, so we never
  // index past the last byte.
  while ($count < $len && $index < $size) {
    $c = $string[$index];
    $o = ord($c);

    // Sequence length implied by the lead byte; 0 marks an invalid lead byte.
    if ($o < 0x80) {
      $width = 1;
    }
    elseif ($o >= 0xC2 && $o <= 0xDF) {
      $width = 2;
    }
    elseif ($o >= 0xE0 && $o <= 0xEF) {
      $width = 3;
    }
    elseif ($o >= 0xF0 && $o <= 0xF4) {
      $width = 4;
    }
    else {
      $width = 0;
    }

    if ($width === 0) {
      // Invalid lead byte: drop it, but count it like the other branches.
      $index++;
      $count++;
      continue;
    }

    if ($index + $width > $size) {
      // The input ends in the middle of a multi-byte sequence. Never emit a
      // partial character; treat this as the end of the input.
      $index = $size;
      break;
    }

    if ($width === 1) {
      $is_separator = ($o == 0x20);
    }
    elseif ($width === 3) {
      $is_separator = isset($separators[$o][ord($string[$index + 1])][ord($string[$index + 2])]);
    }
    else {
      $is_separator = FALSE;
    }

    if ($is_separator) {
      // Remember everything before this separator as the word-safe result.
      $to_space = $return;
    }

    $return .= substr($string, $index, $width);
    $index += $width;
    $count++;
  }

  // Only fall back to the word boundary when input was actually left over;
  // a string that fits entirely must be returned whole. Compare against ''
  // explicitly so that a prefix such as '0' is not mistaken for "no match".
  $truncated = ($index < $size);
  if ($wordsafe && $truncated && $to_space !== '') {
    return $to_space;
  }
  return $return;
}

echo truncate_to_words('áéó űúí', 5, TRUE);
echo "!\n";
echo truncate_to_words('áéó űúí', 5, FALSE);
echo "!\n";