array( 0x9A => array( 0x80 => TRUE, // U+1680 Ogham Space Mark ), ), 0xE2 => array( 0x80 => array( 0x80 => TRUE, // U+2000 en quad 0x81 => TRUE, // U+2001 em quad 0x82 => TRUE, // U+2002 en space 0x83 => TRUE, // U+2003 em space 0x84 => TRUE, // U+2004 three-per-em space 0x85 => TRUE, // U+2005 four-per-em space 0x86 => TRUE, // U+2006 six-per-em space 0x87 => TRUE, // U+2007 figure space 0x88 => TRUE, // U+2008 punctuation space 0x89 => TRUE, // U+2009 thin space 0x8A => TRUE, // U+200A hair space 0xA8 => TRUE, // U+2028 line separator 0xA9 => TRUE, // U+2029 paragraph separator ), 0x81 => array( 0x9F => TRUE, // U+205F medium mathematical space ), ), 0xE3 => array( 0x80 => array( 0x80 => TRUE, // U+3000 ideographic space ), ), ); $to_space = $return = ''; $count = $index = 0; while ($count < $len) { $c = $string[$index++]; $o = ord($c); if ($o < 0x80) { if ($o == 0x20) { $to_space = $return; } $return .= $c; } elseif ($o >= 0xC2 && $o <= 0xDF) { $c1 = $string[$index++]; $return .= $c; $return .= $c1; } elseif ($o >= 0xE0 && $o <= 0xEF) { $c1 = $string[$index++]; $c2 = $string[$index++]; if (isset($separators[$o][ord($c1)][ord($c2)])) { $to_space = $return; } $return .= $c; $return .= $c1; $return .= $c2; } elseif ($o >= 0xF0 && $o <= 0xF4) { $return .= $c; $return .= $string[$index++]; $return .= $string[$index++]; $return .= $string[$index++]; } $count++; } return ($wordsafe && $to_space) ? $to_space : $return; } echo truncate_to_words('áéó űúí', 5, TRUE); echo "!\n"; echo truncate_to_words('áéó űúí', 5, FALSE); echo "!\n";