diff --git a/includes/processor_html_filter.inc b/includes/processor_html_filter.inc
index 0cc4800d..7e824998 100644
--- a/includes/processor_html_filter.inc
+++ b/includes/processor_html_filter.inc
@@ -102,8 +102,11 @@ class SearchApiHtmlFilter extends SearchApiAbstractProcessor {
}
else {
$value = html_entity_decode(strip_tags($text));
- // Remove any multiple or leading/trailing spaces we might have introduced.
- $value = preg_replace('/\s\s+/', ' ', trim($value));
+ // Strip leading/trailing whitespace with a Unicode-aware regex, since
+ // trim() only removes ASCII whitespace by default.
+ $value = preg_replace('/^\s+|\s+$/u', '', $value);
+ // Collapse each run of two or more whitespace characters into one space.
+ $value = preg_replace('/\s{2,}/u', ' ', $value);
}
}
@@ -112,8 +115,11 @@ class SearchApiHtmlFilter extends SearchApiAbstractProcessor {
while (($pos = strpos($text, '<')) !== FALSE) {
if ($boost && $pos > 0) {
$token = html_entity_decode(substr($text, 0, $pos), ENT_QUOTES, 'UTF-8');
- // Remove any multiple or leading/trailing spaces we might have introduced.
- $token = preg_replace('/\s\s+/', ' ', trim($token));
+ // Strip leading/trailing whitespace with a Unicode-aware regex, since
+ // trim() only removes ASCII whitespace by default.
+ $token = preg_replace('/^\s+|\s+$/u', '', $token);
+ // Collapse each run of two or more whitespace characters into one space.
+ $token = preg_replace('/\s{2,}/u', ' ', $token);
$ret[] = array(
'value' => $token,
'score' => $boost,
@@ -138,8 +144,11 @@ class SearchApiHtmlFilter extends SearchApiAbstractProcessor {
}
if ($text) {
$token = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
- // Remove any multiple or leading/trailing spaces we might have introduced.
- $token = preg_replace('/\s\s+/', ' ', trim($token));
+ // Strip leading/trailing whitespace with a Unicode-aware regex, since
+ // trim() only removes ASCII whitespace by default.
+ $token = preg_replace('/^\s+|\s+$/u', '', $token);
+ // Collapse each run of two or more whitespace characters into one space.
+ $token = preg_replace('/\s{2,}/u', ' ', $token);
$ret[] = array(
'value' => $token,
'score' => $boost,