From 6f09db94da09282ac867d988db1a70914cd08eb9 Mon Sep 17 00:00:00 2001
From: wonder95 <killshot91@gmail.com>
Date: Thu, 23 May 2013 12:54:14 -0700
Subject: [PATCH] Backport to 6.x-3.x of 1927032 - Avoid spaces between words
 and punctuation marks in search result snippets.

---
 apachesolr.module |   12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/apachesolr.module b/apachesolr.module
index 9c1445e..b5a4dd0 100644
--- a/apachesolr.module
+++ b/apachesolr.module
@@ -2152,6 +2152,18 @@ function apachesolr_clean_text($text) {
   $text = htmlspecialchars(html_entity_decode($text, ENT_QUOTES, 'UTF-8'), ENT_QUOTES, 'UTF-8');
   // Remove extra spaces.
   $text = preg_replace('/\s+/s', ' ', $text);
+  // Remove whitespace around punctuation marks that was probably added
+  // by the safety operations above. This is not a perfect worldwide solution,
+  // but a rough attempt that works at least for US and Western European text.
+  // Pc: Connector punctuation
+  // Pd: Dash punctuation
+  // Pe: Close punctuation
+  // Pf: Final punctuation
+  // Pi: Initial punctuation
+  // Po: Other punctuation, including ¿?¡!,.:;
+  // Ps: Open punctuation
+  $text = preg_replace('/\s(\p{Pc}|\p{Pd}|\p{Pe}|\p{Pf}|!|\?|,|\.|:|;)/s', '$1', $text);
+  $text = preg_replace('/(\p{Ps}|¿|¡)\s/s', '$1', $text);
   return $text;
 }
 
-- 
1.7.10.2 (Apple Git-33)

