? 726080-1_chinese.patch
Index: libraries/extractor_simple.inc
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/extractor/libraries/extractor_simple.inc,v
retrieving revision 1.1
diff -u -p -r1.1 extractor_simple.inc
--- libraries/extractor_simple.inc	4 Dec 2009 15:01:46 -0000	1.1
+++ libraries/extractor_simple.inc	25 Feb 2010 22:20:43 -0000
@@ -73,7 +73,7 @@ function extractor_simple_extract($text)
  * Split text into words.
  */
 function _extractor_simple_split($text) {
-  return preg_split("/[\s,.:\-\(\)\[\]{}*\/]+/", $text);
+  return preg_split("/[\s,.:\-\(\)\[\]{}*\/\'\"]+/u", $text);
 }
 
 /**
@@ -91,20 +91,20 @@ function _extractor_simple_lookup($word)
   if (!$loaded) {
     $result = db_query_range('SELECT tid, name FROM {term_data} WHERE vid = %d', variable_get('extractor_simple_vid', 1), 0, 2000);
     while ($term = db_fetch_object($result)) {
-      $term_cache[strtolower($term->name[0])][$term->name] = $term;
+      $term_cache[$term->tid] = $term;
     }
     $loaded = TRUE;
   }
 
   $terms = array();
   if (!empty($term_cache)) {
-    if (isset($term_cache[strtolower($word[0])])) {
-      foreach ($term_cache[strtolower($word[0])] as $name => $term) {
-        if (stripos($name, $word) === 0) {
-          $terms[$term->tid] = $term;
-        }
+    foreach ($term_cache as $term) {
+      // Exact, type-strict match on the term name; stop at the first hit.
+      if ($term->name === $word) {
+        $terms[$term->tid] = $term;
+        return $terms;
       }
     }
   }
 
   return $terms;
