? glossary.vmod.module
? glossary.vnew.module
? glossary.vori.module
? trash
Index: glossary.module
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/glossary/glossary.module,v
retrieving revision 1.100.2.9
diff -u -p -r1.100.2.9 glossary.module
--- glossary.module	26 Jan 2008 01:00:22 -0000	1.100.2.9
+++ glossary.module	27 Jan 2008 10:09:54 -0000
@@ -516,6 +516,7 @@ function _glossary_filter_process($forma
     $absolute_link = variable_get("glossary_absolute_$format", false);
     $terms = _glossary_get_terms($format);
     $vids = _glossary_get_filter_vids();
+    $terms_replace = array();
 
     foreach ($terms as $term) {
       $term_title = $term->name .': '. strip_tags($term->description);
@@ -556,104 +557,145 @@ function _glossary_filter_process($forma
       }
       
       // replace term and synonyms with the desired new HTML code
-      foreach ($term->synonyms as $candidate) {
-        $text = _glossary_insertlink($format, $text, $candidate, $ins_before, $ins_after);
-      }
+      $terms_replace[] = array('synonyms' => $term->synonyms, 'ins_before' => $ins_before, 'ins_after' => $ins_after);
     }
+    return _glossary_insertlink($format, $text, $terms_replace);
   }
   return $text;
 }
 
 /**
- * Insert glossary links to $text after every $match that is not inside a link.
- * $ins_before is prepended to the matches, $_insafter is appended to them.
+ * Insert glossary links to $text after every matching $terms[i]['synonyms'] that is not inside a blocking tag.
+ * $terms[i]['ins_before'] is prepended to the matches, $terms[i]['ins_after'] is appended to them.
  * Match type and replace mode all depend on user settings.
- *
- * TODO: improve performance with not keeping *2.5 copies* of the string in memory: 
- *         $text                 - original
- *         $newtext              - transformed
- *         $before . $this_match - for checking stuff
+ *
+ * The text is scanned once for all blocking tags and matches,
+ * then those 'events' are sorted and handled one by one.
  */
-function _glossary_insertlink($format, &$text, $match, $ins_before, $ins_after) {
+function _glossary_insertlink($format, &$text, &$terms) {
   $multibyte_enabled = extension_loaded('mbstring');
-	if ($multibyte_enabled) {
-	  $mb_prefix = 'mb_';
-	}
-	else {
-	  $mb_prefix = null;
-	}
+  if ($multibyte_enabled) {
+    $mb_prefix = 'mb_';
+  }
+  else {
+    $mb_prefix = null;
+  }
   $findfunc = $mb_prefix . (variable_get("glossary_case_$format", "1") ? 'strpos' : 'stripos');
-  $next = $findfunc($text, $match);
-  
-  if ($next === false) { // no match at all
-    return $text;
+  $findtagfunc = $mb_prefix .'stripos';
+
+  $replaceall = variable_get("glossary_replace_all_$format", 0);
+
+  $events = array();
+
+  // Find blocking tags.
+  $open_tags = array('[no-glossary]', '<', '<a ', '<acronym', '<code', '<pre', '[code');
+  $close_tags = array('[/no-glossary]', '>', '</a>', '</acronym>', '</code>', '</pre>', '[/code]');
+  /* use these always/when Codefilter module is on?
+  $open_tags = array('[no-glossary]', '<', '<a ', '<acronym', '<code', '<pre', '[code', '<?php ', '[?php ', '<% ', '[% ', '[codefilter_');
+  $close_tags = array('[/no-glossary]', '>', '</a>', '</acronym>', '</code>', '</pre>', '[/code]', '?>', '?]', '%>', '%]', '[/codefilter_');
+  */
+
+  foreach ($open_tags as $i => $tag) {
+    $offset=0;
+    while(($offset = $findtagfunc($text, $tag, $offset)) !== false) {
+      //longer tags will override shorter '<' on the same offset
+      $events[$offset] = array('type' => 'open', 'which' => $i);
+      $offset += drupal_strlen($tag);
+    }
   }
-  else { // at least one match
-    $prevend    = 0;
-    $newtext    = '';
-    $matchlen   = drupal_strlen($match);
-    $textlen    = drupal_strlen($text);
-    $replaceall = variable_get("glossary_replace_all_$format", 0);
-    
-    while ($next && ($next <= $textlen)) {
-      
-      // get parts of the match for further investigation
-      $before     = drupal_substr($text, 0, $next);
-      $this_match = drupal_substr($text, $next, $matchlen);
-      
-      // see if we have a proper match or not
-      $substr_count_func = $mb_prefix . 'substr_count';
-      $open  = $substr_count_func($before, '<');
-      $close = $substr_count_func($before, '>');
-      $opena  = $substr_count_func($before, '<a ');
-      $closea = $substr_count_func($before, '</a>');
-      $openacro  = $substr_count_func($before, '<acronym');
-      $closeacro = $substr_count_func($before, '</acronym>');
+
+  // Find match candidates: each occurrence of a synonym becomes a 'match' event keyed by its offset in $text.
+  foreach ($terms as $i => $term) {
+    foreach($term['synonyms'] as $synonym) {
+      $offset=0;
+      $first_match_found = false;
+      while(($offset = $findfunc($text, $synonym, $offset)) !== false) {
+        $match = drupal_substr($text, $offset, drupal_strlen($synonym));
+        // Only a longer match replaces an earlier match at the same offset; an event without 'match' (a blocking tag) is replaced.
+        if (!isset($events[$offset]['match']) || drupal_strlen($events[$offset]['match']) < drupal_strlen($match)) {
+          // Store the synonym with the case it has in the text.
+          $events[$offset] = array('type' => 'match', 'which' => $i, 'match' => $match);
+          if (!$replaceall) {
+            $first_match_found = true;
+            break;
+          }
+        }
+        $offset += drupal_strlen($synonym);
+      }
+      //TODO: remove this if we want different synonyms of the same term to be matched independently as 'first matches'
+      if ($first_match_found && !$replaceall) {
+        break;
+      }
+    }
+  }
+
+  ksort($events);
+
+  $newtext = '';
+  $parsed = 0; // text was parsed from chars 0 to $parsed (exclusive)
+
+  foreach($events as $place => $event) {
+    // skip events inside blocking tag (they're already copied as is)
+    if ($place < $parsed) {
+      continue;
+    }
+    // copy plain text (with no events)
+      $newtext .= drupal_substr($text, $parsed, ($place - $parsed));
+      $parsed = $place;
+    // if a blocking tag is opened, skip to closing tag
+    if ($event['type'] == 'open') {
+      $skip = $findtagfunc($text, $close_tags[$event['which']], $place);
+      if ($skip === false) {
+        $skip = drupal_strlen($text);
+      }
+      // if the tag is [no-glossary] - remove it with the closing tag (by incrementing $parsed without copying)
+      if ($event['which'] == 0) {
+        $parsed += drupal_strlen($open_tags[$event['which']]);
+        $newtext .= drupal_substr($text, $parsed, ($skip - $parsed));
+        $parsed = $skip + drupal_strlen($close_tags[$event['which']]);
+      }
+      // copy text without changing it
+      else {
+        $newtext .= drupal_substr($text, $parsed, ($skip - $parsed));
+        $parsed = $skip;
+      }
+    }
+    if ($event['type'] == 'match') {
+      $matchlen = drupal_strlen($event['match']);
       $proper_match = false;
-      if ($opena <= $closea && $open <= $close && $openacro <= $closeacro) { // Not in an open link
-        switch (variable_get("glossary_match_$format", 'b')) {
+      switch (variable_get("glossary_match_$format", 'b')) {
           case 'lr': // require word break left or right
 //            $proper_match = (_glossary_is_boundary($text {$next - 1}) || _glossary_is_boundary($text {$next + $matchlen}));
-            $proper_match = (_glossary_is_boundary(drupal_substr($text, $next - 1,1)) ||
-                             _glossary_is_boundary(drupal_substr($text, $next + $matchlen, 1 )));
+            $proper_match = (_glossary_is_boundary(drupal_substr($text, $place - 1,1)) ||
+                             _glossary_is_boundary(drupal_substr($text, $place + $matchlen, 1 )));
             break;
           case 'b': // require word break left and right
 //            $proper_match = (_glossary_is_boundary($text {$next - 1}) && _glossary_is_boundary($text {$next + $matchlen}));
-            $proper_match = (_glossary_is_boundary(drupal_substr($text, $next - 1, 1)) &&
-                             _glossary_is_boundary(drupal_substr($text,$next + $matchlen,1)));
+            $proper_match = (_glossary_is_boundary(drupal_substr($text, $place - 1, 1)) &&
+                             _glossary_is_boundary(drupal_substr($text,$place + $matchlen,1)));
             break;
           case 'l':  // require word break left
 //            $proper_match = _glossary_is_boundary($text {$next - 1});
-            $proper_match = _glossary_is_boundary(drupal_substr($text,$next - 1,1));
+            $proper_match = _glossary_is_boundary(drupal_substr($text,$place - 1,1));
             break;
           case 'r': // require word break right
 //            $proper_match = _glossary_is_boundary($text {$next + $matchlen});
-            $proper_match = _glossary_is_boundary(drupal_substr($text,$next + $matchlen,1));
+            $proper_match = _glossary_is_boundary(drupal_substr($text,$place + $matchlen,1));
             break;
           case 's': // match any substring
           default:
             $proper_match = true;
             break;
-        }
       }
-      
-      if ($proper_match) { // found match
-        $newtext .= drupal_substr($text, $prevend, ($next - $prevend)) . $ins_before . $this_match . $ins_after;
-        if ($replaceall == 0) { 
-          return $newtext . drupal_substr($text, $next + $matchlen); 
-        }
+      if ($proper_match) {
+        $newtext .= $terms[$event['which']]['ins_before'] . $event['match'] . $terms[$event['which']]['ins_after'];
+        $parsed += $matchlen;
       }
-      else { // not applicable match
-        $newtext .= drupal_substr($text, $prevend, ($next - $prevend)) . $this_match;
-      }
-      
-      // Step further in finding the next match
-      $prevend = $next + $matchlen;
-      $next = $findfunc($text, $match, $prevend);
     }
-    // Append remaining part
-    return $newtext . drupal_substr($text, $prevend);
   }
+
+  // Append remaining part
+  return $newtext . drupal_substr($text, $parsed);
 }
 
 function glossary_page($vid = null, $letter = null) {
@@ -909,12 +951,12 @@ function _glossary_get_synonyms($vid) {
 // the ereg solution used before. The chars used here are from the
 // grep info page.
 function _glossary_is_boundary($char) {
-	if (extension_loaded('mbstring')) {
+  if (extension_loaded('mbstring')) {
     return (mb_strpos("!\"#\$%&'()*+,-./:;<=>?@[\]^_`{|}~ \t\n\r", $char) !== false);
-	}
-	else {
+  }
+  else {
     return (strpos("!\"#\$%&'()*+,-./:;<=>?@[\]^_`{|}~ \t\n\r", $char) !== false);
-	}
+  }
 }
 
 // Natively only available in PHP 5+