? urlfilter-161217-46.patch
? urlfilter.patch
Index: modules/filter/filter.module
===================================================================
RCS file: /cvs/drupal/drupal/modules/filter/filter.module,v
retrieving revision 1.213
diff -u -p -r1.213 filter.module
--- modules/filter/filter.module	6 May 2008 12:18:47 -0000	1.213
+++ modules/filter/filter.module	10 Jun 2008 09:33:48 -0000
@@ -718,20 +718,75 @@ function _filter_url_settings($format) {
  * ftp links, etc.) into hyperlinks.
  */
 function _filter_url($text, $format) {
-  // Pass length to regexp callback
-  _filter_url_trim(NULL, variable_get('filter_url_length_' . $format, 72));
-
-  $text = ' ' . $text . ' ';
+  // List of tags - the content of which must be skipped.
+  $ignoretags = 'a|script|style|code';
 
-  // Match absolute URLs.
-  $text = preg_replace_callback("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_full_links', $text);
+  // Pass length to regexp callback.
+  _filter_url_trim(NULL, variable_get('filter_url_length_' . $format, 72));
 
-  // Match e-mail addresses.
-  $text = preg_replace("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '\1<a href="mailto:\2">\2</a>\3', $text);
+  // Build the three patterns once; they are the same for every chunk. In each,
+  // the first capturing group is the URL or address that gets linked. The lazy
+  // trailing punctuation group is kept because the callbacks read $match[2].
+  $protocols = 'http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://';
+  $url_re = "`((?:$protocols)(?:[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([\.\,\?\!]*?)`i";
+  $mail_re = '`([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})`i';
+  $www_re = '`(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])([\.\,\?\!]*?)`i';
+
+  // Need to process each case of replacement type separately (see switch in
+  // the middle of loops). The text must be joined and split again after each
+  // replacement, since they create new HTML tags.
+  for ($task = 1; $task <= 3; $task++) {
+    // Split at all tags. This ensures that nothing that is a tagname or
+    // attribute will be processed. The "s" modifier lets "." match newlines,
+    // so a tag that spans several lines is still split off as one delimiter.
+    $chunks = preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+    // Note: PHP ensures the array consists of alternating delimiters and literals
+    // and begins and ends with a literal (inserting empty strings as required).
+    $chunk_count = count($chunks);
+
+    // If an ignoretag is found, it is stored here and removed when the
+    // closing tag is found. Until then no replacements are made.
+    // Think of this as a stack that always has 0 or 1 items.
+    $opentag = '';
+    for ($i = 0; $i < $chunk_count; $i++) {
+      // Even numbers are text, odd numbers are tags.
+      if ($i % 2 == 0) {
+        // Only do replacements when there are no unclosed ignoretags.
+        if ($opentag == '') {
+          switch ($task) {
+            case 1:
+              // Match absolute URLs.
+              $chunks[$i] = preg_replace_callback($url_re, '_filter_url_parse_full_links', $chunks[$i]);
+              break;
+
+            case 2:
+              // Match e-mail addresses.
+              $chunks[$i] = preg_replace($mail_re, '<a href="mailto:\1">\1</a>', $chunks[$i]);
+              break;
+
+            case 3:
+              // Match www domains/addresses.
+              $chunks[$i] = preg_replace_callback($www_re, '_filter_url_parse_partial_links', $chunks[$i]);
+              break;
+          }
+        }
+      }
+      elseif ($opentag == '') {
+        // No ignoretag is open; check whether this tag opens one.
+        if (preg_match("`<($ignoretags)(?:\s|>)`i", $chunks[$i], $matches)) {
+          // Catch and store the tag in question.
+          $opentag = $matches[1];
+        }
+      }
+      elseif (preg_match("`<\/$opentag\s*>`i", $chunks[$i])) {
+        // The matching closing tag ends the skipped region. Whitespace before
+        // the ">" is allowed, as HTML permits it in end tags.
+        $opentag = '';
+      }
+    }
 
-  // Match www domains/addresses.
-  $text = preg_replace_callback("`(<p>|<li>|[ \n\r\t\(])(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_partial_links', $text);
-  $text = substr($text, 1, -1);
+    $text = implode($chunks);
+  }
 
   return $text;
 }
@@ -811,23 +866,27 @@ function _filter_htmlcorrector($text) {
 }
 
 /**
- * Make links out of absolute URLs.
+ * Callback for preg_replace_callback(). Make links out of absolute URLs.
  */
 function _filter_url_parse_full_links($match) {
-  $match[2] = decode_entities($match[2]);
-  $caption = check_plain(_filter_url_trim($match[2]));
-  $match[2] = check_url($match[2]);
-  return $match[1] . '<a href="' . $match[2] . '" title="' . $match[2] . '">' . $caption . '</a>' . $match[5];
+  // $match[1] is the matched URL; $match[2] is the pattern's second group.
+  $url = decode_entities($match[1]);
+  $caption = check_plain(_filter_url_trim($url));
+  $href = check_url($url);
+  $link = '<a href="' . $href . '" title="' . $href . '">' . $caption . '</a>';
+  return $link . $match[2];
 }
 
 /**
- * Make links out of domain names starting with "www."
+ * Callback for preg_replace_callback(). Link domain names starting with "www.".
  */
 function _filter_url_parse_partial_links($match) {
-  $match[2] = decode_entities($match[2]);
-  $caption = check_plain(_filter_url_trim($match[2]));
-  $match[2] = check_plain($match[2]);
-  return $match[1] . '<a href="http://' . $match[2] . '" title="' . $match[2] . '">' . $caption . '</a>' . $match[3];
+  // $match[1] is the address without a scheme; "http://" is prepended to it.
+  $address = decode_entities($match[1]);
+  $caption = check_plain(_filter_url_trim($address));
+  $address = check_plain($address);
+  $link = '<a href="http://' . $address . '" title="' . $address . '">' . $caption . '</a>';
+  return $link . $match[2];
 }
 
 /**
@@ -839,8 +898,7 @@ function _filter_url_trim($text, $length
     $_length = $length;
   }
 
-  // Use +3 for '...' string length.
-  if (strlen($text) > $_length + 3) {
+  if (strlen($text) > $_length) {
     $text = substr($text, 0, $_length) . '...';
   }
 
