Index: modules/filter/filter.module
===================================================================
RCS file: /cvs/drupal/drupal/modules/filter/filter.module,v
retrieving revision 1.213
diff -u -p -r1.213 filter.module
--- modules/filter/filter.module	6 May 2008 12:18:47 -0000	1.213
+++ modules/filter/filter.module	10 Jun 2008 11:00:26 -0000
@@ -718,20 +718,73 @@ function _filter_url_settings($format) {
  * ftp links, etc.) into hyperlinks.
  */
 function _filter_url($text, $format) {
   // Pass length to regexp callback
   _filter_url_trim(NULL, variable_get('filter_url_length_' . $format, 72));
+  // List of ignored tags - the content of which must be skipped.
+  $ignoretags = 'a|script|style|code';
 
-  $text = ' ' . $text . ' ';
-
-  // Match absolute URLs.
-  $text = preg_replace_callback("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_full_links', $text);
-
-  // Match e-mail addresses.
-  $text = preg_replace("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '\1<a href="mailto:\2">\2</a>\3', $text);
+  // Each replacement type needs its own pass over the text (see the switch
+  // inside the loops). The text must be joined and split again after each
+  // pass, since the replacements create new HTML tags.
+  foreach (range(1, 3) as $task) {
+    // Split at all tags to ensure that no tag name or attribute is processed.
+    // [^>] also matches newlines, so a tag spanning several lines stays whole.
+    $chunks = preg_split('/(<[^>]+?>)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+    // Note: preg_split() returns alternating literals and delimiters; the array
+    // begins and ends with a literal (an empty string is inserted if needed).
+
+    // Holds the currently open ignoretag, or '' if there is none. While it is
+    // set, no replacements are made; the matching closing tag resets it.
+    $opentag = '';
+    for ($i = 0, $count = count($chunks); $i < $count; $i++) {
+      // Even numbers are text, odd numbers are tags.
+      if ($i % 2 == 0) {
+        // Only do replacements when there are no unclosed ignoretags.
+        if ($opentag == '') {
+          switch ($task) {
+            case 1:
+              // Match absolute URLs.
+              $protocols = 'http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://';
+              $urlpattern = "(?:$protocols)(?:[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-])";
+              $re = "!($urlpattern)([\.\,\?\!]*?)!i";
+              $chunks[$i] = preg_replace_callback($re, '_filter_url_parse_full_links', $chunks[$i]);
+              break;
+
+            case 2:
+              // Match e-mail addresses.
+              $urlpattern = '[A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4}';
+              $re = "!($urlpattern)!i";
+              $chunks[$i] = preg_replace($re, '<a href="mailto:\1">\1</a>', $chunks[$i]);
+              break;
+
+            case 3:
+              // Match www domains/addresses.
+              $urlpattern = 'www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-]';
+              $re = "!($urlpattern)([\.\,\?\!]*?)!i";
+              $chunks[$i] = preg_replace_callback($re, '_filter_url_parse_partial_links', $chunks[$i]);
+              break;
+          }
+        }
+      }
+      else {
+        if ($opentag == '') {
+          // No opening ignoretags.
+          if (preg_match("!<($ignoretags)(?:\s|>)!i", $chunks[$i], $matches)) {
+            // Catch and store the tag in question.
+            $opentag = $matches[1];
+          }
+        }
+        else {
+          // Nothing happens until we find a matching closing tag.
+          if (preg_match("!<\/$opentag>!i", $chunks[$i])) {
+            $opentag = '';
+          }
+        }
+      }
+    }
 
-  // Match www domains/addresses.
-  $text = preg_replace_callback("`(<p>|<li>|[ \n\r\t\(])(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_partial_links', $text);
-  $text = substr($text, 1, -1);
+    $text = implode('', $chunks);
+  }
 
   return $text;
 }
@@ -755,8 +808,9 @@ function _filter_htmlcorrector($text) {
 
   // Split tags from text.
   $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
-  // Note: PHP ensures the array consists of alternating delimiters and literals
-  // and begins and ends with a literal (inserting $null as required).
+  // Note: The return value of preg_split() consists of an array of
+  // alternating delimiters and literals that begins and ends with a
+  // literal (inserting empty strings as required).
 
   $tag = false; // Odd/even counter. Tag or no tag.
   $stack = array();
@@ -811,37 +865,40 @@ function _filter_htmlcorrector($text) {
 }
 
 /**
- * Make links out of absolute URLs.
+ * Callback for preg_replace_callback(): turns an absolute URL into a link.
  */
 function _filter_url_parse_full_links($match) {
-  $match[2] = decode_entities($match[2]);
-  $caption = check_plain(_filter_url_trim($match[2]));
-  $match[2] = check_url($match[2]);
-  return $match[1] . '<a href="' . $match[2] . '" title="' . $match[2] . '">' . $caption . '</a>' . $match[5];
+  // $match[1] holds the URL; $match[2] is the punctuation group that follows.
+  $url = decode_entities($match[1]);
+  $caption = check_plain(_filter_url_trim($url));
+  $url = check_url($url);
+  $anchor = '<a href="' . $url . '" title="' . $url . '">' . $caption . '</a>';
+  return $anchor . $match[2];
 }
 
 /**
- * Make links out of domain names starting with "www."
+ * Callback for preg_replace_callback(): links an address starting with "www.".
  */
 function _filter_url_parse_partial_links($match) {
-  $match[2] = decode_entities($match[2]);
-  $caption = check_plain(_filter_url_trim($match[2]));
-  $match[2] = check_plain($match[2]);
-  return $match[1] . '<a href="http://' . $match[2] . '" title="' . $match[2] . '">' . $caption . '</a>' . $match[3];
+  // $match[1] holds the address; $match[2] is the punctuation group after it.
+  $address = decode_entities($match[1]);
+  $caption = check_plain(_filter_url_trim($address));
+  $address = check_plain($address);
+  $anchor = '<a href="http://' . $address . '" title="' . $address . '">' . $caption . '</a>';
+  return $anchor . $match[2];
 }
 
 /**
  * Shortens long URLs to http://www.example.com/long/url...
  */
 function _filter_url_trim($text, $length = NULL) {
-  static $_length;
-  if ($length !== NULL) {
-    $_length = $length;
-  }
+  // A non-NULL $length is remembered as the limit; 72 applies until then.
+  static $_length = 72;
+  $_length = ($length !== NULL) ? $length : $_length;
 
   // Use +3 for '...' string length.
   if (strlen($text) > $_length + 3) {
     $text = substr($text, 0, $_length) . '...';
   }
 
   return $text;
@@ -860,8 +917,10 @@ function _filter_autop($text) {
   // up code. We look for matched pairs and allow basic nesting. For example:
   // "processed <pre> ignored <script> ignored </script> ignored </pre> processed"
   $chunks = preg_split('@(</?(?:pre|script|style|object)[^>]*>)@i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
-  // Note: PHP ensures the array consists of alternating delimiters and literals
-  // and begins and ends with a literal (inserting NULL as required).
+  // Note: The return value of preg_split() consists of an array of
+  // alternating delimiters and literals that begins and ends with a
+  // literal (inserting empty strings as required).
+
   $ignore = FALSE;
   $ignoretag = '';
   $output = '';
