--- modules/filter/filter.module.org	2008-05-06 15:18:47.000000000 +0300
+++ modules/filter/filter.module	2008-07-08 00:02:50.000000000 +0300
@@ -718,20 +718,92 @@
  * ftp links, etc.) into hyperlinks.
  */
 function _filter_url($text, $format) {
-  // Pass length to regexp callback
-  _filter_url_trim(NULL, variable_get('filter_url_length_' . $format, 72));
-
-  $text = ' ' . $text . ' ';
+  // Tags whose content must never be turned into links.
+  $ignoretags = 'a|script|style|code';
 
+  // This filter turns three kinds of plain-text references into clickable links:
+  // 1) URLs like http://www.example.com.
+  // 2) E-mail addresses like name@example.com.
+  // 3) Web addresses without the "http://" protocol, like www.example.com.
+  // Each kind is processed in a separate pass, as no single regular expression
+  // could reasonably match all of the cases at once.
+  //
+  // $tasks maps the name of a callback function to the regexp whose matches
+  // that callback processes. The callback receives the match array from
+  // preg_replace_callback() and returns the replacement for the match.
+  // The array is consumed by the matching/replacement loops below.
+  // Start from an empty array rather than NULL so the type is explicit.
+  $tasks = array();
   // Match absolute URLs.
-  $text = preg_replace_callback("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_full_links', $text);
-
+  $protocols = 'http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://';
+  $urlpattern = "(?:$protocols)(?:[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-])";
+  $re = "`($urlpattern)([\.\,\?\!]*?)`i";
+  $tasks['_filter_url_parse_full_links'] = $re;
   // Match e-mail addresses.
-  $text = preg_replace("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '\1<a href="mailto:\2">\2</a>\3', $text);
-
+  // Note: ICANN seems to be on track towards accepting more diverse top level domains,
+  // so this pattern has been "future-proofed" to allow for TLDs of length 2-64.
+  $urlpattern = '[A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,64}';
+  $re = "`($urlpattern)`i";
+  $tasks['_filter_url_parse_email_links'] = $re;
   // Match www domains/addresses.
-  $text = preg_replace_callback("`(<p>|<li>|[ \n\r\t\(])(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_partial_links', $text);
-  $text = substr($text, 1, -1);
+  $urlpattern = 'www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-]';
+  $re = "`($urlpattern)([\.\,\?\!]*?)`i";
+  $tasks['_filter_url_parse_partial_links'] = $re;
+
+
+  // Pass length to regexp callback.
+  _filter_url_trim(NULL, variable_get('filter_url_length_' . $format, 72));
+
+  // We need to process each case of replacement type separately.
+  // The text must be joined and split again after each
+  // replacement, since replacements create new HTML tags and the new
+  // tags must be correctly protected before the next replacement can be done.
+  foreach ($tasks as $task => $re) {
+    // Split at all tags. The "s" modifier lets a tag span several lines, so that
+    // nothing that is a tag name or attribute will ever be processed.
+    $chunks = preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+    // Note: PHP ensures the array consists of alternating literals and delimiters
+    // and begins and ends with a literal (inserting empty strings as required).
+    // Therefore, first chunk is always text:
+    $chunk_type = 'text';
+    // Tags to ignore are defined in $ignoretags (see above).
+    // If an ignoretag is found, it is stored here and removed only when the
+    // closing tag is found. Until the closing tag is found, no replacements are made.
+    $opentag = '';
+
+    for ($i = 0, $count = count($chunks); $i < $count; $i++) {
+      if ($chunk_type == 'text') {
+        // Only do replacements when there are no unclosed ignoretags.
+        if ($opentag == '') {
+          // This is the high point of this function! If there is a match,
+          // a link is created in the callback function named by $task.
+          $chunks[$i] = preg_replace_callback($re, $task, $chunks[$i]);
+        }
+        // Done processing text chunk, so next chunk is a tag.
+        $chunk_type = 'tag';
+      }
+      else {
+        if ($opentag == '') {
+          // No open ignoretags. Process this tag...
+          if (preg_match("`<($ignoretags)(?:\s|>)`i", $chunks[$i], $matches)) {
+            // This matches one of the $ignoretags.
+            // Catch and store the tag in question.
+            $opentag = $matches[1];
+          }
+        }
+        else {
+          // There is an $ignoretag open. See if this is a matching closing tag
+          // (whitespace before ">" is allowed). Nothing else is done until then.
+          if (preg_match("`<\/$opentag\s*>`i", $chunks[$i])) {
+            $opentag = '';
+          }
+        }
+        // Done processing tag chunk, so next chunk is text.
+        $chunk_type = 'text';
+      }
+    }
+    $text = implode('', $chunks);
+  }
 
   return $text;
 }
@@ -811,23 +883,42 @@
 }
 
 /**
- * Make links out of absolute URLs.
+ * Callback function. Make links out of absolute URLs.
  */
 function _filter_url_parse_full_links($match) {
-  $match[2] = decode_entities($match[2]);
-  $caption = check_plain(_filter_url_trim($match[2]));
-  $match[2] = check_url($match[2]);
-  return $match[1] . '<a href="' . $match[2] . '" title="' . $match[2] . '">' . $caption . '</a>' . $match[5];
+  // Capture group 1 of the regexp holds the URL; group 2 holds whatever
+  // trailing punctuation was matched after it.
+  $url = decode_entities($match[1]);
+  // The caption is the URL passed through _filter_url_trim() and check_plain().
+  $caption = check_plain(_filter_url_trim($url));
+  $url = check_url($url);
+  return '<a href="' . $url . '" title="' . $url . '">' . $caption . '</a>' . $match[2];
 }
 
 /**
- * Make links out of domain names starting with "www."
+ * Callback function. Make links out of e-mail addresses.
+ */
+function _filter_url_parse_email_links($match) {
+  // $match[0] holds the complete match, i.e. the e-mail address itself.
+  $address = decode_entities($match[0]);
+  // The caption is the address passed through _filter_url_trim() and check_plain().
+  $caption = check_plain(_filter_url_trim($address));
+  // Run the address through check_url() before it goes into the attributes.
+  $address = check_url($address);
+  return '<a href="mailto:' . $address . '" title="' . $address . '">' . $caption . '</a>';
+}
+
+/**
+ * Callback function. Make links out of domain names starting with "www.".
  */
 function _filter_url_parse_partial_links($match) {
-  $match[2] = decode_entities($match[2]);
-  $caption = check_plain(_filter_url_trim($match[2]));
-  $match[2] = check_plain($match[2]);
-  return $match[1] . '<a href="http://' . $match[2] . '" title="' . $match[2] . '">' . $caption . '</a>' . $match[3];
+  // Capture group 1 of the regexp holds the address starting with "www.";
+  // group 2 holds whatever trailing punctuation was matched after it.
+  $address = decode_entities($match[1]);
+  // The caption is the address passed through _filter_url_trim() and check_plain().
+  $caption = check_plain(_filter_url_trim($address));
+  $address = check_plain($address);
+  return '<a href="http://' . $address . '" title="' . $address . '">' . $caption . '</a>' . $match[2];
 }
 
 /**
@@ -839,8 +930,7 @@
     $_length = $length;
   }
 
-  // Use +3 for '...' string length.
-  if (strlen($text) > $_length + 3) {
+  if (strlen($text) > $_length) {
     $text = substr($text, 0, $_length) . '...';
   }
 
