? .DS_Store
? locale_confirm_280628_7x_13.patch
? required-hidden.patch
? urlfilter-80.patch
? modules/.DS_Store
? modules/aggregator/.DS_Store
? modules/block/.DS_Store
? modules/book/.DS_Store
? sites/all/.DS_Store
? sites/all/README.txt
? sites/all/modules
? sites/default/files
? sites/default/settings.php
Index: modules/filter/filter.module
===================================================================
RCS file: /cvs/drupal/drupal/modules/filter/filter.module,v
retrieving revision 1.217
diff -u -p -r1.217 filter.module
--- modules/filter/filter.module	24 Jul 2008 16:25:17 -0000	1.217
+++ modules/filter/filter.module	9 Aug 2008 15:17:05 -0000
@@ -716,27 +716,150 @@ function _filter_url_settings($format) {
 /**
  * URL filter. Automatically converts text web addresses (URLs, e-mail addresses,
  * ftp links, etc.) into hyperlinks.
+ *
+ * Three types of addresses are made clickable, each in its own pass because
+ * no single regular expression could match all of the cases:
+ *  - URLs like http://www.example.com.
+ *  - E-mail addresses like name@example.com.
+ *  - Web addresses without the "http://" protocol defined, like www.example.com.
+ * Text inside tags, and inside a, script, style and code elements, is skipped.
  */
 function _filter_url($text, $format) {
-  // Pass length to regexp callback
-  _filter_url_trim(NULL, variable_get('filter_url_length_' . $format, 72));
+  // List of tags - the content of which must be skipped.
+  $ignore_tags = 'a|script|style|code';
 
-  $text = ' ' . $text . ' ';
+  // Create an array which contains the regexps for each type of link.
+  // The key to the regexp is the name of a function that is used as
+  // callback function to process matches of the regexp. The callback function
+  // is to return the replacement for the match. The array is walked in
+  // insertion order by the matching/replacement loops below.
+  $tasks = array();
 
   // Match absolute URLs.
-  $text = preg_replace_callback("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_full_links', $text);
+  $protocols = 'http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://';
+  $url_pattern = "(?:$protocols)(?:[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-])";
+  $replacement = "`($url_pattern)([\.\,\?\!]*?)`i";
+  $tasks['_filter_url_parse_full_links'] = $replacement;
 
   // Match e-mail addresses.
-  $text = preg_replace("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '\1<a href="mailto:\2">\2</a>\3', $text);
+  // Note: The ICANN seems to be on track towards accepting more diverse top level domains,
+  // so this pattern has been "future-proofed" to allow for TLD's of length 2-64.
+  $url_pattern = '[A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,64}';
+  $replacement = "`($url_pattern)`i";
+  $tasks['_filter_url_parse_email_links'] = $replacement;
 
   // Match www domains/addresses.
-  $text = preg_replace_callback("`(<p>|<li>|[ \n\r\t\(])(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_partial_links', $text);
-  $text = substr($text, 1, -1);
+  $url_pattern = 'www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-]';
+  $replacement = "`($url_pattern)([\.\,\?\!]*?)`i";
+  $tasks['_filter_url_parse_partial_links'] = $replacement;
+
+  // Pass length to regexp callback.
+  _filter_url_trim(NULL, variable_get('filter_url_length_' . $format, 72));
+
+  // We need to process each case of replacement type separately.
+  // The text must be joined and split again after each
+  // replacement, since replacements create new HTML tags and the new
+  // tags must be correctly protected before the next replacement can be done.
+  foreach ($tasks as $task => $replacement) {
+    // Split at all tags; the "s" modifier lets a tag span several lines.
+    // This ensures that nothing that is a tagname or attribute will be processed.
+    $chunks = preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+    // Note: preg_split() returns alternating literals and delimiters, beginning
+    // and ending with a literal (inserting empty strings as required).
+    // Therefore, first chunk is always text:
+    $chunk_type = 'text';
+    // Tags to ignore are defined in $ignore_tags (see above).
+    // If an ignoretag is found, it is stored here and removed only when the
+    // closing tag is found. Until the closing tag is found, no replacements are made.
+    $open_tag = '';
+
+    for ($i = 0, $count = count($chunks); $i < $count; $i++) {
+      if ($chunk_type == 'text') {
+        // Only do replacements when there are no unclosed ignoretags.
+        if ($open_tag == '') {
+          // This is the high point of this function! If there is a match,
+          // a link is created in the callback function named by $task.
+          $chunks[$i] = preg_replace_callback($replacement, $task, $chunks[$i]);
+        }
+        // Done processing text chunk, so next chunk is a tag.
+        $chunk_type = 'tag';
+      }
+      else {
+        if ($open_tag == '') {
+          // No open ignoretags. Process this tag...
+          if (preg_match("`<($ignore_tags)(?:\s|>)`i", $chunks[$i], $matches)) {
+            // This matches one of the $ignore_tags.
+            // Catch and store the tag in question.
+            $open_tag = $matches[1];
+          }
+        }
+        else {
+          // There is an $ignoretag open. See if this is a matching closing tag.
+          // Nothing else is done until we find the closing tag.
+          if (preg_match("`<\/$open_tag>`i", $chunks[$i], $matches)) {
+            $open_tag = '';
+          }
+        }
+        // Done processing tag chunk, so next chunk is text.
+        $chunk_type = 'text';
+      }
+    }
+    $text = implode($chunks);
+  }
 
   return $text;
 }
 
 /**
+ * Callback function. Make links out of absolute URLs.
+ *
+ * $match[1] holds the URL and $match[2] any trailing punctuation, as captured
+ * by the regexp registered for this callback in _filter_url().
+ */
+function _filter_url_parse_full_links($match) {
+  // The $i:th parenthesis in the regexp contains the URL.
+  $i = 1;
+
+  $match[$i] = decode_entities($match[$i]);
+  $caption = check_plain(_filter_url_trim($match[$i]));
+  $match[$i] = check_url($match[$i]);
+
+  // Group $i + 1 is the punctuation that followed the URL; keep it outside the link.
+  return '<a href="' . $match[$i] . '" title="' . $match[$i] . '">' . $caption . '</a>' . $match[$i + 1];
+}
+
+/**
+ * Callback function. Make links out of e-mail addresses.
+ *
+ * The whole match ($match[0]) is the address; it becomes a "mailto:" link.
+ */
+function _filter_url_parse_email_links($match) {
+  $address = decode_entities($match[0]);
+  // Build the caption first; the address is passed through check_url() after.
+  $caption = check_plain(_filter_url_trim($address));
+  $address = check_url($address);
+
+  return '<a href="mailto:' . $address . '" title="' . $address . '">' . $caption . '</a>';
+}
+
+/**
+ * Callback function. Make links out of domain names starting with "www.".
+ *
+ * $match[1] holds the address and $match[2] any trailing punctuation. The
+ * href gets an "http://" prefix because the matched text has no protocol.
+ */
+function _filter_url_parse_partial_links($match) {
+  // The $i:th parenthesis in the regexp contains the URL.
+  $i = 1;
+
+  $match[$i] = decode_entities($match[$i]);
+  $caption = check_plain(_filter_url_trim($match[$i]));
+  $match[$i] = check_plain($match[$i]);
+  // Group $i + 1 is the punctuation that followed the address; keep it outside the link.
+  return '<a href="http://' . $match[$i] . '" title="' . $match[$i] . '">' . $caption . '</a>' . $match[$i + 1];
+}
+
+/**
  * Scan input and make sure that all HTML tags are properly closed and nested.
  */
 function _filter_htmlcorrector($text) {
