Index: link.module
===================================================================
--- link.module	(revision 27292)
+++ link.module	(working copy)
@@ -908,6 +908,65 @@
   return $url;
 }
 
+/**
+ * Returns $uri with every non-ASCII character replaced by a single "X".
+ *
+ * Input that is not valid UTF-8 falls back to replacing each high byte.
+ */
+function _link_unicode_replace($uri) {
+  $ascii = preg_replace('/[^\x00-\x7F]/u', 'X', $uri); // NULL on bad UTF-8.
+  return $ascii !== NULL ? $ascii : preg_replace('/[^\x00-\x7F]/', 'X', $uri);
+}
+
+/**
+ * Validates a URI with filter_var(), tolerating non-ASCII characters.
+ *
+ * FILTER_VALIDATE_URL rejects non-ASCII input, so a URI that fails is checked
+ * again with each non-ASCII character swapped for an "X" placeholder.
+ *
+ * @param $uri The URI to validate.
+ * @return TRUE if the URI or its ASCII stand-in validates, FALSE otherwise.
+ */
+function _link_filter_var_unicode($uri) {
+  if (filter_var($uri, FILTER_VALIDATE_URL) !== FALSE) {
+    return TRUE;
+  }
+  // Byte-level test, so it does not depend on mbstring's internal encoding.
+  $has_wide_chars = (bool) preg_match('/[^\x00-\x7F]/', $uri);
+  // Only re-check when there was something to replace.
+  return $has_wide_chars && filter_var(_link_unicode_replace($uri), FILTER_VALIDATE_URL) !== FALSE;
+}
+
+/**
+ * Checks whether a string has the shape of an internal link.
+ *
+ * Two patterns are tried in turn: a path with optional directories, query
+ * string and anchor, then a bare file-like name. Each pattern is applied to
+ * the text as given and, if that fails, to _link_unicode_replace($text).
+ *
+ * @param $text The candidate string.
+ * @return TRUE if any attempt matches, FALSE otherwise.
+ */
+function _link_validate_internal_unicode($text) {
+  $first_segment = "/^(?:[a-z0-9_\-+\[\]]+)";
+  $dirs = "(?:\/[a-z0-9_\-\.~+%=&,$'#!():;*@\[\]]*)*";
+  // Four backslashes in PHP source yield one literal backslash in the regex.
+  $query_string = "(?:\/?\?([?a-z0-9+_|\-\.~\/\\\\%=&,$'():;*@\[\]{} ]*))";
+  $fragment = "(?:#[a-z0-9_\-\.~+%=&,$'():;*@\[\]\/\?]*)";
+  // First the full path form, then the bare file-name form.
+  $candidates = array(
+    $first_segment . $dirs .'?'. $query_string .'?'. $fragment .'?'.'$/i',
+    "/^(?:[a-z0-9_\-+\[\]\.]+)$/i",
+  );
+  // Try the raw text first, then its ASCII stand-in, for each pattern.
+  foreach ($candidates as $regex) {
+    if (preg_match($regex, $text) || preg_match($regex, _link_unicode_replace($text))) {
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
 /**
  * A lenient verification for URLs. Accepts all URLs following RFC 1738 standard
  * for URL formation and all email addresses following the RFC 2368 standard for
@@ -918,116 +977,25 @@
  * the following attributes: protocol, hostname, ip, and port.
  */
 function link_validate_url($text) {
-  $LINK_ICHARS_DOMAIN = (string) html_entity_decode(implode("", array( // @TODO completing letters ...
-    "&#x00E6;", // æ
-    "&#x00C6;", // Æ
-    "&#x00C0;", // À
-    "&#x00E0;", // à
-    "&#x00C1;", // Á
-    "&#x00E1;", // á
-    "&#x00C2;", // Â
-    "&#x00E2;", // â
-    "&#x00E5;", // å
-    "&#x00C5;", // Å
-    "&#x00E4;", // ä
-    "&#x00C4;", // Ä
-    "&#x00C7;", // Ç
-    "&#x00E7;", // ç
-    "&#x00D0;", // Ð
-    "&#x00F0;", // ð
-    "&#x00C8;", // È
-    "&#x00E8;", // è
-    "&#x00C9;", // É
-    "&#x00E9;", // é
-    "&#x00CA;", // Ê
-    "&#x00EA;", // ê
-    "&#x00CB;", // Ë
-    "&#x00EB;", // ë
-    "&#x00CE;", // Î
-    "&#x00EE;", // î
-    "&#x00CF;", // Ï
-    "&#x00EF;", // ï
-    "&#x00F8;", // ø
-    "&#x00D8;", // Ø
-    "&#x00F6;", // ö
-    "&#x00D6;", // Ö
-    "&#x00D4;", // Ô
-    "&#x00F4;", // ô
-    "&#x00D5;",	// Õ
-    "&#x00F5;",	// õ
-    "&#x0152;", // Œ
-    "&#x0153;", // œ
-    "&#x00FC;", // ü
-    "&#x00DC;", // Ü
-    "&#x00D9;", // Ù
-    "&#x00F9;", // ù
-    "&#x00DB;", // Û
-    "&#x00FB;", // û
-    "&#x0178;", // Ÿ
-    "&#x00FF;", // ÿ 
-    "&#x00D1;", // Ñ
-    "&#x00F1;", // ñ
-    "&#x00FE;", // þ
-    "&#x00DE;", // Þ
-    "&#x00FD;", // ý
-    "&#x00DD;", // Ý
-    "&#x00BF;", // ¿
-  )), ENT_QUOTES, 'UTF-8');
-
-  $LINK_ICHARS = $LINK_ICHARS_DOMAIN . (string) html_entity_decode(implode("", array(
-    "&#x00DF;", // ß
-  )), ENT_QUOTES, 'UTF-8');
-  $allowed_protocols = variable_get('filter_allowed_protocols', array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'mailto', 'irc', 'ssh', 'sftp', 'webcal'));
-
-  // Starting a parenthesis group with (?: means that it is grouped, but is not captured
-  $protocol = '((?:'. implode("|", $allowed_protocols) .'):\/\/)';
-  $authentication = "(?:(?:(?:[\w\.\-\+!$&'\(\)*\+,;=" . $LINK_ICHARS . "]|%[0-9a-f]{2})+(?::(?:[\w". $LINK_ICHARS ."\.\-\+%!$&'\(\)*\+,;=]|%[0-9a-f]{2})*)?)?@)";
-  $domain = '(?:(?:[a-z0-9' . $LINK_ICHARS_DOMAIN . ']([a-z0-9'. $LINK_ICHARS_DOMAIN . '\-_\[\]])*)(\.(([a-z0-9' . $LINK_ICHARS_DOMAIN . '\-_\[\]])+\.)*('. LINK_DOMAINS .'|[a-z]{2}))?)';
-  $ipv4 = '(?:[0-9]{1,3}(\.[0-9]{1,3}){3})';
-  $ipv6 = '(?:[0-9a-fA-F]{1,4}(\:[0-9a-fA-F]{1,4}){7})';
-  $port = '(?::([0-9]{1,5}))';
-
-  // Pattern specific to external links.
-  $external_pattern = '/^'. $protocol .'?'. $authentication .'?('. $domain .'|'. $ipv4 .'|'. $ipv6 .' |localhost)'. $port .'?';
-
-  // Pattern specific to internal links.
-  $internal_pattern = "/^(?:[a-z0-9". $LINK_ICHARS ."_\-+\[\]]+)";
-  $internal_pattern_file = "/^(?:[a-z0-9". $LINK_ICHARS ."_\-+\[\]\.]+)$/i";
-
-  $directories = "(?:\/[a-z0-9". $LINK_ICHARS ."_\-\.~+%=&,$'#!():;*@\[\]]*)*";
-  // Yes, four backslashes == a single backslash.
-  $query = "(?:\/?\?([?a-z0-9". $LINK_ICHARS ."+_|\-\.~\/\\\\%=&,$'():;*@\[\]{} ]*))";
-  $anchor = "(?:#[a-z0-9". $LINK_ICHARS ."_\-\.~+%=&,$'():;*@\[\]\/\?]*)";
-
-  // The rest of the path for a standard URL.
-  $end = $directories .'?'. $query .'?'. $anchor .'?'.'$/i';
-
-  $message_id = '[^@].*@'. $domain;
-  $newsgroup_name = '(?:[0-9a-z+-]*\.)*[0-9a-z+-]*';
-  $news_pattern = '/^news:('. $newsgroup_name .'|'. $message_id .')$/i';
-
-  $user = '[a-zA-Z0-9'. $LINK_ICHARS .'_\-\.\+\^!#\$%&*+\/\=\?\`\|\{\}~\'\[\]]+';
-  $email_pattern = '/^mailto:'. $user .'@'.'(?:'. $domain .'|'. $ipv4 .'|'. $ipv6 .'|localhost)'. $query .'?$/';
-
   if (strpos($text, '<front>') === 0) {
     return LINK_FRONT;
   }
-  if (in_array('mailto', $allowed_protocols) && preg_match($email_pattern, $text)) {
-    return LINK_EMAIL;
+
+  if (_link_filter_var_unicode($text)) {
+    $allowed_protocols = variable_get('filter_allowed_protocols', array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'mailto', 'irc', 'ssh', 'sftp', 'webcal'));
+    // filter_var() does not restrict the scheme, so enforce the allowed list.
+    $scheme = strtolower(substr($text, 0, (int) strpos($text, ':')));
+    if (!in_array($scheme, $allowed_protocols, TRUE)) {
+      return FALSE;
+    }
+    if ($scheme === 'mailto') {
+      return LINK_EMAIL;
+    }
+    return $scheme === 'news' ? LINK_NEWS : LINK_EXTERNAL;
   }
-  if (in_array('news', $allowed_protocols) && preg_match($news_pattern, $text)) {
-    return LINK_NEWS;
+  elseif (_link_validate_internal_unicode($text)) {
+    return LINK_INTERNAL;
   }
-  if (preg_match($internal_pattern . $end, $text)) {
-    return LINK_INTERNAL;
-  }
-  if (preg_match($external_pattern . $end, $text)) {
-    return LINK_EXTERNAL;
-  }
-  if (preg_match($internal_pattern_file, $text)) {
-    return LINK_INTERNAL;
-  }
-
   return FALSE;
 }
 
