Index: includes/language.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/language.inc,v
retrieving revision 1.14.2.2
diff -u -p -r1.14.2.2 language.inc
--- includes/language.inc	22 Mar 2010 12:18:04 -0000	1.14.2.2
+++ includes/language.inc	7 Oct 2010 14:50:44 -0000
@@ -70,32 +70,91 @@ function language_initialize() {
  * Identify language from the Accept-language HTTP header we got.
+ *
+ * @return
+ *   The enabled language object that best matches the browser's
+ *   Accept-Language header, or NULL if the header is missing or empty, or
+ *   if no enabled language matches it.
  */
 function language_from_browser() {
-  // Specified by the user via the browser's Accept Language setting
+  if (empty($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
+    return NULL;
+  }
+
+  // Get the enabled languages, keyed by language code.
+  $languages = language_list('enabled');
+  $languages = $languages['1'];
+
+  // RFC 2616 (section 14.4) defines the Accept-Language header as follows:
+  //   Accept-Language = "Accept-Language" ":"
+  //                     1#( language-range [ ";" "q" "=" qvalue ] )
+  //   language-range  = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
   // Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5"
   $browser_langs = array();
-
-  if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
-    $browser_accept = explode(",", $_SERVER['HTTP_ACCEPT_LANGUAGE']);
-    for ($i = 0; $i < count($browser_accept); $i++) {
-      // The language part is either a code or a code with a quality.
-      // We cannot do anything with a * code, so it is skipped.
-      // If the quality is missing, it is assumed to be 1 according to the RFC.
-      if (preg_match("!([a-z-]+)(;q=([0-9\\.]+))?!", trim($browser_accept[$i]), $found)) {
-        $browser_langs[$found[1]] = (isset($found[3]) ? (float) $found[3] : 1.0);
-      }
+  // The lookbehind only lets a tag start at the beginning of the header or
+  // after a comma or space, so the tail of a malformed entry is not mistaken
+  // for a language tag. The header is trimmed because trailing whitespace
+  // would keep the last entry from matching.
+  if (preg_match_all('@(?<=[, ]|^)([a-zA-Z-]+|\*)(?:;q=([0-9.]+))?(?:$|\s*,\s*)@', trim($_SERVER['HTTP_ACCEPT_LANGUAGE']), $matches, PREG_SET_ORDER)) {
+    foreach ($matches as $match) {
+      // We can safely use strtolower() here, tags are ASCII.
+      // RFC2616 mandates that the decimal part is no more than three digits,
+      // so we multiply the qvalue by 1000 to avoid floating point comparisons.
+      // round() keeps a product that lands just below a whole number from
+      // being truncated to the next lower integer.
+      $langcode = strtolower($match[1]);
+      $qvalue = isset($match[2]) ? (float) $match[2] : 1;
+      $browser_langs[$langcode] = (int) round($qvalue * 1000);
     }
   }
 
-  // Order the codes by quality
-  arsort($browser_langs);
+  // Some browsers (especially some versions of Internet Explorer) sometimes
+  // send a specific language tag (fr-CA) without the corresponding generic
+  // tag (fr). In that case, we assume that the lowest value of the specific
+  // tags is the value of the generic language. Sorting in ascending order
+  // makes the lowest-valued specific tag the first one seen for its generic
+  // tag.
+  asort($browser_langs);
+  foreach ($browser_langs as $langcode => $qvalue) {
+    $generic_tag = strtok($langcode, '-');
+    if (!isset($browser_langs[$generic_tag])) {
+      $browser_langs[$generic_tag] = $qvalue;
+    }
+  }
 
-  // Try to find the first preferred language we have
-  $languages = language_list('enabled');
-  foreach ($browser_langs as $langcode => $q) {
-    if (isset($languages['1'][$langcode])) {
-      return $languages['1'][$langcode];
+  // Find the enabled language with the greatest qvalue, following the rules
+  // of RFC 2616 (section 14.4). If several languages have the same qvalue,
+  // the one that comes first in the enabled language list is kept.
+  $best_match = NULL;
+  $max_qvalue = 0;
+  foreach ($languages as $langcode => $language) {
+    // Language tags are case insensitive (RFC2616, sec 3.10).
+    $langcode = strtolower($langcode);
+
+    // If nothing matches below, the default qvalue is the one of the wildcard
+    // language, if set, or is 0 (which will never match).
+    $qvalue = isset($browser_langs['*']) ? $browser_langs['*'] : 0;
+
+    // Find the longest possible prefix of the browser-supplied language
+    // ('the language-range') that matches this site language ('the language tag').
+    // Each pass drops the last "-subtag"; once no hyphen is left the prefix
+    // becomes empty and the loop ends.
+    $prefix = $langcode;
+    do {
+      if (isset($browser_langs[$prefix])) {
+        $qvalue = $browser_langs[$prefix];
+        break;
+      }
+    }
+    while ($prefix = substr($prefix, 0, strrpos($prefix, '-')));
+
+    // Find the best match.
+    if ($qvalue > $max_qvalue) {
+      $best_match = $language->language;
+      $max_qvalue = $qvalue;
     }
   }
+
+  // $best_match stays NULL when no enabled language got a qvalue above 0.
+  return isset($best_match) ? $languages[$best_match] : NULL;
 }
 
 /**