#221712 by Damien Tournoud: fix browser language detection. From: Damien Tournoud --- locale.inc | 54 ++++++++++++++++++++++----------- locale/locale.test | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 117 insertions(+), 21 deletions(-) diff --git includes/locale.inc includes/locale.inc index c3f1f25..7fae73d 100644 --- includes/locale.inc +++ includes/locale.inc @@ -770,33 +770,49 @@ function locale_language_from_content() { * A valid language code on success, FALSE otherwise. */ function locale_language_from_browser($languages) { + if (!isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) { + return; + } + // Specified by the user via the browser's Accept Language setting // Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5" $browser_langs = array(); - - if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) { - $browser_accept = explode(",", $_SERVER['HTTP_ACCEPT_LANGUAGE']); - foreach ($browser_accept as $langpart) { - // The language part is either a code or a code with a quality. - // We cannot do anything with a * code, so it is skipped. - // If the quality is missing, it is assumed to be 1 according to the RFC. - if (preg_match("!([a-z-]+)(;q=([0-9\\.]+))?!", trim($langpart), $found)) { - $browser_langs[$found[1]] = (isset($found[3]) ? (float) $found[3] : 1.0); - } + if (preg_match_all('@([a-zA-Z]{1,8}(?:-[a-zA-Z]{1,8})?|\*)(?:;q=(1(?:\.000)?|0(?:\.[0-9]{0,3})?))?\s*,?\s*@', $_SERVER['HTTP_ACCEPT_LANGUAGE'], $matches, PREG_SET_ORDER)) { + foreach ($matches as $match) { + // We can safely use strtolower() here, tags are ASCII. + // RFC2616 mandates that the decimal part is no more than three digits, + // so we multiply the qvalue by 1000 to avoid floating point comparisons. + $browser_langs[strtolower($match[1])] = isset($match[2]) ? 
(int) ((float) $match[2] * 1000) : 1000; } } - // Order the codes by quality - arsort($browser_langs); - - // Try to find the first preferred language we have - foreach ($browser_langs as $langcode => $q) { - if (isset($languages[$langcode])) { - return $langcode; + // Find the enabled language with the greatest qvalue, following the rules + // of RFC2616 (section 14.4). If several languages have the same qvalue, + // prefer the one with the greatest weight. + $best_match = NULL; + $max_qvalue = 0; + foreach ($languages as $langcode => $language) { + // Language tags are case insensitive (RFC2616, sec 3.10). + $langcode = strtolower($langcode); + $qvalue = NULL; + if (isset($browser_langs[$langcode])) { + $qvalue = $browser_langs[$langcode]; + } + else if ((($prefix = strtok($langcode, '-')) != $langcode) && isset($browser_langs[$prefix])) { + $qvalue = $browser_langs[$prefix]; + } + else if (isset($browser_langs['*'])) { + $qvalue = $browser_langs['*']; + } + + if (isset($qvalue)) { + if ($qvalue > $max_qvalue) { + $best_match = $language; + $max_qvalue = $qvalue; + } } } - - return FALSE; + return $best_match; } /** diff --git modules/locale/locale.test modules/locale/locale.test index e5b5e9f..b13296c 100644 --- modules/locale/locale.test +++ modules/locale/locale.test @@ -1034,9 +1034,9 @@ class LocaleUninstallFrenchFunctionalTest extends LocaleUninstallFunctionalTest /** - * Functional tests for the language switching feature. + * Tests for the language switching feature. 
*/ -class LanguageSwitchingFunctionalTest extends DrupalWebTestCase { +class LanguageSwitchingTest extends DrupalWebTestCase { public static function getInfo() { return array( @@ -1109,6 +1109,86 @@ class LanguageSwitchingFunctionalTest extends DrupalWebTestCase { $this->assertIdentical($links, array('active' => array('en'), 'inactive' => array('fr')), t('Only the current language list item is marked as active on the language switcher block.')); $this->assertIdentical($anchors, array('active' => array('en'), 'inactive' => array('fr')), t('Only the current language anchor is marked as active on the language switcher block.')); } + + /** + * Unit tests for the locale_language_from_browser() function. + */ + function testLanguageFromBrowser() { + // Load the required functions. + require_once DRUPAL_ROOT . '/includes/locale.inc'; + + $languages = array( + // In our test case, 'en' has priority over 'en-US'. + 'en' => (object) array( + 'language' => 'en', + 'enabled' => 1, + 'weight' => 1, + ), + 'en-US' => (object) array( + 'language' => 'en-US', + 'enabled' => 1, + 'weight' => 0.6, + ), + // But 'fr-CA' has priority over 'fr'. + 'fr-CA' => (object) array( + 'language' => 'fr-CA', + 'enabled' => 1, + 'weight' => 0.5, + ), + 'fr' => (object) array( + 'language' => 'fr', + 'enabled' => 1, + 'weight' => 0.4, + ), + // And 'es-MX' is alone. + 'es-MX' => (object) array( + 'language' => 'es-MX', + 'enabled' => 1, + 'weight' => 0.3, + ), + ); + + $test_cases = array( + // Equal qvalue for each language, choose the site preferred one. + 'en,en-US,fr-CA,fr,es-MX' => 'en', + 'fr,en' => 'en', + 'en,fr' => 'en', + 'en-US,fr' => 'en-US', + 'fr,en-US' => 'en-US', + 'fr' => 'fr-CA', + 'fr,es-MX' => 'fr-CA', + 'fr,es' => 'fr-CA', + 'es,fr' => 'fr-CA', + 'es-MX,de' => 'es-MX', + 'de,es-MX' => 'es-MX', + + // Different qvalues.
+ 'en-US,en;q=0.5,fr;q=0.25' => 'en-US', + 'fr,en;q=0.5' => 'fr-CA', + 'fr,en;q=0.5,fr-CA;q=0.25' => 'fr', + + // Those should be equivalent, because a selector matches all sub-languages + 'es-MX,en;q=0.5' => 'es-MX', + 'es,en;q=0.5' => 'es-MX', + + // Silly wildcards are also valid. + '*,fr-CA;q=0.5' => 'en', + '*,en;q=0.25' => 'fr-CA', + 'en,en-US;q=0.5,fr;q=0.25' => 'en', + 'en-US,en;q=0.5,fr;q=0.25' => 'en-US', + + // Unresolvable cases. + '' => NULL, + 'de,pl' => NULL, + $this->randomName(10) => NULL, + ); + + foreach ($test_cases as $accept_language => $expected_result) { + $_SERVER['HTTP_ACCEPT_LANGUAGE'] = $accept_language; + $result = locale_language_from_browser($languages); + $this->assertIdentical(isset($result->language) ? $result->language : NULL, $expected_result, t("Language selection '@accept-language' selects '@result', result = '@actual'", array('@accept-language' => $accept_language, '@result' => $expected_result, '@actual' => isset($result->language) ? $result->language : 'none'))); + } + } } /**