Fix language parsing and matching from language_from_browser(). From: damz --- bootstrap.inc | 10 +++++- language.inc | 56 ++++++++++++++++++++++------------- locale/locale.test | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 22 deletions(-) diff --git includes/bootstrap.inc includes/bootstrap.inc index 334ef64..64addec 100644 --- includes/bootstrap.inc +++ includes/bootstrap.inc @@ -1278,14 +1278,20 @@ function drupal_init_language() { * Get a list of languages set up indexed by the specified key * * @param $field The field to index the list with. - * @param $reset Boolean to request a reset of the list. + * @param $reset Boolean to request a reset of the list, + * or (internal) an array of languages to feed that function with. */ function language_list($field = 'language', $reset = FALSE) { static $languages = NULL; // Reset language list if ($reset) { - $languages = NULL; + if (is_array($reset)) { + $languages = $reset; + } + else { + $languages = NULL; + } } // Init language list diff --git includes/language.inc includes/language.inc index 29207b5..d3c5ab1 100644 --- includes/language.inc +++ includes/language.inc @@ -70,32 +70,48 @@ function language_initialize() { * Identify language from the Accept-language HTTP header we got. */ function language_from_browser() { - // Specified by the user via the browser's Accept Language setting - // Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5" - $browser_langs = array(); - - if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) { - $browser_accept = explode(",", $_SERVER['HTTP_ACCEPT_LANGUAGE']); - for ($i = 0; $i < count($browser_accept); $i++) { - // The language part is either a code or a code with a quality. - // We cannot do anything with a * code, so it is skipped. - // If the quality is missing, it is assumed to be 1 according to the RFC. 
- if (preg_match("!([a-z-]+)(;q=([0-9\\.]+))?!", trim($browser_accept[$i]), $found)) { - $browser_langs[$found[1]] = (isset($found[3]) ? (float) $found[3] : 1.0); - } - } + if (!isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) { + return; } - // Order the codes by quality - arsort($browser_langs); + $browser_langs = array(); + if (preg_match_all('@([a-zA-Z]{1,8}(?:-[a-zA-Z]{1,8})?|\*)(?:;q=(1(?:\.000)?|0(?:\.[0-9]{0,3})?))?\s*,?\s*@', $_SERVER['HTTP_ACCEPT_LANGUAGE'], $matches, PREG_SET_ORDER)) { + foreach ($matches as $match) { + // We can safely use strtolower() here, tags are ASCII. + // RFC2616 mandates that the decimal part is no more than three digits, + // so we multiply the qvalue by 1000 to avoid floating point comparisons. + $browser_langs[strtolower($match[1])] = isset($match[2]) ? (int) ((float) $match[2] * 1000) : 1000; + } + } - // Try to find the first preferred language we have + // Find the enabled language with the greatest qvalue, following the rules + // of RFC2616 (section 14.4). If several languages have the same qvalue, + // prefer the one with the greatest weight. $languages = language_list('enabled'); - foreach ($browser_langs as $langcode => $q) { - if (isset($languages['1'][$langcode])) { - return $languages['1'][$langcode]; + $best_match = NULL; + $max_qvalue = 0; + foreach ($languages['1'] as $langcode => $language) { + // Language tags are case insensitive (RFC2616, sec 3.10). 
+ $langcode = strtolower($langcode); + $qvalue = NULL; + if (isset($browser_langs[$langcode])) { + $qvalue = $browser_langs[$langcode]; + } + else if ((($prefix = strtok($langcode, '-')) != $langcode) && isset($browser_langs[$prefix])) { + $qvalue = $browser_langs[$prefix]; + } + else if (isset($browser_langs['*'])) { + $qvalue = $browser_langs['*']; + } + + if (!is_null($qvalue)) { + if ($qvalue > $max_qvalue) { + $best_match = $language; + $max_qvalue = $qvalue; + } } } + return $best_match; } /** diff --git modules/locale/locale.test modules/locale/locale.test index f8e0f9e..e925587 100644 --- modules/locale/locale.test +++ modules/locale/locale.test @@ -1059,6 +1059,90 @@ class LanguageSwitchingFunctionalTest extends DrupalWebTestCase { $this->assertIdentical($links, array('active' => array('en'), 'inactive' => array('fr')), t('Only the current language list item is marked as active on the language switcher block.')); $this->assertIdentical($anchors, array('active' => array('en'), 'inactive' => array('fr')), t('Only the current language anchor is marked as active on the language switcher block.')); } + + /** + * Unit tests for the language_from_browser() function. + */ + function testLanguageFromBrowser() { + $languages = array( + 'language' => array( + // In our test case, 'en' has priority over 'en-US'. + 'en' => (object) array( + 'language' => 'en', + 'enabled' => 1, + 'weight' => 1, + ), + 'en-US' => (object) array( + 'language' => 'en-US', + 'enabled' => 1, + 'weight' => 0.6, + ), + // But 'fr-CA' has priority over 'fr'. + 'fr-CA' => (object) array( + 'language' => 'fr-CA', + 'enabled' => 1, + 'weight' => 0.5, + ), + 'fr' => (object) array( + 'language' => 'fr', + 'enabled' => 1, + 'weight' => 0.4, + ), + // And 'es-MX' is alone. + 'es-MX' => (object) array( + 'language' => 'es-MX', + 'enabled' => 1, + 'weight' => 0.3, + ), + ) + ); + + drupal_function_exists('language_from_browser'); + + // Initialize the language list. 
+ language_list('language', $languages); + + $test_cases = array( + // Equal qvalue for each language, choose the site preferred one. + 'en,en-US,fr-CA,fr,es-MX' => 'en', + 'fr,en' => 'en', + 'en,fr' => 'en', + 'en-US,fr' => 'en-US', + 'fr,en-US' => 'en-US', + 'fr' => 'fr-CA', + 'fr,es-MX' => 'fr-CA', + 'fr,es' => 'fr-CA', + 'es,fr' => 'fr-CA', + 'es-MX,de' => 'es-MX', + 'de,es-MX' => 'es-MX', + + // Different qvalues. + 'en-US,en;q=0.5,fr;q=0.25' => 'en-US', + 'fr,en;q=0.5' => 'fr-CA', + 'fr,en;q=0.5,fr-CA;q=0.25' => 'fr', + + // Those should be equivalent, because a selector matches all sub-languages. + 'es-MX,en;q=0.5' => 'es-MX', + 'es,en;q=0.5' => 'es-MX', + + // Silly wildcards are also valid. + '*,fr-CA;q=0.5' => 'en', + '*,en;q=0.25' => 'fr-CA', + 'en,en-US;q=0.5,fr;q=0.25' => 'en', + 'en-US,en;q=0.5,fr;q=0.25' => 'en-US', + + // Unresolvable cases. + '' => NULL, + 'de,pl' => NULL, + $this->randomName(10) => NULL, + ); + + foreach ($test_cases as $accept_language => $expected_result) { + $_SERVER['HTTP_ACCEPT_LANGUAGE'] = $accept_language; + $result = language_from_browser(); + $this->assertIdentical(isset($result->language) ? $result->language : NULL, $expected_result, t("Language selection '@accept-language' selects '@result', result = '@actual'", array('@accept-language' => $accept_language, '@result' => $expected_result, '@actual' => isset($result->language) ? $result->language : 'none'))); + } + } } /**