#221712 by Damien Tournoud: fix browser language detection.

From: Damien Tournoud <damien@tournoud.net>


---
 locale.inc         |   53 +++++++++++++++++++++-----------
 locale/locale.test |   85 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 118 insertions(+), 20 deletions(-)

diff --git includes/locale.inc includes/locale.inc
index c3f1f25..f8ce145 100644
--- includes/locale.inc
+++ includes/locale.inc
@@ -770,33 +770,50 @@ function locale_language_from_content() {
  *   A valid language code on success, FALSE otherwise.
  */
 function locale_language_from_browser($languages) {
+  if (!isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
+    return NULL;
+  }
+
   // Specified by the user via the browser's Accept Language setting
   // Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5"
   $browser_langs = array();
-
-  if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
-    $browser_accept = explode(",", $_SERVER['HTTP_ACCEPT_LANGUAGE']);
-    foreach ($browser_accept as $langpart) {
-      // The language part is either a code or a code with a quality.
-      // We cannot do anything with a * code, so it is skipped.
-      // If the quality is missing, it is assumed to be 1 according to the RFC.
-      if (preg_match("!([a-z-]+)(;q=([0-9\\.]+))?!", trim($langpart), $found)) {
-        $browser_langs[$found[1]] = (isset($found[3]) ? (float) $found[3] : 1.0);
-      }
+  if (preg_match_all('@(?<=[,\s]|^)([a-zA-Z]{1,8}(?:-[a-zA-Z]{1,8})*|\*)(?:\s*;\s*q=(1(?:\.0{0,3})?|0(?:\.[0-9]{0,3})?))?(?:$|\s*,\s*)@', trim($_SERVER['HTTP_ACCEPT_LANGUAGE']), $matches, PREG_SET_ORDER)) {
+    foreach ($matches as $match) {
+      // Tags are ASCII, so strtolower() is safe. RFC2616 allows at most three
+      // decimals in a qvalue, so store it as an integer number of thousandths;
+      // round() keeps float representation error from dropping a unit.
+      $browser_langs[strtolower($match[1])] = isset($match[2]) ? (int) round((float) $match[2] * 1000) : 1000;
     }
   }
 
-  // Order the codes by quality
-  arsort($browser_langs);
-
-  // Try to find the first preferred language we have
-  foreach ($browser_langs as $langcode => $q) {
-    if (isset($languages[$langcode])) {
-      return $langcode;
+  // Find the enabled language with the greatest qvalue, following the rules
+  // of RFC2616 (section 14.4). On equal qvalues the language listed first in
+  // $languages wins, because only a strictly greater qvalue replaces a match.
+  $best_match = NULL;
+  $max_qvalue = 0;
+  foreach ($languages as $langcode => $language) {
+    // Language tags are case insensitive (RFC2616, sec 3.10).
+    $langcode = strtolower($langcode);
+    $qvalue = NULL;
+    if (isset($browser_langs[$langcode])) {
+      $qvalue = $browser_langs[$langcode];
+    }
+    elseif ((($prefix = strtok($langcode, '-')) != $langcode) && isset($browser_langs[$prefix])) {
+      $qvalue = $browser_langs[$prefix];
+    }
+    elseif (isset($browser_langs['*'])) {
+      $qvalue = $browser_langs['*'];
+    }
+
+    if (isset($qvalue)) {
+      if ($qvalue > $max_qvalue) {
+        $best_match = $language->language;
+        $max_qvalue = $qvalue;
+      }
     }
   }
 
-  return FALSE;
+  return $best_match;
 }
 
 /**
diff --git modules/locale/locale.test modules/locale/locale.test
index e5b5e9f..cc27324 100644
--- modules/locale/locale.test
+++ modules/locale/locale.test
@@ -1034,9 +1034,9 @@ class LocaleUninstallFrenchFunctionalTest extends LocaleUninstallFunctionalTest
 
 
 /**
- * Functional tests for the language switching feature.
+ * Tests for the language switching feature.
  */
-class LanguageSwitchingFunctionalTest extends DrupalWebTestCase {
+class LanguageSwitchingTest extends DrupalWebTestCase {
 
   public static function getInfo() {
     return array(
@@ -1109,6 +1109,87 @@ class LanguageSwitchingFunctionalTest extends DrupalWebTestCase {
     $this->assertIdentical($links, array('active' => array('en'), 'inactive' => array('fr')), t('Only the current language list item is marked as active on the language switcher block.'));
     $this->assertIdentical($anchors, array('active' => array('en'), 'inactive' => array('fr')), t('Only the current language anchor is marked as active on the language switcher block.'));
   }
+
+  /**
+   * Unit tests for the locale_language_from_browser() function.
+   */
+  function testLanguageFromBrowser() {
+    // Load the required functions.
+    require_once DRUPAL_ROOT . '/includes/locale.inc';
+
+    $languages = array(
+      // In our test case, 'en' has priority over 'en-US'.
+      'en' => (object) array(
+        'language' => 'en',
+        'enabled' => 1,
+        'weight' => 1,
+      ),
+      'en-US' => (object) array(
+        'language' => 'en-US',
+        'enabled' => 1,
+        'weight' => 0.6,
+      ),
+      // But 'fr-CA' has priority over 'fr'.
+      'fr-CA' => (object) array(
+        'language' => 'fr-CA',
+        'enabled' => 1,
+        'weight' => 0.5,
+      ),
+      'fr' => (object) array(
+        'language' => 'fr',
+        'enabled' => 1,
+        'weight' => 0.4,
+      ),
+      // And 'es-MX' is alone.
+      'es-MX' => (object) array(
+        'language' => 'es-MX',
+        'enabled' => 1,
+        'weight' => 0.3,
+      ),
+    );
+
+    $test_cases = array(
+      // Equal qvalue for each language, choose the site preferred one.
+      'en,en-US,fr-CA,fr,es-MX' => 'en',
+      'fr,en' => 'en',
+      'en,fr' => 'en',
+      'en-US,fr' => 'en-US',
+      'fr,en-US' => 'en-US',
+      'fr' => 'fr-CA',
+      'fr;q=1' => 'fr-CA',
+      'fr,es-MX' => 'fr-CA',
+      'fr,es' => 'fr-CA',
+      'es,fr' => 'fr-CA',
+      'es-MX,de' => 'es-MX',
+      'de,es-MX' => 'es-MX',
+
+      // Different qvalues.
+      'en-US,en;q=0.5,fr;q=0.25' => 'en-US',
+      'fr,en;q=0.5' => 'fr-CA',
+      'fr,en;q=0.5,fr-CA;q=0.25' => 'fr',
+
+      // These should be equivalent, because a range matches its sub-languages.
+      'es-MX,en;q=0.5' => 'es-MX',
+      'es,en;q=0.5' => 'es-MX',
+
+      // Silly wildcards are also valid.
+      '*,fr-CA;q=0.5' => 'en',
+      '*,en;q=0.25' => 'fr-CA',
+      'en,en-US;q=0.5,fr;q=0.25' => 'en',
+      'en-US;q=0.5,*;q=0.25' => 'en-US',
+
+      // Unresolvable cases.
+      '' => NULL,
+      'de,pl' => NULL,
+      $this->randomName(10) => NULL,
+    );
+
+    foreach ($test_cases as $accept_language => $expected_result) {
+      $_SERVER['HTTP_ACCEPT_LANGUAGE'] = $accept_language;
+      $result = locale_language_from_browser($languages);
+      $this->assertIdentical($result, $expected_result, t("Language selection '@accept-language' selects '@result', result = '@actual'", array('@accept-language' => $accept_language, '@result' => $expected_result, '@actual' => isset($result) ? $result : 'none')));
+    }
+  }
 }
 
 /**
