Index: includes/update.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/update.inc,v
retrieving revision 1.44
diff -u -p -r1.44 update.inc
--- includes/update.inc	21 Apr 2010 07:05:44 -0000	1.44
+++ includes/update.inc	22 Apr 2010 14:09:03 -0000
@@ -648,6 +648,13 @@ function update_parse_db_url($db_url) {
       'host' => urldecode($url['host']),
       'port' => isset($url['port']) ? urldecode($url['port']) : '',
     );
+    // Determine default database collation for MySQL. Borrowed from phpMyAdmin.
+    if ($databases[$database]['default']['driver'] == 'mysql') {
+      $collation = db_query('SELECT DEFAULT_COLLATION_NAME FROM information_schema.SCHEMATA WHERE SCHEMA_NAME = :database', array(':database' => $databases[$database]['default']['database']))->fetchField();
+      if ($collation) {
+        $databases[$database]['default']['collation'] = $collation;
+      }
+    }
   }
   return $databases;
 }
Index: includes/database/mysql/database.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/database/mysql/database.inc,v
retrieving revision 1.26
diff -u -p -r1.26 database.inc
--- includes/database/mysql/database.inc	7 Mar 2010 08:03:44 -0000	1.26
+++ includes/database/mysql/database.inc	22 Apr 2010 14:09:03 -0000
@@ -25,6 +25,10 @@ class DatabaseConnection_mysql extends D
       $connection_options['port'] = 3306;
     }
 
+    if (empty($connection_options['collation'])) {
+      $connection_options['collation'] = 'utf8_unicode_ci';
+    }
+
     $this->connectionOptions = $connection_options;
 
     $dsn = 'mysql:host=' . $connection_options['host'] . ';port=' . $connection_options['port'] . ';dbname=' . $connection_options['database'];
@@ -37,8 +41,11 @@ class DatabaseConnection_mysql extends D
       PDO::ATTR_CASE => PDO::CASE_LOWER,
     ));
 
-    // Force MySQL to use the UTF-8 character set by default.
-    $this->exec('SET NAMES "utf8"');
+    // Force MySQL to use the UTF-8 character set and the configured collation
+    // for the connection. MySQL defaults to 'utf8_general_ci' for utf8, but
+    // Drupal defaults to 'utf8_unicode_ci' to avoid duplicate key errors for
+    // data stored in certain languages.
+    $this->exec('SET NAMES utf8 COLLATE ' . $connection_options['collation']);
 
     // Force MySQL's behavior to conform more closely to SQL standards.
     // This allows Drupal to run almost seamlessly on many different
Index: includes/database/mysql/schema.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/database/mysql/schema.inc,v
retrieving revision 1.36
diff -u -p -r1.36 schema.inc
--- includes/database/mysql/schema.inc	7 Apr 2010 15:07:59 -0000	1.36
+++ includes/database/mysql/schema.inc	22 Apr 2010 14:09:12 -0000
@@ -74,10 +74,11 @@ class DatabaseSchema_mysql extends Datab
    *   An array of SQL statements to create the table.
    */
   protected function createTableSql($name, $table) {
-    // Provide some defaults if needed
+    // Provide some defaults if needed.
     $table += array(
       'mysql_engine' => 'InnoDB',
-      'mysql_character_set' => 'UTF8',
+      'mysql_character_set' => 'utf8',
+      'mysql_collation' => $this->connection->connectionOptions['collation'],
     );
 
     $sql = "CREATE TABLE {" . $name . "} (\n";
@@ -96,7 +97,12 @@ class DatabaseSchema_mysql extends Datab
     // Remove the last comma and space.
     $sql = substr($sql, 0, -3) . "\n) ";
 
-    $sql .= 'ENGINE = ' . $table['mysql_engine'] . ' DEFAULT CHARACTER SET ' . $table['mysql_character_set'];
+    // Always specify the character set and collation explicitly. Otherwise,
+    // MySQL uses the default database collation for new tables, which is
+    // 'utf8_general_ci' for utf8. However, Drupal defaults to 'utf8_unicode_ci'
+    // to avoid duplicate key errors for data stored in certain languages.
+    // @see DatabaseConnection_mysql
+    $sql .= 'ENGINE = ' . $table['mysql_engine'] . ' DEFAULT CHARACTER SET ' . $table['mysql_character_set'] . ' COLLATE ' . $table['mysql_collation'];
 
     // Add table comment.
     if (!empty($table['description'])) {
Index: sites/default/default.settings.php
===================================================================
RCS file: /cvs/drupal/drupal/sites/default/default.settings.php,v
retrieving revision 1.44
diff -u -p -r1.44 default.settings.php
--- sites/default/default.settings.php	7 Apr 2010 15:07:59 -0000	1.44
+++ sites/default/default.settings.php	22 Apr 2010 14:09:03 -0000
@@ -53,7 +53,7 @@
  *
  * Each database connection is specified as an array of settings,
  * similar to the following:
- *
+ * @code
  * array(
  *   'driver' => 'mysql',
  *   'database' => 'databasename',
@@ -61,7 +61,9 @@
  *   'password' => 'password',
  *   'host' => 'localhost',
  *   'port' => 3306,
+ *   'collation' => 'utf8_unicode_ci',
  * );
+ * @endcode
  *
  * The "driver" property indicates what Drupal database driver the
  * connection should use.  This is usually the same as the name of the
@@ -85,11 +87,12 @@
  * fall back to the single master server.
  *
  * The general format for the $databases array is as follows:
- *
+ * @code
  * $databases['default']['default'] = $info_array;
  * $databases['default']['slave'][] = $info_array;
  * $databases['default']['slave'][] = $info_array;
  * $databases['extra']['default'] = $info_array;
+ * @endcode
  *
  * In the above example, $info_array is an array of settings described above.
  * The first line sets a "default" database that has one master database
@@ -99,7 +102,7 @@
  * "extra".
  *
  * For a single database configuration, the following is sufficient:
- *
+ * @code
  * $databases['default']['default'] = array(
  *   'driver' => 'mysql',
  *   'database' => 'databasename',
@@ -107,6 +110,7 @@
  *   'password' => 'password',
  *   'host' => 'localhost',
  * );
+ * @endcode
  *
  * You can optionally set prefixes for some or all database table names
  * by using the $db_prefix setting. If a prefix is specified, the table
@@ -115,14 +119,15 @@
  * are desired, leave it as an empty string ''.
  *
  * To have all database names prefixed, set $db_prefix as a string:
- *
+ * @code
  *   $db_prefix = 'main_';
+ * @endcode
  *
  * To provide prefixes for specific tables, set $db_prefix as an array.
  * The array's keys are the table names and the values are the prefixes.
  * The 'default' element holds the prefix for any tables not specified
  * elsewhere in the array. Example:
- *
+ * @code
  *   $db_prefix = array(
  *     'default'   => 'main_',
  *     'users'      => 'shared_',
@@ -130,13 +135,14 @@
  *     'role'      => 'shared_',
  *     'authmap'   => 'shared_',
  *   );
+ * @endcode
  *
  * You can also use db_prefix as a reference to a schema/database. This maybe
  * useful if your Drupal installation exists in a schema that is not the default
  * or you want to access several databases from the same code base at the same 
  * time.
  * Example:
- *
+ * @code
  *  $db_prefix = array(
  *    'default' => 'main.',
  *     'users'      => 'shared.',
@@ -144,16 +150,19 @@
  *     'role'      => 'shared.',
  *     'authmap'   => 'shared.',
  *  );
+ * @endcode
  *
  * NOTE: MySQL and SQLite's definition of a schema is a database.
  *
  * Database configuration format:
+ * @code
  *   $databases['default']['default'] = array(
  *     'driver' => 'mysql',
  *     'database' => 'databasename',
  *     'username' => 'username',
  *     'password' => 'password',
  *     'host' => 'localhost',
+ *     'collation' => 'utf8_unicode_ci',
  *   );
  *   $databases['default']['default'] = array(
  *     'driver' => 'pgsql',
@@ -166,6 +175,7 @@
  *     'driver' => 'sqlite',
  *     'database' => '/path/to/databasefilename',
  *   );
+ * @endcode
  */
 $databases = array();
 $db_prefix = '';
