diff --git a/core/lib/Drupal/Core/Database/Connection.php b/core/lib/Drupal/Core/Database/Connection.php index 51efaf4..5aa6c6f 100644 --- a/core/lib/Drupal/Core/Database/Connection.php +++ b/core/lib/Drupal/Core/Database/Connection.php @@ -134,6 +134,14 @@ * @var array */ protected $prefixReplace = array(); + + /** + * The character set of this connection. + * + * @var string + */ + protected $charset = 'utf8'; + function __construct($dsn, $username, $password, $driver_options = array()) { // Initialize and prepare the connection prefix. @@ -1172,4 +1180,17 @@ public function commit() { * also larger than the $existing_id if one was passed in. */ abstract public function nextId($existing_id = 0); + + /** + * Returns the character set used by this connection. + * + * See sites/default/default.settings.php for 4-byte UTF-8 in MySQL. + * + * @return string + */ + public function charset() { + return $this->charset; + } } + + diff --git a/core/lib/Drupal/Core/Database/Database.php b/core/lib/Drupal/Core/Database/Database.php index 25fe8fa..fb6518d 100644 --- a/core/lib/Drupal/Core/Database/Database.php +++ b/core/lib/Drupal/Core/Database/Database.php @@ -220,6 +220,7 @@ if (empty($value['driver'])) { $database_info[$index][$target] = $database_info[$index][$target][mt_rand(0, count($database_info[$index][$target]) - 1)]; } + $database_info[$index][$target]['index'] = $index; // Parse the prefix information. 
if (!isset($database_info[$index][$target]['prefix'])) { diff --git a/core/lib/Drupal/Core/Database/Driver/mysql/Connection.php b/core/lib/Drupal/Core/Database/Driver/mysql/Connection.php index c70603c..45519a7 100644 --- a/core/lib/Drupal/Core/Database/Driver/mysql/Connection.php +++ b/core/lib/Drupal/Core/Database/Driver/mysql/Connection.php @@ -36,15 +36,6 @@ class Connection extends DatabaseConnection { */ protected $needsCleanup = FALSE; - /** - * The character set of this connection. - * - * Either utf8 or utf8mb4, but this could be extended to other charsets. - * - * @var string - */ - protected $charset = 'utf8'; - public function __construct(array $connection_options = array()) { // This driver defaults to transaction support, except if explicitly passed FALSE. $this->transactionSupport = !isset($connection_options['transactions']) || ($connection_options['transactions'] !== FALSE); @@ -78,14 +69,30 @@ public function __construct(array $connection_options = array()) { parent::__construct($dsn, $connection_options['username'], $connection_options['password'], $connection_options['pdo']); - // Apply user-defined charset for this connection if set for utf8mb4 support - // (see sites/default/default.settings.php for more on utf8mb4 support). - if (isset($connection_options['charset'])) { - $this->charset = $connection_options['charset']; + // Default to 'utf8', but allow the user to change this to: + // - any user-defined charset if this is not the default connection; + // - any allowed charset if this is the default connection. + // See sites/default/default.settings.php for full documentation. + // The charset is not checked against the server here; whether the + // database supports it is only verified by the installer's settings + // validation. 
+ + $default_connection_charsets = array('utf8', 'utf8mb4', 'utf8mb3'); // 'utf8mb3' is an alias for 'utf8'. + if (isset($connection_options['charset']) && $connection_options['charset'] != 'utf8') { + if (!isset($connection_options['index']) || $connection_options['index'] != 'default') { + $this->charset = $connection_options['charset']; + } + elseif (in_array($connection_options['charset'], $default_connection_charsets, TRUE)) { + $this->charset = $connection_options['charset']; + } + else { + drupal_set_message(t('The default connection charset must be "utf8", "utf8mb3" or "utf8mb4". Currently defined as: @charset. Reverting to "utf8". Please check your charset definition in settings.php', array('@charset' => $connection_options['charset'])), 'warning'); + } } else { $this->charset = 'utf8'; } + if (!empty($connection_options['collation'])) { $this->exec('SET NAMES ' . $this->charset . ' COLLATE ' . $connection_options['collation']); } @@ -250,22 +257,8 @@ protected function popCommittableTransactions() { } } } - - /** - * Fetch the current character set for this connection. - * - * Drupal defaults to use 3-byte UTF-8 with MySQL, but as of MySQL 5.5.3 it is - * possible to use full 4-byte UTF-8 (utf8mb4). See the documentation in - * sites/default/default.settings.php on how to change this setting. - * - * @return string - */ - public function charset() { - return $this->charset; - } } - /** * @} End of "addtogroup database". */ diff --git a/core/lib/Drupal/Core/Database/Driver/mysql/Install/Tasks.php b/core/lib/Drupal/Core/Database/Driver/mysql/Install/Tasks.php index 0f3085f..5b37c47 100644 --- a/core/lib/Drupal/Core/Database/Driver/mysql/Install/Tasks.php +++ b/core/lib/Drupal/Core/Database/Driver/mysql/Install/Tasks.php @@ -90,13 +90,13 @@ public function validateDatabaseSettings($database) { // Perform standard validation. $errors = parent::validateDatabaseSettings($database); - // If we are using utf8mb4 charset, make sure the database supports it. 
- if (isset($database['charset']) && $database['charset'] == 'utf8mb4') { - if (!db_query("SHOW CHARACTER SET WHERE Charset = 'utf8mb4'")->rowCount()) { - $errors['mysql_charset'] = st('Your database does not support the utf8mb4 character set'); + // If we are using a custom charset, make sure the database supports it. + if (isset($database['charset']) && $database['charset'] != 'utf8') { + $supported = db_query("SHOW CHARACTER SET WHERE Charset = :charset", array(':charset' => $database['charset']))->rowCount(); + if (!$supported) { + $errors['mysql_charset'] = st('Your database does not support the %charset character set', array('%charset' => $database['charset'])); } } - return $errors; } } diff --git a/core/lib/Drupal/Core/Database/Driver/mysql/Schema.php b/core/lib/Drupal/Core/Database/Driver/mysql/Schema.php index 01f07de..ebb5737 100644 --- a/core/lib/Drupal/Core/Database/Driver/mysql/Schema.php +++ b/core/lib/Drupal/Core/Database/Driver/mysql/Schema.php @@ -112,10 +112,13 @@ protected function createTableSql($name, $table) { $sql .= 'ENGINE = ' . $table['mysql_engine'] . ' DEFAULT CHARACTER SET ' . $table['mysql_character_set']; // By default, MySQL uses the default collation for new tables, which is // 'utf8_general_ci' for utf8. If an alternate collation has been set, it - // needs to be explicitly specified. + // needs to be explicitly specified for each column, but the default + // collation still needs to be utf8_general_ci. // @see DatabaseConnection_mysql + // TODO: Remove; redundant for utf8 tables (MySQL's default collation) and + // only valid while $table['mysql_character_set'] is 'utf8'. if (!empty($info['collation'])) { - $sql .= ' COLLATE ' . $info['collation']; + $sql .= ' COLLATE utf8_general_ci'; } // Add table comment. @@ -138,6 +141,9 @@ protected function createTableSql($name, $table) { * The field specification, as per the schema data structure format. */ protected function createFieldSql($name, $spec) { + + $info = $this->connection->getConnectionOptions(); + $sql = "`" . 
$name . "` " . $spec['mysql_type']; if (in_array($spec['mysql_type'], array('VARCHAR', 'CHAR', 'TINYTEXT', 'MEDIUMTEXT', 'LONGTEXT', 'TEXT'))) { @@ -158,8 +164,11 @@ protected function createFieldSql($name, $spec) { // If it's a text field, check to see if we should use utf8mb4 (4-byte UTF8) // as the character set. - if (in_array($spec['mysql_type'], array('TINYTEXT', 'MEDIUMTEXT', 'LONGTEXT', 'TEXT')) && Database::getConnection()->charset() == 'utf8mb4') { - $sql .= ' CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci'; + // InnoDB indexes have a max of 767 bytes. This means we can't use 4-byte + // charsets on VARCHAR because there are VARCHAR-based indexes of 255 chars. + if (in_array($spec['mysql_type'], array('TINYTEXT', 'MEDIUMTEXT', 'LONGTEXT', 'TEXT'), TRUE) && $this->connection->charset() == 'utf8mb4') { + // Fall back to utf8mb4_general_ci when no collation is configured. + $sql .= ' CHARACTER SET utf8mb4 COLLATE ' . (empty($info['collation']) ? 'utf8mb4_general_ci' : $info['collation']); } if (isset($spec['not null'])) { diff --git a/sites/default/default.settings.php b/sites/default/default.settings.php index 595ccf9..bfc4817 100644 --- a/sites/default/default.settings.php +++ b/sites/default/default.settings.php @@ -169,15 +169,33 @@ * problems when trying to save data that contains high-order UTF8 characters, * such as math symbols and rarer languages. If you have MySQL 5.5.3+, you can * turn on support for 4-byte UTF8 characters in text fields by enabling the - * utf8mb4 character set on all text columns. More information on utf8mb4 can be - * found here: + * 'utf8mb4' character set on all text columns. More information on 'utf8mb4' + * can be found here: * http://dev.mysql.com/doc/refman/5.5/en/charset-unicode-utf8mb4.html - * - * An example of using the utf8mb4 character set: + * + * Note that if you change your character set, you must also change your + * collation. The collation prefix must match the character set name. 
See: + * http://dev.mysql.com/doc/refman/5.5/en/charset-unicode-sets.html + * + * Finally, it is possible to use 'utf8mb3', which is currently simply an alias of + * 'utf8', but MySQL reserves the right at a future date to make 'utf8' default + * to a 4-byte character set at which point 'utf8mb3' would specifically + * indicate the legacy 3-byte version. + * + * On the default connection, Drupal only allows use of utf8, utf8mb4 and + * utf8mb3. Other charsets are allowed on other connections at the user's risk. + * + * An example of setting custom character sets on the default connection and on + * a connection to a legacy database with a Croatian charset and collation: * * @code * $databases['default']['default'] = array( - * 'charset' => 'utf8mb4' + * 'charset' => 'utf8mb4', + * 'collation' => 'utf8mb4_general_ci', + * ); + * $databases['legacy-app']['default'] = array( + * 'charset' => 'cp1250', + * 'collation' => 'cp1250_croatian_ci', * ); * @endcode *