/**
* Fix the collation used in the database to use utf8. Previously, several tables used the latin1 character set with
* latin1_swedish_ci collation. utf8 character sets with utf8_unicode_ci or utf8_bin collation should be used instead.
* https://drupal.org/node/772678
* https://drupal.org/node/129432
* https://drupal.org/files/issues/custom.install.txt (original patch from #19 above) - originally written for D6,
* converted for D7 here and expanded to consider the collation type (bin or ci).
*/
function hook_update_N() {
global $databases;
$ret = array();
// @see db_connect()
$database = $databases['default']['default']['database'];
// Convert/ensure default database collation.
$ret[] = db_query("ALTER DATABASE $database DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci");
// This is probably highly specific to mysql.
$schema = db_query("SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH,
CHARACTER_SET_NAME, COLLATION_NAME
FROM information_schema.columns");
//WHERE information_schema.columns.TABLE_SCHEMA like \"$database\""); // This errors out and I can't explain why...
$data = '
';
// Convert tables and columns.
foreach ($schema->fetchAll() as $info) {
// Make sure we are working with the Drupal database because we queried information_schema directly.
if($info->TABLE_SCHEMA == $database && $info->COLLATION_NAME != NULL && $info->CHARACTER_SET_NAME != 'utf8') {
$no_length_types = array('longtext', 'bigint', 'datetime', 'int', 'decimal', 'double', 'tinyint', 'smallint',
'float', 'longblob', 'mediumtext');
// Form a data type parameter for the modify clause. Some data types don't represent their corresponding lengths
if($info->CHARACTER_MAXIMUM_LENGTH == NULL || in_array($info->DATA_TYPE, $no_length_types)) {
$type = $info->DATA_TYPE;
}
else {
$type = $info->DATA_TYPE . '(' . $info->CHARACTER_MAXIMUM_LENGTH . ') ';
}
// Some columns that weren't previously latin1_bin but probably should be.
$exceptions = array('search_total.word', 'file_managed.uri');
// Change "latin1_swedish_ci" To whatever you are converting from
if ($info->COLLATION_NAME == 'latin1_swedish_ci'
&& !in_array($info->TABLE_NAME . "." . $info->COLUMN_NAME, $exceptions)) {
$data .= $info->TABLE_NAME .'.'. $info->COLUMN_NAME . " to utf8_unicode_ci
\n";
$ret[] = db_query("ALTER TABLE {" . $info->TABLE_NAME . "}
MODIFY $info->COLUMN_NAME $type CHARACTER SET utf8 COLLATE utf8_unicode_ci");
}
else {
$data .= $info->TABLE_NAME .'.'. $info->COLUMN_NAME . " to utf8_bin
\n";
$ret[] = db_query("ALTER TABLE {" . $info->TABLE_NAME . "}
MODIFY $info->COLUMN_NAME $type CHARACTER SET utf8 COLLATE utf8_bin");
}
}
}
watchdog('collate fix', "Updated the collation on database columns: " . $data);
return $ret;
}