/** * Fix the collation used in the database to use utf8. Previously, several tables used the latin1 character set with * latin1_swedish_ci collation. utf8 character sets with utf8_unicode_ci or utf8_bin collation should be used instead. * https://drupal.org/node/772678 * https://drupal.org/node/129432 * https://drupal.org/files/issues/custom.install.txt (original patch from #19 above) - originally written for D6, * converted for D7 here and expanded to consider the collation type (bin or ci). */ function hook_update_N() { global $databases; $ret = array(); // @see db_connect() $database = $databases['default']['default']['database']; // Convert/ensure default database collation. $ret[] = db_query("ALTER DATABASE $database DEFAULT CHARACTER SET utf8 COLLATE utf8_unicode_ci"); // This is probably highly specific to mysql. $schema = db_query("SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH, CHARACTER_SET_NAME, COLLATION_NAME FROM information_schema.columns"); //WHERE information_schema.columns.TABLE_SCHEMA like \"$database\""); // This errors out and I can't explain why... $data = '
'; // Convert tables and columns. foreach ($schema->fetchAll() as $info) { // Make sure we are working with the Drupal database because we queried information_schema directly. if($info->TABLE_SCHEMA == $database && $info->COLLATION_NAME != NULL && $info->CHARACTER_SET_NAME != 'utf8') { $no_length_types = array('longtext', 'bigint', 'datetime', 'int', 'decimal', 'double', 'tinyint', 'smallint', 'float', 'longblob', 'mediumtext'); // Form a data type parameter for the modify clause. Some data types don't represent their corresponding lengths if($info->CHARACTER_MAXIMUM_LENGTH == NULL || in_array($info->DATA_TYPE, $no_length_types)) { $type = $info->DATA_TYPE; } else { $type = $info->DATA_TYPE . '(' . $info->CHARACTER_MAXIMUM_LENGTH . ') '; } // Some columns that weren't previously latin1_bin but probably should be. $exceptions = array('search_total.word', 'file_managed.uri'); // Change "latin1_swedish_ci" To whatever you are converting from if ($info->COLLATION_NAME == 'latin1_swedish_ci' && !in_array($info->TABLE_NAME . "." . $info->COLUMN_NAME, $exceptions)) { $data .= $info->TABLE_NAME .'.'. $info->COLUMN_NAME . " to utf8_unicode_ci
\n"; $ret[] = db_query("ALTER TABLE {" . $info->TABLE_NAME . "} MODIFY $info->COLUMN_NAME $type CHARACTER SET utf8 COLLATE utf8_unicode_ci"); } else { $data .= $info->TABLE_NAME .'.'. $info->COLUMN_NAME . " to utf8_bin
\n"; $ret[] = db_query("ALTER TABLE {" . $info->TABLE_NAME . "} MODIFY $info->COLUMN_NAME $type CHARACTER SET utf8 COLLATE utf8_bin"); } } } watchdog('collate fix', "Updated the collation on database columns: " . $data); return $ret; }