diff --git a/search_api_db.install b/search_api_db.install
index 43ca1b0..34db15e 100644
--- a/search_api_db.install
+++ b/search_api_db.install
@@ -108,3 +108,101 @@ function search_api_db_update_7103() {
     }
   }
 }
+
+/**
+ * Use a single full text table per index.
+ */
+function search_api_db_update_7104() {
+  $servers_query = db_select('search_api_server', 's')
+    ->condition('s.class', 'search_api_db_service');
+  $servers_query->innerJoin('search_api_index', 'i', 'i.server = s.machine_name');
+  // todo from this table?
+  $servers_query->fields('s', array('options'));
+  $servers_query->fields('i', array('machine_name', 'item_type'));
+  $servers = $servers_query->execute();
+  // Each row is one server/index pair: "options" is the server's, while
+  // "machine_name" and "item_type" are the index's.
+  foreach ($servers as $server) {
+    $options = unserialize($server->options);
+    // The options list the fields of all indexes on the server, so only
+    // migrate those of the index this row stands for.
+    if (!empty($options['indexes'][$server->machine_name])) {
+      // Add new table.
+      $text_table = 'search_api_db_' . $server->machine_name . '_text';
+      $table = array(
+        'name' => $text_table,
+        'module' => 'search_api_db',
+        'fields' => array(
+          'item_id' => array(
+            'description' => 'The primary identifier of the item.',
+            'not null' => TRUE,
+          ),
+          'field_name' => array(
+            'description' => "Name of the item's field.",
+            'not null' => TRUE,
+            'type' => 'varchar',
+            'length' => 32,
+          ),
+          'word' => array(
+            'description' => 'The text of the indexed token.',
+            'type' => 'varchar',
+            'length' => 50,
+            'not null' => TRUE,
+          ),
+          'score' => array(
+            'description' => 'The score associated with this token.',
+            'type' => 'float',
+            'not null' => TRUE,
+          ),
+        ),
+      );
+      // The type of the item_id field depends on the ID field's type.
+      $id_field = search_api_get_datasource_controller($server->item_type)->getIdFieldInfo();
+      switch ($id_field['type']) {
+        case 'text':
+        case 'string':
+        case 'uri':
+          $table['fields']['item_id'] += array('type' => 'varchar', 'length' => 50);
+          break;
+
+        case 'integer':
+        case 'duration':
+        case 'date':
+          // 'datetime' sucks. Therefore, we just store the timestamp.
+          $table['fields']['item_id'] += array('type' => 'int', 'size' => 'big');
+          break;
+
+        case 'decimal':
+          $table['fields']['item_id'] += array('type' => 'float');
+          break;
+
+        case 'boolean':
+          $table['fields']['item_id'] += array('type' => 'int', 'size' => 'tiny');
+          break;
+
+        default:
+          throw new SearchApiException(t('Unknown field type @type. Database search module might be out of sync with Search API.', array('@type' => $id_field['type'])));
+      }
+      db_create_table($text_table, $table);
+      global $databases;
+      list($key, $target) = explode(':', $options['database'], 2);
+      if ($databases[$key][$target]['driver'] === 'mysql') {
+        db_query("ALTER TABLE {{$text_table}} CONVERT TO CHARACTER SET 'utf8' COLLATE 'utf8_bin'");
+      }
+      // Migrate data.
+      foreach ($options['indexes'][$server->machine_name] as $name => $field) {
+        if (search_api_is_text_type($field['type'])) {
+          $query = db_select($field['table'], 't')
+            ->fields('t', array('item_id', 'word', 'score'));
+          $query->addExpression(':field_name', 'field_name', array(':field_name' => $name));
+          db_insert($text_table)->from($query)->execute();
+          // todo
+          // db_drop_table($field['table']);
+        }
+      }
+      // Add keys.
+      db_add_index($text_table, 'word', array(array('word', 20)));
+      db_add_primary_key($text_table, array('item_id', 'field_name', 'word'));
+    }
+  }
+}
diff --git a/service.inc b/service.inc
index d115ed1..f370841 100644
--- a/service.inc
+++ b/service.inc
@@ -333,53 +333,32 @@ class SearchApiDbService extends SearchApiAbstractService {
       }
     }
 
-    if ($type == 'text') {
-      // Text columns are always a separate table.
-      db_add_field($db['table'], 'word', array(
-        'description' => 'The text of the indexed token.',
-        'type' => 'varchar',
-        'length' => 50,
-        'not null' => TRUE,
-      ));
-      db_add_field($db['table'], 'score', array(
-        'description' => 'The score associated with this token.',
-        'type' => 'float',
-        'not null' => TRUE,
-      ));
-      db_add_index($db['table'], 'word', array(array('word', 10)));
-      if ($new_table) {
-        // Add a covering index since word is not repeated for each item.
-        db_add_primary_key($db['table'], array('item_id', 'word'));
-      }
+    if (!isset($db['column'])) {
+      $db['column'] = 'value';
+    }
+    $db_field = $this->sqlType($type);
+    $db_field += array(
+      'description' => "The field's value for this item.",
+    );
+    if ($new_table && search_api_is_list_type($field['type'])) {
+      $db_field['not null'] = TRUE;
+    }
+    db_add_field($db['table'], $db['column'], $db_field);
+    if ($db_field['type'] === 'varchar') {
+      db_add_index($db['table'], $db['column'], array(array($db['column'], 10)));
     }
     else {
-      if (!isset($db['column'])) {
-        $db['column'] = 'value';
-      }
-      $db_field = $this->sqlType($type);
-      $db_field += array(
-        'description' => "The field's value for this item.",
-      );
-      if ($new_table && search_api_is_list_type($field['type'])) {
-        $db_field['not null'] = TRUE;
-      }
-      db_add_field($db['table'], $db['column'], $db_field);
-      if ($db_field['type'] === 'varchar') {
-        db_add_index($db['table'], $db['column'], array(array($db['column'], 10)));
+      db_add_index($db['table'], $db['column'], array($db['column']));
+    }
+    if ($new_table) {
+      if (search_api_is_list_type($field['type'])) {
+        // Add a covering index for lists.
+        db_add_primary_key($db['table'], array('item_id', $db['column']));
       }
       else {
-        db_add_index($db['table'], $db['column'], array($db['column']));
-      }
-      if ($new_table) {
-        if (search_api_is_list_type($field['type'])) {
-          // Add a covering index for lists.
-          db_add_primary_key($db['table'], array('item_id', $db['column']));
-        }
-        else {
-          // Otherwise, a denormalized table with many columns, where we can't
-          // predict the best covering index.
-          db_add_primary_key($db['table'], array('item_id'));
-        }
+        // Otherwise, a denormalized table with many columns, where we can't
+        // predict the best covering index.
+        db_add_primary_key($db['table'], array('item_id'));
       }
     }
   }
@@ -432,6 +411,71 @@ class SearchApiDbService extends SearchApiAbstractService {
     $cleared = FALSE;
     $change = FALSE;
 
+    $prefix = 'search_api_db_' . $index->machine_name;
+
+    // Always make sure the text table exists.
+    $text_table = $prefix . '_text';
+    if (!db_table_exists($text_table)) {
+      $table = array(
+        'name' => $text_table,
+        'module' => 'search_api_db',
+        'fields' => array(
+          'item_id' => array(
+            'description' => 'The primary identifier of the item.',
+            'not null' => TRUE,
+          ),
+          'field_name' => array(
+            'description' => "Name of the item's field.",
+            'not null' => TRUE,
+            'type' => 'varchar',
+            'length' => 32,
+          ),
+          'word' => array(
+            'description' => 'The text of the indexed token.',
+            'type' => 'varchar',
+            'length' => 50,
+            'not null' => TRUE,
+          ),
+          'score' => array(
+            'description' => 'The score associated with this token.',
+            'type' => 'float',
+            'not null' => TRUE,
+          ),
+        ),
+        'indexes' => array(
+          'word' => array(array('word', 20)),
+        ),
+        // Add a covering index since word is not repeated for each item.
+        'primary key' => array('item_id', 'field_name', 'word'),
+      );
+      // The type of the item_id field depends on the ID field's type.
+      $id_field = $index->datasource()->getIdFieldInfo();
+      $table['fields']['item_id'] += $this->sqlType($id_field['type'] == 'text' ? 'string' : $id_field['type']);
+      if (isset($table['fields']['item_id']['length'])) {
+        // A length of 255 is overkill for IDs. 50 should be more than enough.
+        $table['fields']['item_id']['length'] = 50;
+      }
+      db_create_table($text_table, $table);
+
+      // Some DBMSs will need a character encoding and collation set.
+      global $databases;
+      list($key, $target) = explode(':', $this->options['database'], 2);
+      $db_driver = $databases[$key][$target]['driver'];
+
+      switch ($db_driver) {
+        case 'mysql':
+          db_query("ALTER TABLE {{$text_table}} CONVERT TO CHARACTER SET 'utf8' COLLATE 'utf8_bin'");
+          break;
+
+        // @todo Add fixes for other DBMSs.
+        case 'oracle':
+        case 'pgsql':
+        case 'sqlite':
+        case 'sqlsrv':
+          break;
+      }
+    }
+
     foreach ($fields as $name => $field) {
       if (!isset($new_fields[$name])) {
         // The field is no longer in the index, drop the data.
@@ -485,27 +529,28 @@ class SearchApiDbService extends SearchApiAbstractService {
       }
       // Make sure the table and column now exist. (Especially important when
       // we actually add the index for the first time.)
-      if (!db_table_exists($field['table']) || (isset($field['column']) && !db_field_exists($field['table'], $field['column']))) {
+      if (!search_api_is_text_type($field['type']) && (!db_table_exists($field['table']) || (isset($field['column']) && !db_field_exists($field['table'], $field['column'])))) {
         $this->createFieldTable($index, $new_fields[$name], $field);
       }
       unset($new_fields[$name]);
     }
 
-    $prefix = 'search_api_db_' . $index->machine_name;
     // These are new fields that were previously not indexed.
     foreach ($new_fields as $name => $field) {
       $reindex = TRUE;
-      if ($this->canDenormalize($field)) {
-        $fields[$name]['table'] = $prefix;
-        $fields[$name]['column'] = $this->findFreeColumn($fields[$name]['table'], $name);
-      }
-      else {
-        $fields[$name]['table'] = $this->findFreeTable($prefix . '_', $name);
+      if (!search_api_is_text_type($field['type'])) {
+        if ($this->canDenormalize($field)) {
+          $fields[$name]['table'] = $prefix;
+          $fields[$name]['column'] = $this->findFreeColumn($fields[$name]['table'], $name);
+        }
+        else {
+          $fields[$name]['table'] = $this->findFreeTable($prefix . '_', $name);
+        }
+        $this->createFieldTable($index, $field, $fields[$name]);
       }
-      $this->createFieldTable($index, $field, $fields[$name]);
       $fields[$name]['type'] = $field['type'];
       $fields[$name]['boost'] = $field['boost'];
       $change = TRUE;
     }
 
     if ($change) {
@@ -548,7 +593,7 @@ class SearchApiDbService extends SearchApiAbstractService {
       if ($this->canDenormalize($field) && isset($field['column'])) {
         db_drop_field($field['table'], $field['column']);
       }
-      else {
+      elseif (!empty($field['table']) && db_table_exists($field['table'])) {
         db_drop_table($field['table']);
       }
     }
@@ -679,11 +724,12 @@ class SearchApiDbService extends SearchApiAbstractService {
             }
           }
           if ($words) {
-            $query = $this->connection->insert($table)
-              ->fields(array('item_id', 'word', 'score'));
+            $query = $this->connection->insert('search_api_db_' . $index->machine_name . '_text')
+              ->fields(array('item_id', 'field_name', 'word', 'score'));
             foreach ($words as $word) {
               $query->values(array(
                 'item_id' => $id,
+                'field_name' => $name,
                 'word' => $word['value'],
                 'score' => $word['score'] * $boost,
               ));
@@ -876,10 +922,20 @@ class SearchApiDbService extends SearchApiAbstractService {
       if (empty($this->options['indexes'])) {
         return;
       }
-      foreach ($this->options['indexes'] as $index) {
+      foreach ($this->options['indexes'] as $machine_name => $index) {
+        if (is_array($ids)) {
+          $this->connection->delete('search_api_db_' . $machine_name . '_text')
+            ->condition('item_id', $ids, 'IN')
+            ->execute();
+        }
+        else {
+          $this->connection->truncate('search_api_db_' . $machine_name . '_text')->execute();
+        }
         foreach ($index as $fields) {
           foreach ($fields as $field) {
-            $this->connection->truncate($field['table'])->execute();
+            if (!search_api_is_text_type($field['type'])) {
+              $this->connection->truncate($field['table'])->execute();
+            }
           }
         }
       }
@@ -889,11 +945,21 @@ class SearchApiDbService extends SearchApiAbstractService {
       if (empty($this->options['indexes'][$index->machine_name])) {
         return;
       }
+      if (is_array($ids)) {
+        $this->connection->delete('search_api_db_' . $index->machine_name . '_text')
+          ->condition('item_id', $ids, 'IN')
+          ->execute();
+      }
+      else {
+        $this->connection->truncate('search_api_db_' . $index->machine_name . '_text')->execute();
+      }
       foreach ($this->options['indexes'][$index->machine_name] as $field) {
         if (is_array($ids)) {
-          $this->connection->delete($field['table'])
-            ->condition('item_id', $ids, 'IN')
-            ->execute();
+          if (!search_api_is_text_type($field['type'])) {
+            $this->connection->delete($field['table'])
+              ->condition('item_id', $ids, 'IN')
+              ->execute();
+          }
         }
-        else {
+        elseif (!search_api_is_text_type($field['type'])) {
           $this->connection->truncate($field['table'])->execute();
@@ -1027,6 +1093,7 @@ class SearchApiDbService extends SearchApiAbstractService {
    *   If some illegal query setting (unknown field, etc.) was encountered.
    */
   protected function createDbQuery(SearchApiQueryInterface $query, array $fields) {
+    $index = $query->getIndex();
     $keys = &$query->getKeys();
     $keys_set = (boolean) $keys;
     $keys = $this->prepareKeys($keys);
@@ -1057,7 +1124,7 @@ class SearchApiDbService extends SearchApiAbstractService {
         $fulltext_fields[$name] = $fields[$name];
       }
 
-      $db_query = $this->createKeysQuery($keys, $fulltext_fields, $fields);
+      $db_query = $this->createKeysQuery($index, $keys, $fulltext_fields, $fields);
       if (is_array($keys) && !empty($keys['#negation'])) {
         $db_query->addExpression(':score', 'score', array(':score' => 1));
         $db_query->distinct();
@@ -1083,7 +1150,7 @@ class SearchApiDbService extends SearchApiAbstractService {
 
     $filter = $query->getFilter();
     if ($filter->getFilters()) {
-      $condition = $this->createFilterCondition($filter, $fields, $db_query);
+      $condition = $this->createFilterCondition($index, $filter, $fields, $db_query);
       if ($condition) {
         $db_query->condition($condition);
       }
@@ -1220,6 +1287,8 @@ class SearchApiDbService extends SearchApiAbstractService {
    *
    * Used as a helper method in createDbQuery() and createFilterCondition().
    *
+   * @param SearchApiIndex $index
+   *   The index whose full text table should be queried.
    * @param $keys
    *   The search keys, formatted like the return value of
    *   SearchApiQueryInterface::getKeys(), but preprocessed according to
@@ -1234,7 +1303,7 @@ class SearchApiDbService extends SearchApiAbstractService {
    *   A SELECT query returning item_id and score (or only item_id, if
    *   $keys['#negation'] is set).
    */
-  protected function createKeysQuery($keys, array $fields, array $all_fields) {
+  protected function createKeysQuery(SearchApiIndex $index, $keys, array $fields, array $all_fields) {
     if (!is_array($keys)) {
       $keys = array(
         '#conjunction' => 'AND',
@@ -1242,7 +1311,6 @@ class SearchApiDbService extends SearchApiAbstractService {
       );
     }
 
-    $or = db_or();
     $neg = !empty($keys['#negation']);
     $conj = $keys['#conjunction'];
     $words = array();
@@ -1275,53 +1343,28 @@ class SearchApiDbService extends SearchApiAbstractService {
     $not_nested = ($subs <= 1 && count($fields) == 1) || ($neg && $conj == 'OR' && !$negated);
 
     if ($words) {
-      if (count($words) > 1) {
-        $mul_words = TRUE;
-        foreach ($words as $word) {
-          $or->condition('word', $word);
-        }
+      $db_query = $this->connection->select('search_api_db_' . $index->machine_name . '_text', 't');
+      $mul_words = count($words) > 1;
+      if ($neg_nested) {
+        $db_query->fields('t', array('item_id', 'word'));
       }
-      else {
-        $word = array_shift($words);
+      elseif ($neg) {
+        $db_query->fields('t', array('item_id'));
       }
-      foreach ($fields as $name => $field) {
-        $table = $field['table'];
-        $query = $this->connection->select($table, 't');
-        if ($neg_nested) {
-          $query->fields('t', array('item_id', 'word'));
-        }
-        elseif ($neg) {
-          $query->fields('t', array('item_id'));
-        }
-        elseif ($not_nested) {
-          $query->fields('t', array('item_id', 'score'));
-        }
-        else {
-          $query->fields('t');
-        }
-        if ($mul_words) {
-          $query->condition($or);
-        }
-        else {
-          $query->condition('word', $word);
-        }
-
-        if (!isset($db_query)) {
-          $db_query = $query;
-        }
-        elseif ($not_nested) {
-          $db_query->union($query, 'UNION');
-        }
-        else {
-          $db_query->union($query, 'UNION ALL');
-        }
+      elseif ($not_nested) {
+        $db_query->fields('t', array('item_id', 'score'));
+      }
+      else {
+        $db_query->fields('t');
       }
+      $db_query->condition('word', $words, 'IN');
+      $db_query->condition('field_name', array_keys($fields), 'IN');
     }
 
     if ($nested) {
       $word = '';
       foreach ($nested as $k) {
-        $query = $this->createKeysQuery($k, $fields, $all_fields);
+        $query = $this->createKeysQuery($index, $k, $fields, $all_fields);
         if (!$neg) {
           $word .= ' ';
           $var = ':word' . strlen($word);
@@ -1395,13 +1438,13 @@ class SearchApiDbService extends SearchApiAbstractService {
 
       if ($conj == 'AND') {
         foreach ($negated as $k) {
-          $db_query->condition('t.item_id', $this->createKeysQuery($k, $fields, $all_fields), 'NOT IN');
+          $db_query->condition('t.item_id', $this->createKeysQuery($index, $k, $fields, $all_fields), 'NOT IN');
         }
       }
       else {
         $or = db_or();
         foreach ($negated as $k) {
-          $or->condition('t.item_id', $this->createKeysQuery($k, $fields, $all_fields), 'NOT IN');
+          $or->condition('t.item_id', $this->createKeysQuery($index, $k, $fields, $all_fields), 'NOT IN');
         }
         if (isset($old_query)) {
           $or->condition('t.item_id', $old_query, 'NOT IN');
@@ -1422,6 +1465,8 @@ class SearchApiDbService extends SearchApiAbstractService {
    *
    * Used as a helper method in createDbQuery().
    *
+   * @param SearchApiIndex $index
+   *   The index whose full text table is queried for filters on text fields.
    * @param SearchApiQueryFilterInterface $filter
    *   The filter for which a condition should be created.
    * @param array $fields
@@ -1435,7 +1480,7 @@ class SearchApiDbService extends SearchApiAbstractService {
    * @throws SearchApiException
    *   If an unknown field was used in the filter.
*/ - protected function createFilterCondition(SearchApiQueryFilterInterface $filter, array $fields, SelectQueryInterface $db_query) { + protected function createFilterCondition(SearchApiIndex $index, SearchApiQueryFilterInterface $filter, array $fields, SelectQueryInterface $db_query) { $cond = db_condition($filter->getConjunction()); $empty = TRUE; // Store whether a JOIN alrady occurred for a field, so we don't JOIN @@ -1445,7 +1490,7 @@ class SearchApiDbService extends SearchApiAbstractService { $tables = array(); foreach ($filter->getFilters() as $f) { if (is_object($f)) { - $c = $this->createFilterCondition($f, $fields, $db_query); + $c = $this->createFilterCondition($index, $f, $fields, $db_query); if ($c) { $empty = FALSE; $cond->condition($c); @@ -1467,7 +1512,7 @@ class SearchApiDbService extends SearchApiAbstractService { } if (search_api_is_text_type($field['type'])) { $keys = $this->prepareKeys($f[1]); - $query = $this->createKeysQuery($keys, array($field), $fields); + $query = $this->createKeysQuery($index, $keys, array($field), $fields); // We don't need the score, so we remove it. The score might either be // an expression or a field. $query_expressions = &$query->getExpressions();