=== modified file 'modules/node/node.module' --- modules/node/node.module 2007-10-03 17:35:22 +0000 +++ modules/node/node.module 2007-10-04 09:41:44 +0000 @@ -1020,15 +1020,13 @@ function node_search($op = 'search', $ke return t('Content'); case 'reset': - variable_del('node_cron_last'); - variable_del('node_cron_last_nid'); + db_query("UPDATE {search_dataset} SET reindex = %d WHERE type = 'node'", time()); return; case 'status': - $last = variable_get('node_cron_last', 0); - $last_nid = variable_get('node_cron_last_nid', 0); $total = db_result(db_query('SELECT COUNT(*) FROM {node} WHERE status = 1')); - $remaining = db_result(db_query('SELECT COUNT(*) FROM {node} n LEFT JOIN {node_comment_statistics} c ON n.nid = c.nid WHERE n.status = 1 AND ((GREATEST(n.created, n.changed, c.last_comment_timestamp) = %d AND n.nid > %d ) OR (n.created > %d OR n.changed > %d OR c.last_comment_timestamp > %d))', $last, $last_nid, $last, $last, $last)); + $remaining = db_result(db_query("SELECT COUNT(*) FROM {node} n LEFT JOIN {search_dataset} d ON d.type = 'node' AND d.sid = n.nid WHERE n.status = 1 AND (d.sid IS NULL OR d.reindex <> 0)")); + return array('remaining' => $remaining, 'total' => $total); case 'admin': @@ -1056,17 +1054,21 @@ function node_search($op = 'search', $ke case 'search': // Build matching conditions - list($join1, $where1) = _db_rewrite_sql(); - $arguments1 = array(); - $conditions1 = 'n.status = 1'; + $where = array('n.status = 1'); + $arguments = array(); + list($join, $where1) = _db_rewrite_sql(); + $join = " INNER JOIN {node} n ON n.nid = i.sid ". $join; + if ($where1) { + $where[] = $where1; + } if ($type = search_query_extract($keys, 'type')) { $types = array(); foreach (explode(',', $type) as $t) { $types[] = "n.type = '%s'"; - $arguments1[] = $t; + $arguments[] = $t; } - $conditions1 .= ' AND ('. implode(' OR ', $types) .')'; + $where[] = '('. 
implode(' OR ', $types) .')'; $keys = search_query_insert($keys, 'type'); } @@ -1074,33 +1076,32 @@ function node_search($op = 'search', $ke $categories = array(); foreach (explode(',', $category) as $c) { $categories[] = "tn.tid = %d"; - $arguments1[] = $c; + $arguments[] = $c; } - $conditions1 .= ' AND ('. implode(' OR ', $categories) .')'; - $join1 .= ' INNER JOIN {term_node} tn ON n.vid = tn.vid'; + $where[] = '('. implode(' OR ', $categories) .')'; + $join .= ' INNER JOIN {term_node} tn ON n.vid = tn.vid'; $keys = search_query_insert($keys, 'category'); } // Build ranking expression (we try to map each parameter to a // uniform distribution in the range 0..1). $ranking = array(); - $arguments2 = array(); - $join2 = ''; + $ranking_arguments = array(); // Used to avoid joining on node_comment_statistics twice $stats_join = FALSE; $total = 0; if ($weight = (int)variable_get('node_rank_relevance', 5)) { // Average relevance values hover around 0.15 - $ranking[] = '%d * i.relevance'; - $arguments2[] = $weight; + $ranking[] = '%d * %relevance'; + $ranking_arguments[] = $weight; $total += $weight; } if ($weight = (int)variable_get('node_rank_recent', 5)) { // Exponential decay with half-life of 6 months, starting at last indexed node $ranking[] = '%d * POW(2, (GREATEST(n.created, n.changed, c.last_comment_timestamp) - %d) * 6.43e-8)'; - $arguments2[] = $weight; - $arguments2[] = (int)variable_get('node_cron_last', 0); - $join2 .= ' INNER JOIN {node} n ON n.nid = i.sid LEFT JOIN {node_comment_statistics} c ON c.nid = i.sid'; + $ranking_arguments[] = $weight; + $ranking_arguments[] = (int)variable_get('node_cron_last', 0); + $join .= ' LEFT JOIN {node_comment_statistics} c ON c.nid = i.sid'; $stats_join = TRUE; $total += $weight; } @@ -1108,10 +1109,10 @@ function node_search($op = 'search', $ke // Inverse law that maps the highest reply count on the site to 1 and 0 to 0. 
$scale = variable_get('node_cron_comments_scale', 0.0); $ranking[] = '%d * (2.0 - 2.0 / (1.0 + c.comment_count * %f))'; - $arguments2[] = $weight; - $arguments2[] = $scale; + $ranking_arguments[] = $weight; + $ranking_arguments[] = $scale; if (!$stats_join) { - $join2 .= ' LEFT JOIN {node_comment_statistics} c ON c.nid = i.sid'; + $join .= ' LEFT JOIN {node_comment_statistics} c ON c.nid = i.sid'; } $total += $weight; } @@ -1120,15 +1121,18 @@ function node_search($op = 'search', $ke // Inverse law that maps the highest view count on the site to 1 and 0 to 0. $scale = variable_get('node_cron_views_scale', 0.0); $ranking[] = '%d * (2.0 - 2.0 / (1.0 + nc.totalcount * %f))'; - $arguments2[] = $weight; - $arguments2[] = $scale; - $join2 .= ' LEFT JOIN {node_counter} nc ON nc.nid = i.sid'; + $ranking_arguments[] = $weight; + $ranking_arguments[] = $scale; + $join .= ' LEFT JOIN {node_counter} nc ON nc.nid = i.sid'; $total += $weight; } - $select2 = (count($ranking) ? implode(' + ', $ranking) : 'i.relevance') .' AS score'; // Do search - $find = do_search($keys, 'node', 'INNER JOIN {node} n ON n.nid = i.sid '. $join1 .' INNER JOIN {users} u ON n.uid = u.uid', $conditions1 . (empty($where1) ? '' : ' AND '. $where1), $arguments1, $select2, $join2, $arguments2); + $columns = (count($ranking) ? '('. implode(' + ', $ranking) .')' : '%relevance') .' AS score'; + $join .= ' INNER JOIN {users} u ON n.uid = u.uid'; + $sort = 'ORDER BY score DESC'; + $where2 = '('. implode(') AND (', $where) .')'; + $find = do_search($keys, 'node', $columns, $ranking_arguments, $join, $where2, $arguments, $sort); // Load results $results = array(); @@ -1563,59 +1567,46 @@ function node_page_view($node, $cid = NU } /** - * shutdown function to make sure we always mark the last node processed. 
- */ -function node_update_shutdown() { - global $last_change, $last_nid; - - if ($last_change && $last_nid) { - variable_set('node_cron_last', $last_change); - variable_set('node_cron_last_nid', $last_nid); - } -} - -/** * Implementation of hook_update_index(). */ function node_update_index() { - global $last_change, $last_nid; - - register_shutdown_function('node_update_shutdown'); - - $last = variable_get('node_cron_last', 0); - $last_nid = variable_get('node_cron_last_nid', 0); $limit = (int)variable_get('search_cron_limit', 100); // Store the maximum possible comments per thread (used for ranking by reply count) variable_set('node_cron_comments_scale', 1.0 / max(1, db_result(db_query('SELECT MAX(comment_count) FROM {node_comment_statistics}')))); variable_set('node_cron_views_scale', 1.0 / max(1, db_result(db_query('SELECT MAX(totalcount) FROM {node_counter}')))); - $result = db_query_range('SELECT GREATEST(IF(c.last_comment_timestamp IS NULL, 0, c.last_comment_timestamp), n.changed) as last_change, n.nid FROM {node} n LEFT JOIN {node_comment_statistics} c ON n.nid = c.nid WHERE n.status = 1 AND ((GREATEST(n.changed, c.last_comment_timestamp) = %d AND n.nid > %d) OR (n.changed > %d OR c.last_comment_timestamp > %d)) ORDER BY GREATEST(n.changed, c.last_comment_timestamp) ASC, n.nid ASC', $last, $last_nid, $last, $last, 0, $limit); + $result = db_query_range("SELECT n.nid FROM {node} n LEFT JOIN {search_dataset} d ON d.type = 'node' AND d.sid = n.nid WHERE d.sid IS NULL OR d.reindex <> 0 ORDER BY d.reindex ASC, n.nid ASC", 0, $limit); while ($node = db_fetch_object($result)) { - $last_change = $node->last_change; - $last_nid = $node->nid; - $node = node_load($node->nid); + _node_index_node($node); + } +} - // Build the node body. 
- $node->build_mode = NODE_BUILD_SEARCH_INDEX; - $node = node_build_content($node, FALSE, FALSE); - $node->body = drupal_render($node->content); +/** + * Index a single node + */ +function _node_index_node($node) { + $node = node_load($node->nid); - // Allow modules to modify the fully-built node. - node_invoke_nodeapi($node, 'alter'); + // save the changed time of the most recent indexed node, for the search results half-life calculation + variable_set('node_cron_last', $node->changed); - $text = '

'. check_plain($node->title) .'

'. $node->body; + // Build the node body. + $node->build_mode = NODE_BUILD_SEARCH_INDEX; + $node = node_build_content($node, FALSE, FALSE); + $node->body = drupal_render($node->content); - // Fetch extra data normally not visible - $extra = node_invoke_nodeapi($node, 'update index'); - foreach ($extra as $t) { - $text .= $t; - } + $text = '

'. check_plain($node->title) .'

'. $node->body; - // Update index - search_index($node->nid, 'node', $text); + // Fetch extra data normally not visible + $extra = node_invoke_nodeapi($node, 'update index'); + foreach ($extra as $t) { + $text .= $t; } + + // Update index + search_index($node->nid, 'node', $text); } /** === modified file 'modules/search/search.module' --- modules/search/search.module 2007-10-01 08:50:36 +0000 +++ modules/search/search.module 2007-10-04 09:24:20 +0000 @@ -229,9 +229,11 @@ function search_wipe($sid = NULL, $type } else { db_query("DELETE FROM {search_dataset} WHERE sid = %d AND type = '%s'", $sid, $type); - db_query("DELETE FROM {search_index} WHERE fromsid = %d AND fromtype = '%s'", $sid, $type); - // When re-indexing, keep link references - db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'". ($reindex ? " AND fromsid = 0" : ''), $sid, $type); + db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type); + // Don't remove links if re-indexing. + if (!$reindex) { + db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type); + } } } @@ -345,7 +347,7 @@ function search_expand_cjk($matches) { // FIFO queue of characters $chars = array(); // Begin loop - for ($i = 0; $i < $l; ++$i) { + for ($i = 0; $i < $l; ++ $i) { // Grab next character $current = drupal_substr($str, 0, 1); $str = substr($str, strlen($current)); @@ -527,23 +529,26 @@ function search_index($sid, $type, $text $word = (int)ltrim($word, '-0'); } + // Links score mainly for the target. if ($link) { if (!isset($results[$linknid])) { $results[$linknid] = array(); } - $results[$linknid][$word] += $score * $focus; + $results[$linknid][] = $word; + // Reduce score of the link caption in the source. + $focus *= 0.2; } - else { - if (!isset($results[0][$word])) { - $results[0][$word] = 0; - } - $results[0][$word] += $score * $focus; - // Focus is a decaying value in terms of the amount of unique words up to this point. 
- // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words. - $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015)); + // Fall-through + if (!isset($results[0][$word])) { + $results[0][$word] = 0; } + $results[0][$word] += $score * $focus; + + // Focus is a decaying value in terms of the amount of unique words up to this point. + // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words. + $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015)); } - $tagwords++; + $tagwords ++; // Too many words inside a single tag probably mean a tag was accidentally left open. if (count($tagstack) && $tagwords >= 15) { $tagstack = array(); @@ -558,7 +563,7 @@ function search_index($sid, $type, $text search_wipe($sid, $type, TRUE); // Insert cleaned up data into dataset - db_query("INSERT INTO {search_dataset} (sid, type, data) VALUES (%d, '%s', '%s')", $sid, $type, $accum); + db_query("INSERT INTO {search_dataset} (sid, type, data, reindex) VALUES (%d, '%s', '%s', %d)", $sid, $type, $accum, 0); // Insert results into search index foreach ($results[0] as $word => $score) { @@ -567,13 +572,80 @@ function search_index($sid, $type, $text } unset($results[0]); - // Now insert links to nodes + // Get all previous links from this item. + $result = db_query("SELECT nid, caption FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type); + $links = array(); + while ($link = db_fetch_object($result)) { + $links[$link->nid] = $link->caption; + } + + // Now store links to nodes. foreach ($results as $nid => $words) { - foreach ($words as $word => $score) { - db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %f)", $word, $nid, 'node', $sid, $type, $score); - search_dirty($word); + $caption = implode(' ', $words); + if (isset($links[$nid])) { + if ($links[$nid] != $caption) { + // Update the existing link and mark the node for reindexing. 
+ db_query("UPDATE {search_node_links} SET caption = '%s' WHERE sid = %d AND type = '%s' AND nid = %d", $caption, $sid, $type, $nid); + search_touch_node($nid); + } + // Unset the link to mark it as processed. + unset($links[$nid]); + } + else { + // Insert the new link and mark the node for reindexing. + db_query("INSERT INTO {search_node_links} (caption, sid, type, nid) VALUES ('%s', %d, '%s', %d)", $caption, $sid, $type, $nid); + search_touch_node($nid); } } + // Any left-over links in $links (keyed by target nid) no longer exist. Delete them and mark the nodes for reindexing. + foreach ($links as $nid => $caption) { + db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s' AND nid = %d", $sid, $type, $nid); + search_touch_node($nid); + } +} + +/** + * Set a node's search_dataset.reindex timestamp to now to force reindexing. + */ +function search_touch_node($nid) { + db_query("UPDATE {search_dataset} SET reindex = %d WHERE sid = %d AND type = 'node'", time(), $nid); +} + +/** + * Implementation of hook_nodeapi(). + */ +function search_nodeapi(&$node, $op, $teaser = NULL, $page = NULL) { + switch ($op) { + // Transplant links to a node into the target node. + case 'update index': + $result = db_query("SELECT caption FROM {search_node_links} WHERE nid = %d", $node->nid); + $output = array(); + while ($link = db_fetch_object($result)) { + $output[] = $link->caption; + } + return '('. implode(', ', $output) .')'; + // Reindex the node when it is updated. The node is automatically indexed + // when it is added, simply by being added to the node table. + case 'update': + search_touch_node($node->nid); + break; + } +} + +/** + * Implementation of hook_comment(). 
+ */ +function search_comment($a1, $op) { + switch ($op) { + // Reindex the node when comments are added or changed + case 'insert': + case 'update': + case 'delete': + case 'publish': + case 'unpublish': + search_touch_node($a1['nid']); + break; + } } /** @@ -616,6 +688,7 @@ function search_parse_query($text) { // Classify tokens $or = FALSE; + $or_warning = FALSE; foreach ($matches as $match) { $phrase = FALSE; // Strip off phrase quotes @@ -645,6 +718,9 @@ function search_parse_query($text) { } // Plain keyword else { + if ($match[2] == 'or') { + $or_warning = TRUE; + } if ($or) { // Add to last element (which is an array) $keys['positive'][count($keys['positive']) - 1] = array_merge($keys['positive'][count($keys['positive']) - 1], $words); @@ -704,9 +780,9 @@ function search_parse_query($text) { $query = implode(' AND ', $query); // Build word-index conditions for the first pass - $query2 = substr(str_repeat("i.word = '%s' OR ", count($arguments2)), 0, -4); + $query2 = 'i.word IN ('. implode(', ', array_fill(0, count($arguments2), "'%s'")) .')'; - return array($query, $arguments, $query2, $arguments2, $matches); + return array($query, $arguments, $query2, $arguments2, $matches, $or_warning); } /** @@ -723,7 +799,7 @@ function _search_parse_query(&$word, &$s $s = $num ? ((int)ltrim($s, '-0')) : $s; if (!isset($scores[$s])) { $scores[$s] = $s; - $count++; + $count ++; } } } @@ -738,28 +814,18 @@ function _search_parse_query(&$word, &$s * This function is normally only called by each module that support the * indexed search (and thus, implements hook_update_index()). * - * Two queries are performed which can be extended by the caller. + * search_index table contains: * - * The first query selects a set of possible matches based on the search index - * and any extra given restrictions. This is the classic "OR" search. 
- * - * SELECT i.type, i.sid, SUM(i.score*t.count) AS relevance - * FROM {search_index} i - * INNER JOIN {search_total} t ON i.word = t.word - * $join1 - * WHERE $where1 AND (...) - * GROUP BY i.type, i.sid - * - * The second query further refines this set by verifying advanced text - * conditions (such as AND, negative or phrase matches), and orders the results - * on a the column or expression 'score': - * - * SELECT i.type, i.sid, $select2 - * FROM temp_search_sids i - * INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type - * $join2 - * WHERE (...) - * ORDER BY score DESC + * object identifier + * word + * relevancy of the word inside the object + * + * For example, for nodes we store the nid, the word and its relevancy + * score. do_search groups the words belonging to one node (or other + * searchable object) and counts how many of the search terms appear in + * this group: if terms are connected with AND then this count must be + * the number of search terms. If terms are connected with OR then this + * count must be more than zero. * * @param $keywords * A search string as entered by the user. @@ -778,18 +844,6 @@ function _search_parse_query(&$word, &$s * @param $arguments1 * (optional) Extra SQL arguments belonging to the first query. * - * @param $select2 - * (optional) Inserted into the SELECT pat of the second query. Must contain - * a column selected as 'score'. - * defaults to 'i.relevance AS score' - * - * @param $join2 - * (optional) Inserted into the JOIN par of the second SQL query. - * For example "INNER JOIN {node_comment_statistics} n ON n.nid = i.sid" - * - * @param $arguments2 - * (optional) Extra SQL arguments belonging to the second query parameter. - * * @param $sort_parameters * (optional) SQL arguments for sorting the final results. 
* Default: 'ORDER BY score DESC' @@ -799,40 +853,40 @@ function _search_parse_query(&$word, &$s * * @ingroup search */ -function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = array(), $select2 = 'i.relevance AS score', $join2 = '', $arguments2 = array(), $sort_parameters = 'ORDER BY score DESC') { - $query = search_parse_query($keywords); +function do_search($keywords, $type, $columns = '%relevance AS relevance', $column_arguments = array(), $join = '', $where = '1', $arguments = array(), $sort_parameters = 'ORDER BY relevance DESC') { + $parsed = search_parse_query($keywords); - if ($query[2] == '') { + if ($parsed[2] == '' || empty($parsed[3])) { form_set_error('keys', t('You must include at least one positive keyword with @count characters or more.', array('@count' => variable_get('minimum_word_size', 3)))); } - if ($query === NULL || $query[0] == '' || $query[2] == '') { + if ($parsed[5]) { + form_set_error('keys', t('Try uppercase "OR" to search for either of two terms.')); + } + if ($parsed === NULL || $parsed[2] == '' || empty($parsed[3])) { return array(); } - // First pass: select all possible matching sids, doing a simple index-based OR matching on the keywords. - // 'matches' is used to reject those items that cannot possibly match the query. - $conditions = $where1 .' AND ('. 
$query[2] .") AND i.type = '%s'"; - $arguments = array_merge($arguments1, $query[3], array($type, $query[4])); - $result = db_query_temporary("SELECT i.type, i.sid, SUM(i.score * t.count) AS relevance, COUNT(*) AS matches FROM {search_index} i INNER JOIN {search_total} t ON i.word = t.word $join1 WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d", $arguments, 'temp_search_sids'); - - // Calculate maximum relevance, to normalize it - $normalize = db_result(db_query('SELECT MAX(relevance) FROM temp_search_sids')); - if (!$normalize) { - return array(); + if (count($parsed[1])) { + $join .= " INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type"; + $where .= " AND ($parsed[0])"; + $arguments = array_merge($arguments, $parsed[1]); } - $select2 = str_replace('i.relevance', '('. (1.0 / $normalize) .' * i.relevance)', $select2); - // Second pass: only keep items that match the complicated keywords conditions (phrase search, negative keywords, ...) - $conditions = '('. $query[0] .')'; - $arguments = array_merge($arguments2, $query[1]); - $result = db_query_temporary("SELECT i.type, i.sid, $select2 FROM temp_search_sids i INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type $join2 WHERE $conditions $sort_parameters", $arguments, 'temp_search_results'); - if (($count = db_result(db_query('SELECT COUNT(*) FROM temp_search_results'))) == 0) { - return array(); + $sql = "FROM {search_index} i INNER JOIN {search_total} t ON i.word = t.word $join WHERE $where AND $parsed[2] AND i.type = '%s' GROUP BY i.type, i.sid"; + $arguments = array_merge($arguments, $parsed[3], array($type)); + if ($parsed[4] > 1) { + $sql .= " HAVING COUNT(*) = %d"; + $arguments[] = $parsed[4]; } - $count_query = "SELECT $count"; + $sql .= " $sort_parameters"; + + $columns = str_replace('%relevance', 'SUM(i.score * t.count)', $columns); + $query = "SELECT i.type, i.sid, COUNT(*) AS matches, $columns $sql"; + + $count_query = "SELECT COUNT(*) FROM ($query) search_results"; // Do 
actual search query - $result = 
pager_query("SELECT * FROM temp_search_results", 10, 0, $count_query); + $result = pager_query($query, 10, 0, $count_query, array_merge($column_arguments, $arguments)); $results = array(); while ($item = db_fetch_object($result)) { $results[] = $item; === modified file 'modules/search/search.schema' --- modules/search/search.schema 2007-07-15 10:09:21 +0000 +++ modules/search/search.schema 2007-10-04 09:24:20 +0000 @@ -6,7 +6,8 @@ function search_schema() { 'fields' => array( 'sid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0), 'type' => array('type' => 'varchar', 'length' => 16, 'not null' => FALSE), - 'data' => array('type' => 'text', 'not null' => TRUE, 'size' => 'big') + 'data' => array('type' => 'text', 'not null' => TRUE, 'size' => 'big'), + 'reindex' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0), ), 'indexes' => array('sid_type' => array('sid', 'type')), ); @@ -16,25 +17,33 @@ function search_schema() { 'word' => array('type' => 'varchar', 'length' => 50, 'not null' => TRUE, 'default' => ''), 'sid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0), 'type' => array('type' => 'varchar', 'length' => 16, 'not null' => FALSE), - 'fromsid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0), - 'fromtype' => array('type' => 'varchar', 'length' => 16, 'not null' => FALSE), - 'score' => array('type' => 'float', 'not null' => FALSE) + 'score' => array('type' => 'float', 'not null' => FALSE), ), 'indexes' => array( - 'from_sid_type' => array('fromsid', 'fromtype'), 'sid_type' => array('sid', 'type'), - 'word' => array('word') + 'word' => array('word'), ), + 'unique keys' => array('word_sid_type' => array('word', 'sid', 'type')), ); $schema['search_total'] = array( 'fields' => array( 'word' => array('type' => 'varchar', 'length' => 50, 'not null' => TRUE, 'default' => ''), - 'count' => array('type' => 'float', 'not null' => FALSE) + 
'count' => array('type' => 'float', 'not null' => FALSE), ), 'primary key' => array('word'), ); + $schema['search_node_links'] = array( + 'fields' => array( + 'sid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0), + 'type' => array('type' => 'varchar', 'length' => 16, 'not null' => TRUE, 'default' => ''), + 'nid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0), + 'caption' => array('type' => 'text', 'size' => 'big', 'not null' => FALSE), + ), + 'primary key' => array('sid', 'type', 'nid'), + 'indexes' => array('nid' => array('nid')), + ); + return $schema; } - === modified file 'modules/system/system.install' --- modules/system/system.install 2007-10-03 13:19:19 +0000 +++ modules/system/system.install 2007-10-04 09:41:44 +0000 @@ -3778,6 +3778,54 @@ function system_update_6033() { /** + * Update the search tables (reindex column, link table, unique key) and force a full reindex. + */ +function system_update_6034() { + $ret = array(); + if (db_table_exists('search_index')) { + // Create the search_dataset.reindex column. + db_add_field($ret, 'search_dataset', 'reindex', array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0)); + + // Drop the search_index.from fields which are no longer used. + db_drop_index($ret, 'search_index', 'from_sid_type'); + db_drop_field($ret, 'search_index', 'fromsid'); + db_drop_field($ret, 'search_index', 'fromtype'); + + // Create the search_node_links Table. 
+ $search_node_links_schema = array( + 'fields' => array( + 'sid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0), + 'type' => array('type' => 'varchar', 'length' => 16, 'not null' => TRUE, 'default' => ''), + 'nid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0), + 'caption' => array('type' => 'text', 'size' => 'big', 'not null' => FALSE), + ), + 'primary key' => array('sid', 'type', 'nid'), + 'indexes' => array('nid' => array('nid')), + ); + db_create_table($ret, 'search_node_links', $search_node_links_schema); + + // with the change to search_dataset.reindex, the search queue is handled differently, + // and this is no longer needed + variable_del('node_cron_last'); + + // Everything needs to be reindexed. + $ret[] = update_sql("UPDATE {search_dataset} SET reindex = 1"); + + // Add a unique index for the search_index (same name and columns as in search.schema). + // Since it's possible that some existing sites have duplicates, + // create the index using the IGNORE keyword, which ignores duplicate errors. + // However, pgsql doesn't support it + if ($GLOBALS['db_type'] == 'mysql' || $GLOBALS['db_type'] == 'mysqli') { + $ret[] = update_sql("ALTER IGNORE TABLE {search_index} ADD UNIQUE KEY word_sid_type (word, sid, type)"); + } + else { + db_add_unique_key($ret, 'search_index', 'word_sid_type', array('word', 'sid', 'type')); + } + } + return $ret; +} + +/** * @} End of "defgroup updates-5.x-to-6.x" * The next series of updates should start at 7000. */