=== modified file 'modules/search/search.install'
--- modules/search/search.install 2007-05-25 15:26:33 +0000
+++ modules/search/search.install 2007-06-21 16:24:24 +0000
@@ -19,3 +19,18 @@ function search_uninstall() {
   variable_del('minimum_word_size');
   variable_del('overlap_cjk');
 }
+
+/**
+ * Drop and recreate the search index.
+ */
+function search_update_1() {
+  $ret = array();
+  db_drop_table($ret, 'search_dataset');
+  db_drop_table($ret, 'search_index');
+  db_drop_table($ret, 'search_total');
+  drupal_install_schema('search');
+  return $ret;
+}
+
+// Note: if further schema changes are made before 6.0, update_1 should be
+// renamed to update_2, and a dummy update_1 should be provided.
\ No newline at end of file

=== modified file 'modules/search/search.module'
--- modules/search/search.module 2007-06-16 17:53:05 +0000
+++ modules/search/search.module 2007-06-21 16:36:55 +0000
@@ -293,9 +293,11 @@ function search_wipe($sid = NULL, $type
   }
   else {
     db_query("DELETE FROM {search_dataset} WHERE sid = %d AND type = '%s'", $sid, $type);
-    db_query("DELETE FROM {search_index} WHERE fromsid = %d AND fromtype = '%s'", $sid, $type);
-    // When re-indexing, keep link references
-    db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'". ($reindex ? " AND fromsid = 0" : ''), $sid, $type);
+    db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type);
+    // Don't remove links if re-indexing.
+    if (!$reindex) {
+      db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type);
+    }
   }
 }
 
@@ -591,21 +593,24 @@ function search_index($sid, $type, $text
               $word = (int)ltrim($word, '-0');
             }
 
+            // Links score mainly for the target.
             if ($link) {
               if (!isset($results[$linknid])) {
                 $results[$linknid] = array();
               }
-              $results[$linknid][$word] += $score * $focus;
+              $results[$linknid][] = $word;
+              // Reduce score of the link caption in the source.
+              $focus *= 0.2;
             }
-            else {
-              if (!isset($results[0][$word])) {
-                $results[0][$word] = 0;
-              }
-              $results[0][$word] += $score * $focus;
-              // Focus is a decaying value in terms of the amount of unique words up to this point.
-              // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
-              $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
+            // Fall-through
+            if (!isset($results[0][$word])) {
+              $results[0][$word] = 0;
             }
+            $results[0][$word] += $score * $focus;
+
+            // Focus is a decaying value in terms of the amount of unique words up to this point.
+            // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
+            $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
           }
           $tagwords++;
           // Too many words inside a single tag probably mean a tag was accidentally left open.
@@ -631,13 +636,59 @@ function search_index($sid, $type, $text
   }
   unset($results[0]);
 
-  // Now insert links to nodes
+  // Get all previous links from this item.
+  $result = db_query("SELECT nid, caption FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type);
+  $links = array();
+  while ($link = db_fetch_object($result)) {
+    $links[$link->nid] = $link->caption;
+  }
+
+  // Now store links to nodes.
   foreach ($results as $nid => $words) {
-    foreach ($words as $word => $score) {
-      db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %f)", $word, $nid, 'node', $sid, $type, $score);
-      search_dirty($word);
+    $caption = implode(' ', $words);
+    if (isset($links[$nid])) {
+      if ($links[$nid] != $caption) {
+        // Update the existing link and mark the node for reindexing.
+        db_query("UPDATE {search_node_links} SET caption = '%s' WHERE sid = %d AND type = '%s' AND nid = %d", $caption, $sid, $type, $nid);
+        search_touch_node($nid);
+      }
+      // Unset the link to mark it as processed.
+      unset($links[$nid]);
+    }
+    else {
+      // Insert the new link and mark the node for reindexing.
+      db_query("INSERT INTO {search_node_links} (caption, sid, type, nid) VALUES ('%s', %d, '%s', %d)", $caption, $sid, $type, $nid);
+      search_touch_node($nid);
     }
   }
+  // Any left-over links in $links (keyed by nid) no longer exist. Delete them and mark the nodes for reindexing.
+  foreach ($links as $nid => $caption) {
+    db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s' AND nid = %d", $sid, $type, $nid);
+    search_touch_node($nid);
+  }
+}
+
+/**
+ * Change a node's changed timestamp to now to force reindexing.
+ */
+function search_touch_node($nid) {
+  db_query("UPDATE {node} SET changed = %d WHERE nid = %d", time(), $nid);
+}
+
+/**
+ * Implementation of hook_nodeapi().
+ */
+function search_nodeapi(&$node, $op, $teaser = NULL, $page = NULL) {
+  switch ($op) {
+    // Transplant links to a node into the target node.
+    case 'update index':
+      $result = db_query("SELECT caption FROM {search_node_links} WHERE nid = %d", $node->nid);
+      $output = array();
+      while ($link = db_fetch_object($result)) {
+        $output[] = $link->caption;
+      }
+      return '('. implode(', ', $output) .')';
+  }
 }
 
 /**

=== modified file 'modules/search/search.schema'
--- modules/search/search.schema 2007-06-15 21:27:10 +0000
+++ modules/search/search.schema 2007-06-21 16:18:43 +0000
@@ -5,25 +5,22 @@ function search_schema() {
   $schema['search_dataset'] = array(
     'fields' => array(
       'sid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0),
-      'type' => array('type' => 'varchar', 'length' => 16, 'not null' => FALSE),
+      'type' => array('type' => 'varchar', 'length' => 16, 'not null' => TRUE, 'default' => ''),
       'data' => array('type' => 'text', 'not null' => TRUE, 'size' => 'medium')
     ),
-    'indexes' => array('sid_type' => array('sid', 'type')),
+    'primary key' => array('sid', 'type'),
   );
 
   $schema['search_index'] = array(
     'fields' => array(
       'word' => array('type' => 'varchar', 'length' => 50, 'not null' => TRUE, 'default' => ''),
       'sid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0),
-      'type' => array('type' => 'varchar', 'length' => 16, 'not null' => FALSE),
-      'fromsid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0),
-      'fromtype' => array('type' => 'varchar', 'length' => 16, 'not null' => FALSE),
+      'type' => array('type' => 'varchar', 'length' => 16, 'not null' => TRUE, 'default' => ''),
       'score' => array('type' => 'float', 'not null' => FALSE)
     ),
+    'primary key' => array('sid', 'type', 'word'),
     'indexes' => array(
-      'from_sid_type' => array('fromsid', 'fromtype'),
-      'sid_type' => array('sid', 'type'),
-      'word' => array('word')
+      'type_word' => array('type', 'word')
     ),
   );
 
@@ -35,6 +32,19 @@ function search_schema() {
     'primary key' => array('word'),
   );
 
+  $schema['search_node_links'] = array(
+    'fields' => array(
+      'sid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0),
+      'type' => array('type' => 'varchar', 'length' => 16, 'not null' => TRUE, 'default' => ''),
+      'nid' => array('type' => 'int', 'unsigned' => TRUE, 'not null' => TRUE, 'default' => 0),
+      'caption' => array('type' => 'text', 'size' => 'big', 'not null' => FALSE)
+    ),
+    'primary key' => array('sid', 'type', 'nid'),
+    'indexes' => array(
+      'nid' => array('nid')
+    ),
+  );
+
   return $schema;
 }
 