Search module: keep node-to-node link captions in a dedicated
{search_node_links} table instead of fromsid/fromtype rows in {search_index}.

 * search.install: (sid, type) and (sid, type, word) become primary keys,
   the fromsid/fromtype columns of {search_index} are removed,
   {search_node_links} is created for both mysql and pgsql (with the same
   set of indexes on each), and search_update_1() drops and recreates the
   search tables.
 * search.module: search_wipe() leaves link rows alone when re-indexing;
   search_index() stores one caption per (sid, type, nid), and calls
   search_touch_node() on the target node whenever a link is inserted,
   changed or deleted; search_nodeapi() returns the stored captions for
   the 'update index' operation.

--- orig/drupal/modules/search/search.install 2006-09-01 03:40:08.000000000 -0400
+++ patched/drupal/modules/search/search.install 2007-09-28 02:17:51.000000000 -0400
@@ -10,20 +10,18 @@
     case 'mysqli':
       db_query("CREATE TABLE {search_dataset} (
         sid int unsigned NOT NULL default '0',
-        type varchar(16) default NULL,
+        type varchar(16) NOT NULL default '',
         data longtext NOT NULL,
-        KEY sid_type (sid, type)
+        PRIMARY KEY (sid, type)
       ) /*!40100 DEFAULT CHARACTER SET UTF8 */ ");

       db_query("CREATE TABLE {search_index} (
         word varchar(50) NOT NULL default '',
         sid int unsigned NOT NULL default '0',
-        type varchar(16) default NULL,
-        fromsid int unsigned NOT NULL default '0',
-        fromtype varchar(16) default NULL,
+        type varchar(16) NOT NULL default '',
         score float default NULL,
-        KEY sid_type (sid, type),
-        KEY from_sid_type (fromsid, fromtype),
+        PRIMARY KEY (sid, type, word),
+        KEY type_word (type, word),
         KEY word (word)
       ) /*!40100 DEFAULT CHARACTER SET UTF8 */ ");

@@ -32,32 +30,49 @@
         count float default NULL,
         PRIMARY KEY (word)
       ) /*!40100 DEFAULT CHARACTER SET UTF8 */ ");
+
+      db_query("CREATE TABLE {search_node_links} (
+        sid int unsigned NOT NULL default '0',
+        type varchar(16) NOT NULL default '',
+        nid int unsigned NOT NULL default '0',
+        caption longtext,
+        PRIMARY KEY (sid, type, nid),
+        KEY nid (nid)
+      ) /*!40100 DEFAULT CHARACTER SET UTF8 */ ");
       break;
+
     case 'pgsql':
       db_query("CREATE TABLE {search_dataset} (
         sid int_unsigned NOT NULL default '0',
-        type varchar(16) default NULL,
-        data text NOT NULL
+        type varchar(16) NOT NULL default '',
+        data text NOT NULL,
+        PRIMARY KEY (sid, type)
       )");
-      db_query("CREATE INDEX {search_dataset}_sid_type_idx ON {search_dataset} (sid, type)");

       db_query("CREATE TABLE {search_index} (
         word varchar(50) NOT NULL default '',
         sid int_unsigned NOT NULL default '0',
-        type varchar(16) default NULL,
-        fromsid int_unsigned NOT NULL default '0',
-        fromtype varchar(16) default NULL,
-        score float default NULL
+        type varchar(16) NOT NULL default '',
+        score float default NULL,
+        PRIMARY KEY (sid, type, word)
       )");
-      db_query("CREATE INDEX {search_index}_sid_type_idx ON {search_index} (sid, type)");
-      db_query("CREATE INDEX {search_index}_from_sid_type_idx ON {search_index} (fromsid, fromtype)");
       db_query("CREATE INDEX {search_index}_word_idx ON {search_index} (word)");
+      db_query("CREATE INDEX {search_index}_type_word_idx ON {search_index} (type, word)");

       db_query("CREATE TABLE {search_total} (
         word varchar(50) NOT NULL default '',
         count float default NULL,
         PRIMARY KEY (word)
       )");
+
+      db_query("CREATE TABLE {search_node_links} (
+        sid int_unsigned NOT NULL default '0',
+        type varchar(16) NOT NULL default '',
+        nid int_unsigned NOT NULL default '0',
+        caption text,
+        PRIMARY KEY (sid, type, nid)
+      )");
+      db_query("CREATE INDEX {search_node_links}_nid_idx ON {search_node_links} (nid)");
       break;
   }
 }
@@ -69,6 +84,20 @@
   db_query('DROP TABLE {search_dataset}');
   db_query('DROP TABLE {search_index}');
   db_query('DROP TABLE {search_total}');
+  db_query('DROP TABLE {search_node_links}');
   variable_del('minimum_word_size');
   variable_del('overlap_cjk');
 }
+
+/**
+ * Drop and recreate the search index.
+ */
+function search_update_1() {
+  db_query('DROP TABLE {search_dataset}');
+  db_query('DROP TABLE {search_index}');
+  db_query('DROP TABLE {search_total}');
+  variable_del('node_cron_last');
+  variable_del('node_cron_last_nid');
+  search_install();
+  return array();
+}
--- orig/drupal/modules/search/search.module 2007-07-26 15:16:48.000000000 -0400
+++ patched/drupal/modules/search/search.module 2007-09-28 02:10:07.000000000 -0400
@@ -268,9 +268,11 @@
   }
   else {
     db_query("DELETE FROM {search_dataset} WHERE sid = %d AND type = '%s'", $sid, $type);
-    db_query("DELETE FROM {search_index} WHERE fromsid = %d AND fromtype = '%s'", $sid, $type);
-    // When re-indexing, keep link references
-    db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'". ($reindex ? " AND fromsid = 0" : ''), $sid, $type);
+    db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type);
+    // Don't remove links if re-indexing.
+    if (!$reindex) {
+      db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type);
+    }
   }
 }

@@ -566,18 +568,24 @@
                 $word = (int)ltrim($word, '-0');
               }

+              // Links score mainly for the target.
               if ($link) {
                 if (!isset($results[$linknid])) {
                   $results[$linknid] = array();
                 }
-                $results[$linknid][$word] += $score * $focus;
+                $results[$linknid][] = $word;
+                // Reduce score of the link caption in the source.
+                $focus *= 0.2;
               }
-              else {
-                $results[0][$word] += $score * $focus;
-                // Focus is a decaying value in terms of the amount of unique words up to this point.
-                // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
-                $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
+              // Fall-through: link words are also scored for the source item itself.
+              if (!isset($results[0][$word])) {
+                $results[0][$word] = 0;
               }
+              $results[0][$word] += $score * $focus;
+
+              // Focus is a decaying value in terms of the amount of unique words up to this point.
+              // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
+              $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
             }
             $tagwords++;
             // Too many words inside a single tag probably mean a tag was accidentally left open.
@@ -603,12 +611,58 @@
   }
   unset($results[0]);

-  // Now insert links to nodes
+  // Get all previous links from this item.
+  $links = array();
+  $result = db_query("SELECT nid, caption FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type);
+  while ($link = db_fetch_object($result)) {
+    $links[$link->nid] = $link->caption;
+  }
+
+  // Now store links to nodes.
   foreach ($results as $nid => $words) {
-    foreach ($words as $word => $score) {
-      db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %f)", $word, $nid, 'node', $sid, $type, $score);
-      search_dirty($word);
+    $caption = implode(' ', $words);
+    if (isset($links[$nid])) {
+      if ($links[$nid] != $caption) {
+        // Update the existing link and mark the node for reindexing.
+        db_query("UPDATE {search_node_links} SET caption = '%s' WHERE sid = %d AND type = '%s' AND nid = %d", $caption, $sid, $type, $nid);
+        search_touch_node($nid);
+      }
+      // Unset the link to mark it as processed.
+      unset($links[$nid]);
     }
+    else {
+      // Insert the new link and mark the node for reindexing.
+      db_query("INSERT INTO {search_node_links} (caption, sid, type, nid) VALUES ('%s', %d, '%s', %d)", $caption, $sid, $type, $nid);
+      search_touch_node($nid);
+    }
+  }
+  // $links is keyed by nid; entries still left no longer exist. Delete them and mark the nodes for reindexing.
+  foreach ($links as $nid => $caption) {
+    db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s' AND nid = %d", $sid, $type, $nid);
+    search_touch_node($nid);
+  }
+}
+
+/**
+ * Change a node's changed timestamp to now to force reindexing.
+ */
+function search_touch_node($nid) {
+  db_query("UPDATE {node} SET changed = %d WHERE nid = %d", time(), $nid);
+}
+
+/**
+ * Implementation of hook_nodeapi().
+ */
+function search_nodeapi(&$node, $op, $teaser = NULL, $page = NULL) {
+  switch ($op) {
+    // Transplant links to a node into the target node.
+    case 'update index':
+      $result = db_query("SELECT caption FROM {search_node_links} WHERE nid = %d", $node->nid);
+      $output = array();
+      while ($link = db_fetch_object($result)) {
+        $output[] = $link->caption;
+      }
+      return '('. implode(', ', $output) .')';
   }
 }
