cvs diff -u search.module (in directory C:\CVS\drupalpatch\modules\) Index: search.module =================================================================== RCS file: /cvs/drupal/drupal/modules/search.module,v retrieving revision 1.125 diff -u -r1.125 search.module --- search.module 23 Apr 2005 07:34:22 -0000 1.125 +++ search.module 23 Apr 2005 08:49:29 -0000 @@ -380,6 +380,9 @@ $linknid = $match[1]; if ($linknid > 0) { $link = true; + // A link counts as a general score boost for the target page. + // The words used for the link caption also count (see below). + $results[$linknid][''] += 1; } } } @@ -523,8 +526,35 @@ } $count_query = "SELECT $count"; - // Do pager query - $query = "SELECT i.type, i.sid, SUM(i.score/t.count) AS score FROM {search_index} i $join INNER JOIN {search_total} t ON i.word = t.word WHERE $conditions GROUP BY i.type, i.sid ORDER BY score DESC"; + /* + Do pager query + + The search index consists of the unique words in a piece of content (e.g. a + node), and the words' scores (see search_index() to see how they are + calculated). + + To find items, we find all matching keywords and divide each word's absolute + score in a particular item by the sum of all scores for this word (cached in + search_total) to get a relative score. All the relative scores are then + summed per item and the result is used as a ranking key for the results. + + The search indexer also takes note of links to items (for now only nodes) + and assigns extra link scores to items under an empty word. These extra + scores are fetched with a LEFT JOIN of the search index on itself. + + Each link counts for 0.02. Experience shows that on a good database, good + results score between 0.20 and 0.40, while great results are above 0.40. + In theory the ranking score is unbounded, as more keywords equal + more scores to sum, but in practice long queries tend to be vague and + contain more noise words. Thus the ranking score rarely goes above 1.0. 
When + it does, the results are usually very relevant already so link boosting is + unnecessary. + + Aside from the general link boost, the scores for a link's text are also + added to the target item's total. However, this is handled transparently by + simply indexing them directly with the item they link to. + */ + $query = "SELECT i.type, i.sid, (SUM(i.score/t.count) + MAX(if(j.word IS NULL, 0, j.score))*0.02) AS score FROM {search_index} i $join INNER JOIN {search_total} t ON i.word = t.word LEFT JOIN {search_index} j ON i.sid = j.sid AND j.word = '' WHERE $conditions GROUP BY i.type, i.sid ORDER BY score DESC"; $result = pager_query($query, 15, 0, $count_query, $arguments); $results = array();