Index: modules/statistics/statistics.module =================================================================== RCS file: /cvs/drupal/drupal/modules/statistics/statistics.module,v retrieving revision 1.306 diff -u -p -r1.306 statistics.module --- modules/statistics/statistics.module 2 Jul 2009 04:27:23 -0000 1.306 +++ modules/statistics/statistics.module 4 Jul 2009 13:22:31 -0000 @@ -381,10 +381,15 @@ function statistics_ranking() { return array( 'views' => array( 'title' => t('Number of views'), - 'join' => 'LEFT JOIN {node_counter} node_counter ON node_counter.nid = i.sid', + 'join' => array( + 'type' => 'LEFT', + 'table' => 'node_counter', + 'alias' => 'node_counter', + 'on' => 'node_counter.nid = i.sid', + ), // Inverse law that maps the highest view count on the site to 1 and 0 to 0. - 'score' => '2.0 - 2.0 / (1.0 + node_counter.totalcount * CAST(%f AS DECIMAL))', - 'arguments' => array(variable_get('node_cron_views_scale', 0)), + 'score' => '2.0 - 2.0 / (1.0 + node_counter.totalcount * CAST(:scale AS DECIMAL))', + 'arguments' => array(':scale' => variable_get('node_cron_views_scale', 0)), ), ); } Index: modules/comment/comment.module =================================================================== RCS file: /cvs/drupal/drupal/modules/comment/comment.module,v retrieving revision 1.737 diff -u -p -r1.737 comment.module --- modules/comment/comment.module 3 Jul 2009 19:21:54 -0000 1.737 +++ modules/comment/comment.module 4 Jul 2009 13:22:32 -0000 @@ -2318,10 +2318,15 @@ function comment_ranking() { return array( 'comments' => array( 'title' => t('Number of comments'), - 'join' => 'LEFT JOIN {node_comment_statistics} node_comment_statistics ON node_comment_statistics.nid = i.sid', + 'join' => array( + 'type' => 'LEFT', + 'table' => 'node_comment_statistics', + 'alias' => 'node_comment_statistics', + 'on' => 'node_comment_statistics.nid = i.sid', + ), // Inverse law that maps the highest reply count on the site to 1 and 0 to 0. 
- 'score' => '2.0 - 2.0 / (1.0 + node_comment_statistics.comment_count * CAST(%f AS DECIMAL))', - 'arguments' => array(variable_get('node_cron_comments_scale', 0)), + 'score' => '2.0 - 2.0 / (1.0 + node_comment_statistics.comment_count * CAST(:scale AS DECIMAL))', + 'arguments' => array(':scale' => variable_get('node_cron_comments_scale', 0)), ), ); } Index: modules/node/node.module =================================================================== RCS file: /cvs/drupal/drupal/modules/node/node.module,v retrieving revision 1.1078 diff -u -p -r1.1078 node.module --- modules/node/node.module 3 Jul 2009 10:57:46 -0000 1.1078 +++ modules/node/node.module 4 Jul 2009 13:22:33 -0000 @@ -1306,37 +1306,26 @@ function node_perm() { /** * Gather the rankings from the the hook_ranking implementations. + * + * @param $query + * A query object that has been extended with the Search DB Extender. */ -function _node_rankings() { - $rankings = array( - 'total' => 0, 'join' => array(), 'score' => array(), 'args' => array(), - ); +function _node_rankings(SelectQueryExtender $query) { if ($ranking = module_invoke_all('ranking')) { + $tables = &$query->getTables(); foreach ($ranking as $rank => $values) { if ($node_rank = variable_get('node_rank_' . $rank, 0)) { // If the table defined in the ranking isn't already joined, then add it. - if (isset($values['join']) && !isset($rankings['join'][$values['join']])) { - $rankings['join'][$values['join']] = $values['join']; - } - - // Add the rankings weighted score multiplier value, handling NULL gracefully. - $rankings['score'][] = 'CAST(%f AS DECIMAL) * COALESCE((' . $values['score'] . '), 0)'; - - // Add the the administrator's weighted score multiplier value for this ranking. - $rankings['total'] += $node_rank; - $rankings['arguments'][] = $node_rank; - - // Add any additional arguments used by this ranking. 
- if (isset($values['arguments'])) { - $rankings['arguments'] = array_merge($rankings['arguments'], $values['arguments']); + if (isset($values['join']) && !isset($tables[$values['join']['alias']])) { + $query->addJoin($values['join']['type'], $values['join']['table'], $values['join']['alias'], $values['join']['on']); } + $arguments = isset($values['arguments']) ? $values['arguments'] : array(); + $query->addScore($values['score'], $arguments, $node_rank); } } } - return $rankings; } - /** * Implement hook_search(). */ @@ -1383,61 +1372,35 @@ function node_search($op = 'search', $ke case 'search': // Build matching conditions - list($join1, $where1) = _db_rewrite_sql(); - $arguments1 = array(); - $conditions1 = 'n.status = 1'; - - if ($type = search_query_extract($keys, 'type')) { - $types = array(); - foreach (explode(',', $type) as $t) { - $types[] = "n.type = '%s'"; - $arguments1[] = $t; - } - $conditions1 .= ' AND (' . implode(' OR ', $types) . ')'; - $keys = search_query_insert($keys, 'type'); - } - - if ($term = search_query_extract($keys, 'term')) { - $terms = array(); - foreach (explode(',', $term) as $c) { - $terms[] = "tn.tid = %d"; - $arguments1[] = $c; - } - $conditions1 .= ' AND (' . implode(' OR ', $terms) . ')'; - $join1 .= ' INNER JOIN {taxonomy_term_node} tn ON n.vid = tn.vid'; - $keys = search_query_insert($keys, 'term'); + $query = db_search()->extend('PagerDefault'); + $query->join('node', 'n', 'n.nid = i.sid'); + $query + ->condition('n.status', 1) + ->addTag('node_access') + ->searchExpression($keys, 'node'); + + // Insert special keywords. 
+ $query->setOption('type', 'n.type'); + $query->setOption('language', 'n.language'); + if ($query->setOption('term', 'tn.tid')) { + $query->join('taxonomy_term_node', 'tn', 'n.vid = tn.vid'); } - - if ($languages = search_query_extract($keys, 'language')) { - $terms = array(); - foreach (explode(',', $languages) as $l) { - $terms[] = "n.language = '%s'"; - $arguments1[] = $l; - } - $conditions1 .= ' AND (' . implode(' OR ', $terms) . ')'; - $keys = search_query_insert($keys, 'language'); + // Only continue if the first pass query matches. + if (!$query->executeFirstPass()) { + return array(); } - // Get the ranking expressions. - $rankings = _node_rankings(); + // Add the ranking expressions. + _node_rankings($query); - // When all search factors are disabled (ie they have a weight of zero), - // The default score is based only on keyword relevance. - if ($rankings['total'] == 0) { - $total = 1; - $arguments2 = array(); - $join2 = ''; - $select2 = 'SUM(i.relevance) AS calculated_score'; - } - else { - $total = $rankings['total']; - $arguments2 = $rankings['arguments']; - $join2 = implode(' ', $rankings['join']); - $select2 = 'SUM(' . implode(' + ', $rankings['score']) . ') AS calculated_score'; - } - - // Do search. - $find = do_search($keys, 'node', 'INNER JOIN {node} n ON n.nid = i.sid ' . $join1, $conditions1 . (empty($where1) ? '' : ' AND ' . $where1), $arguments1, $select2, $join2, $arguments2); + // Add a count query. + $inner_query = clone $query; + $count_query = db_select($inner_query->fields('i', array('sid'))); + $count_query->addExpression('COUNT(*)'); + $query->setCountQuery($count_query); + $find = $query + ->limit(10) + ->execute(); // Load results. $results = array(); @@ -1448,9 +1411,9 @@ function node_search($op = 'search', $ke $node->rendered = drupal_render($node->content); // Fetch comments for snippet. - $node->rendered .= module_invoke('comment', 'node_update_index', $node); + $node->rendered .= ' ' . 
module_invoke('comment', 'node_update_index', $node); // Fetch terms for snippet. - $node->rendered .= module_invoke('taxonomy', 'node_update_index', $node); + $node->rendered .= ' ' . module_invoke('taxonomy', 'node_update_index', $node); $extra = module_invoke_all('node_search_result', $node); @@ -1462,7 +1425,7 @@ function node_search($op = 'search', $ke 'date' => $node->changed, 'node' => $node, 'extra' => $extra, - 'score' => $total ? ($item->calculated_score / $total) : 0, + 'score' => $item->calculated_score, 'snippet' => search_excerpt($keys, $node->rendered), ); } @@ -1498,8 +1461,8 @@ function node_ranking() { $ranking['recent'] = array( 'title' => t('Recently posted'), // Exponential decay with half-life of 6 months, starting at last indexed node - 'score' => 'POW(2.0, (GREATEST(n.created, n.changed) - %d) * 6.43e-8)', - 'arguments' => array($node_cron_last), + 'score' => 'POW(2.0, (GREATEST(n.created, n.changed) - :node_cron_last) * 6.43e-8)', + 'arguments' => array(':node_cron_last' => $node_cron_last), ); } return $ranking; @@ -2157,15 +2120,15 @@ function node_search_validate($form, &$f // checkboxes to 0. 
$form_state['values']['type'] = array_filter($form_state['values']['type']); if (count($form_state['values']['type'])) { - $keys = search_query_insert($keys, 'type', implode(',', array_keys($form_state['values']['type']))); + $keys = search_expression_insert($keys, 'type', implode(',', array_keys($form_state['values']['type']))); } } if (isset($form_state['values']['term']) && is_array($form_state['values']['term'])) { - $keys = search_query_insert($keys, 'term', implode(',', $form_state['values']['term'])); + $keys = search_expression_insert($keys, 'term', implode(',', $form_state['values']['term'])); } if (isset($form_state['values']['language']) && is_array($form_state['values']['language'])) { - $keys = search_query_insert($keys, 'language', implode(',', array_filter($form_state['values']['language']))); + $keys = search_expression_insert($keys, 'language', implode(',', array_filter($form_state['values']['language']))); } if ($form_state['values']['or'] != '') { if (preg_match_all('/ ("[^"]+"|[^" ]+)/i', ' ' . $form_state['values']['or'], $matches)) { Index: modules/search/search.api.php =================================================================== RCS file: /cvs/drupal/drupal/modules/search/search.api.php,v retrieving revision 1.11 diff -u -p -r1.11 search.api.php --- modules/search/search.api.php 22 Jun 2009 09:10:06 -0000 1.11 +++ modules/search/search.api.php 4 Jul 2009 13:22:33 -0000 @@ -29,8 +29,9 @@ * capabilities. To do this, node module also implements hook_update_index() * which is used to create and maintain the index. * - * We call do_search() with the keys, the module name, and extra SQL fragments - * to use when searching. See hook_update_index() for more information. + * We call db_search() and then add the keys, the module name, and extra SQL + * fragments to use when searching. + * See hook_update_index() for more information. 
* * @param $op * A string defining which operation to perform: @@ -76,12 +77,15 @@ function hook_search($op = 'search', $ke return t('Content'); case 'reset': - db_query("UPDATE {search_dataset} SET reindex = %d WHERE type = 'node'", REQUEST_TIME); + db_update('search_dataset') + ->fields(array('reindex' => REQUEST_TIME)) + ->condition('type', 'node') + ->execute(); return; case 'status': - $total = db_result(db_query('SELECT COUNT(*) FROM {node} WHERE status = 1')); - $remaining = db_result(db_query("SELECT COUNT(*) FROM {node} n LEFT JOIN {search_dataset} d ON d.type = 'node' AND d.sid = n.nid WHERE n.status = 1 AND d.sid IS NULL OR d.reindex <> 0")); + $total = db_query('SELECT COUNT(*) FROM {node} WHERE status = 1')->fetchField(); + $remaining = db_query("SELECT COUNT(*) FROM {node} n LEFT JOIN {search_dataset} d ON d.type = 'node' AND d.sid = n.nid WHERE n.status = 1 AND d.sid IS NULL OR d.reindex <> 0")->fetchField(); return array('remaining' => $remaining, 'total' => $total); case 'admin': @@ -110,61 +114,35 @@ function hook_search($op = 'search', $ke case 'search': // Build matching conditions - list($join1, $where1) = _db_rewrite_sql(); - $arguments1 = array(); - $conditions1 = 'n.status = 1'; - - if ($type = search_query_extract($keys, 'type')) { - $types = array(); - foreach (explode(',', $type) as $t) { - $types[] = "n.type = '%s'"; - $arguments1[] = $t; - } - $conditions1 .= ' AND (' . implode(' OR ', $types) . ')'; - $keys = search_query_insert($keys, 'type'); - } - - if ($category = search_query_extract($keys, 'category')) { - $categories = array(); - foreach (explode(',', $category) as $c) { - $categories[] = "tn.tid = %d"; - $arguments1[] = $c; - } - $conditions1 .= ' AND (' . implode(' OR ', $categories) . 
')'; - $join1 .= ' INNER JOIN {taxonomy_term_node} tn ON n.vid = tn.vid'; - $keys = search_query_insert($keys, 'category'); - } - - if ($languages = search_query_extract($keys, 'language')) { - $categories = array(); - foreach (explode(',', $languages) as $l) { - $categories[] = "n.language = '%s'"; - $arguments1[] = $l; - } - $conditions1 .= ' AND (' . implode(' OR ', $categories) . ')'; - $keys = search_query_insert($keys, 'language'); - } - - // Get the ranking expressions. - $rankings = _node_rankings(); - - // When all search factors are disabled (ie they have a weight of zero), - // The default score is based only on keyword relevance. - if ($rankings['total'] == 0) { - $total = 1; - $arguments2 = array(); - $join2 = ''; - $select2 = 'i.relevance AS score'; - } - else { - $total = $rankings['total']; - $arguments2 = $rankings['arguments']; - $join2 = implode(' ', $rankings['join']); - $select2 = '(' . implode(' + ', $rankings['score']) . ') AS score'; - } - - // Do search. - $find = do_search($keys, 'node', 'INNER JOIN {node} n ON n.nid = i.sid ' . $join1, $conditions1 . (empty($where1) ? '' : ' AND ' . $where1), $arguments1, $select2, $join2, $arguments2); + $query = db_search()->extend('PagerDefault'); + $query->join('node', 'n', 'n.nid = i.sid'); + $query + ->condition('n.status', 1) + ->addTag('node_access') + ->searchExpression($keys, 'node'); + + // Insert special keywords. + $query->setOption('type', 'n.type'); + $query->setOption('language', 'n.language'); + if ($query->setOption('term', 'tn.tid')) { + $query->join('taxonomy_term_node', 'tn', 'n.vid = tn.vid'); + } + // Only continue if the first pass query matches. + if (!$query->executeFirstPass()) { + return array(); + } + + // Add the ranking expressions. + _node_rankings($query); + + // Add a count query. 
+ $inner_query = clone $query; + $count_query = db_select($inner_query->fields('i', array('sid'))); + $count_query->addExpression('COUNT(*)'); + $query->setCountQuery($count_query); + $find = $query + ->limit(10) + ->execute(); // Load results. $results = array(); @@ -175,9 +153,9 @@ function hook_search($op = 'search', $ke $node->body = drupal_render($node->content); // Fetch comments for snippet. - $node->body .= module_invoke('comment', 'node', $node, 'update_index'); + $node->body .= ' ' . module_invoke('comment', 'node_update_index', $node); // Fetch terms for snippet. - $node->body .= module_invoke('taxonomy', 'node', $node, 'update_index'); + $node->body .= ' ' . module_invoke('taxonomy', 'node_update_index', $node); $extra = module_invoke_all('node_search_result', $node); @@ -189,7 +167,7 @@ function hook_search($op = 'search', $ke 'date' => $node->changed, 'node' => $node, 'extra' => $extra, - 'score' => $total ? ($item->score / $total) : 0, + 'score' => $item->calculated_score, 'snippet' => search_excerpt($keys, $node->body), ); } Index: modules/search/search.extender.inc =================================================================== RCS file: modules/search/search.extender.inc diff -N modules/search/search.extender.inc --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ modules/search/search.extender.inc 4 Jul 2009 13:22:33 -0000 @@ -0,0 +1,454 @@ + array(), 'negative' => array()); + + /** + * Indicates if the first pass query requires complex conditions (LIKE). + * + * @var boolean + */ + protected $simple = TRUE; + + /** + * Conditions that are used for exact searches. + * + * This is always used for the second pass query but not for the first pass, + * unless $this->simple is FALSE. + * + * @var DatabaseCondition + */ + protected $conditions; + + /** + * Indicates how many matches for a search query are necessary. + * + * @var int + */ + protected $matches = 0; + + /** + * Array of search words. 
+ * + * These words have to match against search_index.word. + * + * @var array + */ + protected $words = array(); + + /** + * Normalization value for the search score. + * + * This value is the highest keyword relevance found by the first pass query. + * The actual score of a specific word is divided by it to make sure that the + * resulting calculated score is between 0 and 1. + * + * @var float + */ + protected $normalize; + + /** + * Indicates if the first pass query has been executed. + * + * @var boolean + */ + protected $executedFirstPass = FALSE; + + /** + * Stores score expressions. + * + * @var array + */ + protected $scores = array(); + + /** + * Stores arguments for score expressions. + * + * @var array + */ + protected $scoresArguments = array(); + + /** + * Multipliers passed to addScore(); execute() sums them to normalize scores. + * + * @var array + */ + protected $multiply = array(); + + /** + * Construct a new SelectQuery and assign it. + */ + public function __construct(array $options = array()) { + if (empty($options['target'])) { + $options['target'] = 'default'; + } + $this->query = db_select('search_index', 'i', $options); + $this->connection = Database::getConnection($options['target']); + } + + /** + * Search items for the given search query string and type. + * + * @param $expression + * A search query string, that can contain options. + * @param $type + * The type of search, this maps to the type column in search_index. + * @return + * The SearchQuery object. + */ + public function searchExpression($expression, $type) { + $this->searchExpression = $expression; + $this->type = $type; + + return $this; + } + + /** + * Apply a search option and remove it from the search query string. + * + * These options are in the form option:value,value2,value3. + * + * @param $option + * Name of the option. + * @param $column + * Name of the db column to which the value should be applied. + * @return + * TRUE if at least a value for that option has been found, FALSE if not. 
+ */ + public function setOption($option, $column) { + if ($values = search_expression_extract($this->searchExpression, $option)) { + $or = db_or(); + foreach (explode(',', $values) as $value) { + $or->condition($column, $value); + } + $this->condition($or); + $this->searchExpression = search_expression_insert($this->searchExpression, $option); + return TRUE; + } + return FALSE; + } + + /** + * Parse a search query into SQL conditions. + * + * We build two queries that matches the dataset bodies. + */ + protected function parseSearchExpression() { + preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $this->searchExpression , $keywords, PREG_SET_ORDER); + + if (count($keywords) == 0) { + return; + } + + // Classify tokens. + $or = FALSE; + $warning = ''; + foreach ($keywords as $match) { + $phrase = FALSE; + // Strip off phrase quotes. + if ($match[2]{0} == '"') { + $match[2] = substr($match[2], 1, -1); + $phrase = TRUE; + $this->simple = FALSE; + } + // Simplify keyword according to indexing rules and external preprocessors. + $words = search_simplify($match[2]); + // Re-explode in case simplification added more words, except when + // matching a phrase. + $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY); + // Negative matches. + if ($match[1] == '-') { + $this->keys['negative'] = array_merge($this->keys['negative'], $words); + } + // OR operator: instead of a single keyword, we store an array of all + // OR'd keywords. + elseif ($match[2] == 'OR' && count($this->keys['positive'])) { + $last = array_pop($this->keys['positive']); + // Starting a new OR? + if (!is_array($last)) { + $last = array($last); + } + $this->keys['positive'][] = $last; + $or = TRUE; + continue; + } + // AND operator: implied, so just ignore it. + elseif ($match[2] == 'AND' || $match[2] == 'and') { + $warning = $match[2]; + continue; + } + + // Plain keyword. 
+ else { + if ($match[2] == 'or') { + $warning = $match[2]; + } + if ($or) { + // Add to last element (which is an array). + $this->keys['positive'][count($this->keys['positive']) - 1] = array_merge($this->keys['positive'][count($this->keys['positive']) - 1], $words); + } + else { + $this->keys['positive'] = array_merge($this->keys['positive'], $words); + } + } + $or = FALSE; + } + + // Convert keywords into SQL statements. + $this->conditions = db_and(); + $simple_and = FALSE; + $simple_or = FALSE; + // Positive matches. + foreach ($this->keys['positive'] as $key) { + // Group of ORed terms. + if (is_array($key) && count($key)) { + $simple_or = TRUE; + $any = FALSE; + $queryor = db_or(); + foreach ($key as $or) { + list($num_new_scores) = $this->parseWord($or); + $any |= $num_new_scores; + $queryor->condition('d.data', "% $or %", 'LIKE'); + } + if (count($queryor)) { + $this->conditions->condition($queryor); + // A group of OR keywords only needs to match once. + $this->matches += ($any > 0); + } + } + // Single ANDed term. + else { + $simple_and = TRUE; + list($num_new_scores, $num_valid_words) = $this->parseWord($key); + $this->conditions->condition('d.data', "% $key %", 'LIKE'); + if (!$num_valid_words) { + $this->simple = FALSE; + } + // Each AND keyword needs to match at least once. + $this->matches += $num_new_scores; + } + } + if ($simple_and && $simple_or) { + $this->simple = FALSE; + } + // Negative matches. + foreach ($this->keys['negative'] as $key) { + $this->conditions->condition('d.data', "% $key %", 'NOT LIKE'); + $this->simple = FALSE; + } + + if ($warning == 'or') { + drupal_set_message(t('Search for either of the two terms with uppercase OR. For example, cats OR dogs.')); + } + } + + /** + * Helper function for parseSearchExpression(). + */ + protected function parseWord($word) { + $num_new_scores = 0; + $num_valid_words = 0; + // Determine the scorewords of this word/phrase. 
+ $split = explode(' ', $word); + foreach ($split as $s) { + $num = is_numeric($s); + if ($num || drupal_strlen($s) >= variable_get('minimum_word_size', 3)) { + $s = $num ? ((int)ltrim($s, '-0')) : $s; + if (!isset($this->words[$s])) { + $this->words[$s] = $s; + $num_new_scores++; + } + $num_valid_words++; + } + } + // Return matching snippet and number of added words. + return array($num_new_scores, $num_valid_words); + } + + /** + * Execute the first pass query. + * + * This can either be done explicitly, so that additional scores and + * conditions can be applied to the second pass query or implicitly by + * addScore() or execute(). + * + * @return + * TRUE if search items exist, FALSE if not. + */ + public function executeFirstPass() { + $this->parseSearchExpression(); + + if (count($this->words) == 0) { + form_set_error('keys', format_plural(variable_get('minimum_word_size', 3), 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.')); + return FALSE; + } + $this->executedFirstPass = TRUE; + + if (!empty($this->words)) { + $or = db_or(); + foreach ($this->words as $word) { + $or->condition('i.word', $word); + } + $this->condition($or); + } + // Build query for keyword normalization. + $this->join('search_total', 't', 'i.word = t.word'); + $this + ->condition('i.type', $this->type) + ->groupBy('i.type') + ->groupBy('i.sid') + ->having('COUNT(*) >= :matches', array(':matches' => $this->matches)); + + // Clone the query object to do the firstPass query; + $first = clone $this->query; + + // For complex search queries, add the LIKE conditions to the first pass query. + if (!$this->simple) { + $first->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type'); + $first->condition($this->conditions); + } + + // Calculate maximum keyword relevance, to normalize it. 
+ $first->addExpression('SUM(i.score * t.count)', 'calculated_score'); + $this->normalize = $first + ->range(0, 1) + ->orderBy('calculated_score', 'DESC') + ->execute() + ->fetchField(); + + if ($this->normalize) { + return TRUE; + } + return FALSE; + } + + /** + * Adds a custom score expression to the search query. + * + * Each score expression can optionally use a multiplicator and multiple + * expressions are combined. + * + * @param $score + * The score expression. + * @param $arguments + * Custom query arguments for that expression. + * @param $multiply + * If set, the score is multiplied with that value. Search query ensures + * that the search scores are still normalized. + */ + public function addScore($score, $arguments = array(), $multiply = FALSE) { + if ($multiply) { + $i = count($this->multiply); + $score = "CAST(:multiply_$i AS DECIMAL) * COALESCE(( " . $score . "), 0) / CAST(:total_$i AS DECIMAL)"; + $arguments[':multiply_' . $i] = $multiply; + $this->multiply[] = $multiply; + } + + $this->scores[] = $score; + $this->scoresArguments += $arguments; + + return $this; + } + + /** + * Execute the search. + * + * If not already done, this executes the first pass query, then the complex + * conditions are applied to the query including score expressions and + * ordering. + * + * @return + * FALSE if the first pass query returned no results and a database result + * set if not. + */ + public function execute() + { + if (!$this->executedFirstPass) { + $this->executeFirstPass(); + } + if (!$this->normalize) { + return FALSE; + } + + $this->join('search_dataset', 'd', 'i.sid = d.sid AND i.type = d.type'); + $this->condition($this->conditions); + + if (empty($this->scores)) { + // Add default score. + $this->addScore('i.relevance'); + } + if (count($this->getOrderBy()) == 0) { + // Add default order. 
+ $this->orderBy('calculated_score', 'DESC'); + } + + if (count($this->multiply)) { + // Add the total multiplicator as many times as requested to maintain + // normalization as far as possible. + $sum = array_sum($this->multiply); + foreach (array_keys($this->multiply) as $i) { + // Bind one :total_$i placeholder per multiplier, as emitted by addScore(). + $this->scoresArguments[':total_' . $i] = $sum; + } + } + + // Replace i.relevance pseudo-field with the actual, normalized value. + $this->scores = str_replace('i.relevance', '(' . (1.0 / $this->normalize) . ' * i.score * t.count)', $this->scores); + // Convert scores to an expression. + $this->addExpression('SUM(' . implode(' + ', $this->scores) . ')', 'calculated_score', $this->scoresArguments); + + // Add tag and useful metadata. + $this + ->addTag('search_' . $this->type) + ->addMetaData('normalize', $this->normalize) + ->fields('i', array('type', 'sid')); + + return $this->query->execute(); + } +} \ No newline at end of file Index: modules/search/search.info =================================================================== RCS file: /cvs/drupal/drupal/modules/search/search.info,v retrieving revision 1.9 diff -u -p -r1.9 search.info --- modules/search/search.info 19 Jun 2009 06:26:51 -0000 1.9 +++ modules/search/search.info 4 Jul 2009 13:22:33 -0000 @@ -9,3 +9,4 @@ files[] = search.admin.inc files[] = search.pages.inc files[] = search.install files[] = search.test +files[] = search.extender.inc Index: modules/search/search.module =================================================================== RCS file: /cvs/drupal/drupal/modules/search/search.module,v retrieving revision 1.300 diff -u -p -r1.300 search.module --- modules/search/search.module 1 Jul 2009 20:39:20 -0000 1.300 +++ modules/search/search.module 4 Jul 2009 13:22:33 -0000 @@ -248,11 +248,20 @@ function search_wipe($sid = NULL, $type module_invoke_all('search', 'reset'); } else { - db_query("DELETE FROM {search_dataset} WHERE sid = %d AND type = '%s'", $sid, $type); - db_query("DELETE FROM {search_index} WHERE sid = %d 
AND type = '%s'", $sid, $type); + db_delete('search_dataset') + ->condition('sid', $sid) + ->condition('type', $type) + ->execute(); + db_delete('search_index') + ->condition('sid', $sid) + ->condition('type', $type) + ->execute(); // Don't remove links if re-indexing. if (!$reindex) { - db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type); + db_delete('search_node_links') + ->condition('sid', $sid) + ->condition('type', $type) + ->execute(); } } } @@ -292,20 +301,29 @@ function search_cron() { * up to date (even if cron times out or otherwise fails). */ function search_update_totals() { - // Update word IDF (Inverse Document Frequency) counts for new/changed words + // Update word IDF (Inverse Document Frequency) counts for new/changed words. foreach (search_dirty() as $word => $dummy) { // Get total count - $total = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word)); - // Apply Zipf's law to equalize the probability distribution + $total = db_query("SELECT SUM(score) FROM {search_index} WHERE word = :word", array(':word' => $word))->fetchField(); + // Apply Zipf's law to equalize the probability distribution. $total = log10(1 + 1/(max(1, $total))); - db_merge('search_total')->key(array('word' => $word))->fields(array('count' => $total))->execute(); + db_merge('search_total') + ->key(array('word' => $word)) + ->fields(array('count' => $total)) + ->execute(); } // Find words that were deleted from search_index, but are still in // search_total. We use a LEFT JOIN between the two tables and keep only the // rows which fail to join. 
$result = db_query("SELECT t.word AS realword, i.word FROM {search_total} t LEFT JOIN {search_index} i ON t.word = i.word WHERE i.word IS NULL"); - while ($word = db_fetch_object($result)) { - db_query("DELETE FROM {search_total} WHERE word = '%s'", $word->realword); + $or = db_or(); + foreach ($result as $word) { + $or->condition('word', $word->realword); + } + if (count($or) > 0) { + db_delete('search_total') + ->condition($or) + ->execute(); } } @@ -578,27 +596,40 @@ function search_index($sid, $type, $text search_wipe($sid, $type, TRUE); // Insert cleaned up data into dataset - db_query("INSERT INTO {search_dataset} (sid, type, data, reindex) VALUES (%d, '%s', '%s', %d)", $sid, $type, $accum, 0); + db_insert('search_dataset') + ->fields(array( + 'sid' => $sid, + 'type' => $type, + 'data' => $accum, + 'reindex' => 0, + )) + ->execute(); // Insert results into search index foreach ($results[0] as $word => $score) { // If a word already exists in the database, its score gets increased // appropriately. If not, we create a new record with the appropriate // starting score. - db_merge('search_index')->key(array( - 'word' => $word, - 'sid' => $sid, - 'type' => $type, - ))->fields(array('score' => $score))->expression('score', 'score + :score', array(':score' => $score)) - ->execute(); + db_merge('search_index') + ->key(array( + 'word' => $word, + 'sid' => $sid, + 'type' => $type, + )) + ->fields(array('score' => $score)) + ->expression('score', 'score + :score', array(':score' => $score)) + ->execute(); search_dirty($word); } unset($results[0]); // Get all previous links from this item. 
- $result = db_query("SELECT nid, caption FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type); + $result = db_query("SELECT nid, caption FROM {search_node_links} WHERE sid = :sid AND type = :type", array( + ':sid' => $sid, + ':type' => $type + )); $links = array(); - while ($link = db_fetch_object($result)) { + foreach ($result as $link) { $links[$link->nid] = $link->caption; } @@ -608,7 +639,12 @@ function search_index($sid, $type, $text if (isset($links[$nid])) { if ($links[$nid] != $caption) { // Update the existing link and mark the node for reindexing. - db_query("UPDATE {search_node_links} SET caption = '%s' WHERE sid = %d AND type = '%s' AND nid = %d", $caption, $sid, $type, $nid); + db_update('search_node_links') + ->fields(array('caption' => $caption)) + ->condition('sid', $sid) + ->condition('type', $type) + ->condition('nid', $nid) + ->execute(); search_touch_node($nid); } // Unset the link to mark it as processed. @@ -616,13 +652,24 @@ function search_index($sid, $type, $text } else { // Insert the existing link and mark the node for reindexing. - db_query("INSERT INTO {search_node_links} (caption, sid, type, nid) VALUES ('%s', %d, '%s', %d)", $caption, $sid, $type, $nid); + db_insert('search_node_links') + ->fields(array( + 'caption' => $caption, + 'sid' => $sid, + 'type' => $type, + 'nid' => $nid, + )) + ->execute(); search_touch_node($nid); } } // Any left-over links in $links no longer exist. Delete them and mark the nodes for reindexing. foreach ($links as $nid => $caption) { - db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s' AND nid = %d", $sid, $type, $nid); + db_delete('search_node_links') + ->condition('sid', $sid) + ->condition('type', $type) + ->condition('nid', $nid) + ->execute(); search_touch_node($nid); } } @@ -634,7 +681,11 @@ function search_index($sid, $type, $text * The nid of the node that needs reindexing. 
*/ function search_touch_node($nid) { - db_query("UPDATE {search_dataset} SET reindex = %d WHERE sid = %d AND type = 'node'", REQUEST_TIME, $nid); + db_update('search_dataset') + ->fields(array('reindex' => REQUEST_TIME)) + ->condition('type', 'node') + ->condition('sid', $nid) + ->execute(); } /** @@ -642,9 +693,9 @@ function search_touch_node($nid) { */ function search_node_update_index($node) { // Transplant links to a node into the target node. - $result = db_query("SELECT caption FROM {search_node_links} WHERE nid = %d", $node->nid); + $result = db_query("SELECT caption FROM {search_node_links} WHERE nid = :nid", array(':nid' => $node->nid)); $output = array(); - while ($link = db_fetch_object($result)) { + foreach ($result as $link) { $output[] = $link->caption; } if (count($output)) { @@ -704,7 +755,7 @@ function search_comment_unpublish($comme /** * Extract a module-specific search option from a search query. e.g. 'type:book' */ -function search_query_extract($keys, $option) { +function search_expression_extract($keys, $option) { if (preg_match('/(^| )' . $option . ':([^ ]*)( |$)/i', $keys, $matches)) { return $matches[2]; } @@ -714,8 +765,8 @@ function search_query_extract($keys, $op * Return a query with the given module-specific search option inserted in. * e.g. 'type:book'. */ -function search_query_insert($keys, $option, $value = '') { - if (search_query_extract($keys, $option)) { +function search_expression_insert($keys, $option, $value = '') { + if (search_expression_extract($keys, $option)) { $keys = trim(preg_replace('/(^| )' . $option . ':[^ ]*/i', '', $keys)); } if ($value != '') { @@ -725,281 +776,6 @@ function search_query_insert($keys, $opt } /** - * Parse a search query into SQL conditions. - * - * We build two queries that matches the dataset bodies. @See do_search for - * more about these. - * - * @param $text - * The search keys. - * @return - * A list of six elements. 
- * * A series of statements AND'd together which will be used to provide all - * possible matches. - * * Arguments for this query part. - * * A series of exact word matches OR'd together. - * * Arguments for this query part. - * * A boolean indicating whether this is a simple query or not. Negative - * terms, presence of both AND / OR make this FALSE. - * * A boolean indicating the presence of a lowercase or. Maybe the user - * wanted to use OR. - */ -function search_parse_query($text) { - $keys = array('positive' => array(), 'negative' => array()); - - // Tokenize query string - preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' ' . $text, $matches, PREG_SET_ORDER); - - if (count($matches) < 1) { - return NULL; - } - - // Classify tokens - $or = FALSE; - $warning = ''; - $simple = TRUE; - foreach ($matches as $match) { - $phrase = FALSE; - // Strip off phrase quotes - if ($match[2]{0} == '"') { - $match[2] = substr($match[2], 1, -1); - $phrase = TRUE; - $simple = FALSE; - } - // Simplify keyword according to indexing rules and external preprocessors - $words = search_simplify($match[2]); - // Re-explode in case simplification added more words, except when matching a phrase - $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY); - // Negative matches - if ($match[1] == '-') { - $keys['negative'] = array_merge($keys['negative'], $words); - } - // OR operator: instead of a single keyword, we store an array of all - // OR'd keywords. - elseif ($match[2] == 'OR' && count($keys['positive'])) { - $last = array_pop($keys['positive']); - // Starting a new OR? 
- if (!is_array($last)) { - $last = array($last); - } - $keys['positive'][] = $last; - $or = TRUE; - continue; - } - // AND operator: implied, so just ignore it - elseif ($match[2] == 'AND' || $match[2] == 'and') { - $warning = $match[2]; - continue; - } - - // Plain keyword - else { - if ($match[2] == 'or') { - $warning = $match[2]; - } - if ($or) { - // Add to last element (which is an array) - $keys['positive'][count($keys['positive']) - 1] = array_merge($keys['positive'][count($keys['positive']) - 1], $words); - } - else { - $keys['positive'] = array_merge($keys['positive'], $words); - } - } - $or = FALSE; - } - - // Convert keywords into SQL statements. - $query = array(); - $query2 = array(); - $arguments = array(); - $arguments2 = array(); - $matches = 0; - $simple_and = FALSE; - $simple_or = FALSE; - // Positive matches - foreach ($keys['positive'] as $key) { - // Group of ORed terms - if (is_array($key) && count($key)) { - $simple_or = TRUE; - $queryor = array(); - $any = FALSE; - foreach ($key as $or) { - list($q, $num_new_scores) = _search_parse_query($or, $arguments2); - $any |= $num_new_scores; - if ($q) { - $queryor[] = $q; - $arguments[] = "% $or %"; - } - } - if (count($queryor)) { - $query[] = '(' . implode(' OR ', $queryor) . 
')'; - // A group of OR keywords only needs to match once - $matches += ($any > 0); - } - } - // Single ANDed term - else { - $simple_and = TRUE; - list($q, $num_new_scores, $num_valid_words) = _search_parse_query($key, $arguments2); - if ($q) { - $query[] = $q; - $arguments[] = "% $key %"; - if (!$num_valid_words) { - $simple = FALSE; - } - // Each AND keyword needs to match at least once - $matches += $num_new_scores; - } - } - } - if ($simple_and && $simple_or) { - $simple = FALSE; - } - // Negative matches - foreach ($keys['negative'] as $key) { - list($q) = _search_parse_query($key, $arguments2, TRUE); - if ($q) { - $query[] = $q; - $arguments[] = "% $key %"; - $simple = FALSE; - } - } - $query = implode(' AND ', $query); - - // Build word-index conditions for the first pass - $query2 = substr(str_repeat("i.word = '%s' OR ", count($arguments2)), 0, -4); - - return array($query, $arguments, $query2, $arguments2, $matches, $simple, $warning); -} - -/** - * Helper function for search_parse_query(); - */ -function _search_parse_query(&$word, &$scores, $not = FALSE) { - $num_new_scores = 0; - $num_valid_words = 0; - // Determine the scorewords of this word/phrase - if (!$not) { - $split = explode(' ', $word); - foreach ($split as $s) { - $num = is_numeric($s); - if ($num || drupal_strlen($s) >= variable_get('minimum_word_size', 3)) { - $s = $num ? ((int)ltrim($s, '-0')) : $s; - if (!isset($scores[$s])) { - $scores[$s] = $s; - $num_new_scores++; - } - $num_valid_words++; - } - } - } - // Return matching snippet and number of added words - return array("d.data " . ($not ? 'NOT ' : '') . "LIKE '%s'", $num_new_scores, $num_valid_words); -} - -/** - * Do a query on the full-text search index for a word or words. - * - * This function is normally only called by each module that support the - * indexed search (and thus, implements hook_update_index()). - * - * Results are retrieved in two logical passes. However, the two passes are - * joined together into a single query. 
And in the case of most simple - * queries the second pass is not even used. - * - * The first pass selects a set of all possible matches, which has the benefit - * of also providing the exact result set for simple "AND" or "OR" searches. - * - * The second portion of the query further refines this set by verifying - * advanced text conditions (such negative or phrase matches) - * - * @param $keywords - * A search string as entered by the user. - * - * @param $type - * A string identifying the calling module. - * - * @param $join1 - * (optional) Inserted into the JOIN part of the first SQL query. - * For example "INNER JOIN {node} n ON n.nid = i.sid". - * - * @param $where1 - * (optional) Inserted into the WHERE part of the first SQL query. - * For example "(n.status > %d)". - * - * @param $arguments1 - * (optional) Extra SQL arguments belonging to the first query. - * - * @param $columns2 - * (optional) Inserted into the SELECT pat of the second query. Must contain - * a column selected as 'calculated_score'. - * defaults to 'SUM(i.relevance) AS calculated_score' - * - * @param $join2 - * (optional) Inserted into the JOIN par of the second SQL query. - * For example "INNER JOIN {node_comment_statistics} n ON n.nid = i.sid" - * - * @param $arguments2 - * (optional) Extra SQL arguments belonging to the second query parameter. - * - * @param $sort_parameters - * (optional) SQL arguments for sorting the final results. - * Default: 'ORDER BY calculated_score DESC' - * - * @return - * An array of SIDs for the search results. 
- * - * @ingroup search - */ -function do_search($keywords, $type, $join1 = '', $where1 = '1 = 1', $arguments1 = array(), $columns2 = 'SUM(i.relevance) AS calculated_score', $join2 = '', $arguments2 = array(), $sort_parameters = 'ORDER BY calculated_score DESC') { - $query = search_parse_query($keywords); - - if ($query[2] == '') { - form_set_error('keys', format_plural(variable_get('minimum_word_size', 3), 'You must include at least one positive keyword with 1 character or more.', 'You must include at least one positive keyword with @count characters or more.')); - } - if ($query[6]) { - if ($query[6] == 'or') { - drupal_set_message(t('Search for either of the two terms with uppercase OR. For example, cats OR dogs.')); - } - } - if ($query === NULL || $query[0] == '' || $query[2] == '') { - return array(); - } - - // Build query for keyword normalization. - $conditions = "$where1 AND ($query[2]) AND i.type = '%s'"; - $arguments1 = array_merge($arguments1, $query[3], array($type)); - $join = "INNER JOIN {search_total} t ON i.word = t.word $join1"; - if (!$query[5]) { - $conditions .= " AND ($query[0])"; - $arguments1 = array_merge($arguments1, $query[1]); - $join .= " INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type"; - } - - // Calculate maximum keyword relevance, to normalize it. - $select = "SELECT SUM(i.score * t.count) AS calculated_score FROM {search_index} i $join WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d ORDER BY calculated_score DESC"; - $arguments = array_merge($arguments1, array($query[4])); - $normalize = db_result(db_query_range($select, $arguments, 0, 1)); - if (!$normalize) { - return array(); - } - $columns2 = str_replace('i.relevance', '(' . (1.0 / $normalize) . ' * i.score * t.count)', $columns2); - - // Build query to retrieve results. 
- $select = "SELECT i.type, i.sid, $columns2 FROM {search_index} i $join $join2 WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d"; - $count_select = "SELECT COUNT(*) FROM ($select) n1"; - $arguments = array_values(array_merge($arguments2, $arguments1, array($query[4]))); - - // Do actual search query - $result = pager_query("$select $sort_parameters", 10, 0, $count_select, $arguments); - $results = array(); - while ($item = db_fetch_object($result)) { - $results[] = $item; - } - return $results; -} - -/** * Helper function for grabbing search keys. */ function search_get_keys() { @@ -1328,3 +1104,10 @@ function search_forms() { ); return $forms; } + +/** + * Returns a SearchQuery object. + */ +function db_search(array $options = array()) { + return new SearchQuery($options); +} Index: modules/search/search.test =================================================================== RCS file: /cvs/drupal/drupal/modules/search/search.test,v retrieving revision 1.24 diff -u -p -r1.24 search.test --- modules/search/search.test 3 Jul 2009 19:21:54 -0000 1.24 +++ modules/search/search.test 4 Jul 2009 13:22:33 -0000 @@ -133,7 +133,11 @@ class SearchMatchTestCase extends Drupal 'xx "minim am veniam es" OR dolore' => array() ); foreach ($queries as $query => $results) { - $set = do_search($query, SEARCH_TYPE); + $result = db_search() + ->searchExpression($query, SEARCH_TYPE) + ->execute(); + + $set = $result ? $result->fetchAll() : array(); $this->_testQueryMatching($query, $set, $results); $this->_testQueryScores($query, $set, $results); } @@ -148,7 +152,11 @@ class SearchMatchTestCase extends Drupal 'germany' => array(11, 12), ); foreach ($queries as $query => $results) { - $set = do_search($query, SEARCH_TYPE_2); + $result = db_search() + ->searchExpression($query, SEARCH_TYPE_2) + ->execute(); + + $set = $result ? $result->fetchAll() : array(); $this->_testQueryMatching($query, $set, $results); $this->_testQueryScores($query, $set, $results); }