Index: includes/path.inc =================================================================== RCS file: /cvs/drupal/drupal/includes/path.inc,v retrieving revision 1.37 diff -u -F^f -r1.37 path.inc --- includes/path.inc 26 May 2009 09:12:28 -0000 1.37 +++ includes/path.inc 31 May 2009 01:52:23 -0000 @@ -66,8 +66,16 @@ function drupal_lookup_path($action, $pa $count = NULL; $system_paths = array(); $no_aliases = array(); + + $whitelist = drupal_path_alias_whitelist_rebuild(); + variable_set('path_alias_whitelist', $whitelist); } elseif ($count > 0 && $path != '') { + // Retrieve the path alias whitelist + $whitelist = variable_get('path_alias_whitelist', array()); + // And derive the top level component of the path + $top_level = strtok($path, '/'); + if ($action == 'alias') { // During the first call to drupal_lookup_path() per language, load the // expected system paths for the page from cache. @@ -93,6 +101,12 @@ function drupal_lookup_path($action, $pa if (isset($map[$path_language][$path])) { return $map[$path_language][$path]; } + elseif (!isset($whitelist[$top_level])) { + // Check the path whitelist, if the top_level part before the first / + // is not in the list, then there is no need to do anything further, + // it is not in the database + return FALSE; + } // For system paths which were not cached, query aliases individually. else if (!isset($no_aliases[$path_language][$path])) { // Get the most fitting result falling back with alias without language @@ -347,3 +361,38 @@ function drupal_match_path($path, $patte function current_path() { return $_GET['q']; } + +/** + * Rebuild the path alias white list. + * + * This code may be hard to understand at first, but it will become clearer in a minute. + * + * The idea is to use indexed lookups and progressively exclude the top part + * that has been already looked up. 
+ * + * So the initial query entering the loop is like this: + * SELECT src FROM url_alias LIMIT 1 + * Suppose this returns 'node/nnnn', then the next query executed within the loop will be: + * SELECT src FROM url_alias WHERE src <> 'node' AND src NOT LIKE 'node/%' LIMIT 1 + * This excludes the 'node' part, and suppose it returns 'user', the next query becomes + * SELECT src FROM url_alias WHERE src <> 'node' AND src NOT LIKE 'node/%' + * AND src <> 'user' AND src NOT LIKE 'user/%' LIMIT 1 + * And so on. We keep going until we process all the top level parts, and store them in + * the whitelist array. + * + * @return + * An array keyed by the top level part of every src value in url_alias, each set to TRUE. + */ +function drupal_path_alias_whitelist_rebuild() { + $whitelist = array(); + $query = db_select('url_alias', 'u'); + $query->addField('u', 'src'); + $query->range(0, 1); + while ($result = $query->execute()->fetchField()) { + $part = strtok($result, '/'); + $whitelist[$part] = TRUE; + $query->condition('src', "$part/%", 'NOT LIKE'); + $query->condition('src', $part, '<>'); + } + return $whitelist; +}