=== modified file 'includes/path.inc'
--- includes/path.inc	2009-05-26 09:12:28 +0000
+++ includes/path.inc	2009-05-31 04:39:26 +0000
@@ -68,6 +68,11 @@ function drupal_lookup_path($action, $pa
     $no_aliases = array();
   }
   elseif ($count > 0 && $path != '') {
+    // Retrieve the path alias whitelist.
+    $whitelist = variable_get('path_whitelist', array());
+    // And derive the top level component of the path.
+    $top_level = strtok($path, '/');
+
     if ($action == 'alias') {
       // During the first call to drupal_lookup_path() per language, load the
       // expected system paths for the page from cache.
@@ -93,6 +98,12 @@ function drupal_lookup_path($action, $pa
       if (isset($map[$path_language][$path])) {
         return $map[$path_language][$path];
       }
+      elseif (!isset($whitelist[$top_level])) {
+        // Check the path whitelist: if the top level part before the first /
+        // is not in the list, no alias exists for this path, so there is no
+        // need to query the database.
+        return FALSE;
+      }
       // For system paths which were not cached, query aliases individually.
       else if (!isset($no_aliases[$path_language][$path])) {
         // Get the most fitting result falling back with alias without language

=== modified file 'modules/path/path.module'
--- modules/path/path.module	2009-05-27 18:33:54 +0000
+++ modules/path/path.module	2009-05-31 02:44:08 +0000
@@ -141,10 +141,63 @@ function path_set_alias($path = NULL, $a
         ->execute();
     }
   }
+  // Make sure the top level part of the system path is on the whitelist.
+  // strtok() returns FALSE for an empty path; never store that as a key.
+  $whitelist = variable_get('path_whitelist', array());
+  $part = strtok($path, '/');
+  if ($part !== FALSE && !isset($whitelist[$part])) {
+    $whitelist[$part] = TRUE;
+    variable_set('path_whitelist', $whitelist);
+  }
   drupal_clear_path_cache();
 }
 
 /**
+ * Rebuild the path alias white list.
+ *
+ * The function uses indexed lookups in a loop and progressively excludes the
+ * top parts already found.
+ *
+ * So the initial query entering the loop is like this:
+ *   SELECT src FROM url_alias LIMIT 1
+ * Suppose this returns 'node/nnnn', then the next query executed within the
+ * loop will be:
+ *   SELECT src FROM url_alias WHERE src <> 'node' AND src NOT LIKE 'node/%' LIMIT 1
+ * This excludes the 'node' part, and suppose it returns 'user', the next query
+ * becomes:
+ *   SELECT src FROM url_alias WHERE src <> 'node' AND src NOT LIKE 'node/%'
+ *   AND src <> 'user' AND src NOT LIKE 'user/%' LIMIT 1
+ * And so on. We keep going until we process all the top level parts, and store
+ * them in the whitelist array.
+ *
+ * The finished list is saved in the 'path_whitelist' variable, which is the
+ * copy that drupal_lookup_path() and path_set_alias() read.
+ *
+ * @return
+ *   An array whose keys are the top level parts of the system paths that have
+ *   aliases; every value is TRUE.
+ */
+function path_whitelist_rebuild() {
+  $whitelist = array();
+  $query = db_select('url_alias', 'u');
+  $query->addField('u', 'src');
+  $query->range(0, 1);
+  // Each pass fetches one row and then excludes its top level part.
+  while ($result = $query->execute()->fetchField()) {
+    $part = strtok($result, '/');
+    $whitelist[$part] = TRUE;
+    // NOTE(review): '%' and '_' inside $part act as LIKE wildcards here and
+    // are not escaped; confirm that this cannot hide other top level parts.
+    $query->condition('src', "$part/%", 'NOT LIKE');
+    $query->condition('src', $part, '<>');
+  }
+  // Save the rebuilt list: system_update_7024() and system_modules_submit()
+  // call this function without using its return value.
+  variable_set('path_whitelist', $whitelist);
+  return $whitelist;
+}
+
+/**
  * Implement hook_node_validate().
  */
 function path_node_validate($node, $form) {

=== modified file 'modules/path/path.test'
--- modules/path/path.test	2009-05-16 19:07:02 +0000
+++ modules/path/path.test	2009-05-31 02:38:12 +0000
@@ -235,3 +235,45 @@ class PathLanguageTestCase extends Drupa
   }
 }
 
+/**
+ * Tests the rebuilding of the path alias whitelist.
+ */
+class PathWhiteListTestCase extends DrupalWebTestCase {
+  public static function getInfo() {
+    return array(
+      'name' => t('Path aliases whitelist'),
+      'description' => t('Confirm that the whitelist is built correctly.'),
+      'group' => t('Path'),
+    );
+  }
+
+  function setUp() {
+    parent::setUp('path');
+  }
+
+  /**
+   * Create aliases under three top level paths and check the rebuilt list.
+   */
+  function testWhiteList() {
+    // Let's create a bunch of aliases.
+    $prefixes = array();
+    for ($i = 0; $i < 3; $i++) {
+      $prefix = $this->randomName();
+      $prefixes[] = $prefix;
+      path_set_alias($prefix, $this->randomName());
+      for ($j = 0; $j < 3; $j++) {
+        path_set_alias($prefix ."/$j", $this->randomName());
+      }
+    }
+    sort($prefixes);
+    // Wipe the stored list so that only the rebuild can bring it back.
+    variable_del('path_whitelist');
+    $whitelist = array_keys(path_whitelist_rebuild());
+    sort($whitelist);
+    $this->assertIdentical($prefixes, $whitelist, t('Whitelist functionality OK'));
+    $stored = array_keys(variable_get('path_whitelist', array()));
+    sort($stored);
+    $this->assertIdentical($prefixes, $stored, t('Rebuilt whitelist is stored in the path_whitelist variable.'));
+  }
+}
+

=== modified file 'modules/system/system.admin.inc'
--- modules/system/system.admin.inc	2009-05-29 19:51:43 +0000
+++ modules/system/system.admin.inc	2009-05-31 02:38:07 +0000
@@ -931,6 +931,7 @@ function system_modules_submit($form, &$
     drupal_set_message(t('The configuration options have been saved.'));
   }
 
+  module_invoke('path', 'whitelist_rebuild');
   drupal_clear_css_cache();
   drupal_clear_js_cache();
 

=== modified file 'modules/system/system.install'
--- modules/system/system.install	2009-05-27 18:33:54 +0000
+++ modules/system/system.install	2009-05-31 02:38:19 +0000
@@ -3496,6 +3496,14 @@ function system_update_7023() {
 }
 
 /**
+ * Generate the URL alias white list.
+ */
+function system_update_7024() {
+  drupal_load('module', 'path');
+  path_whitelist_rebuild();
+}
+
+/**
  * @} End of "defgroup updates-6.x-to-7.x"
  * The next series of updates should start at 8000.
  */