Extend the registry to record class hierarchy and interface implementations, and to parse docblocks.

From: damz <damz@dev.local.local>


---

 graph.inc    |    2 
 registry.inc |  244 ++++++++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 179 insertions(+), 67 deletions(-)

diff --git includes/graph.inc includes/graph.inc
index 1d09f7e..153252f 100644
--- includes/graph.inc
+++ includes/graph.inc
@@ -27,7 +27,7 @@
  *   On return you will also have:
  *   @code
  *     $graph[1]['paths'][2] = 1;
- *     $graph[1]['paths'][3] = 2;
+ *     $graph[1]['paths'][3] = 1;
  *     $graph[2]['reverse_paths'][1] = 1;
  *     $graph[3]['reverse_paths'][1] = 1;
  *   @endcode
diff --git includes/registry.inc includes/registry.inc
index 21dc6f1..ed57792 100644
--- includes/registry.inc
+++ includes/registry.inc
@@ -37,8 +37,6 @@ function _registry_rebuild() {
   require_once DRUPAL_ROOT . '/includes/database/select.inc';
   require_once DRUPAL_ROOT . '/includes/database/' . $driver . '/query.inc';
 
-  // Reset the resources cache.
-  _registry_get_resource_name();
   // Get the list of files we are going to parse.
   $files = array();
   foreach (module_rebuild_cache() as $module) {
@@ -119,9 +117,49 @@ function _registry_parse_files($files) {
     $new_file = !isset($file['md5']);
     if ($new_file || $md5 != $file['md5']) {
       $parsed_files[] = $filename;
-      // We update the md5 after we've saved the files resources rather than here, so if we
+      $resources = _registry_parse_file($contents);
+
+      // Delete registry entries for this file, so we can insert the new resources.
+      db_delete('registry')
+        ->condition('filename', $filename)
+        ->execute();
+
+      // Save this file resources.
+      foreach ($resources as $type => $resources_of_type) {
+        foreach ($resources_of_type as $resource_name => $resource_info) {
+          $suffix = '';
+          // Collect the part of the function name after the module name,
+          // so that we can query the registry for possible hook implementations.
+          if ($type == 'function' && !empty($file['module'])) {
+            $n = strlen($file['module']);
+            if (substr($resource_name, 0, $n) == $file['module']) {
+              $suffix = substr($resource_name, $n + 1);
+            }
+          }
+
+          // Because some systems, such as cache, currently use duplicate function
+          // names in separate files an insert query cannot be used here as it
+          // would cause a key constraint violation. Instead we use a merge query.
+          // In practice this should not be an issue as those systems all initialize
+          // pre-registry and therefore are never loaded by the registry so it
+          // doesn't matter if those records in the registry table point to one
+          // filename instead of another.
+          // TODO: Convert this back to an insert query after all duplicate
+          // function names have been purged from Drupal.
+          db_merge('registry')
+            ->key(array('name' => $resource_name, 'type' => $type))
+            ->fields(array(
+              'filename' => $filename,
+              'module' => $file['module'],
+              'suffix' => $suffix,
+              'weight' => $file['weight'],
+            ))
+            ->execute();
+        }
+      }
+
+      // We only update the md5 after we've saved the files resources so if we
       // don't make it through this rebuild, the next run will reparse the file.
-      _registry_parse_file($filename, $contents, $file['module'], $file['weight']);
       $file['md5'] = $md5;
       db_merge('registry_file')
         ->key(array('filename' => $filename))
@@ -135,99 +173,99 @@ function _registry_parse_files($files) {
 /**
- * Parse a file and save its function and class listings.
+ * Parse a file and return its function, class and interface listings.
  *
- * @param $filename
- *  Name of the file we are going to parse.
  * @param $contents
  *  Contents of the file we are going to parse as a string.
- * @param $module
- *   (optional) Name of the module this file belongs to.
- * @param $weight
- *   (optional) Weight of the module.
  */
-function _registry_parse_file($filename, $contents, $module = '', $weight = 0) {
+function _registry_parse_file($contents) {
   static $map = array(T_FUNCTION => 'function', T_CLASS => 'class', T_INTERFACE => 'interface');
-  // Delete registry entries for this file, so we can insert the new resources.
-  db_delete('registry')
-    ->condition('filename', $filename)
-    ->execute();
+  $resources = array(
+    'interface' => array(),
+    'class' => array(),
+    'function' => array(),
+  );
+
   $tokens = token_get_all($contents);
+  $docblock = NULL;
   while ($token = next($tokens)) {
+    // Save the last documentation block before a resource.
+    if (is_array($token) && $token[0] == T_DOC_COMMENT) {
+      $docblock = $token[1];
+    }
+
     // Ignore all tokens except for those we are specifically saving.
     if (is_array($token) && isset($map[$token[0]])) {
       $type = $map[$token[0]];
-      if ($resource_name = _registry_get_resource_name($tokens, $type)) {
-        $suffix = '';
-        // Collect the part of the function name after the module name,
-        // so that we can query the registry for possible hook implementations.
-        if ($type == 'function' && !empty($module)) {
-          $n = strlen($module);
-          if (substr($resource_name, 0, $n) == $module) {
-            $suffix = substr($resource_name, $n + 1);
-          }
-        }
-        $fields = array(
-          'filename' => $filename,
-          'module' => $module,
-          'suffix' => $suffix,
-          'weight' => $weight,
-        );
-        // Because some systems, such as cache, currently use duplicate function
-        // names in separate files an insert query cannot be used here as it
-        // would cause a key constraint violation. Instead we use a merge query.
-        // In practice this should not be an issue as those systems all initialize
-        // pre-registry and therefore are never loaded by the registry so it
-        // doesn't matter if those records in the registry table point to one
-        // filename instead of another.
-        // TODO: Convert this back to an insert query after all duplicate
-        // function names have been purged from Drupal.
-        db_merge('registry')
-          ->key(array('name' => $resource_name, 'type' => $type))
-          ->fields($fields)
-          ->execute();
-
-        // We skip the body because classes may contain functions.
-        _registry_skip_body($tokens);
-      }
+      $resource_info = _registry_get_resource_info($tokens, $type, $docblock);
+
+      // Save the resource.
+      $resources[$type][$resource_info['name']] = $resource_info;
+
+      // We skip the body because classes may contain functions.
+      _registry_skip_body($tokens);
+
+      // Clear the documentation block.
+      $docblock = NULL;
     }
   }
+
+  return $resources;
 }
 
 /**
- * Derive the name of the next resource in the token stream.
+ * Collect information about the next resource in the token stream.
  *
- * When called without arguments, it resets its static cache.
- *
  * @param $tokens
  *  The collection of tokens for the current file being parsed.
  * @param $type
  *  The human-readable token name, either: "function", "class", or "interface".
+ * @param $docblock
+ *  The documentation block applying to that resource.
  * @return
- *  The name of the resource, or FALSE if the resource has already been processed.
+ *  An array with the key 'name' and, if found, 'extends', 'implements', 'doc'.
  */
-function _registry_get_resource_name(&$tokens = NULL, $type = NULL) {
-  // Keep a running list of all resources we've saved so far, so that we never
-  // save one more than once.
-  static $resources;
-
-  if (!isset($tokens)) {
-    $resources = array();
-    return;
-  }
+function _registry_get_resource_info(array &$tokens, $type, $docblock) {
+  $info = array();
+
   // Determine the name of the resource.
   next($tokens); // Eat a space.
   $token = next($tokens);
   if ($token == '&') {
     $token = next($tokens);
   }
-  $resource_name = $token[1];
+  $info['name'] = $token[1];
+
+  if ($type == 'interface' || $type == 'class') {
+    _registry_skip_whitespace($tokens); // Eat a space.
+
+    $token = next($tokens);
+    while (is_array($token) && ($token[0] == T_EXTENDS || $token[0] == T_IMPLEMENTS)) {
+      if ($token[0] == T_EXTENDS) {
+        _registry_skip_whitespace($tokens); // Eat a space.
+        $info['extends'] = _registry_string_value(next($tokens));
+      }
+      else {
+        $info['implements'] = array();
+        do {
+          _registry_skip_whitespace($tokens);
+          $info['implements'][] = _registry_string_value(next($tokens));
+          _registry_skip_whitespace($tokens);
+        }
+        while (_registry_string_value(next($tokens)) == ',');
+        // Reclaim the last token.
+        prev($tokens);
+      }
+      _registry_skip_whitespace($tokens); // Eat a space.
+      $token = next($tokens);
+    }
+    // Reclaim the last token.
+    prev($tokens);
+  }
 
-  // Ensure that we never save it more than once.
-  if (isset($resources[$type][$resource_name])) {
-    return FALSE;
+  if (isset($docblock)) {
+    $info['doc'] = _registry_parse_docblock($docblock);
   }
-  $resources[$type][$resource_name] = TRUE;
 
-  return $resource_name;
+  return $info;
 }
 
 /**
@@ -258,6 +296,100 @@ function _registry_skip_body(&$tokens) {
 }
 
 /**
+ * Utility function: return the string value of a token.
+ *
+ * @param $token
+ *   The token, in the form returned by token_get_all().
+ * @return
+ *   The text of an array token, or the token itself if it is a plain string.
+ */
+function _registry_string_value($token) {
+  return is_array($token) ? $token[1] : $token;
+}
+
+/**
+ * Utility function: skip the next token if it is whitespace.
+ *
+ * @param $tokens
+ *   The collection of tokens for the current file being parsed. Its internal
+ *   pointer advances by one only when the next token is T_WHITESPACE.
+ */
+function _registry_skip_whitespace(&$tokens) {
+  $token = next($tokens);
+  if (!is_array($token) || $token[0] != T_WHITESPACE) {
+    // Not whitespace: step back so that the caller reads this token next.
+    prev($tokens);
+  }
+}
+
+/**
+ * Parse a documentation block into summary, description and tag elements.
+ *
+ * The text up to the first blank line (or the first tag) is the summary, the
+ * text after that blank line and before the first tag is the description, and
+ * each line starting with "@" opens a new element that runs until the next tag.
+ *
+ * @param $docblock
+ *   The raw documentation block, including the comment delimiters.
+ * @return
+ *   An associative array with the keys:
+ *   - 'summary': the summary, with its lines joined by spaces.
+ *   - 'description': the description, with its lines joined by spaces.
+ *   - 'elements': a list of arrays with the keys 'tag' (the tag name without
+ *     the leading "@") and 'text' (the tag text, continuation lines included).
+ */
+function _registry_parse_docblock($docblock) {
+  // Remove start and end comments.
+  $docblock = substr($docblock, 2, -2);
+  // Remove all leading asterisks.
+  $docblock = trim(preg_replace('@^\s*\* *(.*)$@m', "\$1", $docblock));
+
+  $state = "summary";
+  $summary = '';
+  $description = '';
+  $blocks = array();
+  foreach (explode("\n", $docblock) as $line) {
+    // A tag may stand alone on its line: the text of "@return" usually starts
+    // on the following line.
+    if (preg_match('|^@(\S+)\s*(.*)$|', $line, $matches)) {
+      $state = "blocks";
+      if (isset($current_block)) {
+        $blocks[] = $current_block;
+      }
+      $current_block = array(
+        'tag' => $matches[1],
+        'text' => $matches[2],
+      );
+    }
+    else if ($state == "summary") {
+      if (strlen(trim($line)) == 0) {
+        // The first blank line ends the summary and starts the description.
+        $state = "description";
+        continue;
+      }
+      $summary .= $line . " ";
+    }
+    else if ($state == "description") {
+      $description .= $line . " ";
+    }
+    else {
+      // Continuation line of the current tag.
+      $separator = ($current_block['text'] === '') ? '' : "\n";
+      $current_block['text'] .= $separator . $line;
+    }
+  }
+  if (isset($current_block)) {
+    $blocks[] = $current_block;
+  }
+
+  return array(
+    'summary' => trim($summary),
+    'description' => trim($description),
+    'elements' => $blocks,
+  );
+}
+
+/**
  * @} End of "defgroup registry".
  */
 
