? tests/coder_format
? tests/coder_format.zip
Index: scripts/coder_format/coder_format.inc
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/coder/scripts/coder_format/coder_format.inc,v
retrieving revision 1.2.4.5
diff -u -r1.2.4.5 coder_format.inc
--- scripts/coder_format/coder_format.inc	16 Jan 2008 22:26:33 -0000	1.2.4.5
+++ scripts/coder_format/coder_format.inc	19 Jan 2008 03:47:53 -0000
@@ -88,17 +88,7 @@
   fclose($fd);
   
   if ($code !== false) {
-    // Preprocess source code.
-    $code = coder_exec_processors($code, 'coder_preprocessor');
-    
-    // Process source code.
-    $code = coder_format_string($code);
-    
-    // Postprocess source code.
-    $code = coder_exec_processors($code, 'coder_postprocessor');
-    
-    // Fix beginning and end of code.
-    $code = coder_trim_php($code);
+    $code = coder_format_string_all($code);
     
     if ($code !== false) {
       // Write formatted source code to target file.
@@ -113,6 +103,29 @@
 }
 
 /**
+ * Formats source code according to Drupal conventions, running the
+ * preprocessors before and the postprocessors after the formatter.
+ *
+ * @param $code
+ *   Source code to process.
+ * @return
+ *   The processed code, as returned by coder_trim_php().
+ */
+function coder_format_string_all($code) {
+  // Preprocess the raw source.
+  $preprocessed = coder_exec_processors($code, 'coder_preprocessor');
+  
+  // Apply the formatter itself.
+  $formatted = coder_format_string($preprocessed);
+  
+  // Postprocess the formatted source.
+  $postprocessed = coder_exec_processors($formatted, 'coder_postprocessor');
+  
+  // Fix beginning and end of code.
+  return coder_trim_php($postprocessed);
+}
+
+/**
  * Format the source code according to Drupal coding style guidelines.
  *
  * This function uses PHP's tokenizer functions.
@@ -166,6 +179,13 @@
  *   $inline_if bool
  *      Controls formatting of ? and : for inline ifs until a ; (semicolon) is
  *      processed.
+ *   $in_function_declaration
+ *      Prevents whitespace after & for function declarations, e.g.
+ *      function &foo(). Is true after function token but before first
+ *      parenthesis.
+ *   $in_multiline
+ *      Array mapping each parenthesis level to whether or not the
+ *      structure at that level is multiline.
  *
  * @param $code
  *      The source code to format.
@@ -187,11 +207,15 @@
   $in_do_while    = false;
   
   // Whitespace controls:
-  $in_object   = false;
-  $in_at       = false;
-  $in_php      = false;
-  $in_quote    = false;
-  $inline_if   = false;
+  $in_object        = false;
+  $in_at            = false;
+  $in_php           = false;
+  $in_quote         = false;
+  $inline_if        = false;
+  $in_multiline     = array();
+  
+  // Whether or not a function token was encountered:
+  $in_function_declaration = false;
   
   $result      = '';
   $lasttoken   = array(0);
@@ -220,7 +244,8 @@
               ++$braces_in_case;
             }
             ++$_coder_indent;
-            $result = rtrim($result) .' '. $text . coder_br();
+            $result = rtrim($result) .' '. $text;
+            coder_br($result);
           }
           else {
             $in_brace = true;
@@ -243,9 +268,10 @@
             $result = rtrim($result);
             if (substr($result, -1) != '{') {
               // Avoid line break in empty curly braces.
-              $result .= coder_br();
+              coder_br($result);
             }
-            $result .= $text . coder_br();
+            $result .= $text;
+            coder_br($result);
           }
           else {
             $in_brace = false;
@@ -256,7 +282,7 @@
         case ';':
           $result = rtrim($result) . $text;
           if (!$parenthesis && !$in_heredoc) {
-            $result .= coder_br();
+            coder_br($result);
           }
           else {
             $result .= ' ';
@@ -279,24 +305,33 @@
             if ($in_case) {
               ++$_coder_indent;
             }
-            $result = rtrim($result) . $text . coder_br();
+            $result = rtrim($result) . $text;
+            coder_br($result);
           }
           break;
         
         case '(':
           $result .= $text;
           ++$parenthesis;
+          // Not multiline until proven so by whitespace.
+          $in_multiline[$parenthesis] = false;
+          // Terminate function declaration, as a parenthesis indicates
+          // the beginning of the arguments. This will catch all other
+          // instances of parentheses, but in this case it's not a problem.
+          $in_function_declaration = false;
           break;
         
         case ')':
-          if (!$in_quote && !$in_heredoc && substr(rtrim($result), -1) == ',') {
-            // Fix indent of right parenthesis in multiline arrays by
+          if (!$in_quote && !$in_heredoc && (substr(rtrim($result), -1) == ',' || $in_multiline[$parenthesis])) {
+            // Fix indent of right parenthesis in multiline structures by
             // increasing indent for each parenthesis and decreasing one level.
             $_coder_indent = $_coder_indent + $parenthesis - 1;
-            $result = rtrim($result) . coder_br() . $text;
-            $_coder_indent = $_coder_indent - $parenthesis + 1;
-          }
-          else {
+            $result = rtrim($result);
+            coder_br($result);
+            $result .= $text;
+            // Undo temporary change.
+            $_coder_indent = $_coder_indent - ($parenthesis - 1);
+          } else {
             $result .= $text;
           }
           if ($parenthesis) {
@@ -340,14 +375,24 @@
             $result .= $text;
           }
           else {
-            $result = rtrim($result) .' '. $text .' ';
+            $result = rtrim($result) .' '. $text;
+            // Ampersands used to declare reference return value for
+            // functions should not have trailing space.
+            if (!$in_function_declaration) {
+              $result .= ' ';
+            }
           }
           break;
 
         case '-':
           $result = rtrim($result);
           // Do not add a space before negative numbers or variables.
-          if (substr($result, -1) == '>' || substr($result, -1) == '=' || substr($result, -1) == ',' || substr($result, -1) == ':') {
+          $c = substr($result, -1);
+          // If hyphen trails after open parentheses, it should be adjacent.
+          if ($c == '(') {
+            $result .= ltrim($text);
+          }
+          elseif ($c == '>' || $c == '=' || $c == ',' || $c == ':' || $c == '?') {
             $result .= ' '. $text;
           }
           else {
@@ -397,11 +442,11 @@
           $in_php = true;
           // Add a line break between two PHP tags.
           if (substr(rtrim($result), -2) == '?>') {
-            $result .= coder_br();
+            coder_br($result);
           }
           $result .= trim($text);
           if ($first_php_tag) {
-            $result .= coder_br();
+            coder_br($result);
             $first_php_tag = false;
           }
           else {
@@ -454,18 +499,26 @@
           // Avoid duplicate line feeds outside arrays.
           $c = $parenthesis ? 0 : 1;
           
-          for ($c, $cc = substr_count($text, chr(10)); $c < $cc; ++$c) {
+          for ($c, $cc = substr_count($text, "\n"); $c < $cc; ++$c) {
             if ($parenthesis) {
               // Add extra indent for each parenthesis in multiline definitions (f.e. arrays).
               $_coder_indent = $_coder_indent + $parenthesis;
-              $result = rtrim($result) . coder_br();
+              $result = rtrim($result);
+              coder_br($result);
               $_coder_indent = $_coder_indent - $parenthesis;
             }
             else {
               // Discard any whitespace, just insert a line break.
-              $result .= coder_br();
+              coder_br($result);
             }
           }
+          
+          // If there were newlines present inside a parenthesis,
+          // turn on multiline mode.
+          if ($cc && $parenthesis) {
+            $in_multiline[$parenthesis] = true;
+          }
+          
           break;
         
         case T_IF:
@@ -504,7 +557,9 @@
         case T_ELSE:
         case T_ELSEIF:
           // Write else and else if to a new line.
-          $result = rtrim($result) . coder_br() . trim($text) .' ';
+          $result = rtrim($result);
+          coder_br($result);
+          $result .= trim($text) .' ';
           break;
         
         case T_CASE:
@@ -515,19 +570,22 @@
             $in_case = true;
             // Add a line break between cases.
             if (substr($result, -1) != '{') {
-              $result .= coder_br();
+              coder_br($result);
             }
           }
           else {
             // Decrease current indent to align multiple cases.
             --$_coder_indent;
           }
-          $result .= coder_br() . trim($text) .' ';
+          coder_br($result);
+          $result .= trim($text) .' ';
           break;
         
         case T_BREAK:
           // Write break to a new line.
-          $result = rtrim($result) . coder_br() . trim($text);
+          $result = rtrim($result);
+          coder_br($result);
+          $result .= trim($text);
           if ($in_case && !$braces_in_case) {
             --$_coder_indent;
             $in_case = false;
@@ -546,13 +604,16 @@
           break;
         
         case T_FUNCTION:
+          $in_function_declaration = true;
+          // Fall through.
         case T_CLASS:
           // Write function and class to new lines.
           $result = rtrim($result);
           if (substr($result, -1) == '}') {
-            $result .= coder_br();
+            coder_br($result);
           }
-          $result .= coder_br() . trim($text) .' ';
+          coder_br($result);
+          $result .= trim($text) .' ';
           break;
         
         case T_EXTENDS:
@@ -600,7 +661,9 @@
         case T_DOC_COMMENT:
           if (substr($text, 0, 3) == '/**') {
             // Prepend a new line.
-            $result = rtrim($result) . coder_br() . coder_br();
+            $result = rtrim($result);
+            coder_br($result);
+            coder_br($result);
             
             // Remove carriage returns.
             $text = str_replace("\r", '', $text);
@@ -612,7 +675,8 @@
               
               // Add a new line between function description and first parameter description.
               if (!$params_fixed && substr($lines[$l], 0, 8) == '* @param' && $lines[$l - 1] != '*') {
-                $result .= ' *'. coder_br();
+                $result .= ' *';
+                coder_br($result);
                 $params_fixed = true;
               }
               else if (!$params_fixed && substr($lines[$l], 0, 8) == '* @param') {
@@ -622,7 +686,8 @@
               
               // Add a new line between function params and return.
               if (substr($lines[$l], 0, 9) == '* @return' && $lines[$l - 1] != '*') {
-                $result .= ' *'. coder_br();
+                $result .= ' *';
+                coder_br($result);
               }
               
               // Add one space indent to get ' *[...]'.
@@ -631,7 +696,7 @@
               }
               $result .= $lines[$l];
               if ($l < count($lines)) {
-                $result .= coder_br();
+                coder_br($result);
               }
             }
           }
@@ -640,12 +705,13 @@
             if ($parenthesis) {
               // Add extra indent for each parenthesis in multiline definitions (f.e. arrays).
               $_coder_indent = $_coder_indent + $parenthesis;
-              $result = rtrim($result) . coder_br();
+              $result = rtrim($result);
+              coder_br($result);
               $_coder_indent = $_coder_indent - $parenthesis;
             }
             else {
               // Discard any whitespace, just insert a line break.
-              $result .= coder_br();
+              coder_br($result);
             }
           }
           break;
@@ -655,12 +721,14 @@
           break;
         
         case T_START_HEREDOC:
-          $result .= trim($text) . coder_br(false);
+          $result .= trim($text);
+          coder_br($result, false);
           $in_heredoc = true;
           break;
         
         case T_END_HEREDOC:
-          $result .= trim($text) . coder_br(false);
+          $result .= trim($text);
+          coder_br($result, false);
           $in_heredoc = false;
           break;
         
@@ -678,20 +746,36 @@
 
 /**
  * Generate a line feed including current line indent.
- *
+ *
+ * Spaces at the end of $result that directly follow a line feed are
+ * removed first, so a line left without text keeps no indentation.
+ *
+ * @param $result
+ *   String the line feed and indent get appended to, by reference.
  * @param $add_indent
  *   Whether to add current line indent after line feed.
- * @return
- *   The resulting string.
  */
-function coder_br($add_indent = true) {
+function coder_br(&$result, $add_indent = true) {
   global $_coder_indent;
   
+  // Look at what precedes the trailing run of spaces in $result.
+  $without_spaces = rtrim($result, ' ');
+  $last_char = substr($without_spaces, -1);
+  
+  if ($last_char === "\n") {
+    // The current line holds nothing but indentation: drop those
+    // spaces so the blank line carries no trailing whitespace.
+    $result = $without_spaces;
+  }
+  // Otherwise the spaces follow text, or there is no line feed before
+  // them at all, and $result is left unchanged.
+  
+  // Build the line feed, indented to the current level if requested.
   $output = "\n";
   if ($add_indent && $_coder_indent >= 0) {
     $output .= str_repeat('  ', $_coder_indent);
   }
-  return $output;
+  $result .= $output;
 }
 
 /**
@@ -852,23 +936,34 @@
     // (\n|(?X>!\);).+?,?\n) matches a line break or the first array item.
     // (.*?[^,;]) matches the rest array items.
     // ,?(\n\s*)\); matches the end of multiline array, optionally including a comma.
-    '#search' => '/(^[\040\t]*(?!\*|\/\/)[^\*\/\n]*?\sarray\()(\n|(?>!\);).+?,?\n)(.*?[^,;]),?(\n\s*\);)/ism',
-    '#replace' => '$1$2$3,$4',
+    //             1                                          2                   3           4
+    '#search' => '/(^[\040\t]*(?!\*|\/\/)[^\*\/\n]*?\sarray\()(\n|(?>!\);).+?,?\n)(.*?[^,;]),?(\n\s*\)[;])/ism',
+    '#replace_callback' => 'coder_ml_array_add_comma',
     //'#debug' => true,
   );
 }
 
+function coder_ml_array_add_comma($matches) {
+  // Ends the last item of each nested multiline array with a comma. The ")"
+  // must start the next line, so t('x') or array(1) on one line stay intact.
+  $contents = preg_replace("/(=>.*?[^(,\n]),?(\n\s*\))/", '$1,$2', $matches[2] . $matches[3]);
+  // The last item of the outermost array always gets a comma.
+  return $matches[1] . $contents .','. $matches[4];
+}
+
 function coder_preprocessor_inline_comment() {
   return array(
     '#title' => 'Move inline comments above remarked line.',
     '#weight' => 2,
     // [\040\t] matches only a space or tab.
     // (?!case) prevents matching of case statements.
+    // (?!\*) prevents matching of docblock lines.
+    // (?!//) prevents matching of lines that begin with a comment.
     // \S prevents matching of lines containing only a comment.
     // [^:] prevents matching of URL protocols.
-    // [^;\$] prevents matching of CVS keyword Id comment and double slashes.
-    //   in quotes (f.e. "W3C//DTD").
-    '#search' => '@^([\040\t]*)(?!case)(\S.+?)[\040\t]*[^:]//\s*([^;\$]+?)$@m',
+    // [^;,\$] prevents matching of CVS keyword Id comment and double slashes
+    //   in quotes (f.e. "W3C//DTD") or in multiline arrays (f.e. "//",).
+    '#search' => '@^([\040\t]*)(?!case)(?!\*)(?!//)(\S.+?)[\040\t]*[^:]//\s*([^;,\$]+?)$@m',
     '#replace' => "$1// $3\n$1$2",
   );
 }
@@ -948,6 +1043,64 @@
   return $return;
 }
 
+function coder_postprocessor_indent_multiline_array() {
+  // Still buggy: the replace callback stays commented out for now.
+  //
+  // The pattern requires three or more consecutive lines, each being:
+  //   \s*                   leading whitespace, followed by either
+  //   ([\'"]).+?\1 or .+?   a quoted string key or any other key,
+  //   \s*=>\s*.+?           the arrow syntax and the value, or
+  //   \),\s?                the closing line of a nested array.
+  $processor = array();
+  $processor['#title'] = 'Align equal signs of multiline array assignments in the same column.';
+  $processor['#search'] = '/^(?:\s*(?:(?:([\'"]).+?\1|.+?)\s*=>\s*.+?|\),\s?)$){3,}/mi';
+  //$processor['#replace_callback'] = 'coder_replace_indent_multiline_array';
+  return $processor;
+}
+
+/**
+ * Replace callback: lines up the => arrows of a multiline array.
+ *
+ * @param $matches
+ *   Regex matches; $matches[0] holds the array lines to realign.
+ * @return
+ *   The realigned lines, joined by line feeds.
+ */
+function coder_replace_indent_multiline_array($matches) {
+  // Separate out important components of each line:
+  // (\s*) matches existing indent as \1
+  // (([\'"]).+?\3|.+?) matches key as \2
+  //    ([\'"]).+?\3 matches a quoted key, quote used is \3
+  //    .+? matches any other key (variable, number, constant)
+  // \s*=>\s* matches existing spacing and arrow to be discarded
+  // (.+?),? matches value as \4, without its trailing comma
+  // \), matches the closing line of a nested array (no key captured)
+  preg_match_all('/^(\s*)(?:(([\'"]).+?\3|.+?)\s*=>\s*(.+?),?|\),)\s*?$/mi', $matches[0], $vars, PREG_SET_ORDER);
+  // Determine max key length for each indentation:
+  $maxlengths = array();
+  foreach ($vars as $var) {
+    // Closing lines carry no key group; they count as length 0.
+    $length = isset($var[2]) ? strlen($var[2]) : 0;
+    if (!isset($maxlengths[$var[1]]) || $length > $maxlengths[$var[1]]) {
+      $maxlengths[$var[1]] = $length;
+    }
+  }
+  // Reconstruct the array declaration line by line:
+  $lines = array();
+  foreach ($vars as $var) {
+    if (!isset($var[2])) {
+      $lines[] = $var[1] .'),';
+      continue;
+    }
+    list(, $indent, $key, , $value) = $var;
+    $spaces = str_repeat(' ', $maxlengths[$indent] - strlen($key));
+    // A line that opens a nested array must not end with a comma.
+    $comma = ($value !== 'array(') ? ',' : '';
+    $lines[] = "$indent$key$spaces => $value$comma";
+  }
+  return implode("\n", $lines);
+}
+
 function coder_postprocessor_array_rearrange() {
   // @bug common.inc, comment.module:
   // Not yet working properly 25/03/2007 sun.
