Index: codefilter.module
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/codefilter/codefilter.module,v
retrieving revision 1.25
diff -u -r1.25 codefilter.module
--- codefilter.module	3 Dec 2007 08:01:51 -0000	1.25
+++ codefilter.module	4 Dec 2007 14:43:58 -0000
@@ -30,6 +30,7 @@
   $text = preg_replace('@</?(br|p)\s*/?>@', '', str_replace('\"', '"', $text));
   // Undo the escaping in the prepare step
   $text = decode_entities($text);
+  $text = str_replace("\xFEn\xFF", "\n", $text);
   // Trim leading and trailing linebreaks
   $text = trim($text, "\r\n");
   // Highlight as PHP
@@ -57,7 +58,7 @@
  */
 function codefilter_process_code($text) {
   // Undo linebreak escaping
-  $text = str_replace('&#10;', "\n", $text);
+  $text = str_replace("\xFEn\xFF", "\n", $text);
   // Inline or block level piece?
   $multiline = strpos($text, "\n") !== FALSE;
   // Note, pay attention to odd preg_replace-with-/e behaviour on slashes
@@ -81,9 +82,98 @@
 }
 
 function codefilter_escape($text) {
-  // Note, pay attention to odd preg_replace-with-/e behaviour on slashes
   // Protect newlines from linebreak filter
-  return str_replace(array("\r", "\n"), array('', '&#10;'), check_plain(str_replace('\"', '"', $text)));
+  return strtr(check_plain($text), array("\r" => '', "\n" => "\xFEn\xFF"));
+}
+
+function codefilter_prepare_find_block($text, $start_offset, $block_start, $block_end, &$chosen_start, &$chosen_end, &$chosen_starter, &$chosen_ender) {
+  // Look for the opening marker, then for a closing marker at or after it.
+  $opened_at = strpos($text, $block_start, $start_offset);
+  $closed_at = ($opened_at === false) ? false : strpos($text, $block_end, $opened_at);
+  if ($closed_at === false) {
+    // An opener without a closer does not count as a block.
+    return false;
+  }
+
+  // Record this block only if it begins before the candidate found so far.
+  if ($chosen_start === false || $opened_at < $chosen_start) {
+    $chosen_start = $opened_at;
+    $chosen_end = $closed_at;
+    $chosen_starter = $block_start;
+    $chosen_ender = $block_end;
+  }
+  return $opened_at;
+}
+
+/**
+ * Marks every <?php ?>, <% %> and <code></code> block in $text with \xFE...\xFF tags and escapes its contents, returning the result.
+ * A regex cannot easily find the end of a <?php (code) ?> block as the terminating ?> may be within a string or multiline comment.
+ * Therefore, for each possible ending to a block, highlight_string is called to see if things after that ending are highlighted
+ * as non-php-code, and if so, then that must be the correct ending. Otherwise, we move onto the next possible ending and see what
+ * highlight_string thinks about ending there. If no ending satisfies highlight_string, the first possible ending is used.
+ */
+function codefilter_prepare($text) {
+  $start_offset = 0;
+
+  while (true) {
+    // Find the next php block, the next mini block and the next code block
+    $position_start = false;
+    $position_end = false;
+    $position_starter = '<?php';
+    $position_ender = '?>';
+    $position_marker = 'php';
+
+    $position_php = codefilter_prepare_find_block($text, $start_offset, '<?php', '?>', $position_start, $position_end, $position_starter, $position_ender);
+    $position_mini = codefilter_prepare_find_block($text, $start_offset, '<%', '%>', $position_start, $position_end, $position_starter, $position_ender);
+    $position_code = codefilter_prepare_find_block($text, $start_offset, '<code>', '</code>', $position_start, $position_end, $position_starter, $position_ender);
+
+    if ($position_php === false && $position_mini === false && $position_code === false) {
+      break;
+    }
+    if ($position_starter === '<code>') {
+      $position_marker = 'code';
+    }
+
+    while ($position_end) {
+      // See what highlight_string thinks about ending the block here
+      $to_highlight = substr($text, $position_start + strlen($position_starter), $position_end - $position_start - strlen($position_starter));
+      $highlighted = highlight_string('<?php '. $to_highlight .' ?> ?>', true);
+      /*
+        Iff the block to be highlighted is valid, then the first "?>" will be styled (<span style="color: #0000BB">?&gt;</span>)
+        and the second one will be unstyled (raw), so get the positions of the last styled and the last raw occurrence of "?>"
+        and if the styled appears before the raw, then the block is valid. highlight_string has an odd bug where it can remove
+        single quotes (') from a piece of code when the code is invalid, so this is checked for. For example, this call:
+        highlight_string("<?php fred('<?php moo(); ?> ?>");
+        Will remove the single quote, style the penultimate "?>" and leave the final "?>" raw.
+      */
+      $position_styled_close = strrpos($highlighted, '">?&gt;');
+      $position_raw_close = strrpos($highlighted, '?&gt;');
+      if ($position_styled_close !== false && $position_raw_close !== false && ($position_styled_close + 2) < $position_raw_close) {
+        if (substr_count($to_highlight, "'") == substr_count($highlighted, "'")) {
+          break;
+        }
+      }
+
+      // Move onto the next possible ending
+      $position_end = strpos($text, $position_ender, $position_end + 1);
+    }
+    if ($position_end === false) {
+      // No more possible endings, but highlight_string hasn't been happy yet, so revert back to the first possible ending
+      $position_end = strpos($text, $position_ender, $position_start);
+    }
+
+    // We've found the ending, so mark this block
+    $before_code = substr($text, 0, $position_start);
+    $position_start = $position_start + strlen($position_starter);
+    $the_code = substr($text, $position_start, $position_end - $position_start);
+    $position_end = $position_end + strlen($position_ender);
+    $after_code = substr($text, $position_end);
+    $text = $before_code ."\xFE". $position_marker . "\xFF". codefilter_escape($the_code) ."\xFE/". $position_marker ."\xFF";
+    $start_offset = strlen($text);
+    $text = $text . $after_code;
+  }
+
+  return $text;
 }
 
 /**
@@ -100,8 +190,7 @@
     case 'prepare':
       // Note: we use the bytes 0xFE and 0xFF to replace < > during the filtering process.
       // These bytes are not valid in UTF-8 data and thus least likely to cause problems.
-      $text = preg_replace('@<code>(.+?)</code>@se', "'\xFEcode\xFF'. codefilter_escape('\\1') .'\xFE/code\xFF'", $text);
-      $text = preg_replace('@[\[<](\?php|%)(.+?)(\?|%)[\]>]@se', "'\xFEphp\xFF'. codefilter_escape('\\2') .'\xFE/php\xFF'", $text);
+      $text = codefilter_prepare($text);
       return $text;
 
     case 'process':
