? geshifilter_process_0.txt
Index: geshifilter.module
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/geshifilter/geshifilter.module,v
retrieving revision 1.18.2.20
diff -u -u -p -r1.18.2.20 geshifilter.module
--- geshifilter.module	8 Oct 2007 08:59:37 -0000	1.18.2.20
+++ geshifilter.module	12 Oct 2007 23:37:34 -0000
@@ -298,6 +298,9 @@ function geshifilter_filter($op, $delta 
     case 'description':
       return t('Enables syntax highlighting of inline/block source code using the GeSHi engine');
 
+    case 'prepare':
+      return _geshifilter_prepare($format, $text);
+
     case 'process':
       return _geshifilter_process($format, $text);
 
@@ -866,20 +869,13 @@ function _geshifilter_get_tags($format) 
 }
 
 /**
- * geshifilter_filter callback for processing input text.
+ * geshifilter_filter callback for preparing input text.
  */
-function _geshifilter_process($format, $text) {
-  // load GeSHi library (if not already)
-  $geshi_library = _geshifilter_check_geshi_library();
-  if (!$geshi_library['success']) {
-    drupal_set_message($geshi_library['message'], 'error');
-    return $text;
-  }
+function _geshifilter_prepare($format, $text) {
   // get the available tags
   list($generic_code_tags, $language_tags, $tag_to_lang) = _geshifilter_get_tags($format);
   $tags = array_merge($generic_code_tags, $language_tags);
   $tags_string = implode('|', $tags);
-
   // Pattern for matching "<code>...</code>" like stuff
   // Also matches "<code>...$"  where "$" refers to end of string, not end of
   // line (because PCRE_MULTILINE (modifier 'm') is not enabled), so matching
@@ -895,8 +891,46 @@ function _geshifilter_process($format, $
       $pattern = '#[<\[]('. $tags_string .')([^>\]]*)[>\]](.*?)([<\[]/\1\s*[>\]]|$)#s';
       break;
   }
+  // replace the code container tag brackets
+  // and prepare the container content (newline and angle bracket protection)
+  $text = preg_replace_callback($pattern, '_geshifilter_prepare_callback', $text);
+  return $text;
+}
 
-  return preg_replace_callback($pattern, '_geshifilter_replace_callback', $text);
+/**
+ * preg_replace_callback() callback used by _geshifilter_prepare().
+ * Rewrites the brackets of the matched code tag to the bytes 0xFA and 0xFB
+ * (e.g. "[code]" -> "\xFAcode\xFB") and always appends a matching closing tag.
+ * In the container content, "\r" is dropped, "\n" becomes "&#10;" and the angle
+ * brackets "<" and ">" become 0xFC and 0xFD, to keep other filters (like the
+ * HTML filter and the line break filter) from mangling the source code.
+ * The bytes 0xFA to 0xFD are not valid UTF-8 data and are thus least likely
+ * to cause problems (approach based on the codefilter module).
+ */
+function _geshifilter_prepare_callback($match) {
+  return "\xFA". $match[1] . $match[2] ."\xFB"
+    . str_replace(array("\r", "\n", '<', '>'), array('', '&#10;', "\xFC", "\xFD"), $match[3])
+    ."\xFA/". $match[1]. "\xFB";
+}
+
+/**
+ * geshifilter_filter 'process' callback: runs _geshifilter_replace_callback() on each prepared code container.
+ */
+function _geshifilter_process($format, $text) {
+  // load the GeSHi library (if not already loaded); on failure, report the error and return the text untouched
+  $geshi_library = _geshifilter_check_geshi_library();
+  if (!$geshi_library['success']) {
+    drupal_set_message($geshi_library['message'], 'error');
+    return $text;
+  }
+  // get the available tags (generic code tags and language tags) for this input format
+  list($generic_code_tags, $language_tags, $tag_to_lang) = _geshifilter_get_tags($format);
+  $tags = array_merge($generic_code_tags, $language_tags);
+  $tags_string = implode('|', $tags);
+  // Pattern for the prepared containers ("\xFAtag attributes\xFB...\xFA/tag\xFB"); PCRE itself interprets the \xHH escapes
+  $pattern = '#\xFA('. $tags_string .')([^\xFB]*)\xFB(.*?)(\xFA/\1\xFB)#s';
+  $text = preg_replace_callback($pattern, '_geshifilter_replace_callback', $text);
+  return $text;
 }
 
 /**
@@ -912,6 +946,9 @@ function _geshifilter_replace_callback($
   $tag_attributes = $match[2];
   $source_code = $match[3];
 
+  // Undo linebreak and angle bracket escaping from preparation phase
+  $source_code = str_replace(array('&#10;', "\xFC", "\xFD"), array("\n", '<', '>'), $source_code);
+
   // get the possible tags and languages
   list($generic_code_tags, $language_tags, $tag_to_lang) = _geshifilter_get_tags($format);
   $enabled_languages = _geshifilter_get_enabled_languages();
@@ -1006,8 +1043,6 @@ function geshifilter_geshi_process($sour
   }
   // decode html entities, GeSHi will reencode these
   $source_code = decode_entities($source_code);
-  // remove newline (<br/>) and paragraph (<p>) formatting, GeSHi will add these again
-  $source_code = preg_replace('@<p>|</p>|<br\s*/>@', "", $source_code);
   // remove leading/trailing newlines
   $source_code = trim($source_code, "\n\r");
   // create GeSHi object
