diff -r -u -F '^function' geshifilter/geshifilter.module geshifilter_mod/geshifilter.module
--- geshifilter/geshifilter.module	2007-09-30 10:02:34.000000000 -0500
+++ geshifilter_mod/geshifilter.module	2007-10-06 18:29:20.000000000 -0500
@@ -298,6 +298,9 @@ function geshifilter_filter($op, $delta
     case 'description':
       return t('Enables syntax highlighting of inline/block source code using the GeSHi engine');
 
+    case 'prepare':
+      return _geshifilter_prepare($format, $text);
+
     case 'process':
       return _geshifilter_process($format, $text);
 
@@ -866,6 +869,44 @@ function _geshifilter_get_tags($format)
 }
 
 /**
+ * geshifilter_filter callback for preparing input text.
+ */
+function _geshifilter_prepare($format, $text) {
+  // get the available tags
+  list($generic_code_tags, $language_tags, $tag_to_lang) = _geshifilter_get_tags($format);
+  $tags = array_merge($generic_code_tags, $language_tags);
+  $tags_string = implode('|', $tags);
+
+  // Pattern for matching "<code>...</code>" like stuff
+  // Also matches "<code>...$" where "$" refers to end of string, not end of
+  // line (because PCRE_MULTILINE (modifier 'm') is not enabled), so matching
+  // still works when teaser view trims inside the source code.
+  switch (_geshifilter_brackets($format)) {
+    case GESHIFILTER_BRACKETS_ANGLE:
+      $pattern = '#<('. $tags_string .')([^>]*)>(.*?)(</\1\s*>|$)#s';
+      $text = preg_replace_callback($pattern, '_geshifilter_prepare_callback', $text);
+      break;
+    case GESHIFILTER_BRACKETS_BOTH:
+      $pattern = '#[<\[]('. $tags_string .')([^>\]]*)[>\]](.*?)([<\[]/\1\s*[>\]]|$)#s';
+      $text = preg_replace_callback($pattern, '_geshifilter_prepare_callback', $text);
+      break;
+  }
+
+  return $text;
+}
+
+/**
+ * geshifilter_filter callback for preparing input text with GESHIFILTER_BRACKETS_ANGLE or GESHIFILTER_BRACKETS_BOTH.
+ * We need to strip out < and > characters so that other filters don't get confused
+ * and think that portions of the text are HTML tags that are not allowed (and therefore
+ * removed).  We use the bytes 0xFE and 0xFF to replace < > during the filtering process.
+ * These bytes are not valid in UTF-8 data and thus least likely to cause problems.
+ */
+function _geshifilter_prepare_callback($match) {
+  return "\xFE". $match[1] . $match[2] ."\xFF". geshifilter_escape($match[3]) ."\xFE/". $match[1]. "\xFF";
+}
+
+/**
  * geshifilter_filter callback for processing input text.
  */
 function _geshifilter_process($format, $text) {
@@ -886,13 +927,13 @@ function _geshifilter_process($format, $
   // still works when teaser view trims inside the source code.
   switch (_geshifilter_brackets($format)) {
     case GESHIFILTER_BRACKETS_ANGLE:
-      $pattern = '#<('. $tags_string .')([^>]*)>(.*?)(</\1\s*>|$)#s';
+      $pattern = '#\xFE('. $tags_string .')([^\xFF]*)\xFF(.*?)(\xFE/\1\s*\xFF|$)#s';
       break;
     case GESHIFILTER_BRACKETS_SQUARE:
       $pattern = '#\[('. $tags_string .')([^\]]*)\](.*?)(\[/\1\s*\]|$)#s';
       break;
     case GESHIFILTER_BRACKETS_BOTH:
-      $pattern = '#[<\[]('. $tags_string .')([^>\]]*)[>\]](.*?)([<\[]/\1\s*[>\]]|$)#s';
+      $pattern = '#[\xFE\[]('. $tags_string .')([^\xFF\]]*)[\xFF\]](.*?)([\xFE\[]/\1\s*[\xFF\]]|$)#s';
       break;
   }
 
@@ -911,6 +952,9 @@ function _geshifilter_replace_callback($
   $tag_name = $match[1];
   $tag_attributes = $match[2];
   $source_code = $match[3];
+
+  // Undo linebreak escaping
+  $source_code = str_replace('&#10;', "\n", $source_code);
 
   // get the possible tags and languages
   list($generic_code_tags, $language_tags, $tag_to_lang) = _geshifilter_get_tags($format);
@@ -1059,3 +1103,10 @@ function geshifilter_clear_available_lan
   drupal_set_message('Available languages cache cleared.');
   drupal_goto();
 }
+
+function geshifilter_escape($text) {
+  // Adapted from codefilter_escape() in codefilter.module, minus its '\"' un-escaping:
+  // that only compensates for preg_replace /e, and we use preg_replace_callback.
+  // Escapes HTML and protects newlines (as &#10;) from the linebreak filter.
+  return str_replace(array("\r", "\n"), array('', '&#10;'), check_plain($text));
+}
\ No newline at end of file