Index: includes/unicode.inc =================================================================== RCS file: /cvs/drupal/drupal/includes/unicode.inc,v retrieving revision 1.29.2.1 diff -u -p -r1.29.2.1 unicode.inc --- includes/unicode.inc 30 Jun 2010 09:04:38 -0000 1.29.2.1 +++ includes/unicode.inc 4 Nov 2010 09:26:13 -0000 @@ -335,30 +335,30 @@ function _mime_header_decode($matches) { * The input $text, with all HTML entities decoded once. */ function decode_entities($text, $exclude = array()) { - static $table; - // We store named entities in a table for quick processing. - if (!isset($table)) { - // Get all named HTML entities. - $table = array_flip(get_html_translation_table(HTML_ENTITIES)); - // PHP gives us ISO-8859-1 data, we need UTF-8. - $table = array_map('utf8_encode', $table); - // Add apostrophe (XML) - $table['&#039;'] = "'"; + static $html_entities; + if (!isset($html_entities)) { + include_once './includes/unicode.entities.inc'; } - $newtable = array_diff($table, $exclude); - // Use a regexp to select all entities in one pass, to avoid decoding double-escaped entities twice. - return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $newtable, $exclude)', $text); + // Flip the exclude list so that we can do quick lookups later. + $exclude = array_flip($exclude); + + // Use a regexp to select all entities in one pass, to avoid decoding + // double-escaped entities twice. The PREG_REPLACE_EVAL modifier 'e' is + // being used to allow for a callback (see + // http://php.net/manual/en/reference.pcre.pattern.modifiers). 
+ return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $html_entities, $exclude)', $text); } /** * Helper function for decode_entities */ -function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) { +function _decode_entities($prefix, $codepoint, $original, &$html_entities, &$exclude) { // Named entity if (!$prefix) { - if (isset($table[$original])) { - return $table[$original]; + // A named entity not in the exclude list. + if (isset($html_entities[$original]) && !isset($exclude[$html_entities[$original]])) { + return $html_entities[$original]; } else { return $original; @@ -392,7 +392,7 @@ function _decode_entities($prefix, $code . chr(0x80 | ( $codepoint & 0x3F)); } // Check for excluded characters - if (in_array($str, $exclude)) { + if (isset($exclude[$str])) { return $original; } else { Index: includes/unicode.entities.inc =================================================================== RCS file: includes/unicode.entities.inc diff -N includes/unicode.entities.inc --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ includes/unicode.entities.inc 7 Jan 2009 19:09:27 -0000 @@ -0,0 +1,266 @@ + <?php + $html_entities = array( + '&Aacute;' => 'Á', + '&aacute;' => 'á', + '&Acirc;' => 'Â', + '&acirc;' => 'â', + '&acute;' => '´', + '&AElig;' => 'Æ', + '&aelig;' => 'æ', + '&Agrave;' => 'À', + '&agrave;' => 'à', + '&alefsym;' => 'ℵ', + '&Alpha;' => 'Α', + '&alpha;' => 'α', + '&amp;' => '&', + '&and;' => '∧', + '&ang;' => '∠', + '&Aring;' => 'Å', + '&aring;' => 'å', + '&asymp;' => '≈', + '&Atilde;' => 'Ã', + '&atilde;' => 'ã', + '&Auml;' => 'Ä', + '&auml;' => 'ä', + '&bdquo;' => '„', + '&Beta;' => 'Β', + '&beta;' => 'β', + '&brvbar;' => '¦', + '&bull;' => '•', + '&cap;' => '∩', + '&Ccedil;' => 'Ç', + '&ccedil;' => 'ç', + '&cedil;' => '¸', + '&cent;' => '¢', + '&Chi;' => 'Χ', + '&chi;' => 'χ', + '&circ;' => 'ˆ', + '&clubs;' => '♣', + '&cong;' => '≅', + '&copy;' => '©', + '&crarr;' => '↵', + '&cup;' => '∪', + '&curren;' => '¤', + '&dagger;' => '†', + '&Dagger;' => '‡', + '&darr;' => '↓', + '&dArr;' => '⇓', + '&deg;' => '°', + '&Delta;' => 'Δ', + '&delta;' => 'δ', + '&diams;' => '♦', + '&divide;' => '÷', + '&Eacute;' => 'É', + '&eacute;' => 'é', + '&Ecirc;' => 'Ê', + '&ecirc;' => 'ê', + '&Egrave;' => 'È', + '&egrave;' => 'è', + '&empty;' => '∅', + '&emsp;' => ' ', + '&ensp;' => ' ', + '&Epsilon;' => 'Ε', + '&epsilon;' => 
'ε', + '&equiv;' => '≡', + '&Eta;' => 'Η', + '&eta;' => 'η', + '&ETH;' => 'Ð', + '&eth;' => 'ð', + '&Euml;' => 'Ë', + '&euml;' => 'ë', + '&euro;' => '€', + '&exist;' => '∃', + '&fnof;' => 'ƒ', + '&forall;' => '∀', + '&frac12;' => '½', + '&frac14;' => '¼', + '&frac34;' => '¾', + '&frasl;' => '⁄', + '&Gamma;' => 'Γ', + '&gamma;' => 'γ', + '&ge;' => '≥', + '&harr;' => '↔', + '&hArr;' => '⇔', + '&hearts;' => '♥', + '&hellip;' => '…', + '&Iacute;' => 'Í', + '&iacute;' => 'í', + '&Icirc;' => 'Î', + '&icirc;' => 'î', + '&iexcl;' => '¡', + '&Igrave;' => 'Ì', + '&igrave;' => 'ì', + '&image;' => 'ℑ', + '&infin;' => '∞', + '&int;' => '∫', + '&Iota;' => 'Ι', + '&iota;' => 'ι', + '&iquest;' => '¿', + '&isin;' => '∈', + '&Iuml;' => 'Ï', + '&iuml;' => 'ï', + '&Kappa;' => 'Κ', + '&kappa;' => 'κ', + '&Lambda;' => 'Λ', + '&lambda;' => 'λ', + '&lang;' => '〈', + '&laquo;' => '«', + '&larr;' => '←', + '&lArr;' => '⇐', + '&lceil;' => '⌈', + '&ldquo;' => '“', + '&le;' => '≤', + '&lfloor;' => '⌊', + '&lowast;' => '∗', + '&loz;' => '◊', + '&lrm;' => '‎', + '&lsaquo;' => '‹', + '&lsquo;' => '‘', + '&macr;' => '¯', + '&mdash;' => '—', + '&micro;' => 'µ', + '&middot;' => '·', + '&minus;' => '−', + '&Mu;' => 'Μ', + '&mu;' => 'μ', + '&nabla;' => '∇', + '&nbsp;' => ' ', + '&ndash;' => '–', + '&ne;' => '≠', + '&ni;' => '∋', + '&not;' => '¬', + '&notin;' => '∉', + '&nsub;' => '⊄', + '&Ntilde;' => 'Ñ', + '&ntilde;' => 'ñ', + '&Nu;' => 'Ν', + '&nu;' => 'ν', + '&Oacute;' => 'Ó', + '&oacute;' => 'ó', + '&Ocirc;' => 'Ô', + '&ocirc;' => 'ô', + '&OElig;' => 'Œ', + '&oelig;' => 'œ', + '&Ograve;' => 'Ò', + '&ograve;' => 'ò', + '&oline;' => '‾', + '&Omega;' => 'Ω', + '&omega;' => 'ω', + '&Omicron;' => 'Ο', + '&omicron;' => 'ο', + '&oplus;' => '⊕', + '&or;' => '∨', + '&ordf;' => 'ª', + '&ordm;' => 'º', + '&Oslash;' => 'Ø', + '&oslash;' => 'ø', + '&Otilde;' => 'Õ', + '&otilde;' => 'õ', + '&otimes;' => '⊗', + '&Ouml;' => 'Ö', + '&ouml;' => 'ö', + '&para;' => '¶', + '&part;' => '∂', + '&permil;' => '‰', + '&perp;' => '⊥', + '&Phi;' => 'Φ', + '&phi;' => 'φ', + '&Pi;' => 'Π', + '&pi;' => 'π', + '&piv;' => 'ϖ', + '&plusmn;' => '±', + '&pound;' => '£', + '&prime;' => '′', + '&Prime;' => '″', + '&prod;' => '∏', + '&prop;' => '∝', + '&Psi;' => 'Ψ', + '&psi;' => 'ψ', + '&radic;' => '√', + '&rang;' => '〉', + '&raquo;' => '»', + '&rarr;' => '→', + '&rArr;' => '⇒', + '&rceil;' => '⌉', + '&rdquo;' => '”', + '&real;' => 'ℜ', + '&reg;' => '®', + '&rfloor;' => '⌋', + '&Rho;' => 'Ρ', + '&rho;' => 'ρ', + '&rlm;' => '‏', + '&rsaquo;' => '›', + '&rsquo;' => '’', + '&sbquo;' => '‚', + '&Scaron;' => 'Š', + '&scaron;' => 'š', + '&sdot;' => '⋅', + '&sect;' => '§', + '&shy;' => '­', + '&Sigma;' => 'Σ', + '&sigma;' => 'σ', + '&sigmaf;' => 'ς', + '&sim;' => '∼', + '&spades;' => '♠', + '&sub;' => '⊂', + '&sube;' 
=> '⊆', + '&sum;' => '∑', + '&sup1;' => '¹', + '&sup2;' => '²', + '&sup3;' => '³', + '&sup;' => '⊃', + '&supe;' => '⊇', + '&szlig;' => 'ß', + '&Tau;' => 'Τ', + '&tau;' => 'τ', + '&there4;' => '∴', + '&Theta;' => 'Θ', + '&theta;' => 'θ', + '&thetasym;' => 'ϑ', + '&thinsp;' => ' ', + '&THORN;' => 'Þ', + '&thorn;' => 'þ', + '&tilde;' => '˜', + '&times;' => '×', + '&trade;' => '™', + '&Uacute;' => 'Ú', + '&uacute;' => 'ú', + '&uarr;' => '↑', + '&uArr;' => '⇑', + '&Ucirc;' => 'Û', + '&ucirc;' => 'û', + '&Ugrave;' => 'Ù', + '&ugrave;' => 'ù', + '&uml;' => '¨', + '&upsih;' => 'ϒ', + '&Upsilon;' => 'Υ', + '&upsilon;' => 'υ', + '&Uuml;' => 'Ü', + '&uuml;' => 'ü', + '&weierp;' => '℘', + '&Xi;' => 'Ξ', + '&xi;' => 'ξ', + '&Yacute;' => 'Ý', + '&yacute;' => 'ý', + '&yen;' => '¥', + '&yuml;' => 'ÿ', + '&Yuml;' => 'Ÿ', + '&Zeta;' => 'Ζ', + '&zeta;' => 'ζ', + '&zwj;' => '‍', + '&zwnj;' => '‌', + '&gt;' => '>', + '&lt;' => '<', + '&quot;' => '"', + // Add apostrophe (XML). + '&#039;' => "'", +);