diff --git a/core/lib/Drupal/Component/Utility/String.php b/core/lib/Drupal/Component/Utility/String.php
index 970436c..761d1fe 100644
--- a/core/lib/Drupal/Component/Utility/String.php
+++ b/core/lib/Drupal/Component/Utility/String.php
@@ -50,6 +50,11 @@ public static function checkPlain($text) {
* The input $text, with all HTML entities decoded once.
*/
public static function decodeEntities($text) {
+    // Late static binding is not available inside the closure below, so the
+    // called class is captured here and handed to decode() explicitly.
+    $class = get_called_class();
+    $decoder = function ($matches) use ($class) {
+      return $class::decode($matches, $class);
+    };
+    // Only real numeric character references are passed to the callback:
+    // group 1 captures decimal (&#NNN;), group 2 hexadecimal (&#xHHH;) digits.
+    // Without the "&#" / "&#x" prefixes any "digits;" run in ordinary text
+    // would be treated as an entity.
+    $text = preg_replace_callback('/&#([0-9]+);|&#x([0-9a-f]+);/i', $decoder, $text);
return html_entity_decode($text, ENT_QUOTES, 'UTF-8');
}
@@ -132,5 +137,129 @@ public static function placeholder($text) {
return SafeMarkup::set('' . static::checkPlain($text) . '');
}
+  /**
+   * Decodes a numeric entity that is not valid in HTML5 to raw UTF-8 bytes.
+   *
+   * Callback for preg_replace_callback() as used by decodeEntities(). Entities
+   * whose code point is allowed in HTML5 are returned unchanged, as are
+   * entities with a zero code point, a surrogate code point (U+D800..U+DFFF)
+   * or a code point above U+10FFFF, because those cannot be written as
+   * well-formed UTF-8.
+   *
+   * ENT_HTML5 could be replaced with the desired document type flag.
+   *
+   * @param array $match
+   *   The match array: $match[0] is the full entity, $match[1] holds the
+   *   digits of a decimal entity and $match[2] the digits of a hexadecimal one.
+   * @param string $class
+   *   The called class. This method is called from an anonymous function which
+   *   breaks late static binding. See https://bugs.php.net/bug.php?id=66622 for
+   *   more information.
+   *
+   * @return string
+   *   The UTF-8 encoding of the code point, or the original entity text when
+   *   the entity is left alone.
+   */
+  protected static function decode($match, $class) {
+    // Group 2 is only present when the hexadecimal alternative matched.
+    $code = isset($match[2]) ? hexdec($match[2]) : intval($match[1]);
+    // hexdec() yields a float for very large inputs; the comparison below
+    // rejects those together with every other value that is not a Unicode
+    // scalar value, so utf8() never receives something it cannot encode.
+    if ($code > 0x10FFFF || ($code >= 0xD800 && $code <= 0xDFFF)) {
+      return $match[0];
+    }
+    if ($code && !$class::unicodeCpIsAllowed($code, ENT_HTML5)) {
+      return $class::utf8($code);
+    }
+    return $match[0];
+  }
+
+  /**
+   * Encodes a code point as a UTF-8 byte sequence.
+   *
+   * The value is not validated; the byte layout is chosen purely from the
+   * magnitude of $k (one byte below 0x80, two below 0x800, three below
+   * 0x10000, four otherwise).
+   *
+   * @param int $k
+   *   The code point to encode.
+   *
+   * @return string
+   *   The encoded bytes.
+   */
+  protected static function utf8($k) {
+    // Single byte: the value is emitted as is.
+    if ($k < 0x80) {
+      return chr($k);
+    }
+    // Every continuation byte is 10xxxxxx and carries six payload bits.
+    $last = chr(0x80 | ($k & 0x3f));
+    if ($k < 0x800) {
+      return chr(0xc0 | ($k >> 6)) . $last;
+    }
+    $middle = chr(0x80 | (($k >> 6) & 0x3f));
+    if ($k < 0x10000) {
+      return chr(0xe0 | ($k >> 12)) . $middle . $last;
+    }
+    return chr(0xf0 | ($k >> 18)) . chr(0x80 | (($k >> 12) & 0x3f)) . $middle . $last;
+  }
+  /**
+   * Checks whether a code point is allowed in the given document type.
+   *
+   * Allowed ranges per document type:
+   * @code
+   * XML 1.0              HTML 4.01           HTML 5
+   * 0x09..0x0A           0x09..0x0A          0x09..0x0A
+   * 0x0D                 0x0D                0x0C..0x0D
+   * 0x0020..0xD7FF       0x20..0x7E          0x20..0x7E
+   *                      0x00A0..0xD7FF      0x00A0..0xD7FF
+   * 0xE000..0xFFFD       0xE000..0x10FFFF    0xE000..0xFDCF
+   * 0x010000..0x10FFFF                       0xFDF0..0x10FFFF (*)
+   * @endcode
+   *
+   * (*) exclude code points where ((code & 0xFFFF) >= 0xFFFE)
+   *
+   * References: XML 1.0, HTML 4.01, HTML 5.
+   * NOTE(review): the reference links are missing from this comment. The text
+   * appears to be carried over from unicode_cp_is_allowed() in PHP's
+   * ext/standard/html.c; confirm and restore the links from there.
+   *
+   * Not sure this is the relevant part for HTML 5, though. I opted to
+   * disallow the characters that would result in a parse error when
+   * preprocessing of the input stream. See also section 8.1.3.
+   *
+   * It's unclear if XHTML 1.0 allows C1 characters. I'll opt to apply to
+   * XHTML 1.0 the same rules as for XML 1.0.
+   *
+   * @param int $uni_cp
+   *   The code point to check.
+   * @param int $document_type
+   *   One of PHP's predefined document type constants: ENT_HTML401, ENT_HTML5,
+   *   ENT_XHTML or ENT_XML1.
+   *
+   * @return bool
+   *   TRUE if the code point is allowed. Any other $document_type value is
+   *   treated as allowing every code point.
+   */
+  protected static function unicodeCpIsAllowed($uni_cp, $document_type) {
+    switch ($document_type) {
+      case ENT_HTML401:
+        return ($uni_cp >= 0x20 && $uni_cp <= 0x7E) ||
+          ($uni_cp == 0x0A || $uni_cp == 0x09 || $uni_cp == 0x0D) ||
+          ($uni_cp >= 0xA0 && $uni_cp <= 0xD7FF) ||
+          ($uni_cp >= 0xE000 && $uni_cp <= 0x10FFFF);
+
+      case ENT_HTML5:
+        return ($uni_cp >= 0x20 && $uni_cp <= 0x7E) ||
+          // Form feed (U+000C) is allowed, vertical tab (U+000B) is not.
+          ($uni_cp >= 0x09 && $uni_cp <= 0x0D && $uni_cp != 0x0B) ||
+          ($uni_cp >= 0xA0 && $uni_cp <= 0xD7FF) ||
+          ($uni_cp >= 0xE000 && $uni_cp <= 0x10FFFF &&
+            // The last two code points of each plane are noncharacters.
+            (($uni_cp & 0xFFFF) < 0xFFFE) &&
+            // U+FDD0..U+FDEF are noncharacters as well.
+            ($uni_cp < 0xFDD0 || $uni_cp > 0xFDEF));
+
+      case ENT_XHTML:
+      case ENT_XML1:
+        return ($uni_cp >= 0x20 && $uni_cp <= 0xD7FF) ||
+          ($uni_cp == 0x0A || $uni_cp == 0x09 || $uni_cp == 0x0D) ||
+          ($uni_cp >= 0xE000 && $uni_cp <= 0x10FFFF && $uni_cp != 0xFFFE && $uni_cp != 0xFFFF);
+
+      default:
+        // Return a real boolean, consistent with every other branch.
+        return TRUE;
+    }
+  }
}
diff --git a/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php b/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php
index 29209b6..556dbf2 100644
--- a/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php
+++ b/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php
@@ -147,9 +147,7 @@ public function providerTestFilterXss() {
// @see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Spaces_and_meta_chars_before_the_JavaScript_in_images_for_XSS
// @fixme This dataset currently fails under 5.4 because of
// https://drupal.org/node/1210798. Restore after it's fixed.
- if (version_compare(PHP_VERSION, '5.4.0', '<')) {
- $data[] = array('', '');
- }
+ $data[] = array('', '');
// Non-alpha-non-digit XSS.
// @see https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Non-alpha-non-digit_XSS