diff --git a/core/lib/Drupal/Component/Utility/SafeMarkup.php b/core/lib/Drupal/Component/Utility/SafeMarkup.php
index 449e078..a099069 100644
--- a/core/lib/Drupal/Component/Utility/SafeMarkup.php
+++ b/core/lib/Drupal/Component/Utility/SafeMarkup.php
@@ -219,16 +219,43 @@ public static function xssFilter($string, $html_tags = NULL) {
   /**
    * Safely truncates an HTML string.
    *
-   * The resulting string rendered in a browser will be free of XSS and shorter
-   * than the specified length.
-   *
    * @param $string
-   * @param $length
-   * @param bool|FALSE $wordsafe
-   * @param bool|FALSE $add_ellipsis
-   * @param int $min_wordsafe_length
-   * @param null $html_tags
+   *   The string with raw HTML in it. It will be stripped of everything that
+   *   can cause an XSS attack. The string provided will always be escaped
+   *   regardless of whether the string is already marked as safe. It also will
+   *   be truncated so that the text equivalent of it (i.e. with the HTML tags
+   *   removed) is shorter than $length.
+   * @param int $length
+   *   An upper limit on the returned string length, including trailing ellipsis
+   *   if $add_ellipsis is TRUE.
+   * @param bool $wordsafe
+   *   If TRUE, attempt to truncate on a word boundary. Word boundaries are
+   *   spaces, punctuation, and Unicode characters used as word boundaries in
+   *   non-Latin languages; see Unicode::PREG_CLASS_WORD_BOUNDARY for more
+   *   information. If a word boundary cannot be found that would make the length
+   *   of the returned string fall within length guidelines (see parameters
+   *   $length and $min_wordsafe_length), word boundaries are ignored.
+   * @param bool $add_ellipsis
+   *   If TRUE, add '...' to the end of the truncated string (defaults to
+   *   FALSE). The string length will still fall within $length.
+   * @param int $min_wordsafe_length
+   *   If $wordsafe is TRUE, the minimum acceptable length for truncation (before
+   *   adding an ellipsis, if $add_ellipsis is TRUE). Has no effect if $wordsafe
+   *   is FALSE. This can be used to prevent having a very short resulting string
+   *   that will not be understandable. For instance, if you are truncating the
+   *   string "See myverylongurlexample.com for more information" to a word-safe
+   *   return length of 20, the only available word boundary within 20 characters
+   *   is after the word "See", which wouldn't leave a very informative string. If
+   *   you had set $min_wordsafe_length to 10, though, the function would realise
+   *   that "See" alone is too short, and would then just truncate ignoring word
+   *   boundaries, giving you "See myverylongurl..." (assuming you had set
+   *   $add_ellipsis to TRUE).
+   * @param array $html_tags
+   *   An array of HTML tags.
+   *
    * @return string
+   *   An XSS-safe and truncated version of $string, or an empty string if
+   *   $string is not valid UTF-8. The string is marked as safe.
    *
    * @see \Drupal\Component\Utility\Xss::truncate()
    * @see \Drupal\Component\Utility\Xss::filter()
diff --git a/core/lib/Drupal/Component/Utility/Xss.php b/core/lib/Drupal/Component/Utility/Xss.php
index eace954..595567e 100644
--- a/core/lib/Drupal/Component/Utility/Xss.php
+++ b/core/lib/Drupal/Component/Utility/Xss.php
@@ -75,20 +75,8 @@ public static function filter($string, $html_tags = array('a', 'em', 'strong', '
     if (!Unicode::validateUtf8($string)) {
       return '';
     }
-    // Remove NULL characters (ignored by some browsers).
-    $string = str_replace(chr(0), '', $string);
-    // Remove Netscape 4 JS entities.
-    $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
+    $string = static::prepareFilter($string);
 
-    // Defuse all HTML entities.
-    $string = str_replace('&', '&amp;', $string);
-    // Change back only well-formed entities in our whitelist:
-    // Decimal numeric entities.
-    $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
-    // Hexadecimal numeric entities.
-    $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
-    // Named entities.
-    $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
     $html_tags = array_flip($html_tags);
     // Late static binding does not work inside anonymous functions.
     $class = get_called_class();
@@ -142,24 +130,49 @@ public static function filterAdmin($string) {
    * The resulting string rendered in a browser will be free of XSS and shorter
    * than the specified length.
    *
-   * @param $string
+   * @param string $string
    *   The string with raw HTML in it. It will be stripped of everything that
-   *   can cause an XSS attack and truncated to $length.
-   * @param $length
-   *   The maximum length of the rendered string.
+   *   can cause an XSS attack. The string provided will always be escaped
+   *   regardless of whether the string is already marked as safe. It also will
+   *   be truncated so that the text equivalent of it (i.e. with the HTML tags
+   *   removed) is shorter than $length.
+   * @param int $length
+   *   An upper limit on the returned string length, including trailing ellipsis
+   *   if $add_ellipsis is TRUE.
    * @param bool $wordsafe
-   *   See \Drupal\Component\Unicode::truncate() for this parameter.
+   *   If TRUE, attempt to truncate on a word boundary. Word boundaries are
+   *   spaces, punctuation, and Unicode characters used as word boundaries in
+   *   non-Latin languages; see Unicode::PREG_CLASS_WORD_BOUNDARY for more
+   *   information. If a word boundary cannot be found that would make the length
+   *   of the returned string fall within length guidelines (see parameters
+   *   $length and $min_wordsafe_length), word boundaries are ignored.
    * @param bool $add_ellipsis
-   *   See \Drupal\Component\Unicode::truncate() for this parameter.
-   * @param int $min_wordsafe_length
-   *   See \Drupal\Component\Unicode::truncate() for this parameter.
+   *   If TRUE, add '...' to the end of the truncated string (defaults to
+   *   FALSE). The string length will still fall within $length.
+   * @param int $min_wordsafe_length
+   *   If $wordsafe is TRUE, the minimum acceptable length for truncation (before
+   *   adding an ellipsis, if $add_ellipsis is TRUE). Has no effect if $wordsafe
+   *   is FALSE. This can be used to prevent having a very short resulting string
+   *   that will not be understandable. For instance, if you are truncating the
+   *   string "See myverylongurlexample.com for more information" to a word-safe
+   *   return length of 20, the only available word boundary within 20 characters
+   *   is after the word "See", which wouldn't leave a very informative string. If
+   *   you had set $min_wordsafe_length to 10, though, the function would realise
+   *   that "See" alone is too short, and would then just truncate ignoring word
+   *   boundaries, giving you "See myverylongurl..." (assuming you had set
+   *   $add_ellipsis to TRUE).
    * @param array $html_tags
-   *   See \Drupal\Component\Xss::filter() for this parameter.
+   *   An array of HTML tags.
    *
    * @return string
-   *   The filtered and truncated string.
+   *   An XSS-safe and truncated version of $string, or an empty string if
+   *   $string is not valid UTF-8.
    */
   public static function truncate($string, $length, $wordsafe = FALSE, $add_ellipsis = FALSE, $min_wordsafe_length = 1, $html_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
+    if (!Unicode::validateUtf8($string)) {
+      return '';
+    }
+    $string = static::prepareFilter($string);
     $html_tags = array_flip($html_tags);
     $class = get_called_class();
     $pieces = preg_split(static::HTML_TAG_PREG, $string, -1 , PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
@@ -398,4 +411,32 @@ public static function getAdminTagList() {
     return static::$adminTags;
   }
 
+  /**
+   * Prepares the string for filtering.
+   *
+   * @param string $string
+   *   The string with raw HTML in it.
+   *
+   * @return string
+   *   The same string with NULL characters and Netscape 4 JS entities removed
+   *   and HTML entities defused.
+   */
+  protected static function prepareFilter($string) {
+    // Remove NULL characters (ignored by some browsers).
+    $string = str_replace(chr(0), '', $string);
+    // Remove Netscape 4 JS entities.
+    $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
+
+    // Defuse all HTML entities.
+    $string = str_replace('&', '&amp;', $string);
+    // Change back only well-formed entities in our whitelist:
+    // Decimal numeric entities.
+    $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
+    // Hexadecimal numeric entities.
+    $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
+    // Named entities.
+    $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
+    return $string;
+  }
+
 }
diff --git a/core/modules/dblog/src/Controller/DbLogController.php b/core/modules/dblog/src/Controller/DbLogController.php
index ab354fd..712b00a 100644
--- a/core/modules/dblog/src/Controller/DbLogController.php
+++ b/core/modules/dblog/src/Controller/DbLogController.php
@@ -185,7 +185,7 @@ public function overview() {
       $message = $this->formatMessage($dblog);
       if ($message && isset($dblog->wid)) {
         // Truncate link_text to 56 chars of message.
-        $log_text = SafeMarkup::xssTruncate($message, 56, TRUE, TRUE);
+        $log_text = SafeMarkup::xssTruncate($message, 56, TRUE, TRUE, 1, ['em', 'strong', 'cite', 'code']);
         $message = $this->l($log_text, new Url('dblog.event', array('event_id' => $dblog->wid), array(
           'attributes' => array(
             // Provide a title for the link for useful hover hints.
diff --git a/core/modules/dblog/src/Tests/DbLogTest.php b/core/modules/dblog/src/Tests/DbLogTest.php
index 8700fca..9d5118e 100644
--- a/core/modules/dblog/src/Tests/DbLogTest.php
+++ b/core/modules/dblog/src/Tests/DbLogTest.php
@@ -699,14 +699,16 @@ protected function assertLogMessage($log_message, $log_variables, $message) {
     // After \Drupal\Component\Utility\Xss::filter(), HTML entities should be
     // converted to their character equivalents because assertLink() uses this
     // string in xpath() to query the Document Object Model (DOM).
-    $xpath = "//a[string() = '" . html_entity_decode($stripped_text) . "'";
+    $label = html_entity_decode($stripped_text);
+    $arguments[':label'] = $label;
+    $xpath = "//a[string() = :label]";
     foreach ($log_variables as $key => $variable) {
       if ($key[0] == '%') {
-        $xpath .= ' and em[@class="placeholder" and .="' . html_entity_decode($variable) .'"]';
+        $xpath .= "[em[@class='placeholder']=$key]";
+        $arguments[$key] = html_entity_decode($variable);
       }
     }
-    $xpath .= ']';
-    $this->assertTrue($this->xpath($xpath), $message);
+    $this->assertTrue($this->xpath($xpath, $arguments), $message);
   }
 
   /**