diff --git a/core/lib/Drupal/Component/Utility/SafeMarkup.php b/core/lib/Drupal/Component/Utility/SafeMarkup.php index 4d49266..ebef963 100644 --- a/core/lib/Drupal/Component/Utility/SafeMarkup.php +++ b/core/lib/Drupal/Component/Utility/SafeMarkup.php @@ -207,13 +207,65 @@ public static function checkAdminXss($string) { * @see \Drupal\Component\Utility\SafeMarkup::isSafe() */ public static function xssFilter($string, $html_tags = NULL) { - if (is_null($html_tags)) { - $string = Xss::filter($string); - } - else { - $string = Xss::filter($string, $html_tags); - } - return static::set($string); + return static::set(Xss::filter($string, $html_tags)); + } + + /** + * Safely truncates an HTML string. + * + * @param $string + * The string with raw HTML in it. It will be stripped of everything that + * can cause an XSS attack. The string provided will always be escaped + * regardless of whether the string is already marked as safe. It also will + * be truncated so that the text equivalent of it (ie the HTML tags + * removed) is shorter than $length. For example: + * @code + * Xss::truncate('foobar', 5) + * @endcode + * will return + * @code + * fooba + * @endcode + * @param int $max_length + * An upper limit on the returned string length, including trailing ellipsis + * if $add_ellipsis is TRUE. + * @param bool $wordsafe + * If TRUE, attempt to truncate on a word boundary. Word boundaries are + * spaces, punctuation, and Unicode characters used as word boundaries in + * non-Latin languages; see Unicode::PREG_CLASS_WORD_BOUNDARY for more + * information. If a word boundary cannot be found that would make the length + * of the returned string fall within length guidelines (see parameters + * $max_length and $min_wordsafe_length), word boundaries are ignored. + * @param bool $add_ellipsis + * If TRUE, add '...' to the end of the truncated string (defaults to + * FALSE). The string length will still fall within $max_length. + * @param bool $min_wordsafe_length + * If $wordsafe is TRUE, the minimum acceptable length for truncation (before + * adding an ellipsis, if $add_ellipsis is TRUE). Has no effect if $wordsafe + * is FALSE. This can be used to prevent having a very short resulting string + * that will not be understandable. For instance, if you are truncating the + * string "See myverylongurlexample.com for more information" to a word-safe + * return length of 20, the only available word boundary within 20 characters + * is after the word "See", which wouldn't leave a very informative string. If + * you had set $min_wordsafe_length to 10, though, the function would realise + * that "See" alone is too short, and would then just truncate ignoring word + * boundaries, giving you "See myverylongurl..." (assuming you had set + * $add_ellipses to TRUE). + * @param array $html_tags + * An array of HTML tags. + * + * @return string + * An XSS-safe and truncated version of $string, or an empty string if + * $string is not valid UTF-8. The string is marked as safe. + * + * @see \Drupal\Component\Utility\Xss::truncate() + * @see \Drupal\Component\Utility\Xss::filter() + * @see \Drupal\Component\Utility\Xss::getAdminTagList() + * @see \Drupal\Component\Utility\Unicode::truncate() + * @see \Drupal\Component\Utility\SafeMarkup::isSafe() + */ + public static function xssTruncate($string, $length, $wordsafe = FALSE, $add_ellipsis = FALSE, $min_wordsafe_length = 1, $html_tags = NULL) { + return static::set(Xss::truncate($string, $length, $wordsafe, $add_ellipsis, $min_wordsafe_length, $html_tags)); } /** diff --git a/core/lib/Drupal/Component/Utility/Xss.php b/core/lib/Drupal/Component/Utility/Xss.php index a68ca2b..f695c94 100644 --- a/core/lib/Drupal/Component/Utility/Xss.php +++ b/core/lib/Drupal/Component/Utility/Xss.php @@ -14,6 +14,17 @@ */ class Xss { + const HTML_TAG_PREG = '% + ( + <(?=[^a-zA-Z!/]) # a lone < + | # or + # a comment + | # or + <[^>]*(?:>|$) # a string that starts with a <, up until the > or the end of the string + | # or + > # just a > + )%x'; + /** * The list of html tags allowed by filterAdmin(). * @@ -23,6 +34,15 @@ class Xss { */ protected static $adminTags = array('a', 'abbr', 'acronym', 'address', 'article', 'aside', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'cite', 'code', 'col', 'colgroup', 'command', 'dd', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'em', 'figcaption', 'figure', 'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'mark', 'menu', 'meter', 'nav', 'ol', 'output', 'p', 'pre', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'section', 'small', 'span', 'strong', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr'); + /** + * The default list of html tags allowed by filter() and truncate(). + * + * @var array + * + * @see \Drupal\Component\Utility\Xss::filterAdmin() + */ + protected static $defaultFilterTags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd'); + /** * Filters HTML to prevent cross-site-scripting (XSS) vulnerabilities. * @@ -58,26 +78,17 @@ class Xss { * * @ingroup sanitization */ - public static function filter($string, $html_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) { + public static function filter($string, $html_tags = NULL) { // Only operate on valid UTF-8 strings. This is necessary to prevent cross // site scripting issues on Internet Explorer 6. if (!Unicode::validateUtf8($string)) { return ''; } - // Remove NULL characters (ignored by some browsers). - $string = str_replace(chr(0), '', $string); - // Remove Netscape 4 JS entities. - $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string); + if (!isset($html_tags)) { + $html_tags = static::$defaultFilterTags; + } + $string = static::prepareFilter($string); - // Defuse all HTML entities. - $string = str_replace('&', '&', $string); - // Change back only well-formed entities in our whitelist: - // Decimal numeric entities. - $string = preg_replace('/&#([0-9]+;)/', '&#\1', $string); - // Hexadecimal numeric entities. - $string = preg_replace('/&#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string); - // Named entities. - $string = preg_replace('/&([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string); $html_tags = array_flip($html_tags); // Late static binding does not work inside anonymous functions. $class = get_called_class(); @@ -88,16 +99,7 @@ public static function filter($string, $html_tags = array('a', 'em', 'strong', ' // for output. All other known XSS vectors have been filtered out by this // point and any HTML tags remaining will have been deliberately allowed, so // it is acceptable to call SafeMarkup::set() on the resultant string. - return preg_replace_callback('% - ( - <(?=[^a-zA-Z!/]) # a lone < - | # or - # a comment - | # or - <[^>]*(>|$) # a string that starts with a <, up until the > or the end of the string - | # or - > # just a > - )%x', $splitter, $string); + return preg_replace_callback(static::HTML_TAG_PREG, $splitter, $string); } /** @@ -135,6 +137,90 @@ public static function filterAdmin($string) { } /** + * Safely truncates an HTML string. + * + * The resulting string rendered in a browser will be free of XSS and shorter + * than the specified length. + * + * @param string $string + * The string with raw HTML in it. It will be stripped of everything that + * can cause an XSS attack. The string provided will always be escaped + * regardless of whether the string is already marked as safe. It also will + * be truncated so that the text equivalent of it (ie the HTML tags + * removed) is shorter than $length. For example: + * @code + * Xss::truncate('foobar', 5) + * @endcode + * will return + * @code + * fooba + * @endcode + * @param int $max_length + * An upper limit on the returned string length, including trailing ellipsis + * if $add_ellipsis is TRUE. + * @param bool $wordsafe + * If TRUE, attempt to truncate on a word boundary. Word boundaries are + * spaces, punctuation, and Unicode characters used as word boundaries in + * non-Latin languages; see Unicode::PREG_CLASS_WORD_BOUNDARY for more + * information. If a word boundary cannot be found that would make the length + * of the returned string fall within length guidelines (see parameters + * $max_length and $min_wordsafe_length), word boundaries are ignored. + * @param bool $add_ellipsis + * If TRUE, add '...' to the end of the truncated string (defaults to + * FALSE). The string length will still fall within $max_length. + * @param bool $min_wordsafe_length + * If $wordsafe is TRUE, the minimum acceptable length for truncation (before + * adding an ellipsis, if $add_ellipsis is TRUE). Has no effect if $wordsafe + * is FALSE. This can be used to prevent having a very short resulting string + * that will not be understandable. For instance, if you are truncating the + * string "See myverylongurlexample.com for more information" to a word-safe + * return length of 20, the only available word boundary within 20 characters + * is after the word "See", which wouldn't leave a very informative string. If + * you had set $min_wordsafe_length to 10, though, the function would realise + * that "See" alone is too short, and would then just truncate ignoring word + * boundaries, giving you "See myverylongurl..." (assuming you had set + * $add_ellipses to TRUE). + * @param array $html_tags + * An array of HTML tags. + * + * @return string + * An XSS-safe and truncated version of $string, or an empty string if + * $string is not valid UTF-8. + */ + public static function truncate($string, $length, $wordsafe = FALSE, $add_ellipsis = FALSE, $min_wordsafe_length = 1, $html_tags = NULL) { + if (!Unicode::validateUtf8($string)) { + return ''; + } + if (!isset($html_tags)) { + $html_tags = static::$defaultFilterTags; + } + $string = static::prepareFilter($string); + $html_tags = array_flip($html_tags); + $class = get_called_class(); + $pieces = preg_split(static::HTML_TAG_PREG, $string, -1 , PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY); + $final = ''; + $remaining_length = $length; + foreach ($pieces as $piece) { + // If it is a HTML tag, pass it to the normal XSS filter pipeline. + if ($piece[0] == '<' || $piece[0] == '>') { + $piece = static::split($piece, $html_tags, $class); + } + else { + // Would adding this piece of text make the string too long? + $current_length = Unicode::strlen($piece); + if ($current_length > $remaining_length) { + $final .= Unicode::truncate($piece, $remaining_length, $wordsafe, $add_ellipsis, $min_wordsafe_length); + break; + } + $remaining_length -= $current_length; + } + $final .= $piece; + } + // Close the tags. + return Html::normalize($final); + } + + /** * Processes an HTML tag. * * @param string $string @@ -347,4 +433,32 @@ public static function getAdminTagList() { return static::$adminTags; } + /** + * Prepare the string for filtering. + * + * @param $string + * The string with raw HTML in it. + * + * @return string + * The same string with NULL characters and Netsscape 4 JS entities removed + * and HTML entities defused. + */ + protected static function prepareFilter($string) { + // Remove NULL characters (ignored by some browsers). + $string = str_replace(chr(0), '', $string); + // Remove Netscape 4 JS entities. + $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string); + + // Defuse all HTML entities. + $string = str_replace('&', '&', $string); + // Change back only well-formed entities in our whitelist: + // Decimal numeric entities. + $string = preg_replace('/&#([0-9]+;)/', '&#\1', $string); + // Hexadecimal numeric entities. + $string = preg_replace('/&#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string); + // Named entities. + $string = preg_replace('/&([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string); + return $string; + } + } diff --git a/core/modules/dblog/src/Controller/DbLogController.php b/core/modules/dblog/src/Controller/DbLogController.php index 3982da6..712b00a 100644 --- a/core/modules/dblog/src/Controller/DbLogController.php +++ b/core/modules/dblog/src/Controller/DbLogController.php @@ -185,9 +185,7 @@ public function overview() { $message = $this->formatMessage($dblog); if ($message && isset($dblog->wid)) { // Truncate link_text to 56 chars of message. - // @todo Reevaluate the SafeMarkup::set() in - // https://www.drupal.org/node/2399261. - $log_text = SafeMarkup::set(Unicode::truncate(Xss::filter($message, array()), 56, TRUE, TRUE)); + $log_text = SafeMarkup::xssTruncate($message, 56, TRUE, TRUE, 1, ['em', 'strong', 'cite', 'code']); $message = $this->l($log_text, new Url('dblog.event', array('event_id' => $dblog->wid), array( 'attributes' => array( // Provide a title for the link for useful hover hints. diff --git a/core/modules/dblog/src/Tests/DbLogTest.php b/core/modules/dblog/src/Tests/DbLogTest.php index 820b8e0..9d5118e 100644 --- a/core/modules/dblog/src/Tests/DbLogTest.php +++ b/core/modules/dblog/src/Tests/DbLogTest.php @@ -7,8 +7,8 @@ namespace Drupal\dblog\Tests; +use Drupal\Component\Utility\SafeMarkup; use Drupal\Component\Utility\Unicode; -use Drupal\Component\Utility\Xss; use Drupal\Core\Logger\RfcLogLevel; use Drupal\Core\Url; use Drupal\dblog\Controller\DbLogController; @@ -332,17 +332,17 @@ private function doUser() { // Add user. // Default display includes name and email address; if too long, the email // address is replaced by three periods. - $this->assertLogMessage(t('New user: %name %email.', array('%name' => $name, '%email' => '<' . $user->getEmail() . '>')), 'DBLog event was recorded: [add user]'); + $this->assertLogMessage('New user: %name %email.', array('%name' => $name, '%email' => '<' . $user->getEmail() . '>'), 'DBLog event was recorded: [add user]'); // Login user. - $this->assertLogMessage(t('Session opened for %name.', array('%name' => $name)), 'DBLog event was recorded: [login user]'); + $this->assertLogMessage('Session opened for %name.', array('%name' => $name), 'DBLog event was recorded: [login user]'); // Logout user. - $this->assertLogMessage(t('Session closed for %name.', array('%name' => $name)), 'DBLog event was recorded: [logout user]'); + $this->assertLogMessage('Session closed for %name.', array('%name' => $name), 'DBLog event was recorded: [logout user]'); // Delete user. $message = t('Deleted user: %name %email.', array('%name' => $name, '%email' => '<' . $user->getEmail() . '>')); - $message_text = Unicode::truncate(Xss::filter($message, array()), 56, TRUE, TRUE); + $message_text = SafeMarkup::xssTruncate($message, 56, TRUE, TRUE); // Verify that the full message displays on the details page. $link = FALSE; - if ($links = $this->xpath('//a[text()="' . html_entity_decode($message_text) . '"]')) { + if ($links = $this->xpath('//a[string()="' . html_entity_decode(strip_tags($message_text)) . '"]')) { // Found link with the message text. $links = array_shift($links); foreach ($links->attributes() as $attr => $value) { @@ -412,11 +412,11 @@ private function doNode($type) { // Verify that node events were recorded. // Was node content added? - $this->assertLogMessage(t('@type: added %title.', array('@type' => $type, '%title' => $title)), 'DBLog event was recorded: [content added]'); + $this->assertLogMessage('@type: added %title.', array('@type' => $type, '%title' => $title), 'DBLog event was recorded: [content added]'); // Was node content updated? - $this->assertLogMessage(t('@type: updated %title.', array('@type' => $type, '%title' => $title)), 'DBLog event was recorded: [content updated]'); + $this->assertLogMessage('@type: updated %title.', array('@type' => $type, '%title' => $title), 'DBLog event was recorded: [content updated]'); // Was node content deleted? - $this->assertLogMessage(t('@type: deleted %title.', array('@type' => $type, '%title' => $title)), 'DBLog event was recorded: [content deleted]'); + $this->assertLogMessage('@type: deleted %title.', array('@type' => $type, '%title' => $title), 'DBLog event was recorded: [content deleted]'); // View the database log access-denied report page. $this->drupalGet('admin/reports/access-denied'); @@ -694,12 +694,21 @@ protected function asText(\SimpleXMLElement $element) { * @param string $message * The message to pass to simpletest. */ - protected function assertLogMessage($log_message, $message) { - $message_text = Unicode::truncate(Xss::filter($log_message, array()), 56, TRUE, TRUE); + protected function assertLogMessage($log_message, $log_variables, $message) { + $stripped_text = SafeMarkup::xssTruncate(SafeMarkup::format($log_message, $log_variables), 56, TRUE, TRUE, 1, []); // After \Drupal\Component\Utility\Xss::filter(), HTML entities should be // converted to their character equivalents because assertLink() uses this // string in xpath() to query the Document Object Model (DOM). - $this->assertLink(html_entity_decode($message_text), 0, $message); + $label = html_entity_decode($stripped_text); + $arguments[':label'] = $label; + $xpath = "//a[string() = :label]"; + foreach ($log_variables as $key => $variable) { + if ($key[0] == '%') { + $xpath .= "[em[@class='placeholder']=$key]"; + $arguments[$key] = html_entity_decode($variable); + } + } + $this->assertTrue($this->xpath($xpath, $arguments), $message); } /** diff --git a/core/modules/search/src/Tests/SearchConfigSettingsFormTest.php b/core/modules/search/src/Tests/SearchConfigSettingsFormTest.php index a99ea45..5f6b1e4 100644 --- a/core/modules/search/src/Tests/SearchConfigSettingsFormTest.php +++ b/core/modules/search/src/Tests/SearchConfigSettingsFormTest.php @@ -7,6 +7,7 @@ namespace Drupal\search\Tests; +use Drupal\Component\Utility\SafeMarkup; use Drupal\Core\Url; /** @@ -100,7 +101,7 @@ function testSearchSettingsPage() { $text = $this->randomMachineName(5); $this->drupalPostForm('search/node', array('keys' => $text), t('Search')); $this->drupalGet('admin/reports/dblog'); - $this->assertLink('Searched Content for ' . $text . '.', 0, 'Search was logged'); + $this->assertRaw('>' . SafeMarkup::format('Searched %type for %keys.', ['%type' => 'Content', '%keys' => $text]) . '', 0, 'Search was logged'); } diff --git a/core/tests/Drupal/Tests/Component/Utility/XssTest.php b/core/tests/Drupal/Tests/Component/Utility/XssTest.php index 7ca4be8..15387e1 100644 --- a/core/tests/Drupal/Tests/Component/Utility/XssTest.php +++ b/core/tests/Drupal/Tests/Component/Utility/XssTest.php @@ -8,6 +8,7 @@ namespace Drupal\Tests\Component\Utility; use Drupal\Component\Utility\Html; +use Drupal\Component\Utility\Unicode; use Drupal\Component\Utility\UrlHelper; use Drupal\Component\Utility\Xss; use Drupal\Tests\UnitTestCase; @@ -606,4 +607,68 @@ protected function assertNotNormalized($haystack, $needle, $message = '', $group $this->assertTrue(strpos(strtolower(Html::decodeEntities($haystack)), $needle) === FALSE, $message, $group); } + + /** + * @param $value + * @param array $allowed_tags + * @return string + */ + protected function convertFilterToTruncate($value, &$expected, $allowed_tags = NULL) { + if (!isset($allowed_tags)) { + $r = (new \ReflectionClass('\Drupal\Component\Utility\Xss'))->getProperty('defaultFilterTags'); + $r->setAccessible(TRUE); + $allowed_tags = $r->getValue(); + } + $filtered_length = Unicode::strlen(strip_tags(Xss::filter($value, $allowed_tags))); + if ($filtered_length) { + $allowed_tags[] = 'test'; + $value .= 'fluffybunnies'; + $expected .= 'fluffybunn'; + $filtered_length += 10; + } + return Xss::truncate($value, $filtered_length, FALSE, FALSE, 1, $allowed_tags); + } + + /** + * @covers ::truncate + * + * @dataProvider providerTestFilterXssNormalized + */ + public function testTruncateXssNormalized($value, $expected, $message, array $allowed_tags = NULL) { + $value = $this->convertFilterToTruncate($value, $expected, $allowed_tags); + $this->assertNormalized($value, $expected, $message); + } + + /** + * @covers ::truncate + * + * @dataProvider providerTestFilterXssNotNormalized + */ + public function testTruncateXssNotNormalized($value, $expected, $message, array $allowed_tags = NULL) { + $value = $this->convertFilterToTruncate($value, $expected, $allowed_tags); + $this->assertNotNormalized($value, $expected, $message); + } + + /** + * @covers ::truncate + * + * @dataProvider providerTestInvalidMultiByte + */ + public function testTruncateInvalidMultiByte($value, $expected, $message) { + $value = $this->convertFilterToTruncate($value, $expected); + $this->assertEquals($value, $expected, $message); + } + + /** + * @covers ::truncate + * + * @dataProvider providerTestAttributes + */ + public function testTruncateAttribute($value, $expected, $message, $allowed_tags = NULL) { + $value = $this->convertFilterToTruncate($value, $expected, $allowed_tags); + // truncate, unlike filter calls normalize() so add a closing slash. + $expected = preg_replace('#(?$$#', ' />', $expected); + $this->assertEquals($expected, $value, $message); + } + }