diff --git a/core/lib/Drupal/Component/Utility/SafeMarkup.php b/core/lib/Drupal/Component/Utility/SafeMarkup.php
index 4d49266..ebef963 100644
--- a/core/lib/Drupal/Component/Utility/SafeMarkup.php
+++ b/core/lib/Drupal/Component/Utility/SafeMarkup.php
@@ -207,13 +207,65 @@ public static function checkAdminXss($string) {
* @see \Drupal\Component\Utility\SafeMarkup::isSafe()
*/
public static function xssFilter($string, $html_tags = NULL) {
- if (is_null($html_tags)) {
- $string = Xss::filter($string);
- }
- else {
- $string = Xss::filter($string, $html_tags);
- }
- return static::set($string);
+ return static::set(Xss::filter($string, $html_tags));
+ }
+
+ /**
+ * Safely truncates an HTML string.
+ *
+ * @param $string
+ * The string with raw HTML in it. It will be stripped of everything that
+ * can cause an XSS attack. The string provided will always be escaped
+ * regardless of whether the string is already marked as safe. It also will
+ * be truncated so that the text equivalent of it (ie the HTML tags
+ * removed) is shorter than $length. For example:
+ * @code
+ * Xss::truncate('foobar', 5)
+ * @endcode
+ * will return
+ * @code
+ * fooba
+ * @endcode
+ * @param int $max_length
+ * An upper limit on the returned string length, including trailing ellipsis
+ * if $add_ellipsis is TRUE.
+ * @param bool $wordsafe
+ * If TRUE, attempt to truncate on a word boundary. Word boundaries are
+ * spaces, punctuation, and Unicode characters used as word boundaries in
+ * non-Latin languages; see Unicode::PREG_CLASS_WORD_BOUNDARY for more
+ * information. If a word boundary cannot be found that would make the length
+ * of the returned string fall within length guidelines (see parameters
+ * $max_length and $min_wordsafe_length), word boundaries are ignored.
+ * @param bool $add_ellipsis
+ * If TRUE, add '...' to the end of the truncated string (defaults to
+ * FALSE). The string length will still fall within $max_length.
+ * @param bool $min_wordsafe_length
+ * If $wordsafe is TRUE, the minimum acceptable length for truncation (before
+ * adding an ellipsis, if $add_ellipsis is TRUE). Has no effect if $wordsafe
+ * is FALSE. This can be used to prevent having a very short resulting string
+ * that will not be understandable. For instance, if you are truncating the
+ * string "See myverylongurlexample.com for more information" to a word-safe
+ * return length of 20, the only available word boundary within 20 characters
+ * is after the word "See", which wouldn't leave a very informative string. If
+ * you had set $min_wordsafe_length to 10, though, the function would realise
+ * that "See" alone is too short, and would then just truncate ignoring word
+ * boundaries, giving you "See myverylongurl..." (assuming you had set
+ * $add_ellipses to TRUE).
+ * @param array $html_tags
+ * An array of HTML tags.
+ *
+ * @return string
+ * An XSS-safe and truncated version of $string, or an empty string if
+ * $string is not valid UTF-8. The string is marked as safe.
+ *
+ * @see \Drupal\Component\Utility\Xss::truncate()
+ * @see \Drupal\Component\Utility\Xss::filter()
+ * @see \Drupal\Component\Utility\Xss::getAdminTagList()
+ * @see \Drupal\Component\Utility\Unicode::truncate()
+ * @see \Drupal\Component\Utility\SafeMarkup::isSafe()
+ */
+ public static function xssTruncate($string, $length, $wordsafe = FALSE, $add_ellipsis = FALSE, $min_wordsafe_length = 1, $html_tags = NULL) {
+ return static::set(Xss::truncate($string, $length, $wordsafe, $add_ellipsis, $min_wordsafe_length, $html_tags));
}
/**
diff --git a/core/lib/Drupal/Component/Utility/Xss.php b/core/lib/Drupal/Component/Utility/Xss.php
index a68ca2b..9657960 100644
--- a/core/lib/Drupal/Component/Utility/Xss.php
+++ b/core/lib/Drupal/Component/Utility/Xss.php
@@ -14,6 +14,17 @@
*/
class Xss {
+ const HTML_TAG_PREG = '%
+ (
+ <(?=[^a-zA-Z!/]) # a lone <
+ | # or
+ # a comment
+ | # or
+ <[^>]*(?:>|$) # a string that starts with a <, up until the > or the end of the string
+ | # or
+ > # just a >
+ )%x';
+
/**
* The list of html tags allowed by filterAdmin().
*
@@ -23,6 +34,15 @@ class Xss {
*/
protected static $adminTags = array('a', 'abbr', 'acronym', 'address', 'article', 'aside', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'cite', 'code', 'col', 'colgroup', 'command', 'dd', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'em', 'figcaption', 'figure', 'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'mark', 'menu', 'meter', 'nav', 'ol', 'output', 'p', 'pre', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'section', 'small', 'span', 'strong', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr');
+ /**
+ * The default list of html tags allowed by filter() and truncate().
+ *
+ * @var array
+ *
+ * @see \Drupal\Component\Utility\Xss::filterAdmin()
+ */
+ protected static $defaultFilterTags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd');
+
/**
* Filters HTML to prevent cross-site-scripting (XSS) vulnerabilities.
*
@@ -58,26 +78,17 @@ class Xss {
*
* @ingroup sanitization
*/
- public static function filter($string, $html_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
+ public static function filter($string, $html_tags = NULL) {
// Only operate on valid UTF-8 strings. This is necessary to prevent cross
// site scripting issues on Internet Explorer 6.
if (!Unicode::validateUtf8($string)) {
return '';
}
- // Remove NULL characters (ignored by some browsers).
- $string = str_replace(chr(0), '', $string);
- // Remove Netscape 4 JS entities.
- $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
+ if (!isset($html_tags)) {
+ $html_tags = static::$defaultFilterTags;
+ }
+ $string = static::prepareFilter($string);
- // Defuse all HTML entities.
- $string = str_replace('&', '&', $string);
- // Change back only well-formed entities in our whitelist:
- // Decimal numeric entities.
- $string = preg_replace('/&#([0-9]+;)/', '\1', $string);
- // Hexadecimal numeric entities.
- $string = preg_replace('/&#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '\1', $string);
- // Named entities.
- $string = preg_replace('/&([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
$html_tags = array_flip($html_tags);
// Late static binding does not work inside anonymous functions.
$class = get_called_class();
@@ -88,16 +99,7 @@ public static function filter($string, $html_tags = array('a', 'em', 'strong', '
// for output. All other known XSS vectors have been filtered out by this
// point and any HTML tags remaining will have been deliberately allowed, so
// it is acceptable to call SafeMarkup::set() on the resultant string.
- return preg_replace_callback('%
- (
- <(?=[^a-zA-Z!/]) # a lone <
- | # or
- # a comment
- | # or
- <[^>]*(>|$) # a string that starts with a <, up until the > or the end of the string
- | # or
- > # just a >
- )%x', $splitter, $string);
+ return preg_replace_callback(static::HTML_TAG_PREG, $splitter, $string);
}
/**
@@ -135,6 +137,90 @@ public static function filterAdmin($string) {
}
/**
+ * Safely truncates an HTML string.
+ *
+ * The resulting string rendered in a browser will be free of XSS and shorter
+ * than the specified length.
+ *
+ * @param string $string
+ * The string with raw HTML in it. It will be stripped of everything that
+ * can cause an XSS attack. The string provided will always be escaped
+ * regardless of whether the string is already marked as safe. It also will
+ * be truncated so that the text equivalent of it (ie the HTML tags
+ * removed) is shorter than $length. For example:
+ * @code
+ * Xss::truncate('foobar', 5)
+ * @endcode
+ * will return
+ * @code
+ * fooba
+ * @endcode
+ * @param int $max_length
+ * An upper limit on the returned string length, including trailing ellipsis
+ * if $add_ellipsis is TRUE.
+ * @param bool $wordsafe
+ * If TRUE, attempt to truncate on a word boundary. Word boundaries are
+ * spaces, punctuation, and Unicode characters used as word boundaries in
+ * non-Latin languages; see Unicode::PREG_CLASS_WORD_BOUNDARY for more
+ * information. If a word boundary cannot be found that would make the length
+ * of the returned string fall within length guidelines (see parameters
+ * $max_length and $min_wordsafe_length), word boundaries are ignored.
+ * @param bool $add_ellipsis
+ * If TRUE, add '...' to the end of the truncated string (defaults to
+ * FALSE). The string length will still fall within $max_length.
+ * @param bool $min_wordsafe_length
+ * If $wordsafe is TRUE, the minimum acceptable length for truncation (before
+ * adding an ellipsis, if $add_ellipsis is TRUE). Has no effect if $wordsafe
+ * is FALSE. This can be used to prevent having a very short resulting string
+ * that will not be understandable. For instance, if you are truncating the
+ * string "See myverylongurlexample.com for more information" to a word-safe
+ * return length of 20, the only available word boundary within 20 characters
+ * is after the word "See", which wouldn't leave a very informative string. If
+ * you had set $min_wordsafe_length to 10, though, the function would realise
+ * that "See" alone is too short, and would then just truncate ignoring word
+ * boundaries, giving you "See myverylongurl..." (assuming you had set
+ * $add_ellipses to TRUE).
+ * @param array $html_tags
+ * An array of HTML tags.
+ *
+ * @return string
+ * An XSS-safe and truncated version of $string, or an empty string if
+ * $string is not valid UTF-8.
+ */
+ public static function truncate($string, $length, $wordsafe = FALSE, $add_ellipsis = FALSE, $min_wordsafe_length = 1, $html_tags = NULL) {
+ if (!Unicode::validateUtf8($string)) {
+ return '';
+ }
+ if (!isset($html_tags)) {
+ $html_tags = static::$defaultFilterTags;
+ }
+ $string = static::prepareFilter($string);
+ $html_tags = array_flip($html_tags);
+ $class = get_called_class();
+ $pieces = preg_split(static::HTML_TAG_PREG, $string, -1 , PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
+ $final = '';
+ $remaining_length = $length;
+ foreach ($pieces as $piece) {
+ // If it is a HTML tag, pass it to the normal XSS filter pipeline.
+ if ($piece[0] == '<' || $piece[0] == '>') {
+ $piece = static::split($piece, $html_tags, $class);
+ }
+ else {
+ // Would adding this piece of text make the string too long?
+ $current_length = Unicode::strlen($piece);
+ if ($current_length > $remaining_length) {
+ $final .= Unicode::truncate($piece, $remaining_length, $wordsafe, $add_ellipsis, $min_wordsafe_length);
+ break;
+ }
+ $remaining_length -= $current_length;
+ }
+ $final .= $piece;
+ }
+ // Close the tags.
+ return Html::normalize($final);
+ }
+
+ /**
* Processes an HTML tag.
*
* @param string $string
@@ -347,4 +433,32 @@ public static function getAdminTagList() {
return static::$adminTags;
}
+ /**
+ * Prepare the string for filtering.
+ *
+ * @param $string
+ * The string with raw HTML in it.
+ *
+ * @return string
+ * The same string with NULL characters and Netsscape 4 JS entities removed
+ * and HTML entities defused.
+ */
+ protected static function prepareFilter($string) {
+ // Remove NULL characters (ignored by some browsers).
+ $string = str_replace(chr(0), '', $string);
+ // Remove Netscape 4 JS entities.
+ $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
+
+ // Defuse all HTML entities.
+ $string = str_replace('&', '&', $string);
+ // Change back only well-formed entities in our whitelist:
+ // Decimal numeric entities.
+ $string = preg_replace('/&#([0-9]+;)/', '\1', $string);
+ // Hexadecimal numeric entities.
+ $string = preg_replace('/&#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '\1', $string);
+ // Named entities.
+ $string = preg_replace('/&([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
+ return $string;
+ }
+
}
diff --git a/core/modules/dblog/src/Controller/DbLogController.php b/core/modules/dblog/src/Controller/DbLogController.php
index 3982da6..712b00a 100644
--- a/core/modules/dblog/src/Controller/DbLogController.php
+++ b/core/modules/dblog/src/Controller/DbLogController.php
@@ -185,9 +185,7 @@ public function overview() {
$message = $this->formatMessage($dblog);
if ($message && isset($dblog->wid)) {
// Truncate link_text to 56 chars of message.
- // @todo Reevaluate the SafeMarkup::set() in
- // https://www.drupal.org/node/2399261.
- $log_text = SafeMarkup::set(Unicode::truncate(Xss::filter($message, array()), 56, TRUE, TRUE));
+ $log_text = SafeMarkup::xssTruncate($message, 56, TRUE, TRUE, 1, ['em', 'strong', 'cite', 'code']);
$message = $this->l($log_text, new Url('dblog.event', array('event_id' => $dblog->wid), array(
'attributes' => array(
// Provide a title for the link for useful hover hints.
diff --git a/core/modules/dblog/src/Tests/DbLogTest.php b/core/modules/dblog/src/Tests/DbLogTest.php
index 820b8e0..9d5118e 100644
--- a/core/modules/dblog/src/Tests/DbLogTest.php
+++ b/core/modules/dblog/src/Tests/DbLogTest.php
@@ -7,8 +7,8 @@
namespace Drupal\dblog\Tests;
+use Drupal\Component\Utility\SafeMarkup;
use Drupal\Component\Utility\Unicode;
-use Drupal\Component\Utility\Xss;
use Drupal\Core\Logger\RfcLogLevel;
use Drupal\Core\Url;
use Drupal\dblog\Controller\DbLogController;
@@ -332,17 +332,17 @@ private function doUser() {
// Add user.
// Default display includes name and email address; if too long, the email
// address is replaced by three periods.
- $this->assertLogMessage(t('New user: %name %email.', array('%name' => $name, '%email' => '<' . $user->getEmail() . '>')), 'DBLog event was recorded: [add user]');
+ $this->assertLogMessage('New user: %name %email.', array('%name' => $name, '%email' => '<' . $user->getEmail() . '>'), 'DBLog event was recorded: [add user]');
// Login user.
- $this->assertLogMessage(t('Session opened for %name.', array('%name' => $name)), 'DBLog event was recorded: [login user]');
+ $this->assertLogMessage('Session opened for %name.', array('%name' => $name), 'DBLog event was recorded: [login user]');
// Logout user.
- $this->assertLogMessage(t('Session closed for %name.', array('%name' => $name)), 'DBLog event was recorded: [logout user]');
+ $this->assertLogMessage('Session closed for %name.', array('%name' => $name), 'DBLog event was recorded: [logout user]');
// Delete user.
$message = t('Deleted user: %name %email.', array('%name' => $name, '%email' => '<' . $user->getEmail() . '>'));
- $message_text = Unicode::truncate(Xss::filter($message, array()), 56, TRUE, TRUE);
+ $message_text = SafeMarkup::xssTruncate($message, 56, TRUE, TRUE);
// Verify that the full message displays on the details page.
$link = FALSE;
- if ($links = $this->xpath('//a[text()="' . html_entity_decode($message_text) . '"]')) {
+ if ($links = $this->xpath('//a[string()="' . html_entity_decode(strip_tags($message_text)) . '"]')) {
// Found link with the message text.
$links = array_shift($links);
foreach ($links->attributes() as $attr => $value) {
@@ -412,11 +412,11 @@ private function doNode($type) {
// Verify that node events were recorded.
// Was node content added?
- $this->assertLogMessage(t('@type: added %title.', array('@type' => $type, '%title' => $title)), 'DBLog event was recorded: [content added]');
+ $this->assertLogMessage('@type: added %title.', array('@type' => $type, '%title' => $title), 'DBLog event was recorded: [content added]');
// Was node content updated?
- $this->assertLogMessage(t('@type: updated %title.', array('@type' => $type, '%title' => $title)), 'DBLog event was recorded: [content updated]');
+ $this->assertLogMessage('@type: updated %title.', array('@type' => $type, '%title' => $title), 'DBLog event was recorded: [content updated]');
// Was node content deleted?
- $this->assertLogMessage(t('@type: deleted %title.', array('@type' => $type, '%title' => $title)), 'DBLog event was recorded: [content deleted]');
+ $this->assertLogMessage('@type: deleted %title.', array('@type' => $type, '%title' => $title), 'DBLog event was recorded: [content deleted]');
// View the database log access-denied report page.
$this->drupalGet('admin/reports/access-denied');
@@ -694,12 +694,21 @@ protected function asText(\SimpleXMLElement $element) {
* @param string $message
* The message to pass to simpletest.
*/
- protected function assertLogMessage($log_message, $message) {
- $message_text = Unicode::truncate(Xss::filter($log_message, array()), 56, TRUE, TRUE);
+ protected function assertLogMessage($log_message, $log_variables, $message) {
+ $stripped_text = SafeMarkup::xssTruncate(SafeMarkup::format($log_message, $log_variables), 56, TRUE, TRUE, 1, []);
// After \Drupal\Component\Utility\Xss::filter(), HTML entities should be
// converted to their character equivalents because assertLink() uses this
// string in xpath() to query the Document Object Model (DOM).
- $this->assertLink(html_entity_decode($message_text), 0, $message);
+ $label = html_entity_decode($stripped_text);
+ $arguments[':label'] = $label;
+ $xpath = "//a[string() = :label]";
+ foreach ($log_variables as $key => $variable) {
+ if ($key[0] == '%') {
+ $xpath .= "[em[@class='placeholder']=$key]";
+ $arguments[$key] = html_entity_decode($variable);
+ }
+ }
+ $this->assertTrue($this->xpath($xpath, $arguments), $message);
}
/**
diff --git a/core/modules/search/src/Tests/SearchConfigSettingsFormTest.php b/core/modules/search/src/Tests/SearchConfigSettingsFormTest.php
index a99ea45..5f6b1e4 100644
--- a/core/modules/search/src/Tests/SearchConfigSettingsFormTest.php
+++ b/core/modules/search/src/Tests/SearchConfigSettingsFormTest.php
@@ -7,6 +7,7 @@
namespace Drupal\search\Tests;
+use Drupal\Component\Utility\SafeMarkup;
use Drupal\Core\Url;
/**
@@ -100,7 +101,7 @@ function testSearchSettingsPage() {
$text = $this->randomMachineName(5);
$this->drupalPostForm('search/node', array('keys' => $text), t('Search'));
$this->drupalGet('admin/reports/dblog');
- $this->assertLink('Searched Content for ' . $text . '.', 0, 'Search was logged');
+ $this->assertRaw('>' . SafeMarkup::format('Searched %type for %keys.', ['%type' => 'Content', '%keys' => $text]) . '', 0, 'Search was logged');
}
diff --git a/core/tests/Drupal/Tests/Component/Utility/XssTest.php b/core/tests/Drupal/Tests/Component/Utility/XssTest.php
index 7ca4be8..15387e1 100644
--- a/core/tests/Drupal/Tests/Component/Utility/XssTest.php
+++ b/core/tests/Drupal/Tests/Component/Utility/XssTest.php
@@ -8,6 +8,7 @@
namespace Drupal\Tests\Component\Utility;
use Drupal\Component\Utility\Html;
+use Drupal\Component\Utility\Unicode;
use Drupal\Component\Utility\UrlHelper;
use Drupal\Component\Utility\Xss;
use Drupal\Tests\UnitTestCase;
@@ -606,4 +607,68 @@ protected function assertNotNormalized($haystack, $needle, $message = '', $group
$this->assertTrue(strpos(strtolower(Html::decodeEntities($haystack)), $needle) === FALSE, $message, $group);
}
+
+ /**
+ * @param $value
+ * @param array $allowed_tags
+ * @return string
+ */
+ protected function convertFilterToTruncate($value, &$expected, $allowed_tags = NULL) {
+ if (!isset($allowed_tags)) {
+ $r = (new \ReflectionClass('\Drupal\Component\Utility\Xss'))->getProperty('defaultFilterTags');
+ $r->setAccessible(TRUE);
+ $allowed_tags = $r->getValue();
+ }
+ $filtered_length = Unicode::strlen(strip_tags(Xss::filter($value, $allowed_tags)));
+ if ($filtered_length) {
+ $allowed_tags[] = 'test';
+ $value .= 'fluffybunnies';
+ $expected .= 'fluffybunn';
+ $filtered_length += 10;
+ }
+ return Xss::truncate($value, $filtered_length, FALSE, FALSE, 1, $allowed_tags);
+ }
+
+ /**
+ * @covers ::truncate
+ *
+ * @dataProvider providerTestFilterXssNormalized
+ */
+ public function testTruncateXssNormalized($value, $expected, $message, array $allowed_tags = NULL) {
+ $value = $this->convertFilterToTruncate($value, $expected, $allowed_tags);
+ $this->assertNormalized($value, $expected, $message);
+ }
+
+ /**
+ * @covers ::truncate
+ *
+ * @dataProvider providerTestFilterXssNotNormalized
+ */
+ public function testTruncateXssNotNormalized($value, $expected, $message, array $allowed_tags = NULL) {
+ $value = $this->convertFilterToTruncate($value, $expected, $allowed_tags);
+ $this->assertNotNormalized($value, $expected, $message);
+ }
+
+ /**
+ * @covers ::truncate
+ *
+ * @dataProvider providerTestInvalidMultiByte
+ */
+ public function testTruncateInvalidMultiByte($value, $expected, $message) {
+ $value = $this->convertFilterToTruncate($value, $expected);
+ $this->assertEquals($value, $expected, $message);
+ }
+
+ /**
+ * @covers ::truncate
+ *
+ * @dataProvider providerTestAttributes
+ */
+ public function testTruncateAttribute($value, $expected, $message, $allowed_tags = NULL) {
+ $value = $this->convertFilterToTruncate($value, $expected, $allowed_tags);
+ // truncate, unlike filter calls normalize() so add a closing slash.
+ $expected = preg_replace('#(?$$#', ' />', $expected);
+ $this->assertEquals($expected, $value, $message);
+ }
+
}