core/modules/filter/filter.module | 197 +++++++++++++++++++- .../lib/Drupal/filter/Tests/FilterAPITest.php | 143 ++++++++++++++ .../lib/Drupal/filter/Tests/FilterSecurityTest.php | 19 +- core/modules/php/php.module | 1 + .../lib/Drupal/rdf/Tests/CommentAttributesTest.php | 2 +- core/profiles/standard/standard.install | 20 -- 6 files changed, 358 insertions(+), 24 deletions(-) diff --git a/core/modules/filter/filter.module b/core/modules/filter/filter.module index 6c54fae..a73e67b 100644 --- a/core/modules/filter/filter.module +++ b/core/modules/filter/filter.module @@ -6,6 +6,77 @@ */ use Drupal\Core\Template\Attribute; + + + +/** + * HTML generator filters -- MUST generate HTML. + * + * Formats using filters of this type may not be able to use WYSIWYG editors. + * + * WYSIWYG use case: ability to detect non-HTML formats, such as Markdown, where + * no WYSIWYG editor should be used because it would be impossible to go back to + * the original text format. + */ +define('FILTER_TYPE_HTML_GENERATOR', 'html generator'); + +/** + * Security filters -- strip HTML tags that the user MAY NOT use. + * + * WYSIWYG use case: all filters of this type MUST be applied, the user MAY NOT + * be presented processed text without all filters of this type. Security + * exploits might otherwise occur. + */ +define('FILTER_TYPE_SECURITY', 'security'); + +/** + * DOM transformation filters -- DOM-based, reliably reversible transformations. + * + * Filters SHOULD NOT use regular expressions when they can use DOM manipulation + * instead. This makes filters as robust as possible. + * + * WYSIWYG use case: these filters MUST NOT be applied when feeding a piece of + * text into the WYSIWYG editor. Instead, they MAY be re-implemented in + * JavaScript for each supported WYSIWYG editor. + * E.g. `` may be (reversibly!) transformed to + * ``. + */ +define('FILTER_TYPE_TRANSFORM_DOM', 'DOM transformation'); + +/** + * Text transformation filters -- text-based, irreversible transformations. 
+ * + * WYSIWYG use case: these filters MUST NOT be applied when feeding a piece of + * text into the WYSIWYG editor. Furthermore, they MUST NOT be re-implemented + * in JavaScript. + * E.g.: the Typogrify filter would transform `WYSIWYG` and `I said "foo"!` into + * `WYSIWYG` and `I said “foo”!`, respectively. Text + * link ad systems would transform `fancy car` into something like + * `fancy car`. Neither of those text-based + * transformations make sense when doing WYSIWYG editing, nor is it possible to + * reliably reverse them. + */ +define('FILTER_TYPE_TRANSFORM_TEXT', 'text transformation'); + +/** + * All of the above implies: + * - if a format uses >=1 filters of type FILTER_TYPE_HTML_GENERATOR, no WYSIWYG + * editor can be used. + * - if a format uses >=1 filters of type FILTER_TYPE_SECURITY, and a user saves + * modified text through his WYSIWYG editor, any disallowed tags will be lost. + * This seems like a minor annoyance and appears acceptable. + * - if a format uses >=1 filters of type FILTER_TYPE_TRANSFORM_TEXT, these + * transformations will not be visible while editing, but will be visible when + * viewing. + * - if a format uses >=1 filters of type FILTER_TYPE_TRANSFORM_DOM, these + * transformations may not be visible while editing (it is up to the filter to + * implement support for the WYSIWYG editor, by re-implementing the filtering + * in JavaScript), but will be visible when viewing. + */ + + + + /** * Implements hook_cache_flush(). */ @@ -546,6 +617,99 @@ function filter_default_format($account = NULL) { } /** + * Retrieves all filter types that are used in a given text format. + * + * @param string $format_id + * A text format ID. + * @return array + * All filter types used by filters of a given text format. + */ +function filter_get_filter_types_by_format($format_id) { + $filter_types = array(); + + $filters = filter_list_format($format_id); + + // Ignore filters that are disabled. 
+ $filters = array_filter($filters, function($filter) { + return $filter->status; + }); + + $filters_metadata = filter_get_filters(); + foreach ($filters as $filter) { + // @todo: Remove the fallback for when no filter type is defined. We don't + // fail, we just ignore these right now. + $type = FALSE; + if (empty($filters_metadata[$filter->name]['type'])) { + drupal_set_message(t('Filter !filter has no type specified!', array('!filter' => $filter->name)), 'error'); + } + else { + $type = $filters_metadata[$filter->name]['type']; + } + if ($type && !in_array($type, $filter_types)) { + $filter_types[] = $type; + } + } + + return $filter_types; +} + +/** + * Retrieve all tags that are allowed by a given text format. + * + * @param string $format_id + * A text format ID. + * @return array|TRUE + * An array of HTML tags (in "p", not "
" format) that are allowed by the
+ * text format. The empty array implies no tags are allowed. TRUE implies all
+ * tags are allowed.
+ */
+function filter_get_allowed_tags_by_format($format_id) {
+  // Look up the filter definitions once instead of once per filter.
+  $filters_metadata = filter_get_filters();
+
+  // Ignore filters that are disabled or don't have an "allowed tags" setting.
+  $filters = array_filter(filter_list_format($format_id), function($filter) use ($filters_metadata) {
+    return $filter->status && !empty($filters_metadata[$filter->name]['allowed tags setting']);
+  });
+
+  // Without any tag-restricting filter, every tag is allowed.
+  if (empty($filters)) {
+    return TRUE;
+  }
+
+  // From the set of remaining filters, collect the list of tags that is
+  // allowed by *all* filters, i.e. the intersection of all allowed tags.
+  $allowed_tags = NULL;
+  foreach ($filters as $filter) {
+    $setting_name = $filters_metadata[$filter->name]['allowed tags setting'];
+
+    // Treat a missing setting as an empty one (no tags allowed); reading it
+    // through isset() avoids an undefined index notice.
+    $setting = '';
+    if (isset($filter->settings[$setting_name])) {
+      $setting = $filter->settings[$setting_name];
+    }
+    // Split the setting on whitespace and angle brackets to get tag names.
+    $tags = preg_split('/\s+|<|>/', $setting, -1, PREG_SPLIT_NO_EMPTY);
+
+    // The first filter with an "allowed tags" setting provides the initial
+    // set.
+    if (!isset($allowed_tags)) {
+      $allowed_tags = $tags;
+    }
+    // Subsequent filters must be intersected with the existing set, to ensure
+    // we only end up with the tags that are allowed by *all* filters.
+    else {
+      $allowed_tags = array_intersect($allowed_tags, $tags);
+    }
+  }
+
+  // array_intersect() preserves the keys of its first argument; re-index so
+  // that callers always receive a sequential list of tag names.
+  return array_values($allowed_tags);
+}
+
+/**
* Returns the ID of the fallback text format that all users have access to.
*
* The fallback text format is a regular text format in every respect, except
@@ -755,13 +919,18 @@ function filter_list_format($format_id) {
* Boolean whether to cache the filtered output in the {cache_filter} table.
* The caller may set this to FALSE when the output is already cached
* elsewhere to avoid duplicate cache lookups and storage.
+ * @param array $filter_types_to_skip
+ * An array of filter types to skip, or the empty array (default) to skip no
+ * filter types. All of the format's filters will be applied, except for
+ * filters of the types that are marked to be skipped. FILTER_TYPE_SECURITY is
+ * the only type that cannot be skipped.
*
* @return
* The filtered text.
*
* @ingroup sanitization
*/
-function check_markup($text, $format_id = NULL, $langcode = '', $cache = FALSE) {
+function check_markup($text, $format_id = NULL, $langcode = '', $cache = FALSE, $filter_types_to_skip = array()) {
if (!isset($format_id)) {
$format_id = filter_fallback_format();
}
@@ -771,6 +940,16 @@ function check_markup($text, $format_id = NULL, $langcode = '', $cache = FALSE)
return '';
}
+ // Prevent FILTER_TYPE_SECURITY from being skipped.
+ if (in_array(FILTER_TYPE_SECURITY, $filter_types_to_skip)) {
+ $filter_types_to_skip = array_diff($filter_types_to_skip, array(FILTER_TYPE_SECURITY));
+ }
+
+ // When certain filters should be skipped, don't perform caching.
+ if ($filter_types_to_skip) {
+ $cache = FALSE;
+ }
+
// Check for a cached version of this piece of text.
$cache = $cache && !empty($format->cache);
$cache_id = '';
@@ -791,6 +970,10 @@ function check_markup($text, $format_id = NULL, $langcode = '', $cache = FALSE)
// Give filters the chance to escape HTML-like data such as code or formulas.
foreach ($filters as $name => $filter) {
+ // If necessary, skip filters of a certain type.
+ if ($filter_types_to_skip && in_array($filter_info[$name]['type'], $filter_types_to_skip)) {
+ continue;
+ }
if ($filter->status && isset($filter_info[$name]['prepare callback'])) {
$function = $filter_info[$name]['prepare callback'];
$text = $function($text, $filter, $format, $langcode, $cache, $cache_id);
@@ -799,6 +982,10 @@ function check_markup($text, $format_id = NULL, $langcode = '', $cache = FALSE)
// Perform filtering.
foreach ($filters as $name => $filter) {
+ // If necessary, skip filters of a certain type.
+ if ($filter_types_to_skip && in_array($filter_info[$name]['type'], $filter_types_to_skip)) {
+ continue;
+ }
if ($filter->status && isset($filter_info[$name]['process callback'])) {
$function = $filter_info[$name]['process callback'];
$text = $function($text, $filter, $format, $langcode, $cache, $cache_id);
@@ -1221,10 +1408,12 @@ function theme_filter_guidelines($variables) {
function filter_filter_info() {
$filters['filter_html'] = array(
'title' => t('Limit allowed HTML tags'),
+ 'type' => FILTER_TYPE_SECURITY,
+ 'allowed tags setting' => 'allowed_html',
'process callback' => '_filter_html',
'settings callback' => '_filter_html_settings',
'default settings' => array(
- 'allowed_html' => '
<br>
and <p>
)'),
+ 'type' => FILTER_TYPE_HTML_GENERATOR,
'process callback' => '_filter_autop',
'tips callback' => '_filter_autop_tips',
);
$filters['filter_url'] = array(
'title' => t('Convert URLs into links'),
+ 'type' => FILTER_TYPE_HTML_GENERATOR,
'process callback' => '_filter_url',
'settings callback' => '_filter_url_settings',
'default settings' => array(
@@ -1247,11 +1438,13 @@ function filter_filter_info() {
);
$filters['filter_htmlcorrector'] = array(
'title' => t('Correct faulty and chopped off HTML'),
+ 'type' => FILTER_TYPE_SECURITY,
'process callback' => '_filter_htmlcorrector',
'weight' => 10,
);
$filters['filter_html_escape'] = array(
'title' => t('Display any HTML as plain text'),
+ 'type' => FILTER_TYPE_HTML_GENERATOR,
'process callback' => '_filter_html_escape',
'tips callback' => '_filter_html_escape_tips',
'weight' => -10,
diff --git a/core/modules/filter/lib/Drupal/filter/Tests/FilterAPITest.php b/core/modules/filter/lib/Drupal/filter/Tests/FilterAPITest.php
new file mode 100644
index 0000000..8101410
--- /dev/null
+++ b/core/modules/filter/lib/Drupal/filter/Tests/FilterAPITest.php
@@ -0,0 +1,143 @@
+ 'API',
+ 'description' => 'Test the behavior of the API of the Filter module.',
+ 'group' => 'Filter',
+ );
+ }
+
+ function setUp() {
+ parent::setUp();
+
+ // Create Filtered HTML format.
+ $filtered_html_format = array(
+ 'format' => 'filtered_html',
+ 'name' => 'Filtered HTML',
+ 'filters' => array(
+ // Note that the filter_url filter is of the type FILTER_TYPE_HTML_GENERATOR.
+ 'filter_url' => array(
+ 'weight' => -1,
+ 'status' => 1,
+ ),
+ // Note that the filter_html filter is of the type FILTER_TYPE_SECURITY.
+ 'filter_html' => array(
+ 'status' => 1,
+ ),
+ )
+ );
+ $filtered_html_format = (object) $filtered_html_format;
+ filter_format_save($filtered_html_format);
+
+ // Create Full HTML format.
+ $full_html_format = array(
+ 'format' => 'full_html',
+ 'name' => 'Full HTML',
+ 'weight' => 1,
+ 'filters' => array(
+ 'filter_htmlcorrector' => array(
+ 'weight' => 10,
+ 'status' => 1,
+ ),
+ ),
+ );
+ $full_html_format = (object) $full_html_format;
+ filter_format_save($full_html_format);
+ }
+
+ /**
+ * Tests the ability to apply only a subset of filters.
+ */
+ function testCheckMarkup() {
+ $text = "Text with <marquee>evil content and</marquee> a URL: http://drupal.org!";
+ $expected_filtered_text = "Text with evil content and a URL: <a href=\"http://drupal.org\">http://drupal.org</a>!";
+ $expected_filter_text_without_html_generators = "Text with evil content and a URL: http://drupal.org!";
+
+ $this->assertIdentical(
+ check_markup($text, 'filtered_html', '', FALSE, array()),
+ $expected_filtered_text,
+ t('Expected filter result.')
+ );
+ $this->assertIdentical(
+ check_markup($text, 'filtered_html', '', FALSE, array(FILTER_TYPE_HTML_GENERATOR)),
+ $expected_filter_text_without_html_generators,
+ t('Expected filter result when skipping FILTER_TYPE_HTML_GENERATOR filters.')
+ );
+ // Related to @see FilterSecurityTest.php/testSkipSecurityFilters(), but
+ // this check focuses on the ability to filter multiple filter types at once.
+ // Drupal core only ships with these two types of filters, so this is the
+ // most extensive test possible.
+ $this->assertIdentical(
+ check_markup($text, 'filtered_html', '', FALSE, array(FILTER_TYPE_SECURITY, FILTER_TYPE_HTML_GENERATOR)),
+ $expected_filter_text_without_html_generators,
+ t('Expected filter result when skipping FILTER_TYPE_HTML_GENERATOR filters, even when trying to disable filters of the FILTER_TYPE_SECURITY type.')
+ );
+ }
+
+ function testFilterFormatAPI() {
+ // Test on filtered_html.
+ $this->assertEqual(
+ filter_get_allowed_tags_by_format('filtered_html'),
+ array('p', 'br', 'a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd'),
+ t('filter_get_allowed_tags_by_format() works as expected for the filtered_html format.')
+ );
+ $this->assertEqual(
+ filter_get_filter_types_by_format('filtered_html'),
+ array(FILTER_TYPE_SECURITY, FILTER_TYPE_HTML_GENERATOR),
+ t('filter_get_filter_types_by_format() works as expected for the filtered_html format.')
+ );
+
+ // Test on full_html. Strict comparison: a non-empty array also == TRUE.
+ $this->assertIdentical(
+ filter_get_allowed_tags_by_format('full_html'),
+ TRUE, // Every tag is allowed.
+ t('filter_get_allowed_tags_by_format() works as expected for the full_html format.')
+ );
+ $this->assertEqual(
+ filter_get_filter_types_by_format('full_html'),
+ array(FILTER_TYPE_SECURITY),
+ t('filter_get_filter_types_by_format() works as expected for the full_html format.')
+ );
+
+ // Test on stupid_filtered_html.
+ $stupid_filtered_html_format = array(
+ 'format' => 'stupid_filtered_html',
+ 'name' => 'Stupid Filtered HTML',
+ 'filters' => array(
+ // Note that the filter_html filter is of the type FILTER_TYPE_SECURITY.
+ 'filter_html' => array(
+ 'status' => 1,
+ 'settings' => array(
+ 'allowed_html' => '', // Nothing is allowed.
+ )
+ ),
+ )
+ );
+ $stupid_filtered_html_format = (object) $stupid_filtered_html_format;
+ filter_format_save($stupid_filtered_html_format);
+ $this->assertIdentical(
+ filter_get_allowed_tags_by_format('stupid_filtered_html'),
+ array(), // No tag is allowed; strict, since FALSE and NULL == array().
+ t('filter_get_allowed_tags_by_format() works as expected for the stupid_filtered_html format.')
+ );
+ $this->assertEqual(
+ filter_get_filter_types_by_format('stupid_filtered_html'),
+ array(FILTER_TYPE_SECURITY),
+ t('filter_get_filter_types_by_format() works as expected for the stupid_filtered_html format.')
+ );
+ }
+}
diff --git a/core/modules/filter/lib/Drupal/filter/Tests/FilterSecurityTest.php b/core/modules/filter/lib/Drupal/filter/Tests/FilterSecurityTest.php
index 3c35629..4f5252d 100644
--- a/core/modules/filter/lib/Drupal/filter/Tests/FilterSecurityTest.php
+++ b/core/modules/filter/lib/Drupal/filter/Tests/FilterSecurityTest.php
@@ -24,7 +24,7 @@ class FilterSecurityTest extends WebTestBase {
public static function getInfo() {
return array(
'name' => 'Security',
- 'description' => 'Test the behavior of check_markup() when a filter or text format vanishes.',
+ 'description' => 'Test the behavior of check_markup() when a filter or text format vanishes, or when check_markup() is called in such a way that it is instructed to skip all filters of the "FILTER_TYPE_SECURITY" type.',
'group' => 'Filter',
);
}
@@ -39,6 +39,12 @@ class FilterSecurityTest extends WebTestBase {
$filtered_html_format = array(
'format' => 'filtered_html',
'name' => 'Filtered HTML',
+ 'filters' => array(
+ // Note that the filter_html filter is of the type FILTER_TYPE_SECURITY.
+ 'filter_html' => array(
+ 'status' => 1,
+ ),
+ )
);
$filtered_html_format = (object) $filtered_html_format;
filter_format_save($filtered_html_format);
@@ -79,4 +85,15 @@ class FilterSecurityTest extends WebTestBase {
$this->drupalGet('node/' . $node->nid);
$this->assertNoText($body_raw, t('Node body not found.'));
}
+
+ /**
+ * Tests that security filters still run even when callers ask to skip them.
+ * NOTE(review): the tags in the fixture strings below appear to be missing.
+ */
+ function testSkipSecurityFilters() {
+ $text = "Text with some disallowed tags: , , .";
+ $expected_filtered_text = "Text with some disallowed tags: , unicorn, .";
+ $this->assertEqual(check_markup($text, 'filtered_html', '', FALSE, array()), $expected_filtered_text, t('Expected filter result.'));
+ $this->assertEqual(check_markup($text, 'filtered_html', '', FALSE, array(FILTER_TYPE_SECURITY)), $expected_filtered_text, t('Expected filter result, even when trying to disable filters of the FILTER_TYPE_SECURITY type.'));
+ }
}
diff --git a/core/modules/php/php.module b/core/modules/php/php.module
index 73db6d0..8385490 100644
--- a/core/modules/php/php.module
+++ b/core/modules/php/php.module
@@ -138,6 +138,7 @@ else {
function php_filter_info() {
$filters['php_code'] = array(
'title' => t('PHP evaluator'),
+ 'type' => FILTER_TYPE_HTML_GENERATOR,
'description' => t('Executes a piece of PHP code. The usage of this filter should be restricted to administrators only!'),
'process callback' => 'php_eval',
'tips callback' => '_php_filter_tips',
diff --git a/core/modules/rdf/lib/Drupal/rdf/Tests/CommentAttributesTest.php b/core/modules/rdf/lib/Drupal/rdf/Tests/CommentAttributesTest.php
index bb712b6..b58f45a 100644
--- a/core/modules/rdf/lib/Drupal/rdf/Tests/CommentAttributesTest.php
+++ b/core/modules/rdf/lib/Drupal/rdf/Tests/CommentAttributesTest.php
@@ -180,6 +180,6 @@ class CommentAttributesTest extends CommentTestBase {
$name = empty($account["name"]) ? $this->web_user->name : $account["name"] . " (not verified)";
$this->assertEqual((string)$comment_author[0], $name, t("RDFa markup for the comment author found."));
$comment_body = $this->xpath('//div[contains(@class, "comment") and contains(@typeof, "sioct:Comment")]//div[@class="content"]//div[contains(@class, "comment-body")]//div[@property="content:encoded"]');
- $this->assertEqual((string)$comment_body[0]->p, $comment->comment, t("RDFa markup for the comment body found."));
+ $this->assertEqual((string)$comment_body[0], $comment->comment, t("RDFa markup for the comment body found."));
}
}
diff --git a/core/profiles/standard/standard.install b/core/profiles/standard/standard.install
index f3eaf29..9e4dd7d 100644
--- a/core/profiles/standard/standard.install
+++ b/core/profiles/standard/standard.install
@@ -18,21 +18,11 @@ function standard_install() {
'name' => 'Filtered HTML',
'weight' => 0,
'filters' => array(
- // URL filter.
- 'filter_url' => array(
- 'weight' => 0,
- 'status' => 1,
- ),
// HTML filter.
'filter_html' => array(
'weight' => 1,
'status' => 1,
),
- // Line break filter.
- 'filter_autop' => array(
- 'weight' => 2,
- 'status' => 1,
- ),
// HTML corrector filter.
'filter_htmlcorrector' => array(
'weight' => 10,
@@ -48,16 +38,6 @@ function standard_install() {
'name' => 'Full HTML',
'weight' => 1,
'filters' => array(
- // URL filter.
- 'filter_url' => array(
- 'weight' => 0,
- 'status' => 1,
- ),
- // Line break filter.
- 'filter_autop' => array(
- 'weight' => 1,
- 'status' => 1,
- ),
// HTML corrector filter.
'filter_htmlcorrector' => array(
'weight' => 10,