-
-
-
";
// Dump the result of each implementation once so the two can be compared.
print_r(orig($settings));
print_r(changed($settings));

// Time 10000 consecutive calls of the baseline implementation and print the
// elapsed wall-clock seconds.
$start = microtime(TRUE);
$i = 0;
while ($i < 10000) {
  orig($settings);
  $i++;
}
$end = microtime(TRUE);
echo $end - $start, "\n";

// Same measurement for the reworked implementation.
$start = microtime(TRUE);
$i = 0;
while ($i < 10000) {
  changed($settings);
  $i++;
}
$end = microtime(TRUE);
echo $end - $start, "\n";
/**
 * Builds the allowed-HTML restrictions, parsing one tag per DOM document.
 *
 * Baseline implementation of the benchmark: every "<...>" token found in
 * $settings['allowed_html'] is loaded into its own document via Html_load().
 *
 * @param array $settings
 *   Settings array; only the 'allowed_html' string is read.
 *
 * @return array
 *   An array with a single 'allowed' key. It maps each tag name to FALSE (no
 *   attributes allowed) or to an array keyed by attribute name, whose values
 *   are TRUE (attribute allowed without listed values) or an array of the
 *   space-separated values listed for that attribute.
 */
function orig($settings) {
  $restrictions = ['allowed' => []];
  // Parse the allowed HTML setting, and gradually make the whitelist more
  // specific.
  $matches = [];
  preg_match_all('(<[^>]+>)', $settings['allowed_html'], $matches);
  foreach ($matches[0] as $allowed_tag) {
    $node = Html_load($allowed_tag)->getElementsByTagName('body')->item(0)->firstChild;
    // The pattern above also matches tokens that do not produce an element
    // (for example closing tags or comments). Skip those instead of reading
    // tagName/attributes from NULL or from a non-element node, which mirrors
    // the node type check done in changed().
    if (!$node instanceof \DOMElement) {
      continue;
    }
    // First, mark the tag as allowed, but with no attributes allowed.
    $tag = $node->tagName;
    $restrictions['allowed'][$tag] = FALSE;
    // Second, get the attributes, and if any exist, prepare for allowing
    // specific attributes.
    $attributes = $node->attributes;
    if ($attributes->length) {
      $restrictions['allowed'][$tag] = [];
    }
    // Third, iterate over the attributes, and mark them as allowed.
    foreach ($attributes as $name => $attribute) {
      $restrictions['allowed'][$tag][$name] = TRUE;
      // Fourth, if the attribute value is not the empty string, this means an
      // actual attribute value is assigned, mark each of the specified
      // attribute values as allowed. array_filter() without a callback drops
      // the empty tokens left by repeated spaces (and any other falsy token,
      // such as "0") while preserving the original keys.
      if ($attribute->value !== '') {
        $restrictions['allowed'][$tag][$name] = array_filter(explode(' ', $attribute->value));
      }
    }
  }
  return $restrictions;
}
/**
 * Builds the allowed-HTML restrictions from a single DOM document.
 *
 * Candidate implementation of the benchmark: the whole
 * $settings['allowed_html'] string is loaded once via Html_load() and the
 * direct children of <body> are inspected.
 *
 * @param array $settings
 *   Settings array; only the 'allowed_html' string is read.
 *
 * @return array
 *   An array with a single 'allowed' key. It maps each tag name to FALSE (no
 *   attributes allowed) or to an array keyed by attribute name, whose values
 *   are TRUE (attribute allowed without listed values) or an array of the
 *   space-separated values listed for that attribute.
 */
function changed($settings) {
  $restrictions = ['allowed' => []];
  // Parse the allowed HTML setting, and gradually make the whitelist more
  // specific.
  // Rewrite every ">" as " />" before parsing. NOTE(review): presumably this
  // makes each tag self-closing so the tags end up as siblings under <body>
  // rather than nested inside each other; confirm against the parser.
  $html = strtr($settings['allowed_html'], ['>' => ' />']);
  foreach (Html_load($html)->getElementsByTagName('body')->item(0)->childNodes as $node) {
    if ($node->nodeType !== XML_ELEMENT_NODE) {
      // Skip the empty text nodes inside tags
      continue;
    }
    // First, mark the tag as allowed, but with no attributes allowed.
    $tag = $node->tagName;
    $restrictions['allowed'][$tag] = FALSE;
    // Second, get the attributes, and if any exist, prepare for allowing
    // specific attributes.
    $attributes = $node->attributes;
    if ($attributes->length) {
      $restrictions['allowed'][$tag] = [];
    }
    // Third, iterate over the attributes, and mark them as allowed.
    foreach ($attributes as $name => $attribute) {
      $restrictions['allowed'][$tag][$name] = TRUE;
      // Fourth, if the attribute value is not the empty string, this means an
      // actual attribute value is assigned, mark each of the specified
      // attribute values as allowed. array_filter() without a callback drops
      // the empty tokens left by repeated spaces (and any other falsy token,
      // such as "0") while preserving the original keys.
      if ($attribute->value !== '') {
        $restrictions['allowed'][$tag][$name] = array_filter(explode(' ', $attribute->value));
      }
    }
  }
  return $restrictions;
}
/**
 * Parses an HTML snippet into a \DOMDocument.
 *
 * The snippet is injected into the <body> of a minimal wrapper document that
 * declares a UTF-8 charset, so callers can reach the parsed nodes through
 * getElementsByTagName('body')->item(0).
 *
 * @param string $html
 *   The HTML fragment to parse; it does not need to be well-formed.
 *
 * @return \DOMDocument
 *   The document containing the parsed fragment inside its <body> element.
 */
function Html_load($html) {
  $document = <<<EOD
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head>
<body>!html</body>
</html>
EOD;
  // PHP's \DOMDocument serialization adds extra whitespace when the markup
  // of the wrapping document contains newlines, so ensure we remove all
  // newlines before injecting the actual HTML body to be processed.
  // strtr() with an array does not rescan replaced text, so newlines inside
  // $html itself are left untouched.
  $document = strtr($document, ["\n" => '', '!html' => $html]);
  $dom = new \DOMDocument();
  // Ignore warnings during HTML soup loading.
  @$dom->loadHTML($document);
  return $dom;
}