diff --git a/core/modules/system/tests/modules/form_test/src/Form/FormTestCheckboxesZeroForm.php b/core/modules/system/tests/modules/form_test/src/Form/FormTestCheckboxesZeroForm.php index ccd752f..e7d0aa6 100644 --- a/core/modules/system/tests/modules/form_test/src/Form/FormTestCheckboxesZeroForm.php +++ b/core/modules/system/tests/modules/form_test/src/Form/FormTestCheckboxesZeroForm.php @@ -56,7 +56,7 @@ public function buildForm(array $form, FormStateInterface $form_state, $json = T * {@inheritdoc} */ public function submitForm(array &$form, FormStateInterface $form_state) { - if ($form_state->has('json')) { + if ($form_state->get('json')) { $form_state->setResponse(new JsonResponse($form_state->getValues())); } else { diff --git a/core/lib/Drupal/Core/Entity/Entity.php b/core/lib/Drupal/Core/Entity/Entity.php index 24a0ebf..50fc7c3 100644 --- a/core/lib/Drupal/Core/Entity/Entity.php +++ b/core/lib/Drupal/Core/Entity/Entity.php @@ -147,7 +147,7 @@ public function label() { $label = call_user_func($label_callback, $this); } elseif (($label_key = $entity_type->getKey('label')) && isset($this->{$label_key})) { - $label = $this->{$label_key}; + $label = (string) $this->{$label_key}; } return $label; } diff --git a/core/modules/views/src/Tests/Plugin/StyleOpmlTest.php b/core/modules/views/src/Tests/Plugin/StyleOpmlTest.php index 6580c0c..df008c5 100644 --- a/core/modules/views/src/Tests/Plugin/StyleOpmlTest.php +++ b/core/modules/views/src/Tests/Plugin/StyleOpmlTest.php @@ -60,7 +60,7 @@ public function testOpmlOutput() { $outline = $this->xpath('//outline[1]'); $this->assertEqual($outline[0]['type'], 'rss', 'The correct type attribute is used for rss OPML.'); $this->assertEqual($outline[0]['text'], $feed->label(), 'The correct text attribute is used for rss OPML.'); - $this->assertEqual($outline[0]['xmlurl'], $feed->getUrl(), 'The correct xmlUrl attribute is used for rss OPML.'); + $this->assertEqual($outline[0]['xmlUrl'], $feed->getUrl(), 'The correct xmlUrl 
attribute is used for rss OPML.'); $view = $this->container->get('entity.manager') ->getStorage('view') diff --git a/core/modules/aggregator/src/Tests/AggregatorRenderingTest.php b/core/modules/aggregator/src/Tests/AggregatorRenderingTest.php index 6e42fa8..5189573 100644 --- a/core/modules/aggregator/src/Tests/AggregatorRenderingTest.php +++ b/core/modules/aggregator/src/Tests/AggregatorRenderingTest.php @@ -129,7 +129,7 @@ public function testFeedPage() { $outline = $this->xpath('//outline[1]'); $this->assertEqual($outline[0]['type'], 'rss', 'The correct type attribute is used for rss OPML.'); $this->assertEqual($outline[0]['text'], $feed->label(), 'The correct text attribute is used for rss OPML.'); - $this->assertEqual($outline[0]['xmlurl'], $feed->getUrl(), 'The correct xmlUrl attribute is used for rss OPML.'); + $this->assertEqual($outline[0]['xmlUrl'], $feed->getUrl(), 'The correct xmlUrl attribute is used for rss OPML.'); // Check for the presence of a pager. $this->drupalGet('aggregator/sources/' . 
$feed->id()); diff --git a/core/modules/comment/src/Tests/Views/RowRssTest.php b/core/modules/comment/src/Tests/Views/RowRssTest.php index 89b9c1b..4536652 100644 --- a/core/modules/comment/src/Tests/Views/RowRssTest.php +++ b/core/modules/comment/src/Tests/Views/RowRssTest.php @@ -31,7 +31,7 @@ public function testRssRow() { $result = $this->xpath('//item'); $this->assertEqual(count($result), 1, 'Just one comment was found in the rss output.'); - $this->assertEqual($result[0]->pubdate, gmdate('r', $this->comment->getCreatedTime()), 'The right pubDate appears in the rss output.'); + $this->assertEqual($result[0]->pubDate, gmdate('r', $this->comment->getCreatedTime()), 'The right pubDate appears in the rss output.'); } } diff --git a/core/modules/editor/editor.admin.inc b/core/modules/editor/editor.admin.inc index 83bc2cf..67ebedf 100644 --- a/core/modules/editor/editor.admin.inc +++ b/core/modules/editor/editor.admin.inc @@ -92,8 +92,8 @@ function editor_image_upload_settings_form(Editor $editor) { $form['max_dimensions'] = array( '#type' => 'item', '#title' => t('Maximum dimensions'), - '#field_prefix' => '
', - '#field_suffix' => '
', + // '#field_prefix' => '
', + // '#field_suffix' => '
', '#description' => t('Images larger than these dimensions will be scaled down.'), '#states' => $show_if_image_uploads_enabled, ); diff --git a/core/modules/image/src/Plugin/Field/FieldType/ImageItem.php b/core/modules/image/src/Plugin/Field/FieldType/ImageItem.php index 4e49b81..0e5ee51 100644 --- a/core/modules/image/src/Plugin/Field/FieldType/ImageItem.php +++ b/core/modules/image/src/Plugin/Field/FieldType/ImageItem.php @@ -206,8 +206,8 @@ public function fieldSettingsForm(array $form, FormStateInterface $form_state) { '#title' => t('Maximum image resolution'), '#element_validate' => array(array(get_class($this), 'validateResolution')), '#weight' => 4.1, - '#field_prefix' => '
', - '#field_suffix' => '
', + // '#field_prefix' => '
', + // '#field_suffix' => '
', '#description' => t('The maximum allowed image size expressed as WIDTH×HEIGHT (e.g. 640×480). Leave blank for no restriction. If a larger image is uploaded, it will be resized to reflect the given width and height. Resizing images on upload will cause the loss of EXIF data in the image.', array('@url' => 'http://en.wikipedia.org/wiki/Exchangeable_image_file_format')), ); $element['max_resolution']['x'] = array( @@ -233,8 +233,8 @@ public function fieldSettingsForm(array $form, FormStateInterface $form_state) { '#title' => t('Minimum image resolution'), '#element_validate' => array(array(get_class($this), 'validateResolution')), '#weight' => 4.2, - '#field_prefix' => '
', - '#field_suffix' => '
', + // '#field_prefix' => '
', + // '#field_suffix' => '
', '#description' => t('The minimum allowed image size expressed as WIDTH×HEIGHT (e.g. 640×480). Leave blank for no restriction. If a smaller image is uploaded, it will be rejected.'), ); $element['min_resolution']['x'] = array( diff --git a/core/modules/config/src/Tests/ConfigEntityListTest.php b/core/modules/config/src/Tests/ConfigEntityListTest.php index c578ff2..8f90ff9 100644 --- a/core/modules/config/src/Tests/ConfigEntityListTest.php +++ b/core/modules/config/src/Tests/ConfigEntityListTest.php @@ -174,7 +174,7 @@ function testListUI() { // operations list. $this->assertIdentical((string) $elements[0], 'Default'); $this->assertIdentical((string) $elements[1], 'dotted.default'); - $this->assertTrue($elements[2]->children()->xpath('//ul'), 'Operations list found.'); + $this->assertTrue($elements[2]->children()->xpath($this->localizeXpath('//ul')), 'Operations list found.'); // Add a new entity using the operations link. $this->assertLink('Add test configuration'); diff --git a/core/modules/system/src/Tests/Common/RenderTest.php b/core/modules/system/src/Tests/Common/RenderTest.php index b137d19..010c296 100644 --- a/core/modules/system/src/Tests/Common/RenderTest.php +++ b/core/modules/system/src/Tests/Common/RenderTest.php @@ -652,9 +652,9 @@ function testDrupalRenderChildrenPostRenderCache() { $dom = Html::load($cached_element['#markup']); $xpath = new \DOMXPath($dom); - $parent = $xpath->query('//details[@class="form-wrapper" and @open="open"]/summary[@role="button" and @aria-expanded and text()="Parent"]')->length; - $child = $xpath->query('//details[@class="form-wrapper" and @open="open"]/div[@class="details-wrapper"]/details[@class="form-wrapper" and @open="open"]/summary[@role="button" and @aria-expanded and text()="Child"]')->length; - $subchild = $xpath->query('//details[@class="form-wrapper" and @open="open"]/div[@class="details-wrapper"]/details[@class="form-wrapper" and @open="open"]/div [@class="details-wrapper" and text()="Subchild"]')->length; + 
$parent = $xpath->query($this->localizeXpath('//details[@class="form-wrapper" and @open="open"]/summary[@role="button" and @aria-expanded and text()="Parent"]'))->length; + $child = $xpath->query($this->localizeXpath('//details[@class="form-wrapper" and @open="open"]/div[@class="details-wrapper"]/details[@class="form-wrapper" and @open="open"]/summary[@role="button" and @aria-expanded and text()="Child"]'))->length; + $subchild = $xpath->query($this->localizeXpath('//details[@class="form-wrapper" and @open="open"]/div[@class="details-wrapper"]/details[@class="form-wrapper" and @open="open"]/div[@class="details-wrapper" and text()="Subchild"]'))->length; $this->assertTrue($parent && $child && $subchild, 'The correct data is cached: the stored #markup is not affected by #post_render_cache callbacks.'); // Remove markup because it's compared above in the xpath. @@ -719,9 +719,9 @@ function testDrupalRenderChildrenPostRenderCache() { $dom = Html::load($cached_parent_element['#markup']); $xpath = new \DOMXPath($dom); - $parent = $xpath->query('//details[@class="form-wrapper" and @open="open"]/summary[@role="button" and @aria-expanded and text()="Parent"]')->length; - $child = $xpath->query('//details[@class="form-wrapper" and @open="open"]/div[@class="details-wrapper"]/details[@class="form-wrapper" and @open="open"]/summary[@role="button" and @aria-expanded and text()="Child"]')->length; - $subchild = $xpath->query('//details[@class="form-wrapper" and @open="open"]/div[@class="details-wrapper"]/details[@class="form-wrapper" and @open="open"]/div [@class="details-wrapper" and text()="Subchild"]')->length; + $parent = $xpath->query($this->localizeXpath('//details[@class="form-wrapper" and @open="open"]/summary[@role="button" and @aria-expanded and text()="Parent"]'))->length; + $child = $xpath->query($this->localizeXpath('//details[@class="form-wrapper" and @open="open"]/div[@class="details-wrapper"]/details[@class="form-wrapper" and @open="open"]/summary[@role="button" 
and @aria-expanded and text()="Child"]'))->length; + $subchild = $xpath->query($this->localizeXpath('//details[@class="form-wrapper" and @open="open"]/div[@class="details-wrapper"]/details[@class="form-wrapper" and @open="open"]/div[@class="details-wrapper" and text()="Subchild"]'))->length; $this->assertTrue($parent && $child && $subchild, 'The correct data is cached for the parent: the stored #markup is not affected by #post_render_cache callbacks.'); // Remove markup because it's compared above in the xpath. @@ -745,8 +745,8 @@ function testDrupalRenderChildrenPostRenderCache() { $dom = Html::load($cached_child_element['#markup']); $xpath = new \DOMXPath($dom); - $child = $xpath->query('//details[@class="form-wrapper" and @open="open"]/summary[@role="button" and @aria-expanded and text()="Child"]')->length; - $subchild = $xpath->query('//details[@class="form-wrapper" and @open="open"]/div [@class="details-wrapper" and text()="Subchild"]')->length; + $child = $xpath->query($this->localizeXpath('//details[@class="form-wrapper" and @open="open"]/summary[@role="button" and @aria-expanded and text()="Child"]'))->length; + $subchild = $xpath->query($this->localizeXpath('//details[@class="form-wrapper" and @open="open"]/div[@class="details-wrapper" and text()="Subchild"]'))->length; $this->assertTrue($child && $subchild, 'The correct data is cached for the child: the stored #markup is not affected by #post_render_cache callbacks.'); // Remove markup because it's compared above in the xpath. 
diff --git a/core/modules/simpletest/src/WebTestBase.php b/core/modules/simpletest/src/WebTestBase.php index e27b690..9e638cf 100644 --- a/core/modules/simpletest/src/WebTestBase.php +++ b/core/modules/simpletest/src/WebTestBase.php @@ -28,6 +28,7 @@ use Drupal\Core\Datetime\DrupalDateTime; use Drupal\block\Entity\Block; use Drupal\Core\Url; +use Masterminds\HTML5; use Symfony\Component\HttpFoundation\Request; use Drupal\user\Entity\Role; @@ -1862,8 +1863,8 @@ protected function drupalProcessAjaxResponse($content, array $ajax_response, arr ); // DOM can load HTML soup. But, HTML soup can throw warnings, suppress // them. - $dom = new \DOMDocument(); - @$dom->loadHTML($content); + $html5 = new HTML5(); + $dom = $html5->loadHTML($content); // XPath allows for finding wrapper nodes better than DOM does. $xpath = new \DOMXPath($dom); foreach ($ajax_response as $command) { @@ -1888,12 +1889,11 @@ protected function drupalProcessAjaxResponse($content, array $ajax_response, arr // and 'body', since these are used by // \Drupal\Core\Ajax\AjaxResponse::ajaxRender(). elseif (in_array($command['selector'], array('head', 'body'))) { - $wrapperNode = $xpath->query('//' . $command['selector'])->item(0); + $wrapperNode = $xpath->query($this->localizeXpath('//' . $command['selector']))->item(0); } if ($wrapperNode) { // ajax.js adds an enclosing DIV to work around a Safari bug. - $newDom = new \DOMDocument(); - @$newDom->loadHTML('
' . $command['data'] . '
'); + $newDom = $html5->loadHTML('
' . trim($command['data']) . '
'); $newNode = $dom->importNode($newDom->documentElement->firstChild->firstChild, TRUE); $method = isset($command['method']) ? $command['method'] : $ajax_settings['method']; // The "method" is a jQuery DOM manipulation function. Emulate @@ -1943,14 +1943,14 @@ protected function drupalProcessAjaxResponse($content, array $ajax_response, arr case 'add_css': break; case 'update_build_id': - $buildId = $xpath->query('//input[@name="form_build_id" and @value="' . $command['old'] . '"]')->item(0); + $buildId = $xpath->query($this->localizeXpath('//input[@name="form_build_id" and @value="' . $command['old'] . '"]'))->item(0); if ($buildId) { $buildId->setAttribute('value', $command['new']); } break; } } - $content = $dom->saveHTML(); + $content = $html5->saveHTML($dom); $this->setRawContent($content); $this->setDrupalSettings($drupal_settings); } @@ -2067,11 +2067,11 @@ protected function cronRun() { */ protected function checkForMetaRefresh() { if (strpos($this->getRawContent(), 'parse()) { - $refresh = $this->xpath('//meta[@http-equiv="Refresh"]'); - if (!empty($refresh)) { + preg_match('||', $this->getRawContent(), $matches); + if ($matches) { // Parse the content attribute of the meta tag for the format: // "[delay]: URL=[page_to_redirect_to]". - if (preg_match('/\d+;\s*URL=(?.*)/i', $refresh[0]['content'], $match)) { + if (preg_match('/\d+;\s*URL=(?.*)/i', $matches[1], $match)) { return $this->drupalGet($this->getAbsoluteUrl(String::decodeEntities($match['url']))); } } @@ -2129,7 +2129,7 @@ protected function drupalHead($path, array $options = array(), array $headers = */ protected function handleForm(&$post, &$edit, &$upload, $submit, $form) { // Retrieve the form elements. 
- $elements = $form->xpath('.//input[not(@disabled)]|.//textarea[not(@disabled)]|.//select[not(@disabled)]'); + $elements = $form->xpath($this->localizeXpath('.//input[not(@disabled)]|.//textarea[not(@disabled)]|.//select[not(@disabled)]')); $submit_matches = FALSE; foreach ($elements as $element) { // SimpleXML objects need string casting all the time. diff --git a/core/lib/Drupal/Component/XpathHelper/Lexer.php b/core/lib/Drupal/Component/XpathHelper/Lexer.php new file mode 100644 index 0000000..8876aac --- /dev/null +++ b/core/lib/Drupal/Component/XpathHelper/Lexer.php @@ -0,0 +1,240 @@ + TRUE, + ']' => TRUE, + '=' => TRUE, + '(' => TRUE, + ')' => TRUE, + '.' => TRUE, + '<' => TRUE, + '>' => TRUE, + '*' => TRUE, + '+' => TRUE, + // Used in element names and functions. It's easier to just make a special + // case in the parser than to have the minus be a word boundary. + // '-' => TRUE, + '!' => TRUE, + '|' => TRUE, + ',' => TRUE, + ' ' => TRUE, + '"' => TRUE, + "'" => TRUE, + ':' => TRUE, + '::' => TRUE, + '/' => TRUE, + '//' => TRUE, + '@' => TRUE, + ); + + /** + * Lexes an XPath expression. + * + * @param string $expression + * An XPath expression. + * + * @return array + * A list of tokens from the XPath expression. + */ + public function lex($expression) { + $this->expression = $expression; + $this->length = strlen($expression); + $this->cursor = 0; + + $tokens = array(); + while (TRUE) { + $token = $this->readToken(); + if ($token === '') { + break; + } + $tokens[] = $token; + } + + return $tokens; + } + + /** + * Determines if a token is boundary for a word. + * + * @param string $token + * The token. + * + * @return bool + * Returns true if the token is a word boundary, and false if not. + */ + public static function isWordBoundary($token) { + return isset(static::$wordBoundaries[$token]); + } + + /** + * Reads the next token from the expression. + * + * @return string + * The next token, or an empty string on completion. 
+ */ + protected function readToken() { + while ($this->cursor < $this->length) { + $char = $this->expression[$this->cursor]; + + if ($char === '/') { + return $this->readOneOrTwoSlashes(); + } + + if ($char === '"' || $char === "'") { + return $this->consumeQuotes($char); + } + + if ($char === ':') { + return $this->readNamespaceOrAxis(); + } + + if ($char === '@') { + return $this->readAttribute(); + } + + if ($this->isWordBoundary($char)) { + $this->cursor++; + return $char; + } + + return $this->readWord(); + } + + return ''; + } + + /** + * Reads the next word from the expression. + * + * A word is considered anything that isn't a word boundary. + * + * @return string + * The next word. + */ + protected function readWord() { + $word = ''; + + while ($this->cursor < $this->length) { + $char = $this->expression[$this->cursor]; + + // Found a boundary. + if ($this->isWordBoundary($char)) { + break; + } + + $word .= $char; + $this->cursor++; + } + + return $word; + } + + /** + * Reads a quoted string from an XPath expression. + * + * @param string $start_quote + * The character that started the quoted string. + * + * @return string + * The quoted string. + */ + protected function consumeQuotes($start_quote) { + $output = $start_quote; + do { + $next_char = $this->readNextChar(); + $output .= $next_char; + } while ($next_char !== '' && $next_char !== $start_quote); + + $this->cursor++; + return $output; + } + + /** + * Reads a namespace token or an axis token. + * + * @return string + * Either a namespace separator or an axis separator. One or two colons. + */ + protected function readNamespaceOrAxis() { + if ($this->readNextChar() === ':') { + $this->cursor++; + return '::'; + } + return ':'; + } + + /** + * Reads one or two slashes. + * + * @return string + * Returns / or //. + */ + protected function readOneOrTwoSlashes() { + if ($this->readNextChar() === '/') { + $this->cursor++; + return '//'; + } + return '/'; + } + + /** + * Reads a shorthand attribute. 
+ * + * @return string + * An attribute string starting with @. + */ + protected function readAttribute() { + $this->cursor++; + return '@' . $this->readWord(); + } + + /** + * Returns the next character advancing the cursor. + * + * @return string + * The next character. + */ + protected function readNextChar() { + $this->cursor++; + return isset($this->expression[$this->cursor]) ? $this->expression[$this->cursor] : ''; + } + +} diff --git a/core/lib/Drupal/Component/XpathHelper/Namespacer.php b/core/lib/Drupal/Component/XpathHelper/Namespacer.php new file mode 100644 index 0000000..a5a2af7 --- /dev/null +++ b/core/lib/Drupal/Component/XpathHelper/Namespacer.php @@ -0,0 +1,351 @@ + TRUE, + 'and' => TRUE, + 'div' => TRUE, + 'mod' => TRUE, + ); + + /** + * Tokens that come before elements. + * + * @var array + */ + protected static $precedesElement = array( + '/' => TRUE, + '//' => TRUE, + '::' => TRUE, + '(' => TRUE, + ',' => TRUE, + '[' => TRUE, + ); + + /** + * Prefixes an XPath expression. + * + * Converts an expression from //div/a to //x:div/x:a. + * + * @param string $xpath + * The XPath expression to prefix. + * @param string $prefix + * (optional) The prefix to use. Defaults to "x". + * + * @return string + * The prefixed XPath expression. + */ + public static function prefix($xpath, $prefix = 'x') { + if (!isset(static::$cache[$prefix][$xpath])) { + $parser = new static($xpath, $prefix, new Lexer()); + static::$cache[$prefix][$xpath] = $parser->parse(); + } + + return static::$cache[$prefix][$xpath]; + } + + /** + * Localizes an XPath expression. + * + * Converts an expression from //div/a to + * //*[local-name() = "div"]/*[local-name() = "a"]. + * + * @param string $xpath + * The XPath expression to prefix. + * + * @return string + * The localized XPath expression. + */ + public static function localize($xpath) { + return static::prefix($xpath, NULL); + } + + /** + * Constructs a Namespacer object. 
+ * + * @param string $expression + * The XPath expression. + * @param string|null $prefix + * The prefix to use, or NULL to emit local-name() tests instead. + * @param \Drupal\Component\XpathHelper\Lexer $lexer + * The lexer that will produce tokens. + */ + public function __construct($expression, $prefix, Lexer $lexer) { + $this->prefix = $prefix; + $this->tokens = $lexer->lex($expression); + } + + /** + * Parses an XPath expression. + * + * @return string + * The rewritten XPath expression. + */ + public function parse() { + $output = ''; + + $token_count = count($this->tokens); + + for ( ; $this->cursor < $token_count; $this->cursor++) { + $token = $this->tokens[$this->cursor]; + + // A token that should be copied directly to the output. + if ($this->shouldCopy($token)) { + $output .= $token; + } + // A namespaced element. + elseif ($element = $this->getNamespacedElement($token)) { + $output .= $element; + } + // Namespace the element. + else { + $output .= $this->rewrite($token); + } + } + + return $output; + } + + /** + * Rewrites an element name. + * + * Either in the form prefix:element or *[local-name() = "element"] + * + * @param string $element + * The element to rewrite. + * + * @return string + * The rewritten string. + */ + protected function rewrite($element) { + if ($this->prefix) { + return $this->prefix . ':' . $element; + } + return '*[local-name() = "' . $element . '"]'; + } + + /** + * Determines if a token should be copied as-is to the output. + * + * @param string $token + * The token. + * + * @return bool + * Returns true if the token should be copied, and false if not. + */ + protected function shouldCopy($token) { + if (Lexer::isWordBoundary($token)) { + return TRUE; + } + // Attribute or quoted string. 
+ elseif ($token[0] === '@' || $token[0] === '"' || $token[0] === "'") { + return TRUE; + } + elseif (is_numeric($token) || is_numeric($token[0])) { + return TRUE; + } + elseif ($this->isFunctionCall()) { + return TRUE; + } + elseif ($this->isOperator($token)) { + return TRUE; + } + elseif ($this->isAxis()) { + return TRUE; + } + elseif ($this->wasAttributeAxis()) { + return TRUE; + } + // Handles the edge case where subtraction is written like 2 - 1. + elseif ($token === '-') { + return TRUE; + } + + return FALSE; + } + + /** + * Returns the namespaced element. + * + * @param string $token + * The token. + * + * @return string|bool + * The namespaced element, or false if it doesn't exist. + */ + protected function getNamespacedElement($token) { + if ($this->peek(1) !== ':') { + return FALSE; + } + + // Build the namespaced element, prefix:element. + $token .= ':' . $this->peek(2); + $this->cursor += 2; + return $token; + } + + /** + * Determines if the current token is a function call. + * + * Looks past any space tokens that follow the current cursor position, so + * the token itself does not need to be passed in. + * + * @return bool + * Returns true if the token is a function call and false if not. + */ + protected function isFunctionCall() { + // Spaces before the opening parens of a function call are valid. + // Ex: //div[contains (@id, "thing")] + return $this->nextNonSpace() === '('; + } + + /** + * Checks if a token is an operator, one of div, or, and, mod. + * + * @param string $token + * The token to check. + * + * @return bool + * Returns true if the token is an operator, and false if not. + */ + protected function isOperator($token) { + if (!isset(static::$operators[$token])) { + return FALSE; + } + // A preceding "0" token is falsy in PHP, so test against '' explicitly. + $prev = $this->prevNonSpace(); + return $prev !== '' && !isset(static::$precedesElement[$prev]); + } + + /** + * Determines whether this token is an axis. + * + * descendant-or-self, attribute, etc. + * + * @return bool + * True if the token is an axis, false if not. 
+ */ + protected function isAxis() { + return $this->nextNonSpace() === '::'; + } + + /** + * Determines whether the preceding token was an attribute axis. + * + * attribute:: + * + * @return bool + * True if the preceding token was an attribute axis, false if not. + */ + protected function wasAttributeAxis() { + return $this->prevNonSpace() === '::' && $this->prevNonSpace(2) === 'attribute'; + } + + /** + * Returns the next non-space token. + * + * @param int $delta + * (optional) The delta of the next non-space character. Defaults to 1. + * + * @return string + * The nth next non-space character. + */ + protected function nextNonSpace($delta = 1) { + $n = 1; + + for ($i = 0; $i < $delta; $i++) { + do { + $next = $this->peek($n); + $n++; + } while ($next === ' '); + } + + return $next; + } + + /** + * Returns the previous non-space token. + * + * @param int $delta + * (optional) The delta of the previous non-space character. Defaults to 1. + * + * @return string + * The nth previous non-space character. + */ + protected function prevNonSpace($delta = 1) { + $n = -1; + + for ($i = 0; $i < $delta; $i++) { + do { + $prev = $this->peek($n); + $n--; + } while ($prev === ' '); + } + + return $prev; + } + + /** + * Returns a token from an offset of the current position. + * + * @param int $offset + * The offset from the current position. + * + * @return string + * Returns the token at the given offset. + */ + protected function peek($offset) { + return isset($this->tokens[$this->cursor + $offset]) ? 
$this->tokens[$this->cursor + $offset] : ''; + } + +} diff --git a/core/modules/simpletest/src/AssertContentTrait.php b/core/modules/simpletest/src/AssertContentTrait.php index e114ebf..942b3e1 100644 --- a/core/modules/simpletest/src/AssertContentTrait.php +++ b/core/modules/simpletest/src/AssertContentTrait.php @@ -10,6 +10,8 @@ use Drupal\Component\Serialization\Json; use Drupal\Component\Utility\String; use Drupal\Component\Utility\Xss; +use Drupal\Component\XpathHelper\Namespacer; +use Masterminds\HTML5; use Symfony\Component\CssSelector\CssSelector; /** @@ -122,15 +124,22 @@ protected function setDrupalSettings($settings) { */ protected function parse() { if (!isset($this->elements)) { - // DOM can load HTML soup. But, HTML soup can throw warnings, suppress - // them. - $html_dom = new \DOMDocument(); - @$html_dom->loadHTML('' . $this->getRawContent()); - if ($html_dom) { - $this->pass(String::format('Valid HTML found on "@path"', array('@path' => $this->getUrl())), 'Browser'); + + // Check for XML preamble. + if (substr($this->getRawContent(), 0, 5) === 'loadXML($this->getRawContent()); + } + else { + $html5 = new HTML5(); + $dom = $html5->loadHTML($this->getRawContent()); + } + + if ($dom) { + $this->pass(String::format('Valid markup found on "@path"', array('@path' => $this->getUrl())), 'Browser'); // It's much easier to work with simplexml than DOM, luckily enough // we can just simply import our DOM tree. - $this->elements = simplexml_import_dom($html_dom); + $this->elements = simplexml_import_dom($dom); } } if ($this->elements === FALSE) { @@ -219,6 +228,14 @@ protected function buildXPathQuery($xpath, array $args = array()) { protected function xpath($xpath, array $arguments = array()) { if ($this->parse()) { $xpath = $this->buildXPathQuery($xpath, $arguments); + + // Register the default namespace if it exists. 
+ $namespaces = $this->elements->getDocNamespaces(); + if (!empty($namespaces[''])) { + $xpath = $this->prefixXpath($xpath); + $this->elements->registerXPathNamespace('x', $namespaces['']); + } + $result = $this->elements->xpath($xpath); // Some combinations of PHP / libxml versions return an empty array // instead of the documented FALSE. Forcefully convert any falsish values @@ -231,6 +248,32 @@ protected function xpath($xpath, array $arguments = array()) { } /** + * Prefixes an xpath expression. + * + * @param string $xpath + * The xpath expression. + * + * @return string + * The prefixed xpath expression. + */ + protected function prefixXpath($xpath) { + return Namespacer::prefix($xpath); + } + + /** + * Localizes an xpath expression. + * + * @param string $xpath + * The xpath expression. + * + * @return string + * The localized xpath expression. + */ + protected function localizeXpath($xpath) { + return Namespacer::localize($xpath); + } + + /** * Searches elements using a CSS selector in the raw content. * * The search is relative to the root element (HTML tag normally) of the page. diff --git a/core/tests/Drupal/Tests/Component/XpathHelper/LexerTest.php b/core/tests/Drupal/Tests/Component/XpathHelper/LexerTest.php new file mode 100755 index 0000000..4c4ca78 --- /dev/null +++ b/core/tests/Drupal/Tests/Component/XpathHelper/LexerTest.php @@ -0,0 +1,53 @@ +assertSame($expected, $lexer->lex($input)); + } + + /** + * Data provider for testLex(). + * + * @return array + * - An xpath argument to lex(). + * - Expected output from lex(). 
+ */ + public function providerLex() { + return [ + ['cat', ['cat']], + ['/cow/barn', ['/', 'cow', '/', 'barn']], + ['""', ['""']], + ['/cow/barn[@id = "asdfsaf"]', ['/', 'cow', '/', 'barn', '[', '@id', ' ', '=', ' ', '"asdfsaf"', ']']], + ['/cow/barn[@id=chair]', ['/', 'cow', '/', 'barn', '[', '@id', '=', 'chair', ']']], + ['/cow:asdf', ['/', 'cow', ':', 'asdf']], + ['@cow', ['@cow']], + ['starts-with(@id, "cat")', ['starts-with', '(', '@id' , ',', ' ', '"cat"', ')']], + ['starts-with(cat/dog/fire:breather, "cat")', ['starts-with', '(', 'cat', '/', 'dog', '/', 'fire' , ':', 'breather', ',', ' ', '"cat"', ')']], + ['child::book', ['child', '::', 'book']], + ["//a[@href='javascript:void(0)']", ['//', 'a', '[', '@href', '=', "'javascript:void(0)'", ']']], + ['1+1', ['1', '+', '1']], + ['//a[@id="id"and 1]', ['//', 'a', '[', '@id', '=', '"id"', 'and', ' ', '1', ']']], + ['0', ['0']], + ]; + } + +} diff --git a/core/tests/Drupal/Tests/Component/XpathHelper/NamespacerTest.php b/core/tests/Drupal/Tests/Component/XpathHelper/NamespacerTest.php new file mode 100755 index 0000000..fa02cc8 --- /dev/null +++ b/core/tests/Drupal/Tests/Component/XpathHelper/NamespacerTest.php @@ -0,0 +1,310 @@ +assertSame($expected, Namespacer::localize($xpath)); + } + + /** + * @covers ::prefix + * @dataProvider providerPrefixDefaultPrefix + */ + public function testPrefixDefaultPrefix($xpath, $expected) { + $this->assertSame($expected, Namespacer::prefix($xpath)); + } + + /** + * Data provider for testPrefixDefaultPrefix(). + * + * Gives us data which we can use to test prefix()'s default prefix. + * + * @return array + * - An xpath in need of a prefix. + * - The expected xpath with the default prefix added. 
+ */ + public function providerPrefixDefaultPrefix() { + $tests = [ + ['cow', 'x:cow'], + ['/cow/barn', '/x:cow/x:barn'], + ['/cow/barn[@id = "asdfsaf"]', '/x:cow/x:barn[@id = "asdfsaf"]'], + ['/cow/barn [@id = "asdfsaf"]', '/x:cow/x:barn [@id = "asdfsaf"]'], + ['/cow/barn[@id=chair]', '/x:cow/x:barn[@id=x:chair]'], + ['/cow:asdf', '/cow:asdf'], + ['@cow', '@cow'], + ['starts-with(@id, "cat")', 'starts-with(@id, "cat")'], + ['starts-with(cat/dog/fire:breather, "cat")', 'starts-with(x:cat/x:dog/fire:breather, "cat")'], + ['//state[@id = ../city[name="CityName"]/state_id]/name', '//x:state[@id = ../x:city[x:name="CityName"]/x:state_id]/x:name'], + ['attribute::lang', 'attribute::lang'], + ['attribute:: lang', 'attribute:: lang'], + ['attribute ::lang', 'attribute ::lang'], + ['attribute :: lang', 'attribute :: lang'], + ['child::book', 'child::x:book'], + ['child :: book', 'child :: x:book'], + ['child::*', 'child::*'], + ['child:: *', 'child:: *'], + ['child ::*', 'child ::*'], + ['child :: *', 'child :: *'], + ['child::text()', 'child::text()'], + ['child::text ()', 'child::text ()'], + ['ancestor-or-self::book', 'ancestor-or-self::x:book'], + ['child::*/child::price', 'child::*/child::x:price'], + ["/asdfasfd[@id = 'a' or @id='b']", "/x:asdfasfd[@id = 'a' or @id='b']"], + ["id('yui-gen2')/x:div[3]/x:div/x:a[1]", "id('yui-gen2')/x:div[3]/x:div/x:a[1]"], + ["/descendant::a[@class='buttonCheckout']", "/descendant::x:a[@class='buttonCheckout']"], + ["//a[@href='javascript:void(0)']", "//x:a[@href='javascript:void(0)']"], + ['//*/@attribute', '//*/@attribute'], + ['/descendant::*[attribute::attribute]', '/descendant::*[attribute::attribute]'], + ['//Event[not(System/Level = preceding::Level) or not(System/Task = preceding::Task)]', '//x:Event[not(x:System/x:Level = preceding::x:Level) or not(x:System/x:Task = preceding::x:Task)]'], + ["section[@type='cover']/line/@page", "x:section[@type='cover']/x:line/@page"], + ['/articles/article/*[name()="title" or 
name()="short"]', '/x:articles/x:article/*[name()="title" or name()="short"]'], + ["/*/article[@id='2']/*[self::title or self::short]", "/*/x:article[@id='2']/*[self::x:title or self::x:short]"], + ['not(/asdfasfd/asdfasf//asdfasdf) | /asdfasf/sadfasf/@asdf', 'not(/x:asdfasfd/x:asdfasf//x:asdfasdf) | /x:asdfasf/x:sadfasf/@asdf'], + ['Ülküdak', 'x:Ülküdak'], + ['//textarea[@name="style[type]"]|//input[@name="style[type]"]|//select[@name="style[type]"]', '//x:textarea[@name="style[type]"]|//x:input[@name="style[type]"]|//x:select[@name="style[type]"]'], + ['//a[@id="id"and 1]', '//x:a[@id="id"and 1]'], + ['//*[@id and@class]', '//*[@id and@class]'], + ['/or', '/x:or'], + ['//and', '//x:and'], + ['div', 'x:div'], + ['a-1', 'x:a-1'], + ['//element [contains(@id, "1234")and contains(@id, 345)]', '//x:element [contains(@id, "1234")and contains(@id, 345)]'], + ['following-sibling::div', 'following-sibling::x:div'], + ['// div / a / @href', '// x:div / x:a / @href'], + ['a[contains(div, div)', 'x:a[contains(x:div, x:div)'], + ]; + + // Math related. + foreach (['+', '-', '*', '=', '!=', '<', '>', '<=', '>='] as $op) { + $tests[] = ["1{$op}2", "1{$op}2"]; + $tests[] = ["1 {$op}2", "1 {$op}2"]; + $tests[] = ["1{$op} 2", "1{$op} 2"]; + $tests[] = ["1 {$op} 2", "1 {$op} 2"]; + } + + foreach (['and', 'or', 'mod', 'div'] as $op) { + $tests[] = ["1{$op} 2", "1{$op} 2"]; + $tests[] = ["1 {$op} 2", "1 {$op} 2"]; + } + + return $tests; + } + + /** + * Data provider for testParse(). + * + * @return array + * - Expected parsed text. + * - Array of tokens to parse. + * - Return value for mocked shouldCopy(). + * - Return value for mocked getNamespacedElement(). FALSE means return + * false, while any other value means return the string 'namespaced'. 
+ */ + public function providerParse() { + return [ + ['', [], FALSE, FALSE], + ['', [], TRUE, TRUE], + ['token', ['token'], TRUE, TRUE], + ['tokentoken', ['token', 'token'], TRUE, TRUE], + ['namespaced', ['token'], FALSE, TRUE], + ['namespacednamespaced', ['token', 'token'], FALSE, TRUE], + ['rewritten', ['token'], FALSE, FALSE], + ['rewrittenrewritten', ['token', 'token'], FALSE, FALSE], + ]; + } + + /** + * @covers ::parse + * @dataProvider providerParse + */ + public function testParse($expected, $token_array, $should_copy, $get_namespaced_element) { + // Create a mocked Namespacer object. + $mock_namespacer = $this->getMockBuilder('\Drupal\Component\XpathHelper\Namespacer') + ->disableOriginalConstructor() + ->setMethods(array('shouldCopy', 'getNamespacedElement', 'rewrite')) + ->getMock(); + + // Set expectations for shouldCopy(). It gets called on every token in the + // array. + $mock_namespacer->expects($this->exactly(count($token_array))) + ->method('shouldCopy') + ->willReturn($should_copy); + + // Set expectations for getNamespacedElement(). It's called for any token + // in the array that shouldn't copy. We'll can just use $shouldCopy + // to turn expectations on or off. + $get_namespaced_element_count = 0; + if (!$should_copy) { + $get_namespaced_element_count = count($token_array); + } + // If getNamespacedElement() returns FALSE, we'll call rewrite(), so we have + // to manage that. + $get_namespaced_element_value = $get_namespaced_element; + if ($get_namespaced_element) { + $get_namespaced_element_value = 'namespaced'; + } + // Finally assemble all this in the method. + $mock_namespacer->expects($this->exactly($get_namespaced_element_count)) + ->method('getNamespacedElement') + ->willReturn($get_namespaced_element_value); + + // Set expectations for rewrite(). 
+ $rewrite_count = 0; + if (!$should_copy && !$get_namespaced_element) { + $rewrite_count = count($token_array); + } + $mock_namespacer->expects($this->exactly($rewrite_count)) + ->method('rewrite') + ->willReturn('rewritten'); + + // Set $tokens. $tokens is protected so we must use reflection. + $ref_tokens = new \ReflectionProperty($mock_namespacer, 'tokens'); + $ref_tokens->setAccessible(TRUE); + $ref_tokens->setValue($mock_namespacer, $token_array); + + // Set $cursor so our parse always starts from the beginning. + $ref_cursor = new \ReflectionProperty($mock_namespacer, 'cursor'); + $ref_cursor->setAccessible(TRUE); + $ref_cursor->setValue($mock_namespacer, 0); + + // Exercise parse(). + $this->assertEquals($expected, $mock_namespacer->parse()); + } + + /** + * Data provider for testShouldCopy(). + * + * The expectation of how many times a method will be called is encoded. + * Positive numbers are how many time the method will be called and will + * return TRUE. Negative numbers are the number of times the method will be + * callled and return FALSE. + * + * @return array + * - Expected bool result. + * - Token to test against. + * - (optional) Integer expectation of how many times isFunctionCall() will + * be called. + * - (optional) Integer expectation of how many times isOperator() will be + * called. + * - (optional) Integer expectation of how many times isAxis() will be + * called. + * - (optional) Integer expectation of how many times wasAttributeAxis() + * will be called. + */ + public function providerShouldCopy() { + return [ + // Values for Lexer::isWordBoundary(). 
+ [TRUE, '['], + [TRUE, '['], + [TRUE, ']'], + [TRUE, '='], + [TRUE, '('], + [TRUE, ')'], + [TRUE, '.'], + [TRUE, '<'], + [TRUE, '>'], + [TRUE, '*'], + [TRUE, '+'], + [TRUE, '!'], + [TRUE, '|'], + [TRUE, ','], + [TRUE, ' '], + [TRUE, '"'], + [TRUE, "'"], + [TRUE, ':'], + [TRUE, '::'], + [TRUE, '/'], + [TRUE, '//'], + [TRUE, '@'], + [TRUE, '@attribute'], + [TRUE, '"quoted"'], + [TRUE, "'quoted'"], + // Numeric. + [TRUE, '5'], + [TRUE, '23skidoo'], + // These hit our various method calls. They are named after the methods + // they hit, which happen to also be tokens that would return FALSE from + // shouldCopy(). + [TRUE, 'isFunctionCall', 1], + [TRUE, 'isOperator', -1, 1], + [TRUE, 'isAxis', -1, -1, 1], + [TRUE, 'wasAttributeAxis', -1, -1, -1, 1], + // Special case for minus. + [TRUE, '-', -1, -1, -1, -1], + [FALSE, 'a token which should not be copied', -1, -1, -1, -1], + ]; + } + + /** + * @covers ::shouldCopy + * @dataProvider providerShouldCopy + */ + public function testShouldCopy($expected, $token, $is_function_call = 0, $is_operator = 0, $is_axis = 0, $was_attribute_axis = 0) { + // Create a mocked Namespacer object. + $mock_namespacer = $this->getMockBuilder('\Drupal\Component\XpathHelper\Namespacer') + ->disableOriginalConstructor() + ->setMethods(['isFunctionCall', 'isOperator', 'isAxis', 'wasAttributeAxis']) + ->getMock(); + + // Set expectations for the dependency methods. The various parameters map + // out our expectations. The expectation of how many times a method will be + // called is encoded. Positive numbers are how many time the method will be + // called and will return TRUE. Negative numbers are the number of times the + // method will be callled and return FALSE. 
+ $method_expectations_array = [ + 'isFunctionCall' => $is_function_call, + 'isOperator' => $is_operator, + 'isAxis' => $is_axis, + 'wasAttributeAxis' => $was_attribute_axis, + ]; + foreach ($method_expectations_array as $method => $expectation) { + $count = abs($expectation); + // Positive expectations return TRUE, negative expectations return FALSE. + $value = $expectation > 0; + $mock_namespacer->expects($this->exactly($count)) + ->method($method) + ->willReturn($value); + } + + // Since shouldCopy() is protected, we have to un-protect it. + $ref_should_copy = new \ReflectionMethod($mock_namespacer, 'shouldCopy'); + $ref_should_copy->setAccessible(TRUE); + + // Finally exercise shouldCopy(). + $this->assertSame( + $expected, + $ref_should_copy->invoke($mock_namespacer, $token) + ); + } + +} diff --git a/core/composer.json b/core/composer.json index 74e9bc9..1b76e06 100644 --- a/core/composer.json +++ b/core/composer.json @@ -27,7 +27,8 @@ "zendframework/zend-feed": "2.3.*", "mikey179/vfsStream": "1.*", "stack/builder": "1.0.*", - "egulias/email-validator": "1.2.*" + "egulias/email-validator": "1.2.*", + "masterminds/html5": "~2.1" }, "autoload": { "psr-4": { diff --git a/core/composer.lock b/core/composer.lock index 9c24eb3..e90a7d2 100644 --- a/core/composer.lock +++ b/core/composer.lock @@ -4,7 +4,7 @@ "Read more about it at http://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "hash": "c977649e8e1a8b93301fa83283672a06", + "hash": "18d8754c94d9038119bc5a03bcb83478", "packages": [ { "name": "doctrine/annotations", @@ -729,6 +729,71 @@ "time": "2014-10-12 19:18:40" }, { + "name": "masterminds/html5", + "version": "2.1.0", + "source": { + "type": "git", + "url": "https://github.com/Masterminds/html5-php.git", + "reference": "a10f8d392e1aad0b500f7b440c8f0d3bc9189704" + }, + "dist": { + "type": "zip", + "url": 
"https://api.github.com/repos/Masterminds/html5-php/zipball/a10f8d392e1aad0b500f7b440c8f0d3bc9189704", + "reference": "a10f8d392e1aad0b500f7b440c8f0d3bc9189704", + "shasum": "" + }, + "require": { + "ext-libxml": "*", + "php": ">=5.3.0" + }, + "require-dev": { + "phpunit/phpunit": "4.*", + "sami/sami": "~2.0", + "satooshi/php-coveralls": "0.6.*" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.1-dev" + } + }, + "autoload": { + "psr-4": { + "Masterminds\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Matt Butcher", + "email": "technosophos@gmail.com" + }, + { + "name": "Asmir Mustafic", + "email": "goetas@gmail.com" + }, + { + "name": "Matt Farina", + "email": "matt@mattfarina.com" + } + ], + "description": "An HTML5 parser and serializer.", + "homepage": "http://masterminds.github.io/html5-php", + "keywords": [ + "HTML5", + "dom", + "html", + "parser", + "querypath", + "serializer", + "xml" + ], + "time": "2015-02-09 16:26:00" + }, + { "name": "mikey179/vfsStream", "version": "v1.4.0", "source": { diff --git a/core/vendor/composer/ClassLoader.php b/core/vendor/composer/ClassLoader.php index 70d78bc..5e1469e 100644 --- a/core/vendor/composer/ClassLoader.php +++ b/core/vendor/composer/ClassLoader.php @@ -54,6 +54,8 @@ class ClassLoader private $useIncludePath = false; private $classMap = array(); + private $classMapAuthoritative = false; + public function getPrefixes() { if (!empty($this->prefixesPsr0)) { @@ -249,6 +251,27 @@ public function getUseIncludePath() } /** + * Turns off searching the prefix and fallback directories for classes + * that have not been registered with the class map. + * + * @param bool $classMapAuthoritative + */ + public function setClassMapAuthoritative($classMapAuthoritative) + { + $this->classMapAuthoritative = $classMapAuthoritative; + } + + /** + * Should class lookup fail if not found in the current class map? 
+ * + * @return bool + */ + public function isClassMapAuthoritative() + { + return $this->classMapAuthoritative; + } + + /** * Registers this instance as an autoloader. * * @param bool $prepend Whether to prepend the autoloader or not @@ -299,6 +322,9 @@ public function findFile($class) if (isset($this->classMap[$class])) { return $this->classMap[$class]; } + if ($this->classMapAuthoritative) { + return false; + } $file = $this->findFileWithExtension($class, '.php'); diff --git a/core/vendor/composer/autoload_psr4.php b/core/vendor/composer/autoload_psr4.php index 8b6150b..a75b107 100644 --- a/core/vendor/composer/autoload_psr4.php +++ b/core/vendor/composer/autoload_psr4.php @@ -8,6 +8,7 @@ return array( 'Symfony\\Cmf\\Component\\Routing\\' => array($vendorDir . '/symfony-cmf/routing'), 'React\\Promise\\' => array($vendorDir . '/react/promise/src'), + 'Masterminds\\' => array($vendorDir . '/masterminds/html5/src'), 'GuzzleHttp\\Stream\\' => array($vendorDir . '/guzzlehttp/streams/src'), 'GuzzleHttp\\Ring\\' => array($vendorDir . '/guzzlehttp/ringphp/src'), 'GuzzleHttp\\' => array($vendorDir . 
'/guzzlehttp/guzzle/src'), diff --git a/core/vendor/composer/installed.json b/core/vendor/composer/installed.json index 2e99ee1..685e2f1 100644 --- a/core/vendor/composer/installed.json +++ b/core/vendor/composer/installed.json @@ -2735,5 +2735,72 @@ ], "description": "Symfony Yaml Component", "homepage": "http://symfony.com" + }, + { + "name": "masterminds/html5", + "version": "2.1.0", + "version_normalized": "2.1.0.0", + "source": { + "type": "git", + "url": "https://github.com/Masterminds/html5-php.git", + "reference": "a10f8d392e1aad0b500f7b440c8f0d3bc9189704" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/Masterminds/html5-php/zipball/a10f8d392e1aad0b500f7b440c8f0d3bc9189704", + "reference": "a10f8d392e1aad0b500f7b440c8f0d3bc9189704", + "shasum": "" + }, + "require": { + "ext-libxml": "*", + "php": ">=5.3.0" + }, + "require-dev": { + "phpunit/phpunit": "4.*", + "sami/sami": "~2.0", + "satooshi/php-coveralls": "0.6.*" + }, + "time": "2015-02-09 16:26:00", + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.1-dev" + } + }, + "installation-source": "dist", + "autoload": { + "psr-4": { + "Masterminds\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Matt Butcher", + "email": "technosophos@gmail.com" + }, + { + "name": "Asmir Mustafic", + "email": "goetas@gmail.com" + }, + { + "name": "Matt Farina", + "email": "matt@mattfarina.com" + } + ], + "description": "An HTML5 parser and serializer.", + "homepage": "http://masterminds.github.io/html5-php", + "keywords": [ + "HTML5", + "dom", + "html", + "parser", + "querypath", + "serializer", + "xml" + ] } ] diff --git a/core/vendor/masterminds/html5/.gitignore b/core/vendor/masterminds/html5/.gitignore new file mode 100644 index 0000000..f9ead4a --- /dev/null +++ b/core/vendor/masterminds/html5/.gitignore @@ -0,0 +1,4 @@ +vendor/ +scratch.php +composer.lock +build/ \ No newline at end of file 
diff --git a/core/vendor/masterminds/html5/.travis.yml b/core/vendor/masterminds/html5/.travis.yml new file mode 100644 index 0000000..5b31d71 --- /dev/null +++ b/core/vendor/masterminds/html5/.travis.yml @@ -0,0 +1,28 @@ +language: php + +# Setting sudo access to false will let Travis CI use containers rather than +# VMs to run the tests. For more details see: +# - http://docs.travis-ci.com/user/workers/container-based-infrastructure/ +# - http://docs.travis-ci.com/user/workers/standard-infrastructure/ +sudo: false + +php: + - 5.3 + - 5.4 + - 5.5 + - 5.6 + - hhvm + +notifications: + irc: "irc.freenode.net#masterminds" + +before_script: + - composer self-update + - composer install --dev + +script: + - mkdir -p build/logs + - ./vendor/bin/phpunit -c phpunit.xml.dist + +after_script: + - php vendor/bin/coveralls -v diff --git a/core/vendor/masterminds/html5/CREDITS b/core/vendor/masterminds/html5/CREDITS new file mode 100644 index 0000000..c2dbc4b --- /dev/null +++ b/core/vendor/masterminds/html5/CREDITS @@ -0,0 +1,11 @@ +Matt Butcher [technosophos] (lead) +Matt Farina [mattfarina] (lead) +Asmir Mustafic [goetas] (contributor) +Edward Z. 
Yang [ezyang] (contributor) +Geoffrey Sneddon [gsnedders] (contributor) +Kukhar Vasily [ngreduce] (contributor) +Rune Christensen [MrElectronic] (contributor) +Mišo Belica [miso-belica] (contributor) +Asmir Mustafic [goetas] (contributor) +KITAITI Makoto [KitaitiMakoto] (contributor) +Jacob Floyd [cognifloyd] (contributor) diff --git a/core/vendor/masterminds/html5/LICENSE.txt b/core/vendor/masterminds/html5/LICENSE.txt new file mode 100644 index 0000000..3c275b5 --- /dev/null +++ b/core/vendor/masterminds/html5/LICENSE.txt @@ -0,0 +1,66 @@ +## HTML5-PHP License + +Copyright (c) 2013 The Authors of HTML5-PHP + +Matt Butcher - mattbutcher@google.com +Matt Farina - matt@mattfarina.com +Asmir Mustafic - goetas@gmail.com + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +## HTML5Lib License + +Portions of this are based on html5lib's PHP version, which was a +sub-project of html5lib. 
The following is the list of contributors from +html5lib: + +html5lib: + +Copyright (c) 2006-2009 The Authors + +Contributors: +James Graham - jg307@cam.ac.uk +Anne van Kesteren - annevankesteren@gmail.com +Lachlan Hunt - lachlan.hunt@lachy.id.au +Matt McDonald - kanashii@kanashii.ca +Sam Ruby - rubys@intertwingly.net +Ian Hickson (Google) - ian@hixie.ch +Thomas Broyer - t.broyer@ltgt.net +Jacques Distler - distler@golem.ph.utexas.edu +Henri Sivonen - hsivonen@iki.fi +Adam Barth - abarth@webkit.org +Eric Seidel - eric@webkit.org +The Mozilla Foundation (contributions from Henri Sivonen since 2008) +David Flanagan (Mozilla) - dflanagan@mozilla.com + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/core/vendor/masterminds/html5/README.md b/core/vendor/masterminds/html5/README.md new file mode 100644 index 0000000..1a2a96b --- /dev/null +++ b/core/vendor/masterminds/html5/README.md @@ -0,0 +1,253 @@ +# HTML5-PHP + +The need for an HTML5 parser in PHP is clear. 
This project initially +began with the seemingly abandoned `html5lib` project [original source](https://code.google.com/p/html5lib/source/checkout). +But after some initial refactoring work, we began a new parser. + +- An HTML5 serializer +- Support for PHP namespaces +- Composer support +- Event-based (SAX-like) parser +- DOM tree builder +- Interoperability with QueryPath [[in progress](https://github.com/technosophos/querypath/issues/114)] +- Runs on **PHP** 5.3.0 or newer and **HHVM** 3.2 or newer + +[![Build Status](https://travis-ci.org/Masterminds/html5-php.png?branch=master)](https://travis-ci.org/Masterminds/html5-php) [![Latest Stable Version](https://poser.pugx.org/masterminds/html5/v/stable.png)](https://packagist.org/packages/masterminds/html5) [![Coverage Status](https://coveralls.io/repos/Masterminds/html5-php/badge.png?branch=master)](https://coveralls.io/r/Masterminds/html5-php?branch=master) + +## Installation + +Install HTML5-PHP using [composer](http://getcomposer.org/). + +To install, add `masterminds/html5` to your `composer.json` file: + +``` +{ + "require" : { + "masterminds/html5": "2.*" + }, +} +``` + +(You may substitute `2.*` for a more specific release tag, of +course.) + +From there, use the `composer install` or `composer update` commands to +install. + +## Basic Usage + +HTML5-PHP has a high-level API and a low-level API. + +Here is how you use the high-level `HTML5` library API: + +```php + + + TEST + + +

Hello World

+

This is a test of the HTML5 parser.

+ + +HERE; + +// Parse the document. $dom is a DOMDocument. +$html5 = new HTML5(); +$dom = $html5->loadHTML($html); + +// Render it as HTML5: +print $html5->saveHTML($dom); + +// Or save it to a file: +$html5->save($dom, 'out.html'); + +?> +``` + +The `$dom` created by the parser is a full `DOMDocument` object. And the +`save()` and `saveHTML()` methods will take any DOMDocument. + +### Options + +It is possible to pass in an array of configuration options when loading +an HTML5 document. + +```php +// An associative array of options +$options = array( + 'option_name' => 'option_value', +); + +// Provide the options to the constructor +$html5 = new HTML5($options); + +$dom = $html5->loadHTML($html); +``` + +The following options are supported: + +* `encode_entities` (boolean): Indicates that the serializer should aggressively + encode characters as entities. Without this, it only encodes the bare + minimum. +* `disable_html_ns` (boolean): Prevents the parser from automatically + assigning the HTML5 namespace to the DOM document. This is for + non-namespace aware DOM tools. +* `target_doc` (\DOMDocument): A DOM document that will be used as the + destination for the parsed nodes. +* `implicit_namespaces` (array): An assoc array of namespaces that should be + used by the parser. Name is tag prefix, value is NS URI. + +## The Low-Level API + +This library provides the following low-level APIs that you can use to +create more customized HTML5 tools: + +- An `InputStream` abstraction that can work with different kinds of +input source (not just files and strings). +- A SAX-like event-based parser that you can hook into for special kinds +of parsing. +- A flexible error-reporting mechanism that can be tuned to document +syntax checking. +- A DOM implementation that uses PHP's built-in DOM library. + +The unit tests exercise each piece of the API, and every public function +is well-documented. 
+ +### Parser Design + +The parser is designed as follows: + +- The `InputStream` portion handles direct I/O. +- The `Scanner` handles scanning on behalf of the parser. +- The `Tokenizer` requests data off of the scanner, parses it, classifies +it, and sends it to an `EventHandler`. It is a *recursive descent parser.* +- The `EventHandler` receives notifications and data for each specific +semantic event that occurs during tokenization. +- The `DOMBuilder` is an `EventHandler` that listens for tokenizing +events and builds a document tree (`DOMDocument`) based on the events. + +### Serializer Design + +The serializer takes a data structure (the `DOMDocument`) and transforms +it into a character representation -- an HTML5 document. + +The serializer is broken into three parts: + +- The `OutputRules` contain the rules to turn DOM elements into strings. The +rules are an implementation of the interface `RulesInterface` allowing for +different rule sets to be used. +- The `Traverser`, which is a special-purpose tree walker. It visits +each node in the tree and uses the `OutputRules` to transform the node +into a string. +- `HTML5` manages the `Traverser` and stores the resultant data +in the correct place. + +The serializer (`save()`, `saveHTML()`) follows the +[section 8.9 of the HTML 5.0 spec](http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#serializing-html-fragments). +So tags are serialized according to these rules: + +- A tag with children: <foo>CHILDREN</foo> +- A tag that cannot have content: <foo> (no closing tag) +- A tag that could have content, but doesn't: <foo></foo> + +## Known Issues (Or, Things We Designed Against the Spec) + +Please check the issue queue for a full list, but the following are +known issues that are not presently on the roadmap: + +- Namespaces: HTML5 only [supports a selected list of namespaces](http://www.w3.org/TR/html5/infrastructure.html#namespaces) + and they do not operate in the same way as XML namespaces.
A `:` has no special + meaning. + By default the parser does not support XML style namespaces via `:`; + to enable the XML namespaces see the [XML Namespaces section](#xml-namespaces) +- Scripts: This parser does not contain a JavaScript or a CSS + interpreter. While one may be supplied, not all features will be + supported. +- Reentrance: The current parser is not re-entrant. (Thus you can't pause + the parser to modify the HTML string mid-parse.) +- Validation: The current tree builder is **not** a validating parser. + While it will correct some HTML, it does not check that the HTML + conforms to the standard. (Should you wish, you can build a validating + parser by extending DOMTree or building your own EventHandler + implementation.) + * There is limited support for insertion modes. + * Some autocorrection is done automatically. + * Per the spec, many legacy tags are admitted and correctly handled, + even though they are technically not part of HTML5. +- Attribute names and values: Due to the implementation details of the + PHP implementation of DOM, attribute names that do not follow the + XML 1.0 standard are not inserted into the DOM. (Effectively, they + are ignored.) If you've got a clever fix for this, jump in! +- Processor Instructions: The HTML5 spec does not allow processor + instructions. We do. Since this is a server-side library, we think + this is useful. And that means, dear reader, that in some cases you + can parse the HTML from a mixed PHP/HTML document. This, however, + is an incidental feature, not a core feature. +- HTML manifests: Unsupported. +- PLAINTEXT: Unsupported. +- Adoption Agency Algorithm: Not yet implemented. (8.2.5.4.7) + +## XML Namespaces + +To use XML style namespaces you have to configure the main `HTML5` instance accordingly.
+ +```php +use Masterminds\HTML5; +$html = new HTML5(array( + "xmlNamespaces" => true +)); + +$dom = $html->loadHTML(''); + +$dom->documentElement->namespaceURI; // http://www.example.com + +``` + +You can also add some default prefixes that will not require the namespace declaration, +but whose elements will be namespaced. + +```php +use Masterminds\HTML5; +$html = new HTML5(array( + "implicitNamespaces"=>array( + "t"=>"http://www.example.com" + ) +)); + +$dom = $html->loadHTML(''); + +$dom->documentElement->namespaceURI; // http://www.example.com + +``` + +## Thanks to... + +The dedicated (and patient) contributors of patches small and large, +who have already made this library better. See the CREDITS file for +a list of contributors. + +We owe a huge debt of gratitude to the original authors of html5lib. + +While not much of the original parser remains, we learned a lot from +reading the html5lib library. And some pieces remain here. In +particular, much of the UTF-8 and Unicode handling is derived from the +html5lib project. + +## License + +This software is released under the MIT license. The original html5lib +library was also released under the MIT license. + +See LICENSE.txt + +Certain files contain copyright assertions by specific individuals +involved with html5lib. Those have been retained where appropriate.
diff --git a/core/vendor/masterminds/html5/RELEASE.md b/core/vendor/masterminds/html5/RELEASE.md new file mode 100644 index 0000000..36d1630 --- /dev/null +++ b/core/vendor/masterminds/html5/RELEASE.md @@ -0,0 +1,60 @@ +# Release Notes + +2.1.0 (2015-02-01) +- #74: Added `disable_html_ns` and `target_doc` dom parsing options +- Unified option names +- #73: Fixed alphabet, ß now can be detected +- #75 and #76: Allow whitespace in RCDATA tags +- #77: Fixed parsing blunder for json embeds +- #72: Add options to HTML methods + +2.0.2 (2014-12-17) +- #50: empty document handling +- #63: tags with strange capitalization +- #65: dashes and underscores as allowed characters in tag names +- #68: Fixed issue with non-inline elements inside inline containers + +2.0.1 (2014-09-23) +- #59: Fixed issue parsing some fragments. +- #56: Incorrectly saw 0 as empty string +- Sami as new documentation generator + +2.0.0 (2014-07-28) +- #53: Improved boolean attributes handling +- #52: Facebook HHVM compatibility +- #48: Adopted PSR-2 as coding standard +- #47: Moved everything to Masterminds namespace +- #45: Added custom namespaces +- #44: Added support to XML-style namespaces +- #37: Refactored HTML5 class removing static methods + +1.0.5 (2014-06-10) +- #38: Set the dev-master branch as the 1.0.x branch for composer (goetas) +- #34: Tests use PSR-4 for autoloading. (goetas) +- #40, #41: Fix entity handling in RCDATA sections. (KitaitiMakoto) +- #32: Fixed issue where character references were being incorrectly encoded in style tags. + +1.0.4 (2014-04-29) +- #30/#31 Don't throw an exception for invalid tag names. + +1.0.3 (2014-02-28) +- #23 and #29: Ignore attributes with illegal chars in name for the PHP DOM. + +1.0.2 (2014-02-12) +- #23: Handle missing tag close in attribute list. +- #25: Fixed text escaping in the serializer (HTML5 8.3). +- #27: Fixed tests on Windows: changed "\n" -> PHP_EOL. +- #28: Fixed infinite loop for char "&" in unquoted attribute in parser.
+- #26: Updated tag name case handling to deal with uppercase usage. +- #24: Newlines and tabs are allowed inside quoted attributes (HTML5 8.2.4). +- Fixed Travis CI testing. + +1.0.1 (2013-11-07) +- CDATA encoding is improved. (Non-standard; Issue #19) +- Some parser rules were not returning the new current element. (Issue #20) +- Added, to the README, details on code test coverage and to packagist version. +- Fixed processor instructions. +- Improved test coverage and documentation coverage. + +1.0.0 (2013-10-02) +- Initial release. diff --git a/core/vendor/masterminds/html5/UPGRADING.md b/core/vendor/masterminds/html5/UPGRADING.md new file mode 100644 index 0000000..76e3a19 --- /dev/null +++ b/core/vendor/masterminds/html5/UPGRADING.md @@ -0,0 +1,21 @@ +From 1.x to 2.x +================= + +- All classes uses `Masterminds` namespace. +- All public static methods has been removed from `HTML5` class and the general API to access the HTML5 functionalities has changed. + + Before: + + $dom = \HTML5::loadHTML('....'); + \HTML5::saveHTML($dom); + + After: + + use Masterminds\HTML5; + + $html5 = new HTML5(); + + $dom = $html5->loadHTML('....'); + echo $html5->saveHTML($dom); + + diff --git a/core/vendor/masterminds/html5/bin/entities.php b/core/vendor/masterminds/html5/bin/entities.php new file mode 100644 index 0000000..0fbd6ec --- /dev/null +++ b/core/vendor/masterminds/html5/bin/entities.php @@ -0,0 +1,26 @@ + $obj) { + $sname = substr($name, 1, -1); + $table[$sname] = $obj->characters; +} + +print '=5.3.0" + }, + "require-dev": { + "satooshi/php-coveralls": "0.6.*", + "phpunit/phpunit" : "4.*", + "sami/sami": "~2.0" + }, + "autoload": { + "psr-4": {"Masterminds\\": "src"} + }, + "autoload-dev": { + "psr-4": {"Masterminds\\HTML5\\Tests\\": "test/HTML5"} + }, + "extra": { + "branch-alias": { + "dev-master": "2.1-dev" + } + } +} diff --git a/core/vendor/masterminds/html5/example.php b/core/vendor/masterminds/html5/example.php new file mode 100644 index 
0000000..5e3b25f --- /dev/null +++ b/core/vendor/masterminds/html5/example.php @@ -0,0 +1,33 @@ + + + TEST + + + + +
+

Hello World

This is a test of the HTML5 parser.

+
+ & Nobody nowhere. +
+ TEST + + © + +HERE; + +$html5 = new HTML5(); +$dom = $html5->loadHTML($html); + +print "Converting to HTML 5\n"; + +$html5->save($dom, fopen("php://stdin", 'w')); diff --git a/core/vendor/masterminds/html5/phpunit.xml.dist b/core/vendor/masterminds/html5/phpunit.xml.dist new file mode 100644 index 0000000..c344880 --- /dev/null +++ b/core/vendor/masterminds/html5/phpunit.xml.dist @@ -0,0 +1,30 @@ + + + + + test/HTML5/ + + + + + systemlib.phpreflection_hni + src/HTML5/Parser/InputStream.php + src/HTML5/Serializer/RulesInterface.php + src/HTML5/Entities.php + src/HTML5/Serializer/HTML5Entities.php + + + + + + + diff --git a/core/vendor/masterminds/html5/sami.php b/core/vendor/masterminds/html5/sami.php new file mode 100644 index 0000000..d0dd80b --- /dev/null +++ b/core/vendor/masterminds/html5/sami.php @@ -0,0 +1,10 @@ + 'HTML5-PHP API', + 'build_dir' => __DIR__.'/build/apidoc', + 'cache_dir' => __DIR__.'/build/sami-cache', + 'default_opened_level' => 1, +)); \ No newline at end of file diff --git a/core/vendor/masterminds/html5/src/HTML5.php b/core/vendor/masterminds/html5/src/HTML5.php new file mode 100644 index 0000000..1c46c2b --- /dev/null +++ b/core/vendor/masterminds/html5/src/HTML5.php @@ -0,0 +1,250 @@ + false + ); + + protected $errors = array(); + + public function __construct(array $options = array()) + { + $this->options = array_merge($this->options, $options); + } + + /** + * Get the default options. + * + * @return array The default options. + */ + public function getOptions() + { + return $this->options; + } + + /** + * Load and parse an HTML file. + * + * This will apply the HTML5 parser, which is tolerant of many + * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML + * 3. Note that in these cases, not all of the old data will be + * preserved. For example, XHTML's XML declaration will be removed. + * + * The rules governing parsing are set out in the HTML 5 spec. + * + * @param string $file + * The path to the file to parse. 
If this is a resource, it is + * assumed to be an open stream whose pointer is set to the first + * byte of input. + * @param array $options + * Configuration options when parsing the HTML + * @return \DOMDocument A DOM document. These object type is defined by the libxml + * library, and should have been included with your version of PHP. + */ + public function load($file, array $options = array()) + { + // Handle the case where file is a resource. + if (is_resource($file)) { + // FIXME: We need a StreamInputStream class. + return $this->loadHTML(stream_get_contents($file), $options); + } + + $input = new FileInputStream($file); + + return $this->parse($input, $options); + } + + /** + * Parse a HTML Document from a string. + * + * Take a string of HTML 5 (or earlier) and parse it into a + * DOMDocument. + * + * @param string $string + * A html5 document as a string. + * @param array $options + * Configuration options when parsing the HTML + * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with + * almost all distribtions of PHP. + */ + public function loadHTML($string, array $options = array()) + { + $input = new StringInputStream($string); + + return $this->parse($input, $options); + } + + /** + * Convenience function to load an HTML file. + * + * This is here to provide backwards compatibility with the + * PHP DOM implementation. It simply calls load(). + * + * @param string $file + * The path to the file to parse. If this is a resource, it is + * assumed to be an open stream whose pointer is set to the first + * byte of input. + * @param array $options + * Configuration options when parsing the HTML + * + * @return \DOMDocument A DOM document. These object type is defined by the libxml + * library, and should have been included with your version of PHP. + */ + public function loadHTMLFile($file, array $options = array()) + { + return $this->load($file, $options); + } + + /** + * Parse a HTML fragment from a string. 
+ * + * @param string $string + * The html5 fragment as a string. + * @param array $options + * Configuration options when parsing the HTML + * + * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with + * almost all distributions of PHP. + */ + public function loadHTMLFragment($string, array $options = array()) + { + $input = new StringInputStream($string); + + return $this->parseFragment($input, $options); + } + + /** + * Return all errors encountered into parsing phase + * + * @return array + */ + public function getErrors() + { + return $this->errors; + } + + /** + * Return true it some errors were encountered into parsing phase + * + * @return bool + */ + public function hasErrors() + { + return count($this->errors) > 0; + } + + /** + * Parse an input stream. + * + * Lower-level loading function. This requires an input stream instead + * of a string, file, or resource. + */ + public function parse(\Masterminds\HTML5\Parser\InputStream $input, array $options = array()) + { + $this->errors = array(); + $events = new DOMTreeBuilder(false, array_merge($this->getOptions(), $options)); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $events); + + $parser->parse(); + $this->errors = $events->getErrors(); + + return $events->document(); + } + + /** + * Parse an input stream where the stream is a fragment. + * + * Lower-level loading function. This requires an input stream instead + * of a string, file, or resource. + */ + public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input, array $options = array()) + { + $events = new DOMTreeBuilder(true, array_merge($this->getOptions(), $options)); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $events); + + $parser->parse(); + $this->errors = $events->getErrors(); + + return $events->fragment(); + } + + /** + * Save a DOM into a given file as HTML5. + * + * @param mixed $dom + * The DOM to be serialized. 
+ * @param string $file + * The filename to be written. + * @param array $options + * Configuration options when serializing the DOM. These include: + * - encode_entities: Text written to the output is escaped by default and not all + * entities are encoded. If this is set to true all entities will be encoded. + * Defaults to false. + */ + public function save($dom, $file, $options = array()) + { + $close = true; + if (is_resource($file)) { + $stream = $file; + $close = false; + } else { + $stream = fopen($file, 'w'); + } + $options = array_merge($this->getOptions(), $options); + $rules = new OutputRules($stream, $options); + $trav = new Traverser($dom, $stream, $rules, $options); + + $trav->walk(); + + if ($close) { + fclose($stream); + } + } + + /** + * Convert a DOM into an HTML5 string. + * + * @param mixed $dom + * The DOM to be serialized. + * @param array $options + * Configuration options when serializing the DOM. These include: + * - encode_entities: Text written to the output is escaped by default and not all + * entities are encoded. If this is set to true all entities will be encoded. + * Defaults to false. + * + * @return string A HTML5 documented generated from the DOM. 
+ */ + public function saveHTML($dom, $options = array()) + { + $stream = fopen('php://temp', 'w'); + $this->save($dom, $stream, array_merge($this->getOptions(), $options)); + + return stream_get_contents($stream, - 1, 0); + } +} diff --git a/core/vendor/masterminds/html5/src/HTML5/Elements.php b/core/vendor/masterminds/html5/src/HTML5/Elements.php new file mode 100644 index 0000000..6cf72aa --- /dev/null +++ b/core/vendor/masterminds/html5/src/HTML5/Elements.php @@ -0,0 +1,634 @@ + 1, + "abbr" => 1, + "address" => 89, // NORMAL | VOID_TAG | AUTOCLOSE_P | BLOCK_TAG + "area" => 9, // NORMAL | VOID_TAG + "article" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "aside" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "audio" => 65, // NORMAL | BLOCK_TAG + "b" => 1, + "base" => 9, // NORMAL | VOID_TAG + "bdi" => 1, + "bdo" => 1, + "blockquote" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "body" => 1, + "br" => 9, // NORMAL | VOID_TAG + "button" => 1, + "canvas" => 65, // NORMAL | BLOCK_TAG + "caption" => 1, + "cite" => 1, + "code" => 1, + "col" => 9, // NORMAL | VOID_TAG + "colgroup" => 1, + "command" => 9, // NORMAL | VOID_TAG + // "data" => 1, // This is highly experimental and only part of the whatwg spec (not w3c). 
See https://developer.mozilla.org/en-US/docs/HTML/Element/data + "datalist" => 1, + "dd" => 65, // NORMAL | BLOCK_TAG + "del" => 1, + "details" => 17, // NORMAL | AUTOCLOSE_P, + "dfn" => 1, + "dialog" => 17, // NORMAL | AUTOCLOSE_P, + "div" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "dl" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "dt" => 1, + "em" => 1, + "embed" => 9, // NORMAL | VOID_TAG + "fieldset" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "figcaption" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "figure" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "footer" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "form" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "h1" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "h2" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "h3" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "h4" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "h5" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "h6" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "head" => 1, + "header" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "hgroup" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "hr" => 73, // NORMAL | VOID_TAG + "html" => 1, + "i" => 1, + "iframe" => 3, // NORMAL | TEXT_RAW + "img" => 9, // NORMAL | VOID_TAG + "input" => 9, // NORMAL | VOID_TAG + "kbd" => 1, + "ins" => 1, + "keygen" => 9, // NORMAL | VOID_TAG + "label" => 1, + "legend" => 1, + "li" => 1, + "link" => 9, // NORMAL | VOID_TAG + "map" => 1, + "mark" => 1, + "menu" => 17, // NORMAL | AUTOCLOSE_P, + "meta" => 9, // NORMAL | VOID_TAG + "meter" => 1, + "nav" => 17, // NORMAL | AUTOCLOSE_P, + "noscript" => 67, // NORMAL | TEXT_RAW | BLOCK_TAG + "object" => 1, + "ol" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "optgroup" => 1, + "option" => 1, + "output" => 65, // NORMAL | BLOCK_TAG + "p" => 209, // NORMAL | AUTOCLOSE_P | BLOCK_TAG | BLOCK_ONLY_INLINE + "param" => 9, // NORMAL | VOID_TAG + "pre" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "progress" => 1, + "q" => 1, + "rp" => 1, + "rt" => 1, + "ruby" => 
1, + "s" => 1, + "samp" => 1, + "script" => 3, // NORMAL | TEXT_RAW + "section" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "select" => 1, + "small" => 1, + "source" => 9, // NORMAL | VOID_TAG + "span" => 1, + "strong" => 1, + "style" => 3, // NORMAL | TEXT_RAW + "sub" => 1, + "summary" => 17, // NORMAL | AUTOCLOSE_P, + "sup" => 1, + "table" => 65, // NORMAL | BLOCK_TAG + "tbody" => 1, + "td" => 1, + "textarea" => 5, // NORMAL | TEXT_RCDATA + "tfoot" => 65, // NORMAL | BLOCK_TAG + "th" => 1, + "thead" => 1, + "time" => 1, + "title" => 5, // NORMAL | TEXT_RCDATA + "tr" => 1, + "track" => 9, // NORMAL | VOID_TAG + "u" => 1, + "ul" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG + "var" => 1, + "video" => 65, // NORMAL | BLOCK_TAG + "wbr" => 9, // NORMAL | VOID_TAG + + // Legacy? + 'basefont' => 8, // VOID_TAG + 'bgsound' => 8, // VOID_TAG + 'noframes' => 2, // RAW_TEXT + 'frame' => 9, // NORMAL | VOID_TAG + 'frameset' => 1, + 'center' => 16, + 'dir' => 16, + 'listing' => 16, // AUTOCLOSE_P + 'plaintext' => 48, // AUTOCLOSE_P | TEXT_PLAINTEXT + 'applet' => 0, + 'marquee' => 0, + 'isindex' => 8, // VOID_TAG + 'xmp' => 20, // AUTOCLOSE_P | VOID_TAG | RAW_TEXT + 'noembed' => 2 // RAW_TEXT + ); + + /** + * The MathML elements. + * See http://www.w3.org/wiki/MathML/Elements. + * + * In our case we are only concerned with presentation MathML and not content + * MathML. There is a nice list of this subset at https://developer.mozilla.org/en-US/docs/MathML/Element. 
+ * + * @var array + */ + public static $mathml = array( + "maction" => 1, + "maligngroup" => 1, + "malignmark" => 1, + "math" => 1, + "menclose" => 1, + "merror" => 1, + "mfenced" => 1, + "mfrac" => 1, + "mglyph" => 1, + "mi" => 1, + "mlabeledtr" => 1, + "mlongdiv" => 1, + "mmultiscripts" => 1, + "mn" => 1, + "mo" => 1, + "mover" => 1, + "mpadded" => 1, + "mphantom" => 1, + "mroot" => 1, + "mrow" => 1, + "ms" => 1, + "mscarries" => 1, + "mscarry" => 1, + "msgroup" => 1, + "msline" => 1, + "mspace" => 1, + "msqrt" => 1, + "msrow" => 1, + "mstack" => 1, + "mstyle" => 1, + "msub" => 1, + "msup" => 1, + "msubsup" => 1, + "mtable" => 1, + "mtd" => 1, + "mtext" => 1, + "mtr" => 1, + "munder" => 1, + "munderover" => 1 + ); + + /** + * The svg elements. + * + * The Mozilla documentation has a good list at https://developer.mozilla.org/en-US/docs/SVG/Element. + * The w3c list appears to be lacking in some areas like filter effect elements. + * That list can be found at http://www.w3.org/wiki/SVG/Elements. + * + * Note, FireFox appears to do a better job rendering filter effects than chrome. + * While they are in the spec I'm not sure how widely implemented they are. 
+ * + * @var array + */ + public static $svg = array( + "a" => 1, + "altGlyph" => 1, + "altGlyphDef" => 1, + "altGlyphItem" => 1, + "animate" => 1, + "animateColor" => 1, + "animateMotion" => 1, + "animateTransform" => 1, + "circle" => 1, + "clipPath" => 1, + "color-profile" => 1, + "cursor" => 1, + "defs" => 1, + "desc" => 1, + "ellipse" => 1, + "feBlend" => 1, + "feColorMatrix" => 1, + "feComponentTransfer" => 1, + "feComposite" => 1, + "feConvolveMatrix" => 1, + "feDiffuseLighting" => 1, + "feDisplacementMap" => 1, + "feDistantLight" => 1, + "feFlood" => 1, + "feFuncA" => 1, + "feFuncB" => 1, + "feFuncG" => 1, + "feFuncR" => 1, + "feGaussianBlur" => 1, + "feImage" => 1, + "feMerge" => 1, + "feMergeNode" => 1, + "feMorphology" => 1, + "feOffset" => 1, + "fePointLight" => 1, + "feSpecularLighting" => 1, + "feSpotLight" => 1, + "feTile" => 1, + "feTurbulence" => 1, + "filter" => 1, + "font" => 1, + "font-face" => 1, + "font-face-format" => 1, + "font-face-name" => 1, + "font-face-src" => 1, + "font-face-uri" => 1, + "foreignObject" => 1, + "g" => 1, + "glyph" => 1, + "glyphRef" => 1, + "hkern" => 1, + "image" => 1, + "line" => 1, + "linearGradient" => 1, + "marker" => 1, + "mask" => 1, + "metadata" => 1, + "missing-glyph" => 1, + "mpath" => 1, + "path" => 1, + "pattern" => 1, + "polygon" => 1, + "polyline" => 1, + "radialGradient" => 1, + "rect" => 1, + "script" => 3, // NORMAL | RAW_TEXT + "set" => 1, + "stop" => 1, + "style" => 3, // NORMAL | RAW_TEXT + "svg" => 1, + "switch" => 1, + "symbol" => 1, + "text" => 1, + "textPath" => 1, + "title" => 1, + "tref" => 1, + "tspan" => 1, + "use" => 1, + "view" => 1, + "vkern" => 1 + ); + + /** + * Some attributes in SVG are case sensetitive. + * + * This map contains key/value pairs with the key as the lowercase attribute + * name and the value with the correct casing. 
+ */ + public static $svgCaseSensitiveAttributeMap = array( + 'attributename' => 'attributeName', + 'attributetype' => 'attributeType', + 'basefrequency' => 'baseFrequency', + 'baseprofile' => 'baseProfile', + 'calcmode' => 'calcMode', + 'clippathunits' => 'clipPathUnits', + 'contentscripttype' => 'contentScriptType', + 'contentstyletype' => 'contentStyleType', + 'diffuseconstant' => 'diffuseConstant', + 'edgemode' => 'edgeMode', + 'externalresourcesrequired' => 'externalResourcesRequired', + 'filterres' => 'filterRes', + 'filterunits' => 'filterUnits', + 'glyphref' => 'glyphRef', + 'gradienttransform' => 'gradientTransform', + 'gradientunits' => 'gradientUnits', + 'kernelmatrix' => 'kernelMatrix', + 'kernelunitlength' => 'kernelUnitLength', + 'keypoints' => 'keyPoints', + 'keysplines' => 'keySplines', + 'keytimes' => 'keyTimes', + 'lengthadjust' => 'lengthAdjust', + 'limitingconeangle' => 'limitingConeAngle', + 'markerheight' => 'markerHeight', + 'markerunits' => 'markerUnits', + 'markerwidth' => 'markerWidth', + 'maskcontentunits' => 'maskContentUnits', + 'maskunits' => 'maskUnits', + 'numoctaves' => 'numOctaves', + 'pathlength' => 'pathLength', + 'patterncontentunits' => 'patternContentUnits', + 'patterntransform' => 'patternTransform', + 'patternunits' => 'patternUnits', + 'pointsatx' => 'pointsAtX', + 'pointsaty' => 'pointsAtY', + 'pointsatz' => 'pointsAtZ', + 'preservealpha' => 'preserveAlpha', + 'preserveaspectratio' => 'preserveAspectRatio', + 'primitiveunits' => 'primitiveUnits', + 'refx' => 'refX', + 'refy' => 'refY', + 'repeatcount' => 'repeatCount', + 'repeatdur' => 'repeatDur', + 'requiredextensions' => 'requiredExtensions', + 'requiredfeatures' => 'requiredFeatures', + 'specularconstant' => 'specularConstant', + 'specularexponent' => 'specularExponent', + 'spreadmethod' => 'spreadMethod', + 'startoffset' => 'startOffset', + 'stddeviation' => 'stdDeviation', + 'stitchtiles' => 'stitchTiles', + 'surfacescale' => 'surfaceScale', + 'systemlanguage' => 
'systemLanguage', + 'tablevalues' => 'tableValues', + 'targetx' => 'targetX', + 'targety' => 'targetY', + 'textlength' => 'textLength', + 'viewbox' => 'viewBox', + 'viewtarget' => 'viewTarget', + 'xchannelselector' => 'xChannelSelector', + 'ychannelselector' => 'yChannelSelector', + 'zoomandpan' => 'zoomAndPan' + ); + + /** + * Some SVG elements are case sensetitive. + * This map contains these. + * + * The map contains key/value store of the name is lowercase as the keys and + * the correct casing as the value. + */ + public static $svgCaseSensitiveElementMap = array( + 'altglyph' => 'altGlyph', + 'altglyphdef' => 'altGlyphDef', + 'altglyphitem' => 'altGlyphItem', + 'animatecolor' => 'animateColor', + 'animatemotion' => 'animateMotion', + 'animatetransform' => 'animateTransform', + 'clippath' => 'clipPath', + 'feblend' => 'feBlend', + 'fecolormatrix' => 'feColorMatrix', + 'fecomponenttransfer' => 'feComponentTransfer', + 'fecomposite' => 'feComposite', + 'feconvolvematrix' => 'feConvolveMatrix', + 'fediffuselighting' => 'feDiffuseLighting', + 'fedisplacementmap' => 'feDisplacementMap', + 'fedistantlight' => 'feDistantLight', + 'feflood' => 'feFlood', + 'fefunca' => 'feFuncA', + 'fefuncb' => 'feFuncB', + 'fefuncg' => 'feFuncG', + 'fefuncr' => 'feFuncR', + 'fegaussianblur' => 'feGaussianBlur', + 'feimage' => 'feImage', + 'femerge' => 'feMerge', + 'femergenode' => 'feMergeNode', + 'femorphology' => 'feMorphology', + 'feoffset' => 'feOffset', + 'fepointlight' => 'fePointLight', + 'fespecularlighting' => 'feSpecularLighting', + 'fespotlight' => 'feSpotLight', + 'fetile' => 'feTile', + 'feturbulence' => 'feTurbulence', + 'foreignobject' => 'foreignObject', + 'glyphref' => 'glyphRef', + 'lineargradient' => 'linearGradient', + 'radialgradient' => 'radialGradient', + 'textpath' => 'textPath' + ); + + /** + * Check whether the given element meets the given criterion. + * + * Example: + * + * Elements::isA('script', Elements::TEXT_RAW); // Returns true. 
+ * + * Elements::isA('script', Elements::TEXT_RCDATA); // Returns false. + * + * @param string $name + * The element name. + * @param int $mask + * One of the constants on this class. + * @return boolean true if the element matches the mask, false otherwise. + */ + public static function isA($name, $mask) + { + if (! static::isElement($name)) { + return false; + } + + return (static::element($name) & $mask) == $mask; + } + + /** + * Test if an element is a valid html5 element. + * + * @param string $name + * The name of the element. + * + * @return bool True if a html5 element and false otherwise. + */ + public static function isHtml5Element($name) + { + // html5 element names are case insensetitive. Forcing lowercase for the check. + // Do we need this check or will all data passed here already be lowercase? + return isset(static::$html5[strtolower($name)]); + } + + /** + * Test if an element name is a valid MathML presentation element. + * + * @param string $name + * The name of the element. + * + * @return bool True if a MathML name and false otherwise. + */ + public static function isMathMLElement($name) + { + // MathML is case-sensetitive unlike html5 elements. + return isset(static::$mathml[$name]); + } + + /** + * Test if an element is a valid SVG element. + * + * @param string $name + * The name of the element. + * + * @return boolean True if a SVG element and false otherise. + */ + public static function isSvgElement($name) + { + // SVG is case-sensetitive unlike html5 elements. + return isset(static::$svg[$name]); + } + + /** + * Is an element name valid in an html5 document. + * + * This includes html5 elements along with other allowed embedded content + * such as svg and mathml. + * + * @param string $name + * The name of the element. + * + * @return bool True if valid and false otherwise. 
+ */ + public static function isElement($name) + { + return static::isHtml5Element($name) || static::isMathMLElement($name) || static::isSvgElement($name); + } + + /** + * Get the element mask for the given element name. + * + * @param string $name + * The name of the element. + * + * @return int The element mask. + */ + public static function element($name) + { + if (isset(static::$html5[$name])) { + return static::$html5[$name]; + } + if (isset(static::$svg[$name])) { + return static::$svg[$name]; + } + if (isset(static::$mathml[$name])) { + return static::$mathml[$name]; + } + + return false; + } + + /** + * Normalize a SVG element name to its proper case and form. + * + * @param string $name + * The name of the element. + * + * @return string The normalized form of the element name. + */ + public static function normalizeSvgElement($name) + { + $name = strtolower($name); + if (isset(static::$svgCaseSensitiveElementMap[$name])) { + $name = static::$svgCaseSensitiveElementMap[$name]; + } + + return $name; + } + + /** + * Normalize a SVG attribute name to its proper case and form. + * + * @param string $name + * The name of the attribute. + * + * @return string The normalized form of the attribute name. + */ + public static function normalizeSvgAttribute($name) + { + $name = strtolower($name); + if (isset(static::$svgCaseSensitiveAttributeMap[$name])) { + $name = static::$svgCaseSensitiveAttributeMap[$name]; + } + + return $name; + } + + /** + * Normalize a MathML attribute name to its proper case and form. + * + * Note, all MathML element names are lowercase. + * + * @param string $name + * The name of the attribute. + * + * @return string The normalized form of the attribute name. + */ + public static function normalizeMathMlAttribute($name) + { + $name = strtolower($name); + + // Only one attribute has a mixed case form for MathML. 
+ if ($name == 'definitionurl') { + $name = 'definitionURL'; + } + + return $name; + } +} diff --git a/core/vendor/masterminds/html5/src/HTML5/Entities.php b/core/vendor/masterminds/html5/src/HTML5/Entities.php new file mode 100644 index 0000000..2e605d6 --- /dev/null +++ b/core/vendor/masterminds/html5/src/HTML5/Entities.php @@ -0,0 +1,2236 @@ + 'Á', + 'Aacut' => 'Á', + 'aacute' => 'á', + 'aacut' => 'á', + 'Abreve' => 'Ă', + 'abreve' => 'ă', + 'ac' => '∾', + 'acd' => '∿', + 'acE' => '∾̳', + 'Acirc' => 'Â', + 'Acir' => 'Â', + 'acirc' => 'â', + 'acir' => 'â', + 'acute' => '´', + 'acut' => '´', + 'Acy' => 'А', + 'acy' => 'а', + 'AElig' => 'Æ', + 'AEli' => 'Æ', + 'aelig' => 'æ', + 'aeli' => 'æ', + 'af' => '⁡', + 'Afr' => '𝔄', + 'afr' => '𝔞', + 'Agrave' => 'À', + 'Agrav' => 'À', + 'agrave' => 'à', + 'agrav' => 'à', + 'alefsym' => 'ℵ', + 'aleph' => 'ℵ', + 'Alpha' => 'Α', + 'alpha' => 'α', + 'Amacr' => 'Ā', + 'amacr' => 'ā', + 'amalg' => '⨿', + 'AMP' => '&', + 'AM' => '&', + 'amp' => '&', + 'am' => '&', + 'And' => '⩓', + 'and' => '∧', + 'andand' => '⩕', + 'andd' => '⩜', + 'andslope' => '⩘', + 'andv' => '⩚', + 'ang' => '∠', + 'ange' => '⦤', + 'angle' => '∠', + 'angmsd' => '∡', + 'angmsdaa' => '⦨', + 'angmsdab' => '⦩', + 'angmsdac' => '⦪', + 'angmsdad' => '⦫', + 'angmsdae' => '⦬', + 'angmsdaf' => '⦭', + 'angmsdag' => '⦮', + 'angmsdah' => '⦯', + 'angrt' => '∟', + 'angrtvb' => '⊾', + 'angrtvbd' => '⦝', + 'angsph' => '∢', + 'angst' => 'Å', + 'angzarr' => '⍼', + 'Aogon' => 'Ą', + 'aogon' => 'ą', + 'Aopf' => '𝔸', + 'aopf' => '𝕒', + 'ap' => '≈', + 'apacir' => '⩯', + 'apE' => '⩰', + 'ape' => '≊', + 'apid' => '≋', + 'apos' => '\'', + 'ApplyFunction' => '⁡', + 'approx' => '≈', + 'approxeq' => '≊', + 'Aring' => 'Å', + 'Arin' => 'Å', + 'aring' => 'å', + 'arin' => 'å', + 'Ascr' => '𝒜', + 'ascr' => '𝒶', + 'Assign' => '≔', + 'ast' => '*', + 'asymp' => '≈', + 'asympeq' => '≍', + 'Atilde' => 'Ã', + 'Atild' => 'Ã', + 'atilde' => 'ã', + 'atild' => 'ã', + 'Auml' => 'Ä', + 'Aum' => 'Ä', + 
'auml' => 'ä', + 'aum' => 'ä', + 'awconint' => '∳', + 'awint' => '⨑', + 'backcong' => '≌', + 'backepsilon' => '϶', + 'backprime' => '‵', + 'backsim' => '∽', + 'backsimeq' => '⋍', + 'Backslash' => '∖', + 'Barv' => '⫧', + 'barvee' => '⊽', + 'Barwed' => '⌆', + 'barwed' => '⌅', + 'barwedge' => '⌅', + 'bbrk' => '⎵', + 'bbrktbrk' => '⎶', + 'bcong' => '≌', + 'Bcy' => 'Б', + 'bcy' => 'б', + 'bdquo' => '„', + 'becaus' => '∵', + 'Because' => '∵', + 'because' => '∵', + 'bemptyv' => '⦰', + 'bepsi' => '϶', + 'bernou' => 'ℬ', + 'Bernoullis' => 'ℬ', + 'Beta' => 'Β', + 'beta' => 'β', + 'beth' => 'ℶ', + 'between' => '≬', + 'Bfr' => '𝔅', + 'bfr' => '𝔟', + 'bigcap' => '⋂', + 'bigcirc' => '◯', + 'bigcup' => '⋃', + 'bigodot' => '⨀', + 'bigoplus' => '⨁', + 'bigotimes' => '⨂', + 'bigsqcup' => '⨆', + 'bigstar' => '★', + 'bigtriangledown' => '▽', + 'bigtriangleup' => '△', + 'biguplus' => '⨄', + 'bigvee' => '⋁', + 'bigwedge' => '⋀', + 'bkarow' => '⤍', + 'blacklozenge' => '⧫', + 'blacksquare' => '▪', + 'blacktriangle' => '▴', + 'blacktriangledown' => '▾', + 'blacktriangleleft' => '◂', + 'blacktriangleright' => '▸', + 'blank' => '␣', + 'blk12' => '▒', + 'blk14' => '░', + 'blk34' => '▓', + 'block' => '█', + 'bne' => '=⃥', + 'bnequiv' => '≡⃥', + 'bNot' => '⫭', + 'bnot' => '⌐', + 'Bopf' => '𝔹', + 'bopf' => '𝕓', + 'bot' => '⊥', + 'bottom' => '⊥', + 'bowtie' => '⋈', + 'boxbox' => '⧉', + 'boxDL' => '╗', + 'boxDl' => '╖', + 'boxdL' => '╕', + 'boxdl' => '┐', + 'boxDR' => '╔', + 'boxDr' => '╓', + 'boxdR' => '╒', + 'boxdr' => '┌', + 'boxH' => '═', + 'boxh' => '─', + 'boxHD' => '╦', + 'boxHd' => '╤', + 'boxhD' => '╥', + 'boxhd' => '┬', + 'boxHU' => '╩', + 'boxHu' => '╧', + 'boxhU' => '╨', + 'boxhu' => '┴', + 'boxminus' => '⊟', + 'boxplus' => '⊞', + 'boxtimes' => '⊠', + 'boxUL' => '╝', + 'boxUl' => '╜', + 'boxuL' => '╛', + 'boxul' => '┘', + 'boxUR' => '╚', + 'boxUr' => '╙', + 'boxuR' => '╘', + 'boxur' => '└', + 'boxV' => '║', + 'boxv' => '│', + 'boxVH' => '╬', + 'boxVh' => '╫', + 'boxvH' => '╪', + 
'boxvh' => '┼', + 'boxVL' => '╣', + 'boxVl' => '╢', + 'boxvL' => '╡', + 'boxvl' => '┤', + 'boxVR' => '╠', + 'boxVr' => '╟', + 'boxvR' => '╞', + 'boxvr' => '├', + 'bprime' => '‵', + 'Breve' => '˘', + 'breve' => '˘', + 'brvbar' => '¦', + 'brvba' => '¦', + 'Bscr' => 'ℬ', + 'bscr' => '𝒷', + 'bsemi' => '⁏', + 'bsim' => '∽', + 'bsime' => '⋍', + 'bsol' => '\\', + 'bsolb' => '⧅', + 'bsolhsub' => '⟈', + 'bull' => '•', + 'bullet' => '•', + 'bump' => '≎', + 'bumpE' => '⪮', + 'bumpe' => '≏', + 'Bumpeq' => '≎', + 'bumpeq' => '≏', + 'Cacute' => 'Ć', + 'cacute' => 'ć', + 'Cap' => '⋒', + 'cap' => '∩', + 'capand' => '⩄', + 'capbrcup' => '⩉', + 'capcap' => '⩋', + 'capcup' => '⩇', + 'capdot' => '⩀', + 'CapitalDifferentialD' => 'ⅅ', + 'caps' => '∩︀', + 'caret' => '⁁', + 'caron' => 'ˇ', + 'Cayleys' => 'ℭ', + 'ccaps' => '⩍', + 'Ccaron' => 'Č', + 'ccaron' => 'č', + 'Ccedil' => 'Ç', + 'Ccedi' => 'Ç', + 'ccedil' => 'ç', + 'ccedi' => 'ç', + 'Ccirc' => 'Ĉ', + 'ccirc' => 'ĉ', + 'Cconint' => '∰', + 'ccups' => '⩌', + 'ccupssm' => '⩐', + 'Cdot' => 'Ċ', + 'cdot' => 'ċ', + 'cedil' => '¸', + 'cedi' => '¸', + 'Cedilla' => '¸', + 'cemptyv' => '⦲', + 'cent' => '¢', + 'cen' => '¢', + 'CenterDot' => '·', + 'centerdot' => '·', + 'Cfr' => 'ℭ', + 'cfr' => '𝔠', + 'CHcy' => 'Ч', + 'chcy' => 'ч', + 'check' => '✓', + 'checkmark' => '✓', + 'Chi' => 'Χ', + 'chi' => 'χ', + 'cir' => '○', + 'circ' => 'ˆ', + 'circeq' => '≗', + 'circlearrowleft' => '↺', + 'circlearrowright' => '↻', + 'circledast' => '⊛', + 'circledcirc' => '⊚', + 'circleddash' => '⊝', + 'CircleDot' => '⊙', + 'circledR' => '®', + 'circledS' => 'Ⓢ', + 'CircleMinus' => '⊖', + 'CirclePlus' => '⊕', + 'CircleTimes' => '⊗', + 'cirE' => '⧃', + 'cire' => '≗', + 'cirfnint' => '⨐', + 'cirmid' => '⫯', + 'cirscir' => '⧂', + 'ClockwiseContourIntegral' => '∲', + 'CloseCurlyDoubleQuote' => '”', + 'CloseCurlyQuote' => '’', + 'clubs' => '♣', + 'clubsuit' => '♣', + 'Colon' => '∷', + 'colon' => ':', + 'Colone' => '⩴', + 'colone' => '≔', + 'coloneq' => '≔', + 'comma' => 
',', + 'commat' => '@', + 'comp' => '∁', + 'compfn' => '∘', + 'complement' => '∁', + 'complexes' => 'ℂ', + 'cong' => '≅', + 'congdot' => '⩭', + 'Congruent' => '≡', + 'Conint' => '∯', + 'conint' => '∮', + 'ContourIntegral' => '∮', + 'Copf' => 'ℂ', + 'copf' => '𝕔', + 'coprod' => '∐', + 'Coproduct' => '∐', + 'COPY' => '©', + 'COP' => '©', + 'copy' => '©', + 'cop' => '©', + 'copysr' => '℗', + 'CounterClockwiseContourIntegral' => '∳', + 'crarr' => '↵', + 'Cross' => '⨯', + 'cross' => '✗', + 'Cscr' => '𝒞', + 'cscr' => '𝒸', + 'csub' => '⫏', + 'csube' => '⫑', + 'csup' => '⫐', + 'csupe' => '⫒', + 'ctdot' => '⋯', + 'cudarrl' => '⤸', + 'cudarrr' => '⤵', + 'cuepr' => '⋞', + 'cuesc' => '⋟', + 'cularr' => '↶', + 'cularrp' => '⤽', + 'Cup' => '⋓', + 'cup' => '∪', + 'cupbrcap' => '⩈', + 'CupCap' => '≍', + 'cupcap' => '⩆', + 'cupcup' => '⩊', + 'cupdot' => '⊍', + 'cupor' => '⩅', + 'cups' => '∪︀', + 'curarr' => '↷', + 'curarrm' => '⤼', + 'curlyeqprec' => '⋞', + 'curlyeqsucc' => '⋟', + 'curlyvee' => '⋎', + 'curlywedge' => '⋏', + 'curren' => '¤', + 'curre' => '¤', + 'curvearrowleft' => '↶', + 'curvearrowright' => '↷', + 'cuvee' => '⋎', + 'cuwed' => '⋏', + 'cwconint' => '∲', + 'cwint' => '∱', + 'cylcty' => '⌭', + 'Dagger' => '‡', + 'dagger' => '†', + 'daleth' => 'ℸ', + 'Darr' => '↡', + 'dArr' => '⇓', + 'darr' => '↓', + 'dash' => '‐', + 'Dashv' => '⫤', + 'dashv' => '⊣', + 'dbkarow' => '⤏', + 'dblac' => '˝', + 'Dcaron' => 'Ď', + 'dcaron' => 'ď', + 'Dcy' => 'Д', + 'dcy' => 'д', + 'DD' => 'ⅅ', + 'dd' => 'ⅆ', + 'ddagger' => '‡', + 'ddarr' => '⇊', + 'DDotrahd' => '⤑', + 'ddotseq' => '⩷', + 'deg' => '°', + 'de' => '°', + 'Del' => '∇', + 'Delta' => 'Δ', + 'delta' => 'δ', + 'demptyv' => '⦱', + 'dfisht' => '⥿', + 'Dfr' => '𝔇', + 'dfr' => '𝔡', + 'dHar' => '⥥', + 'dharl' => '⇃', + 'dharr' => '⇂', + 'DiacriticalAcute' => '´', + 'DiacriticalDot' => '˙', + 'DiacriticalDoubleAcute' => '˝', + 'DiacriticalGrave' => '`', + 'DiacriticalTilde' => '˜', + 'diam' => '⋄', + 'Diamond' => '⋄', + 'diamond' => '⋄', + 
'diamondsuit' => '♦', + 'diams' => '♦', + 'die' => '¨', + 'DifferentialD' => 'ⅆ', + 'digamma' => 'ϝ', + 'disin' => '⋲', + 'div' => '÷', + 'divide' => '÷', + 'divid' => '÷', + 'divideontimes' => '⋇', + 'divonx' => '⋇', + 'DJcy' => 'Ђ', + 'djcy' => 'ђ', + 'dlcorn' => '⌞', + 'dlcrop' => '⌍', + 'dollar' => '$', + 'Dopf' => '𝔻', + 'dopf' => '𝕕', + 'Dot' => '¨', + 'dot' => '˙', + 'DotDot' => '⃜', + 'doteq' => '≐', + 'doteqdot' => '≑', + 'DotEqual' => '≐', + 'dotminus' => '∸', + 'dotplus' => '∔', + 'dotsquare' => '⊡', + 'doublebarwedge' => '⌆', + 'DoubleContourIntegral' => '∯', + 'DoubleDot' => '¨', + 'DoubleDownArrow' => '⇓', + 'DoubleLeftArrow' => '⇐', + 'DoubleLeftRightArrow' => '⇔', + 'DoubleLeftTee' => '⫤', + 'DoubleLongLeftArrow' => '⟸', + 'DoubleLongLeftRightArrow' => '⟺', + 'DoubleLongRightArrow' => '⟹', + 'DoubleRightArrow' => '⇒', + 'DoubleRightTee' => '⊨', + 'DoubleUpArrow' => '⇑', + 'DoubleUpDownArrow' => '⇕', + 'DoubleVerticalBar' => '∥', + 'DownArrow' => '↓', + 'Downarrow' => '⇓', + 'downarrow' => '↓', + 'DownArrowBar' => '⤓', + 'DownArrowUpArrow' => '⇵', + 'DownBreve' => '̑', + 'downdownarrows' => '⇊', + 'downharpoonleft' => '⇃', + 'downharpoonright' => '⇂', + 'DownLeftRightVector' => '⥐', + 'DownLeftTeeVector' => '⥞', + 'DownLeftVector' => '↽', + 'DownLeftVectorBar' => '⥖', + 'DownRightTeeVector' => '⥟', + 'DownRightVector' => '⇁', + 'DownRightVectorBar' => '⥗', + 'DownTee' => '⊤', + 'DownTeeArrow' => '↧', + 'drbkarow' => '⤐', + 'drcorn' => '⌟', + 'drcrop' => '⌌', + 'Dscr' => '𝒟', + 'dscr' => '𝒹', + 'DScy' => 'Ѕ', + 'dscy' => 'ѕ', + 'dsol' => '⧶', + 'Dstrok' => 'Đ', + 'dstrok' => 'đ', + 'dtdot' => '⋱', + 'dtri' => '▿', + 'dtrif' => '▾', + 'duarr' => '⇵', + 'duhar' => '⥯', + 'dwangle' => '⦦', + 'DZcy' => 'Џ', + 'dzcy' => 'џ', + 'dzigrarr' => '⟿', + 'Eacute' => 'É', + 'Eacut' => 'É', + 'eacute' => 'é', + 'eacut' => 'é', + 'easter' => '⩮', + 'Ecaron' => 'Ě', + 'ecaron' => 'ě', + 'ecir' => 'ê', + 'Ecirc' => 'Ê', + 'Ecir' => 'Ê', + 'ecirc' => 'ê', + 'ecolon' => 
'≕', + 'Ecy' => 'Э', + 'ecy' => 'э', + 'eDDot' => '⩷', + 'Edot' => 'Ė', + 'eDot' => '≑', + 'edot' => 'ė', + 'ee' => 'ⅇ', + 'efDot' => '≒', + 'Efr' => '𝔈', + 'efr' => '𝔢', + 'eg' => '⪚', + 'Egrave' => 'È', + 'Egrav' => 'È', + 'egrave' => 'è', + 'egrav' => 'è', + 'egs' => '⪖', + 'egsdot' => '⪘', + 'el' => '⪙', + 'Element' => '∈', + 'elinters' => '⏧', + 'ell' => 'ℓ', + 'els' => '⪕', + 'elsdot' => '⪗', + 'Emacr' => 'Ē', + 'emacr' => 'ē', + 'empty' => '∅', + 'emptyset' => '∅', + 'EmptySmallSquare' => '◻', + 'emptyv' => '∅', + 'EmptyVerySmallSquare' => '▫', + 'emsp' => ' ', + 'emsp13' => ' ', + 'emsp14' => ' ', + 'ENG' => 'Ŋ', + 'eng' => 'ŋ', + 'ensp' => ' ', + 'Eogon' => 'Ę', + 'eogon' => 'ę', + 'Eopf' => '𝔼', + 'eopf' => '𝕖', + 'epar' => '⋕', + 'eparsl' => '⧣', + 'eplus' => '⩱', + 'epsi' => 'ε', + 'Epsilon' => 'Ε', + 'epsilon' => 'ε', + 'epsiv' => 'ϵ', + 'eqcirc' => '≖', + 'eqcolon' => '≕', + 'eqsim' => '≂', + 'eqslantgtr' => '⪖', + 'eqslantless' => '⪕', + 'Equal' => '⩵', + 'equals' => '=', + 'EqualTilde' => '≂', + 'equest' => '≟', + 'Equilibrium' => '⇌', + 'equiv' => '≡', + 'equivDD' => '⩸', + 'eqvparsl' => '⧥', + 'erarr' => '⥱', + 'erDot' => '≓', + 'Escr' => 'ℰ', + 'escr' => 'ℯ', + 'esdot' => '≐', + 'Esim' => '⩳', + 'esim' => '≂', + 'Eta' => 'Η', + 'eta' => 'η', + 'ETH' => 'Ð', + 'ET' => 'Ð', + 'eth' => 'ð', + 'et' => 'ð', + 'Euml' => 'Ë', + 'Eum' => 'Ë', + 'euml' => 'ë', + 'eum' => 'ë', + 'euro' => '€', + 'excl' => '!', + 'exist' => '∃', + 'Exists' => '∃', + 'expectation' => 'ℰ', + 'ExponentialE' => 'ⅇ', + 'exponentiale' => 'ⅇ', + 'fallingdotseq' => '≒', + 'Fcy' => 'Ф', + 'fcy' => 'ф', + 'female' => '♀', + 'ffilig' => 'ffi', + 'fflig' => 'ff', + 'ffllig' => 'ffl', + 'Ffr' => '𝔉', + 'ffr' => '𝔣', + 'filig' => 'fi', + 'FilledSmallSquare' => '◼', + 'FilledVerySmallSquare' => '▪', + 'fjlig' => 'fj', + 'flat' => '♭', + 'fllig' => 'fl', + 'fltns' => '▱', + 'fnof' => 'ƒ', + 'Fopf' => '𝔽', + 'fopf' => '𝕗', + 'ForAll' => '∀', + 'forall' => '∀', + 'fork' => '⋔', + 'forkv' => 
'⫙', + 'Fouriertrf' => 'ℱ', + 'fpartint' => '⨍', + 'frac12' => '½', + 'frac1' => '¼', + 'frac13' => '⅓', + 'frac14' => '¼', + 'frac15' => '⅕', + 'frac16' => '⅙', + 'frac18' => '⅛', + 'frac23' => '⅔', + 'frac25' => '⅖', + 'frac34' => '¾', + 'frac3' => '¾', + 'frac35' => '⅗', + 'frac38' => '⅜', + 'frac45' => '⅘', + 'frac56' => '⅚', + 'frac58' => '⅝', + 'frac78' => '⅞', + 'frasl' => '⁄', + 'frown' => '⌢', + 'Fscr' => 'ℱ', + 'fscr' => '𝒻', + 'gacute' => 'ǵ', + 'Gamma' => 'Γ', + 'gamma' => 'γ', + 'Gammad' => 'Ϝ', + 'gammad' => 'ϝ', + 'gap' => '⪆', + 'Gbreve' => 'Ğ', + 'gbreve' => 'ğ', + 'Gcedil' => 'Ģ', + 'Gcirc' => 'Ĝ', + 'gcirc' => 'ĝ', + 'Gcy' => 'Г', + 'gcy' => 'г', + 'Gdot' => 'Ġ', + 'gdot' => 'ġ', + 'gE' => '≧', + 'ge' => '≥', + 'gEl' => '⪌', + 'gel' => '⋛', + 'geq' => '≥', + 'geqq' => '≧', + 'geqslant' => '⩾', + 'ges' => '⩾', + 'gescc' => '⪩', + 'gesdot' => '⪀', + 'gesdoto' => '⪂', + 'gesdotol' => '⪄', + 'gesl' => '⋛︀', + 'gesles' => '⪔', + 'Gfr' => '𝔊', + 'gfr' => '𝔤', + 'Gg' => '⋙', + 'gg' => '≫', + 'ggg' => '⋙', + 'gimel' => 'ℷ', + 'GJcy' => 'Ѓ', + 'gjcy' => 'ѓ', + 'gl' => '≷', + 'gla' => '⪥', + 'glE' => '⪒', + 'glj' => '⪤', + 'gnap' => '⪊', + 'gnapprox' => '⪊', + 'gnE' => '≩', + 'gne' => '⪈', + 'gneq' => '⪈', + 'gneqq' => '≩', + 'gnsim' => '⋧', + 'Gopf' => '𝔾', + 'gopf' => '𝕘', + 'grave' => '`', + 'GreaterEqual' => '≥', + 'GreaterEqualLess' => '⋛', + 'GreaterFullEqual' => '≧', + 'GreaterGreater' => '⪢', + 'GreaterLess' => '≷', + 'GreaterSlantEqual' => '⩾', + 'GreaterTilde' => '≳', + 'Gscr' => '𝒢', + 'gscr' => 'ℊ', + 'gsim' => '≳', + 'gsime' => '⪎', + 'gsiml' => '⪐', + 'GT' => '>', + 'G' => '>', + 'Gt' => '≫', + 'gt' => '>', + 'g' => '>', + 'gtcc' => '⪧', + 'gtcir' => '⩺', + 'gtdot' => '⋗', + 'gtlPar' => '⦕', + 'gtquest' => '⩼', + 'gtrapprox' => '⪆', + 'gtrarr' => '⥸', + 'gtrdot' => '⋗', + 'gtreqless' => '⋛', + 'gtreqqless' => '⪌', + 'gtrless' => '≷', + 'gtrsim' => '≳', + 'gvertneqq' => '≩︀', + 'gvnE' => '≩︀', + 'Hacek' => 'ˇ', + 'hairsp' => ' ', + 'half' => 
'½', + 'hamilt' => 'ℋ', + 'HARDcy' => 'Ъ', + 'hardcy' => 'ъ', + 'hArr' => '⇔', + 'harr' => '↔', + 'harrcir' => '⥈', + 'harrw' => '↭', + 'Hat' => '^', + 'hbar' => 'ℏ', + 'Hcirc' => 'Ĥ', + 'hcirc' => 'ĥ', + 'hearts' => '♥', + 'heartsuit' => '♥', + 'hellip' => '…', + 'hercon' => '⊹', + 'Hfr' => 'ℌ', + 'hfr' => '𝔥', + 'HilbertSpace' => 'ℋ', + 'hksearow' => '⤥', + 'hkswarow' => '⤦', + 'hoarr' => '⇿', + 'homtht' => '∻', + 'hookleftarrow' => '↩', + 'hookrightarrow' => '↪', + 'Hopf' => 'ℍ', + 'hopf' => '𝕙', + 'horbar' => '―', + 'HorizontalLine' => '─', + 'Hscr' => 'ℋ', + 'hscr' => '𝒽', + 'hslash' => 'ℏ', + 'Hstrok' => 'Ħ', + 'hstrok' => 'ħ', + 'HumpDownHump' => '≎', + 'HumpEqual' => '≏', + 'hybull' => '⁃', + 'hyphen' => '‐', + 'Iacute' => 'Í', + 'Iacut' => 'Í', + 'iacute' => 'í', + 'iacut' => 'í', + 'ic' => '⁣', + 'Icirc' => 'Î', + 'Icir' => 'Î', + 'icirc' => 'î', + 'icir' => 'î', + 'Icy' => 'И', + 'icy' => 'и', + 'Idot' => 'İ', + 'IEcy' => 'Е', + 'iecy' => 'е', + 'iexcl' => '¡', + 'iexc' => '¡', + 'iff' => '⇔', + 'Ifr' => 'ℑ', + 'ifr' => '𝔦', + 'Igrave' => 'Ì', + 'Igrav' => 'Ì', + 'igrave' => 'ì', + 'igrav' => 'ì', + 'ii' => 'ⅈ', + 'iiiint' => '⨌', + 'iiint' => '∭', + 'iinfin' => '⧜', + 'iiota' => '℩', + 'IJlig' => 'IJ', + 'ijlig' => 'ij', + 'Im' => 'ℑ', + 'Imacr' => 'Ī', + 'imacr' => 'ī', + 'image' => 'ℑ', + 'ImaginaryI' => 'ⅈ', + 'imagline' => 'ℐ', + 'imagpart' => 'ℑ', + 'imath' => 'ı', + 'imof' => '⊷', + 'imped' => 'Ƶ', + 'Implies' => '⇒', + 'in' => '∈', + 'incare' => '℅', + 'infin' => '∞', + 'infintie' => '⧝', + 'inodot' => 'ı', + 'Int' => '∬', + 'int' => '∫', + 'intcal' => '⊺', + 'integers' => 'ℤ', + 'Integral' => '∫', + 'intercal' => '⊺', + 'Intersection' => '⋂', + 'intlarhk' => '⨗', + 'intprod' => '⨼', + 'InvisibleComma' => '⁣', + 'InvisibleTimes' => '⁢', + 'IOcy' => 'Ё', + 'iocy' => 'ё', + 'Iogon' => 'Į', + 'iogon' => 'į', + 'Iopf' => '𝕀', + 'iopf' => '𝕚', + 'Iota' => 'Ι', + 'iota' => 'ι', + 'iprod' => '⨼', + 'iquest' => '¿', + 'iques' => '¿', + 'Iscr' => 'ℐ', + 
'iscr' => '𝒾', + 'isin' => '∈', + 'isindot' => '⋵', + 'isinE' => '⋹', + 'isins' => '⋴', + 'isinsv' => '⋳', + 'isinv' => '∈', + 'it' => '⁢', + 'Itilde' => 'Ĩ', + 'itilde' => 'ĩ', + 'Iukcy' => 'І', + 'iukcy' => 'і', + 'Iuml' => 'Ï', + 'Ium' => 'Ï', + 'iuml' => 'ï', + 'ium' => 'ï', + 'Jcirc' => 'Ĵ', + 'jcirc' => 'ĵ', + 'Jcy' => 'Й', + 'jcy' => 'й', + 'Jfr' => '𝔍', + 'jfr' => '𝔧', + 'jmath' => 'ȷ', + 'Jopf' => '𝕁', + 'jopf' => '𝕛', + 'Jscr' => '𝒥', + 'jscr' => '𝒿', + 'Jsercy' => 'Ј', + 'jsercy' => 'ј', + 'Jukcy' => 'Є', + 'jukcy' => 'є', + 'Kappa' => 'Κ', + 'kappa' => 'κ', + 'kappav' => 'ϰ', + 'Kcedil' => 'Ķ', + 'kcedil' => 'ķ', + 'Kcy' => 'К', + 'kcy' => 'к', + 'Kfr' => '𝔎', + 'kfr' => '𝔨', + 'kgreen' => 'ĸ', + 'KHcy' => 'Х', + 'khcy' => 'х', + 'KJcy' => 'Ќ', + 'kjcy' => 'ќ', + 'Kopf' => '𝕂', + 'kopf' => '𝕜', + 'Kscr' => '𝒦', + 'kscr' => '𝓀', + 'lAarr' => '⇚', + 'Lacute' => 'Ĺ', + 'lacute' => 'ĺ', + 'laemptyv' => '⦴', + 'lagran' => 'ℒ', + 'Lambda' => 'Λ', + 'lambda' => 'λ', + 'Lang' => '⟪', + 'lang' => '⟨', + 'langd' => '⦑', + 'langle' => '⟨', + 'lap' => '⪅', + 'Laplacetrf' => 'ℒ', + 'laquo' => '«', + 'laqu' => '«', + 'Larr' => '↞', + 'lArr' => '⇐', + 'larr' => '←', + 'larrb' => '⇤', + 'larrbfs' => '⤟', + 'larrfs' => '⤝', + 'larrhk' => '↩', + 'larrlp' => '↫', + 'larrpl' => '⤹', + 'larrsim' => '⥳', + 'larrtl' => '↢', + 'lat' => '⪫', + 'lAtail' => '⤛', + 'latail' => '⤙', + 'late' => '⪭', + 'lates' => '⪭︀', + 'lBarr' => '⤎', + 'lbarr' => '⤌', + 'lbbrk' => '❲', + 'lbrace' => '{', + 'lbrack' => '[', + 'lbrke' => '⦋', + 'lbrksld' => '⦏', + 'lbrkslu' => '⦍', + 'Lcaron' => 'Ľ', + 'lcaron' => 'ľ', + 'Lcedil' => 'Ļ', + 'lcedil' => 'ļ', + 'lceil' => '⌈', + 'lcub' => '{', + 'Lcy' => 'Л', + 'lcy' => 'л', + 'ldca' => '⤶', + 'ldquo' => '“', + 'ldquor' => '„', + 'ldrdhar' => '⥧', + 'ldrushar' => '⥋', + 'ldsh' => '↲', + 'lE' => '≦', + 'le' => '≤', + 'LeftAngleBracket' => '⟨', + 'LeftArrow' => '←', + 'Leftarrow' => '⇐', + 'leftarrow' => '←', + 'LeftArrowBar' => '⇤', + 
'LeftArrowRightArrow' => '⇆', + 'leftarrowtail' => '↢', + 'LeftCeiling' => '⌈', + 'LeftDoubleBracket' => '⟦', + 'LeftDownTeeVector' => '⥡', + 'LeftDownVector' => '⇃', + 'LeftDownVectorBar' => '⥙', + 'LeftFloor' => '⌊', + 'leftharpoondown' => '↽', + 'leftharpoonup' => '↼', + 'leftleftarrows' => '⇇', + 'LeftRightArrow' => '↔', + 'Leftrightarrow' => '⇔', + 'leftrightarrow' => '↔', + 'leftrightarrows' => '⇆', + 'leftrightharpoons' => '⇋', + 'leftrightsquigarrow' => '↭', + 'LeftRightVector' => '⥎', + 'LeftTee' => '⊣', + 'LeftTeeArrow' => '↤', + 'LeftTeeVector' => '⥚', + 'leftthreetimes' => '⋋', + 'LeftTriangle' => '⊲', + 'LeftTriangleBar' => '⧏', + 'LeftTriangleEqual' => '⊴', + 'LeftUpDownVector' => '⥑', + 'LeftUpTeeVector' => '⥠', + 'LeftUpVector' => '↿', + 'LeftUpVectorBar' => '⥘', + 'LeftVector' => '↼', + 'LeftVectorBar' => '⥒', + 'lEg' => '⪋', + 'leg' => '⋚', + 'leq' => '≤', + 'leqq' => '≦', + 'leqslant' => '⩽', + 'les' => '⩽', + 'lescc' => '⪨', + 'lesdot' => '⩿', + 'lesdoto' => '⪁', + 'lesdotor' => '⪃', + 'lesg' => '⋚︀', + 'lesges' => '⪓', + 'lessapprox' => '⪅', + 'lessdot' => '⋖', + 'lesseqgtr' => '⋚', + 'lesseqqgtr' => '⪋', + 'LessEqualGreater' => '⋚', + 'LessFullEqual' => '≦', + 'LessGreater' => '≶', + 'lessgtr' => '≶', + 'LessLess' => '⪡', + 'lesssim' => '≲', + 'LessSlantEqual' => '⩽', + 'LessTilde' => '≲', + 'lfisht' => '⥼', + 'lfloor' => '⌊', + 'Lfr' => '𝔏', + 'lfr' => '𝔩', + 'lg' => '≶', + 'lgE' => '⪑', + 'lHar' => '⥢', + 'lhard' => '↽', + 'lharu' => '↼', + 'lharul' => '⥪', + 'lhblk' => '▄', + 'LJcy' => 'Љ', + 'ljcy' => 'љ', + 'Ll' => '⋘', + 'll' => '≪', + 'llarr' => '⇇', + 'llcorner' => '⌞', + 'Lleftarrow' => '⇚', + 'llhard' => '⥫', + 'lltri' => '◺', + 'Lmidot' => 'Ŀ', + 'lmidot' => 'ŀ', + 'lmoust' => '⎰', + 'lmoustache' => '⎰', + 'lnap' => '⪉', + 'lnapprox' => '⪉', + 'lnE' => '≨', + 'lne' => '⪇', + 'lneq' => '⪇', + 'lneqq' => '≨', + 'lnsim' => '⋦', + 'loang' => '⟬', + 'loarr' => '⇽', + 'lobrk' => '⟦', + 'LongLeftArrow' => '⟵', + 'Longleftarrow' => '⟸', + 
'longleftarrow' => '⟵', + 'LongLeftRightArrow' => '⟷', + 'Longleftrightarrow' => '⟺', + 'longleftrightarrow' => '⟷', + 'longmapsto' => '⟼', + 'LongRightArrow' => '⟶', + 'Longrightarrow' => '⟹', + 'longrightarrow' => '⟶', + 'looparrowleft' => '↫', + 'looparrowright' => '↬', + 'lopar' => '⦅', + 'Lopf' => '𝕃', + 'lopf' => '𝕝', + 'loplus' => '⨭', + 'lotimes' => '⨴', + 'lowast' => '∗', + 'lowbar' => '_', + 'LowerLeftArrow' => '↙', + 'LowerRightArrow' => '↘', + 'loz' => '◊', + 'lozenge' => '◊', + 'lozf' => '⧫', + 'lpar' => '(', + 'lparlt' => '⦓', + 'lrarr' => '⇆', + 'lrcorner' => '⌟', + 'lrhar' => '⇋', + 'lrhard' => '⥭', + 'lrm' => '‎', + 'lrtri' => '⊿', + 'lsaquo' => '‹', + 'Lscr' => 'ℒ', + 'lscr' => '𝓁', + 'Lsh' => '↰', + 'lsh' => '↰', + 'lsim' => '≲', + 'lsime' => '⪍', + 'lsimg' => '⪏', + 'lsqb' => '[', + 'lsquo' => '‘', + 'lsquor' => '‚', + 'Lstrok' => 'Ł', + 'lstrok' => 'ł', + 'LT' => '<', + 'L' => '<', + 'Lt' => '≪', + 'lt' => '<', + 'l' => '<', + 'ltcc' => '⪦', + 'ltcir' => '⩹', + 'ltdot' => '⋖', + 'lthree' => '⋋', + 'ltimes' => '⋉', + 'ltlarr' => '⥶', + 'ltquest' => '⩻', + 'ltri' => '◃', + 'ltrie' => '⊴', + 'ltrif' => '◂', + 'ltrPar' => '⦖', + 'lurdshar' => '⥊', + 'luruhar' => '⥦', + 'lvertneqq' => '≨︀', + 'lvnE' => '≨︀', + 'macr' => '¯', + 'mac' => '¯', + 'male' => '♂', + 'malt' => '✠', + 'maltese' => '✠', + 'Map' => '⤅', + 'map' => '↦', + 'mapsto' => '↦', + 'mapstodown' => '↧', + 'mapstoleft' => '↤', + 'mapstoup' => '↥', + 'marker' => '▮', + 'mcomma' => '⨩', + 'Mcy' => 'М', + 'mcy' => 'м', + 'mdash' => '—', + 'mDDot' => '∺', + 'measuredangle' => '∡', + 'MediumSpace' => ' ', + 'Mellintrf' => 'ℳ', + 'Mfr' => '𝔐', + 'mfr' => '𝔪', + 'mho' => '℧', + 'micro' => 'µ', + 'micr' => 'µ', + 'mid' => '∣', + 'midast' => '*', + 'midcir' => '⫰', + 'middot' => '·', + 'middo' => '·', + 'minus' => '−', + 'minusb' => '⊟', + 'minusd' => '∸', + 'minusdu' => '⨪', + 'MinusPlus' => '∓', + 'mlcp' => '⫛', + 'mldr' => '…', + 'mnplus' => '∓', + 'models' => '⊧', + 'Mopf' => '𝕄', + 'mopf' => 
'𝕞', + 'mp' => '∓', + 'Mscr' => 'ℳ', + 'mscr' => '𝓂', + 'mstpos' => '∾', + 'Mu' => 'Μ', + 'mu' => 'μ', + 'multimap' => '⊸', + 'mumap' => '⊸', + 'nabla' => '∇', + 'Nacute' => 'Ń', + 'nacute' => 'ń', + 'nang' => '∠⃒', + 'nap' => '≉', + 'napE' => '⩰̸', + 'napid' => '≋̸', + 'napos' => 'ʼn', + 'napprox' => '≉', + 'natur' => '♮', + 'natural' => '♮', + 'naturals' => 'ℕ', + 'nbsp' => ' ', + 'nbs' => ' ', + 'nbump' => '≎̸', + 'nbumpe' => '≏̸', + 'ncap' => '⩃', + 'Ncaron' => 'Ň', + 'ncaron' => 'ň', + 'Ncedil' => 'Ņ', + 'ncedil' => 'ņ', + 'ncong' => '≇', + 'ncongdot' => '⩭̸', + 'ncup' => '⩂', + 'Ncy' => 'Н', + 'ncy' => 'н', + 'ndash' => '–', + 'ne' => '≠', + 'nearhk' => '⤤', + 'neArr' => '⇗', + 'nearr' => '↗', + 'nearrow' => '↗', + 'nedot' => '≐̸', + 'NegativeMediumSpace' => '​', + 'NegativeThickSpace' => '​', + 'NegativeThinSpace' => '​', + 'NegativeVeryThinSpace' => '​', + 'nequiv' => '≢', + 'nesear' => '⤨', + 'nesim' => '≂̸', + 'NestedGreaterGreater' => '≫', + 'NestedLessLess' => '≪', + 'NewLine' => ' +', + 'nexist' => '∄', + 'nexists' => '∄', + 'Nfr' => '𝔑', + 'nfr' => '𝔫', + 'ngE' => '≧̸', + 'nge' => '≱', + 'ngeq' => '≱', + 'ngeqq' => '≧̸', + 'ngeqslant' => '⩾̸', + 'nges' => '⩾̸', + 'nGg' => '⋙̸', + 'ngsim' => '≵', + 'nGt' => '≫⃒', + 'ngt' => '≯', + 'ngtr' => '≯', + 'nGtv' => '≫̸', + 'nhArr' => '⇎', + 'nharr' => '↮', + 'nhpar' => '⫲', + 'ni' => '∋', + 'nis' => '⋼', + 'nisd' => '⋺', + 'niv' => '∋', + 'NJcy' => 'Њ', + 'njcy' => 'њ', + 'nlArr' => '⇍', + 'nlarr' => '↚', + 'nldr' => '‥', + 'nlE' => '≦̸', + 'nle' => '≰', + 'nLeftarrow' => '⇍', + 'nleftarrow' => '↚', + 'nLeftrightarrow' => '⇎', + 'nleftrightarrow' => '↮', + 'nleq' => '≰', + 'nleqq' => '≦̸', + 'nleqslant' => '⩽̸', + 'nles' => '⩽̸', + 'nless' => '≮', + 'nLl' => '⋘̸', + 'nlsim' => '≴', + 'nLt' => '≪⃒', + 'nlt' => '≮', + 'nltri' => '⋪', + 'nltrie' => '⋬', + 'nLtv' => '≪̸', + 'nmid' => '∤', + 'NoBreak' => '⁠', + 'NonBreakingSpace' => ' ', + 'Nopf' => 'ℕ', + 'nopf' => '𝕟', + 'Not' => '⫬', + 'not' => '¬', + 'no' => 
'¬', + 'NotCongruent' => '≢', + 'NotCupCap' => '≭', + 'NotDoubleVerticalBar' => '∦', + 'NotElement' => '∉', + 'NotEqual' => '≠', + 'NotEqualTilde' => '≂̸', + 'NotExists' => '∄', + 'NotGreater' => '≯', + 'NotGreaterEqual' => '≱', + 'NotGreaterFullEqual' => '≧̸', + 'NotGreaterGreater' => '≫̸', + 'NotGreaterLess' => '≹', + 'NotGreaterSlantEqual' => '⩾̸', + 'NotGreaterTilde' => '≵', + 'NotHumpDownHump' => '≎̸', + 'NotHumpEqual' => '≏̸', + 'notin' => '∉', + 'notindot' => '⋵̸', + 'notinE' => '⋹̸', + 'notinva' => '∉', + 'notinvb' => '⋷', + 'notinvc' => '⋶', + 'NotLeftTriangle' => '⋪', + 'NotLeftTriangleBar' => '⧏̸', + 'NotLeftTriangleEqual' => '⋬', + 'NotLess' => '≮', + 'NotLessEqual' => '≰', + 'NotLessGreater' => '≸', + 'NotLessLess' => '≪̸', + 'NotLessSlantEqual' => '⩽̸', + 'NotLessTilde' => '≴', + 'NotNestedGreaterGreater' => '⪢̸', + 'NotNestedLessLess' => '⪡̸', + 'notni' => '∌', + 'notniva' => '∌', + 'notnivb' => '⋾', + 'notnivc' => '⋽', + 'NotPrecedes' => '⊀', + 'NotPrecedesEqual' => '⪯̸', + 'NotPrecedesSlantEqual' => '⋠', + 'NotReverseElement' => '∌', + 'NotRightTriangle' => '⋫', + 'NotRightTriangleBar' => '⧐̸', + 'NotRightTriangleEqual' => '⋭', + 'NotSquareSubset' => '⊏̸', + 'NotSquareSubsetEqual' => '⋢', + 'NotSquareSuperset' => '⊐̸', + 'NotSquareSupersetEqual' => '⋣', + 'NotSubset' => '⊂⃒', + 'NotSubsetEqual' => '⊈', + 'NotSucceeds' => '⊁', + 'NotSucceedsEqual' => '⪰̸', + 'NotSucceedsSlantEqual' => '⋡', + 'NotSucceedsTilde' => '≿̸', + 'NotSuperset' => '⊃⃒', + 'NotSupersetEqual' => '⊉', + 'NotTilde' => '≁', + 'NotTildeEqual' => '≄', + 'NotTildeFullEqual' => '≇', + 'NotTildeTilde' => '≉', + 'NotVerticalBar' => '∤', + 'npar' => '∦', + 'nparallel' => '∦', + 'nparsl' => '⫽⃥', + 'npart' => '∂̸', + 'npolint' => '⨔', + 'npr' => '⊀', + 'nprcue' => '⋠', + 'npre' => '⪯̸', + 'nprec' => '⊀', + 'npreceq' => '⪯̸', + 'nrArr' => '⇏', + 'nrarr' => '↛', + 'nrarrc' => '⤳̸', + 'nrarrw' => '↝̸', + 'nRightarrow' => '⇏', + 'nrightarrow' => '↛', + 'nrtri' => '⋫', + 'nrtrie' => '⋭', + 
'nsc' => '⊁', + 'nsccue' => '⋡', + 'nsce' => '⪰̸', + 'Nscr' => '𝒩', + 'nscr' => '𝓃', + 'nshortmid' => '∤', + 'nshortparallel' => '∦', + 'nsim' => '≁', + 'nsime' => '≄', + 'nsimeq' => '≄', + 'nsmid' => '∤', + 'nspar' => '∦', + 'nsqsube' => '⋢', + 'nsqsupe' => '⋣', + 'nsub' => '⊄', + 'nsubE' => '⫅̸', + 'nsube' => '⊈', + 'nsubset' => '⊂⃒', + 'nsubseteq' => '⊈', + 'nsubseteqq' => '⫅̸', + 'nsucc' => '⊁', + 'nsucceq' => '⪰̸', + 'nsup' => '⊅', + 'nsupE' => '⫆̸', + 'nsupe' => '⊉', + 'nsupset' => '⊃⃒', + 'nsupseteq' => '⊉', + 'nsupseteqq' => '⫆̸', + 'ntgl' => '≹', + 'Ntilde' => 'Ñ', + 'Ntild' => 'Ñ', + 'ntilde' => 'ñ', + 'ntild' => 'ñ', + 'ntlg' => '≸', + 'ntriangleleft' => '⋪', + 'ntrianglelefteq' => '⋬', + 'ntriangleright' => '⋫', + 'ntrianglerighteq' => '⋭', + 'Nu' => 'Ν', + 'nu' => 'ν', + 'num' => '#', + 'numero' => '№', + 'numsp' => ' ', + 'nvap' => '≍⃒', + 'nVDash' => '⊯', + 'nVdash' => '⊮', + 'nvDash' => '⊭', + 'nvdash' => '⊬', + 'nvge' => '≥⃒', + 'nvgt' => '>⃒', + 'nvHarr' => '⤄', + 'nvinfin' => '⧞', + 'nvlArr' => '⤂', + 'nvle' => '≤⃒', + 'nvlt' => '<⃒', + 'nvltrie' => '⊴⃒', + 'nvrArr' => '⤃', + 'nvrtrie' => '⊵⃒', + 'nvsim' => '∼⃒', + 'nwarhk' => '⤣', + 'nwArr' => '⇖', + 'nwarr' => '↖', + 'nwarrow' => '↖', + 'nwnear' => '⤧', + 'Oacute' => 'Ó', + 'Oacut' => 'Ó', + 'oacute' => 'ó', + 'oacut' => 'ó', + 'oast' => '⊛', + 'ocir' => 'ô', + 'Ocirc' => 'Ô', + 'Ocir' => 'Ô', + 'ocirc' => 'ô', + 'Ocy' => 'О', + 'ocy' => 'о', + 'odash' => '⊝', + 'Odblac' => 'Ő', + 'odblac' => 'ő', + 'odiv' => '⨸', + 'odot' => '⊙', + 'odsold' => '⦼', + 'OElig' => 'Œ', + 'oelig' => 'œ', + 'ofcir' => '⦿', + 'Ofr' => '𝔒', + 'ofr' => '𝔬', + 'ogon' => '˛', + 'Ograve' => 'Ò', + 'Ograv' => 'Ò', + 'ograve' => 'ò', + 'ograv' => 'ò', + 'ogt' => '⧁', + 'ohbar' => '⦵', + 'ohm' => 'Ω', + 'oint' => '∮', + 'olarr' => '↺', + 'olcir' => '⦾', + 'olcross' => '⦻', + 'oline' => '‾', + 'olt' => '⧀', + 'Omacr' => 'Ō', + 'omacr' => 'ō', + 'Omega' => 'Ω', + 'omega' => 'ω', + 'Omicron' => 'Ο', + 'omicron' => 'ο', + 
'omid' => '⦶', + 'ominus' => '⊖', + 'Oopf' => '𝕆', + 'oopf' => '𝕠', + 'opar' => '⦷', + 'OpenCurlyDoubleQuote' => '“', + 'OpenCurlyQuote' => '‘', + 'operp' => '⦹', + 'oplus' => '⊕', + 'Or' => '⩔', + 'or' => '∨', + 'orarr' => '↻', + 'ord' => 'º', + 'order' => 'ℴ', + 'orderof' => 'ℴ', + 'ordf' => 'ª', + 'ordm' => 'º', + 'origof' => '⊶', + 'oror' => '⩖', + 'orslope' => '⩗', + 'orv' => '⩛', + 'oS' => 'Ⓢ', + 'Oscr' => '𝒪', + 'oscr' => 'ℴ', + 'Oslash' => 'Ø', + 'Oslas' => 'Ø', + 'oslash' => 'ø', + 'oslas' => 'ø', + 'osol' => '⊘', + 'Otilde' => 'Õ', + 'Otild' => 'Õ', + 'otilde' => 'õ', + 'otild' => 'õ', + 'Otimes' => '⨷', + 'otimes' => '⊗', + 'otimesas' => '⨶', + 'Ouml' => 'Ö', + 'Oum' => 'Ö', + 'ouml' => 'ö', + 'oum' => 'ö', + 'ovbar' => '⌽', + 'OverBar' => '‾', + 'OverBrace' => '⏞', + 'OverBracket' => '⎴', + 'OverParenthesis' => '⏜', + 'par' => '¶', + 'para' => '¶', + 'parallel' => '∥', + 'parsim' => '⫳', + 'parsl' => '⫽', + 'part' => '∂', + 'PartialD' => '∂', + 'Pcy' => 'П', + 'pcy' => 'п', + 'percnt' => '%', + 'period' => '.', + 'permil' => '‰', + 'perp' => '⊥', + 'pertenk' => '‱', + 'Pfr' => '𝔓', + 'pfr' => '𝔭', + 'Phi' => 'Φ', + 'phi' => 'φ', + 'phiv' => 'ϕ', + 'phmmat' => 'ℳ', + 'phone' => '☎', + 'Pi' => 'Π', + 'pi' => 'π', + 'pitchfork' => '⋔', + 'piv' => 'ϖ', + 'planck' => 'ℏ', + 'planckh' => 'ℎ', + 'plankv' => 'ℏ', + 'plus' => '+', + 'plusacir' => '⨣', + 'plusb' => '⊞', + 'pluscir' => '⨢', + 'plusdo' => '∔', + 'plusdu' => '⨥', + 'pluse' => '⩲', + 'PlusMinus' => '±', + 'plusmn' => '±', + 'plusm' => '±', + 'plussim' => '⨦', + 'plustwo' => '⨧', + 'pm' => '±', + 'Poincareplane' => 'ℌ', + 'pointint' => '⨕', + 'Popf' => 'ℙ', + 'popf' => '𝕡', + 'pound' => '£', + 'poun' => '£', + 'Pr' => '⪻', + 'pr' => '≺', + 'prap' => '⪷', + 'prcue' => '≼', + 'prE' => '⪳', + 'pre' => '⪯', + 'prec' => '≺', + 'precapprox' => '⪷', + 'preccurlyeq' => '≼', + 'Precedes' => '≺', + 'PrecedesEqual' => '⪯', + 'PrecedesSlantEqual' => '≼', + 'PrecedesTilde' => '≾', + 'preceq' => '⪯', + 
'precnapprox' => '⪹', + 'precneqq' => '⪵', + 'precnsim' => '⋨', + 'precsim' => '≾', + 'Prime' => '″', + 'prime' => '′', + 'primes' => 'ℙ', + 'prnap' => '⪹', + 'prnE' => '⪵', + 'prnsim' => '⋨', + 'prod' => '∏', + 'Product' => '∏', + 'profalar' => '⌮', + 'profline' => '⌒', + 'profsurf' => '⌓', + 'prop' => '∝', + 'Proportion' => '∷', + 'Proportional' => '∝', + 'propto' => '∝', + 'prsim' => '≾', + 'prurel' => '⊰', + 'Pscr' => '𝒫', + 'pscr' => '𝓅', + 'Psi' => 'Ψ', + 'psi' => 'ψ', + 'puncsp' => ' ', + 'Qfr' => '𝔔', + 'qfr' => '𝔮', + 'qint' => '⨌', + 'Qopf' => 'ℚ', + 'qopf' => '𝕢', + 'qprime' => '⁗', + 'Qscr' => '𝒬', + 'qscr' => '𝓆', + 'quaternions' => 'ℍ', + 'quatint' => '⨖', + 'quest' => '?', + 'questeq' => '≟', + 'QUOT' => '"', + 'QUO' => '"', + 'quot' => '"', + 'quo' => '"', + 'rAarr' => '⇛', + 'race' => '∽̱', + 'Racute' => 'Ŕ', + 'racute' => 'ŕ', + 'radic' => '√', + 'raemptyv' => '⦳', + 'Rang' => '⟫', + 'rang' => '⟩', + 'rangd' => '⦒', + 'range' => '⦥', + 'rangle' => '⟩', + 'raquo' => '»', + 'raqu' => '»', + 'Rarr' => '↠', + 'rArr' => '⇒', + 'rarr' => '→', + 'rarrap' => '⥵', + 'rarrb' => '⇥', + 'rarrbfs' => '⤠', + 'rarrc' => '⤳', + 'rarrfs' => '⤞', + 'rarrhk' => '↪', + 'rarrlp' => '↬', + 'rarrpl' => '⥅', + 'rarrsim' => '⥴', + 'Rarrtl' => '⤖', + 'rarrtl' => '↣', + 'rarrw' => '↝', + 'rAtail' => '⤜', + 'ratail' => '⤚', + 'ratio' => '∶', + 'rationals' => 'ℚ', + 'RBarr' => '⤐', + 'rBarr' => '⤏', + 'rbarr' => '⤍', + 'rbbrk' => '❳', + 'rbrace' => '}', + 'rbrack' => ']', + 'rbrke' => '⦌', + 'rbrksld' => '⦎', + 'rbrkslu' => '⦐', + 'Rcaron' => 'Ř', + 'rcaron' => 'ř', + 'Rcedil' => 'Ŗ', + 'rcedil' => 'ŗ', + 'rceil' => '⌉', + 'rcub' => '}', + 'Rcy' => 'Р', + 'rcy' => 'р', + 'rdca' => '⤷', + 'rdldhar' => '⥩', + 'rdquo' => '”', + 'rdquor' => '”', + 'rdsh' => '↳', + 'Re' => 'ℜ', + 'real' => 'ℜ', + 'realine' => 'ℛ', + 'realpart' => 'ℜ', + 'reals' => 'ℝ', + 'rect' => '▭', + 'REG' => '®', + 'RE' => '®', + 'reg' => '®', + 're' => '®', + 'ReverseElement' => '∋', + 'ReverseEquilibrium' 
=> '⇋', + 'ReverseUpEquilibrium' => '⥯', + 'rfisht' => '⥽', + 'rfloor' => '⌋', + 'Rfr' => 'ℜ', + 'rfr' => '𝔯', + 'rHar' => '⥤', + 'rhard' => '⇁', + 'rharu' => '⇀', + 'rharul' => '⥬', + 'Rho' => 'Ρ', + 'rho' => 'ρ', + 'rhov' => 'ϱ', + 'RightAngleBracket' => '⟩', + 'RightArrow' => '→', + 'Rightarrow' => '⇒', + 'rightarrow' => '→', + 'RightArrowBar' => '⇥', + 'RightArrowLeftArrow' => '⇄', + 'rightarrowtail' => '↣', + 'RightCeiling' => '⌉', + 'RightDoubleBracket' => '⟧', + 'RightDownTeeVector' => '⥝', + 'RightDownVector' => '⇂', + 'RightDownVectorBar' => '⥕', + 'RightFloor' => '⌋', + 'rightharpoondown' => '⇁', + 'rightharpoonup' => '⇀', + 'rightleftarrows' => '⇄', + 'rightleftharpoons' => '⇌', + 'rightrightarrows' => '⇉', + 'rightsquigarrow' => '↝', + 'RightTee' => '⊢', + 'RightTeeArrow' => '↦', + 'RightTeeVector' => '⥛', + 'rightthreetimes' => '⋌', + 'RightTriangle' => '⊳', + 'RightTriangleBar' => '⧐', + 'RightTriangleEqual' => '⊵', + 'RightUpDownVector' => '⥏', + 'RightUpTeeVector' => '⥜', + 'RightUpVector' => '↾', + 'RightUpVectorBar' => '⥔', + 'RightVector' => '⇀', + 'RightVectorBar' => '⥓', + 'ring' => '˚', + 'risingdotseq' => '≓', + 'rlarr' => '⇄', + 'rlhar' => '⇌', + 'rlm' => '‏', + 'rmoust' => '⎱', + 'rmoustache' => '⎱', + 'rnmid' => '⫮', + 'roang' => '⟭', + 'roarr' => '⇾', + 'robrk' => '⟧', + 'ropar' => '⦆', + 'Ropf' => 'ℝ', + 'ropf' => '𝕣', + 'roplus' => '⨮', + 'rotimes' => '⨵', + 'RoundImplies' => '⥰', + 'rpar' => ')', + 'rpargt' => '⦔', + 'rppolint' => '⨒', + 'rrarr' => '⇉', + 'Rrightarrow' => '⇛', + 'rsaquo' => '›', + 'Rscr' => 'ℛ', + 'rscr' => '𝓇', + 'Rsh' => '↱', + 'rsh' => '↱', + 'rsqb' => ']', + 'rsquo' => '’', + 'rsquor' => '’', + 'rthree' => '⋌', + 'rtimes' => '⋊', + 'rtri' => '▹', + 'rtrie' => '⊵', + 'rtrif' => '▸', + 'rtriltri' => '⧎', + 'RuleDelayed' => '⧴', + 'ruluhar' => '⥨', + 'rx' => '℞', + 'Sacute' => 'Ś', + 'sacute' => 'ś', + 'sbquo' => '‚', + 'Sc' => '⪼', + 'sc' => '≻', + 'scap' => '⪸', + 'Scaron' => 'Š', + 'scaron' => 'š', + 'sccue' => 
'≽', + 'scE' => '⪴', + 'sce' => '⪰', + 'Scedil' => 'Ş', + 'scedil' => 'ş', + 'Scirc' => 'Ŝ', + 'scirc' => 'ŝ', + 'scnap' => '⪺', + 'scnE' => '⪶', + 'scnsim' => '⋩', + 'scpolint' => '⨓', + 'scsim' => '≿', + 'Scy' => 'С', + 'scy' => 'с', + 'sdot' => '⋅', + 'sdotb' => '⊡', + 'sdote' => '⩦', + 'searhk' => '⤥', + 'seArr' => '⇘', + 'searr' => '↘', + 'searrow' => '↘', + 'sect' => '§', + 'sec' => '§', + 'semi' => ';', + 'seswar' => '⤩', + 'setminus' => '∖', + 'setmn' => '∖', + 'sext' => '✶', + 'Sfr' => '𝔖', + 'sfr' => '𝔰', + 'sfrown' => '⌢', + 'sharp' => '♯', + 'SHCHcy' => 'Щ', + 'shchcy' => 'щ', + 'SHcy' => 'Ш', + 'shcy' => 'ш', + 'ShortDownArrow' => '↓', + 'ShortLeftArrow' => '←', + 'shortmid' => '∣', + 'shortparallel' => '∥', + 'ShortRightArrow' => '→', + 'ShortUpArrow' => '↑', + 'shy' => '­', + 'sh' => '­', + 'Sigma' => 'Σ', + 'sigma' => 'σ', + 'sigmaf' => 'ς', + 'sigmav' => 'ς', + 'sim' => '∼', + 'simdot' => '⩪', + 'sime' => '≃', + 'simeq' => '≃', + 'simg' => '⪞', + 'simgE' => '⪠', + 'siml' => '⪝', + 'simlE' => '⪟', + 'simne' => '≆', + 'simplus' => '⨤', + 'simrarr' => '⥲', + 'slarr' => '←', + 'SmallCircle' => '∘', + 'smallsetminus' => '∖', + 'smashp' => '⨳', + 'smeparsl' => '⧤', + 'smid' => '∣', + 'smile' => '⌣', + 'smt' => '⪪', + 'smte' => '⪬', + 'smtes' => '⪬︀', + 'SOFTcy' => 'Ь', + 'softcy' => 'ь', + 'sol' => '/', + 'solb' => '⧄', + 'solbar' => '⌿', + 'Sopf' => '𝕊', + 'sopf' => '𝕤', + 'spades' => '♠', + 'spadesuit' => '♠', + 'spar' => '∥', + 'sqcap' => '⊓', + 'sqcaps' => '⊓︀', + 'sqcup' => '⊔', + 'sqcups' => '⊔︀', + 'Sqrt' => '√', + 'sqsub' => '⊏', + 'sqsube' => '⊑', + 'sqsubset' => '⊏', + 'sqsubseteq' => '⊑', + 'sqsup' => '⊐', + 'sqsupe' => '⊒', + 'sqsupset' => '⊐', + 'sqsupseteq' => '⊒', + 'squ' => '□', + 'Square' => '□', + 'square' => '□', + 'SquareIntersection' => '⊓', + 'SquareSubset' => '⊏', + 'SquareSubsetEqual' => '⊑', + 'SquareSuperset' => '⊐', + 'SquareSupersetEqual' => '⊒', + 'SquareUnion' => '⊔', + 'squarf' => '▪', + 'squf' => '▪', + 'srarr' => '→', + 
'Sscr' => '𝒮', + 'sscr' => '𝓈', + 'ssetmn' => '∖', + 'ssmile' => '⌣', + 'sstarf' => '⋆', + 'Star' => '⋆', + 'star' => '☆', + 'starf' => '★', + 'straightepsilon' => 'ϵ', + 'straightphi' => 'ϕ', + 'strns' => '¯', + 'Sub' => '⋐', + 'sub' => '⊂', + 'subdot' => '⪽', + 'subE' => '⫅', + 'sube' => '⊆', + 'subedot' => '⫃', + 'submult' => '⫁', + 'subnE' => '⫋', + 'subne' => '⊊', + 'subplus' => '⪿', + 'subrarr' => '⥹', + 'Subset' => '⋐', + 'subset' => '⊂', + 'subseteq' => '⊆', + 'subseteqq' => '⫅', + 'SubsetEqual' => '⊆', + 'subsetneq' => '⊊', + 'subsetneqq' => '⫋', + 'subsim' => '⫇', + 'subsub' => '⫕', + 'subsup' => '⫓', + 'succ' => '≻', + 'succapprox' => '⪸', + 'succcurlyeq' => '≽', + 'Succeeds' => '≻', + 'SucceedsEqual' => '⪰', + 'SucceedsSlantEqual' => '≽', + 'SucceedsTilde' => '≿', + 'succeq' => '⪰', + 'succnapprox' => '⪺', + 'succneqq' => '⪶', + 'succnsim' => '⋩', + 'succsim' => '≿', + 'SuchThat' => '∋', + 'Sum' => '∑', + 'sum' => '∑', + 'sung' => '♪', + 'Sup' => '⋑', + 'sup' => '³', + 'sup1' => '¹', + 'sup2' => '²', + 'sup3' => '³', + 'supdot' => '⪾', + 'supdsub' => '⫘', + 'supE' => '⫆', + 'supe' => '⊇', + 'supedot' => '⫄', + 'Superset' => '⊃', + 'SupersetEqual' => '⊇', + 'suphsol' => '⟉', + 'suphsub' => '⫗', + 'suplarr' => '⥻', + 'supmult' => '⫂', + 'supnE' => '⫌', + 'supne' => '⊋', + 'supplus' => '⫀', + 'Supset' => '⋑', + 'supset' => '⊃', + 'supseteq' => '⊇', + 'supseteqq' => '⫆', + 'supsetneq' => '⊋', + 'supsetneqq' => '⫌', + 'supsim' => '⫈', + 'supsub' => '⫔', + 'supsup' => '⫖', + 'swarhk' => '⤦', + 'swArr' => '⇙', + 'swarr' => '↙', + 'swarrow' => '↙', + 'swnwar' => '⤪', + 'szlig' => 'ß', + 'szli' => 'ß', + 'Tab' => ' ', + 'target' => '⌖', + 'Tau' => 'Τ', + 'tau' => 'τ', + 'tbrk' => '⎴', + 'Tcaron' => 'Ť', + 'tcaron' => 'ť', + 'Tcedil' => 'Ţ', + 'tcedil' => 'ţ', + 'Tcy' => 'Т', + 'tcy' => 'т', + 'tdot' => '⃛', + 'telrec' => '⌕', + 'Tfr' => '𝔗', + 'tfr' => '𝔱', + 'there4' => '∴', + 'Therefore' => '∴', + 'therefore' => '∴', + 'Theta' => 'Θ', + 'theta' => 'θ', + 
'thetasym' => 'ϑ', + 'thetav' => 'ϑ', + 'thickapprox' => '≈', + 'thicksim' => '∼', + 'ThickSpace' => '  ', + 'thinsp' => ' ', + 'ThinSpace' => ' ', + 'thkap' => '≈', + 'thksim' => '∼', + 'THORN' => 'Þ', + 'THOR' => 'Þ', + 'thorn' => 'þ', + 'thor' => 'þ', + 'Tilde' => '∼', + 'tilde' => '˜', + 'TildeEqual' => '≃', + 'TildeFullEqual' => '≅', + 'TildeTilde' => '≈', + 'times' => '×', + 'time' => '×', + 'timesb' => '⊠', + 'timesbar' => '⨱', + 'timesd' => '⨰', + 'tint' => '∭', + 'toea' => '⤨', + 'top' => '⊤', + 'topbot' => '⌶', + 'topcir' => '⫱', + 'Topf' => '𝕋', + 'topf' => '𝕥', + 'topfork' => '⫚', + 'tosa' => '⤩', + 'tprime' => '‴', + 'TRADE' => '™', + 'trade' => '™', + 'triangle' => '▵', + 'triangledown' => '▿', + 'triangleleft' => '◃', + 'trianglelefteq' => '⊴', + 'triangleq' => '≜', + 'triangleright' => '▹', + 'trianglerighteq' => '⊵', + 'tridot' => '◬', + 'trie' => '≜', + 'triminus' => '⨺', + 'TripleDot' => '⃛', + 'triplus' => '⨹', + 'trisb' => '⧍', + 'tritime' => '⨻', + 'trpezium' => '⏢', + 'Tscr' => '𝒯', + 'tscr' => '𝓉', + 'TScy' => 'Ц', + 'tscy' => 'ц', + 'TSHcy' => 'Ћ', + 'tshcy' => 'ћ', + 'Tstrok' => 'Ŧ', + 'tstrok' => 'ŧ', + 'twixt' => '≬', + 'twoheadleftarrow' => '↞', + 'twoheadrightarrow' => '↠', + 'Uacute' => 'Ú', + 'Uacut' => 'Ú', + 'uacute' => 'ú', + 'uacut' => 'ú', + 'Uarr' => '↟', + 'uArr' => '⇑', + 'uarr' => '↑', + 'Uarrocir' => '⥉', + 'Ubrcy' => 'Ў', + 'ubrcy' => 'ў', + 'Ubreve' => 'Ŭ', + 'ubreve' => 'ŭ', + 'Ucirc' => 'Û', + 'Ucir' => 'Û', + 'ucirc' => 'û', + 'ucir' => 'û', + 'Ucy' => 'У', + 'ucy' => 'у', + 'udarr' => '⇅', + 'Udblac' => 'Ű', + 'udblac' => 'ű', + 'udhar' => '⥮', + 'ufisht' => '⥾', + 'Ufr' => '𝔘', + 'ufr' => '𝔲', + 'Ugrave' => 'Ù', + 'Ugrav' => 'Ù', + 'ugrave' => 'ù', + 'ugrav' => 'ù', + 'uHar' => '⥣', + 'uharl' => '↿', + 'uharr' => '↾', + 'uhblk' => '▀', + 'ulcorn' => '⌜', + 'ulcorner' => '⌜', + 'ulcrop' => '⌏', + 'ultri' => '◸', + 'Umacr' => 'Ū', + 'umacr' => 'ū', + 'uml' => '¨', + 'um' => '¨', + 'UnderBar' => '_', + 'UnderBrace' => 
'⏟', + 'UnderBracket' => '⎵', + 'UnderParenthesis' => '⏝', + 'Union' => '⋃', + 'UnionPlus' => '⊎', + 'Uogon' => 'Ų', + 'uogon' => 'ų', + 'Uopf' => '𝕌', + 'uopf' => '𝕦', + 'UpArrow' => '↑', + 'Uparrow' => '⇑', + 'uparrow' => '↑', + 'UpArrowBar' => '⤒', + 'UpArrowDownArrow' => '⇅', + 'UpDownArrow' => '↕', + 'Updownarrow' => '⇕', + 'updownarrow' => '↕', + 'UpEquilibrium' => '⥮', + 'upharpoonleft' => '↿', + 'upharpoonright' => '↾', + 'uplus' => '⊎', + 'UpperLeftArrow' => '↖', + 'UpperRightArrow' => '↗', + 'Upsi' => 'ϒ', + 'upsi' => 'υ', + 'upsih' => 'ϒ', + 'Upsilon' => 'Υ', + 'upsilon' => 'υ', + 'UpTee' => '⊥', + 'UpTeeArrow' => '↥', + 'upuparrows' => '⇈', + 'urcorn' => '⌝', + 'urcorner' => '⌝', + 'urcrop' => '⌎', + 'Uring' => 'Ů', + 'uring' => 'ů', + 'urtri' => '◹', + 'Uscr' => '𝒰', + 'uscr' => '𝓊', + 'utdot' => '⋰', + 'Utilde' => 'Ũ', + 'utilde' => 'ũ', + 'utri' => '▵', + 'utrif' => '▴', + 'uuarr' => '⇈', + 'Uuml' => 'Ü', + 'Uum' => 'Ü', + 'uuml' => 'ü', + 'uum' => 'ü', + 'uwangle' => '⦧', + 'vangrt' => '⦜', + 'varepsilon' => 'ϵ', + 'varkappa' => 'ϰ', + 'varnothing' => '∅', + 'varphi' => 'ϕ', + 'varpi' => 'ϖ', + 'varpropto' => '∝', + 'vArr' => '⇕', + 'varr' => '↕', + 'varrho' => 'ϱ', + 'varsigma' => 'ς', + 'varsubsetneq' => '⊊︀', + 'varsubsetneqq' => '⫋︀', + 'varsupsetneq' => '⊋︀', + 'varsupsetneqq' => '⫌︀', + 'vartheta' => 'ϑ', + 'vartriangleleft' => '⊲', + 'vartriangleright' => '⊳', + 'Vbar' => '⫫', + 'vBar' => '⫨', + 'vBarv' => '⫩', + 'Vcy' => 'В', + 'vcy' => 'в', + 'VDash' => '⊫', + 'Vdash' => '⊩', + 'vDash' => '⊨', + 'vdash' => '⊢', + 'Vdashl' => '⫦', + 'Vee' => '⋁', + 'vee' => '∨', + 'veebar' => '⊻', + 'veeeq' => '≚', + 'vellip' => '⋮', + 'Verbar' => '‖', + 'verbar' => '|', + 'Vert' => '‖', + 'vert' => '|', + 'VerticalBar' => '∣', + 'VerticalLine' => '|', + 'VerticalSeparator' => '❘', + 'VerticalTilde' => '≀', + 'VeryThinSpace' => ' ', + 'Vfr' => '𝔙', + 'vfr' => '𝔳', + 'vltri' => '⊲', + 'vnsub' => '⊂⃒', + 'vnsup' => '⊃⃒', + 'Vopf' => '𝕍', + 'vopf' => '𝕧', + 
'vprop' => '∝', + 'vrtri' => '⊳', + 'Vscr' => '𝒱', + 'vscr' => '𝓋', + 'vsubnE' => '⫋︀', + 'vsubne' => '⊊︀', + 'vsupnE' => '⫌︀', + 'vsupne' => '⊋︀', + 'Vvdash' => '⊪', + 'vzigzag' => '⦚', + 'Wcirc' => 'Ŵ', + 'wcirc' => 'ŵ', + 'wedbar' => '⩟', + 'Wedge' => '⋀', + 'wedge' => '∧', + 'wedgeq' => '≙', + 'weierp' => '℘', + 'Wfr' => '𝔚', + 'wfr' => '𝔴', + 'Wopf' => '𝕎', + 'wopf' => '𝕨', + 'wp' => '℘', + 'wr' => '≀', + 'wreath' => '≀', + 'Wscr' => '𝒲', + 'wscr' => '𝓌', + 'xcap' => '⋂', + 'xcirc' => '◯', + 'xcup' => '⋃', + 'xdtri' => '▽', + 'Xfr' => '𝔛', + 'xfr' => '𝔵', + 'xhArr' => '⟺', + 'xharr' => '⟷', + 'Xi' => 'Ξ', + 'xi' => 'ξ', + 'xlArr' => '⟸', + 'xlarr' => '⟵', + 'xmap' => '⟼', + 'xnis' => '⋻', + 'xodot' => '⨀', + 'Xopf' => '𝕏', + 'xopf' => '𝕩', + 'xoplus' => '⨁', + 'xotime' => '⨂', + 'xrArr' => '⟹', + 'xrarr' => '⟶', + 'Xscr' => '𝒳', + 'xscr' => '𝓍', + 'xsqcup' => '⨆', + 'xuplus' => '⨄', + 'xutri' => '△', + 'xvee' => '⋁', + 'xwedge' => '⋀', + 'Yacute' => 'Ý', + 'Yacut' => 'Ý', + 'yacute' => 'ý', + 'yacut' => 'ý', + 'YAcy' => 'Я', + 'yacy' => 'я', + 'Ycirc' => 'Ŷ', + 'ycirc' => 'ŷ', + 'Ycy' => 'Ы', + 'ycy' => 'ы', + 'yen' => '¥', + 'ye' => '¥', + 'Yfr' => '𝔜', + 'yfr' => '𝔶', + 'YIcy' => 'Ї', + 'yicy' => 'ї', + 'Yopf' => '𝕐', + 'yopf' => '𝕪', + 'Yscr' => '𝒴', + 'yscr' => '𝓎', + 'YUcy' => 'Ю', + 'yucy' => 'ю', + 'Yuml' => 'Ÿ', + 'yuml' => 'ÿ', + 'yum' => 'ÿ', + 'Zacute' => 'Ź', + 'zacute' => 'ź', + 'Zcaron' => 'Ž', + 'zcaron' => 'ž', + 'Zcy' => 'З', + 'zcy' => 'з', + 'Zdot' => 'Ż', + 'zdot' => 'ż', + 'zeetrf' => 'ℨ', + 'ZeroWidthSpace' => '​', + 'Zeta' => 'Ζ', + 'zeta' => 'ζ', + 'Zfr' => 'ℨ', + 'zfr' => '𝔷', + 'ZHcy' => 'Ж', + 'zhcy' => 'ж', + 'zigrarr' => '⇝', + 'Zopf' => 'ℤ', + 'zopf' => '𝕫', + 'Zscr' => '𝒵', + 'zscr' => '𝓏', + 'zwj' => '‍', + 'zwnj' => '‌' + ); +} diff --git a/core/vendor/masterminds/html5/src/HTML5/Exception.php b/core/vendor/masterminds/html5/src/HTML5/Exception.php new file mode 100644 index 0000000..8f33126 --- /dev/null +++ 
b/core/vendor/masterminds/html5/src/HTML5/Exception.php @@ -0,0 +1,9 @@ + self::NAMESPACE_HTML, + 'svg' => self::NAMESPACE_SVG, + 'math' => self::NAMESPACE_MATHML + ); + + /** + * Holds the always available namespaces (which do not require the XMLNS declaration). + * + * @var array + */ + protected $implicitNamespaces = array( + 'xml' => self::NAMESPACE_XML, + 'xmlns' => self::NAMESPACE_XMLNS, + 'xlink' => self::NAMESPACE_XLINK + ); + + /** + * Holds a stack of currently active namespaces. + * + * @var array + */ + protected $nsStack = array(); + + /** + * Holds the number of namespaces declared by a node. + * + * @var array + */ + protected $pushes = array(); + + /** + * Defined in 8.2.5. + */ + const IM_INITIAL = 0; + + const IM_BEFORE_HTML = 1; + + const IM_BEFORE_HEAD = 2; + + const IM_IN_HEAD = 3; + + const IM_IN_HEAD_NOSCRIPT = 4; + + const IM_AFTER_HEAD = 5; + + const IM_IN_BODY = 6; + + const IM_TEXT = 7; + + const IM_IN_TABLE = 8; + + const IM_IN_TABLE_TEXT = 9; + + const IM_IN_CAPTION = 10; + + const IM_IN_COLUMN_GROUP = 11; + + const IM_IN_TABLE_BODY = 12; + + const IM_IN_ROW = 13; + + const IM_IN_CELL = 14; + + const IM_IN_SELECT = 15; + + const IM_IN_SELECT_IN_TABLE = 16; + + const IM_AFTER_BODY = 17; + + const IM_IN_FRAMESET = 18; + + const IM_AFTER_FRAMESET = 19; + + const IM_AFTER_AFTER_BODY = 20; + + const IM_AFTER_AFTER_FRAMESET = 21; + + const IM_IN_SVG = 22; + + const IM_IN_MATHML = 23; + + protected $options = array(); + + protected $stack = array(); + + protected $current; // Pointer in the tag hierarchy. + protected $doc; + + protected $frag; + + protected $processor; + + protected $insertMode = 0; + + /** + * Track if we are in an element that allows only inline child nodes + * @var string|null + */ + protected $onlyInline; + + /** + * Quirks mode is enabled by default. + * Any document that is missing the + * DT will be considered to be in quirks mode. 
+ */ + protected $quirks = true; + + protected $errors = array(); + + public function __construct($isFragment = false, array $options = array()) + { + $this->options = $options; + + if (isset($options[self::OPT_TARGET_DOC])) { + $this->doc = $options[self::OPT_TARGET_DOC]; + } else { + $impl = new \DOMImplementation(); + // XXX: + // Create the doctype. For now, we are always creating HTML5 + // documents, and attempting to up-convert any older DTDs to HTML5. + $dt = $impl->createDocumentType('html'); + // $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt); + $this->doc = $impl->createDocument(null, null, $dt); + } + $this->errors = array(); + + $this->current = $this->doc; // ->documentElement; + + // Create a rules engine for tags. + $this->rules = new TreeBuildingRules($this->doc); + + $implicitNS = array(); + if (isset($this->options[self::OPT_IMPLICIT_NS])) { + $implicitNS = $this->options[self::OPT_IMPLICIT_NS]; + } elseif (isset($this->options["implicitNamespaces"])) { + $implicitNS = $this->options["implicitNamespaces"]; + } + + // Fill $nsStack with the default HTML5 namespaces, plus the "implicitNamespaces" array taken from $options + array_unshift($this->nsStack, $implicitNS + array( + '' => self::NAMESPACE_HTML + ) + $this->implicitNamespaces); + + if ($isFragment) { + $this->insertMode = static::IM_IN_BODY; + $this->frag = $this->doc->createDocumentFragment(); + $this->current = $this->frag; + } + } + + /** + * Get the document. + */ + public function document() + { + return $this->doc; + } + + /** + * Get the DOM fragment for the body. + * + * This returns a DOMDocumentFragment because a fragment may have zero or more + * DOMNodes at its root. + * + * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#concept-frag-parse-context + * + * @return \DOMDocumentFragment|null The fragment, or null when the builder was not created in fragment mode. + */ + public function fragment() + { + return $this->frag; + } + + /** + * Provide an instruction processor. 
+ * + * This is used for handling Processor Instructions as they are + * inserted. If omitted, PI's are inserted directly into the DOM tree. + */ + public function setInstructionProcessor(\Masterminds\HTML5\InstructionProcessor $proc) + { + $this->processor = $proc; + } + + public function doctype($name, $idType = 0, $id = null, $quirks = false) + { + // This is used solely for setting quirks mode. Currently we don't + // try to preserve the inbound DT. We convert it to HTML5. + $this->quirks = $quirks; + + if ($this->insertMode > static::IM_INITIAL) { + $this->parseError("Illegal placement of DOCTYPE tag. Ignoring: " . $name); + + return; + } + + $this->insertMode = static::IM_BEFORE_HTML; + } + + /** + * Process the start tag. + * + * @todo - XMLNS namespace handling (we need to parse, even if it's not valid) + * - XLink, MathML and SVG namespace handling + * - Omission rules: 8.1.2.4 Optional tags + */ + public function startTag($name, $attributes = array(), $selfClosing = false) + { + // fprintf(STDOUT, $name); + $lname = $this->normalizeTagName($name); + + // Make sure we have an html element. + if (! $this->doc->documentElement && $name !== 'html' && ! $this->frag) { + $this->startTag('html'); + } + + // Set quirks mode if we're at IM_INITIAL with no doctype. + if ($this->insertMode == static::IM_INITIAL) { + $this->quirks = true; + $this->parseError("No DOCTYPE specified."); + } + + // SPECIAL TAG HANDLING: + // Spec says do this, and "don't ask." + if ($name == 'image') { + $name = 'img'; + } + + // Autoclose p tags where appropriate. 
+ if ($this->insertMode >= static::IM_IN_BODY && Elements::isA($name, Elements::AUTOCLOSE_P)) { + $this->autoclose('p'); + } + + // Set insert mode: + switch ($name) { + case 'html': + $this->insertMode = static::IM_BEFORE_HEAD; + break; + case 'head': + if ($this->insertMode > static::IM_BEFORE_HEAD) { + $this->parseError("Unexpected head tag outside of head context."); + } else { + $this->insertMode = static::IM_IN_HEAD; + } + break; + case 'body': + $this->insertMode = static::IM_IN_BODY; + break; + case 'svg': + $this->insertMode = static::IM_IN_SVG; + break; + case 'math': + $this->insertMode = static::IM_IN_MATHML; + break; + case 'noscript': + if ($this->insertMode == static::IM_IN_HEAD) { + $this->insertMode = static::IM_IN_HEAD_NOSCRIPT; + } + break; + } + + // Special case handling for SVG. + if ($this->insertMode == static::IM_IN_SVG) { + $lname = Elements::normalizeSvgElement($lname); + } + + $pushes = 0; + // when we found a tag thats appears inside $nsRoots, we have to switch the defalut namespace + if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) { + array_unshift($this->nsStack, array( + '' => $this->nsRoots[$lname] + ) + $this->nsStack[0]); + $pushes ++; + } + $needsWorkaround = false; + if (isset($this->options["xmlNamespaces"]) && $this->options["xmlNamespaces"]) { + // when xmlNamespaces is true a and we found a 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack + foreach ($attributes as $aName => $aVal) { + if ($aName === 'xmlns') { + $needsWorkaround = $aVal; + array_unshift($this->nsStack, array( + '' => $aVal + ) + $this->nsStack[0]); + $pushes ++; + } elseif ((($pos = strpos($aName, ':')) ? 
substr($aName, 0, $pos) : '') === 'xmlns') { + array_unshift($this->nsStack, array( + substr($aName, $pos + 1) => $aVal + ) + $this->nsStack[0]); + $pushes ++; + } + } + } + + if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) { + $this->autoclose($this->onlyInline); + $this->onlyInline = null; + } + + try { + $prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : ''; + + + if ($needsWorkaround!==false) { + + $xml = "<$lname xmlns=\"$needsWorkaround\" ".(strlen($prefix) && isset($this->nsStack[0][$prefix])?("xmlns:$prefix=\"".$this->nsStack[0][$prefix]."\""):"")."/>"; + + $frag = new \DOMDocument('1.0', 'UTF-8'); + $frag->loadXML($xml); + + $ele = $this->doc->importNode($frag->documentElement, true); + + } else { + if (!isset($this->nsStack[0][$prefix]) || ($prefix === "" && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) { + $ele = $this->doc->createElement($lname); + } else { + $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname); + } + } + + } catch (\DOMException $e) { + $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>."); + $ele = $this->doc->createElement('invalid'); + } + + if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) { + $this->onlyInline = $lname; + } + + // When we add some namespaces, we have to track them. Later, when "endTag" is invoked, we have to remove them. + // When we are on a void tag, we do not need to care about namespace nesting. 
+ if ($pushes > 0 && !Elements::isA($name, Elements::VOID_TAG)) { + // PHP tends to free the memory used by DOM, + // to avoid spl_object_hash collisions whe have to avoid garbage collection of $ele storing it into $pushes + // see https://bugs.php.net/bug.php?id=67459 + $this->pushes[spl_object_hash($ele)] = array($pushes, $ele); + + // SEE https://github.com/facebook/hhvm/issues/2962 + if (defined('HHVM_VERSION')) { + $ele->setAttribute('html5-php-fake-id-attribute', spl_object_hash($ele)); + } + } + + foreach ($attributes as $aName => $aVal) { + // xmlns attributes can't be set + if ($aName === 'xmlns') { + continue; + } + + if ($this->insertMode == static::IM_IN_SVG) { + $aName = Elements::normalizeSvgAttribute($aName); + } elseif ($this->insertMode == static::IM_IN_MATHML) { + $aName = Elements::normalizeMathMlAttribute($aName); + } + + try { + $prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false; + + if ($prefix==='xmlns') { + $ele->setAttributeNs(self::NAMESPACE_XMLNS, $aName, $aVal); + } elseif ($prefix!==false && isset($this->nsStack[0][$prefix])) { + $ele->setAttributeNs($this->nsStack[0][$prefix], $aName, $aVal); + } else { + $ele->setAttribute($aName, $aVal); + } + } catch (\DOMException $e) { + $this->parseError("Illegal attribute name for tag $name. Ignoring: $aName"); + continue; + } + + // This is necessary on a non-DTD schema, like HTML5. + if ($aName == 'id') { + $ele->setIdAttribute('id', true); + } + } + + // Some elements have special processing rules. Handle those separately. + if ($this->rules->hasRules($name) && $this->frag !== $this->current) { + $this->current = $this->rules->evaluate($ele, $this->current); + } // Otherwise, it's a standard element. + else { + $this->current->appendChild($ele); + + // XXX: Need to handle self-closing tags and unary tags. + if (! 
Elements::isA($name, Elements::VOID_TAG)) { + $this->current = $ele; + } + } + + // This is sort of a last-ditch attempt to correct for cases where no head/body + // elements are provided. + if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') { + $this->insertMode = static::IM_IN_BODY; + } + + // When we are on a void tag, we do not need to care about namesapce nesting, + // but we have to remove the namespaces pushed to $nsStack. + if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) { + // remove the namespaced definded by current node + for ($i = 0; $i < $pushes; $i ++) { + array_shift($this->nsStack); + } + } + // Return the element mask, which the tokenizer can then use to set + // various processing rules. + return Elements::element($name); + } + + public function endTag($name) + { + $lname = $this->normalizeTagName($name); + + // Ignore closing tags for unary elements. + if (Elements::isA($name, Elements::VOID_TAG)) { + return; + } + + if ($this->insertMode <= static::IM_BEFORE_HTML) { + // 8.2.5.4.2 + if (in_array($name, array( + 'html', + 'br', + 'head', + 'title' + ))) { + $this->startTag('html'); + $this->endTag($name); + $this->insertMode = static::IM_BEFORE_HEAD; + + return; + } + + // Ignore the tag. + $this->parseError("Illegal closing tag at global scope."); + + return; + } + + // Special case handling for SVG. + if ($this->insertMode == static::IM_IN_SVG) { + $lname = Elements::normalizeSvgElement($lname); + } + + // See https://github.com/facebook/hhvm/issues/2962 + if (defined('HHVM_VERSION') && ($cid = $this->current->getAttribute('html5-php-fake-id-attribute'))) { + $this->current->removeAttribute('html5-php-fake-id-attribute'); + } else { + $cid = spl_object_hash($this->current); + } + + // XXX: Not sure whether we need this anymore. + // if ($name != $lname) { + // return $this->quirksTreeResolver($lname); + // } + + // XXX: HTML has no parent. 
What do we do, though, + // if this element appears in the wrong place? + if ($lname == 'html') { + return; + } + + // remove the namespaces defined by current node + if (isset($this->pushes[$cid])) { + for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) { + array_shift($this->nsStack); + } + unset($this->pushes[$cid]); + } + + if (! $this->autoclose($lname)) { + $this->parseError('Could not find closing tag for ' . $lname); + } + + // switch ($this->insertMode) { + switch ($lname) { + case "head": + $this->insertMode = static::IM_AFTER_HEAD; + break; + case "body": + $this->insertMode = static::IM_AFTER_BODY; + break; + case "svg": + case "math": // startTag() enters IM_IN_MATHML on 'math', so the same tag name must leave it. + $this->insertMode = static::IM_IN_BODY; + break; + } + } + + public function comment($cdata) + { + // TODO: Need to handle case where comment appears outside of the HTML tag. + $node = $this->doc->createComment($cdata); + $this->current->appendChild($node); + } + + public function text($data) + { + // XXX: Hmmm.... should we really be this strict? + if ($this->insertMode < static::IM_IN_HEAD) { + // Per '8.2.5.4.3 The "before head" insertion mode' the characters + // " \t\n\r\f" should be ignored but no mention of a parse error. This is + // practical as most documents contain these characters. Other text is not + // expected here so recording a parse error is necessary. + $dataTmp = trim($data, " \t\n\r\f"); + if ($dataTmp !== '') { // Not empty(): the text "0" is falsy but is still unexpected text. + // fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode); + $this->parseError("Unexpected text. Ignoring: " . $dataTmp); + } + + return; + } + // fprintf(STDOUT, "Appending text %s.", $data); + $node = $this->doc->createTextNode($data); + $this->current->appendChild($node); + } + + public function eof() + { + // If the $current isn't the $root, do we need to do anything? 
+ } + + public function parseError($msg, $line = 0, $col = 0) + { + $this->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg); + } + + public function getErrors() + { + return $this->errors; + } + + public function cdata($data) + { + $node = $this->doc->createCDATASection($data); + $this->current->appendChild($node); + } + + public function processingInstruction($name, $data = null) + { + // XXX: Ignore initial XML declaration, per the spec. + if ($this->insertMode == static::IM_INITIAL && 'xml' == strtolower($name)) { + return; + } + + // Important: The processor may modify the current DOM tree however + // it sees fit. + if (isset($this->processor)) { + $res = $this->processor->process($this->current, $name, $data); + if (! empty($res)) { + $this->current = $res; + } + + return; + } + + // Otherwise, this is just a dumb PI element. + $node = $this->doc->createProcessingInstruction($name, $data); + + $this->current->appendChild($node); + } + + // ========================================================================== + // UTILITIES + // ========================================================================== + + /** + * Apply normalization rules to a tag name. + * + * See sections 2.9 and 8.1.2. + * + * @param string $name + * The tag name. + * @return string The normalized tag name. + */ + protected function normalizeTagName($name) + { + /* + * Section 2.9 suggests that we should not do this. if (strpos($name, ':') !== false) { // We know from the grammar that there must be at least one other // char besides :, since : is not a legal tag start. $parts = explode(':', $name); return array_pop($parts); } + */ + return $name; + } + + protected function quirksTreeResolver($name) + { + throw new \Exception("Not implemented."); + } + + /** + * Automatically climb the tree and close the closest node with the matching $tag. 
+ */ + protected function autoclose($tag) + { + $working = $this->current; + do { + if ($working->nodeType != XML_ELEMENT_NODE) { + return false; + } + if ($working->tagName == $tag) { + $this->current = $working->parentNode; + + return true; + } + } while ($working = $working->parentNode); + return false; + } + + /** + * Checks if the given tagname is an ancestor of the present candidate. + * + * If $this->current or anything above $this->current matches the given tag + * name, this returns true. + */ + protected function isAncestor($tagname) + { + $candidate = $this->current; + while ($candidate->nodeType === XML_ELEMENT_NODE) { + if ($candidate->tagName == $tagname) { + return true; + } + $candidate = $candidate->parentNode; + } + + return false; + } + + /** + * Returns true if the immediate parent element is of the given tagname. + */ + protected function isParent($tagname) + { + return $this->current->tagName == $tagname; + } +} \ No newline at end of file diff --git a/core/vendor/masterminds/html5/src/HTML5/Parser/EventHandler.php b/core/vendor/masterminds/html5/src/HTML5/Parser/EventHandler.php new file mode 100644 index 0000000..2d55347 --- /dev/null +++ b/core/vendor/masterminds/html5/src/HTML5/Parser/EventHandler.php @@ -0,0 +1,122 @@ +) + * @return numeric One of the Tokenizer::TEXTMODE_* constants. + */ + public function startTag($name, $attributes = array(), $selfClosing = false); + + /** + * An end-tag. + */ + public function endTag($name); + + /** + * A comment section (unparsed character data). + */ + public function comment($cdata); + + /** + * A unit of parsed character data. + * + * Entities in this text are *already decoded*. + */ + public function text($cdata); + + /** + * Indicates that the document has been entirely processed. + */ + public function eof(); + + /** + * Emitted when the parser encounters an error condition. + */ + public function parseError($msg, $line, $col); + + /** + * A CDATA section. 
+ * + * @param string $data + * The unparsed character data. + */ + public function cdata($data); + + /** + * This is a holdover from the XML spec. + * + * While user agents don't get PIs, server-side does. + * + * @param string $name + * The name of the processor (e.g. 'php'). + * @param string $data + * The unparsed data. + */ + public function processingInstruction($name, $data = null); +} diff --git a/core/vendor/masterminds/html5/src/HTML5/Parser/FileInputStream.php b/core/vendor/masterminds/html5/src/HTML5/Parser/FileInputStream.php new file mode 100644 index 0000000..e58006a --- /dev/null +++ b/core/vendor/masterminds/html5/src/HTML5/Parser/FileInputStream.php @@ -0,0 +1,32 @@ +is = $input; + } + + /** + * Get the current position. + * + * @return int The current intiger byte position. + */ + public function position() + { + return $this->is->key(); + } + + /** + * Take a peek at the next character in the data. + * + * @return string The next character. + */ + public function peek() + { + return $this->is->peek(); + } + + /** + * Get the next character. + * + * Note: This advances the pointer. + * + * @return string The next character. + */ + public function next() + { + $this->is->next(); + if ($this->is->valid()) { + if ($this->debug) + fprintf(STDOUT, "> %s\n", $this->is->current()); + return $this->is->current(); + } + + return false; + } + + /** + * Get the current character. + * + * Note, this does not advance the pointer. + * + * @return string The current character. + */ + public function current() + { + if ($this->is->valid()) { + return $this->is->current(); + } + + return false; + } + + /** + * Silently consume N chars. + */ + public function consume($count = 1) + { + for ($i = 0; $i < $count; ++ $i) { + $this->next(); + } + } + + /** + * Unconsume some of the data. + * This moves the data pointer backwards. + * + * @param int $howMany + * The number of characters to move the pointer back. 
+ */ + public function unconsume($howMany = 1) + { + $this->is->unconsume($howMany); + } + + /** + * Get the next group of that contains hex characters. + * + * Note, along with getting the characters the pointer in the data will be + * moved as well. + * + * @return string The next group that is hex characters. + */ + public function getHex() + { + return $this->is->charsWhile(static::CHARS_HEX); + } + + /** + * Get the next group of characters that are ASCII Alpha characters. + * + * Note, along with getting the characters the pointer in the data will be + * moved as well. + * + * @return string The next group of ASCII alpha characters. + */ + public function getAsciiAlpha() + { + return $this->is->charsWhile(static::CHARS_ALPHA); + } + + /** + * Get the next group of characters that are ASCII Alpha characters and numbers. + * + * Note, along with getting the characters the pointer in the data will be + * moved as well. + * + * @return string The next group of ASCII alpha characters and numbers. + */ + public function getAsciiAlphaNum() + { + return $this->is->charsWhile(static::CHARS_ALNUM); + } + + /** + * Get the next group of numbers. + * + * Note, along with getting the characters the pointer in the data will be + * moved as well. + * + * @return string The next group of numbers. + */ + public function getNumeric() + { + return $this->is->charsWhile('0123456789'); + } + + /** + * Consume whitespace. + * + * Whitespace in HTML5 is: formfeed, tab, newline, space. + */ + public function whitespace() + { + return $this->is->charsWhile("\n\t\f "); + } + + /** + * Returns the current line that is being consumed. + * + * @return int The current line number. + */ + public function currentLine() + { + return $this->is->currentLine(); + } + + /** + * Read chars until something in the mask is encountered. + */ + public function charsUntil($mask) + { + return $this->is->charsUntil($mask); + } + + /** + * Read chars as long as the mask matches. 
+ */ + public function charsWhile($mask) + { + return $this->is->charsWhile($mask); + } + + /** + * Returns the current column of the current line that the tokenizer is at. + * + * Newlines are column 0. The first char after a newline is column 1. + * + * @return int The column number. + */ + public function columnOffset() + { + return $this->is->columnOffset(); + } + + /** + * Get all characters until EOF. + * + * This consumes characters until the EOF. + * + * @return int The number of characters remaining. + */ + public function remainingChars() + { + return $this->is->remainingChars(); + } +} diff --git a/core/vendor/masterminds/html5/src/HTML5/Parser/StringInputStream.php b/core/vendor/masterminds/html5/src/HTML5/Parser/StringInputStream.php new file mode 100644 index 0000000..4cac3c2 --- /dev/null +++ b/core/vendor/masterminds/html5/src/HTML5/Parser/StringInputStream.php @@ -0,0 +1,331 @@ + + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +*/ + +// Some conventions: +// - /* */ indicates verbatim text from the HTML 5 specification +// MPB: Not sure which version of the spec. Moving from HTML5lib to +// HTML5-PHP, I have been using this version: +// http://www.w3.org/TR/2012/CR-html5-20121217/Overview.html#contents +// +// - // indicates regular comments + +class StringInputStream implements InputStream +{ + + /** + * The string data we're parsing. + */ + private $data; + + /** + * The current integer byte position we are in $data + */ + private $char; + + /** + * Length of $data; when $char === $data, we are at the end-of-file. + */ + private $EOF; + + /** + * Parse errors. + */ + public $errors = array(); + + /** + * Create a new InputStream wrapper. + * + * @param $data Data + * to parse + */ + public function __construct($data, $encoding = 'UTF-8', $debug = '') + { + $data = UTF8Utils::convertToUTF8($data, $encoding); + if ($debug) + fprintf(STDOUT, $debug, $data, strlen($data)); + + // There is good reason to question whether it makes sense to + // do this here, since most of these checks are done during + // parsing, and since this check doesn't actually *do* anything. + $this->errors = UTF8Utils::checkForIllegalCodepoints($data); + // if (!empty($e)) { + // throw new ParseError("UTF-8 encoding issues: " . implode(', ', $e)); + // } + + $data = $this->replaceLinefeeds($data); + + $this->data = $data; + $this->char = 0; + $this->EOF = strlen($data); + } + + /** + * Replace linefeed characters according to the spec. + */ + protected function replaceLinefeeds($data) + { + /* + * U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED (LF) characters are treated specially. Any CR characters that are followed by LF characters must be removed, and any CR characters not followed by LF characters must be converted to LF characters. Thus, newlines in HTML DOMs are represented by LF characters, and there are never any CR characters in the input to the tokenization stage. 
+ */ + $crlfTable = array( + "\0" => "\xEF\xBF\xBD", + "\r\n" => "\n", + "\r" => "\n" + ); + + return strtr($data, $crlfTable); + } + + /** + * Returns the current line that the tokenizer is at. + */ + public function currentLine() + { + if (empty($this->EOF) || $this->char == 0) { + return 1; + } + // Count the newlines before the current byte (clamped to the data length) + // and add one, because line numbers are 1-based. + return substr_count($this->data, "\n", 0, min($this->char, $this->EOF)) + 1; + } + + /** + * Alias of currentLine(). + * @deprecated Use currentLine() instead. + * + */ + public function getCurrentLine() + { + return $this->currentLine(); + } + + /** + * Returns the current column of the current line that the tokenizer is at. + * + * Newlines are column 0. The first char after a newline is column 1. + * + * @return int The column number. + */ + public function columnOffset() + { + // Short circuit for the first char. + if ($this->char == 0) { + return 0; + } + // strrpos is weird, and the offset needs to be negative for what we + // want (i.e., the last \n before $this->char). This needs to not have + // one (to make it point to the next character, the one we want the + // position of) added to it because strrpos's behaviour includes the + // final offset byte. + $backwardFrom = $this->char - 1 - strlen($this->data); + $lastLine = strrpos($this->data, "\n", $backwardFrom); + + // However, for here we want the length up until the next byte to be + // processed, so add one to the current byte ($this->char). + if ($lastLine !== false) { + $findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine); + } else { + // After a newline. + $findLengthOf = substr($this->data, 0, $this->char); + } + + return UTF8Utils::countChars($findLengthOf); + } + + /** + * Alias of columnOffset(). + * @deprecated Use columnOffset() instead. + * + */ + public function getColumnOffset() + { + return $this->columnOffset(); + } + + /** + * Get the current character. + * + * @return string The current character. 
+ */ + public function current() + { + return $this->data[$this->char]; + } + + /** + * Advance the pointer. + * This is part of the Iterator interface. + */ + public function next() + { + $this->char ++; + } + + /** + * Rewind to the start of the string. + */ + public function rewind() + { + $this->char = 0; + } + + /** + * Is the current pointer location valid. + * + * @return bool Is the current pointer location valid. + */ + public function valid() + { + if ($this->char < $this->EOF) { + return true; + } + + return false; + } + + /** + * Get all characters until EOF. + * + * This reads to the end of the file, and sets the read marker at the + * end of the file. + * + * @note This performs bounds checking + * + * @return string Returns the remaining text. If called when the InputStream is + * already exhausted, it returns an empty string. + */ + public function remainingChars() + { + if ($this->char < $this->EOF) { + $data = substr($this->data, $this->char); + $this->char = $this->EOF; + + return $data; + } + + return ''; // false; + } + + /** + * Read to a particular match (or until $max bytes are consumed). + * + * This operates on byte sequences, not characters. + * + * Matches as far as possible until we reach a certain set of bytes + * and returns the matched substring. + * + * @param string $bytes + * Bytes to match. + * @param int $max + * Maximum number of bytes to scan. + * @return mixed Index or false if no match is found. You should use strong + * equality when checking the result, since index could be 0. + */ + public function charsUntil($bytes, $max = null) + { + if ($this->char >= $this->EOF) { + return false; + } + + if ($max === 0 || $max) { + $len = strcspn($this->data, $bytes, $this->char, $max); + } else { + $len = strcspn($this->data, $bytes, $this->char); + } + + $string = (string) substr($this->data, $this->char, $len); + $this->char += $len; + + return $string; + } + + /** + * Returns the string so long as $bytes matches. 
+ * + * Matches as far as possible with a certain set of bytes + * and returns the matched substring. + * + * @param string $bytes + * A mask of bytes to match. If ANY byte in this mask matches the + * current char, the pointer advances and the char is part of the + * substring. + * @param int $max + * The max number of chars to read. + */ + public function charsWhile($bytes, $max = null) + { + if ($this->char >= $this->EOF) { + return false; + } + + if ($max === 0 || $max) { + $len = strspn($this->data, $bytes, $this->char, $max); + } else { + $len = strspn($this->data, $bytes, $this->char); + } + $string = (string) substr($this->data, $this->char, $len); + $this->char += $len; + + return $string; + } + + /** + * Unconsume characters. + * + * @param int $howMany + * The number of characters to unconsume. + */ + public function unconsume($howMany = 1) + { + if (($this->char - $howMany) >= 0) { + $this->char = $this->char - $howMany; + } + } + + /** + * Look ahead without moving cursor: returns the byte after the current one, or false when there is none. + */ + public function peek() + { + if (($this->char + 1) < $this->EOF) { + return $this->data[$this->char + 1]; + } + + return false; + } + + public function key() + { + return $this->char; + } +} diff --git a/core/vendor/masterminds/html5/src/HTML5/Parser/Tokenizer.php b/core/vendor/masterminds/html5/src/HTML5/Parser/Tokenizer.php new file mode 100644 index 0000000..e00b9a2 --- /dev/null +++ b/core/vendor/masterminds/html5/src/HTML5/Parser/Tokenizer.php @@ -0,0 +1,1092 @@ +scanner = $scanner; + $this->events = $eventHandler; + } + + /** + * Begin parsing. + * + * This will begin scanning the document, tokenizing as it goes. + * Tokens are emitted into the event handler. + * + * Tokenizing will continue until the document is completely + * read. Errors are emitted into the event handler, but + * the parser will attempt to continue parsing until the + * entire input stream is read. 
+ */ + public function parse() + { + $p = 0; + do { + $p = $this->scanner->position(); + $this->consumeData(); + + // FIXME: Add infinite loop protection. + } while ($this->carryOn); + } + + /** + * Set the text mode for the character data reader. + * + * HTML5 defines three different modes for reading text: + * - Normal: Read until a tag is encountered. + * - RCDATA: Read until a tag is encountered, but skip a few otherwise- + * special characters. + * - Raw: Read until a special closing tag is encountered (viz. pre, script) + * + * This allows those modes to be set. + * + * Normally, setting is done by the event handler via a special return code on + * startTag(), but it can also be set manually using this function. + * + * @param integer $textmode + * One of Elements::TEXT_* + * @param string $untilTag + * The tag that should stop RAW or RCDATA mode. Normal mode does not + * use this indicator. + */ + public function setTextMode($textmode, $untilTag = null) + { + $this->textMode = $textmode & (Elements::TEXT_RAW | Elements::TEXT_RCDATA); + $this->untilTag = $untilTag; + } + + /** + * Consume a character and make a move. + * HTML5 8.2.4.1 + */ + protected function consumeData() + { + // Character Ref + /* + * $this->characterReference() || $this->tagOpen() || $this->eof() || $this->characterData(); + */ + $this->characterReference(); + $this->tagOpen(); + $this->eof(); + $this->characterData(); + + return $this->carryOn; + } + + /** + * Parse anything that looks like character data. + * + * Different rules apply based on the current text mode. + * + * @see Elements::TEXT_RAW Elements::TEXT_RCDATA. 
+ */ + protected function characterData() + { + if ($this->scanner->current() === false) { + return false; + } + switch ($this->textMode) { + case Elements::TEXT_RAW: + return $this->rawText(); + case Elements::TEXT_RCDATA: + return $this->rcdata(); + default: + $tok = $this->scanner->current(); + if (strspn($tok, "<&")) { + return false; + } + return $this->text(); + } + } + + /** + * This buffers the current token as character data. + */ + protected function text() + { + $tok = $this->scanner->current(); + + // This should never happen... + if ($tok === false) { + return false; + } + // Null + if ($tok === "\00") { + $this->parseError("Received null character."); + } + // fprintf(STDOUT, "Writing '%s'", $tok); + $this->buffer($tok); + $this->scanner->next(); + return true; + } + + /** + * Read text in RAW mode, up to the closing tag named by $this->untilTag. + */ + protected function rawText() + { + if (is_null($this->untilTag)) { + return $this->text(); + } + $sequence = '</' . $this->untilTag . '>'; + $txt = $this->readUntilSequence($sequence); + $this->events->text($txt); + $this->setTextMode(0); + return $this->endTag(); + } + + /** + * Read text in RCDATA mode, up to the closing tag named by $this->untilTag. + */ + protected function rcdata() + { + if (is_null($this->untilTag)) { + return $this->text(); + } + $sequence = '</' . $this->untilTag; + $txt = ''; + $tok = $this->scanner->current(); + + $caseSensitive = !Elements::isHtml5Element($this->untilTag); + while ($tok !== false && ! 
($tok == '<' && ($this->sequenceMatches($sequence, $caseSensitive)))) { + if ($tok == '&') { + $txt .= $this->decodeCharacterReference(); + $tok = $this->scanner->current(); + } else { + $txt .= $tok; + $tok = $this->scanner->next(); + } + } + $len = strlen($sequence); + $this->scanner->consume($len); + $len += strlen($this->scanner->whitespace()); + if ($this->scanner->current() !== '>') { + $this->parseError("Unclosed RCDATA end tag"); + } + $this->scanner->unconsume($len); + $this->events->text($txt); + $this->setTextMode(0); + return $this->endTag(); + } + + /** + * If the document is read, emit an EOF event. + */ + protected function eof() + { + if ($this->scanner->current() === false) { + // fprintf(STDOUT, "EOF"); + $this->flushBuffer(); + $this->events->eof(); + $this->carryOn = false; + return true; + } + return false; + } + + /** + * Handle character references (aka entities). + * + * This version is specific to PCDATA, as it buffers data into the + * text buffer. For a generic version, see decodeCharacterReference(). + * + * HTML5 8.2.4.2 + */ + protected function characterReference() + { + $ref = $this->decodeCharacterReference(); + if ($ref !== false) { + $this->buffer($ref); + return true; + } + return false; + } + + /** + * Emit a tagStart event on encountering a tag. + * + * 8.2.4.8 + */ + protected function tagOpen() + { + if ($this->scanner->current() != '<') { + return false; + } + + // Any buffered text data can go out now. + $this->flushBuffer(); + + $this->scanner->next(); + + return $this->markupDeclaration() || $this->endTag() || $this->processingInstruction() || $this->tagName() || + /* This always returns false. */ + $this->parseError("Illegal tag opening") || $this->characterData(); + } + + /** + * Look for markup. 
+ */ + protected function markupDeclaration() + { + if ($this->scanner->current() != '!') { + return false; + } + + $tok = $this->scanner->next(); + + // Comment: + if ($tok == '-' && $this->scanner->peek() == '-') { + $this->scanner->next(); // Consume the other '-' + $this->scanner->next(); // Next char. + return $this->comment(); + } + + elseif ($tok == 'D' || $tok == 'd') { // Doctype + return $this->doctype(''); + } + + elseif ($tok == '[') { // CDATA section + return $this->cdataSection(); + } + + // FINISH + $this->parseError("Expected . Emit an empty comment because 8.2.4.46 says to. + if ($tok == '>') { + // Parse error. Emit the comment token. + $this->parseError("Expected comment data, got '>'"); + $this->events->comment(''); + $this->scanner->next(); + return true; + } + + // Replace NULL with the replacement char. + if ($tok == "\0") { + $tok = UTF8Utils::FFFD; + } + while (! $this->isCommentEnd()) { + $comment .= $tok; + $tok = $this->scanner->next(); + } + + $this->events->comment($comment); + $this->scanner->next(); + return true; + } + + /** + * Check if the scanner has reached the end of a comment. + */ + protected function isCommentEnd() + { + // EOF + if ($this->scanner->current() === false) { + // Hit the end. + $this->parseError("Unexpected EOF in a comment."); + return true; + } + + // If it doesn't start with -, not the end. + if ($this->scanner->current() != '-') { + return false; + } + + // Advance one, and test for '->' + if ($this->scanner->next() == '-' && $this->scanner->peek() == '>') { + $this->scanner->next(); // Consume the last '>' + return true; + } + // Unread '-'; + $this->scanner->unconsume(1); + return false; + } + + /** + * Parse a DOCTYPE. + * + * Parse a DOCTYPE declaration. This method has strong bearing on whether or + * not Quirksmode is enabled on the event handler. + * + * @todo This method is a little long. Should probably refactor. 
+ */ + protected function doctype() + { + if (strcasecmp($this->scanner->current(), 'D')) { + return false; + } + // Check that string is DOCTYPE. + $chars = $this->scanner->charsWhile("DOCTYPEdoctype"); + if (strcasecmp($chars, 'DOCTYPE')) { + $this->parseError('Expected DOCTYPE, got %s', $chars); + return $this->bogusComment('scanner->whitespace(); + $tok = $this->scanner->current(); + + // EOF: die. + if ($tok === false) { + $this->events->doctype('html5', EventHandler::DOCTYPE_NONE, '', true); + return $this->eof(); + } + + $doctypeName = ''; + + // NULL char: convert. + if ($tok === "\0") { + $this->parseError("Unexpected null character in DOCTYPE."); + $doctypeName .= UTF8::FFFD; + $tok = $this->scanner->next(); + } + + $stop = " \n\f>"; + $doctypeName = $this->scanner->charsUntil($stop); + // Lowercase ASCII, replace \0 with FFFD + $doctypeName = strtolower(strtr($doctypeName, "\0", UTF8Utils::FFFD)); + + $tok = $this->scanner->current(); + + // If false, emit a parse error, DOCTYPE, and return. + if ($tok === false) { + $this->parseError('Unexpected EOF in DOCTYPE declaration.'); + $this->events->doctype($doctypeName, EventHandler::DOCTYPE_NONE, null, true); + return true; + } + + // Short DOCTYPE, like + if ($tok == '>') { + // DOCTYPE without a name. + if (strlen($doctypeName) == 0) { + $this->parseError("Expected a DOCTYPE name. Got nothing."); + $this->events->doctype($doctypeName, 0, null, true); + $this->scanner->next(); + return true; + } + $this->events->doctype($doctypeName); + $this->scanner->next(); + return true; + } + $this->scanner->whitespace(); + + $pub = strtoupper($this->scanner->getAsciiAlpha()); + $white = strlen($this->scanner->whitespace()); + $tok = $this->scanner->current(); + + // Get ID, and flag it as pub or system. + if (($pub == 'PUBLIC' || $pub == 'SYSTEM') && $white > 0) { + // Get the sys ID. + $type = $pub == 'PUBLIC' ? 
EventHandler::DOCTYPE_PUBLIC : EventHandler::DOCTYPE_SYSTEM; + $id = $this->quotedString("\0>"); + if ($id === false) { + $this->events->doctype($doctypeName, $type, $pub, false); + return false; + } + + // Premature EOF. + if ($this->scanner->current() === false) { + $this->parseError("Unexpected EOF in DOCTYPE"); + $this->events->doctype($doctypeName, $type, $id, true); + return true; + } + + // Well-formed complete DOCTYPE. + $this->scanner->whitespace(); + if ($this->scanner->current() == '>') { + $this->events->doctype($doctypeName, $type, $id, false); + $this->scanner->next(); + return true; + } + + // If we get here, we have scanner->charsUntil(">"); + $this->parseError("Malformed DOCTYPE."); + $this->events->doctype($doctypeName, $type, $id, true); + $this->scanner->next(); + return true; + } + + // Else it's a bogus DOCTYPE. + // Consume to > and trash. + $this->scanner->charsUntil('>'); + + $this->parseError("Expected PUBLIC or SYSTEM. Got %s.", $pub); + $this->events->doctype($doctypeName, 0, null, true); + $this->scanner->next(); + return true; + } + + /** + * Utility for reading a quoted string. + * + * @param string $stopchars + * Characters (in addition to a close-quote) that should stop the string. + * E.g. sometimes '>' is higher precedence than '"' or "'". + * @return mixed String if one is found (quotations omitted) + */ + protected function quotedString($stopchars) + { + $tok = $this->scanner->current(); + if ($tok == '"' || $tok == "'") { + $this->scanner->next(); + $ret = $this->scanner->charsUntil($tok . $stopchars); + if ($this->scanner->current() == $tok) { + $this->scanner->next(); + } else { + // Parse error because no close quote. + $this->parseError("Expected %s, got %s", $tok, $this->scanner->current()); + } + return $ret; + } + return false; + } + + /** + * Handle a CDATA section. 
+ */ + protected function cdataSection() + { + if ($this->scanner->current() != '[') { + return false; + } + $cdata = ''; + $this->scanner->next(); + + $chars = $this->scanner->charsWhile('CDAT'); + if ($chars != 'CDATA' || $this->scanner->current() != '[') { + $this->parseError('Expected [CDATA[, got %s', $chars); + return $this->bogusComment('scanner->next(); + do { + if ($tok === false) { + $this->parseError('Unexpected EOF inside CDATA.'); + $this->bogusComment('scanner->next(); + } while (! $this->sequenceMatches(']]>')); + + // Consume ]]> + $this->scanner->consume(3); + + $this->events->cdata($cdata); + return true; + } + + // ================================================================ + // Non-HTML5 + // ================================================================ + /** + * Handle a processing instruction. + * + * XML processing instructions are supposed to be ignored in HTML5, + * treated as "bogus comments". However, since we're not a user + * agent, we allow them. We consume until ?> and then issue a + * EventListener::processingInstruction() event. + */ + protected function processingInstruction() + { + if ($this->scanner->current() != '?') { + return false; + } + + $tok = $this->scanner->next(); + $procName = $this->scanner->getAsciiAlpha(); + $white = strlen($this->scanner->whitespace()); + + // If not a PI, send to bogusComment. + if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() == false) { + $this->parseError("Expected processing instruction name, got $tok"); + $this->bogusComment('. + while (! ($this->scanner->current() == '?' && $this->scanner->peek() == '>')) { + $data .= $this->scanner->current(); + + $tok = $this->scanner->next(); + if ($tok === false) { + $this->parseError("Unexpected EOF in processing instruction."); + $this->events->processingInstruction($procName, $data); + return true; + } + } + + $this->scanner->next(); // > + $this->scanner->next(); // Next token. 
+ $this->events->processingInstruction($procName, $data); + return true; + } + + // ================================================================ + // UTILITY FUNCTIONS + // ================================================================ + + /** + * Read from the input stream until we get to the desired sequene + * or hit the end of the input stream. + */ + protected function readUntilSequence($sequence) + { + $buffer = ''; + + // Optimization for reading larger blocks faster. + $first = substr($sequence, 0, 1); + while ($this->scanner->current() !== false) { + $buffer .= $this->scanner->charsUntil($first); + + // Stop as soon as we hit the stopping condition. + if ($this->sequenceMatches($sequence, false)) { + return $buffer; + } + $buffer .= $this->scanner->current(); + $this->scanner->next(); + } + + // If we get here, we hit the EOF. + $this->parseError("Unexpected EOF during text read."); + return $buffer; + } + + /** + * Check if upcomming chars match the given sequence. + * + * This will read the stream for the $sequence. If it's + * found, this will return true. If not, return false. + * Since this unconsumes any chars it reads, the caller + * will still need to read the next sequence, even if + * this returns true. + * + * Example: $this->sequenceMatches('') will + * see if the input stream is at the start of a + * '' string. + */ + protected function sequenceMatches($sequence, $caseSensitive = true) + { + $len = strlen($sequence); + $buffer = ''; + for ($i = 0; $i < $len; ++ $i) { + $buffer .= $this->scanner->current(); + + // EOF. Rewind and let the caller handle it. + if ($this->scanner->current() === false) { + $this->scanner->unconsume($i); + return false; + } + $this->scanner->next(); + } + + $this->scanner->unconsume($len); + return $caseSensitive ? $buffer == $sequence : strcasecmp($buffer, $sequence) === 0; + } + + /** + * Send a TEXT event with the contents of the text buffer. 
+ * + * This emits an EventHandler::text() event with the current contents of the + * temporary text buffer. (The buffer is used to group as much PCDATA + * as we can instead of emitting lots and lots of TEXT events.) + */ + protected function flushBuffer() + { + if ($this->text === '') { + return; + } + $this->events->text($this->text); + $this->text = ''; + } + + /** + * Add text to the temporary buffer. + * + * @see flushBuffer() + */ + protected function buffer($str) + { + $this->text .= $str; + } + + /** + * Emit a parse error. + * + * A parse error always returns false because it never consumes any + * characters. + */ + protected function parseError($msg) + { + $args = func_get_args(); + + if (count($args) > 1) { + array_shift($args); + $msg = vsprintf($msg, $args); + } + + $line = $this->scanner->currentLine(); + $col = $this->scanner->columnOffset(); + $this->events->parseError($msg, $line, $col); + return false; + } + + /** + * Decode a character reference and return the string. + * + * Returns false if the entity could not be found. If $inAttribute is set + * to true, a bare & will be returned as-is. + * + * @param boolean $inAttribute + * Set to true if the text is inside of an attribute value. + * false otherwise. + */ + protected function decodeCharacterReference($inAttribute = false) + { + + // If it fails this, it's definitely not an entity. + if ($this->scanner->current() != '&') { + return false; + } + + // Next char after &. + $tok = $this->scanner->next(); + $entity = ''; + $start = $this->scanner->position(); + + if ($tok == false) { + return '&'; + } + + // These indicate not an entity. We return just + // the &. + if (strspn($tok, static::WHITE . "&<") == 1) { + // $this->scanner->next(); + return '&'; + } + + // Numeric entity + if ($tok == '#') { + $tok = $this->scanner->next(); + + // Hexidecimal encoding. 
+ // X[0-9a-fA-F]+; + // x[0-9a-fA-F]+; + if ($tok == 'x' || $tok == 'X') { + $tok = $this->scanner->next(); // Consume x + + // Convert from hex code to char. + $hex = $this->scanner->getHex(); + if (empty($hex)) { + $this->parseError("Expected &#xHEX;, got &#x%s", $tok); + // We unconsume because we don't know what parser rules might + // be in effect for the remaining chars. For example. '&#>' + // might result in a specific parsing rule inside of tag + // contexts, while not inside of pcdata context. + $this->scanner->unconsume(2); + return '&'; + } + $entity = CharacterReference::lookupHex($hex); + } // Decimal encoding. + // [0-9]+; + else { + // Convert from decimal to char. + $numeric = $this->scanner->getNumeric(); + if ($numeric === false) { + $this->parseError("Expected &#DIGITS;, got &#%s", $tok); + $this->scanner->unconsume(2); + return '&'; + } + $entity = CharacterReference::lookupDecimal($numeric); + } + } // String entity. + else { + // Attempt to consume a string up to a ';'. + // [a-zA-Z0-9]+; + $cname = $this->scanner->getAsciiAlpha(); + $entity = CharacterReference::lookupName($cname); + if ($entity == null) { + $this->parseError("No match in entity table for '%s'", $entity); + } + } + + // The scanner has advanced the cursor for us. + $tok = $this->scanner->current(); + + // We have an entity. We're done here. + if ($tok == ';') { + $this->scanner->next(); + return $entity; + } + + // If in an attribute, then failing to match ; means unconsume the + // entire string. Otherwise, failure to match is an error. + if ($inAttribute) { + $this->scanner->unconsume($this->scanner->position() - $start); + return '&'; + } + + $this->parseError("Expected &ENTITY;, got &ENTITY%s (no trailing ;) ", $tok); + return '&' . 
$entity; + } +} diff --git a/core/vendor/masterminds/html5/src/HTML5/Parser/TreeBuildingRules.php b/core/vendor/masterminds/html5/src/HTML5/Parser/TreeBuildingRules.php new file mode 100644 index 0000000..2af3c66 --- /dev/null +++ b/core/vendor/masterminds/html5/src/HTML5/Parser/TreeBuildingRules.php @@ -0,0 +1,140 @@ + 1, + 'dd' => 1, + 'dt' => 1, + 'rt' => 1, + 'rp' => 1, + 'tr' => 1, + 'th' => 1, + 'td' => 1, + 'thead' => 1, + 'tfoot' => 1, + 'tbody' => 1, + 'table' => 1, + 'optgroup' => 1, + 'option' => 1 + ); + + /** + * Build a new rules engine. + * + * @param \DOMDocument $doc + * The DOM document to use for evaluation and modification. + */ + public function __construct($doc) + { + $this->doc = $doc; + } + + /** + * Returns true if the given tagname has special processing rules. + */ + public function hasRules($tagname) + { + return isset(static::$tags[$tagname]); + } + + /** + * Evaluate the rule for the current tag name. + * + * This may modify the existing DOM. + * + * @return \DOMElement The new Current DOM element. + */ + public function evaluate($new, $current) + { + switch ($new->tagName) { + case 'li': + return $this->handleLI($new, $current); + case 'dt': + case 'dd': + return $this->handleDT($new, $current); + case 'rt': + case 'rp': + return $this->handleRT($new, $current); + case 'optgroup': + return $this->closeIfCurrentMatches($new, $current, array( + 'optgroup' + )); + case 'option': + return $this->closeIfCurrentMatches($new, $current, array( + 'option', + 'optgroup' + )); + case 'tr': + return $this->closeIfCurrentMatches($new, $current, array( + 'tr' + )); + case 'td': + case 'th': + return $this->closeIfCurrentMatches($new, $current, array( + 'th', + 'td' + )); + case 'tbody': + case 'thead': + case 'tfoot': + case 'table': // Spec isn't explicit about this, but it's necessary. 
+ + return $this->closeIfCurrentMatches($new, $current, array( + 'thead', + 'tfoot', + 'tbody' + )); + } + + return $current; + } + + protected function handleLI($ele, $current) + { + return $this->closeIfCurrentMatches($ele, $current, array( + 'li' + )); + } + + protected function handleDT($ele, $current) + { + return $this->closeIfCurrentMatches($ele, $current, array( + 'dt', + 'dd' + )); + } + + protected function handleRT($ele, $current) + { + return $this->closeIfCurrentMatches($ele, $current, array( + 'rt', + 'rp' + )); + } + + protected function closeIfCurrentMatches($ele, $current, $match) + { + $tname = $current->tagName; + if (in_array($current->tagName, $match)) { + $current->parentNode->appendChild($ele); + } else { + $current->appendChild($ele); + } + + return $ele; + } +} diff --git a/core/vendor/masterminds/html5/src/HTML5/Parser/UTF8Utils.php b/core/vendor/masterminds/html5/src/HTML5/Parser/UTF8Utils.php new file mode 100644 index 0000000..d319252 --- /dev/null +++ b/core/vendor/masterminds/html5/src/HTML5/Parser/UTF8Utils.php @@ -0,0 +1,171 @@ + + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +*/ +/** + * UTF-8 Utilities + */ +class UTF8Utils +{ + + /** + * The Unicode replacement character.. + */ + const FFFD = "\xEF\xBF\xBD"; + + /** + * Count the number of characters in a string. + * + * UTF-8 aware. This will try (in order) iconv, + * MB, libxml, and finally a custom counter. + * + * @todo Move this to a general utility class. + */ + public static function countChars($string) + { + // Get the length for the string we need. + if (function_exists('iconv_strlen')) { + return iconv_strlen($string, 'utf-8'); + } elseif (function_exists('mb_strlen')) { + return mb_strlen($string, 'utf-8'); + } elseif (function_exists('utf8_decode')) { + // MPB: Will this work? Won't certain decodes lead to two chars + // extrapolated out of 2-byte chars? + return strlen(utf8_decode($string)); + } + $count = count_chars($string); + // 0x80 = 0x7F - 0 + 1 (one added to get inclusive range) + // 0x33 = 0xF4 - 0x2C + 1 (one added to get inclusive range) + return array_sum(array_slice($count, 0, 0x80)) + array_sum(array_slice($count, 0xC2, 0x33)); + } + + /** + * Convert data from the given encoding to UTF-8. + * + * This has not yet been tested with charactersets other than UTF-8. + * It should work with ISO-8859-1/-13 and standard Latin Win charsets. + * + * @param string $data + * The data to convert. + * @param string $encoding + * A valid encoding. 
Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php + */ + public static function convertToUTF8($data, $encoding = 'UTF-8') + { + /* + * From the HTML5 spec: Given an encoding, the bytes in the input stream must be converted to Unicode characters for the tokeniser, as described by the rules for that encoding, except that the leading U+FEFF BYTE ORDER MARK character, if any, must not be stripped by the encoding layer (it is stripped by the rule below). Bytes or sequences of bytes in the original byte stream that could not be converted to Unicode characters must be converted to U+FFFD REPLACEMENT CHARACTER code points. + */ + + // mb_convert_encoding is chosen over iconv because of a bug. The best + // details for the bug are on http://us1.php.net/manual/en/function.iconv.php#108643 + // which contains links to the actual but reports as well as work around + // details. + if (function_exists('mb_convert_encoding')) { + // mb library has the following behaviors: + // - UTF-16 surrogates result in false. + // - Overlongs and outside Plane 16 result in empty strings. + + // Before we run mb_convert_encoding we need to tell it what to do with + // characters it does not know. This could be different than the parent + // application executing this library so we store the value, change it + // to our needs, and then change it back when we are done. This feels + // a little excessive and it would be great if there was a better way. + $save = ini_get('mbstring.substitute_character'); + ini_set('mbstring.substitute_character', "none"); + $data = mb_convert_encoding($data, 'UTF-8', $encoding); + ini_set('mbstring.substitute_character', $save); + } // @todo Get iconv running in at least some environments if that is possible. + elseif (function_exists('iconv') && $encoding != 'auto') { + // fprintf(STDOUT, "iconv found\n"); + // iconv has the following behaviors: + // - Overlong representations are ignored. 
+ // - Beyond Plane 16 is replaced with a lower char. + // - Incomplete sequences generate a warning. + $data = @iconv($encoding, 'UTF-8//IGNORE', $data); + } else { + // we can make a conforming native implementation + throw new Exception('Not implemented, please install mbstring or iconv'); + } + + /* + * One leading U+FEFF BYTE ORDER MARK character must be ignored if any are present. + */ + if (substr($data, 0, 3) === "\xEF\xBB\xBF") { + $data = substr($data, 3); + } + + return $data; + } + + /** + * Checks for Unicode code points that are not valid in a document. + * + * @param string $data + * A string to analyze. + * @return array An array of (string) error messages produced by the scanning. + */ + public static function checkForIllegalCodepoints($data) + { + if (! function_exists('preg_match_all')) { + throw\Exception('The PCRE library is not loaded or is not available.'); + } + + // Vestigal error handling. + $errors = array(); + + /* + * All U+0000 null characters in the input must be replaced by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such characters is a parse error. + */ + for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i ++) { + $errors[] = 'null-character'; + } + + /* + * Any occurrences of any characters in the ranges U+0001 to U+0008, U+000B, U+000E to U+001F, U+007F to U+009F, U+D800 to U+DFFF , U+FDD0 to U+FDEF, and characters U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF, U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE, U+6FFFF, U+7FFFE, U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF, U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF, U+DFFFE, U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, and U+10FFFF are parse errors. (These are all control characters or permanently undefined Unicode characters.) + */ + // Check PCRE is loaded. 
+ $count = preg_match_all( + '/(?: + [\x01-\x08\x0B\x0E-\x1F\x7F] # U+0001 to U+0008, U+000B, U+000E to U+001F and U+007F + | + \xC2[\x80-\x9F] # U+0080 to U+009F + | + \xED(?:\xA0[\x80-\xFF]|[\xA1-\xBE][\x00-\xFF]|\xBF[\x00-\xBF]) # U+D800 to U+DFFFF + | + \xEF\xB7[\x90-\xAF] # U+FDD0 to U+FDEF + | + \xEF\xBF[\xBE\xBF] # U+FFFE and U+FFFF + | + [\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16}) + )/x', $data, $matches); + for ($i = 0; $i < $count; $i ++) { + $errors[] = 'invalid-codepoint'; + } + + return $errors; + } +} diff --git a/core/vendor/masterminds/html5/src/HTML5/Serializer/HTML5Entities.php b/core/vendor/masterminds/html5/src/HTML5/Serializer/HTML5Entities.php new file mode 100644 index 0000000..4f90f84 --- /dev/null +++ b/core/vendor/masterminds/html5/src/HTML5/Serializer/HTML5Entities.php @@ -0,0 +1,1533 @@ + ' ', + "\n" => ' ', + '!' => '!', + '"' => '"', + '#' => '#', + '$' => '$', + '%' => '%', + '&' => '&', + '\'' => ''', + '(' => '(', + ')' => ')', + '*' => '*', + '+' => '+', + ',' => ',', + '.' => '.', + '/' => '/', + ':' => ':', + ';' => ';', + '<' => '<', + '<⃒' => '&nvlt', + '=' => '=', + '=⃥' => '&bne', + '>' => '>', + '>⃒' => '&nvgt', + '?' 
=> '?', + '@' => '@', + '[' => '[', + '\\' => '\', + ']' => ']', + '^' => '^', + '_' => '_', + '`' => '`', + 'fj' => '&fjlig', + '{' => '{', + '|' => '|', + '}' => '}', + ' ' => ' ', + '¡' => '¡', + '¢' => '¢', + '£' => '£', + '¤' => '¤', + '¥' => '¥', + '¦' => '¦', + '§' => '§', + '¨' => '¨', + '©' => '©', + 'ª' => 'ª', + '«' => '«', + '¬' => '¬', + '­' => '­', + '®' => '®', + '¯' => '¯', + '°' => '°', + '±' => '±', + '²' => '²', + '³' => '³', + '´' => '´', + 'µ' => 'µ', + '¶' => '¶', + '·' => '·', + '¸' => '¸', + '¹' => '¹', + 'º' => 'º', + '»' => '»', + '¼' => '¼', + '½' => '½', + '¾' => '¾', + '¿' => '¿', + 'À' => 'À', + 'Á' => 'Á', + 'Â' => 'Â', + 'Ã' => 'Ã', + 'Ä' => 'Ä', + 'Å' => 'Å', + 'Æ' => 'Æ', + 'Ç' => 'Ç', + 'È' => 'È', + 'É' => 'É', + 'Ê' => 'Ê', + 'Ë' => 'Ë', + 'Ì' => 'Ì', + 'Í' => 'Í', + 'Î' => 'Î', + 'Ï' => 'Ï', + 'Ð' => 'Ð', + 'Ñ' => 'Ñ', + 'Ò' => 'Ò', + 'Ó' => 'Ó', + 'Ô' => 'Ô', + 'Õ' => 'Õ', + 'Ö' => 'Ö', + '×' => '×', + 'Ø' => 'Ø', + 'Ù' => 'Ù', + 'Ú' => 'Ú', + 'Û' => 'Û', + 'Ü' => 'Ü', + 'Ý' => 'Ý', + 'Þ' => 'Þ', + 'ß' => 'ß', + 'à' => 'à', + 'á' => 'á', + 'â' => 'â', + 'ã' => 'ã', + 'ä' => 'ä', + 'å' => 'å', + 'æ' => 'æ', + 'ç' => 'ç', + 'è' => 'è', + 'é' => 'é', + 'ê' => 'ê', + 'ë' => 'ë', + 'ì' => 'ì', + 'í' => 'í', + 'î' => 'î', + 'ï' => 'ï', + 'ð' => 'ð', + 'ñ' => 'ñ', + 'ò' => 'ò', + 'ó' => 'ó', + 'ô' => 'ô', + 'õ' => 'õ', + 'ö' => 'ö', + '÷' => '÷', + 'ø' => 'ø', + 'ù' => 'ù', + 'ú' => 'ú', + 'û' => 'û', + 'ü' => 'ü', + 'ý' => 'ý', + 'þ' => 'þ', + 'ÿ' => 'ÿ', + 'Ā' => 'Ā', + 'ā' => 'ā', + 'Ă' => 'Ă', + 'ă' => 'ă', + 'Ą' => 'Ą', + 'ą' => 'ą', + 'Ć' => 'Ć', + 'ć' => 'ć', + 'Ĉ' => 'Ĉ', + 'ĉ' => 'ĉ', + 'Ċ' => 'Ċ', + 'ċ' => 'ċ', + 'Č' => 'Č', + 'č' => 'č', + 'Ď' => 'Ď', + 'ď' => 'ď', + 'Đ' => 'Đ', + 'đ' => 'đ', + 'Ē' => 'Ē', + 'ē' => 'ē', + 'Ė' => 'Ė', + 'ė' => 'ė', + 'Ę' => 'Ę', + 'ę' => 'ę', + 'Ě' => 'Ě', + 'ě' => 'ě', + 'Ĝ' => 'Ĝ', + 'ĝ' => 'ĝ', + 'Ğ' => 'Ğ', + 'ğ' => 'ğ', + 'Ġ' => 'Ġ', + 'ġ' => 'ġ', + 'Ģ' => 'Ģ', + 'Ĥ' => 'Ĥ', + 'ĥ' => 
'ĥ', + 'Ħ' => 'Ħ', + 'ħ' => 'ħ', + 'Ĩ' => 'Ĩ', + 'ĩ' => 'ĩ', + 'Ī' => 'Ī', + 'ī' => 'ī', + 'Į' => 'Į', + 'į' => 'į', + 'İ' => 'İ', + 'ı' => 'ı', + 'IJ' => 'IJ', + 'ij' => 'ij', + 'Ĵ' => 'Ĵ', + 'ĵ' => 'ĵ', + 'Ķ' => 'Ķ', + 'ķ' => 'ķ', + 'ĸ' => 'ĸ', + 'Ĺ' => 'Ĺ', + 'ĺ' => 'ĺ', + 'Ļ' => 'Ļ', + 'ļ' => 'ļ', + 'Ľ' => 'Ľ', + 'ľ' => 'ľ', + 'Ŀ' => 'Ŀ', + 'ŀ' => 'ŀ', + 'Ł' => 'Ł', + 'ł' => 'ł', + 'Ń' => 'Ń', + 'ń' => 'ń', + 'Ņ' => 'Ņ', + 'ņ' => 'ņ', + 'Ň' => 'Ň', + 'ň' => 'ň', + 'ʼn' => 'ʼn', + 'Ŋ' => 'Ŋ', + 'ŋ' => 'ŋ', + 'Ō' => 'Ō', + 'ō' => 'ō', + 'Ő' => 'Ő', + 'ő' => 'ő', + 'Œ' => 'Œ', + 'œ' => 'œ', + 'Ŕ' => 'Ŕ', + 'ŕ' => 'ŕ', + 'Ŗ' => 'Ŗ', + 'ŗ' => 'ŗ', + 'Ř' => 'Ř', + 'ř' => 'ř', + 'Ś' => 'Ś', + 'ś' => 'ś', + 'Ŝ' => 'Ŝ', + 'ŝ' => 'ŝ', + 'Ş' => 'Ş', + 'ş' => 'ş', + 'Š' => 'Š', + 'š' => 'š', + 'Ţ' => 'Ţ', + 'ţ' => 'ţ', + 'Ť' => 'Ť', + 'ť' => 'ť', + 'Ŧ' => 'Ŧ', + 'ŧ' => 'ŧ', + 'Ũ' => 'Ũ', + 'ũ' => 'ũ', + 'Ū' => 'Ū', + 'ū' => 'ū', + 'Ŭ' => 'Ŭ', + 'ŭ' => 'ŭ', + 'Ů' => 'Ů', + 'ů' => 'ů', + 'Ű' => 'Ű', + 'ű' => 'ű', + 'Ų' => 'Ų', + 'ų' => 'ų', + 'Ŵ' => 'Ŵ', + 'ŵ' => 'ŵ', + 'Ŷ' => 'Ŷ', + 'ŷ' => 'ŷ', + 'Ÿ' => 'Ÿ', + 'Ź' => 'Ź', + 'ź' => 'ź', + 'Ż' => 'Ż', + 'ż' => 'ż', + 'Ž' => 'Ž', + 'ž' => 'ž', + 'ƒ' => 'ƒ', + 'Ƶ' => 'Ƶ', + 'ǵ' => 'ǵ', + 'ȷ' => 'ȷ', + 'ˆ' => 'ˆ', + 'ˇ' => 'ˇ', + '˘' => '˘', + '˙' => '˙', + '˚' => '˚', + '˛' => '˛', + '˜' => '˜', + '˝' => '˝', + '̑' => '̑', + 'Α' => 'Α', + 'Β' => 'Β', + 'Γ' => 'Γ', + 'Δ' => 'Δ', + 'Ε' => 'Ε', + 'Ζ' => 'Ζ', + 'Η' => 'Η', + 'Θ' => 'Θ', + 'Ι' => 'Ι', + 'Κ' => 'Κ', + 'Λ' => 'Λ', + 'Μ' => 'Μ', + 'Ν' => 'Ν', + 'Ξ' => 'Ξ', + 'Ο' => 'Ο', + 'Π' => 'Π', + 'Ρ' => 'Ρ', + 'Σ' => 'Σ', + 'Τ' => 'Τ', + 'Υ' => 'Υ', + 'Φ' => 'Φ', + 'Χ' => 'Χ', + 'Ψ' => 'Ψ', + 'Ω' => 'Ω', + 'α' => 'α', + 'β' => 'β', + 'γ' => 'γ', + 'δ' => 'δ', + 'ε' => 'ε', + 'ζ' => 'ζ', + 'η' => 'η', + 'θ' => 'θ', + 'ι' => 'ι', + 'κ' => 'κ', + 'λ' => 'λ', + 'μ' => 'μ', + 'ν' => 'ν', + 'ξ' => 'ξ', + 'ο' => 'ο', + 'π' => 'π', + 'ρ' => 'ρ', + 'ς' => 'ς', + 'σ' => 'σ', + 'τ' => 'τ', 
+ 'υ' => 'υ', + 'φ' => 'φ', + 'χ' => 'χ', + 'ψ' => 'ψ', + 'ω' => 'ω', + 'ϑ' => 'ϑ', + 'ϒ' => 'ϒ', + 'ϕ' => 'ϕ', + 'ϖ' => 'ϖ', + 'Ϝ' => 'Ϝ', + 'ϝ' => 'ϝ', + 'ϰ' => 'ϰ', + 'ϱ' => 'ϱ', + 'ϵ' => 'ϵ', + '϶' => '϶', + 'Ё' => 'Ё', + 'Ђ' => 'Ђ', + 'Ѓ' => 'Ѓ', + 'Є' => 'Є', + 'Ѕ' => 'Ѕ', + 'І' => 'І', + 'Ї' => 'Ї', + 'Ј' => 'Ј', + 'Љ' => 'Љ', + 'Њ' => 'Њ', + 'Ћ' => 'Ћ', + 'Ќ' => 'Ќ', + 'Ў' => 'Ў', + 'Џ' => 'Џ', + 'А' => 'А', + 'Б' => 'Б', + 'В' => 'В', + 'Г' => 'Г', + 'Д' => 'Д', + 'Е' => 'Е', + 'Ж' => 'Ж', + 'З' => 'З', + 'И' => 'И', + 'Й' => 'Й', + 'К' => 'К', + 'Л' => 'Л', + 'М' => 'М', + 'Н' => 'Н', + 'О' => 'О', + 'П' => 'П', + 'Р' => 'Р', + 'С' => 'С', + 'Т' => 'Т', + 'У' => 'У', + 'Ф' => 'Ф', + 'Х' => 'Х', + 'Ц' => 'Ц', + 'Ч' => 'Ч', + 'Ш' => 'Ш', + 'Щ' => 'Щ', + 'Ъ' => 'Ъ', + 'Ы' => 'Ы', + 'Ь' => 'Ь', + 'Э' => 'Э', + 'Ю' => 'Ю', + 'Я' => 'Я', + 'а' => 'а', + 'б' => 'б', + 'в' => 'в', + 'г' => 'г', + 'д' => 'д', + 'е' => 'е', + 'ж' => 'ж', + 'з' => 'з', + 'и' => 'и', + 'й' => 'й', + 'к' => 'к', + 'л' => 'л', + 'м' => 'м', + 'н' => 'н', + 'о' => 'о', + 'п' => 'п', + 'р' => 'р', + 'с' => 'с', + 'т' => 'т', + 'у' => 'у', + 'ф' => 'ф', + 'х' => 'х', + 'ц' => 'ц', + 'ч' => 'ч', + 'ш' => 'ш', + 'щ' => 'щ', + 'ъ' => 'ъ', + 'ы' => 'ы', + 'ь' => 'ь', + 'э' => 'э', + 'ю' => 'ю', + 'я' => 'я', + 'ё' => 'ё', + 'ђ' => 'ђ', + 'ѓ' => 'ѓ', + 'є' => 'є', + 'ѕ' => 'ѕ', + 'і' => 'і', + 'ї' => 'ї', + 'ј' => 'ј', + 'љ' => 'љ', + 'њ' => 'њ', + 'ћ' => 'ћ', + 'ќ' => 'ќ', + 'ў' => 'ў', + 'џ' => 'џ', + ' ' => ' ', + ' ' => ' ', + ' ' => ' ', + ' ' => ' ', + ' ' => ' ', + ' ' => ' ', + ' ' => ' ', + ' ' => ' ', + '​' => '​', + '‌' => '‌', + '‍' => '‍', + '‎' => '‎', + '‏' => '‏', + '‐' => '‐', + '–' => '–', + '—' => '—', + '―' => '―', + '‖' => '‖', + '‘' => '‘', + '’' => '’', + '‚' => '‚', + '“' => '“', + '”' => '”', + '„' => '„', + '†' => '†', + '‡' => '‡', + '•' => '•', + '‥' => '‥', + '…' => '…', + '‰' => '‰', + '‱' => '‱', + '′' => '′', + '″' => '″', + '‴' => '‴', + '‵' => '‵', + '‹' => 
'‹', + '›' => '›', + '‾' => '‾', + '⁁' => '⁁', + '⁃' => '⁃', + '⁄' => '⁄', + '⁏' => '⁏', + '⁗' => '⁗', + ' ' => ' ', + '  ' => '&ThickSpace', + '⁠' => '⁠', + '⁡' => '⁡', + '⁢' => '⁢', + '⁣' => '⁣', + '€' => '€', + '⃛' => '⃛', + '⃜' => '⃜', + 'ℂ' => 'ℂ', + '℅' => '℅', + 'ℊ' => 'ℊ', + 'ℋ' => 'ℋ', + 'ℌ' => 'ℌ', + 'ℍ' => 'ℍ', + 'ℎ' => 'ℎ', + 'ℏ' => 'ℏ', + 'ℐ' => 'ℐ', + 'ℑ' => 'ℑ', + 'ℒ' => 'ℒ', + 'ℓ' => 'ℓ', + 'ℕ' => 'ℕ', + '№' => '№', + '℗' => '℗', + '℘' => '℘', + 'ℙ' => 'ℙ', + 'ℚ' => 'ℚ', + 'ℛ' => 'ℛ', + 'ℜ' => 'ℜ', + 'ℝ' => 'ℝ', + '℞' => '℞', + '™' => '™', + 'ℤ' => 'ℤ', + '℧' => '℧', + 'ℨ' => 'ℨ', + '℩' => '℩', + 'ℬ' => 'ℬ', + 'ℭ' => 'ℭ', + 'ℯ' => 'ℯ', + 'ℰ' => 'ℰ', + 'ℱ' => 'ℱ', + 'ℳ' => 'ℳ', + 'ℴ' => 'ℴ', + 'ℵ' => 'ℵ', + 'ℶ' => 'ℶ', + 'ℷ' => 'ℷ', + 'ℸ' => 'ℸ', + 'ⅅ' => 'ⅅ', + 'ⅆ' => 'ⅆ', + 'ⅇ' => 'ⅇ', + 'ⅈ' => 'ⅈ', + '⅓' => '⅓', + '⅔' => '⅔', + '⅕' => '⅕', + '⅖' => '⅖', + '⅗' => '⅗', + '⅘' => '⅘', + '⅙' => '⅙', + '⅚' => '⅚', + '⅛' => '⅛', + '⅜' => '⅜', + '⅝' => '⅝', + '⅞' => '⅞', + '←' => '←', + '↑' => '↑', + '→' => '→', + '↓' => '↓', + '↔' => '↔', + '↕' => '↕', + '↖' => '↖', + '↗' => '↗', + '↘' => '↘', + '↙' => '↙', + '↚' => '↚', + '↛' => '↛', + '↝' => '↝', + '↝̸' => '&nrarrw', + '↞' => '↞', + '↟' => '↟', + '↠' => '↠', + '↡' => '↡', + '↢' => '↢', + '↣' => '↣', + '↤' => '↤', + '↥' => '↥', + '↦' => '↦', + '↧' => '↧', + '↩' => '↩', + '↪' => '↪', + '↫' => '↫', + '↬' => '↬', + '↭' => '↭', + '↮' => '↮', + '↰' => '↰', + '↱' => '↱', + '↲' => '↲', + '↳' => '↳', + '↵' => '↵', + '↶' => '↶', + '↷' => '↷', + '↺' => '↺', + '↻' => '↻', + '↼' => '↼', + '↽' => '↽', + '↾' => '↾', + '↿' => '↿', + '⇀' => '⇀', + '⇁' => '⇁', + '⇂' => '⇂', + '⇃' => '⇃', + '⇄' => '⇄', + '⇅' => '⇅', + '⇆' => '⇆', + '⇇' => '⇇', + '⇈' => '⇈', + '⇉' => '⇉', + '⇊' => '⇊', + '⇋' => '⇋', + '⇌' => '⇌', + '⇍' => '⇍', + '⇎' => '⇎', + '⇏' => '⇏', + '⇐' => '⇐', + '⇑' => '⇑', + '⇒' => '⇒', + '⇓' => '⇓', + '⇔' => '⇔', + '⇕' => '⇕', + '⇖' => '⇖', + '⇗' => '⇗', + '⇘' => '⇘', + '⇙' => '⇙', + '⇚' => '⇚', + '⇛' => '⇛', + 
'⇝' => '⇝', + '⇤' => '⇤', + '⇥' => '⇥', + '⇵' => '⇵', + '⇽' => '⇽', + '⇾' => '⇾', + '⇿' => '⇿', + '∀' => '∀', + '∁' => '∁', + '∂' => '∂', + '∂̸' => '&npart', + '∃' => '∃', + '∄' => '∄', + '∅' => '∅', + '∇' => '∇', + '∈' => '∈', + '∉' => '∉', + '∋' => '∋', + '∌' => '∌', + '∏' => '∏', + '∐' => '∐', + '∑' => '∑', + '−' => '−', + '∓' => '∓', + '∔' => '∔', + '∖' => '∖', + '∗' => '∗', + '∘' => '∘', + '√' => '√', + '∝' => '∝', + '∞' => '∞', + '∟' => '∟', + '∠' => '∠', + '∠⃒' => '&nang', + '∡' => '∡', + '∢' => '∢', + '∣' => '∣', + '∤' => '∤', + '∥' => '∥', + '∦' => '∦', + '∧' => '∧', + '∨' => '∨', + '∩' => '∩', + '∩︀' => '&caps', + '∪' => '∪', + '∪︀' => '&cups', + '∫' => '∫', + '∬' => '∬', + '∭' => '∭', + '∮' => '∮', + '∯' => '∯', + '∰' => '∰', + '∱' => '∱', + '∲' => '∲', + '∳' => '∳', + '∴' => '∴', + '∵' => '∵', + '∶' => '∶', + '∷' => '∷', + '∸' => '∸', + '∺' => '∺', + '∻' => '∻', + '∼' => '∼', + '∼⃒' => '&nvsim', + '∽' => '∽', + '∽̱' => '&race', + '∾' => '∾', + '∾̳' => '&acE', + '∿' => '∿', + '≀' => '≀', + '≁' => '≁', + '≂' => '≂', + '≂̸' => '&nesim', + '≃' => '≃', + '≄' => '≄', + '≅' => '≅', + '≆' => '≆', + '≇' => '≇', + '≈' => '≈', + '≉' => '≉', + '≊' => '≊', + '≋' => '≋', + '≋̸' => '&napid', + '≌' => '≌', + '≍' => '≍', + '≍⃒' => '&nvap', + '≎' => '≎', + '≎̸' => '&nbump', + '≏' => '≏', + '≏̸' => '&nbumpe', + '≐' => '≐', + '≐̸' => '&nedot', + '≑' => '≑', + '≒' => '≒', + '≓' => '≓', + '≔' => '≔', + '≕' => '≕', + '≖' => '≖', + '≗' => '≗', + '≙' => '≙', + '≚' => '≚', + '≜' => '≜', + '≟' => '≟', + '≠' => '≠', + '≡' => '≡', + '≡⃥' => '&bnequiv', + '≢' => '≢', + '≤' => '≤', + '≤⃒' => '&nvle', + '≥' => '≥', + '≥⃒' => '&nvge', + '≦' => '≦', + '≦̸' => '&nlE', + '≧' => '≧', + '≧̸' => '&NotGreaterFullEqual', + '≨' => '≨', + '≨︀' => '&lvertneqq', + '≩' => '≩', + '≩︀' => '&gvertneqq', + '≪' => '≪', + '≪̸' => '&nLtv', + '≪⃒' => '&nLt', + '≫' => '≫', + '≫̸' => '&NotGreaterGreater', + '≫⃒' => '&nGt', + '≬' => '≬', + '≭' => '≭', + '≮' => '≮', + '≯' => '≯', + '≰' => '≰', + '≱' => '≱', + 
'≲' => '≲', + '≳' => '≳', + '≴' => '≴', + '≵' => '≵', + '≶' => '≶', + '≷' => '≷', + '≸' => '≸', + '≹' => '≹', + '≺' => '≺', + '≻' => '≻', + '≼' => '≼', + '≽' => '≽', + '≾' => '≾', + '≿' => '≿', + '≿̸' => '&NotSucceedsTilde', + '⊀' => '⊀', + '⊁' => '⊁', + '⊂' => '⊂', + '⊂⃒' => '&vnsub', + '⊃' => '⊃', + '⊃⃒' => '&nsupset', + '⊄' => '⊄', + '⊅' => '⊅', + '⊆' => '⊆', + '⊇' => '⊇', + '⊈' => '⊈', + '⊉' => '⊉', + '⊊' => '⊊', + '⊊︀' => '&vsubne', + '⊋' => '⊋', + '⊋︀' => '&vsupne', + '⊍' => '⊍', + '⊎' => '⊎', + '⊏' => '⊏', + '⊏̸' => '&NotSquareSubset', + '⊐' => '⊐', + '⊐̸' => '&NotSquareSuperset', + '⊑' => '⊑', + '⊒' => '⊒', + '⊓' => '⊓', + '⊓︀' => '&sqcaps', + '⊔' => '⊔', + '⊔︀' => '&sqcups', + '⊕' => '⊕', + '⊖' => '⊖', + '⊗' => '⊗', + '⊘' => '⊘', + '⊙' => '⊙', + '⊚' => '⊚', + '⊛' => '⊛', + '⊝' => '⊝', + '⊞' => '⊞', + '⊟' => '⊟', + '⊠' => '⊠', + '⊡' => '⊡', + '⊢' => '⊢', + '⊣' => '⊣', + '⊤' => '⊤', + '⊥' => '⊥', + '⊧' => '⊧', + '⊨' => '⊨', + '⊩' => '⊩', + '⊪' => '⊪', + '⊫' => '⊫', + '⊬' => '⊬', + '⊭' => '⊭', + '⊮' => '⊮', + '⊯' => '⊯', + '⊰' => '⊰', + '⊲' => '⊲', + '⊳' => '⊳', + '⊴' => '⊴', + '⊴⃒' => '&nvltrie', + '⊵' => '⊵', + '⊵⃒' => '&nvrtrie', + '⊶' => '⊶', + '⊷' => '⊷', + '⊸' => '⊸', + '⊹' => '⊹', + '⊺' => '⊺', + '⊻' => '⊻', + '⊽' => '⊽', + '⊾' => '⊾', + '⊿' => '⊿', + '⋀' => '⋀', + '⋁' => '⋁', + '⋂' => '⋂', + '⋃' => '⋃', + '⋄' => '⋄', + '⋅' => '⋅', + '⋆' => '⋆', + '⋇' => '⋇', + '⋈' => '⋈', + '⋉' => '⋉', + '⋊' => '⋊', + '⋋' => '⋋', + '⋌' => '⋌', + '⋍' => '⋍', + '⋎' => '⋎', + '⋏' => '⋏', + '⋐' => '⋐', + '⋑' => '⋑', + '⋒' => '⋒', + '⋓' => '⋓', + '⋔' => '⋔', + '⋕' => '⋕', + '⋖' => '⋖', + '⋗' => '⋗', + '⋘' => '⋘', + '⋘̸' => '&nLl', + '⋙' => '⋙', + '⋙̸' => '&nGg', + '⋚' => '⋚', + '⋚︀' => '&lesg', + '⋛' => '⋛', + '⋛︀' => '&gesl', + '⋞' => '⋞', + '⋟' => '⋟', + '⋠' => '⋠', + '⋡' => '⋡', + '⋢' => '⋢', + '⋣' => '⋣', + '⋦' => '⋦', + '⋧' => '⋧', + '⋨' => '⋨', + '⋩' => '⋩', + '⋪' => '⋪', + '⋫' => '⋫', + '⋬' => '⋬', + '⋭' => '⋭', + '⋮' => '⋮', + '⋯' => '⋯', + '⋰' => '⋰', + '⋱' => 
'⋱', + '⋲' => '⋲', + '⋳' => '⋳', + '⋴' => '⋴', + '⋵' => '⋵', + '⋵̸' => '¬indot', + '⋶' => '⋶', + '⋷' => '⋷', + '⋹' => '⋹', + '⋹̸' => '¬inE', + '⋺' => '⋺', + '⋻' => '⋻', + '⋼' => '⋼', + '⋽' => '⋽', + '⋾' => '⋾', + '⌅' => '⌅', + '⌆' => '⌆', + '⌈' => '⌈', + '⌉' => '⌉', + '⌊' => '⌊', + '⌋' => '⌋', + '⌌' => '⌌', + '⌍' => '⌍', + '⌎' => '⌎', + '⌏' => '⌏', + '⌐' => '⌐', + '⌒' => '⌒', + '⌓' => '⌓', + '⌕' => '⌕', + '⌖' => '⌖', + '⌜' => '⌜', + '⌝' => '⌝', + '⌞' => '⌞', + '⌟' => '⌟', + '⌢' => '⌢', + '⌣' => '⌣', + '⌭' => '⌭', + '⌮' => '⌮', + '⌶' => '⌶', + '⌽' => '⌽', + '⌿' => '⌿', + '⍼' => '⍼', + '⎰' => '⎰', + '⎱' => '⎱', + '⎴' => '⎴', + '⎵' => '⎵', + '⎶' => '⎶', + '⏜' => '⏜', + '⏝' => '⏝', + '⏞' => '⏞', + '⏟' => '⏟', + '⏢' => '⏢', + '⏧' => '⏧', + '␣' => '␣', + 'Ⓢ' => 'Ⓢ', + '─' => '─', + '│' => '│', + '┌' => '┌', + '┐' => '┐', + '└' => '└', + '┘' => '┘', + '├' => '├', + '┤' => '┤', + '┬' => '┬', + '┴' => '┴', + '┼' => '┼', + '═' => '═', + '║' => '║', + '╒' => '╒', + '╓' => '╓', + '╔' => '╔', + '╕' => '╕', + '╖' => '╖', + '╗' => '╗', + '╘' => '╘', + '╙' => '╙', + '╚' => '╚', + '╛' => '╛', + '╜' => '╜', + '╝' => '╝', + '╞' => '╞', + '╟' => '╟', + '╠' => '╠', + '╡' => '╡', + '╢' => '╢', + '╣' => '╣', + '╤' => '╤', + '╥' => '╥', + '╦' => '╦', + '╧' => '╧', + '╨' => '╨', + '╩' => '╩', + '╪' => '╪', + '╫' => '╫', + '╬' => '╬', + '▀' => '▀', + '▄' => '▄', + '█' => '█', + '░' => '░', + '▒' => '▒', + '▓' => '▓', + '□' => '□', + '▪' => '▪', + '▫' => '▫', + '▭' => '▭', + '▮' => '▮', + '▱' => '▱', + '△' => '△', + '▴' => '▴', + '▵' => '▵', + '▸' => '▸', + '▹' => '▹', + '▽' => '▽', + '▾' => '▾', + '▿' => '▿', + '◂' => '◂', + '◃' => '◃', + '◊' => '◊', + '○' => '○', + '◬' => '◬', + '◯' => '◯', + '◸' => '◸', + '◹' => '◹', + '◺' => '◺', + '◻' => '◻', + '◼' => '◼', + '★' => '★', + '☆' => '☆', + '☎' => '☎', + '♀' => '♀', + '♂' => '♂', + '♠' => '♠', + '♣' => '♣', + '♥' => '♥', + '♦' => '♦', + '♪' => '♪', + '♭' => '♭', + '♮' => '♮', + '♯' => '♯', + '✓' => '✓', + '✗' => '✗', + '✠' => '✠', + '✶' => 
'✶', + '❘' => '❘', + '❲' => '❲', + '❳' => '❳', + '⟈' => '⟈', + '⟉' => '⟉', + '⟦' => '⟦', + '⟧' => '⟧', + '⟨' => '⟨', + '⟩' => '⟩', + '⟪' => '⟪', + '⟫' => '⟫', + '⟬' => '⟬', + '⟭' => '⟭', + '⟵' => '⟵', + '⟶' => '⟶', + '⟷' => '⟷', + '⟸' => '⟸', + '⟹' => '⟹', + '⟺' => '⟺', + '⟼' => '⟼', + '⟿' => '⟿', + '⤂' => '⤂', + '⤃' => '⤃', + '⤄' => '⤄', + '⤅' => '⤅', + '⤌' => '⤌', + '⤍' => '⤍', + '⤎' => '⤎', + '⤏' => '⤏', + '⤐' => '⤐', + '⤑' => '⤑', + '⤒' => '⤒', + '⤓' => '⤓', + '⤖' => '⤖', + '⤙' => '⤙', + '⤚' => '⤚', + '⤛' => '⤛', + '⤜' => '⤜', + '⤝' => '⤝', + '⤞' => '⤞', + '⤟' => '⤟', + '⤠' => '⤠', + '⤣' => '⤣', + '⤤' => '⤤', + '⤥' => '⤥', + '⤦' => '⤦', + '⤧' => '⤧', + '⤨' => '⤨', + '⤩' => '⤩', + '⤪' => '⤪', + '⤳' => '⤳', + '⤳̸' => '&nrarrc', + '⤵' => '⤵', + '⤶' => '⤶', + '⤷' => '⤷', + '⤸' => '⤸', + '⤹' => '⤹', + '⤼' => '⤼', + '⤽' => '⤽', + '⥅' => '⥅', + '⥈' => '⥈', + '⥉' => '⥉', + '⥊' => '⥊', + '⥋' => '⥋', + '⥎' => '⥎', + '⥏' => '⥏', + '⥐' => '⥐', + '⥑' => '⥑', + '⥒' => '⥒', + '⥓' => '⥓', + '⥔' => '⥔', + '⥕' => '⥕', + '⥖' => '⥖', + '⥗' => '⥗', + '⥘' => '⥘', + '⥙' => '⥙', + '⥚' => '⥚', + '⥛' => '⥛', + '⥜' => '⥜', + '⥝' => '⥝', + '⥞' => '⥞', + '⥟' => '⥟', + '⥠' => '⥠', + '⥡' => '⥡', + '⥢' => '⥢', + '⥣' => '⥣', + '⥤' => '⥤', + '⥥' => '⥥', + '⥦' => '⥦', + '⥧' => '⥧', + '⥨' => '⥨', + '⥩' => '⥩', + '⥪' => '⥪', + '⥫' => '⥫', + '⥬' => '⥬', + '⥭' => '⥭', + '⥮' => '⥮', + '⥯' => '⥯', + '⥰' => '⥰', + '⥱' => '⥱', + '⥲' => '⥲', + '⥳' => '⥳', + '⥴' => '⥴', + '⥵' => '⥵', + '⥶' => '⥶', + '⥸' => '⥸', + '⥹' => '⥹', + '⥻' => '⥻', + '⥼' => '⥼', + '⥽' => '⥽', + '⥾' => '⥾', + '⥿' => '⥿', + '⦅' => '⦅', + '⦆' => '⦆', + '⦋' => '⦋', + '⦌' => '⦌', + '⦍' => '⦍', + '⦎' => '⦎', + '⦏' => '⦏', + '⦐' => '⦐', + '⦑' => '⦑', + '⦒' => '⦒', + '⦓' => '⦓', + '⦔' => '⦔', + '⦕' => '⦕', + '⦖' => '⦖', + '⦚' => '⦚', + '⦜' => '⦜', + '⦝' => '⦝', + '⦤' => '⦤', + '⦥' => '⦥', + '⦦' => '⦦', + '⦧' => '⦧', + '⦨' => '⦨', + '⦩' => '⦩', + '⦪' => '⦪', + '⦫' => '⦫', + '⦬' => '⦬', + '⦭' => '⦭', + '⦮' => '⦮', + '⦯' => '⦯', + '⦰' => '⦰', 
+ '⦱' => '⦱', + '⦲' => '⦲', + '⦳' => '⦳', + '⦴' => '⦴', + '⦵' => '⦵', + '⦶' => '⦶', + '⦷' => '⦷', + '⦹' => '⦹', + '⦻' => '⦻', + '⦼' => '⦼', + '⦾' => '⦾', + '⦿' => '⦿', + '⧀' => '⧀', + '⧁' => '⧁', + '⧂' => '⧂', + '⧃' => '⧃', + '⧄' => '⧄', + '⧅' => '⧅', + '⧉' => '⧉', + '⧍' => '⧍', + '⧎' => '⧎', + '⧏' => '⧏', + '⧏̸' => '&NotLeftTriangleBar', + '⧐' => '⧐', + '⧐̸' => '&NotRightTriangleBar', + '⧜' => '⧜', + '⧝' => '⧝', + '⧞' => '⧞', + '⧣' => '⧣', + '⧤' => '⧤', + '⧥' => '⧥', + '⧫' => '⧫', + '⧴' => '⧴', + '⧶' => '⧶', + '⨀' => '⨀', + '⨁' => '⨁', + '⨂' => '⨂', + '⨄' => '⨄', + '⨆' => '⨆', + '⨌' => '⨌', + '⨍' => '⨍', + '⨐' => '⨐', + '⨑' => '⨑', + '⨒' => '⨒', + '⨓' => '⨓', + '⨔' => '⨔', + '⨕' => '⨕', + '⨖' => '⨖', + '⨗' => '⨗', + '⨢' => '⨢', + '⨣' => '⨣', + '⨤' => '⨤', + '⨥' => '⨥', + '⨦' => '⨦', + '⨧' => '⨧', + '⨩' => '⨩', + '⨪' => '⨪', + '⨭' => '⨭', + '⨮' => '⨮', + '⨯' => '⨯', + '⨰' => '⨰', + '⨱' => '⨱', + '⨳' => '⨳', + '⨴' => '⨴', + '⨵' => '⨵', + '⨶' => '⨶', + '⨷' => '⨷', + '⨸' => '⨸', + '⨹' => '⨹', + '⨺' => '⨺', + '⨻' => '⨻', + '⨼' => '⨼', + '⨿' => '⨿', + '⩀' => '⩀', + '⩂' => '⩂', + '⩃' => '⩃', + '⩄' => '⩄', + '⩅' => '⩅', + '⩆' => '⩆', + '⩇' => '⩇', + '⩈' => '⩈', + '⩉' => '⩉', + '⩊' => '⩊', + '⩋' => '⩋', + '⩌' => '⩌', + '⩍' => '⩍', + '⩐' => '⩐', + '⩓' => '⩓', + '⩔' => '⩔', + '⩕' => '⩕', + '⩖' => '⩖', + '⩗' => '⩗', + '⩘' => '⩘', + '⩚' => '⩚', + '⩛' => '⩛', + '⩜' => '⩜', + '⩝' => '⩝', + '⩟' => '⩟', + '⩦' => '⩦', + '⩪' => '⩪', + '⩭' => '⩭', + '⩭̸' => '&ncongdot', + '⩮' => '⩮', + '⩯' => '⩯', + '⩰' => '⩰', + '⩰̸' => '&napE', + '⩱' => '⩱', + '⩲' => '⩲', + '⩳' => '⩳', + '⩴' => '⩴', + '⩵' => '⩵', + '⩷' => '⩷', + '⩸' => '⩸', + '⩹' => '⩹', + '⩺' => '⩺', + '⩻' => '⩻', + '⩼' => '⩼', + '⩽' => '⩽', + '⩽̸' => '&nles', + '⩾' => '⩾', + '⩾̸' => '&nges', + '⩿' => '⩿', + '⪀' => '⪀', + '⪁' => '⪁', + '⪂' => '⪂', + '⪃' => '⪃', + '⪄' => '⪄', + '⪅' => '⪅', + '⪆' => '⪆', + '⪇' => '⪇', + '⪈' => '⪈', + '⪉' => '⪉', + '⪊' => '⪊', + '⪋' => '⪋', + '⪌' => '⪌', + '⪍' => '⪍', + '⪎' => '⪎', + '⪏' => '⪏', + 
'⪐' => '⪐', + '⪑' => '⪑', + '⪒' => '⪒', + '⪓' => '⪓', + '⪔' => '⪔', + '⪕' => '⪕', + '⪖' => '⪖', + '⪗' => '⪗', + '⪘' => '⪘', + '⪙' => '⪙', + '⪚' => '⪚', + '⪝' => '⪝', + '⪞' => '⪞', + '⪟' => '⪟', + '⪠' => '⪠', + '⪡' => '⪡', + '⪡̸' => '&NotNestedLessLess', + '⪢' => '⪢', + '⪢̸' => '&NotNestedGreaterGreater', + '⪤' => '⪤', + '⪥' => '⪥', + '⪦' => '⪦', + '⪧' => '⪧', + '⪨' => '⪨', + '⪩' => '⪩', + '⪪' => '⪪', + '⪫' => '⪫', + '⪬' => '⪬', + '⪬︀' => '&smtes', + '⪭' => '⪭', + '⪭︀' => '&lates', + '⪮' => '⪮', + '⪯' => '⪯', + '⪯̸' => '&NotPrecedesEqual', + '⪰' => '⪰', + '⪰̸' => '&NotSucceedsEqual', + '⪳' => '⪳', + '⪴' => '⪴', + '⪵' => '⪵', + '⪶' => '⪶', + '⪷' => '⪷', + '⪸' => '⪸', + '⪹' => '⪹', + '⪺' => '⪺', + '⪻' => '⪻', + '⪼' => '⪼', + '⪽' => '⪽', + '⪾' => '⪾', + '⪿' => '⪿', + '⫀' => '⫀', + '⫁' => '⫁', + '⫂' => '⫂', + '⫃' => '⫃', + '⫄' => '⫄', + '⫅' => '⫅', + '⫅̸' => '&nsubE', + '⫆' => '⫆', + '⫆̸' => '&nsupseteqq', + '⫇' => '⫇', + '⫈' => '⫈', + '⫋' => '⫋', + '⫋︀' => '&vsubnE', + '⫌' => '⫌', + '⫌︀' => '&varsupsetneqq', + '⫏' => '⫏', + '⫐' => '⫐', + '⫑' => '⫑', + '⫒' => '⫒', + '⫓' => '⫓', + '⫔' => '⫔', + '⫕' => '⫕', + '⫖' => '⫖', + '⫗' => '⫗', + '⫘' => '⫘', + '⫙' => '⫙', + '⫚' => '⫚', + '⫛' => '⫛', + '⫤' => '⫤', + '⫦' => '⫦', + '⫧' => '⫧', + '⫨' => '⫨', + '⫩' => '⫩', + '⫫' => '⫫', + '⫬' => '⫬', + '⫭' => '⫭', + '⫮' => '⫮', + '⫯' => '⫯', + '⫰' => '⫰', + '⫱' => '⫱', + '⫲' => '⫲', + '⫳' => '⫳', + '⫽︀' => '&varsupsetneqq', + 'ff' => 'ff', + 'fi' => 'fi', + 'fl' => 'fl', + 'ffi' => 'ffi', + 'ffl' => 'ffl', + '𝒜' => '𝒜', + '𝒞' => '𝒞', + '𝒟' => '𝒟', + '𝒢' => '𝒢', + '𝒥' => '𝒥', + '𝒦' => '𝒦', + '𝒩' => '𝒩', + '𝒪' => '𝒪', + '𝒫' => '𝒫', + '𝒬' => '𝒬', + '𝒮' => '𝒮', + '𝒯' => '𝒯', + '𝒰' => '𝒰', + '𝒱' => '𝒱', + '𝒲' => '𝒲', + '𝒳' => '𝒳', + '𝒴' => '𝒴', + '𝒵' => '𝒵', + '𝒶' => '𝒶', + '𝒷' => '𝒷', + '𝒸' => '𝒸', + '𝒹' => '𝒹', + '𝒻' => '𝒻', + '𝒽' => '𝒽', + '𝒾' => '𝒾', + '𝒿' => '𝒿', + '𝓀' => '𝓀', + '𝓁' => '𝓁', + '𝓂' => '𝓂', + '𝓃' => '𝓃', + '𝓅' => '𝓅', + '𝓆' => '𝓆', + '𝓇' => '𝓇', + '𝓈' => '𝓈', + '𝓉' => '𝓉', 
+ '𝓊' => '𝓊', + '𝓋' => '𝓋', + '𝓌' => '𝓌', + '𝓍' => '𝓍', + '𝓎' => '𝓎', + '𝓏' => '𝓏', + '𝔄' => '𝔄', + '𝔅' => '𝔅', + '𝔇' => '𝔇', + '𝔈' => '𝔈', + '𝔉' => '𝔉', + '𝔊' => '𝔊', + '𝔍' => '𝔍', + '𝔎' => '𝔎', + '𝔏' => '𝔏', + '𝔐' => '𝔐', + '𝔑' => '𝔑', + '𝔒' => '𝔒', + '𝔓' => '𝔓', + '𝔔' => '𝔔', + '𝔖' => '𝔖', + '𝔗' => '𝔗', + '𝔘' => '𝔘', + '𝔙' => '𝔙', + '𝔚' => '𝔚', + '𝔛' => '𝔛', + '𝔜' => '𝔜', + '𝔞' => '𝔞', + '𝔟' => '𝔟', + '𝔠' => '𝔠', + '𝔡' => '𝔡', + '𝔢' => '𝔢', + '𝔣' => '𝔣', + '𝔤' => '𝔤', + '𝔥' => '𝔥', + '𝔦' => '𝔦', + '𝔧' => '𝔧', + '𝔨' => '𝔨', + '𝔩' => '𝔩', + '𝔪' => '𝔪', + '𝔫' => '𝔫', + '𝔬' => '𝔬', + '𝔭' => '𝔭', + '𝔮' => '𝔮', + '𝔯' => '𝔯', + '𝔰' => '𝔰', + '𝔱' => '𝔱', + '𝔲' => '𝔲', + '𝔳' => '𝔳', + '𝔴' => '𝔴', + '𝔵' => '𝔵', + '𝔶' => '𝔶', + '𝔷' => '𝔷', + '𝔸' => '𝔸', + '𝔹' => '𝔹', + '𝔻' => '𝔻', + '𝔼' => '𝔼', + '𝔽' => '𝔽', + '𝔾' => '𝔾', + '𝕀' => '𝕀', + '𝕁' => '𝕁', + '𝕂' => '𝕂', + '𝕃' => '𝕃', + '𝕄' => '𝕄', + '𝕆' => '𝕆', + '𝕊' => '𝕊', + '𝕋' => '𝕋', + '𝕌' => '𝕌', + '𝕍' => '𝕍', + '𝕎' => '𝕎', + '𝕏' => '𝕏', + '𝕐' => '𝕐', + '𝕒' => '𝕒', + '𝕓' => '𝕓', + '𝕔' => '𝕔', + '𝕕' => '𝕕', + '𝕖' => '𝕖', + '𝕗' => '𝕗', + '𝕘' => '𝕘', + '𝕙' => '𝕙', + '𝕚' => '𝕚', + '𝕛' => '𝕛', + '𝕜' => '𝕜', + '𝕝' => '𝕝', + '𝕞' => '𝕞', + '𝕟' => '𝕟', + '𝕠' => '𝕠', + '𝕡' => '𝕡', + '𝕢' => '𝕢', + '𝕣' => '𝕣', + '𝕤' => '𝕤', + '𝕥' => '𝕥', + '𝕦' => '𝕦', + '𝕧' => '𝕧', + '𝕨' => '𝕨', + '𝕩' => '𝕩', + '𝕪' => '𝕪', + '𝕫' => '𝕫' + ); +} diff --git a/core/vendor/masterminds/html5/src/HTML5/Serializer/OutputRules.php b/core/vendor/masterminds/html5/src/HTML5/Serializer/OutputRules.php new file mode 100644 index 0000000..c009698 --- /dev/null +++ b/core/vendor/masterminds/html5/src/HTML5/Serializer/OutputRules.php @@ -0,0 +1,481 @@ +'http://www.w3.org/1999/xhtml', + 'attrNamespace'=>'http://www.w3.org/1999/xhtml', + + 'nodeName'=>'img', 'nodeName'=>array('img', 'a'), + 'attrName'=>'alt', 'attrName'=>array('title', 'alt'), + + + 'prefixes'=>['xh'=>'http://www.w3.org/1999/xhtml'), + 'xpath' => "@checked[../../xh:input[@type='radio' or 
@type='checkbox']]", + ), + */ + array( + 'nodeNamespace'=>'http://www.w3.org/1999/xhtml', + 'attrName'=>array('alt', 'title'), + ), + + ); + + const DOCTYPE = ''; + + public function __construct($output, $options = array()) + { + if (isset($options['encode_entities'])) { + $this->encode = $options['encode_entities']; + } + + $this->outputMode = static::IM_IN_HTML; + $this->out = $output; + + // If HHVM, see https://github.com/facebook/hhvm/issues/2727 + $this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION'); + } + public function addRule(array $rule) + { + $this->nonBooleanAttributes[] = $rule; + } + + public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser) + { + $this->traverser = $traverser; + + return $this; + } + + public function document($dom) + { + $this->doctype(); + if ($dom->documentElement) { + $this->traverser->node($dom->documentElement); + $this->nl(); + } + } + + protected function doctype() + { + $this->wr(static::DOCTYPE); + $this->nl(); + } + + public function element($ele) + { + $name = $ele->tagName; + + // Per spec: + // If the element has a declared namespace in the HTML, MathML or + // SVG namespaces, we use the lname instead of the tagName. + if ($this->traverser->isLocalElement($ele)) { + $name = $ele->localName; + } + + // If we are in SVG or MathML there is special handling. + // Using if/elseif instead of switch because it's faster in PHP. + if ($name == 'svg') { + $this->outputMode = static::IM_IN_SVG; + $name = Elements::normalizeSvgElement($name); + } elseif ($name == 'math') { + $this->outputMode = static::IM_IN_MATHML; + } + + $this->openTag($ele); + if (Elements::isA($name, Elements::TEXT_RAW)) { + foreach ($ele->childNodes as $child) { + $this->wr($child->data); + } + } else { + // Handle children. + if ($ele->hasChildNodes()) { + $this->traverser->children($ele->childNodes); + } + + // Close out the SVG or MathML special handling. 
+ if ($name == 'svg' || $name == 'math') { + $this->outputMode = static::IM_IN_HTML; + } + } + + // If not unary, add a closing tag. + if (! Elements::isA($name, Elements::VOID_TAG)) { + $this->closeTag($ele); + } + } + + /** + * Write a text node. + * + * @param \DOMText $ele + * The text node to write. + */ + public function text($ele) + { + if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) { + $this->wr($ele->data); + return; + } + + // FIXME: This probably needs some flags set. + $this->wr($this->enc($ele->data)); + } + + public function cdata($ele) + { + // This encodes CDATA. + $this->wr($ele->ownerDocument->saveXML($ele)); + } + + public function comment($ele) + { + // These produce identical output. + // $this->wr(''); + $this->wr($ele->ownerDocument->saveXML($ele)); + } + + public function processorInstruction($ele) + { + $this->wr('wr($ele->target) + ->wr(' ') + ->wr($ele->data) + ->wr('?>'); + } + /** + * Write the namespace attributes + * + * + * @param \DOMNode $ele + * The element being written. + */ + protected function namespaceAttrs($ele) + { + if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument){ + $this->xpath = new \DOMXPath($ele->ownerDocument); + } + + foreach( $this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele ) as $nsNode ) { + if (!in_array($nsNode->nodeValue, $this->implicitNamespaces)) { + $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"'); + } + } + } + + /** + * Write the opening tag. + * + * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the + * qualified name (8.3). + * + * @param \DOMNode $ele + * The element being written. + */ + protected function openTag($ele) + { + $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? 
$ele->localName : $ele->tagName); + + + $this->attrs($ele); + $this->namespaceAttrs($ele); + + + if ($this->outputMode == static::IM_IN_HTML) { + $this->wr('>'); + } // If we are not in html mode we are in SVG, MathML, or XML embedded content. + else { + if ($ele->hasChildNodes()) { + $this->wr('>'); + } // If there are no children this is self closing. + else { + $this->wr(' />'); + } + } + } + + protected function attrs($ele) + { + // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements. + if (! $ele->hasAttributes()) { + return $this; + } + + // TODO: Currently, this always writes name="value", and does not do + // value-less attributes. + $map = $ele->attributes; + $len = $map->length; + for ($i = 0; $i < $len; ++ $i) { + $node = $map->item($i); + $val = $this->enc($node->value, true); + + // XXX: The spec says that we need to ensure that anything in + // the XML, XMLNS, or XLink NS's should use the canonical + // prefix. It seems that DOM does this for us already, but there + // may be exceptions. + $name = $node->name; + + // Special handling for attributes in SVG and MathML. + // Using if/elseif instead of switch because it's faster in PHP. 
+ if ($this->outputMode == static::IM_IN_SVG) { + $name = Elements::normalizeSvgAttribute($name); + } elseif ($this->outputMode == static::IM_IN_MATHML) { + $name = Elements::normalizeMathMlAttribute($name); + } + + $this->wr(' ')->wr($name); + + if ((isset($val) && $val !== '') || $this->nonBooleanAttribute($node)) { + $this->wr('="')->wr($val)->wr('"'); + } + } + } + + + protected function nonBooleanAttribute(\DOMAttr $attr) + { + $ele = $attr->ownerElement; + foreach($this->nonBooleanAttributes as $rule){ + + if(isset($rule['nodeNamespace']) && $rule['nodeNamespace']!==$ele->namespaceURI){ + continue; + } + if(isset($rule['attNamespace']) && $rule['attNamespace']!==$attr->namespaceURI){ + continue; + } + if(isset($rule['nodeName']) && !is_array($rule['nodeName']) && $rule['nodeName']!==$ele->localName){ + continue; + } + if(isset($rule['nodeName']) && is_array($rule['nodeName']) && !in_array($ele->localName, $rule['nodeName'], true)){ + continue; + } + if(isset($rule['attrName']) && !is_array($rule['attrName']) && $rule['attrName']!==$attr->localName){ + continue; + } + if(isset($rule['attrName']) && is_array($rule['attrName']) && !in_array($attr->localName, $rule['attrName'], true)){ + continue; + } + if(isset($rule['xpath'])){ + + $xp = $this->getXPath($attr); + if(isset($rule['prefixes'])){ + foreach($rule['prefixes'] as $nsPrefix => $ns){ + $xp->registerNamespace($nsPrefix, $ns); + } + } + if(!$xp->query($rule['xpath'], $attr->ownerElement)->length){ + continue; + } + } + + return true; + } + + return false; + } + + private function getXPath(\DOMNode $node){ + if(!$this->xpath){ + $this->xpath = new \DOMXPath($node->ownerDocument); + } + return $this->xpath; + } + + /** + * Write the closing tag. + * + * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the + * qualified name (8.3). + * + * @param \DOMNode $ele + * The element being written. 
+ */ + protected function closeTag($ele) + { + if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) { + $this->wr('wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName)->wr('>'); + } + } + + /** + * Write to the output. + * + * @param string $text + * The string to put into the output. + * + * @return \Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining. + */ + protected function wr($text) + { + fwrite($this->out, $text); + return $this; + } + + /** + * Write a new line character. + * + * @return \Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining. + */ + protected function nl() + { + fwrite($this->out, PHP_EOL); + return $this; + } + + /** + * Encode text. + * + * When encode is set to false, the default value, the text passed in is + * escaped per section 8.3 of the html5 spec. For details on how text is + * escaped see the escape() method. + * + * When encoding is set to true the text is converted to named character + * references where appropriate. Section 8.1.4 Character references of the + * html5 spec refers to using named character references. This is useful for + * characters that can't otherwise legally be used in the text. + * + * The named character references are listed in section 8.5. + * + * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references True encoding will turn all named character references into their entities. + * This includes such characters as +.# and many other common ones. By default + * encoding here will just escape &'<>". + * + * Note, PHP 5.4+ has better html5 encoding. + * + * @todo Use the Entities class in php 5.3 to have html5 entities. + * + * @param string $text + * text to encode. + * @param boolean $attribute + * True if we are encoding an attrubute, false otherwise + * + * @return string The encoded text. 
+ */ + protected function enc($text, $attribute = false) + { + + // Escape the text rather than convert to named character references. + if (! $this->encode) { + return $this->escape($text, $attribute); + } + + // If we are in PHP 5.4+ we can use the native html5 entity functionality to + // convert the named character references. + + if ($this->hasHTML5) { + return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false); + } // If a version earlier than 5.4 html5 entities are not entirely handled. + // This manually handles them. + else { + return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map); + } + } + + /** + * Escape test. + * + * According to the html5 spec section 8.3 Serializing HTML fragments, text + * within tags that are not style, script, xmp, iframe, noembed, and noframes + * need to be properly escaped. + * + * The & should be converted to &, no breaking space unicode characters + * converted to  , when in attribute mode the " should be converted to + * ", and when not in attribute mode the < and > should be converted to + * < and >. + * + * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString + * + * @param string $text + * text to escape. + * @param boolean $attribute + * True if we are escaping an attrubute, false otherwise + */ + protected function escape($text, $attribute = false) + { + + // Not using htmlspecialchars because, while it does escaping, it doesn't + // match the requirements of section 8.5. For example, it doesn't handle + // non-breaking spaces. 
+ if ($attribute) { + $replace = array( + '"' => '"', + '&' => '&', + "\xc2\xa0" => ' ' + ); + } else { + $replace = array( + '<' => '<', + '>' => '>', + '&' => '&', + "\xc2\xa0" => ' ' + ); + } + + return strtr($text, $replace); + } +} diff --git a/core/vendor/masterminds/html5/src/HTML5/Serializer/README.md b/core/vendor/masterminds/html5/src/HTML5/Serializer/README.md new file mode 100644 index 0000000..849a47f --- /dev/null +++ b/core/vendor/masterminds/html5/src/HTML5/Serializer/README.md @@ -0,0 +1,33 @@ +# The Serializer (Writer) Model + +The serializer roughly follows sections _8.1 Writing HTML documents_ and section +_8.3 Serializing HTML fragments_ by converting DOMDocument, DOMDocumentFragment, +and DOMNodeList into HTML5. + + [ HTML5 ] // Interface for saving. + || + [ Traverser ] // Walk the DOM + || + [ Rules ] // Convert DOM elements into strings. + || + [ HTML5 ] // HTML5 document or fragment in text. + + +## HTML5 Class + +Provides the top level interface for saving. + +## The Traverser + +Walks the DOM finding each element and passing it off to the output rules to +convert to HTML5. + +## Output Rules + +The output rules are defined in the RulesInterface which can have multiple +implementations. Currently, the OutputRules is the default implementation that +converts a DOM as is into HTML5. + +## HTML5 String + +The output of the process it HTML5 as a string or saved to a file. 
\ No newline at end of file diff --git a/core/vendor/masterminds/html5/src/HTML5/Serializer/RulesInterface.php b/core/vendor/masterminds/html5/src/HTML5/Serializer/RulesInterface.php new file mode 100644 index 0000000..6ef5e5e --- /dev/null +++ b/core/vendor/masterminds/html5/src/HTML5/Serializer/RulesInterface.php @@ -0,0 +1,103 @@ + 'html', + 'http://www.w3.org/1998/Math/MathML' => 'math', + 'http://www.w3.org/2000/svg' => 'svg' + ); + + protected $dom; + + protected $options; + + protected $encode = false; + + protected $rules; + + protected $out; + + /** + * Create a traverser. + * + * @param DOMNode|DOMNodeList $dom + * The document or node to traverse. + * @param resource $out + * A stream that allows writing. The traverser will output into this + * stream. + * @param array $options + * An array or options for the traverser as key/value pairs. These include: + * - encode_entities: A bool to specify if full encding should happen for all named + * charachter references. Defaults to false which escapes &'<>". + * - output_rules: The path to the class handling the output rules. + */ + public function __construct($dom, $out, RulesInterface $rules, $options = array()) + { + $this->dom = $dom; + $this->out = $out; + $this->rules = $rules; + $this->options = $options; + + $this->rules->setTraverser($this); + } + + /** + * Tell the traverser to walk the DOM. + * + * @return resource $out + * Returns the output stream. + */ + public function walk() + { + if ($this->dom instanceof \DOMDocument) { + $this->rules->document($this->dom); + } elseif ($this->dom instanceof \DOMDocumentFragment) { + // Document fragments are a special case. Only the children need to + // be serialized. + if ($this->dom->hasChildNodes()) { + $this->children($this->dom->childNodes); + } + } // If NodeList, loop + elseif ($this->dom instanceof \DOMNodeList) { + // If this is a NodeList of DOMDocuments this will not work. 
+ $this->children($this->dom); + } // Else assume this is a DOMNode-like datastructure. + else { + $this->node($this->dom); + } + + return $this->out; + } + + /** + * Process a node in the DOM. + * + * @param mixed $node + * A node implementing \DOMNode. + */ + public function node($node) + { + // A listing of types is at http://php.net/manual/en/dom.constants.php + switch ($node->nodeType) { + case XML_ELEMENT_NODE: + $this->rules->element($node); + break; + case XML_TEXT_NODE: + $this->rules->text($node); + break; + case XML_CDATA_SECTION_NODE: + $this->rules->cdata($node); + break; + // FIXME: It appears that the parser doesn't do PI's. + case XML_PI_NODE: + $this->rules->processorInstruction($node); + break; + case XML_COMMENT_NODE: + $this->rules->comment($node); + break; + // Currently we don't support embedding DTDs. + default: + //print ''; + break; + } + } + + /** + * Walk through all the nodes on a node list. + * + * @param \DOMNodeList $nl + * A list of child elements to walk through. + */ + public function children($nl) + { + foreach ($nl as $node) { + $this->node($node); + } + } + + /** + * Is an element local? + * + * @param mixed $ele + * An element that implement \DOMNode. + * + * @return bool True if local and false otherwise. + */ + public function isLocalElement($ele) + { + $uri = $ele->namespaceURI; + if (empty($uri)) { + return false; + } + + return isset(static::$local_ns[$uri]); + } +} diff --git a/core/vendor/masterminds/html5/test/HTML5/ElementsTest.php b/core/vendor/masterminds/html5/test/HTML5/ElementsTest.php new file mode 100644 index 0000000..629b561 --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/ElementsTest.php @@ -0,0 +1,486 @@ +html5Elements as $element) { + $this->assertTrue(Elements::isHtml5Element($element), 'html5 element test failed on: ' . $element); + + $this->assertTrue(Elements::isHtml5Element(strtoupper($element)), 'html5 element test failed on: ' . 
strtoupper($element)); + } + + $nonhtml5 = array( + 'foo', + 'bar', + 'baz' + ); + foreach ($nonhtml5 as $element) { + $this->assertFalse(Elements::isHtml5Element($element), 'html5 element test failed on: ' . $element); + + $this->assertFalse(Elements::isHtml5Element(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element)); + } + } + + public function testIsMathMLElement() + { + foreach ($this->mathmlElements as $element) { + $this->assertTrue(Elements::isMathMLElement($element), 'MathML element test failed on: ' . $element); + + // MathML is case sensetitive so these should all fail. + $this->assertFalse(Elements::isMathMLElement(strtoupper($element)), 'MathML element test failed on: ' . strtoupper($element)); + } + + $nonMathML = array( + 'foo', + 'bar', + 'baz' + ); + foreach ($nonMathML as $element) { + $this->assertFalse(Elements::isMathMLElement($element), 'MathML element test failed on: ' . $element); + } + } + + public function testIsSvgElement() + { + foreach ($this->svgElements as $element) { + $this->assertTrue(Elements::isSvgElement($element), 'SVG element test failed on: ' . $element); + + // SVG is case sensetitive so these should all fail. + $this->assertFalse(Elements::isSvgElement(strtoupper($element)), 'SVG element test failed on: ' . strtoupper($element)); + } + + $nonSVG = array( + 'foo', + 'bar', + 'baz' + ); + foreach ($nonSVG as $element) { + $this->assertFalse(Elements::isSvgElement($element), 'SVG element test failed on: ' . $element); + } + } + + public function testIsElement() + { + foreach ($this->html5Elements as $element) { + $this->assertTrue(Elements::isElement($element), 'html5 element test failed on: ' . $element); + + $this->assertTrue(Elements::isElement(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element)); + } + + foreach ($this->mathmlElements as $element) { + $this->assertTrue(Elements::isElement($element), 'MathML element test failed on: ' . 
$element); + + // MathML is case sensetitive so these should all fail. + $this->assertFalse(Elements::isElement(strtoupper($element)), 'MathML element test failed on: ' . strtoupper($element)); + } + + foreach ($this->svgElements as $element) { + $this->assertTrue(Elements::isElement($element), 'SVG element test failed on: ' . $element); + + // SVG is case sensetitive so these should all fail. But, there is duplication + // html5 and SVG. Since html5 is case insensetitive we need to make sure + // it's not a html5 element first. + if (! in_array($element, $this->html5Elements)) { + $this->assertFalse(Elements::isElement(strtoupper($element)), 'SVG element test failed on: ' . strtoupper($element)); + } + } + + $nonhtml5 = array( + 'foo', + 'bar', + 'baz' + ); + foreach ($nonhtml5 as $element) { + $this->assertFalse(Elements::isElement($element), 'html5 element test failed on: ' . $element); + + $this->assertFalse(Elements::isElement(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element)); + } + } + + public function testElement() + { + foreach ($this->html5Elements as $element) { + $this->assertGreaterThan(0, Elements::element($element)); + } + $nonhtml5 = array( + 'foo', + 'bar', + 'baz' + ); + foreach ($nonhtml5 as $element) { + $this->assertFalse(Elements::element($element)); + } + } + + public function testIsA() + { + $this->assertTrue(Elements::isA('script', Elements::KNOWN_ELEMENT)); + $this->assertFalse(Elements::isA('scriptypoo', Elements::KNOWN_ELEMENT)); + $this->assertTrue(Elements::isA('script', Elements::TEXT_RAW)); + $this->assertFalse(Elements::isA('script', Elements::TEXT_RCDATA)); + + $voidElements = array( + 'area', + 'base', + 'basefont', + 'bgsound', + 'br', + 'col', + 'command', + 'embed', + 'frame', + 'hr', + 'img' + ); + + foreach ($voidElements as $element) { + $this->assertTrue(Elements::isA($element, Elements::VOID_TAG), 'Void element test failed on: ' . 
$element); + } + + $nonVoid = array( + 'span', + 'a', + 'div' + ); + foreach ($nonVoid as $tag) { + $this->assertFalse(Elements::isA($tag, Elements::VOID_TAG), 'Void element test failed on: ' . $tag); + } + + $blockTags = array( + 'address', + 'article', + 'aside', + 'audio', + 'blockquote', + 'canvas', + 'dd', + 'div', + 'dl', + 'fieldset', + 'figcaption', + 'figure', + 'footer', + 'form', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'header', + 'hgroup', + 'hr', + 'noscript', + 'ol', + 'output', + 'p', + 'pre', + 'section', + 'table', + 'tfoot', + 'ul', + 'video' + ); + + foreach ($blockTags as $tag) { + $this->assertTrue(Elements::isA($tag, Elements::BLOCK_TAG), 'Block tag test failed on: ' . $tag); + } + + $nonBlockTags = array( + 'span', + 'img', + 'label' + ); + foreach ($nonBlockTags as $tag) { + $this->assertFalse(Elements::isA($tag, Elements::BLOCK_TAG), 'Block tag test failed on: ' . $tag); + } + } + + public function testNormalizeSvgElement() + { + $tests = array( + 'foo' => 'foo', + 'altglyph' => 'altGlyph', + 'BAR' => 'bar', + 'fespecularlighting' => 'feSpecularLighting', + 'bAz' => 'baz', + 'foreignobject' => 'foreignObject' + ); + + foreach ($tests as $input => $expected) { + $this->assertEquals($expected, Elements::normalizeSvgElement($input)); + } + } + + public function testNormalizeSvgAttribute() + { + $tests = array( + 'foo' => 'foo', + 'attributename' => 'attributeName', + 'BAR' => 'bar', + 'limitingconeangle' => 'limitingConeAngle', + 'bAz' => 'baz', + 'patterncontentunits' => 'patternContentUnits' + ); + + foreach ($tests as $input => $expected) { + $this->assertEquals($expected, Elements::normalizeSvgAttribute($input)); + } + } + + public function testNormalizeMathMlAttribute() + { + $tests = array( + 'foo' => 'foo', + 'definitionurl' => 'definitionURL', + 'BAR' => 'bar' + ); + + foreach ($tests as $input => $expected) { + $this->assertEquals($expected, Elements::normalizeMathMlAttribute($input)); + } + } +} diff --git 
a/core/vendor/masterminds/html5/test/HTML5/Html5Test.html b/core/vendor/masterminds/html5/test/HTML5/Html5Test.html new file mode 100644 index 0000000..a976e8b --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/Html5Test.html @@ -0,0 +1,10 @@ + + + + + Test + + +

This is a test.

+ + \ No newline at end of file diff --git a/core/vendor/masterminds/html5/test/HTML5/Html5Test.php b/core/vendor/masterminds/html5/test/HTML5/Html5Test.php new file mode 100644 index 0000000..a1a6c9c --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/Html5Test.php @@ -0,0 +1,398 @@ +html5 = $this->getInstance(); + } + + /** + * Parse and serialize a string. + */ + protected function cycle($html) + { + $dom = $this->html5->loadHTML('' . $html . ''); + $out = $this->html5->saveHTML($dom); + + return $out; + } + + protected function cycleFragment($fragment) + { + $dom = $this->html5->loadHTMLFragment($fragment); + $out = $this->html5->saveHTML($dom); + + return $out; + } + + public function testLoadOptions() + { + // doc + $dom = $this->html5->loadHTML($this->wrap(''), array( + 'implicitNamespaces' => array('t' => 'http://example.com'), + "xmlNamespaces" => true + )); + $this->assertInstanceOf('\DOMDocument', $dom); + $this->assertEmpty($this->html5->getErrors()); + $this->assertFalse($this->html5->hasErrors()); + + $xpath = new \DOMXPath( $dom ); + $xpath->registerNamespace( "t", "http://example.com" ); + $this->assertEquals(1, $xpath->query( "//t:tag" )->length); + + // doc fragment + $frag = $this->html5->loadHTMLFragment('', array( + 'implicitNamespaces' => array('t' => 'http://example.com'), + "xmlNamespaces" => true + )); + $this->assertInstanceOf('\DOMDocumentFragment', $frag); + $this->assertEmpty($this->html5->getErrors()); + $this->assertFalse($this->html5->hasErrors()); + + $frag->ownerDocument->appendChild($frag); + $xpath = new \DOMXPath( $frag->ownerDocument ); + $xpath->registerNamespace( "t", "http://example.com" ); + $this->assertEquals(1, $xpath->query( "//t:tag" , $frag)->length); + } + + public function testErrors() + { + $dom = $this->html5->loadHTML(''); + $this->assertInstanceOf('\DOMDocument', $dom); + + $this->assertNotEmpty($this->html5->getErrors()); + $this->assertTrue($this->html5->hasErrors()); + } + + public function 
testLoad() + { + $dom = $this->html5->load(__DIR__ . '/Html5Test.html'); + $this->assertInstanceOf('\DOMDocument', $dom); + $this->assertEmpty($this->html5->getErrors()); + $this->assertFalse($this->html5->hasErrors()); + + $file = fopen(__DIR__ . '/Html5Test.html', 'r'); + $dom = $this->html5->load($file); + $this->assertInstanceOf('\DOMDocument', $dom); + $this->assertEmpty($this->html5->getErrors()); + + $dom = $this->html5->loadHTMLFile(__DIR__ . '/Html5Test.html'); + $this->assertInstanceOf('\DOMDocument', $dom); + $this->assertEmpty($this->html5->getErrors()); + } + + public function testLoadHTML() + { + $contents = file_get_contents(__DIR__ . '/Html5Test.html'); + $dom = $this->html5->loadHTML($contents); + $this->assertInstanceOf('\DOMDocument', $dom); + $this->assertEmpty($this->html5->getErrors()); + } + + public function testLoadHTMLFragment() + { + $fragment = '
Baz
'; + $dom = $this->html5->loadHTMLFragment($fragment); + $this->assertInstanceOf('\DOMDocumentFragment', $dom); + $this->assertEmpty($this->html5->getErrors()); + } + + public function testSaveHTML() + { + $dom = $this->html5->load(__DIR__ . '/Html5Test.html'); + $this->assertInstanceOf('\DOMDocument', $dom); + $this->assertEmpty($this->html5->getErrors()); + + $saved = $this->html5->saveHTML($dom); + $this->assertRegExp('|

This is a test.

|', $saved); + } + + public function testSaveHTMLFragment() + { + $fragment = '
Baz
'; + $dom = $this->html5->loadHTMLFragment($fragment); + + $string = $this->html5->saveHTML($dom); + $this->assertEquals($fragment, $string); + } + + public function testSave() + { + $dom = $this->html5->load(__DIR__ . '/Html5Test.html'); + $this->assertInstanceOf('\DOMDocument', $dom); + $this->assertEmpty($this->html5->getErrors()); + + // Test resource + $file = fopen('php://temp', 'w'); + $this->html5->save($dom, $file); + $content = stream_get_contents($file, - 1, 0); + $this->assertRegExp('|

This is a test.

|', $content); + + // Test file + $tmpfname = tempnam(sys_get_temp_dir(), "html5-php"); + $this->html5->save($dom, $tmpfname); + $content = file_get_contents($tmpfname); + $this->assertRegExp('|

This is a test.

|', $content); + unlink($tmpfname); + } + + // This test reads a document into a dom, turn the dom into a document, + // then tries to read that document again. This makes sure we are reading, + // and generating a document that works at a high level. + public function testItWorks() + { + $dom = $this->html5->load(__DIR__ . '/Html5Test.html'); + $this->assertInstanceOf('\DOMDocument', $dom); + $this->assertEmpty($this->html5->getErrors()); + + $saved = $this->html5->saveHTML($dom); + + $dom2 = $this->html5->loadHTML($saved); + $this->assertInstanceOf('\DOMDocument', $dom2); + $this->assertEmpty($this->html5->getErrors()); + } + + public function testConfig() + { + $html5 = $this->getInstance(); + $options = $html5->getOptions(); + $this->assertEquals(false, $options['encode_entities']); + + $html5 = $this->getInstance(array( + 'foo' => 'bar', + 'encode_entities' => true + )); + $options = $html5->getOptions(); + $this->assertEquals('bar', $options['foo']); + $this->assertEquals(true, $options['encode_entities']); + + // Need to reset to original so future tests pass as expected. + // $this->getInstance()->setOption('encode_entities', false); + } + + public function testSvg() + { + $dom = $this->html5->loadHTML( + ' + + +
foo bar baz
+ + + + + + + Test Text. + + + + + '); + + $this->assertEmpty($this->html5->getErrors()); + + // Test a mixed case attribute. + $list = $dom->getElementsByTagName('svg'); + $this->assertNotEmpty($list->length); + $svg = $list->item(0); + $this->assertEquals("0 0 3 2", $svg->getAttribute('viewBox')); + $this->assertFalse($svg->hasAttribute('viewbox')); + + // Test a mixed case tag. + // Note: getElementsByTagName is not case sensetitive. + $list = $dom->getElementsByTagName('textPath'); + $this->assertNotEmpty($list->length); + $textPath = $list->item(0); + $this->assertEquals('textPath', $textPath->tagName); + $this->assertNotEquals('textpath', $textPath->tagName); + + $html = $this->html5->saveHTML($dom); + $this->assertRegExp('||', $html); + $this->assertRegExp('||', $html); + } + + public function testMathMl() + { + $dom = $this->html5->loadHTML( + ' + + +
foo bar baz
+ + x + + ± + + y + + + '); + + $this->assertEmpty($this->html5->getErrors()); + $list = $dom->getElementsByTagName('math'); + $this->assertNotEmpty($list->length); + + $list = $dom->getElementsByTagName('div'); + $this->assertNotEmpty($list->length); + $div = $list->item(0); + $this->assertEquals('http://example.com', $div->getAttribute('definitionurl')); + $this->assertFalse($div->hasAttribute('definitionURL')); + $list = $dom->getElementsByTagName('csymbol'); + $csymbol = $list->item(0); + $this->assertEquals('http://www.example.com/mathops/multiops.html#plusminus', $csymbol->getAttribute('definitionURL')); + $this->assertFalse($csymbol->hasAttribute('definitionurl')); + + $html = $this->html5->saveHTML($dom); + $this->assertRegExp('||', $html); + $this->assertRegExp('|y|', $html); + } + + public function testUnknownElements() + { + // The : should not have special handling accourding to section 2.9 of the + // spec. This is differenant than XML. Since we don't know these elements + // they are handled as normal elements. Note, to do this is really + // an invalid example and you should not embed prefixed xml in html5. + $dom = $this->html5->loadHTMLFragment( + " + Big rectangle thing + 40 + 80 + + um, yeah"); + + $this->assertEmpty($this->html5->getErrors()); + $markup = $this->html5->saveHTML($dom); + $this->assertRegExp('|Big rectangle thing|', $markup); + $this->assertRegExp('|um, yeah|', $markup); + } + + public function testElements() + { + // Should have content. + $res = $this->cycle('
FOO
'); + $this->assertRegExp('|
FOO
|', $res); + + // Should be empty + $res = $this->cycle(''); + $this->assertRegExp('||', $res); + + // Should have content. + $res = $this->cycleFragment('
FOO
'); + $this->assertRegExp('|
FOO
|', $res); + + // Should be empty + $res = $this->cycleFragment(''); + $this->assertRegExp('||', $res); + + // Elements with dashes and underscores + $res = $this->cycleFragment(''); + $this->assertRegExp('||', $res); + $res = $this->cycleFragment(''); + $this->assertRegExp('||', $res); + + // Should have no closing tag. + $res = $this->cycle('
'); + $this->assertRegExp('|
|', $res); + } + + public function testAttributes() + { + $res = $this->cycle('
FOO
'); + $this->assertRegExp('|
FOO
|', $res); + + // XXX: Note that spec does NOT require attrs in the same order. + $res = $this->cycle('
FOO
'); + $this->assertRegExp('|
FOO
|', $res); + + $res = $this->cycle('
FOO
'); + $this->assertRegExp('|
FOO
|', $res); + + $res = $this->cycleFragment('
FOO
'); + $this->assertRegExp('|
FOO
|', $res); + + // XXX: Note that spec does NOT require attrs in the same order. + $res = $this->cycleFragment('
FOO
'); + $this->assertRegExp('|
FOO
|', $res); + + $res = $this->cycleFragment('
FOO
'); + $this->assertRegExp('|
FOO
|', $res); + } + + public function testPCData() + { + $res = $this->cycle('This is a test.'); + $this->assertRegExp('|This is a test.|', $res); + + $res = $this->cycleFragment('This is a test.'); + $this->assertRegExp('|This is a test.|', $res); + + $res = $this->cycle('This + is + a + test.'); + + // Check that newlines are there, but don't count spaces. + $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res); + + $res = $this->cycleFragment('This + is + a + test.'); + + // Check that newlines are there, but don't count spaces. + $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res); + + $res = $this->cycle('This is a test.'); + $this->assertRegExp('|This is a test.|', $res); + + $res = $this->cycleFragment('This is a test.'); + $this->assertRegExp('|This is a test.|', $res); + } + + public function testUnescaped() + { + $res = $this->cycle(''); + $this->assertRegExp('|2 < 1|', $res); + + $res = $this->cycle(''); + $this->assertRegExp('|div>div>div|', $res); + + $res = $this->cycleFragment(''); + $this->assertRegExp('|2 < 1|', $res); + + $res = $this->cycleFragment(''); + $this->assertRegExp('|div>div>div|', $res); + } + + public function testEntities() + { + $res = $this->cycle('Apples & bananas.'); + $this->assertRegExp('|Apples & bananas.|', $res); + + $res = $this->cycleFragment('Apples & bananas.'); + $this->assertRegExp('|Apples & bananas.|', $res); + } + + public function testComment() + { + $res = $this->cycle('ab'); + $this->assertRegExp('||', $res); + + $res = $this->cycleFragment('ab'); + $this->assertRegExp('||', $res); + } + + public function testCDATA() + { + $res = $this->cycle('a a test. ]]>b'); + $this->assertRegExp('| a test\. \]\]>|', $res); + + $res = $this->cycleFragment('a a test. ]]>b'); + $this->assertRegExp('| a test\. 
\]\]>|', $res); + } +} diff --git a/core/vendor/masterminds/html5/test/HTML5/Parser/CharacterReferenceTest.php b/core/vendor/masterminds/html5/test/HTML5/Parser/CharacterReferenceTest.php new file mode 100644 index 0000000..762bcc2 --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/Parser/CharacterReferenceTest.php @@ -0,0 +1,44 @@ +assertEquals('&', CharacterReference::lookupName('amp')); + $this->assertEquals('<', CharacterReference::lookupName('lt')); + $this->assertEquals('>', CharacterReference::lookupName('gt')); + $this->assertEquals('"', CharacterReference::lookupName('quot')); + $this->assertEquals('∌', CharacterReference::lookupName('NotReverseElement')); + + $this->assertNull(CharacterReference::lookupName('StinkyCheese')); + } + + public function testLookupHex() + { + $this->assertEquals('<', CharacterReference::lookupHex('3c')); + $this->assertEquals('<', CharacterReference::lookupHex('003c')); + $this->assertEquals('&', CharacterReference::lookupHex('26')); + $this->assertEquals('}', CharacterReference::lookupHex('7d')); + $this->assertEquals('Σ', CharacterReference::lookupHex('3A3')); + $this->assertEquals('Σ', CharacterReference::lookupHex('03A3')); + $this->assertEquals('Σ', CharacterReference::lookupHex('3a3')); + $this->assertEquals('Σ', CharacterReference::lookupHex('03a3')); + } + + public function testLookupDecimal() + { + $this->assertEquals('&', CharacterReference::lookupDecimal(38)); + $this->assertEquals('&', CharacterReference::lookupDecimal('38')); + $this->assertEquals('<', CharacterReference::lookupDecimal(60)); + $this->assertEquals('Σ', CharacterReference::lookupDecimal(931)); + $this->assertEquals('Σ', CharacterReference::lookupDecimal('0931')); + } +} diff --git a/core/vendor/masterminds/html5/test/HTML5/Parser/DOMTreeBuilderTest.php b/core/vendor/masterminds/html5/test/HTML5/Parser/DOMTreeBuilderTest.php new file mode 100644 index 0000000..b2a2d39 --- /dev/null +++ 
b/core/vendor/masterminds/html5/test/HTML5/Parser/DOMTreeBuilderTest.php @@ -0,0 +1,537 @@ +parse(); + $this->errors = $treeBuilder->getErrors(); + + return $treeBuilder->document(); + } + + /** + * Utility function for parsing a fragment of HTML5. + */ + protected function parseFragment($string) + { + $treeBuilder = new DOMTreeBuilder(true); + $input = new StringInputStream($string); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $treeBuilder); + + $parser->parse(); + $this->errors = $treeBuilder->getErrors(); + + return $treeBuilder->fragment(); + } + + public function testDocument() + { + $html = ""; + $doc = $this->parse($html); + + $this->assertInstanceOf('\DOMDocument', $doc); + $this->assertEquals('html', $doc->documentElement->tagName); + $this->assertEquals('http://www.w3.org/1999/xhtml', $doc->documentElement->namespaceURI); + } + + public function testStrangeCapitalization() + { + $html = " + + + Hello, world! + + TheBody + "; + $doc = $this->parse($html); + + $this->assertInstanceOf('\DOMDocument', $doc); + $this->assertEquals('html', $doc->documentElement->tagName); + + $xpath = new \DOMXPath( $doc ); + $xpath->registerNamespace( "x", "http://www.w3.org/1999/xhtml" ); + + $this->assertEquals("Hello, world!", $xpath->query( "//x:title" )->item( 0 )->nodeValue); + $this->assertEquals("foo", $xpath->query( "//x:script" )->item( 0 )->nodeValue); + } + + public function testDocumentWithDisabledNamespaces() + { + $html = ""; + $doc = $this->parse($html, array('disable_html_ns' => true)); + + $this->assertInstanceOf('\DOMDocument', $doc); + $this->assertEquals('html', $doc->documentElement->tagName); + $this->assertNull($doc->documentElement->namespaceURI); + } + + public function testDocumentWithATargetDocument() + { + $targetDom = new \DOMDocument(); + + $html = ""; + $doc = $this->parse($html, array('target_document' => $targetDom)); + + $this->assertInstanceOf('\DOMDocument', $doc); + $this->assertSame($doc, $targetDom); + 
$this->assertEquals('html', $doc->documentElement->tagName); + } + + public function testDocumentFakeAttrAbsence() + { + $html = "foo"; + $doc = $this->parse($html, array('xmlNamespaces'=>true)); + + $xp = new \DOMXPath($doc); + $this->assertEquals(0, $xp->query("//@html5-php-fake-id-attribute")->length); + } + + public function testFragment() + { + $html = "
test
test2"; + $doc = $this->parseFragment($html); + + $this->assertInstanceOf('\DOMDocumentFragment', $doc); + $this->assertTrue($doc->hasChildNodes()); + $this->assertEquals('div', $doc->childNodes->item(0)->tagName); + $this->assertEquals('test', $doc->childNodes->item(0)->textContent); + $this->assertEquals('span', $doc->childNodes->item(1)->tagName); + $this->assertEquals('test2', $doc->childNodes->item(1)->textContent); + } + + public function testElements() + { + $html = ""; + $doc = $this->parse($html); + $root = $doc->documentElement; + + $this->assertEquals('html', $root->tagName); + $this->assertEquals('html', $root->localName); + $this->assertEquals('html', $root->nodeName); + + $this->assertEquals(2, $root->childNodes->length); + $kids = $root->childNodes; + + $this->assertEquals('head', $kids->item(0)->tagName); + $this->assertEquals('body', $kids->item(1)->tagName); + + $head = $kids->item(0); + $this->assertEquals(1, $head->childNodes->length); + $this->assertEquals('title', $head->childNodes->item(0)->tagName); + } + + public function testImplicitNamespaces() + { + $dom = $this->parse('foo'); + $a = $dom->getElementsByTagName('a')->item(0); + $attr = $a->getAttributeNode('xlink:href'); + $this->assertEquals('http://www.w3.org/1999/xlink', $attr->namespaceURI); + + $dom = $this->parse('foo'); + $a = $dom->getElementsByTagName('a')->item(0); + $attr = $a->getAttributeNode('xml:base'); + $this->assertEquals('http://www.w3.org/XML/1998/namespace', $attr->namespaceURI); + } + + public function testCustomImplicitNamespaces() + { + $dom = $this->parse('foo', array( + 'implicitNamespaces' => array( + 't' => 'http://www.example.com' + ) + )); + $a = $dom->getElementsByTagName('a')->item(0); + $attr = $a->getAttributeNode('t:href'); + $this->assertEquals('http://www.example.com', $attr->namespaceURI); + + $dom = $this->parse('foo', array( + 'implicitNamespaces' => array( + 't' => 'http://www.example.com' + ) + )); + $list = 
$dom->getElementsByTagNameNS('http://www.example.com', 'a'); + $this->assertEquals(1, $list->length); + } + + public function testXmlNamespaces() + { + $dom = $this->parse( + ' + + foo + +
foo
+ ', array( + 'xmlNamespaces' => true + )); + $a = $dom->getElementsByTagName('a')->item(0); + $attr = $a->getAttributeNode('t:href'); + $this->assertEquals('http://www.example.com', $attr->namespaceURI); + + $list = $dom->getElementsByTagNameNS('http://www.example.com', 'body'); + $this->assertEquals(1, $list->length); + } + + public function testXmlNamespaceNesting() + { + $dom = $this->parse( + ' + + + + + + +
+ + + + ', array( + 'xmlNamespaces' => true + )); + + + $this->assertEmpty($this->errors); + + $div = $dom->getElementById('div'); + $this->assertEquals('http://www.w3.org/1999/xhtml', $div->namespaceURI); + + $body = $dom->getElementById('body'); + $this->assertEquals('http://www.w3.org/1999/xhtml', $body->namespaceURI); + + $bar1 = $dom->getElementById('bar1'); + $this->assertEquals('http://www.prefixed.com/bar1', $bar1->namespaceURI); + + $bar2 = $dom->getElementById('bar2'); + $this->assertEquals("http://www.prefixed.com/bar2", $bar2->namespaceURI); + + $bar3 = $dom->getElementById('bar3'); + $this->assertEquals("http://www.w3.org/1999/xhtml", $bar3->namespaceURI); + + $bar4 = $dom->getElementById('bar4'); + $this->assertEquals("http://www.prefixed.com/bar4", $bar4->namespaceURI); + + $svg = $dom->getElementById('svg'); + $this->assertEquals("http://www.w3.org/2000/svg", $svg->namespaceURI); + + $prefixed = $dom->getElementById('prefixed'); + $this->assertEquals("http://www.prefixed.com", $prefixed->namespaceURI); + + $prefixed = $dom->getElementById('bar5'); + $this->assertEquals("http://www.prefixed.com/xn", $prefixed->namespaceURI); + + $prefixed = $dom->getElementById('bar5_x'); + $this->assertEquals("http://www.prefixed.com/bar5_x", $prefixed->namespaceURI); + } + + public function testMoveNonInlineElements() + { + $doc = $this->parse('

line1


line2

'); + $this->assertEquals('

line1


line2', $doc->saveXML($doc->documentElement), 'Move non-inline elements outside of inline containers.'); + + $doc = $this->parse('

line1

line2

'); + $this->assertEquals('

line1

line2
', $doc->saveXML($doc->documentElement), 'Move non-inline elements outside of inline containers.'); + } + + public function testAttributes() + { + $html = " + + + + "; + $doc = $this->parse($html); + $root = $doc->documentElement; + + $body = $root->GetElementsByTagName('body')->item(0); + $this->assertEquals('body', $body->tagName); + $this->assertTrue($body->hasAttributes()); + $this->assertEquals('a', $body->getAttribute('id')); + $this->assertEquals('b c', $body->getAttribute('class')); + + $body2 = $doc->getElementById('a'); + $this->assertEquals('body', $body2->tagName); + $this->assertEquals('a', $body2->getAttribute('id')); + } + + public function testSVGAttributes() + { + $html = " + + + + foo + + "; + $doc = $this->parse($html); + $root = $doc->documentElement; + + $svg = $root->getElementsByTagName('svg')->item(0); + $this->assertTrue($svg->hasAttribute('viewBox')); + + $rect = $root->getElementsByTagName('rect')->item(0); + $this->assertTrue($rect->hasAttribute('textLength')); + + $ac = $root->getElementsByTagName('animateColor'); + $this->assertEquals(1, $ac->length); + } + + public function testMathMLAttribute() + { + $html = ' + + + + x + + ± + + y + + + '; + + $doc = $this->parse($html); + $root = $doc->documentElement; + + $csymbol = $root->getElementsByTagName('csymbol')->item(0); + $this->assertTrue($csymbol->hasAttribute('definitionURL')); + } + + public function testMissingHtmlTag() + { + $html = "test"; + $doc = $this->parse($html); + + $this->assertEquals('html', $doc->documentElement->tagName); + $this->assertEquals('title', $doc->documentElement->childNodes->item(0)->tagName); + } + + public function testComment() + { + $html = ''; + + $doc = $this->parse($html); + + $comment = $doc->documentElement->childNodes->item(0); + $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType); + $this->assertEquals("Hello World.", $comment->data); + + $html = ''; + $doc = $this->parse($html); + + $comment = $doc->childNodes->item(1); + 
$this->assertEquals(XML_COMMENT_NODE, $comment->nodeType); + $this->assertEquals("Hello World.", $comment->data); + + $comment = $doc->childNodes->item(2); + $this->assertEquals(XML_ELEMENT_NODE, $comment->nodeType); + $this->assertEquals("html", $comment->tagName); + } + + public function testCDATA() + { + $html = "test"; + $doc = $this->parse($html); + + $wrapper = $doc->getElementsByTagName('math')->item(0); + $this->assertEquals(1, $wrapper->childNodes->length); + $cdata = $wrapper->childNodes->item(0); + $this->assertEquals(XML_CDATA_SECTION_NODE, $cdata->nodeType); + $this->assertEquals('test', $cdata->data); + } + + public function testText() + { + $html = "test"; + $doc = $this->parse($html); + + $wrapper = $doc->getElementsByTagName('math')->item(0); + $this->assertEquals(1, $wrapper->childNodes->length); + $data = $wrapper->childNodes->item(0); + $this->assertEquals(XML_TEXT_NODE, $data->nodeType); + $this->assertEquals('test', $data->data); + + // The DomTreeBuilder has special handling for text when in before head mode. + $html = " + Foo"; + $doc = $this->parse($html); + $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $this->errors[0]); + $headElement = $doc->documentElement->firstChild; + $this->assertEquals('head', $headElement->tagName); + } + + public function testParseErrors() + { + $html = "test"; + $doc = $this->parse($html); + + // We're JUST testing that we can access errors. Actual testing of + // error messages happen in the Tokenizer's tests. + $this->assertGreaterThan(0, count($this->errors)); + $this->assertTrue(is_string($this->errors[0])); + } + + public function testProcessingInstruction() + { + // Test the simple case, which is where PIs are inserted into the DOM. 
+ $doc = $this->parse('<!DOCTYPE html><html><?foo bar?>'); + $this->assertEquals(1, $doc->documentElement->childNodes->length); + $pi = $doc->documentElement->firstChild; + $this->assertInstanceOf('\DOMProcessingInstruction', $pi); + $this->assertEquals('foo', $pi->nodeName); + $this->assertEquals('bar', $pi->data); + + // Leading xml PIs should be ignored. + $doc = $this->parse('<?xml version="1.0"?><!DOCTYPE html><html><head></head></html>'); + + $this->assertEquals(2, $doc->childNodes->length); + $this->assertInstanceOf('\DOMDocumentType', $doc->childNodes->item(0)); + $this->assertInstanceOf('\DOMElement', $doc->childNodes->item(1)); + } + + public function testAutocloseP() + { + $html = "<!DOCTYPE html><html><body><p><figure></body></html>"; + $doc = $this->parse($html); + + $p = $doc->getElementsByTagName('p')->item(0); + $this->assertEquals(0, $p->childNodes->length); + $this->assertEquals('figure', $p->nextSibling->tagName); + } + + public function testAutocloseLI() + { + $html = '<!doctype html> + <html lang="en"> + <body> + <ul><li>Foo<li>Bar<li>Baz</ul> + </body> + </html>'; + + $doc = $this->parse($html); + $length = $doc->getElementsByTagName('ul')->item(0)->childNodes->length; + $this->assertEquals(3, $length); + } + + public function testMathML() + { + $html = '<!doctype html> + <html lang="en"> + <body> + <math xmlns="http://www.w3.org/1998/Math/MathML"> + <mi>x</mi> + <csymbol definitionurl="http://www.example.com/mathops/multiops.html#plusminus"> + <mo>&PlusMinus;</mo> + </csymbol> + <mi>y</mi> + </math> + </body> + </html>'; + + $doc = $this->parse($html); + $math = $doc->getElementsByTagName('math')->item(0); + $this->assertEquals('math', $math->tagName); + $this->assertEquals('math', $math->nodeName); + $this->assertEquals('math', $math->localName); + $this->assertEquals('http://www.w3.org/1998/Math/MathML', $math->namespaceURI); + } + + public function testSVG() + { + $html = '<!doctype html> + <html lang="en"> + <body> + <svg width="150" 
height="100" viewBox="0 0 3 2" xmlns="http://www.w3.org/2000/svg"> + <rect width="1" height="2" x="2" fill="#d2232c" /> + <text font-family="Verdana" font-size="32"> + <textpath xlink:href="#Foo"> + Test Text. + </textPath> + </text> + </svg> + </body> + </html>'; + + $doc = $this->parse($html); + $svg = $doc->getElementsByTagName('svg')->item(0); + $this->assertEquals('svg', $svg->tagName); + $this->assertEquals('svg', $svg->nodeName); + $this->assertEquals('svg', $svg->localName); + $this->assertEquals('http://www.w3.org/2000/svg', $svg->namespaceURI); + + $textPath = $doc->getElementsByTagName('textPath')->item(0); + $this->assertEquals('textPath', $textPath->tagName); + } + + public function testNoScript() + { + $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>'; + $doc = $this->parse($html); + $this->assertEmpty($this->errors); + $noscript = $doc->getElementsByTagName('noscript')->item(0); + $this->assertEquals('noscript', $noscript->tagName); + } + + /** + * Regression for issue #13 + */ + public function testRegressionHTMLNoBody() + { + $html = '<!DOCTYPE html><html><span id="test">Test</span></html>'; + $doc = $this->parse($html); + $span = $doc->getElementById('test'); + + $this->assertEmpty($this->errors); + + $this->assertEquals('span', $span->tagName); + $this->assertEquals('Test', $span->textContent); + } + + public function testInstructionProcessor() + { + $string = '<!DOCTYPE html><html><?foo bar ?></html>'; + + $treeBuilder = new DOMTreeBuilder(); + $is = new InstructionProcessorMock(); + $treeBuilder->setInstructionProcessor($is); + + $input = new StringInputStream($string); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $treeBuilder); + + $parser->parse(); + $dom = $treeBuilder->document(); + $div = $dom->getElementsByTagName('div')->item(0); + + $this->assertEquals(1, $is->count); + $this->assertEquals('foo', $is->name); + $this->assertEquals('bar ', $is->data); + $this->assertEquals('div', 
$div->tagName); + $this->assertEquals('foo', $div->textContent); + } +} diff --git a/core/vendor/masterminds/html5/test/HTML5/Parser/EventStack.php b/core/vendor/masterminds/html5/test/HTML5/Parser/EventStack.php new file mode 100644 index 0000000..60e2abe --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/Parser/EventStack.php @@ -0,0 +1,116 @@ +<?php +namespace Masterminds\HTML5\Tests\Parser; + +use Masterminds\HTML5\Elements; +use Masterminds\HTML5\Parser\EventHandler; + +/** + * This testing class gathers events from a parser and builds a stack of events. + * It is useful for checking the output of a tokenizer. + * + * IMPORTANT: + * + * The startTag event also kicks the parser into TEXT_RAW when it encounters + * script or pre tags. This is to match the behavior required by the HTML5 spec, + * which says that the tree builder must tell the tokenizer when to switch states. + */ +class EventStack implements EventHandler +{ + + protected $stack; + + public function __construct() + { + $this->stack = array(); + } + + /** + * Get the event stack. 
+ */ + public function events() + { + return $this->stack; + } + + public function depth() + { + return count($this->stack); + } + + public function get($index) + { + return $this->stack[$index]; + } + + protected function store($event, $data = null) + { + $this->stack[] = array( + 'name' => $event, + 'data' => $data + ); + } + + public function doctype($name, $type = 0, $id = null, $quirks = false) + { + $args = array( + $name, + $type, + $id, + $quirks + ); + $this->store('doctype', $args); + } + + public function startTag($name, $attributes = array(), $selfClosing = false) + { + $args = func_get_args(); + $this->store('startTag', $args); + if ($name == 'pre' || $name == 'script') { + return Elements::TEXT_RAW; + } + } + + public function endTag($name) + { + $this->store('endTag', array( + $name + )); + } + + public function comment($cdata) + { + $this->store('comment', array( + $cdata + )); + } + + public function cdata($data) + { + $this->store('cdata', func_get_args()); + } + + public function text($cdata) + { + // fprintf(STDOUT, "Received TEXT event with: " . 
$cdata); + $this->store('text', array( + $cdata + )); + } + + public function eof() + { + $this->store('eof'); + } + + public function parseError($msg, $line, $col) + { + // throw new EventStackParseError(sprintf("%s (line %d, col %d)", $msg, $line, $col)); + // $this->store(sprintf("%s (line %d, col %d)", $msg, $line, $col)); + $this->store('error', func_get_args()); + } + + public function processingInstruction($name, $data = null) + { + $this->store('pi', func_get_args()); + } +} diff --git a/core/vendor/masterminds/html5/test/HTML5/Parser/EventStackError.php b/core/vendor/masterminds/html5/test/HTML5/Parser/EventStackError.php new file mode 100644 index 0000000..e58fdff --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/Parser/EventStackError.php @@ -0,0 +1,6 @@ +<?php +namespace Masterminds\HTML5\Tests\Parser; + +class EventStackError extends \Exception +{ +} diff --git a/core/vendor/masterminds/html5/test/HTML5/Parser/FileInputStreamTest.html b/core/vendor/masterminds/html5/test/HTML5/Parser/FileInputStreamTest.html new file mode 100644 index 0000000..a976e8b --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/Parser/FileInputStreamTest.html @@ -0,0 +1,10 @@ +<!doctype html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <title>Test</title> + </head> + <body> + <p>This is a test.</p> + </body> +</html> \ No newline at end of file diff --git a/core/vendor/masterminds/html5/test/HTML5/Parser/FileInputStreamTest.php b/core/vendor/masterminds/html5/test/HTML5/Parser/FileInputStreamTest.php new file mode 100644 index 0000000..71dd828 --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/Parser/FileInputStreamTest.php @@ -0,0 +1,195 @@ +<?php +namespace Masterminds\HTML5\Tests\Parser; + +use Masterminds\HTML5\Parser\FileInputStream; + +class FileInputStreamTest extends \Masterminds\HTML5\Tests\TestCase +{ + + public function testConstruct() + { + $s = new FileInputStream(__DIR__ . 
'/FileInputStreamTest.html'); + + $this->assertInstanceOf('\Masterminds\HTML5\Parser\FileInputStream', $s); + } + + public function testNext() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $s->next(); + $this->assertEquals('!', $s->current()); + $s->next(); + $this->assertEquals('d', $s->current()); + } + + public function testKey() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $this->assertEquals(0, $s->key()); + + $s->next(); + $this->assertEquals(1, $s->key()); + } + + public function testPeek() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $this->assertEquals('!', $s->peek()); + + $s->next(); + $this->assertEquals('d', $s->peek()); + } + + public function testCurrent() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $this->assertEquals('<', $s->current()); + + $s->next(); + $this->assertEquals('!', $s->current()); + + $s->next(); + $this->assertEquals('d', $s->current()); + } + + public function testColumnOffset() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + $this->assertEquals(0, $s->columnOffset()); + $s->next(); + $this->assertEquals(1, $s->columnOffset()); + $s->next(); + $this->assertEquals(2, $s->columnOffset()); + $s->next(); + $this->assertEquals(3, $s->columnOffset()); + + // Make sure we get to the second line + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $this->assertEquals(0, $s->columnOffset()); + + $s->next(); + $canary = $s->current(); // h + $this->assertEquals('h', $canary); + $this->assertEquals(1, $s->columnOffset()); + } + + public function testCurrentLine() + { + $s = new FileInputStream(__DIR__ . 
'/FileInputStreamTest.html'); + + $this->assertEquals(1, $s->currentLine()); + + // Make sure we get to the second line + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $this->assertEquals(2, $s->currentLine()); + + // Make sure we get to the third line + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $this->assertEquals(3, $s->currentLine()); + } + + public function testRemainingChars() + { + $text = file_get_contents(__DIR__ . '/FileInputStreamTest.html'); + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + $this->assertEquals($text, $s->remainingChars()); + + $text = substr(file_get_contents(__DIR__ . '/FileInputStreamTest.html'), 1); + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + $s->next(); // Pop one. + $this->assertEquals($text, $s->remainingChars()); + } + + public function testCharsUnitl() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $this->assertEquals('', $s->charsUntil('<')); + // Pointer at '<', moves to ' ' + $this->assertEquals('<!doctype', $s->charsUntil(' ', 20)); + + // Pointer at ' ', moves to '>' + $this->assertEquals(' html', $s->charsUntil('>')); + + // Pointer at '>', moves to '\n'. + $this->assertEquals('>', $s->charsUntil("\n")); + + // Pointer at '\n', move forward then to the next'\n'. + $s->next(); + $this->assertEquals('<html lang="en">', $s->charsUntil("\n")); + + // Ony get one of the spaces. + $this->assertEquals("\n ", $s->charsUntil('<', 2)); + + // Get the other space. + $this->assertEquals(" ", $s->charsUntil('<')); + + // This should scan to the end of the file. 
+ $text = "<head> + <meta charset=\"utf-8\"> + <title>Test</title> + </head> + <body> + <p>This is a test.</p> + </body> +</html>"; + $this->assertEquals($text, $s->charsUntil("\t")); + } + + public function testCharsWhile() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $this->assertEquals('<!', $s->charsWhile('!<')); + $this->assertEquals('', $s->charsWhile('>')); + $this->assertEquals('doctype', $s->charsWhile('odcyept')); + $this->assertEquals(' htm', $s->charsWhile('html ', 4)); + } +} diff --git a/core/vendor/masterminds/html5/test/HTML5/Parser/InstructionProcessorMock.php b/core/vendor/masterminds/html5/test/HTML5/Parser/InstructionProcessorMock.php new file mode 100644 index 0000000..32a2204 --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/Parser/InstructionProcessorMock.php @@ -0,0 +1,26 @@ +<?php +namespace Masterminds\HTML5\Tests\Parser; + +class InstructionProcessorMock implements \Masterminds\HTML5\InstructionProcessor +{ + + public $name = null; + + public $data = null; + + public $count = 0; + + public function process(\DOMElement $element, $name, $data) + { + $this->name = $name; + $this->data = $data; + $this->count ++; + + $div = $element->ownerDocument->createElement("div"); + $div->nodeValue = 'foo'; + + $element->appendChild($div); + + return $div; + } +} diff --git a/core/vendor/masterminds/html5/test/HTML5/Parser/ScannerTest.php b/core/vendor/masterminds/html5/test/HTML5/Parser/ScannerTest.php new file mode 100644 index 0000000..8fa5110 --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/Parser/ScannerTest.php @@ -0,0 +1,171 @@ +<?php +/** + * @file + * Test the Scanner. This requires the InputStream tests are all good. + */ +namespace Masterminds\HTML5\Tests\Parser; + +use Masterminds\HTML5\Parser\StringInputStream; +use Masterminds\HTML5\Parser\Scanner; + +class ScannerTest extends \Masterminds\HTML5\Tests\TestCase +{ + + /** + * A canary test to make sure the basics are setup and working. 
+ */ + public function testConstruct() + { + $is = new StringInputStream("abc"); + $s = new Scanner($is); + + $this->assertInstanceOf('\Masterminds\HTML5\Parser\Scanner', $s); + } + + public function testNext() + { + $s = new Scanner(new StringInputStream("abc")); + + $this->assertEquals('b', $s->next()); + $this->assertEquals('c', $s->next()); + } + + public function testPosition() + { + $s = new Scanner(new StringInputStream("abc")); + + $this->assertEquals(0, $s->position()); + + $s->next(); + $this->assertEquals(1, $s->position()); + } + + public function testPeek() + { + $s = new Scanner(new StringInputStream("abc")); + + $this->assertEquals('b', $s->peek()); + + $s->next(); + $this->assertEquals('c', $s->peek()); + } + + public function testCurrent() + { + $s = new Scanner(new StringInputStream("abc")); + + // Before scanning the string begins the current is empty. + $this->assertEquals('a', $s->current()); + + $c = $s->next(); + $this->assertEquals('b', $s->current()); + + // Test movement through the string. + $c = $s->next(); + $this->assertEquals('c', $s->current()); + } + + public function testUnconsume() + { + $s = new Scanner(new StringInputStream("abcdefghijklmnopqrst")); + + // Get initial position. + $s->next(); + $start = $s->position(); + + // Move forward a bunch of positions. + $amount = 7; + for ($i = 0; $i < $amount; $i ++) { + $s->next(); + } + + // Roll back the amount we moved forward. + $s->unconsume($amount); + + $this->assertEquals($start, $s->position()); + } + + public function testGetHex() + { + $s = new Scanner(new StringInputStream("ab13ck45DE*")); + + $this->assertEquals('ab13c', $s->getHex()); + + $s->next(); + $this->assertEquals('45DE', $s->getHex()); + } + + public function testGetAsciiAlpha() + { + $s = new Scanner(new StringInputStream("abcdef1%mnop*")); + + $this->assertEquals('abcdef', $s->getAsciiAlpha()); + + // Move past the 1% to scan the next group of text. 
+ $s->next(); + $s->next(); + $this->assertEquals('mnop', $s->getAsciiAlpha()); + } + + public function testGetAsciiAlphaNum() + { + $s = new Scanner(new StringInputStream("abcdef1ghpo#mn94op")); + + $this->assertEquals('abcdef1ghpo', $s->getAsciiAlphaNum()); + + // Move past the # to scan the next group of text. + $s->next(); + $this->assertEquals('mn94op', $s->getAsciiAlphaNum()); + } + + public function testGetNumeric() + { + $s = new Scanner(new StringInputStream("1784a 45 9867 #")); + + $this->assertEquals('1784', $s->getNumeric()); + + // Move past the 'a ' to scan the next group of text. + $s->next(); + $s->next(); + $this->assertEquals('45', $s->getNumeric()); + } + + public function testCurrentLine() + { + $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test.")); + + $this->assertEquals(1, $s->currentLine()); + + // Move to the next line. + $s->getAsciiAlphaNum(); + $s->next(); + $this->assertEquals(2, $s->currentLine()); + } + + public function testColumnOffset() + { + $s = new Scanner(new StringInputStream("1784a a\n45 9867 #\nThis is a test.")); + + // Move the pointer to the space. + $s->getAsciiAlphaNum(); + $this->assertEquals(5, $s->columnOffset()); + + // We move the pointer ahead. There must be a better way to do this. 
+ $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $this->assertEquals(3, $s->columnOffset()); + } + + public function testRemainingChars() + { + $string = "\n45\n9867 #\nThis is a test."; + $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test.")); + + $s->getAsciiAlphaNum(); + $this->assertEquals($string, $s->remainingChars()); + } +} diff --git a/core/vendor/masterminds/html5/test/HTML5/Parser/StringInputStreamTest.php b/core/vendor/masterminds/html5/test/HTML5/Parser/StringInputStreamTest.php new file mode 100644 index 0000000..f87cc10 --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/Parser/StringInputStreamTest.php @@ -0,0 +1,327 @@ +<?php +namespace Masterminds\HTML5\Tests\Parser; + +use Masterminds\HTML5\Parser\StringInputStream; + +class StringInputStreamTest extends \Masterminds\HTML5\Tests\TestCase +{ + + /** + * A canary test to make sure the basics are setup and working. + */ + public function testConstruct() + { + $s = new StringInputStream("abc"); + + $this->assertInstanceOf('\Masterminds\HTML5\Parser\StringInputStream', $s); + } + + public function testNext() + { + $s = new StringInputStream("abc"); + + $s->next(); + $this->assertEquals('b', $s->current()); + $s->next(); + $this->assertEquals('c', $s->current()); + } + + public function testKey() + { + $s = new StringInputStream("abc"); + + $this->assertEquals(0, $s->key()); + + $s->next(); + $this->assertEquals(1, $s->key()); + } + + public function testPeek() + { + $s = new StringInputStream("abc"); + + $this->assertEquals('b', $s->peek()); + + $s->next(); + $this->assertEquals('c', $s->peek()); + } + + public function testCurrent() + { + $s = new StringInputStream("abc"); + + // Before scanning the string begins the current is empty. + $this->assertEquals('a', $s->current()); + + $s->next(); + $this->assertEquals('b', $s->current()); + + // Test movement through the string. 
+ $s->next(); + $this->assertEquals('c', $s->current()); + } + + public function testColumnOffset() + { + $s = new StringInputStream("abc\ndef\n"); + $this->assertEquals(0, $s->columnOffset()); + $s->next(); + $this->assertEquals(1, $s->columnOffset()); + $s->next(); + $this->assertEquals(2, $s->columnOffset()); + $s->next(); + $this->assertEquals(3, $s->columnOffset()); + $s->next(); // LF + $this->assertEquals(0, $s->columnOffset()); + $s->next(); + $canary = $s->current(); // e + $this->assertEquals('e', $canary); + $this->assertEquals(1, $s->columnOffset()); + + $s = new StringInputStream("abc"); + $this->assertEquals(0, $s->columnOffset()); + $s->next(); + $this->assertEquals(1, $s->columnOffset()); + $s->next(); + $this->assertEquals(2, $s->columnOffset()); + } + + public function testCurrentLine() + { + $txt = "1\n2\n\n\n\n3"; + $stream = new StringInputStream($txt); + $this->assertEquals(1, $stream->currentLine()); + + // Advance over 1 and LF on to line 2 value 2. + $stream->next(); + $stream->next(); + $canary = $stream->current(); + $this->assertEquals(2, $stream->currentLine()); + $this->assertEquals('2', $canary); + + // Advance over 4x LF + $stream->next(); + $stream->next(); + $stream->next(); + $stream->next(); + $stream->next(); + $this->assertEquals(6, $stream->currentLine()); + $this->assertEquals('3', $stream->current()); + + // Make sure it doesn't do 7. + $this->assertEquals(6, $stream->currentLine()); + } + + public function testRemainingChars() + { + $text = "abcd"; + $s = new StringInputStream($text); + $this->assertEquals($text, $s->remainingChars()); + + $text = "abcd"; + $s = new StringInputStream($text); + $s->next(); // Pop one. 
+ $this->assertEquals('bcd', $s->remainingChars()); + } + + public function testCharsUnitl() + { + $text = "abcdefffffffghi"; + $s = new StringInputStream($text); + $this->assertEquals('', $s->charsUntil('a')); + // Pointer at 'a', moves 2 to 'c' + $this->assertEquals('ab', $s->charsUntil('w', 2)); + + // Pointer at 'c', moves to first 'f' + $this->assertEquals('cde', $s->charsUntil('fzxv')); + + // Only get five 'f's + $this->assertEquals('fffff', $s->charsUntil('g', 5)); + + // Get just the last two 'f's + $this->assertEquals('ff', $s->charsUntil('g')); + + // This should scan to the end. + $this->assertEquals('ghi', $s->charsUntil('w', 9)); + } + + public function testCharsWhile() + { + $text = "abcdefffffffghi"; + $s = new StringInputStream($text); + + $this->assertEquals('ab', $s->charsWhile('ba')); + + $this->assertEquals('', $s->charsWhile('a')); + $this->assertEquals('cde', $s->charsWhile('cdeba')); + $this->assertEquals('ff', $s->charsWhile('f', 2)); + $this->assertEquals('fffff', $s->charsWhile('f')); + $this->assertEquals('g', $s->charsWhile('fg')); + $this->assertEquals('hi', $s->charsWhile('fghi', 99)); + } + + public function testBOM() + { + // Ignore in-text BOM. + $stream = new StringInputStream("a\xEF\xBB\xBF"); + $this->assertEquals("a\xEF\xBB\xBF", $stream->remainingChars(), 'A non-leading U+FEFF (BOM/ZWNBSP) should remain'); + + // Strip leading BOM + $leading = new StringInputStream("\xEF\xBB\xBFa"); + $this->assertEquals('a', $leading->current(), 'BOM should be stripped'); + } + + public function testCarriageReturn() + { + // Replace NULL with Unicode replacement. + $stream = new StringInputStream("\0\0\0"); + $this->assertEquals("\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD", $stream->remainingChars(), 'Null character should be replaced by U+FFFD'); + $this->assertEquals(3, count($stream->errors), 'Null character should set parse error: ' . print_r($stream->errors, true)); + + // Remove CR when next to LF. 
+ $stream = new StringInputStream("\r\n"); + $this->assertEquals("\n", $stream->remainingChars(), 'CRLF should be replaced by LF'); + + // Convert CR to LF when on its own. + $stream = new StringInputStream("\r"); + $this->assertEquals("\n", $stream->remainingChars(), 'CR should be replaced by LF'); + } + + public function invalidParseErrorTestHandler($input, $numErrors, $name) + { + $stream = new StringInputStream($input, 'UTF-8'); + $this->assertEquals($input, $stream->remainingChars(), $name . ' (stream content)'); + $this->assertEquals($numErrors, count($stream->errors), $name . ' (number of errors)'); + } + + public function testInvalidReplace() + { + $invalidTest = array( + + // Min/max overlong + "\xC0\x80a" => 'Overlong representation of U+0000', + "\xE0\x80\x80a" => 'Overlong representation of U+0000', + "\xF0\x80\x80\x80a" => 'Overlong representation of U+0000', + "\xF8\x80\x80\x80\x80a" => 'Overlong representation of U+0000', + "\xFC\x80\x80\x80\x80\x80a" => 'Overlong representation of U+0000', + "\xC1\xBFa" => 'Overlong representation of U+007F', + "\xE0\x9F\xBFa" => 'Overlong representation of U+07FF', + "\xF0\x8F\xBF\xBFa" => 'Overlong representation of U+FFFF', + + "a\xDF" => 'Incomplete two byte sequence (missing final byte)', + "a\xEF\xBF" => 'Incomplete three byte sequence (missing final byte)', + "a\xF4\xBF\xBF" => 'Incomplete four byte sequence (missing final byte)', + + // Min/max continuation bytes + "a\x80" => 'Lone 80 continuation byte', + "a\xBF" => 'Lone BF continuation byte', + + // Invalid bytes (these can never occur) + "a\xFE" => 'Invalid FE byte', + "a\xFF" => 'Invalid FF byte' + ); + foreach ($invalidTest as $test => $note) { + $stream = new StringInputStream($test); + $this->assertEquals('a', $stream->remainingChars(), $note); + } + + // MPB: + // It appears that iconv just leaves these alone. Not sure what to + // do. 
+ /* + * $converted = array( "a\xF5\x90\x80\x80" => 'U+110000, off unicode planes.', ); foreach ($converted as $test => $note) { $stream = new StringInputStream($test); $this->assertEquals(2, mb_strlen($stream->remainingChars()), $note); } + */ + } + + public function testInvalidParseError() + { + // C0 controls (except U+0000 and U+000D due to different handling) + $this->invalidParseErrorTestHandler("\x01", 1, 'U+0001 (C0 control)'); + $this->invalidParseErrorTestHandler("\x02", 1, 'U+0002 (C0 control)'); + $this->invalidParseErrorTestHandler("\x03", 1, 'U+0003 (C0 control)'); + $this->invalidParseErrorTestHandler("\x04", 1, 'U+0004 (C0 control)'); + $this->invalidParseErrorTestHandler("\x05", 1, 'U+0005 (C0 control)'); + $this->invalidParseErrorTestHandler("\x06", 1, 'U+0006 (C0 control)'); + $this->invalidParseErrorTestHandler("\x07", 1, 'U+0007 (C0 control)'); + $this->invalidParseErrorTestHandler("\x08", 1, 'U+0008 (C0 control)'); + $this->invalidParseErrorTestHandler("\x09", 0, 'U+0009 (C0 control)'); + $this->invalidParseErrorTestHandler("\x0A", 0, 'U+000A (C0 control)'); + $this->invalidParseErrorTestHandler("\x0B", 1, 'U+000B (C0 control)'); + $this->invalidParseErrorTestHandler("\x0C", 0, 'U+000C (C0 control)'); + $this->invalidParseErrorTestHandler("\x0E", 1, 'U+000E (C0 control)'); + $this->invalidParseErrorTestHandler("\x0F", 1, 'U+000F (C0 control)'); + $this->invalidParseErrorTestHandler("\x10", 1, 'U+0010 (C0 control)'); + $this->invalidParseErrorTestHandler("\x11", 1, 'U+0011 (C0 control)'); + $this->invalidParseErrorTestHandler("\x12", 1, 'U+0012 (C0 control)'); + $this->invalidParseErrorTestHandler("\x13", 1, 'U+0013 (C0 control)'); + $this->invalidParseErrorTestHandler("\x14", 1, 'U+0014 (C0 control)'); + $this->invalidParseErrorTestHandler("\x15", 1, 'U+0015 (C0 control)'); + $this->invalidParseErrorTestHandler("\x16", 1, 'U+0016 (C0 control)'); + $this->invalidParseErrorTestHandler("\x17", 1, 'U+0017 (C0 control)'); + 
$this->invalidParseErrorTestHandler("\x18", 1, 'U+0018 (C0 control)'); + $this->invalidParseErrorTestHandler("\x19", 1, 'U+0019 (C0 control)'); + $this->invalidParseErrorTestHandler("\x1A", 1, 'U+001A (C0 control)'); + $this->invalidParseErrorTestHandler("\x1B", 1, 'U+001B (C0 control)'); + $this->invalidParseErrorTestHandler("\x1C", 1, 'U+001C (C0 control)'); + $this->invalidParseErrorTestHandler("\x1D", 1, 'U+001D (C0 control)'); + $this->invalidParseErrorTestHandler("\x1E", 1, 'U+001E (C0 control)'); + $this->invalidParseErrorTestHandler("\x1F", 1, 'U+001F (C0 control)'); + + // DEL (U+007F) + $this->invalidParseErrorTestHandler("\x7F", 1, 'U+007F'); + + // C1 Controls + $this->invalidParseErrorTestHandler("\xC2\x80", 1, 'U+0080 (C1 control)'); + $this->invalidParseErrorTestHandler("\xC2\x9F", 1, 'U+009F (C1 control)'); + $this->invalidParseErrorTestHandler("\xC2\xA0", 0, 'U+00A0 (first codepoint above highest C1 control)'); + + // Charcters surrounding surrogates + $this->invalidParseErrorTestHandler("\xED\x9F\xBF", 0, 'U+D7FF (one codepoint below lowest surrogate codepoint)'); + $this->invalidParseErrorTestHandler("\xEF\xBF\xBD", 0, 'U+DE00 (one codepoint above highest surrogate codepoint)'); + + // Permanent noncharacters + $this->invalidParseErrorTestHandler("\xEF\xB7\x90", 1, 'U+FDD0 (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xEF\xB7\xAF", 1, 'U+FDEF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xEF\xBF\xBE", 1, 'U+FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xEF\xBF\xBF", 1, 'U+FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBE", 1, 'U+1FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBF", 1, 'U+1FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBE", 1, 'U+2FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBF", 1, 'U+2FFFF (permanent 
noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBE", 1, 'U+3FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBF", 1, 'U+3FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBE", 1, 'U+4FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBF", 1, 'U+4FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBE", 1, 'U+5FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBF", 1, 'U+5FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBE", 1, 'U+6FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBF", 1, 'U+6FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBE", 1, 'U+7FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBF", 1, 'U+7FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBE", 1, 'U+8FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBF", 1, 'U+8FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBE", 1, 'U+9FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBF", 1, 'U+9FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBE", 1, 'U+AFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBF", 1, 'U+AFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBE", 1, 'U+BFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBF", 1, 'U+BFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBE", 1, 'U+CFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBF", 1, 'U+CFFFF (permanent noncharacter)'); + 
$this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBE", 1, 'U+DFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBF", 1, 'U+DFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBE", 1, 'U+EFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBF", 1, 'U+EFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBE", 1, 'U+FFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBF", 1, 'U+FFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBE", 1, 'U+10FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBF", 1, 'U+10FFFF (permanent noncharacter)'); + + // MPB: These pass on some versions of iconv, and fail on others. Since we aren't in the + // business of writing tests against iconv, I've just commented these out. Should revisit + // at a later point. + /* + * $this->invalidParseErrorTestHandler("\xED\xA0\x80", 1, 'U+D800 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xAD\xBF", 1, 'U+DB7F (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xAE\x80", 1, 'U+DB80 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xAF\xBF", 1, 'U+DBFF (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xB0\x80", 1, 'U+DC00 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xBE\x80", 1, 'U+DF80 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xBF\xBF", 1, 'U+DFFF (UTF-16 surrogate character)'); // Paired UTF-16 surrogates $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xB0\x80", 2, 'U+D800 U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xBF\xBF", 2, 'U+D800 U+DFFF (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xB0\x80", 2, 'U+DB7F U+DC00 
(paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xBF\xBF", 2, 'U+DB7F U+DFFF (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xB0\x80", 2, 'U+DB80 U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xBF\xBF", 2, 'U+DB80 U+DFFF (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xB0\x80", 2, 'U+DBFF U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xBF\xBF", 2, 'U+DBFF U+DFFF (paired UTF-16 surrogates)'); + */ + } +} diff --git a/core/vendor/masterminds/html5/test/HTML5/Parser/TokenizerTest.php b/core/vendor/masterminds/html5/test/HTML5/Parser/TokenizerTest.php new file mode 100644 index 0000000..3d834fd --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/Parser/TokenizerTest.php @@ -0,0 +1,970 @@ +<?php +namespace Masterminds\HTML5\Tests\Parser; + +use Masterminds\HTML5\Parser\UTF8Utils; +use Masterminds\HTML5\Parser\StringInputStream; +use Masterminds\HTML5\Parser\Scanner; +use Masterminds\HTML5\Parser\Tokenizer; + +class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase +{ + // ================================================================ + // Additional assertions. + // ================================================================ + /** + * Tests that an event matches both the event type and the expected value. + * + * @param string $type + * Expected event type. + * @param string $expects + * The value expected in $event['data'][0]. + */ + public function assertEventEquals($type, $expects, $event) + { + $this->assertEquals($type, $event['name'], "Event $type for " . print_r($event, true)); + if (is_array($expects)) { + $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, true) . ": " . print_r($event, true)); + } else { + $this->assertEquals($expects, $event['data'][0], "Event $type should equal $expects: " . 
print_r($event, true)); + } + } + + /** + * Assert that a given event is 'error'. + */ + public function assertEventError($event) + { + $this->assertEquals('error', $event['name'], "Expected error for event: " . print_r($event, true)); + } + + /** + * Asserts that all of the tests are good. + * + * This loops through a map of tests/expectations and runs a few assertions on each test. + * + * Checks: + * - depth (if depth is > 0) + * - event name + * - matches on event 0. + */ + protected function isAllGood($name, $depth, $tests, $debug = false) + { + foreach ($tests as $try => $expects) { + if ($debug) { + fprintf(STDOUT, "%s expects %s\n", $try, print_r($expects, true)); + } + $e = $this->parse($try); + if ($depth > 0) { + $this->assertEquals($depth, $e->depth(), "Expected depth $depth for test $try." . print_r($e, true)); + } + $this->assertEventEquals($name, $expects, $e->get(0)); + } + } + + // ================================================================ + // Utility functions. + // ================================================================ + public function testParse() + { + list ($tok, $events) = $this->createTokenizer(''); + + $tok->parse(); + $e1 = $events->get(0); + + $this->assertEquals(1, $events->Depth()); + $this->assertEquals('eof', $e1['name']); + } + + public function testWhitespace() + { + $spaces = ' '; + list ($tok, $events) = $this->createTokenizer($spaces); + + $tok->parse(); + + $this->assertEquals(2, $events->depth()); + + $e1 = $events->get(0); + + $this->assertEquals('text', $e1['name']); + $this->assertEquals($spaces, $e1['data'][0]); + } + + public function testCharacterReference() + { + $good = array( + '&amp;' => '&', + '&#x0003c;' => '<', + '&#38;' => '&', + '&' => '&' + ); + $this->isAllGood('text', 2, $good); + + // Test with broken charref + $str = '&foo'; + $events = $this->parse($str); + $e1 = $events->get(0); + $this->assertEquals('error', $e1['name']); + + $str = '&#xfoo'; + $events = $this->parse($str); + $e1 = 
$events->get(0); + $this->assertEquals('error', $e1['name']); + + $str = '&#foo'; + $events = $this->parse($str); + $e1 = $events->get(0); + $this->assertEquals('error', $e1['name']); + + // FIXME: Once the text processor is done, need to verify that the + // tokens are transformed correctly into text. + } + + public function testBogusComment() + { + $bogus = array( + '</+this is a bogus comment. +>', + '<!+this is a bogus comment. !>', + '<!D OCTYPE foo bar>', + '<!DOCTYEP foo bar>', + '<![CADATA[ TEST ', + '', + ' Hello [[>', + '<!CDATA[[ test ', + '', + '<![CDATA[hellooooo hello', + '<? Hello World ?>', + '<? Hello World' + ); + foreach ($bogus as $str) { + $events = $this->parse($str); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('comment', $str, $events->get(1)); + } + } + + public function testEndTag() + { + $succeed = array( + '</a>' => 'a', + '</test>' => 'test', + '</test + >' => 'test', + '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => 'thisisthetagthatdoesntenditjustgoesonandonmyfriend', + // See 8.2.4.10, which requires this and does not say error. + '</a<b>' => 'a<b' + ); + $this->isAllGood('endTag', 2, $succeed); + + // Recoverable failures + $fail = array( + '</a class="monkey">' => 'a', + '</a <b>' => 'a', + '</a <b <c>' => 'a', + '</a is the loneliest letter>' => 'a', + '</a' => 'a' + ); + foreach ($fail as $test => $result) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth()); + // Should have triggered an error. + $this->assertEventError($events->get(0)); + // Should have tried to parse anyway. + $this->assertEventEquals('endTag', $result, $events->get(1)); + } + + // BogoComments + $comments = array( + '</>' => '</>', + '</ >' => '</ >', + '</ a>' => '</ a>' + ); + foreach ($comments as $test => $result) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth()); + + // Should have triggered an error. 
+ $this->assertEventError($events->get(0)); + + // Should have tried to parse anyway. + $this->assertEventEquals('comment', $result, $events->get(1)); + } + } + + public function testComment() + { + $good = array( + '<!--easy-->' => 'easy', + '<!-- 1 > 0 -->' => ' 1 > 0 ', + '<!-- --$i -->' => ' --$i ', + '<!----$i-->' => '--$i', + '<!-- 1 > 0 -->' => ' 1 > 0 ', + "<!--\nHello World.\na-->" => "\nHello World.\na", + '<!-- <!-- -->' => ' <!-- ' + ); + foreach ($good as $test => $expected) { + $events = $this->parse($test); + $this->assertEventEquals('comment', $expected, $events->get(0)); + } + + $fail = array( + '<!-->' => '', + '<!--Hello' => 'Hello', + "<!--\0Hello" => UTF8Utils::FFFD . 'Hello', + '<!--' => '' + ); + foreach ($fail as $test => $expected) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth()); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('comment', $expected, $events->get(1)); + } + } + + public function testCDATASection() + { + $good = array( + '<![CDATA[ This is a test. ' => ' This is a test. 
', + 'CDATA' => 'CDATA', + ' ]] > ' => ' ]] > ', + ' ' => ' ' + ); + $this->isAllGood('cdata', 2, $good); + } + + public function testDoctype() + { + $good = array( + '' => array( + 'html', + 0, + null, + false + ), + '' => array( + 'html', + 0, + null, + false + ), + '' => array( + 'html', + 0, + null, + false + ), + "" => array( + 'html', + 0, + null, + false + ), + "" => array( + 'html', + 0, + null, + false + ), + '' => array( + 'html', + EventStack::DOCTYPE_PUBLIC, + 'foo bar', + false + ), + "" => array( + 'html', + EventStack::DOCTYPE_PUBLIC, + 'foo bar', + false + ), + '' => array( + 'html', + EventStack::DOCTYPE_PUBLIC, + 'foo bar', + false + ), + "" => array( + 'html', + EventStack::DOCTYPE_PUBLIC, + 'foo bar', + false + ), + '' => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + false + ), + "" => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + false + ), + '' => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo/bar', + false + ), + "" => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + false + ) + ); + $this->isAllGood('doctype', 2, $good); + + $bad = array( + '' => array( + null, + EventStack::DOCTYPE_NONE, + null, + true + ), + '' => array( + null, + EventStack::DOCTYPE_NONE, + null, + true + ), + ' array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true + ), + ' array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true + ), + '' => array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true + ), + '' => array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true + ), + ' array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true + ), + + // Can't tell whether these are ids or ID types, since the context is chopped. 
+ ' array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true + ), + '' => array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true + ), + ' array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true + ), + '' => array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true + ), + + ' array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + true + ), + '' => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + true + ) + ); + foreach ($bad as $test => $expects) { + $events = $this->parse($test); + // fprintf(STDOUT, $test . PHP_EOL); + $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true)); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('doctype', $expects, $events->get(1)); + } + } + + public function testProcessorInstruction() + { + $good = array( + '' => 'hph', + '' => array( + 'hph', + 'echo "Hello World"; ' + ), + "" => array( + 'hph', + "echo 'Hello World';\n" + ) + ); + $this->isAllGood('pi', 2, $good); + } + + /** + * This tests just simple tags. + */ + public function testSimpleTags() + { + $open = array( + '' => 'foo', + '' => 'foo', + '' => 'foo', + '' => 'foo', + "" => 'foo', + '' => 'foo:bar' + ); + $this->isAllGood('startTag', 2, $open); + + $selfClose = array( + '' => 'foo', + '' => 'foo', + '' => 'foo', + "" => 'foo', + '' => 'foo:bar' + ); + foreach ($selfClose as $test => $expects) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth(), "Counting events for '$test'" . print_r($events, true)); + $this->assertEventEquals('startTag', $expects, $events->get(0)); + $this->assertEventEquals('endTag', $expects, $events->get(1)); + } + + $bad = array( + ' 'foo', + ' 'foo', + ' 'foo', + ' 'foo' + ); + + foreach ($bad as $test => $expects) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . 
print_r($events, true)); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expects, $events->get(1)); + } + } + + public function testTagsWithAttributeAndMissingName() + { + $cases = array( + '' => 'id', + '' => 'color', + "" => 'class', + '' => 'bgcolor', + '' => 'class' + ); + + foreach ($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('startTag', $expected, $events->get(3)); + $this->assertEventEquals('eof', null, $events->get(4)); + } + } + + public function testTagNotClosedAfterTagName() + { + $cases = array( + "" => array( + 'noscript', + 'img' + ), + '' => array( + 'center', + 'a' + ), + '' => array( + 'br', + 'br' + ) + ); + + foreach ($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expected[0], $events->get(1)); + $this->assertEventEquals('startTag', $expected[1], $events->get(2)); + $this->assertEventEquals('eof', null, $events->get(3)); + } + + $events = $this->parse('02'); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'span', $events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('text', '>02', $events->get(3)); + $this->assertEventEquals('endTag', 'span', $events->get(4)); + $this->assertEventEquals('eof', null, $events->get(5)); + + $events = $this->parse(''); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'p', $events->get(1)); + $this->assertEventEquals('endTag', 'p', $events->get(2)); + $this->assertEventEquals('eof', null, $events->get(3)); + + $events = $this->parse(''); + $this->assertEventEquals('startTag', 'strong', $events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventEquals('startTag', 'wordpress', 
$events->get(2)); + $this->assertEventEquals('endTag', 'strong', $events->get(3)); + $this->assertEventEquals('eof', null, $events->get(4)); + + $events = $this->parse(''); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('startTag', 'src', $events->get(3)); + $this->assertEventEquals('startTag', 'a', $events->get(4)); + $this->assertEventEquals('eof', null, $events->get(5)); + + $events = $this->parse(''); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'br', $events->get(1)); + $this->assertEventEquals('eof', null, $events->get(2)); + } + + public function testIllegalTagNames() + { + $cases = array( + '' => 'li', + '' => 'p', + '' => 'b', + '' => 'static', + '' => 'h', + '' => 'st', + ); + + foreach ($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expected, $events->get(1)); + } + } + + /** + * @depends testCharacterReference + */ + public function testTagAttributes() + { + // Opening tags. + $good = array( + '' => array( + 'foo', + array( + 'bar' => 'baz' + ), + false + ), + '' => array( + 'foo', + array( + 'bar' => ' baz ' + ), + false + ), + "" => array( + 'foo', + array( + 'bar' => "\nbaz\n" + ), + false + ), + "" => array( + 'foo', + array( + 'bar' => 'baz' + ), + false + ), + '' => array( + 'foo', + array( + 'bar' => 'A full sentence.' 
+ ), + false + ), + "" => array( + 'foo', + array( + 'a' => '1', + 'b' => '2' + ), + false + ), + "" => array( + 'foo', + array( + 'ns:bar' => 'baz' + ), + false + ), + "" => array( + 'foo', + array( + 'a' => 'blue&red' + ), + false + ), + "" => array( + 'foo', + array( + 'a' => 'blue&&red' + ), + false + ), + "" => array( + 'foo', + array( + 'bar' => 'baz' + ), + false + ), + '' => array( + 'doe', + array( + 'a' => null, + 'deer' => null + ), + false + ), + '' => array( + 'foo', + array( + 'bar' => 'baz' + ), + false + ), + + // Updated for 8.1.2.3 + '' => array( + 'foo', + array( + 'bar' => 'baz' + ), + false + ), + + // The spec allows an unquoted value '/'. This will not be a closing + // tag. + '' => array( + 'foo', + array( + 'bar' => '/' + ), + false + ), + '' => array( + 'foo', + array( + 'bar' => 'baz/' + ), + false + ) + ); + $this->isAllGood('startTag', 2, $good); + + // Self-closing tags. + $withEnd = array( + '' => array( + 'foo', + array( + 'bar' => 'baz' + ), + true + ), + '' => array( + 'foo', + array( + 'bar' => 'baz' + ), + true + ), + '' => array( + 'foo', + array( + 'bar' => 'BAZ' + ), + true + ), + "" => array( + 'foo', + array( + 'a' => '1', + 'b' => '2', + 'c' => '3', + 'd' => null + ), + true + ) + ); + $this->isAllGood('startTag', 3, $withEnd); + + // Cause a parse error. + $bad = array( + // This will emit an entity lookup failure for &red. 
+ "" => array( + 'foo', + array( + 'a' => 'blue&red' + ), + false + ), + "" => array( + 'foo', + array( + 'a' => 'blue&&&red' + ), + false + ), + '' => array( + 'foo', + array( + 'bar' => null + ), + false + ), + '' => array( + 'foo', + array( + 'bar' => 'oh"' + ), + false + ), + + // these attributes are ignored because of current implementation + // of method "DOMElement::setAttribute" + // see issue #23: https://github.com/Masterminds/html5-php/issues/23 + '' => array( + 'foo', + array(), + false + ), + '' => array( + 'foo', + array(), + false + ), + '' => array( + 'foo', + array(), + false + ), + '' => array( + 'foo', + array(), + false + ) + ) + ; + foreach ($bad as $test => $expects) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true)); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expects, $events->get(1)); + } + + // Cause multiple parse errors. + $reallyBad = array( + '' => array( + 'foo', + array( + '=' => null, + '"bar"' => null + ), + false + ), + '' => array( + 'foo', + array(), + true + ), + // character "&" in unquoted attribute shouldn't cause an infinite loop + '' => array( + 'foo', + array( + 'bar' => 'index.php?str=1&id=29' + ), + false + ) + ); + foreach ($reallyBad as $test => $expects) { + $events = $this->parse($test); + // fprintf(STDOUT, $test . print_r($events, true)); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + // $this->assertEventEquals('startTag', $expects, $events->get(1)); + } + + // Regression: Malformed elements should be detected. 
+ // '' => array('foo', array('baz' => '1'), false), + $events = $this->parse(''); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', array( + 'foo', + array( + 'baz' => '1' + ), + false + ), $events->get(1)); + $this->assertEventEquals('startTag', array( + 'bar', + array(), + false + ), $events->get(2)); + $this->assertEventEquals('endTag', array( + 'foo' + ), $events->get(3)); + } + + public function testRawText() + { + $good = array( + ' ' => 'abcd efg hijk lmnop', + '' => '', + '' => '<<<<<<<<', + '' => 'hello\nhello" => "\nhello&' => '&', + '' => '', + '' => '' + ); + foreach ($good as $test => $expects) { + $events = $this->parse($test); + $this->assertEventEquals('startTag', 'script', $events->get(0)); + $this->assertEventEquals('text', $expects, $events->get(1)); + $this->assertEventEquals('endTag', 'script', $events->get(2)); + } + + $bad = array( + ' + + +
foo bar baz
+ + '); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $script = $dom->getElementsByTagName('script'); + $r->element($script->item(0)); + $this->assertEquals( + '', stream_get_contents($stream, - 1, 0)); + } + + public function testElementWithStyle() + { + $dom = $this->html5->loadHTML( + ' + + + + + +
foo bar baz
+ + '); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $style = $dom->getElementsByTagName('style'); + $r->element($style->item(0)); + $this->assertEquals('', stream_get_contents($stream, - 1, 0)); + } + + public function testOpenTag() + { + $dom = $this->html5->loadHTML(' + + +
foo bar baz
+ + '); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $list = $dom->getElementsByTagName('div'); + $m = $this->getProtectedMethod('openTag'); + $m->invoke($r, $list->item(0)); + $this->assertEquals('
', stream_get_contents($stream, - 1, 0)); + } + + public function testCData() + { + $dom = $this->html5->loadHTML(' + + +
+ + '); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $list = $dom->getElementsByTagName('div'); + $r->cdata($list->item(0)->childNodes->item(0)); + $this->assertEquals('', stream_get_contents($stream, - 1, 0)); + + $dom = $this->html5->loadHTML(' + + +
+ + '); + + $dom->getElementById('foo')->appendChild(new \DOMCdataSection("]]>Foo<[![CDATA test ]]>")); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + $list = $dom->getElementsByTagName('div'); + $r->cdata($list->item(0)->childNodes->item(0)); + + $this->assertEquals('Foo<[![CDATA test ]]]]>]]>', stream_get_contents($stream, - 1, 0)); + } + + public function testComment() + { + $dom = $this->html5->loadHTML(' + + +
+ + '); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $list = $dom->getElementsByTagName('div'); + $r->comment($list->item(0)->childNodes->item(0)); + $this->assertEquals('', stream_get_contents($stream, - 1, 0)); + + $dom = $this->html5->loadHTML(' + + +
+ + '); + $dom->getElementById('foo')->appendChild(new \DOMComment(' --> Foo -->')); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $list = $dom->getElementsByTagName('div'); + $r->comment($list->item(0)->childNodes->item(0)); + + // Could not find more definitive guidelines on what this should be. Went with + // what the HTML5 spec says and what \DOMDocument::saveXML() produces. + $this->assertEquals(' --> Foo -->-->', stream_get_contents($stream, - 1, 0)); + } + + public function testText() + { + $dom = $this->html5->loadHTML(' + + + + + '); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $list = $dom->getElementsByTagName('script'); + $r->text($list->item(0)->childNodes->item(0)); + $this->assertEquals('baz();', stream_get_contents($stream, - 1, 0)); + + $dom = $this->html5->loadHTML(' + + + '); + $foo = $dom->getElementById('foo'); + $foo->appendChild(new \DOMText('')); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $r->text($foo->firstChild); + $this->assertEquals('<script>alert("hi");</script>', stream_get_contents($stream, - 1, 0)); + } + + public function testNl() + { + list ($o, $s) = $this->getOutputRules(); + + $m = $this->getProtectedMethod('nl'); + $m->invoke($o); + $this->assertEquals(PHP_EOL, stream_get_contents($s, - 1, 0)); + } + + public function testWr() + { + list ($o, $s) = $this->getOutputRules(); + + $m = $this->getProtectedMethod('wr'); + $m->invoke($o, 'foo'); + $this->assertEquals('foo', stream_get_contents($s, - 1, 0)); + } + + public function getEncData() + { + return array( + array( + false, + '&\'<>"', + '&\'<>"', + '&'<>"' + ), + array( + false, + 'This + is. 
a < test', + 'This + is. a < test', + 'This + is. a < test' + ), + array( + false, + '.+#', + '.+#', + '.+#' + ), + + array( + true, + '.+#\'', + '.+#\'', + '.+#'' + ), + array( + true, + '&".<', + '&".<', + '&".<' + ), + array( + true, + '&\'<>"', + '&\'<>"', + '&'<>"' + ), + array( + true, + "\xc2\xa0\"'", + ' "\'', + ' "'' + ) + ); + } + + /** + * Test basic encoding of text. + * @dataProvider getEncData + */ + public function testEnc($isAttribute, $test, $expected, $expectedEncoded) + { + list ($o, $s) = $this->getOutputRules(); + $m = $this->getProtectedMethod('enc'); + + $this->assertEquals($expected, $m->invoke($o, $test, $isAttribute)); + + list ($o, $s) = $this->getOutputRules(array( + 'encode_entities' => true + )); + $m = $this->getProtectedMethod('enc'); + $this->assertEquals($expectedEncoded, $m->invoke($o, $test, $isAttribute)); + } + + /** + * Test basic encoding of text. + * @dataProvider getEncData + */ + public function testEscape($isAttribute, $test, $expected, $expectedEncoded) + { + list ($o, $s) = $this->getOutputRules(); + $m = $this->getProtectedMethod('escape'); + + $this->assertEquals($expected, $m->invoke($o, $test, $isAttribute)); + } + + public function booleanAttributes() + { + return array( + array(''), + array(''), + array(''), + array(''), + array(''), + array(''), + array('
foo
'), + array(''), + ); + } + /** + * @dataProvider booleanAttributes + */ + public function testBooleanAttrs($html) + { + $dom = $this->html5->loadHTML(''.$html.''); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $node = $dom->getElementsByTagName('body')->item(0)->firstChild; + + $m = $this->getProtectedMethod('attrs'); + $m->invoke($r, $node); + + $content = stream_get_contents($stream, - 1, 0); + $this->assertContains($content, $html); + + } + + public function testAttrs() + { + $dom = $this->html5->loadHTML(' + + +
foo bar baz
+ + '); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $list = $dom->getElementsByTagName('div'); + + $m = $this->getProtectedMethod('attrs'); + $m->invoke($r, $list->item(0)); + + $content = stream_get_contents($stream, - 1, 0); + $this->assertEquals(' id="foo" class="bar baz"', $content); + } + + public function testSvg() + { + $dom = $this->html5->loadHTML( + ' + + +
foo bar baz
+ + + + + + + + + + '); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $list = $dom->getElementsByTagName('svg'); + $r->element($list->item(0)); + $contents = stream_get_contents($stream, - 1, 0); + $this->assertRegExp('||', $contents); + $this->assertRegExp('||', $contents); + $this->assertRegExp('||', $contents); + } + + public function testMath() + { + $dom = $this->html5->loadHTML( + ' + + +
foo bar baz
+ + x + + ± + + y + + + '); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $list = $dom->getElementsByTagName('math'); + $r->element($list->item(0)); + $content = stream_get_contents($stream, - 1, 0); + $this->assertRegExp('||', $content); + $this->assertRegExp('||', $content); + } + + public function testProcessorInstruction() + { + $dom = $this->html5->loadHTMLFragment(''); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $r->processorInstruction($dom->firstChild); + $content = stream_get_contents($stream, - 1, 0); + $this->assertRegExp('|<\?foo bar \?>|', $content); + } +} diff --git a/core/vendor/masterminds/html5/test/HTML5/Serializer/TraverserTest.php b/core/vendor/masterminds/html5/test/HTML5/Serializer/TraverserTest.php new file mode 100644 index 0000000..c914633 --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/Serializer/TraverserTest.php @@ -0,0 +1,105 @@ + + + + + Test + + +

This is a test.

+ + '; + + public function setUp() + { + $this->html5 = $this->getInstance(); + } + + /** + * Using reflection we make a protected method accessible for testing. + * + * @param string $name + * The name of the method on the Traverser class to test. + * + * @return \ReflectionMethod \ReflectionMethod for the specified method + */ + public function getProtectedMethod($name) + { + $class = new \ReflectionClass('\Masterminds\HTML5\Serializer\Traverser'); + $method = $class->getMethod($name); + $method->setAccessible(true); + + return $method; + } + + public function getTraverser() + { + $stream = fopen('php://temp', 'w'); + + $dom = $this->html5->loadHTML($this->markup); + $t = new Traverser($dom, $stream, $html5->getOptions()); + + // We return both the traverser and stream so we can pull from it. + return array( + $t, + $stream + ); + } + + public function testConstruct() + { + // The traverser needs a place to write the output to. In our case we + // use a stream in temp space. + $stream = fopen('php://temp', 'w'); + + $html5 = $this->getInstance(); + + $r = new OutputRules($stream, $this->html5->getOptions()); + $dom = $this->html5->loadHTML($this->markup); + + $t = new Traverser($dom, $stream, $r, $html5->getOptions()); + + $this->assertInstanceOf('\Masterminds\HTML5\Serializer\Traverser', $t); + } + + public function testFragment() + { + $html = 'foo
bar
'; + $input = new \Masterminds\HTML5\Parser\StringInputStream($html); + $dom = $this->html5->parseFragment($input); + + $this->assertInstanceOf('\DOMDocumentFragment', $dom); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $out = $t->walk(); + $this->assertEquals($html, stream_get_contents($stream, - 1, 0)); + } + + public function testProcessorInstruction() + { + $html = ''; + $input = new \Masterminds\HTML5\Parser\StringInputStream($html); + $dom = $this->html5->parseFragment($input); + + $this->assertInstanceOf('\DOMDocumentFragment', $dom); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $out = $t->walk(); + $this->assertEquals($html, stream_get_contents($stream, - 1, 0)); + } +} diff --git a/core/vendor/masterminds/html5/test/HTML5/TestCase.php b/core/vendor/masterminds/html5/test/HTML5/TestCase.php new file mode 100644 index 0000000..3cb8645 --- /dev/null +++ b/core/vendor/masterminds/html5/test/HTML5/TestCase.php @@ -0,0 +1,27 @@ +test'; + + const DOC_CLOSE = ''; + + public function testFoo() + { + // Placeholder. Why is PHPUnit emitting warnings about no tests? + } + + public function getInstance(array $options = array()) + { + return new HTML5($options); + } + + protected function wrap($fragment) + { + return self::DOC_OPEN . $fragment . self::DOC_CLOSE; + } +}