/**
 * Utility for ensuring closed tags in auto-generated teasers.
 *
 * The teaser generator in node.module cuts the body either at a breakpoint
 * (substr()) or, when all else fails, with truncate_utf8(). Both results are
 * meant to be passed through this function so that markup cut in the middle
 * of an element is balanced again before it is displayed.
 *
 * What the corrector does:
 * - A '<' that cannot start a tag (not followed by a letter or '/') is
 *   turned into the entity for a literal less-than sign.
 * - Single-use (void) HTML4 elements are never pushed on the open-tag stack
 *   and get an X(HT)ML style trailing slash.
 * - An opening 'li' or 'p' directly inside an open element of the same name
 *   closes the previous one first, because these cannot be nested.
 * - A closing tag closes every element opened after its matching opener;
 *   a closing tag without any matching opener is dropped.
 * - Whatever is still open at the end of the text is closed, innermost first.
 *
 * Closing tags are emitted with the lower-cased tag name; opening tags keep
 * the spelling found in the input.
 *
 * @param $text
 *   The (possibly truncated) HTML fragment.
 *
 * @return
 *   The fragment with all opened elements properly closed.
 */
function _node_htmlcorrector_process($text) {
  // Tags which cannot be nested but are typically left unclosed.
  $nonesting = array('li', 'p');

  // Single use tags in HTML4.
  $singleuse = array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame');

  // Properly entify angles: a '<' not followed by a letter or a slash cannot
  // open a tag, so it has to be output as an entity.
  $text = preg_replace('!<([^a-zA-Z/])!', '&lt;\1', $text);

  // Split tags from text.
  // Note: with PREG_SPLIT_DELIM_CAPTURE and a single capture group the array
  // consists of alternating literals and tag contents, and it begins and
  // ends with a literal (an empty string where required).
  $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

  $tag = false; // Odd/even toggle: false for a literal, true for a tag.
  $stack = array(); // Currently open elements, innermost at index 0.
  $output = '';
  foreach ($split as $value) {
    if ($tag) {
      // The tag name is everything up to the first whitespace character
      // (attributes may be separated by spaces, tabs or newlines).
      $parts = preg_split('/\s+/', strtolower($value), 2);
      $tagname = $parts[0];

      // Closing tag.
      if (substr($tagname, 0, 1) === '/') {
        $tagname = (string) substr($tagname, 1);
        // Closing tags for single-use elements are meaningless, and a closing
        // tag whose opener is not on the stack is left out. Checking the
        // stack first keeps a stray closer from closing unrelated elements.
        if (!in_array($tagname, $singleuse, true) && in_array($tagname, $stack, true)) {
          // Close lingering inner elements first, then the element itself.
          do {
            $closed = array_shift($stack);
            $output .= '</'. $closed .'>';
          } while ($closed !== $tagname);
        }
      }
      // Opening tag.
      else {
        // Ignore a self-closing slash glued to the name, as in "br/".
        $tagname = rtrim($tagname, '/');

        // See if we have an identical tag already open and close it if
        // desired.
        if (count($stack) && ($stack[0] === $tagname) && in_array($tagname, $nonesting, true)) {
          $output .= '</'. array_shift($stack) .'>';
        }
        // Push non-single-use tags onto the stack.
        if (!in_array($tagname, $singleuse, true)) {
          array_unshift($stack, $tagname);
        }
        // Add trailing slash to single-use tags as per X(HT)ML.
        else {
          $value = rtrim($value, ' /') .' /';
        }
        $output .= '<'. $value .'>';
      }
    }
    else {
      // Passthrough of literal text.
      $output .= $value;
    }
    $tag = !$tag;
  }

  // Close remaining tags, innermost first.
  while (count($stack) > 0) {
    $output .= '</'. array_shift($stack) .'>';
  }
  return $output;
}