loadHTML($html);

        // The fragment is automatically wrapped in a DOCTYPE/html/body skeleton.
        // The body node is the root we walk; the skeleton is stripped again
        // when the result is serialised.
        $this->startNode = $dom->getElementsByTagName("body")->item(0);
        $this->limit = $limit;
        $this->ellipsis = $ellipsis;
        $this->charCount = 0;
        $this->wordCount = 0;
        $this->foundBreakpoint = false;

        return $dom;
    }

    /**
     * Truncate an HTML fragment after $limit characters of text content,
     * keeping the markup around the retained text intact.
     *
     * @param string $html     HTML fragment to shorten.
     * @param int    $limit    Maximum number of text characters to keep.
     * @param string $ellipsis Text appended at the cut point.
     *
     * @return string The input unchanged when $limit is not positive or the
     *                tag-stripped input is not longer than $limit; otherwise
     *                the re-serialised, truncated fragment.
     */
    public function truncateChars($html, $limit, $ellipsis = '...')
    {
        // strlen() counts bytes, which is never less than the character count,
        // so this guard can only err on the side of doing the DOM pass.
        if ($limit <= 0 || $limit >= strlen(strip_tags($html))) {
            return $html;
        }

        $dom = $this->init($html, $limit, $ellipsis);
        $this->domNodeTruncateChars($this->startNode);

        return $this->stripSkeleton($dom);
    }

    /**
     * Truncate an HTML fragment after $limit whitespace-separated words of
     * text content, keeping the markup around the retained text intact.
     *
     * @param string $html     HTML fragment to shorten.
     * @param int    $limit    Maximum number of words to keep.
     * @param string $ellipsis Text appended at the cut point.
     *
     * @return string The input unchanged when $limit is not positive or the
     *                tag-stripped input has no more than $limit words;
     *                otherwise the re-serialised, truncated fragment.
     */
    public function truncateWords($html, $limit, $ellipsis = '...')
    {
        if ($limit <= 0 || $limit >= $this->countWords(strip_tags($html))) {
            return $html;
        }

        $dom = $this->init($html, $limit, $ellipsis);
        $this->domNodeTruncateWords($this->startNode);

        return $this->stripSkeleton($dom);
    }

    /**
     * Serialise the document and remove the DOCTYPE/html/head/body wrapper
     * tags so that only the fragment markup remains.
     *
     * The lookahead after the tag name stops the pattern from also matching
     * longer names that merely start with one of them (e.g. <header>).
     *
     * @param \DOMDocument $dom Document returned by init().
     *
     * @return string Fragment markup without the wrapper tags.
     */
    private function stripSkeleton(\DOMDocument $dom)
    {
        return preg_replace(
            '~<(?:!DOCTYPE|/?(?:html|head|body)(?![\w:-]))[^>]*>\s*~i',
            '',
            $dom->saveHTML()
        );
    }

    /**
     * Walk the children of $domNode depth-first, accumulating text length in
     * $this->charCount until $this->limit is reached, then cut the current
     * leaf, drop everything after it and add the ellipsis.
     *
     * Lengths are measured in UTF-8 characters rather than bytes so that the
     * cut can never land inside a multi-byte sequence.
     *
     * @param \DOMNode $domNode Node whose children are processed.
     *
     * @return void
     */
    private function domNodeTruncateChars(\DOMNode $domNode)
    {
        foreach ($domNode->childNodes as $node) {
            if ($this->foundBreakpoint) {
                return;
            }

            if ($node->hasChildNodes()) {
                $this->domNodeTruncateChars($node);
                continue;
            }

            $text = (string) $node->nodeValue;
            $length = mb_strlen($text, 'UTF-8');

            if ($this->charCount + $length < $this->limit) {
                $this->charCount += $length;
                continue;
            }

            // This leaf reaches or crosses the limit: keep only what still fits.
            $node->nodeValue = mb_substr($text, 0, $this->limit - $this->charCount, 'UTF-8');
            $this->removeProceedingNodes($node);
            $this->insertEllipsis($node);
            $this->foundBreakpoint = true;

            return;
        }
    }

    /**
     * Walk the children of $domNode depth-first, accumulating the word count
     * in $this->wordCount until $this->limit is reached, then cut the current
     * leaf after its last permitted word, drop everything after it and add
     * the ellipsis.
     *
     * @param \DOMNode $domNode Node whose children are processed.
     *
     * @return void
     */
    private function domNodeTruncateWords(\DOMNode $domNode)
    {
        foreach ($domNode->childNodes as $node) {
            if ($this->foundBreakpoint) {
                return;
            }

            if ($node->hasChildNodes()) {
                $this->domNodeTruncateWords($node);
                continue;
            }

            $curWordCount = $this->countWords($node->nodeValue);

            if ($this->wordCount + $curWordCount < $this->limit) {
                $this->wordCount += $curWordCount;
                continue;
            }

            $remaining = $this->limit - $this->wordCount;

            if ($curWordCount > 1 && $remaining < $curWordCount) {
                // Split into the words still allowed plus one trailing piece
                // holding the rest. The captured offsets are byte offsets,
                // so byte-wise substr()/strlen() are the right tools here.
                $words = preg_split(
                    "/[\n\r\t ]+/",
                    $node->nodeValue,
                    $remaining + 1,
                    PREG_SPLIT_NO_EMPTY | PREG_SPLIT_OFFSET_CAPTURE
                );
                $lastWord = $words[count($words) - 2];
                $node->nodeValue = substr($node->nodeValue, 0, $lastWord[1] + strlen($lastWord[0]));
            }

            $this->removeProceedingNodes($node);
            $this->insertEllipsis($node);
            $this->foundBreakpoint = true;

            return;
        }
    }

    /**
     * Remove every node that follows $domNode in document order, up to but
     * not including the siblings of $this->startNode.
     *
     * Implemented as a loop: for the node itself and each ancestor below the
     * start node, all following siblings are detached. This avoids one level
     * of recursion per sibling and stops cleanly if the top of the tree is
     * reached without meeting the start node.
     *
     * @param \DOMNode $domNode Last node to keep.
     *
     * @return void
     */
    private function removeProceedingNodes(\DOMNode $domNode)
    {
        $current = $domNode;

        do {
            while ($current->nextSibling !== null) {
                $current->parentNode->removeChild($current->nextSibling);
            }
            $current = $current->parentNode;
        } while ($current !== null && $current !== $this->startNode);
    }

    /**
     * Add the ellipsis at the cut point.
     *
     * When the cut text sits directly inside one of the listed inline/heading
     * tags, the ellipsis is placed as a new text node right after that tag in
     * its parent, so it does not become part of the link/emphasis/heading.
     * Otherwise it is appended to the (right-trimmed) text of $domNode.
     *
     * @param \DOMNode $domNode Leaf node at which the text was cut.
     *
     * @return void
     */
    private function insertEllipsis(\DOMNode $domNode)
    {
        // html tags to avoid appending the ellipsis to
        $avoid = array('a', 'strong', 'em', 'h1', 'h2', 'h3', 'h4', 'h5');

        $wrapper = $domNode->parentNode;
        $container = $wrapper !== null ? $wrapper->parentNode : null;

        if ($wrapper !== null && $container !== null && in_array($wrapper->nodeName, $avoid, true)) {
            // The reference node must be a child of $container; a null
            // reference (no following sibling) makes insertBefore() append.
            $container->insertBefore(new \DOMText($this->ellipsis), $wrapper->nextSibling);

            return;
        }

        // Append to current node
        $domNode->nodeValue = rtrim($domNode->nodeValue) . $this->ellipsis;
    }

    /**
     * Count the words in $text, where a word is any non-empty run of
     * characters separated by spaces, tabs or line breaks.
     *
     * @param string $text Plain text to examine.
     *
     * @return int Number of words found.
     */
    private function countWords($text)
    {
        $words = preg_split("/[\n\r\t ]+/", $text, -1, PREG_SPLIT_NO_EMPTY);

        return count($words);
    }
}