--- modules/node/node.module +++ modules/node/node.module @@ -22,6 +22,12 @@ define('NODE_BUILD_RSS', 4); define('NODE_BUILD_PRINT', 5); +// see node_compare_teaser_and_body() +define('NODE_TEASER_OFF', 0); +define('NODE_TEASER_EQUAL', 1); +define('NODE_TEASER_START', 2); +define('NODE_TEASER_DIFF', 3); + /** * Implementation of hook_help(). */ @@ -292,6 +298,15 @@ * place such as the end of a paragraph, a line break, or the end of a * sentence (in that order of preference). * + * @note + * Note that the function uses strlen(), strpos(), etc. because it doesn't + * affect the content and those functions are faster than the mb_...() functions. + * Remember that when we cut a paragraph, we cut at a space or a tag so we won't + * inadvertently cut a UTF-8 character. (UTF-8 spaces are not considered as a + * place where we want to cut the paragraph.) However, the $size parameter is + * given in characters, so the function uses drupal_strlen() to compare the size + * of the characters against $size. + * * @param $body * The content for which a teaser will be generated. * @param $format @@ -302,10 +317,12 @@ * The desired character length of the teaser. If omitted, the default * value will be used. Ignored if the special delimiter is present * in $body. + * @param $teaser_len + * (OUT) The length of the teaser before closing the HTML tags. * @return * The generated teaser. */ -function node_teaser($body, $format = NULL, $size = NULL) { +function node_teaser($body, $format = NULL, $size = NULL, &$teaser_len = NULL) { if (!isset($size)) { $size = variable_get('teaser_length', 600); @@ -316,11 +333,19 @@ // If the size is zero, and there is no delimiter, the entire body is the teaser. if ($size == 0 && $delimiter === FALSE) { + // caller requested length of teaser? + if (isset($teaser_len)) { + $teaser_len = strlen($body); + } return $body; } // If a valid delimiter has been specified, use it to chop off the teaser. 
if ($delimiter !== FALSE) { + // caller requested length of teaser? + if (isset($teaser_len)) { + $teaser_len = $delimiter; + } return substr($body, 0, $delimiter); } @@ -330,72 +355,216 @@ if (isset($format)) { $filters = filter_list_format($format); if (isset($filters['php/0']) && strpos($body, '') !== FALSE) { + // caller requested length of teaser? + if (isset($teaser_len)) { + $teaser_len = strlen($body); + } return $body; } } // If we have a short body, the entire body is the teaser. if (drupal_strlen($body) <= $size) { + // caller requested length of teaser? + if (isset($teaser_len)) { + $teaser_len = strlen($body); + } return $body; } - // If the delimiter has not been specified, try to split at paragraph or - // sentence boundaries. + $filter_newline = isset($filters['filter/1']); + $len = strlen($body); - // The teaser may not be longer than maximum length specified. Initial slice. - $teaser = truncate_utf8($body, $size); + $p = 0; + $l = 0; + $s = array(); // stack + while ($p < $len && $l < $size) { + $last_tag = FALSE; + $o = strpos($body, '<', $p); + if ($o === FALSE) { + // no more tags till the end + $a = drupal_strlen(substr($body, $p, $len - $p)); // UTF-8 length + $n = $len; + } + else { + // count characters between previous position and + // beginning of tag + $a = drupal_strlen(substr($body, $p, $o - $p)); // UTF-8 length - // Store the actual length of the UTF8 string -- which might not be the same - // as $size. - $max_rpos = strlen($teaser); + ++$o; // skip the '<' + $n = strpos($body, '>', $o); - // How much to cut off the end of the teaser so that it doesn't end in the - // middle of a paragraph, sentence, or word. - // Initialize it to maximum in order to find the minimum. 
- $min_rpos = $max_rpos; + if ($body[$o] == '/') { + // closing tag, pop the opening tag too + array_pop($s); + } + elseif ($body[$n - 1] != '/') { // skip empty tags + // opening tag, save its name on the stack so we can close it later + $end_name = strpos($body, ' ', $o); + if ($end_name === FALSE || $end_name > $n) { + $end_name = $n; + } + $tag_name = substr($body, $o, $end_name - $o); + switch ($tag_name) { // ignore empty tags that were not properly closed + case 'br': + case 'hr': + case 'img': + case 'input': + break; - // Store the reverse of the teaser. We use strpos on the reversed needle and - // haystack for speed and convenience. - $reversed = strrev($teaser); + default: + $s[] = $tag_name; + $last_tag = TRUE; + break; - // Build an array of arrays of break points grouped by preference. - $break_points = array(); + } + } - // A paragraph near the end of sliced teaser is most preferable. - $break_points[] = array('
' => 0); + // skip the tag now (we assume properly opening/closing tag boundaries!) + if ($n === FALSE) { + // last tag not closed or it wasn't a tag?! + $n = $len; + } + else { + ++$n; // skip the '>' character + } + } - // If no complete paragraph then treat line breaks as paragraphs. - $line_breaks = array('[\n\r]*)+$%', '', $remainder); + // if empty, then $teaser == $body! + return trim($remainder) ? NODE_TEASER_START : NODE_TEASER_EQUAL; + } + + return NODE_TEASER_DIFF; } /** @@ -829,12 +998,13 @@ // module-provided 'teaser' form item). if (!isset($node->teaser)) { if (isset($node->body)) { - $node->teaser = node_teaser($node->body, isset($node->format) ? $node->format : NULL); + $teaser_len = -1; + $node->teaser = node_teaser($node->body, isset($node->format) ? $node->format : NULL, NULL, $teaser_len); // Chop off the teaser from the body if needed. The teaser_include // property might not be set (eg. in Blog API postings), so only act on // it, if it was set with a given value. - if (isset($node->teaser_include) && !$node->teaser_include && $node->teaser == substr($node->body, 0, strlen($node->teaser))) { - $node->body = substr($node->body, strlen($node->teaser)); + if (isset($node->teaser_include) && !$node->teaser_include && node_compare_teaser_and_body($node) != NODE_TEASER_DIFF && $teaser_len > 0) { + $node->body = substr($node->body, $teaser_len); } } else { @@ -1036,7 +1206,7 @@ // First we'll overwrite the existing node teaser and body with // the filtered copies! Then, we'll stick those into the content // array and set the read more flag if appropriate. - $node->readmore = $node->teaser != $node->body; + $node->readmore = node_compare_teaser_and_body($node) != NODE_TEASER_EQUAL; if ($teaser == FALSE) { $node->body = check_markup($node->body, $node->format, FALSE);