? .cache
? .project
? .projectOptions
? files
? htmlcorrector_3.patch
? profiles/single_user_blog
? sites/all/modules
? sites/default/settings.php
Index: CHANGELOG.txt
===================================================================
RCS file: /cvs/drupal/drupal/CHANGELOG.txt,v
retrieving revision 1.199
diff -u -r1.199 CHANGELOG.txt
--- CHANGELOG.txt	17 May 2007 20:57:19 -0000	1.199
+++ CHANGELOG.txt	18 May 2007 16:45:13 -0000
@@ -39,6 +39,9 @@
     * Added form to provide initial site information during installation.
     * Added ability to provide extra installation steps programmatically.
     * Made it possible to import interface translations at install time.
+- Added the HTML corrector filter:
+    * Fixes faulty and chopped off HTML in postings.
+    * Tags are now automatically closed at the end of the teaser.
 
 Drupal 5.0, 2007-01-15
 ----------------------
Index: modules/filter/filter.module
===================================================================
RCS file: /cvs/drupal/drupal/modules/filter/filter.module,v
retrieving revision 1.173
diff -u -r1.173 filter.module
--- modules/filter/filter.module	14 May 2007 13:43:35 -0000	1.173
+++ modules/filter/filter.module	18 May 2007 16:40:05 -0000
@@ -955,7 +955,7 @@
 function filter_filter($op, $delta = 0, $format = -1, $text = '') {
   switch ($op) {
     case 'list':
-      return array(0 => t('HTML filter'), 1 => t('Line break converter'), 2 => t('URL filter'));
+      return array(0 => t('HTML filter'), 1 => t('Line break converter'), 2 => t('URL filter'), 3 => t('HTML corrector'));
 
     case 'description':
       switch ($delta) {
@@ -965,6 +965,8 @@
           return t('Converts line breaks into HTML (i.e. &lt;br&gt; and &lt;p&gt; tags).');
         case 2:
           return t('Turns web and e-mail addresses into clickable links.');
+        case 3:
+          return t('Corrects faulty and chopped off HTML in postings.');
         default:
           return;
       }
@@ -977,6 +979,8 @@
           return _filter_autop($text);
         case 2:
           return _filter_url($text, $format);
+        case 3:
+          return _filter_htmlcorrector($text);
         default:
           return $text;
       }
@@ -1099,6 +1103,95 @@
 }
 
 /**
+ * Scan input and make sure that all HTML tags are properly closed and nested.
+ *
+ * Angle brackets that do not start a tag are converted to entities, closing
+ * tags without a matching opening tag are dropped, tags that are still open
+ * are closed (innermost first) and single-use tags such as <br> are written
+ * in their self-closing form.
+ *
+ * @param $text
+ *   The HTML string to correct.
+ * @return
+ *   The corrected HTML string.
+ */
+function _filter_htmlcorrector($text) {
+  // Prepare tag lists.
+  static $no_nesting, $single_use;
+  if (!isset($no_nesting)) {
+    // Tags which cannot be nested but are typically left unclosed.
+    $no_nesting = drupal_map_assoc(array('li', 'p'));
+
+    // Single use tags in HTML4
+    $single_use = drupal_map_assoc(array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'));
+  }
+
+  // Properly entify angles.
+  $text = preg_replace('!<([^a-zA-Z/])!', '&lt;\1', $text);
+
+  // Split tags from text.
+  $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+  // Note: PHP ensures the array consists of alternating delimiters and literals
+  // and begins and ends with a literal (inserting an empty string as required).
+
+  $tag = false; // Odd/even counter. Tag or no tag.
+  $stack = array();
+  $output = '';
+  foreach ($split as $value) {
+    // Process HTML tags.
+    if ($tag) {
+      // The tag name ends at the first whitespace; attributes may be separated
+      // from it by a tab or newline, not only by a space.
+      list($tagname) = preg_split('/\s/', strtolower($value), 2);
+      // Closing tag
+      if ($tagname[0] === '/') {
+        $tagname = (string) substr($tagname, 1);
+        // Discard XHTML closing tags for single use tags.
+        if (!isset($single_use[$tagname])) {
+          // See if we possibly have a matching opening tag on the stack.
+          if (in_array($tagname, $stack, TRUE)) {
+            // Close other tags lingering first.
+            do {
+              $output .= '</'. $stack[0] .'>';
+            } while (array_shift($stack) !== $tagname);
+          }
+          // Otherwise, discard it.
+        }
+      }
+      // Opening tag
+      else {
+        // Compact self-closing tags such as <br/> carry the slash in the name;
+        // strip it so the name matches the tag lists.
+        $tagname = rtrim($tagname, '/');
+        // See if we have an identical 'no nesting' tag already open and close it if found.
+        if (count($stack) && ($stack[0] === $tagname) && isset($no_nesting[$stack[0]])) {
+          $output .= '</'. array_shift($stack) .'>';
+        }
+        // Push non-single-use tags onto the stack
+        if (!isset($single_use[$tagname])) {
+          array_unshift($stack, $tagname);
+        }
+        // Add trailing slash to single-use tags as per X(HT)ML.
+        else {
+          $value = rtrim($value, " \t\n\r/") .' /';
+        }
+        $output .= '<'. $value .'>';
+      }
+    }
+    else {
+      // Passthrough all text.
+      $output .= $value;
+    }
+    $tag = !$tag;
+  }
+  // Close remaining tags.
+  while (count($stack) > 0) {
+    $output .= '</'. array_shift($stack) .'>';
+  }
+  return $output;
+}
+
+/**
  * Make links out of absolute URLs.
  */
 function _filter_url_parse_full_links($match) {
Index: modules/system/system.install
===================================================================
RCS file: /cvs/drupal/drupal/modules/system/system.install,v
retrieving revision 1.111
diff -u -r1.111 system.install
--- modules/system/system.install	17 May 2007 07:28:42 -0000	1.111
+++ modules/system/system.install	18 May 2007 16:40:05 -0000
@@ -1209,12 +1209,16 @@
   db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (1, 'filter', 0, 1)");
   // Line break filter.
   db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (1, 'filter', 1, 2)");
+  // HTML corrector filter.
+  db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (1, 'filter', 3, 10)");
 
   // Full HTML:
   // URL filter.
   db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (2, 'filter', 2, 0)");
   // Line break filter.
   db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (2, 'filter', 1, 1)");
+  // HTML corrector filter.
+  db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (2, 'filter', 3, 10)");
 
   db_query("INSERT INTO {variable} (name,value) VALUES ('filter_html_1','i:1;')");
 
@@ -4033,6 +4037,37 @@
 }
 
 /**
+ * Add HTML corrector to HTML formats or replace the old module if it was in use.
+ *
+ * @return
+ *   An array of update results, each with 'success' and 'query' keys.
+ */
+function system_update_6017() {
+  $ret = array();
+
+  // Disable htmlcorrector.module, if it exists and replace its filter.
+  if (module_exists('htmlcorrector')) {
+    module_disable(array('htmlcorrector'));
+    // The module and delta columns belong to {filters}, not {filter_formats},
+    // so the filter assignment has to be rewritten there.
+    $ret[] = update_sql("UPDATE {filters} SET module = 'filter', delta = 3 WHERE module = 'htmlcorrector'");
+    // Report the message in the same structure update_sql() returns.
+    $ret[] = array('success' => TRUE, 'query' => t('HTML Corrector module was disabled; this functionality has now been added to core.'));
+    return $ret;
+  }
+
+  // Otherwise, find any format with 'HTML' in its name and add the filter at the end.
+  $result = db_query("SELECT format FROM {filter_formats} WHERE name LIKE '%HTML%'");
+  while ($format = db_fetch_object($result)) {
+    $weight = db_result(db_query("SELECT MAX(weight) FROM {filters} WHERE format = %d", $format->format));
+    // Place the corrector after every existing filter, at weight 10 or higher.
+    db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (%d, '%s', %d, %d)", $format->format, 'filter', 3, max(10, $weight + 1));
+  }
+
+  return $ret;
+}
+
+/**
  * @} End of "defgroup updates-5.x-to-6.x"
  * The next series of updates should start at 7000.
  */
