Index: modules/aggregator.module
===================================================================
RCS file: /cvs/drupal/drupal/modules/aggregator.module,v
retrieving revision 1.209
diff -u -F^f -r1.209 aggregator.module
--- modules/aggregator.module	14 Sep 2004 01:58:00 -0000	1.209
+++ modules/aggregator.module	15 Sep 2004 22:35:30 -0000
@@ -275,66 +275,9 @@ function aggregator_remove($feed) {
 }
 
 /**
- * Call-back function used by the XML parser.
- */
-function aggregator_element_start($parser, $name, $attributes) {
-  global $item, $element, $tag;
-
-  switch ($name) {
-    case 'IMAGE':
-    case 'TEXTINPUT':
-      $element = $name;
-      break;
-    case 'ITEM':
-      $element = $name;
-      $item += 1;
-  }
-
-  $tag = $name;
-}
-
-/**
- * Call-back function used by the XML parser.
- */
-function aggregator_element_end($parser, $name) {
-  global $element;
-
-  switch ($name) {
-    case 'IMAGE':
-    case 'TEXTINPUT':
-    case 'ITEM':
-      $element = '';
-  }
-}
-
-/**
- * Call-back function used by the XML parser.
- */
-function aggregator_element_data($parser, $data) {
-  global $channel, $element, $items, $item, $image, $tag;
-
-  switch ($element) {
-    case 'ITEM':
-      $items[$item][$tag] .= $data;
-      break;
-    case 'IMAGE':
-      $image[$tag] .= $data;
-      break;
-    case 'TEXTINPUT':
-      // The sub-element is not supported. However, we must recognize
-      // it or its contents will end up in the item array.
-      break;
-    default:
-      $channel[$tag] .= $data;
-  }
-}
-
-/**
  * Checks a news feed for new items.
  */
 function aggregator_refresh($feed) {
-  global $channel, $image;
-
   // Generate conditional GET headers.
   $headers = array();
   if ($feed['etag']) {
@@ -360,37 +303,8 @@ function aggregator_refresh($feed) {
     case 302:
     case 307:
       // Filter the input data:
-     if (aggregator_parse_feed($result->data, $feed)) {
-
-        if ($result->headers['Last-Modified']) {
-          $modified = strtotime($result->headers['Last-Modified']);
-        }
-
-        /*
-        ** Prepare the image data (if any):
-        */
-
-        foreach ($image as $key => $value) {
-          $image[$key] = trim($value);
-        }
-
-        if ($image['LINK'] && $image['URL'] && $image['TITLE']) {
-          $image = '<a href="'. $image['LINK'] .'"><img src="'. $image['URL'] .'" alt="'. $image['TITLE'] .'" /></a>';
-        }
-        else {
-          $image = NULL;
-        }
-
-        /*
-        ** Update the feed data:
-        */
-
-        db_query("UPDATE {aggregator_feed} SET url = '%s', checked = %d, link = '%s', description = '%s', image = '%s', etag = '%s', modified = %d WHERE fid = %d", $feed['url'], time(), strip_tags($channel['LINK']), strip_tags($channel['DESCRIPTION']), $image, $result->headers['ETag'], $modified, $feed['fid']);
-
-        /*
-        ** Clear the cache:
-        */
-
+      if (aggregator_update_feed($result->data, $feed, $result->headers['Last-Modified'], $result->headers['ETag'])) {
+        // Clear the cache:
         cache_clear_all();
 
         watchdog('regular', t('Aggregator: syndicated content from %site.', array('%site' => '<em>'. $feed[title] .'</em>')));
@@ -440,40 +354,44 @@ function aggregator_parse_w3cdtf($date_s
   }
 }
 
-function aggregator_parse_feed(&$data, $feed) {
-  global $items, $image, $channel;
+/**
+ * Parses a fetched feed, processes its items and updates the feed record.
+ *
+ * @param $data
+ *   The raw feed document taken from the HTTP response.
+ * @param $feed
+ *   The feed record being refreshed; 'fid', 'title', 'url' and 'link' are read.
+ * @param $modified
+ *   The value of the Last-Modified response header, if any.
+ * @param $etag
+ *   The value of the ETag response header, if any. The caller must pass it in
+ *   because the HTTP response object is not in scope inside this function.
+ * @return
+ *   1 when the feed was parsed and saved, 0 when parsing failed.
+ */
+function aggregator_update_feed(&$data, $feed, $modified, $etag = '') {
+  include_once 'includes/rssparse.inc';
+
+  // parse the feed
+  $rss = rssparse_parse_feed($data);
 
-  // Unset the global variables before we use them:
-  unset($GLOBALS['element'], $GLOBALS['item'], $GLOBALS['tag']);
-  $items = array();
-  $image = array();
-  $channel = array();
-
-  // parse the data:
-  $xml_parser = drupal_xml_parser_create($data);
-  xml_set_element_handler($xml_parser, 'aggregator_element_start', 'aggregator_element_end');
-  xml_set_character_data_handler($xml_parser, 'aggregator_element_data');
-
-  if (!xml_parse($xml_parser, $data, 1)) {
-    watchdog('error', t('Aggregator: failed to parse RSS feed %site: %error at line %line.', array('%site' => '<em>'. $feed['title'] .'</em>', '%error' => xml_error_string(xml_get_error_code($xml_parser)), '%line' => xml_get_current_line_number($xml_parser))));
-    drupal_set_message(t('Failed to parse RSS feed %site: %error at line %line.', array('%site' => '<em>'. $feed['title'] .'</em>', '%error' => xml_error_string(xml_get_error_code($xml_parser)), '%line' => xml_get_current_line_number($xml_parser))), 'error');
+  // Check if there was an error
+  if ($rss['error']) {
+    watchdog('error', t('Aggregator: failed to parse RSS feed %site: %error', array('%site' => '<em>'. $feed['title'] .'</em>', '%error' => $rss['error'])));
+    drupal_set_message(t('Failed to parse RSS feed %site: %error', array('%site' => '<em>'. $feed['title'] .'</em>', '%error' => $rss['error'])), 'error');
     return 0;
   }
-  xml_parser_free($xml_parser);
 
   // initialize the translation table:
   $tt = array_flip(get_html_translation_table(HTML_SPECIALCHARS));
   $tt['&apos;'] = "'";
 
-  /*
-  ** We reverse the array such that we store the first item last,
-  ** and the last item first.  In the database, the newest item
-  ** should be at the top.
-  */
+  // We reverse the array such that we store the first item last,
+  // and the last item first.  In the database, the newest item
+  // should be at the top.
+  $rss['items'] = array_reverse($rss['items']);
 
-  $items = array_reverse($items);
-
-  foreach ($items as $item) {
+  foreach ($rss['items'] as $item) {
     unset($title, $link, $author, $description);
 
     // Prepare the item:
@@ -486,12 +404,9 @@ function aggregator_parse_feed(&$data, $
       $item[$key] = $value;
     }
 
-    /*
-    ** Resolve the item's title.  If no title is found, we use
-    ** up to 40 characters of the description ending at a word
-    ** boundary but not splitting potential entities.
-    */
-
+    // Resolve the item's title.  If no title is found, we use
+    // up to 40 characters of the description ending at a word
+    // boundary but not splitting potential entities.
     if ($item['TITLE']) {
       $title = $item['TITLE'];
     }
@@ -499,10 +414,7 @@ function aggregator_parse_feed(&$data, $
       $title = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", truncate_utf8($item['DESCRIPTION'], 40));
     }
 
-    /*
-    ** Resolve the items link.
-    */
-
+    // Resolve the items link.
     if ($item['LINK']) {
       $link = $item['LINK'];
     }
@@ -513,11 +425,8 @@ function aggregator_parse_feed(&$data, $
       $link = $feed['link'];
     }
 
-    /*
-    ** Try to resolve and parse the item's publication date.  If no
-    ** date is found, we use the current date instead.
-    */
-
+    // Try to resolve and parse the item's publication date.  If no
+    // date is found, we use the current date instead.
     if ($item['PUBDATE']) $date = $item['PUBDATE'];                        // RSS 2.0
     else if ($item['DC:DATE']) $date = $item['DC:DATE'];                   // Dublin core
     else if ($item['DCTERMS:ISSUED']) $date = $item['DCTERMS:ISSUED'];     // Dublin core
@@ -533,12 +442,9 @@ function aggregator_parse_feed(&$data, $
       }
     }
 
-    /*
-    ** Save this item.  Try to avoid duplicate entries as much as
-    ** possible.  If we find a duplicate entry, we resolve it and
-    ** pass along it's ID such that we can update it if needed.
-    */
-
+    // Save this item.  Try to avoid duplicate entries as much as
+    // possible.  If we find a duplicate entry, we resolve it and
+    // pass along it's ID such that we can update it if needed.
     if ($link && $link != $feed['link'] && $link != $feed['url']) {
       $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND link = '%s'", $feed['fid'], $link));
     }
@@ -554,22 +460,41 @@ function aggregator_parse_feed(&$data, $
     }
   }
 
-  /*
-  ** Remove all items that are older than 3 months:
-  */
-
+  // Remove all items that are older than 3 months:
   $age = time() - 8035200; // 60 * 60 * 24 * 31 * 3
   $result = db_query('SELECT iid FROM {aggregator_item} WHERE fid = %d AND timestamp < %d', $feed['fid'], $age);
-
   if (db_num_rows($result)) {
     $items = array();
     while ($item = db_fetch_object($result)) {
       $items[] = $item->iid;
     }
-    db_query('DELETE FROM {aggregator_category_item} WHERE iid IN ('. implode(', ', $items) .')');
+    db_query('DELETE FROM {aggregator_category_item} WHERE iid IN ('. implode(', ', $items) .') ');
     db_query('DELETE FROM {aggregator_item} WHERE fid = %d AND timestamp < %d', $feed['fid'], $age);
   }
 
+  // Get a last modified timestamp
+  if ($modified) {
+    $modified = strtotime($modified);
+  }
+  else {
+    $modified = NULL;
+  }
+
+  // Prepare the image data (if any):
+  foreach ($rss['image'] as $key => $value) {
+    $rss['image'][$key] = trim($value);
+  }
+
+  if ($rss['image']['LINK'] && $rss['image']['URL'] && $rss['image']['TITLE']) {
+    $image = '<a href="'. $rss['image']['LINK'] .'"><img src="'. $rss['image']['URL'] .'" alt="'. $rss['image']['TITLE'] .'" /></a>';
+  }
+  else {
+    $image = NULL;
+  }
+
+  // Update the feed data:
+  db_query("UPDATE {aggregator_feed} SET url = '%s', checked = %d, link = '%s', description = '%s', image = '%s', etag = '%s', modified = %d WHERE fid = %d", $feed['url'], time(), strip_tags($rss['channel']['LINK']), strip_tags($rss['channel']['DESCRIPTION']), $image, $etag, $modified, $feed['fid']);
+
   return 1;
 }
 
