Index: includes/common.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/common.inc,v
retrieving revision 1.788
diff -u -p -r1.788 common.inc
--- includes/common.inc	21 Aug 2008 19:36:36 -0000	1.788
+++ includes/common.inc	3 Sep 2008 15:36:53 -0000
@@ -3562,3 +3562,77 @@ function _drupal_flush_css_js() {
   }
   variable_set('css_js_query_string', $new_character . substr($string_history, 0, 19));
 }
+
+/**
+ * Fetch a feed from a URL and parse it.
+ *
+ * @param $url
+ *   A string containing a fully qualified URI.
+ * @param $modified
+ *   Optional timestamp of last check, sent as If-Modified-Since.
+ * @param $etag
+ *   Optional ETag of the last retrieval, sent as If-None-Match.
+ * @param $md5
+ *   Optional md5 hash of last retrieved feed data.
+ * @return
+ *   A feed object: ->items is always an array (empty if nothing new was
+ *   parsed), ->code is the HTTP status, ->error is set on failure.
+ */
+function drupal_retrieve_feed($url, $modified = NULL, $etag = NULL, $md5 = NULL) {
+  $feed = new stdClass();
+  $feed->items = array();
+
+  // Generate conditional GET headers.
+  $headers = array();
+  if ($etag) {
+    $headers['If-None-Match'] = $etag;
+  }
+  if ($modified) {
+    $headers['If-Modified-Since'] = gmdate('D, d M Y H:i:s', $modified) . ' GMT';
+  }
+
+  // Request feed. A request that fails early may carry no status code.
+  $result = drupal_http_request($url, $headers);
+  $code = isset($result->code) ? $result->code : 0;
+
+  // Process HTTP response code.
+  switch ($code) {
+    case 304:
+      break;
+    case 301:
+      $redirect_url = $result->redirect_url;
+      // Do not break here.
+    case 200:
+    case 302:
+    case 307:
+      if (!isset($result->data)) {
+        break;
+      }
+
+      // Allow alternate feed parsing libraries.
+      require_once variable_get('feed_inc', './includes/feed.inc');
+
+      feed_set_data($result->data);
+      feed_set_headers($result->headers);
+
+      // The caller stores the md5 hash of the feed data. If the hash of the
+      // downloaded data equals the stored one, the feed was not updated.
+      if ($md5 != NULL && feed_get_hash() === (string) $md5) {
+        break;
+      }
+
+      $feed = feed_parse();
+      // A failed parse may return an object without items; callers count them.
+      $feed->items = isset($feed->items) ? $feed->items : array();
+      if (!isset($feed->error) && isset($redirect_url)) {
+        $feed->redirect_url = $redirect_url;
+      }
+      break;
+    default:
+      // Pass the failure on so that callers can report it.
+      $feed->error = isset($result->error) ? $result->error : '';
+      module_invoke('system', 'check_http_request');
+  }
+  $feed->code = $code;
+  return $feed;
+}
Index: includes/feed.inc
===================================================================
RCS file: includes/feed.inc
diff -N includes/feed.inc
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ includes/feed.inc	3 Sep 2008 15:36:53 -0000
@@ -0,0 +1,446 @@
+<?php
+// $Id$
+
+/**
+ * @file
+ * API for parsing RSS, RDF and Atom formatted feeds.
+ */
+
+/**
+ * Determines which syndication format a parsed feed uses.
+ *
+ * @param $data
+ *   The SimpleXML object of the feed.
+ * @return
+ *   One of 'rss', 'atom' or 'rdf'; FALSE if the format is not recognized.
+ */
+function feed_parse_format_detect(SimpleXMLElement $data) {
+  if (!is_object($data)) {
+    return FALSE;
+  }
+  $root = strtolower($data->getName());
+  // Atom: the root is <feed>, or the root has <entry> children.
+  if ($root === 'feed' || isset($data->entry)) {
+    return 'atom';
+  }
+  if ($root === 'rdf') {
+    return 'rdf';
+  }
+  $versions = array('0.91', '0.92', '2.0');
+  $attributes = $data->attributes();
+  return ($root === 'rss' && in_array($attributes['version'], $versions)) ? 'rss' : FALSE;
+}
+
+/**
+ * Parses RSS 2.0, 0.91, 0.92 feeds.
+ *
+ * @param $data
+ *   The SimpleXML object of the feed.
+ * @return
+ *   A feed object with ->channel, ->image and ->items (a list of item arrays).
+ */
+function feed_parse_rss(SimpleXMLElement $data) {
+  $feed = new stdClass();
+  $feed->channel = array();
+  $dc = $data->channel->children('http://purl.org/dc/elements/1.1/');
+  $feed->channel['TITLE'] = htmlspecialchars_decode(_feed_parse_choose("{$data->channel->title}", "{$dc->title}"));
+  $feed->channel['DESCRIPTION'] = htmlspecialchars_decode(_feed_parse_choose("{$data->channel->description}", "{$dc->subject}"));
+  $feed->channel['LINK'] = isset($data->channel->link) ? "{$data->channel->link}" : "";
+  // Each image property is tested on its own element rather than on <url>.
+  $feed->image = array();
+  $feed->image['URL'] = isset($data->channel->image->url) ? "{$data->channel->image->url}" : '';
+  $feed->image['LINK'] = isset($data->channel->image->link) ? "{$data->channel->image->link}" : '';
+  $feed->image['TITLE'] = isset($data->channel->image->title) ? "{$data->channel->image->title}" : '';
+  $feed->items = array();
+  // The namespace map is the same for every item, so fetch it only once.
+  $namespaces = $data->getNamespaces(TRUE);
+  // xpath() returns FALSE on failure, which cannot be iterated.
+  $news_items = $data->xpath('//item');
+  foreach (($news_items ? $news_items : array()) as $news) {
+    // Get important namespaces.
+    $content = $news->children('http://purl.org/rss/1.0/modules/content/');
+    $dc = $news->children('http://purl.org/dc/elements/1.1/');
+    $item = array();
+    $item['GUID'] = isset($news->guid) ? "{$news->guid}" : NULL;
+    $item['TITLE'] = htmlspecialchars_decode(_feed_parse_choose("{$news->title}", "{$dc->title}"));
+    $item['DESCRIPTION'] = _feed_parse_choose("{$news->description}", "{$news->encoded}", "{$content->encoded}", "{$dc->description}");
+    $item['LINK'] = _feed_parse_choose("{$news->link}");
+    // Fall back to dc:date for feeds that do not provide <pubDate>.
+    $item['TIMESTAMP'] = _feed_parse_date(_feed_parse_choose("{$news->pubDate}", "{$dc->date}"));
+    $item['CATEGORIES'] = array();
+    if (isset($news->category)) {
+      // Iterating a SimpleXML node list always yields element objects, so each
+      // category is simply reduced to its plain text.
+      foreach ($news->category as $cat) {
+        $item['CATEGORIES'][] = trim(strip_tags("$cat"));
+      }
+    }
+    $item['CATEGORIES'] = array_unique($item['CATEGORIES']);
+    $item['NAMESPACES'] = feed_parse_extract_namespaces($news, $namespaces);
+    $item['ENCLOSURES'] = feed_parse_extract_enclosures($news);
+    $feed->items[] = $item;
+  }
+  return $feed;
+}
+
+/**
+ * Parses Atom 1.0 feeds.
+ *
+ * @param $data
+ *   The SimpleXML object of the feed.
+ * @return
+ *   A feed object with ->channel and ->items (a list of item arrays).
+ */
+function feed_parse_atom(SimpleXMLElement $data) {
+  $feed = new stdClass();
+  $feed->channel = array();
+  $feed->channel['TITLE'] = isset($data->title) ? "{$data->title}" : "";
+  $feed->channel['DESCRIPTION'] = isset($data->subtitle) ? "{$data->subtitle}" : "";
+  $feed->channel['LINK'] = '';
+  if (count($data->link) > 0) {
+    $link = $data->link->attributes();
+    $feed->channel['LINK'] = isset($link["href"]) ? "{$link["href"]}" : "";
+  }
+  $feed->items = array();
+  foreach ($data->entry as $news) {
+    $item = array();
+    $item['GUID'] = !empty($news->id) ? "{$news->id}" : NULL;
+    $link_element = "{$news->link['href']}";
+    $link_guid = ($item['GUID'] !== NULL && valid_url($item['GUID'])) ? $item['GUID'] : '';
+    $item['LINK'] = _feed_parse_choose($link_element, $link_guid);
+    $item['TITLE'] = "{$news->title}";
+    $body = '';
+    if (!empty($news->content)) {
+      foreach ($news->content->children() as $child) {
+        $body .= $child->asXML();
+      }
+      $body .= "{$news->content}";
+    }
+    else if (!empty($news->summary)) {
+      foreach ($news->summary->children() as $child) {
+        $body .= $child->asXML();
+      }
+      $body .= "{$news->summary}";
+    }
+    $item['DESCRIPTION'] = $body;
+    // <published> is optional in Atom whereas <updated> is mandatory.
+    $item['TIMESTAMP'] = _feed_parse_date(_feed_parse_choose("{$news->published}", "{$news->updated}"));
+    $item['CATEGORIES'] = array();
+    if (isset($news->category)) {
+      foreach ($news->category as $category) {
+        $item['CATEGORIES'][] = trim(strip_tags("{$category['term']}"));
+      }
+    }
+    $item['CATEGORIES'] = array_unique($item['CATEGORIES']);
+    $item['NAMESPACES'] = feed_parse_extract_namespaces($news, $data->getNamespaces(TRUE));
+    $item['ENCLOSURES'] = feed_parse_extract_enclosures($news);
+    $feed->items[] = $item;
+  }
+  return $feed;
+}
+
+/**
+ * Parses RDF feeds.
+ *
+ * @param $data
+ *   The SimpleXML object of the feed.
+ * @return
+ *   A feed object with ->channel and ->items (a list of item arrays).
+ */
+function feed_parse_rdf(SimpleXMLElement $data) {
+  $feed = new stdClass();
+  $feed->channel = array();
+  $feed->channel['TITLE'] = isset($data->channel->title) ? "{$data->channel->title}" : "";
+  $feed->channel['DESCRIPTION'] = isset($data->channel->description) ? "{$data->channel->description}" : "";
+  $feed->channel['LINK'] = isset($data->channel->link) ? "{$data->channel->link}" : "";
+  $namespaces = $data->getNamespaces(TRUE);
+  // Set category splitter (space is for del.icio.us feed).
+  $category_splitter = ' ';
+  $feed->items = array();
+  foreach ($data->item as $news) {
+    // Reset per-item values so nothing leaks in from the previous item.
+    $id = NULL;
+    $title = $body = $link = $date = '';
+    $categories = array();
+    foreach ($namespaces as $ns_link) {
+      // Get about attribute as guid.
+      foreach ($news->attributes($ns_link) as $name => $value) {
+        if ($name == 'about') {
+          $id = "{$value}";
+        }
+      }
+
+      // Get children for current namespace.
+      if (version_compare(phpversion(), '5.1.2', '<')) {
+        $ns = (array) $news;
+      }
+      else {
+        $ns = (array) $news->children($ns_link);
+      }
+
+      // Title
+      if (!empty($ns['title'])) {
+        $title = "{$ns['title']}";
+      }
+
+      // Description or dc:description
+      if (!empty($ns['description']) && $body == '') {
+        $body = "{$ns['description']}";
+      }
+
+      // Link
+      if (!empty($ns['link'])) {
+        $link = "{$ns['link']}";
+      }
+
+      // content:encoded
+      if (!empty($ns['encoded'])) {
+        $body = "{$ns['encoded']}";
+      }
+
+      // pubDate or dc:date. Keep the first date found: the namespaces
+      // visited afterwards usually carry no date and must not erase it.
+      if ($date == '') {
+        $date = (empty($ns['pubDate']) ? (empty($ns['date']) ? '' : "{$ns['date']}") : "{$ns['pubDate']}");
+      }
+
+      // dc:subject
+      if (!empty($ns['subject'])) {
+        // there can be multiple category tags
+        if (is_array($ns['subject'])) {
+          foreach ($ns['subject'] as $cat) {
+            if (is_object($cat)) {
+              $categories[] = trim(strip_tags($cat->asXML()));
+            }
+            else {
+              $categories[] = $cat;
+            }
+          }
+        }
+        else { //or single tag
+          $categories = explode($category_splitter, "{$ns['subject']}");
+        }
+      }
+    }
+    $item = array();
+    $item['TITLE'] = $title;
+    $item['DESCRIPTION'] = $body;
+    $item['TIMESTAMP'] = _feed_parse_date($date);
+    $item['LINK'] = $link;
+    $item['GUID'] = $id;
+    $item['CATEGORIES'] = $categories;
+    $item['NAMESPACES'] = feed_parse_extract_namespaces($news, $namespaces);
+    $item['ENCLOSURES'] = feed_parse_extract_enclosures($news);
+    $feed->items[] = $item;
+  }
+  return $feed;
+}
+
+/**
+ * Collects the namespaced child elements of an item, grouped by prefix.
+ *
+ * @param $item
+ *   A SimpleXML object.
+ * @param $namespaces
+ *   Array of namespace URLs, keyed by namespace prefix.
+ * @return
+ *   array($ns_prefix => array('key' => 'value')), skipping empty entries.
+ */
+function feed_parse_extract_namespaces(SimpleXMLElement $item, $namespaces) {
+  $collected = array();
+  foreach ($namespaces as $prefix => $url) {
+    $children = (array) $item->children($url);
+    if (!empty($children) && !empty($prefix)) {
+      $collected[$prefix] = $children;
+    }
+  }
+  return $collected;
+}
+
+/**
+ * Extracts the enclosures (<enclosure> and <link rel="enclosure">) of an item.
+ *
+ * @param $item
+ *   A SimpleXML object.
+ * @return
+ *   A list of enclosures, each an array of attribute name => attribute value.
+ */
+function feed_parse_extract_enclosures(SimpleXMLElement $item) {
+  $result = array();
+  @$item = simplexml_load_string($item->asXML());
+  $found = ($item === FALSE) ? FALSE : $item->xpath("//enclosure | //link[@rel='enclosure']");
+  foreach (($found ? $found : array()) as $enc) {
+    $add_enc = array();
+    foreach ($enc->attributes() as $k => $v) {
+      $add_enc[$k] = "{$v}";
+    }
+    $result[] = $add_enc;
+  }
+  return $result;
+}
+
+/**
+ * Returns the first non-blank argument (trimmed if a string), or '' if none.
+ */
+function _feed_parse_choose() {
+  foreach (func_get_args() as $arg) {
+    $value = is_string($arg) ? trim($arg) : $arg;
+    if (strlen($value) > 0) {
+      return $value;
+    }
+  }
+  return '';
+}
+
+/**
+ * Converts a date string found in a feed into a timestamp.
+ *
+ * @param $date_str
+ *   The date string in various formats.
+ * @return
+ *   The timestamp of the string, or the current time if it cannot be parsed.
+ */
+function _feed_parse_date($date_str) {
+  $timestamp = strtotime($date_str);
+  // strtotime() reports failure as FALSE (or -1 before PHP 5.1.0).
+  $failed = ($timestamp === FALSE || $timestamp == -1);
+  $timestamp = $failed ? _feed_parse_w3cdtf($date_str) : $timestamp;
+  return ($timestamp !== FALSE) ? $timestamp : time();
+}
+
+/**
+ * Parse the W3C date/time format, a subset of ISO 8601.
+ *
+ * A date without a time zone designator is treated as GMT.
+ * See http://www.w3.org/TR/NOTE-datetime for more information.
+ * Originally from MagpieRSS (http://magpierss.sourceforge.net/).
+ *
+ * @param $date_str
+ *   A string with a potentially W3C DTF date.
+ * @return
+ *   A timestamp if parsed successfully or FALSE if not.
+ */
+function _feed_parse_w3cdtf($date_str) {
+  if (!preg_match('/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/', $date_str, $match)) {
+    return FALSE;
+  }
+  // Optional groups that did not take part in the match may be missing from
+  // $match altogether, so pad the array up to the last group (11).
+  $match += array_fill(0, 12, '');
+  // Group 6 includes the leading colon; the seconds digits are group 7.
+  list($year, $month, $day) = array((int) $match[1], (int) $match[2], (int) $match[3]);
+  list($hours, $minutes, $seconds) = array((int) $match[4], (int) $match[5], (int) $match[7]);
+  // Calculate the epoch for current date assuming GMT.
+  $epoch = gmmktime($hours, $minutes, $seconds, $month, $day, $year);
+  // Group 11 is "Z" (zulu time, aka GMT), which needs no correction.
+  if ($match[11] != 'Z') {
+    // Without a time zone designator these groups are empty: offset 0.
+    $tz_mod = $match[8];
+    $tz_hour = (int) $match[9];
+    $tz_min = (int) $match[10];
+    $offset_secs = (($tz_hour * 60) + $tz_min) * 60;
+    // Is timezone ahead of GMT?  If yes, subtract offset.
+    if ($tz_mod == '+') {
+      $offset_secs *= -1;
+    }
+    $epoch += $offset_secs;
+  }
+  return $epoch;
+}
+
+
+/**
+ * Set XML feed data to be parsed.
+ *
+ * @param $data
+ *   XML feed data; an empty string is stored as well. Omit to only read.
+ * @return
+ *   Stored XML feed data.
+ */
+function feed_set_data($data = NULL) {
+  static $static = NULL;
+  if ($data !== NULL) {
+    $static = $data;
+  }
+  return $static;
+}
+
+/**
+ * Returns the XML feed data kept by feed_set_data().
+ *
+ * @return
+ *   Stored XML feed data.
+ * @see feed_set_data()
+ */
+function feed_get_data() {
+  return feed_set_data(NULL);
+}
+
+/**
+ * Returns the md5 hash of the stored feed data.
+ *
+ * @return
+ *   Calculated md5 hash of the feed data.
+ */
+function feed_get_hash() {
+  return md5(feed_set_data());
+}
+
+/**
+ * Remember the HTTP response headers of the feed being processed.
+ *
+ * @param $headers
+ *   HTTP response headers returned by drupal_http_request().
+ * @return
+ *   Stored HTTP response headers.
+ */
+function feed_set_headers($headers = array()) {
+  static $stored = array();
+  // An empty or omitted argument leaves the stored headers untouched, which
+  // is how feed_get_headers() reads them back.
+  $stored = $headers ? $headers : $stored;
+  return $stored;
+}
+
+/**
+ * Returns the HTTP response headers kept by feed_set_headers().
+ *
+ * @return
+ *   Stored HTTP response headers.
+ */
+function feed_get_headers() {
+  return feed_set_headers(array());
+}
+
+/**
+ * Parse stored XML feed data into an object.
+ *
+ * This function requires that you call feed_set_data() before calling it.
+ *
+ * @return
+ *   A feed object. On failure it has ->error set and an empty ->items array.
+ * @see feed_set_data()
+ */
+function feed_parse() {
+  $data = feed_get_data();
+
+  // Initialize feed object; it is returned on failure, with no items.
+  $feed = new stdClass();
+  $feed->items = array();
+
+  // Parse the data.
+  @$data = simplexml_load_string((string) $data);
+  if ($data === FALSE) {
+    $feed->error = t('The downloaded data is not parsable XML.');
+    return $feed;
+  }
+
+  $format = feed_parse_format_detect($data);
+  if ($format === FALSE) {
+    $feed->error = t('The downloaded data is not in a recognizable feed format (ATOM, RSS or RDF).');
+    return $feed;
+  }
+
+  $feed_handler = 'feed_parse_' . $format;
+  return $feed_handler($data);
+}
Index: modules/aggregator/aggregator.module
===================================================================
RCS file: /cvs/drupal/drupal/modules/aggregator/aggregator.module,v
retrieving revision 1.389
diff -u -p -r1.389 aggregator.module
--- modules/aggregator/aggregator.module	16 Aug 2008 14:48:17 -0000	1.389
+++ modules/aggregator/aggregator.module	3 Sep 2008 15:36:54 -0000
@@ -456,358 +456,89 @@ function aggregator_remove($feed) {
 }
 
 /**
- * Callback function used by the XML parser.
- */
-function aggregator_element_start($parser, $name, $attributes) {
-  global $item, $element, $tag, $items, $channel;
-
-  switch ($name) {
-    case 'IMAGE':
-    case 'TEXTINPUT':
-    case 'CONTENT':
-    case 'SUMMARY':
-    case 'TAGLINE':
-    case 'SUBTITLE':
-    case 'LOGO':
-    case 'INFO':
-      $element = $name;
-      break;
-    case 'ID':
-      if ($element != 'ITEM') {
-        $element = $name;
-      }
-    case 'LINK':
-      if (!empty($attributes['REL']) && $attributes['REL'] == 'alternate') {
-        if ($element == 'ITEM') {
-          $items[$item]['LINK'] = $attributes['HREF'];
-        }
-        else {
-          $channel['LINK'] = $attributes['HREF'];
-        }
-      }
-      break;
-    case 'ITEM':
-      $element = $name;
-      $item += 1;
-      break;
-    case 'ENTRY':
-      $element = 'ITEM';
-      $item += 1;
-      break;
-  }
-
-  $tag = $name;
-}
-
-/**
- * Call-back function used by the XML parser.
- */
-function aggregator_element_end($parser, $name) {
-  global $element;
-
-  switch ($name) {
-    case 'IMAGE':
-    case 'TEXTINPUT':
-    case 'ITEM':
-    case 'ENTRY':
-    case 'CONTENT':
-    case 'INFO':
-      $element = '';
-      break;
-    case 'ID':
-      if ($element == 'ID') {
-        $element = '';
-      }
-  }
-}
-
-/**
- * Callback function used by the XML parser.
- */
-function aggregator_element_data($parser, $data) {
-  global $channel, $element, $items, $item, $image, $tag;
-  $items += array($item => array());
-  switch ($element) {
-    case 'ITEM':
-      $items[$item] += array($tag => '');
-      $items[$item][$tag] .= $data;
-      break;
-    case 'IMAGE':
-    case 'LOGO':
-      $image += array($tag => '');
-      $image[$tag] .= $data;
-      break;
-    case 'LINK':
-      if ($data) {
-        $items[$item] += array($tag => '');
-        $items[$item][$tag] .= $data;
-      }
-      break;
-    case 'CONTENT':
-      $items[$item] += array('CONTENT' => '');
-      $items[$item]['CONTENT'] .= $data;
-      break;
-    case 'SUMMARY':
-      $items[$item] += array('SUMMARY' => '');
-      $items[$item]['SUMMARY'] .= $data;
-      break;
-    case 'TAGLINE':
-    case 'SUBTITLE':
-      $channel += array('DESCRIPTION' => '');
-      $channel['DESCRIPTION'] .= $data;
-      break;
-    case 'INFO':
-    case 'ID':
-    case 'TEXTINPUT':
-      // The sub-element is not supported. However, we must recognize
-      // it or its contents will end up in the item array.
-      break;
-    default:
-      $channel += array($tag => '');
-      $channel[$tag] .= $data;
-  }
-}
-
-/**
  * Checks a news feed for new items.
  *
  * @param $feed
  *   An associative array describing the feed to be refreshed.
  */
 function aggregator_refresh($feed) {
-  global $channel, $image;
+  // Retrieve feed.
+  $result = drupal_retrieve_feed($feed['url'], $feed['modified'], $feed['etag'], $feed['hash']);
 
-  // Generate conditional GET headers.
-  $headers = array();
-  if ($feed['etag']) {
-    $headers['If-None-Match'] = $feed['etag'];
-  }
-  if ($feed['modified']) {
-    $headers['If-Modified-Since'] = gmdate('D, d M Y H:i:s', $feed['modified']) . ' GMT';
-  }
-
-  // Request feed.
-  $result = drupal_http_request($feed['url'], $headers);
-
-  // Process HTTP response code.
-  switch ($result->code) {
-    case 304:
-      db_query('UPDATE {aggregator_feed} SET checked = %d WHERE fid = %d', time(), $feed['fid']);
-      drupal_set_message(t('There is no new syndicated content from %site.', array('%site' => $feed['title'])));
-      break;
-    case 301:
-      $feed['url'] = $result->redirect_url;
-      // Do not break here.
-    case 200:
-    case 302:
-    case 307:
-      // We store the md5 hash of feed data in the database. When refreshing a
-      // feed we compare stored hash and new hash calculated from downloaded
-      // data. If both are equal we say that feed is not updated.
-      $md5 = md5($result->data);
-      if ($feed['hash'] == $md5) {
-        db_query('UPDATE {aggregator_feed} SET checked = %d WHERE fid = %d', time(), $feed['fid']);
-        drupal_set_message(t('There is no new syndicated content from %site.', array('%site' => $feed['title'])));
-        break;
-      }
-
-      // Filter the input data.
-      if (aggregator_parse_feed($result->data, $feed)) {
-        $modified = empty($result->headers['Last-Modified']) ? 0 : strtotime($result->headers['Last-Modified']);
-
-        // Prepare the channel data.
-        foreach ($channel as $key => $value) {
-          $channel[$key] = trim($value);
-        }
-
-        // Prepare the image data (if any).
-        foreach ($image as $key => $value) {
-          $image[$key] = trim($value);
-        }
-
-        if (!empty($image['LINK']) && !empty($image['URL']) && !empty($image['TITLE'])) {
-          // TODO: we should really use theme_image() here, but that only works with
-          // local images. It won't work with images fetched with a URL unless PHP version > 5.
-          $image = '<a href="' . check_url($image['LINK']) . '" class="feed-image"><img src="' . check_url($image['URL']) . '" alt="' . check_plain($image['TITLE']) . '" /></a>';
-        }
-        else {
-          $image = NULL;
-        }
-
-        $etag = empty($result->headers['ETag']) ? '' : $result->headers['ETag'];
-        // Update the feed data.
-        db_query("UPDATE {aggregator_feed} SET url = '%s', checked = %d, link = '%s', description = '%s', image = '%s', hash = '%s', etag = '%s', modified = %d WHERE fid = %d", $feed['url'], time(), $channel['LINK'], $channel['DESCRIPTION'], $image, $md5, $etag, $modified, $feed['fid']);
-
-        // Clear the cache.
-        cache_clear_all();
-
-        if (isset($result->redirect_url)) {
-          watchdog('aggregator', 'Updated URL for feed %title to %url.', array('%title' => $feed['title'], '%url' => $feed['url']));
-        }
-
-        watchdog('aggregator', 'There is new syndicated content from %site.', array('%site' => $feed['title']));
-        drupal_set_message(t('There is new syndicated content from %site.', array('%site' => $feed['title'])));
-      }
-      break;
-    default:
-      watchdog('aggregator', 'The feed from %site seems to be broken, due to "%error".', array('%site' => $feed['title'], '%error' => $result->code . ' ' . $result->error), WATCHDOG_WARNING);
-      drupal_set_message(t('The feed from %site seems to be broken, because of error "%error".', array('%site' => $feed['title'], '%error' => $result->code . ' ' . $result->error)));
-      module_invoke('system', 'check_http_request');
+  if (isset($result->redirect_url)) {
+    $feed['url'] = $result->redirect_url;
   }
-}
 
-/**
- * Parse the W3C date/time format, a subset of ISO 8601.
- *
- * PHP date parsing functions do not handle this format.
- * See http://www.w3.org/TR/NOTE-datetime for more information.
- * Originally from MagpieRSS (http://magpierss.sourceforge.net/).
- *
- * @param $date_str
- *   A string with a potentially W3C DTF date.
- * @return
- *   A timestamp if parsed successfully or FALSE if not.
- */
-function aggregator_parse_w3cdtf($date_str) {
-  if (preg_match('/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/', $date_str, $match)) {
-    list($year, $month, $day, $hours, $minutes, $seconds) = array($match[1], $match[2], $match[3], $match[4], $match[5], $match[6]);
-    // Calculate the epoch for current date assuming GMT.
-    $epoch = gmmktime($hours, $minutes, $seconds, $month, $day, $year);
-    if ($match[10] != 'Z') { // Z is zulu time, aka GMT
-      list($tz_mod, $tz_hour, $tz_min) = array($match[8], $match[9], $match[10]);
-      // Zero out the variables.
-      if (!$tz_hour) {
-        $tz_hour = 0;
-      }
-      if (!$tz_min) {
-        $tz_min = 0;
-      }
-      $offset_secs = (($tz_hour * 60) + $tz_min) * 60;
-      // Is timezone ahead of GMT?  If yes, subtract offset.
-      if ($tz_mod == '+') {
-        $offset_secs *= -1;
-      }
-      $epoch += $offset_secs;
+  if (count($result->items) > 0) {
+    aggregator_save_items($result->items, $feed);
+    if (!empty($result->image['LINK']) && !empty($result->image['URL']) && !empty($result->image['TITLE'])) {
+      // TODO: we should really use theme_image() here, but that only works with
+      // local images. It won't work with images fetched with a URL unless PHP version > 5.
+      $image = '<a href="' . check_url($result->image['LINK']) . '" class="feed-image"><img src="' . check_url($result->image['URL']) . '" alt="' . check_plain($result->image['TITLE']) . '" /></a>';
+    }
+    else {
+      $image = NULL;
     }
-    return $epoch;
+
+    $headers = feed_get_headers();
+    $etag = empty($headers['ETag']) ? '' : $headers['ETag'];
+    $modified = empty($headers['Last-Modified']) ? 0 : strtotime($headers['Last-Modified']);
+
+    // Update the feed data.
+    db_query("UPDATE {aggregator_feed} SET url = '%s', checked = %d, link = '%s', description = '%s', image = '%s', hash = '%s', etag = '%s', modified = %d WHERE fid = %d", $feed['url'], time(), $result->channel['LINK'], $result->channel['DESCRIPTION'], $image, feed_get_hash(), $etag, $modified, $feed['fid']);
+
+    // Clear the cache.
+    cache_clear_all();
+
+    if (isset($result->redirect_url)) {
+      watchdog('aggregator', 'Updated URL for feed %title to %url.', array('%title' => $feed['title'], '%url' => $feed['url']));
+    }
+
+    watchdog('aggregator', 'There is new syndicated content from %site.', array('%site' => $feed['title']));
+    drupal_set_message(t('There is new syndicated content from %site.', array('%site' => $feed['title'])));
   }
   else {
-    return FALSE;
+    db_query('UPDATE {aggregator_feed} SET checked = %d WHERE fid = %d', time(), $feed['fid']);
+    drupal_set_message(t('There is no new syndicated content from %site.', array('%site' => $feed['title'])));
+    if (isset($result->error)) {
+      watchdog('aggregator', 'The feed from %site seems to be broken, due to "%error".', array('%site' => $feed['title'], '%error' => $result->code . ' ' . $result->error), WATCHDOG_WARNING);
+      drupal_set_message(t('The feed from %site seems to be broken, because of error "%error".', array('%site' => $feed['title'], '%error' => $result->code . ' ' . $result->error)));
+    }
   }
 }
 
 /**
- * Parse a feed and store its items.
+ * Store a feed's items.
  *
- * @param $data
- *   The feed data.
+ * @param $items
+ *   An array containing feed items.
  * @param $feed
- *   An associative array describing the feed to be parsed.
+ *   An associative array describing the feed.
  * @return
  *   FALSE on error, TRUE otherwise.
  */
-function aggregator_parse_feed(&$data, $feed) {
-  global $items, $image, $channel;
-
-  // Unset the global variables before we use them.
-  unset($GLOBALS['element'], $GLOBALS['item'], $GLOBALS['tag']);
-  $items = array();
-  $image = array();
-  $channel = array();
-
-  // Parse the data.
-  $xml_parser = drupal_xml_parser_create($data);
-  xml_set_element_handler($xml_parser, 'aggregator_element_start', 'aggregator_element_end');
-  xml_set_character_data_handler($xml_parser, 'aggregator_element_data');
-
-  if (!xml_parse($xml_parser, $data, 1)) {
-    watchdog('aggregator', 'The feed from %site seems to be broken, due to an error "%error" on line %line.', array('%site' => $feed['title'], '%error' => xml_error_string(xml_get_error_code($xml_parser)), '%line' => xml_get_current_line_number($xml_parser)), WATCHDOG_WARNING);
-    drupal_set_message(t('The feed from %site seems to be broken, because of error "%error" on line %line.', array('%site' => $feed['title'], '%error' => xml_error_string(xml_get_error_code($xml_parser)), '%line' => xml_get_current_line_number($xml_parser))), 'error');
-    return FALSE;
-  }
-  xml_parser_free($xml_parser);
-
+function aggregator_save_items($items, $feed) {
   // We reverse the array such that we store the first item last, and the last
   // item first. In the database, the newest item should be at the top.
   $items = array_reverse($items);
 
   // Initialize variables.
-  $title = $link = $author = $description = $guid = NULL;
   foreach ($items as $item) {
-    unset($title, $link, $author, $description, $guid);
-
-    // Prepare the item:
-    foreach ($item as $key => $value) {
-      $item[$key] = trim($value);
-    }
-
-    // Resolve the item's title. If no title is found, we use up to 40
-    // characters of the description ending at a word boundary, but not
-    // splitting potential entities.
-    if (!empty($item['TITLE'])) {
-      $title = $item['TITLE'];
-    }
-    elseif (!empty($item['DESCRIPTION'])) {
-      $title = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", truncate_utf8($item['DESCRIPTION'], 40));
-    }
-    else {
-      $title = '';
-    }
-
-    // Resolve the items link.
-    if (!empty($item['LINK'])) {
-      $link = $item['LINK'];
-    }
-    else {
-      $link = $feed['link'];
-    }
-    $guid = isset($item['GUID']) ? $item['GUID'] : '';
-
-    // Atom feeds have a CONTENT and/or SUMMARY tag instead of a DESCRIPTION tag.
-    if (!empty($item['CONTENT:ENCODED'])) {
-      $item['DESCRIPTION'] = $item['CONTENT:ENCODED'];
-    }
-    elseif (!empty($item['SUMMARY'])) {
-      $item['DESCRIPTION'] = $item['SUMMARY'];
-    }
-    elseif (!empty($item['CONTENT'])) {
-      $item['DESCRIPTION'] = $item['CONTENT'];
-    }
-
-    // Try to resolve and parse the item's publication date. If no date is
-    // found, use the current date instead.
-    $date = 'now';
-    foreach (array('PUBDATE', 'DC:DATE', 'DCTERMS:ISSUED', 'DCTERMS:CREATED', 'DCTERMS:MODIFIED', 'ISSUED', 'CREATED', 'MODIFIED', 'PUBLISHED', 'UPDATED') as $key) {
-      if (!empty($item[$key])) {
-        $date = $item[$key];
-        break;
-      }
-    }
-
-    $timestamp = strtotime($date); // As of PHP 5.1.0, strtotime returns FALSE on failure instead of -1.
-
-    if ($timestamp <= 0) {
-      $timestamp = aggregator_parse_w3cdtf($date); // Aggregator_parse_w3cdtf() returns FALSE on failure.
-      if (!$timestamp) {
-        // Better than nothing.
-        $timestamp = time();
-      }
-    }
 
     // Save this item. Try to avoid duplicate entries as much as possible. If
     // we find a duplicate entry, we resolve it and pass along its ID is such
     // that we can update it if needed.
-    if (!empty($guid)) {
-      $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND guid = '%s'", $feed['fid'], $guid));
+    if (!empty($item['GUID'])) {
+      $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND guid = '%s'", $feed['fid'], $item['GUID']));
     }
-    else if ($link && $link != $feed['link'] && $link != $feed['url']) {
-      $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND link = '%s'", $feed['fid'], $link));
+    else if ($item['LINK'] && $item['LINK'] != $feed['link'] && $item['LINK'] != $feed['url']) {
+      $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND link = '%s'", $feed['fid'], $item['LINK']));
     }
     else {
-      $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND title = '%s'", $feed['fid'], $title));
+      $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND title = '%s'", $feed['fid'], $item['TITLE']));
     }
     $item += array('AUTHOR' => '', 'DESCRIPTION' => '');
-    aggregator_save_item(array('iid' => (isset($entry->iid) ? $entry->iid:  ''), 'fid' => $feed['fid'], 'timestamp' => $timestamp, 'title' => $title, 'link' => $link, 'author' => $item['AUTHOR'], 'description' => $item['DESCRIPTION'], 'guid' => $guid));
+    aggregator_save_item(array('iid' => (isset($entry->iid) ? $entry->iid:  ''), 'fid' => $feed['fid'], 'timestamp' => $item['TIMESTAMP'], 'title' => $item['TITLE'], 'link' => $item['LINK'], 'author' => $item['AUTHOR'], 'description' => $item['DESCRIPTION'], 'guid' => $item['GUID']));
   }
 
   // Remove all items that are older than flush item timer.
