diff --git a/libraries/http_request.inc b/libraries/http_request.inc index ace2fb0..04112e7 100644 --- a/libraries/http_request.inc +++ b/libraries/http_request.inc @@ -113,31 +113,33 @@ function http_request_get($url, $username = NULL, $password = NULL, $accept_inva $last_headers = array_change_key_case($last_result->headers); if (!empty($last_headers['etag'])) { - if ($curl) { - $headers[] = 'If-None-Match: ' . $last_headers['etag']; - } - else { - $headers['If-None-Match'] = $last_headers['etag']; - } + $headers['If-None-Match'] = $last_headers['etag']; } if (!empty($last_headers['last-modified'])) { - if ($curl) { - $headers[] = 'If-Modified-Since: ' . $last_headers['last-modified']; - } - else { - $headers['If-Modified-Since'] = $last_headers['last-modified']; - } + $headers['If-Modified-Since'] = $last_headers['last-modified']; } if (!empty($username) && !$curl) { $headers['Authorization'] = 'Basic ' . base64_encode("$username:$password"); } } + + // If we are inside a test case, make sure requests maintain the test UA. + global $db_prefix; + if (is_string($db_prefix) && preg_match("/(simpletest\d+)$/", $db_prefix, $matches)) { + $headers['User-Agent'] = drupal_generate_test_ua($matches[1]); + } + + $headers += array('User-Agent' => 'Drupal (+http://drupal.org/)'); // Support the 'feed' and 'webcal' schemes by converting them into 'http'. $url = strtr($url, array('feed://' => 'http://', 'webcal://' => 'http://')); if ($curl) { - $headers[] = 'User-Agent: Drupal (+http://drupal.org/)'; + $flat_headers = array(); + foreach ($headers as $name => $value) { + $flat_headers[] = "$name: $value"; + } + $result = new stdClass(); // Parse the URL and make sure we can handle the schema. 
@@ -169,7 +171,7 @@ function http_request_get($url, $username = NULL, $password = NULL, $accept_inva curl_setopt($download, CURLOPT_USERPWD, "{$username}:{$password}"); curl_setopt($download, CURLOPT_HTTPAUTH, CURLAUTH_ANY); } - curl_setopt($download, CURLOPT_HTTPHEADER, $headers); + curl_setopt($download, CURLOPT_HTTPHEADER, $flat_headers); curl_setopt($download, CURLOPT_HEADER, TRUE); curl_setopt($download, CURLOPT_RETURNTRANSFER, TRUE); curl_setopt($download, CURLOPT_ENCODING, ''); @@ -195,9 +197,13 @@ function http_request_get($url, $username = NULL, $password = NULL, $accept_inva $headers = preg_split("/(\r\n){2}/", $header); $header_lines = preg_split("/\r\n|\n|\r/", end($headers)); $result->headers = array(); - array_shift($header_lines); // skip HTTP response status while ($line = trim(array_shift($header_lines))) { + if (preg_match('~^HTTP/1\.\d (\d{3})~', $line, $matches) && !isset($result->code)) { + // Record the first response code encountered. + $result->code = (int) $matches[1]; + continue; + } list($header, $value) = explode(':', $line, 2); // Normalize the headers. $header = strtolower($header); @@ -211,7 +217,10 @@ function http_request_get($url, $username = NULL, $password = NULL, $accept_inva $result->headers[$header] = trim($value); } } - $result->code = curl_getinfo($download, CURLINFO_HTTP_CODE); + if (isset($result->headers['Location'])) { + $result->redirect_code = curl_getinfo($download, CURLINFO_HTTP_CODE); + $result->redirect_url = curl_getinfo($download, CURLINFO_EFFECTIVE_URL); + } curl_close($download); } @@ -223,8 +232,7 @@ function http_request_get($url, $username = NULL, $password = NULL, $accept_inva $result->code = isset($result->code) ? $result->code : 200; // In case of 304 Not Modified try to return cached data. 
- if ($result->code == 304) { - + if ($result->code == 304 || (isset($result->redirect_code) && $result->redirect_code == 304)) { if (isset($last_result)) { $last_result->from_cache = TRUE; return $last_result; diff --git a/plugins/FeedsHTTPFetcher.inc b/plugins/FeedsHTTPFetcher.inc index 4e7afc7..61bb234 100644 --- a/plugins/FeedsHTTPFetcher.inc +++ b/plugins/FeedsHTTPFetcher.inc @@ -29,7 +29,32 @@ class FeedsHTTPFetcherResult extends FeedsFetcherResult { public function getRaw() { feeds_include_library('http_request.inc', 'http_request'); $result = http_request_get($this->url, NULL, NULL, NULL, $this->timeout); - if (!in_array($result->code, array(200, 201, 202, 203, 204, 205, 206))) { + if ($result->code == 301) { + // Save new feed location to prevent further redirects. + if (!empty($this->feed_nid) && ($importer_id = feeds_get_importer_id(db_result(db_query("SELECT type FROM {node} WHERE nid = %d", $this->feed_nid))))) { + $src = feeds_source($importer_id, $this->feed_nid); + $config = $src->getConfig(); + $config[key($config)]['source'] = $result->redirect_url; + $src->setConfig($config); + $src->save(); + } + watchdog('feeds', 'Feed URL updated to @redirect_url due to permanent redirect. Old URL was @old_url.', array('@redirect_url' => $result->redirect_url, '@old_url' => $this->url), WATCHDOG_WARNING, l(t('View'), 'node/'. $this->feed_nid)); + $result->code = $result->redirect_code; + $this->url = $result->redirect_url; + } + elseif ($result->code == 302 || $result->code == 307) { + watchdog('feeds', 'Feed temporarily redirected to @redirect_url.', array('@redirect_url' => $result->redirect_url), WATCHDOG_WARNING, l(t('View'), 'node/'. $this->feed_nid)); + $result->code = $result->redirect_code; + $this->url = $result->redirect_url; + } + if ($result->code >= 400 && $result->code < 500) { + // Unpublish feeds that end in a bad request. 
We can't do this via + // node_load and node_save, however, because feeds_nodeapi with $op == + // 'update' expects $node to come from a form, which will set 'feeds'. + // Instead, just modify the status field directly in the database. + db_query("UPDATE {node} SET status = 0 WHERE nid = %d", $this->feed_nid); + } + if (!in_array($result->code, array(200, 201, 202, 203, 204, 205, 206, 304))) { throw new Exception(t('Download of @url failed with code !code.', array('@url' => $this->url, '!code' => $result->code))); } return $this->sanitizeRaw($result->data); diff --git a/tests/feeds.test b/tests/feeds.test index 4bcb20c..57c616c 100644 --- a/tests/feeds.test +++ b/tests/feeds.test @@ -239,7 +239,7 @@ class FeedsWebTestCase extends DrupalWebTestCase { * @return * The node id of the node created. */ - public function createFeedNode($id = 'syndication', $feed_url = NULL, $title = '', $content_type = NULL) { + public function createFeedNode($id = 'syndication', $feed_url = NULL, $title = '', $content_type = NULL, $assert_url = NULL) { if (empty($feed_url)) { $feed_url = $GLOBALS['base_url'] . '/' . drupal_get_path('module', 'feeds') . '/tests/feeds/developmentseed.rss2'; } @@ -280,7 +280,7 @@ class FeedsWebTestCase extends DrupalWebTestCase { ->fields('s', array('config')) ->execute()->fetchObject(); $config = unserialize($source->config); - $this->assertEqual($config['FeedsHTTPFetcher']['source'], $feed_url, t('URL in DB correct.')); + $this->assertEqual($config['FeedsHTTPFetcher']['source'], isset($assert_url) ? $assert_url : $feed_url, t('URL in DB correct.')); return $nid; } @@ -306,7 +306,7 @@ class FeedsWebTestCase extends DrupalWebTestCase { // Check that the URL was updated in the feeds_source table. 
$source = db_query("SELECT * FROM {feeds_source} WHERE feed_nid = :nid", array(':nid' => $nid))->fetchObject(); $config = unserialize($source->config); - $this->assertEqual($config['FeedsHTTPFetcher']['source'], $feed_url, t('URL in DB correct.')); + $this->assertEqual($config['FeedsHTTPFetcher']['source'], isset($assert_url) ? $assert_url : $feed_url, t('URL in DB correct.')); } /** @@ -340,7 +340,7 @@ class FeedsWebTestCase extends DrupalWebTestCase { $this->assertEqual(1, db_query("SELECT COUNT(*) FROM {feeds_source} WHERE id = :id AND feed_nid = 0", array(':id' => $id))->fetchField()); $source = db_query("SELECT * FROM {feeds_source} WHERE id = :id AND feed_nid = 0", array(':id' => $id))->fetchObject(); $config = unserialize($source->config); - $this->assertEqual($config['FeedsHTTPFetcher']['source'], $feed_url, t('URL in DB correct.')); + $this->assertEqual($config['FeedsHTTPFetcher']['source'], isset($assert_url) ? $assert_url : $feed_url, t('URL in DB correct.')); // Check whether feed got properly added to scheduler. $this->assertEqual(1, db_query("SELECT COUNT(*) FROM {job_schedule} WHERE type = :id AND id = 0 AND name = 'feeds_source_import' AND last <> 0 AND scheduled = 0", array(':id' => $id))->fetchField()); diff --git a/tests/feeds_tests.module b/tests/feeds_tests.module index ade38b2..d36e217 100644 --- a/tests/feeds_tests.module +++ b/tests/feeds_tests.module @@ -19,6 +19,11 @@ function feeds_tests_menu() { 'access arguments' => array('access content'), 'type' => MENU_CALLBACK, ); + $items['testing/feeds/redirect_test'] = array( + 'page callback' => 'feeds_test_response', + 'access arguments' => array('access content'), + 'type' => MENU_CALLBACK, + ); return $items; } @@ -78,6 +83,23 @@ function feeds_tests_files() { } /** + * Menu callback. Returns a redirect or other HTTP status code. + */ +function feeds_test_response($code) { + switch ($code) { + case 500: + header($_SERVER['SERVER_PROTOCOL'] . 
' 500 Internal Server Error'); + break; + case 301: + case 302: + case 307: + global $base_url; + $feeds_path = drupal_get_path('module', 'feeds') .'/tests/feeds'; + header("Location: {$base_url}/{$feeds_path}/developmentseed.rss2", TRUE, $code); + break; + } +} +/** * Implements hook_feeds_processor_targets_alter() */ function feeds_tests_feeds_processor_targets_alter(&$targets, $entity_type, $bundle_name) {