From 273042ecdf73aaf486f319f2780901c7fcce76d1 Mon Sep 17 00:00:00 2001 From: Carlos Rodriguez Date: Thu, 30 Jun 2011 18:11:42 -0700 Subject: [PATCH 1/1] Handle redirects and other HTTP codes. --- libraries/http_request.inc | 46 ++++++++------ plugins/FeedsHTTPFetcher.inc | 27 ++++++++- tests/feeds.test | 6 +- tests/feeds_http_codes.test | 135 ++++++++++++++++++++++++++++++++++++++++++ tests/feeds_test.info | 5 ++ tests/feeds_test.module | 32 ++++++++++ 6 files changed, 228 insertions(+), 23 deletions(-) create mode 100644 tests/feeds_http_codes.test create mode 100644 tests/feeds_test.info create mode 100644 tests/feeds_test.module diff --git a/libraries/http_request.inc b/libraries/http_request.inc index 3a7b120..d135863 100644 --- a/libraries/http_request.inc +++ b/libraries/http_request.inc @@ -101,31 +101,33 @@ function http_request_get($url, $username = NULL, $password = NULL, $accept_inva $has_etag = TRUE; if (!empty($last_headers['ETag'])) { - if ($curl) { - $headers[] = 'If-None-Match: '. $last_headers['ETag']; - } - else { - $headers['If-None-Match'] = $last_headers['ETag']; - } + $headers['If-None-Match'] = $last_headers['ETag']; } if (!empty($last_headers['Last-Modified'])) { - if ($curl) { - $headers[] = 'If-Modified-Since: '. $last_headers['Last-Modified']; - } - else { - $headers['If-Modified-Since'] = $last_headers['Last-Modified']; - } + $headers['If-Modified-Since'] = $last_headers['Last-Modified']; } if (!empty($username) && !$curl) { $headers['Authorization'] = 'Basic '. base64_encode("$username:$password"); } } + // If we are inside a test case, make sure requests maintain the test UA. + global $db_prefix; + if (is_string($db_prefix) && preg_match("/(simpletest\d+)$/", $db_prefix, $matches)) { + $headers['User-Agent'] = drupal_generate_test_ua($matches[1]); + } + + $headers += array('User-Agent' => 'Drupal (+http://drupal.org/)'); + // Support the 'feed' and 'webcal' schemes by converting them into 'http'. $url = strtr($url, array('feed://' => 'http://', 'webcal://' => 'http://')); if ($curl) { - $headers[] = 'User-Agent: Drupal (+http://drupal.org/)'; + $flat_headers = array(); + foreach ($headers as $name => $value) { + $flat_headers[] = "$name: $value"; + } + $result = new stdClass(); // Parse the URL and make sure we can handle the schema. @@ -156,7 +158,7 @@ function http_request_get($url, $username = NULL, $password = NULL, $accept_inva if (!empty($username)) { curl_setopt($download, CURLOPT_USERPWD, "{$username}:{$password}"); } - curl_setopt($download, CURLOPT_HTTPHEADER, $headers); + curl_setopt($download, CURLOPT_HTTPHEADER, $flat_headers); curl_setopt($download, CURLOPT_HEADER, TRUE); curl_setopt($download, CURLOPT_RETURNTRANSFER, TRUE); curl_setopt($download, CURLOPT_ENCODING, ''); @@ -175,8 +177,12 @@ function http_request_get($url, $username = NULL, $password = NULL, $accept_inva $header_lines = preg_split("/\r\n|\n|\r/", $header); $result->headers = array(); - array_shift($header_lines); // skip HTTP response status while ($line = trim(array_shift($header_lines))) { + if (preg_match('~^HTTP/1\.\d (\d{3})~', $line, $matches) && !isset($result->code)) { + // Record the first response code encountered. + $result->code = (int) $matches[1]; + continue; + } list($header, $value) = explode(':', $line, 2); if (isset($result->headers[$header]) && $header == 'Set-Cookie') { // RFC 2109: the Set-Cookie response header comprises the token Set- @@ -187,20 +193,22 @@ function http_request_get($url, $username = NULL, $password = NULL, $accept_inva $result->headers[$header] = trim($value); } } - $result->code = curl_getinfo($download, CURLINFO_HTTP_CODE); + if (isset($result->headers['Location'])) { + $result->redirect_code = curl_getinfo($download, CURLINFO_HTTP_CODE); + $result->redirect_url = curl_getinfo($download, CURLINFO_EFFECTIVE_URL); + } curl_close($download); } } else { - $result = drupal_http_request($url, $headers); + $result = drupal_http_request($url, $headers, 'GET', NULL, 10); } $result->code = isset($result->code) ? $result->code : 200; // In case of 304 Not Modified try to return cached data. - if ($result->code == 304) { - + if ($result->code == 304 || (isset($result->redirect_code) && $result->redirect_code == 304)) { if (isset($last_result)) { $last_result->from_cache = TRUE; return $last_result; diff --git a/plugins/FeedsHTTPFetcher.inc b/plugins/FeedsHTTPFetcher.inc index a5e88bb..45b58ca 100644 --- a/plugins/FeedsHTTPFetcher.inc +++ b/plugins/FeedsHTTPFetcher.inc @@ -29,7 +29,32 @@ class FeedsHTTPBatch extends FeedsImportBatch { public function getRaw() { feeds_include_library('http_request.inc', 'http_request'); $result = http_request_get($this->url); - if (!in_array($result->code, array(200, 201, 202, 203, 204, 205, 206))) { + if ($result->code == 301) { + // Save new feed location to prevent further redirects. + if (!empty($this->feed_nid) && ($importer_id = feeds_get_importer_id(db_result(db_query("SELECT type FROM {node} WHERE nid = %d", $this->feed_nid))))) { + $src = feeds_source($importer_id, $this->feed_nid); + $config = $src->getConfig(); + $config[key($config)]['source'] = $result->redirect_url; + $src->setConfig($config); + $src->save(); + } + watchdog('feeds', 'Feed URL updated to @redirect_url due to permanent redirect. Old URL was @old_url.', array('@redirect_url' => $result->redirect_url, '@old_url' => $this->url), WATCHDOG_WARNING, l(t('View'), 'node/'. $this->feed_nid)); + $result->code = $result->redirect_code; + $this->url = $result->redirect_url; + } + elseif ($result->code == 302 || $result->code == 307) { + watchdog('feeds', 'Feed temporarily redirected to @redirect_url.', array('@redirect_url' => $result->redirect_url), WATCHDOG_WARNING, l(t('View'), 'node/'. $this->feed_nid)); + $result->code = $result->redirect_code; + $this->url = $result->redirect_url; + } + if ($result->code >= 400 && $result->code < 500) { + // Unpublish feeds that end in a bad request. We can't do this via + // node_load and node_save, however, because feeds_nodeapi with $op ==' + // 'update' expects $node to come from a form, which will set 'feeds'. + // Instead, just modify the status field directly in the database. + db_query("UPDATE {node} SET status = 0 WHERE nid = %d", $this->feed_nid); + } + if (!in_array($result->code, array(200, 201, 202, 203, 204, 205, 206, 304))) { throw new Exception(t('Download of @url failed with code !code.', array('@url' => $this->url, '!code' => $result->code))); } return $result->data; diff --git a/tests/feeds.test b/tests/feeds.test index 4bdf73d..eabef77 100644 --- a/tests/feeds.test +++ b/tests/feeds.test @@ -92,7 +92,7 @@ class FeedsWebTestCase extends DrupalWebTestCase { - + @@ -184,7 +184,7 @@ class FeedsWebTestCase extends DrupalWebTestCase { * @return * The node id of the node created. */ - public function createFeedNode($id = 'syndication', $feed_url = NULL, $title = '', $content_type = NULL) { + public function createFeedNode($id = 'syndication', $feed_url = NULL, $title = '', $content_type = NULL, $assert_url = NULL) { if (empty($feed_url)) { $feed_url = $GLOBALS['base_url'] .'/'. drupal_get_path('module', 'feeds') .'/tests/feeds/developmentseed.rss2'; } @@ -214,7 +214,7 @@ class FeedsWebTestCase extends DrupalWebTestCase { $source = db_fetch_object(db_query("SELECT * FROM {feeds_source} WHERE id = '%s' AND feed_nid = %d", $id, $nid)); $config = unserialize($source->config); - $this->assertEqual($config['FeedsHTTPFetcher']['source'], $feed_url, t('URL in DB correct.')); + $this->assertEqual($config['FeedsHTTPFetcher']['source'], isset($assert_url) ? $assert_url : $feed_url, t('URL in DB correct.')); return $nid; } diff --git a/tests/feeds_http_codes.test b/tests/feeds_http_codes.test new file mode 100644 index 0000000..4939b0e --- /dev/null +++ b/tests/feeds_http_codes.test @@ -0,0 +1,135 @@ + 'HTTP response code handling', + 'description' => 'Test Feeds response code handling.', + 'group' => 'Feeds', + ); + } + + function setUp() { + parent::setUp(array('feeds_test')); + } + + /** + * Test that HTTP library actually resolves redirects correctly. + */ + function testRedirectResolution() { + feeds_include_library('http_request.inc', 'http_request'); + $using_curl = http_request_use_curl() ? t('Using cURL for the test.') : t('Using drupal_http_request() fallback for test.'); + $this->pass($using_curl); + + $url = url('feeds_test/301', array('absolute' => TRUE)); + $result = http_request_get($url); + $this->assertEqual($result->code, 301, t('301 code returned from server.')); + $this->assertTrue(strstr($result->data, 'Open Atrium'), t('Redirect location resolved.')); + $this->assertEqual($result->redirect_code, 200, t('200 code returned after redirects.')); + + $url = url('feeds_test/302', array('absolute' => TRUE)); + $result = http_request_get($url); + $this->assertEqual($result->code, 302, t('302 code returned from server.')); + $this->assertTrue(strstr($result->data, 'Open Atrium'), t('Redirect location resolved.')); + $this->assertEqual($result->redirect_code, 200, t('200 code returned after redirects.')); + + $url = url('feeds_test/307', array('absolute' => TRUE)); + $result = http_request_get($url); + $this->assertEqual($result->code, 307, t('307 code returned from server.')); + $this->assertTrue(strstr($result->data, 'Open Atrium'), t('Redirect location resolved.')); + $this->assertEqual($result->redirect_code, 200, t('200 code returned after redirects.')); + } + + /** + * Test again with fallback library if necessary. + */ + function testFallback() { + if (http_request_use_curl()) { + variable_set('feeds_never_use_curl', TRUE); + $this->testRedirectResolution(); + } + } + + /** + * Test code handling in the context of an importer. + */ + function testImporter() { + // Create an importer configuration. + $this->createImporterConfiguration('Syndication', 'syndication'); + $this->addMappings('syndication', + array( + array( + 'source' => 'title', + 'target' => 'title', + 'unique' => FALSE, + ), + array( + 'source' => 'description', + 'target' => 'body', + 'unique' => FALSE, + ), + array( + 'source' => 'timestamp', + 'target' => 'created', + 'unique' => FALSE, + ), + array( + 'source' => 'url', + 'target' => 'url', + 'unique' => TRUE, + ), + array( + 'source' => 'guid', + 'target' => 'guid', + 'unique' => TRUE, + ), + ) + ); + + // Check 302 redirection. + $url = url('feeds_test/302', array('absolute' => TRUE)); + $nid = $this->createFeedNode('syndication', $url); + $this->assertText('Created 10 Story nodes.'); + + // Check 307 redirection. + $url = url('feeds_test/307', array('absolute' => TRUE)); + $nid = $this->createFeedNode('syndication', $url); + $this->assertText('Created 10 Story nodes.'); + + // Check 301 redirection. Assert that the URL in db is the redirected URL. + $url = url('feeds_test/301', array('absolute' => TRUE)); + $nid = $this->createFeedNode('syndication', $url, '', NULL, $GLOBALS['base_url'] .'/'. drupal_get_path('module', 'feeds') .'/tests/feeds' .'/developmentseed.rss2'); + $this->assertText('Created 10 Story nodes.'); + + // Check 500 handling. Edit the existing node. + $edit = array( + 'feeds[FeedsHTTPFetcher][source]' => url('feeds_test/500', array('absolute' => TRUE)), + ); + $this->drupalPost("node/$nid/edit", $edit, t('Save')); + + $this->drupalPost("node/$nid/import", array(), t('Import')); + $this->assertText('failed with code 500'); + $status = db_result(db_query("SELECT status FROM {node} WHERE nid = %d", $nid)); + $this->assertEqual($status, 1, t('Feed node with 500 status still published.')); + + // Check 404 handling. Edit the existing node with a fake feed URL. + $edit = array( + 'feeds[FeedsHTTPFetcher][source]' => url('feeds_test_not_there', array('absolute' => TRUE)), + ); + $this->drupalPost("node/$nid/edit", $edit, t('Save')); + + $this->drupalPost("node/$nid/import", array(), t('Import')); + $this->assertText('failed with code 404'); + $status = db_result(db_query("SELECT status FROM {node} WHERE nid = %d", $nid)); + $this->assertEqual($status, 0, t('Feed node with 404 status was unpublished.')); + } +} diff --git a/tests/feeds_test.info b/tests/feeds_test.info new file mode 100644 index 0000000..ffa0484 --- /dev/null +++ b/tests/feeds_test.info @@ -0,0 +1,5 @@ +name = Feeds test +description = Helper module for running Feeds tests. +package = Feeds +core = 6.x +hidden = TRUE diff --git a/tests/feeds_test.module b/tests/feeds_test.module new file mode 100644 index 0000000..0a3927e --- /dev/null +++ b/tests/feeds_test.module @@ -0,0 +1,32 @@ + MENU_CALLBACK, + 'access arguments' => array('access content'), + 'page callback' => 'feeds_test_response', + ); + + return $items; +} + +/** + * Menu callback. Returns a redirect or other HTTP status code. + */ +function feeds_test_response($code) { + switch ($code) { + case 500: + header($_SERVER['SERVER_PROTOCOL'] . ' 500 Service Unavailable'); + break; + case 301: + case 302: + case 307: + global $base_url; + $feeds_path = drupal_get_path('module', 'feeds') .'/tests/feeds'; + header("Location: {$base_url}/{$feeds_path}/developmentseed.rss2", TRUE, $code); + break; + } +} -- 1.7.4.1