diff --git a/README.txt b/README.txt index c556d6b..27f9633 100644 --- a/README.txt +++ b/README.txt @@ -199,6 +199,11 @@ Default: private://feeds/cache Description: The location on the file system where results of HTTP requests are cached. +Name: feeds_in_progress_dir +Default: private://feeds/in_progress +Description: The location on the file system where temporary files are stored + that are in the process of being imported. + Name: feeds_sync_cache_feeds_http_interval Default: 21600 Description: How often the feeds cache directory should be checked for orphaned diff --git a/feeds.install b/feeds.install index 699b2a7..bc23366 100644 --- a/feeds.install +++ b/feeds.install @@ -72,6 +72,7 @@ function feeds_uninstall() { variable_del('feeds_debug'); variable_del('feeds_http_file_cache_dir'); variable_del('feeds_importer_class'); + variable_del('feeds_in_progress_dir'); variable_del('feeds_library_dir'); variable_del('feeds_never_use_curl'); variable_del('feeds_process_limit'); diff --git a/plugins/FeedsFetcher.inc b/plugins/FeedsFetcher.inc index 94de61d..4ae9cab 100644 --- a/plugins/FeedsFetcher.inc +++ b/plugins/FeedsFetcher.inc @@ -74,10 +74,15 @@ class FeedsFetcherResult extends FeedsResult { * If an unexpected problem occurred. */ public function getFilePath() { - if (!$this->fileExists()) { + if (empty($this->file_path)) { // No file exists yet. Save any raw data that we got. $this->saveRawToFile(); } + + // Check if given file exists now. + $this->checkFile(); + + // Return file path. return $this->sanitizeFile($this->file_path); } @@ -186,6 +191,10 @@ class FeedsFetcherResult extends FeedsResult { $file->status = 0; file_save($file); $this->file_path = $file->uri; + + // Clear raw data to save memory, but also to prevent saving the same raw data + // to a file again in the same request. 
+ $this->raw = NULL; } else { throw new RuntimeException(t('Cannot write content to %dest', array('%dest' => $destination))); diff --git a/tests/feeds_fetcher_http.test b/tests/feeds_fetcher_http.test index 3c1a1d6..d532f31 100644 --- a/tests/feeds_fetcher_http.test +++ b/tests/feeds_fetcher_http.test @@ -59,6 +59,86 @@ class FeedsFileHTTPTestCase extends FeedsWebTestCase { } /** + * Configures the environment so that multiple cron runs are needed to + * complete an import. + * + * @param string $source_url + * The URL of the file to import. + */ + protected function setUpMultipleCronRuns($source_url) { + // Process 5 items per batch. + variable_set('feeds_process_limit', 5); + + // Set variable to enforce that only five items get imported per cron run. + // @see feeds_tests_cron_queue_alter() + // @see feeds_tests_feeds_after_save() + variable_set('feeds_tests_feeds_source_import_queue_time', 5); + variable_set('feeds_tests_feeds_after_save_sleep', 1); + + // Set up importer. + $this->setUpImporter(); + // Only import during cron runs, not immediately. + $this->setSettings('node', NULL, array( + 'import_on_create' => FALSE, + )); + + // Set source file to import. + $edit = array( + 'feeds[FeedsHTTPFetcher][source]' => $source_url, + ); + $this->drupalPost('import/node', $edit, t('Import')); + + // Ensure that no nodes have been created yet. + $this->assertNodeCount(0, 'No nodes have been created yet (actual: @count).'); + } + + /** + * Returns the file in the Feeds in_progress directory. + * + * @return object + * The found file. + * + * @throws Exception + * In case no file was found, so the test can abort without issuing a fatal + * error. + */ + protected function getInProgressFile() { + // Assert that a file exists in the in_progress dir. + $files = file_scan_directory('private://feeds/in_progress', '/.*/'); + debug($files); + $this->assertEqual(1, count($files), 'The feeds "in progress" dir contains one file.'); + if (!count($files)) { + // Abort test. 
+ throw new Exception('File not found.'); + } + return reset($files); + } + + /** + * Asserts that the given number of nodes exist. + * + * @param int $expected_node_count + * The expected number of nodes in the node table. + * @param string $message + * (optional) The message to assert. + */ + protected function assertNodeCount($expected_node_count, $message = '') { + if (!$message) { + $message = '@expected nodes have been created (actual: @count).'; + } + + $node_count = db_select('node') + ->fields('node', array()) + ->countQuery() + ->execute() + ->fetchField(); + $this->assertEqual($expected_node_count, $node_count, format_string($message, array( + '@expected' => $expected_node_count, + '@count' => $node_count, + ))); + } + + /** * Test the Feed URL form. */ public function testFormValidation() { @@ -414,49 +494,17 @@ class FeedsFileHTTPTestCase extends FeedsWebTestCase { * - That the source is not refetched while the import has not completed yet. */ public function testImportSourceWithMultipleCronRuns() { - // Process 5 items per batch. - variable_set('feeds_process_limit', 5); - - // Set variable to enforce that only five items get imported per cron run. - // @see feeds_tests_cron_queue_alter() - // @see feeds_tests_feeds_after_save() - variable_set('feeds_tests_feeds_source_import_queue_time', 5); - variable_set('feeds_tests_feeds_after_save_sleep', 1); - - // Set up importer. - $this->setUpImporter(); - // Only import during cron runs, not immediately. - $this->setSettings('node', NULL, array( - 'import_on_create' => FALSE, - )); - - // Set source file to import. $source_url = url('testing/feeds/nodes.csv', array('absolute' => TRUE)); - $edit = array( - 'feeds[FeedsHTTPFetcher][source]' => $source_url, - ); - $this->drupalPost('import/node', $edit, t('Import')); - - // Ensure that no nodes have been created yet. 
- $node_count = db_select('node') - ->fields('node', array()) - ->countQuery() - ->execute() - ->fetchField(); - $this->assertEqual(0, $node_count, 'No nodes have been created yet.'); + $this->setUpMultipleCronRuns($source_url); // Run cron. Five nodes should be imported. $this->cronRun(); + // Assert that only one file was created in the in_progress dir. + $this->getInProgressFile(); + // Assert that five nodes have been created now. - $node_count = db_select('node') - ->fields('node', array()) - ->countQuery() - ->execute() - ->fetchField(); - $this->assertEqual(5, $node_count, format_string('Five nodes have been created (actual: @count).', array( - '@count' => $node_count, - ))); + $this->assertNodeCount(5); // Assert that the content is *not* saved in the feeds_source table. $source = db_select('feeds_source') @@ -474,14 +522,7 @@ class FeedsFileHTTPTestCase extends FeedsWebTestCase { // Run cron again. Another four nodes should be imported. $this->cronRun(); - $node_count = db_select('node') - ->fields('node', array()) - ->countQuery() - ->execute() - ->fetchField(); - $this->assertEqual(9, $node_count, format_string('Nine nodes have been created (actual: @count).', array( - '@count' => $node_count, - ))); + $this->assertNodeCount(9); // Check if the imported nodes match that from the original source. $node = node_load(1); @@ -497,6 +538,29 @@ class FeedsFileHTTPTestCase extends FeedsWebTestCase { } /** + * Tests that an import is aborted when the temporary file in the in_progress + * dir is removed. + */ + public function testAbortImportWhenTemporaryFileIsDeleted() { + $source_url = $GLOBALS['base_url'] . '/' . drupal_get_path('module', 'feeds') . '/tests/feeds/many_nodes_ordered.csv'; + $this->setUpMultipleCronRuns($source_url); + + // Run the first cron. + $this->cronRun(); + + // Assert that five nodes have been created. + $this->assertNodeCount(5); + + // Remove file. 
+ $file = $this->getInProgressFile(); + drupal_unlink($file->uri); + + // Run cron again and assert that no more nodes are imported. + $this->cronRun(); + $this->assertNodeCount(5); + } + + /** * Tests that FeedsHTTPFetcherResult::getRaw() always returns the same result * for the same instance, even when caches are cleared in between. *