diff --git a/feeds.install b/feeds.install index f743ee7..66f23ee 100644 --- a/feeds.install +++ b/feeds.install @@ -175,6 +175,13 @@ function feeds_schema() { 'description' => 'Cache for fetcher result.', 'serialize' => TRUE, ), + 'fid' => array( + 'description' => 'ID of the file used by the fetcher result.', + 'type' => 'int', + 'unsigned' => TRUE, + 'not null' => TRUE, + 'default' => 0, + ), 'imported' => array( 'type' => 'int', 'not null' => TRUE, @@ -758,3 +765,16 @@ function feeds_update_7213() { // Activate our custom cache handler for the HTTP cache. variable_set('cache_class_cache_feeds_http', 'FeedsHTTPCache'); } + +/** + * Adds file ID column to feeds_source table. + */ +function feeds_update_7214() { + db_add_field('feeds_source', 'fid', array( + 'description' => 'ID of the file used by the fetcher result.', + 'type' => 'int', + 'unsigned' => TRUE, + 'not null' => TRUE, + 'default' => 0, + )); +} diff --git a/feeds.module b/feeds.module index 2c24962..3d92551 100644 --- a/feeds.module +++ b/feeds.module @@ -83,6 +83,38 @@ function feeds_cron() { } variable_set('feeds_sync_cache_feeds_http_last_check', REQUEST_TIME); } + + // Remove temporary import files that were not cleaned up properly. This can + // happen when an import abruptly ended. + $result = db_query('SELECT fm.fid FROM {file_managed} AS fm + INNER JOIN {file_usage} AS fu USING(fid) + LEFT JOIN {feeds_source} AS fs USING(fid) + WHERE fm.timestamp < :timestamp + AND fu.module = \'feeds\' + AND fu.type LIKE \'FR:%\' + AND fs.id IS NULL', array( + ':timestamp' => REQUEST_TIME - DRUPAL_MAXIMUM_TEMP_FILE_AGE + )); + foreach ($result as $row) { + if ($file = file_load($row->fid)) { + // Delete all usage by Feeds and of type "Fetcher Result" first. + db_delete('file_usage') + ->condition('fid', $row->fid) + ->condition('module', 'feeds') + ->condition('type', 'FR:%', 'LIKE') + ->execute(); + + $references = file_usage_list($file); + if (empty($references)) { + if (!file_delete($file)) { + watchdog('file system', 'Could not delete temporary file "%path" during garbage collection', array('%path' => $file->uri), WATCHDOG_ERROR); + } + } + else { + watchdog('file system', 'Did not delete temporary file "%path" during garbage collection, because it is in use by the following modules: %modules.', array('%path' => $file->uri, '%modules' => implode(', ', array_keys($references))), WATCHDOG_INFO); + } + } + } } /** diff --git a/includes/FeedsSource.inc b/includes/FeedsSource.inc index ba86c66..db72179 100644 --- a/includes/FeedsSource.inc +++ b/includes/FeedsSource.inc @@ -421,6 +421,8 @@ class FeedsSource extends FeedsConfigurable { // Fetch. if (empty($this->fetcher_result) || FEEDS_BATCH_COMPLETE == $this->progressParsing()) { $this->fetcher_result = $this->importer->fetcher->fetch($this); + // Set context on the fetcher result. + $this->fetcher_result->setContext($this->id, $this->feed_nid); // Clean the parser's state, we are parsing an entirely new file. unset($this->state[FEEDS_PARSE]); } @@ -651,8 +653,12 @@ class FeedsSource extends FeedsConfigurable { * Clears states. */ protected function clearStates() { + if ($this->fetcher_result instanceof FeedsFetcherResult) { + $this->fetcher_result->cleanUp(); + } $this->state = array(); $this->fetcher_result = NULL; + $this->fid = NULL; } /** @@ -852,6 +858,7 @@ class FeedsSource extends FeedsConfigurable { 'source' => $source, 'state' => isset($this->state) ? $this->state : FALSE, 'fetcher_result' => isset($this->fetcher_result) ? $this->fetcher_result : FALSE, + 'fid' => ($this->fetcher_result instanceof FeedsFetcherResult) ? $this->fetcher_result->getFileId() : NULL, ); if (db_query_range("SELECT 1 FROM {feeds_source} WHERE id = :id AND feed_nid = :nid", 0, 1, array(':id' => $this->id, ':nid' => $this->feed_nid))->fetchField()) { drupal_write_record('feeds_source', $object, array('id', 'feed_nid')); diff --git a/plugins/FeedsFetcher.inc b/plugins/FeedsFetcher.inc index 4ae9cab..4fc73e3 100644 --- a/plugins/FeedsFetcher.inc +++ b/plugins/FeedsFetcher.inc @@ -24,6 +24,27 @@ class FeedsFetcherResult extends FeedsResult { protected $file_path; /** + * The file ID of the file. Can be empty. + * + * @var int + */ + protected $fid; + + /** + * The ID of the importer that uses this fetcher result. + * + * @var int + */ + protected $importer_id; + + /** + * The ID of the feed node. + * + * @var int + */ + protected $feed_nid; + + /** * Constructor. */ public function __construct($raw) { @@ -45,6 +66,22 @@ class FeedsFetcherResult extends FeedsResult { } /** + * Sets the source that uses this fetcher result. + * + * This context is used to register the usage of a file that is saved + * when the import is not completed in one run. + * + * @param string $importer_id + * The ID of the importer that uses this fetcher result. + * @param int $feed_nid + * The ID of the feed node. + */ + public function setContext($importer_id, $feed_nid) { + $this->importer_id = $importer_id; + $this->feed_nid = $feed_nid; + } + + /** * Returns the raw content. * * @return string @@ -87,6 +124,17 @@ class FeedsFetcherResult extends FeedsResult { } /** + * Returns the ID of the temporary file to import. + * + * @return int | null + * A file ID, if there is one. + * Null otherwise. + */ + public function getFileId() { + return $this->fid; + } + + /** * Returns directory for storing files that are in progress of import. * * @return string @@ -188,12 +236,20 @@ class FeedsFetcherResult extends FeedsResult { $this->file_path = FALSE; if ($file = file_save_data($this->getRaw(), $this->constructFilePath())) { - $file->status = 0; + $file->status = ($this->importer_id) ? FILE_STATUS_PERMANENT : 0; file_save($file); $this->file_path = $file->uri; + $this->fid = $file->fid; + + // Eventually add file usage. + if ($this->importer_id) { + // File usage type may only be 64 chars long. "FR" is an abbreviation of + // "FetcherResult". + file_usage_add($file, 'feeds', 'FR:' . substr($this->importer_id, 0, 61), $this->feed_nid); + } - // Clear raw data to save memory, but also to prevent saving the same raw data - // to a file again in the same request. + // Clear raw data to save memory, but also to prevent saving the same raw + // data to a file again in the same request. $this->raw = NULL; } else { @@ -202,6 +258,23 @@ class FeedsFetcherResult extends FeedsResult { } /** + * Called after an import is finished to perform clean up tasks. + */ + public function cleanUp() { + if (isset($this->fid) && $file = file_load($this->fid)) { + // Eventually delete file usage. + if ($this->importer_id) { + // File usage type may only be 64 chars long. + file_usage_delete($file, 'feeds', 'FR:' . substr($this->importer_id, 0, 61), $this->feed_nid); + } + + file_delete($file); + $this->fid = NULL; + $this->file_path = NULL; + } + } + + /** * Sanitize the raw content string. Currently supported sanitizations: * * - Remove BOM header from UTF-8 files. diff --git a/plugins/FeedsFileFetcher.inc b/plugins/FeedsFileFetcher.inc index 7f63455..ce928c4 100644 --- a/plugins/FeedsFileFetcher.inc +++ b/plugins/FeedsFileFetcher.inc @@ -32,6 +32,14 @@ class FeedsFileFetcherResult extends FeedsFetcherResult { $this->checkFile(); return $this->sanitizeFile($this->file_path); } + + /** + * Overrides parent::cleanUp(). + */ + public function cleanUp() { + // Do not remove the uploaded file. If it should be removed, then + // FeedsFileFetcher::afterImport() will take care of that. + } } /** diff --git a/tests/feeds_fetcher_http.test b/tests/feeds_fetcher_http.test index 3acd3c8..af4a633 100644 --- a/tests/feeds_fetcher_http.test +++ b/tests/feeds_fetcher_http.test @@ -77,9 +77,16 @@ class FeedsFileHTTPTestCase extends FeedsWebTestCase { // Set up importer. $this->setUpImporter(); + + // Turn off periodic import. + $this->setSettings('node', NULL, array( + 'import_period' => FEEDS_SCHEDULE_NEVER, + )); + // Only import during cron runs, not immediately. $this->setSettings('node', NULL, array( - 'import_on_create' => FALSE, + 'import_on_create' => TRUE, + 'process_in_background' => TRUE, )); // Set source file to import. @@ -595,6 +602,148 @@ class FeedsFileHTTPTestCase extends FeedsWebTestCase { } /** + * Tests that the temporary file is not removed when the import has not + * completed yet. + * + * When an import takes multiple cron runs to complete, a file is saved in the + * feeds 'in progress' directory. This file should remain there when it is + * known that the Feeds import has not yet completed. + */ + public function testTemporaryFileNoDelete() { + $source_url = $GLOBALS['base_url'] . '/' . drupal_get_path('module', 'feeds') . '/tests/feeds/many_nodes_ordered.csv'; + $this->setUpMultipleCronRuns($source_url); + + // Run the first cron. + $this->cronRun(); + + // Assert that five nodes have been created now. + $this->assertNodeCount(5); + + // Assert that a file exists in the in_progress dir. + $file = $this->getInProgressFile(); + + // Change the created time of all managed files to be 24 hours in the past. + // Drupal deletes temporary files after six hours. + db_update('file_managed') + ->fields(array( + 'timestamp' => REQUEST_TIME - DRUPAL_MAXIMUM_TEMP_FILE_AGE - 1, + )) + ->execute(); + + // Run cron again. + $this->cronRun(); + + // Assert that 10 nodes have been created in total. + $this->assertNodeCount(10); + + // Assert that the file still exist as the import has not completed yet. + $this->assertTrue(file_exists($file->uri), format_string('The file @uri exists.', array( + '@uri' => $file->uri, + ))); + } + + /** + * Tests that the temporary file gets cleaned up after the import has + * completed. + */ + public function testTemporaryFileCleanUpAfterImport() { + $source_url = url('testing/feeds/nodes.csv', array('absolute' => TRUE)); + $this->setUpMultipleCronRuns($source_url); + + // Run the first cron. + $this->cronRun(); + + // Assert that a file exists in the in_progress dir. + $file = $this->getInProgressFile(); + + // Change the created time of all managed files to be 24 hours in the past. + // Drupal deletes temporary files after six hours. + db_update('file_managed') + ->fields(array('timestamp' => REQUEST_TIME - 86400)) + ->execute(); + + // Run cron again twice. First one to complete the import, second one to + // ensure that the file is cleaned up. + $this->cronRun(); + $this->cronRun(); + + // Assert that 9 nodes have been created in total. + $this->assertNodeCount(9); + + // Clear PHP file exists cache. + clearstatcache(); + + // Assert that the temporary file no longer exists. + $this->assertFalse(file_exists($file->uri), format_string('The file @uri no longer exists.', array( + '@uri' => $file->uri, + ))); + } + + /** + * Tests that the temporary file gets cleaned up when unlocking a feed. + */ + public function testTemporaryFileCleanUpWhenUnlockingFeed() { + $source_url = url('testing/feeds/nodes.csv', array('absolute' => TRUE)); + $this->setUpMultipleCronRuns($source_url); + + // Run the first cron. + $this->cronRun(); + $this->assertNodeCount(5); + + // Assert that a file exists in the in_progress dir. + $file = $this->getInProgressFile(); + + // Abort the import by unlocking the feeds source. + feeds_source('node')->unlock(); + + // Assert that the temporary file no longer exists at this point. + $this->assertFalse(file_exists($file->uri), format_string('The file @uri no longer exists.', array( + '@uri' => $file->uri, + ))); + + // After this point, the import task remains in the queue. + } + + /** + * Tests that the temporary file gets cleaned up if the import gets aborted abruptly. + */ + public function testTemporaryFileCleanUpWhenAbortingImportAbruptly() { + $source_url = url('testing/feeds/nodes.csv', array('absolute' => TRUE)); + $this->setUpMultipleCronRuns($source_url); + + // Run the first cron. + $this->cronRun(); + + // Assert that a file exists in the in_progress dir. + $file = $this->getInProgressFile(); + + // Change the created time of all managed files to be 24 hours in the past. + // Drupal deletes temporary files after six hours. + db_update('file_managed') + ->fields(array('timestamp' => REQUEST_TIME - 86400)) + ->execute(); + + // Abort the import by deleting the feeds source in the database. + db_delete('feeds_source') + ->condition('id', 'node') + ->execute(); + + // Run cron again. + $this->cronRun(); + + // Assert that only 5 nodes have been created, since the import got aborted. + $this->assertNodeCount(5); + + // Clear PHP file exists cache. + clearstatcache(); + + // Assert that the temporary file no longer exists. + $this->assertFalse(file_exists($file->uri), format_string('The file @uri no longer exists.', array( + '@uri' => $file->uri, + ))); + } + + /** * Tests that FeedsHTTPFetcherResult::getRaw() always returns the same result * for the same instance, even when caches are cleared in between. *