diff --git a/feeds.install b/feeds.install
index f743ee7..699bdc1 100644
--- a/feeds.install
+++ b/feeds.install
@@ -175,6 +175,13 @@ function feeds_schema() {
         'description' => 'Cache for fetcher result.',
         'serialize' => TRUE,
       ),
+      'fid' => array(
+        'description' => 'ID of the file used by the fetcher result.',
+        'type' => 'int',
+        'unsigned' => TRUE,
+        'not null' => TRUE,
+        'default' => 0,
+      ),
       'imported' => array(
         'type' => 'int',
         'not null' => TRUE,
@@ -758,3 +765,17 @@ function feeds_update_7213() {
   // Activate our custom cache handler for the HTTP cache.
   variable_set('cache_class_cache_feeds_http', 'FeedsHTTPCache');
 }
+
+/**
+ * Adds file ID column to feeds_source table.
+ */
+function feeds_update_7214() {
+  db_add_field('feeds_source', 'fid', array(
+    'description' => 'ID of the file used by the fetcher result.',
+    'type' => 'int',
+    'unsigned' => TRUE,
+    'not null' => TRUE,
+    // A default is required to add a NOT NULL column to a populated table.
+    'default' => 0,
+  ));
+}
diff --git a/feeds.module b/feeds.module
index 16de9d2..d0e22dd 100644
--- a/feeds.module
+++ b/feeds.module
@@ -79,6 +79,35 @@ function feeds_cron() {
     }
     variable_set('feeds_sync_cache_feeds_http_last_check', REQUEST_TIME);
   }
+
+  // Remove temporary import files that were not cleaned up properly. This can
+  // happen when an import abruptly ended.
+  $result = db_query('SELECT DISTINCT fm.fid FROM {file_managed} AS fm
+    INNER JOIN {file_usage} AS fu USING(fid)
+    LEFT JOIN {feeds_source} AS fs USING(fid)
+    WHERE fm.timestamp < :timestamp AND fu.module = \'feeds\' AND fu.type LIKE \'FR:%\' AND fs.id IS NULL', array(
+    ':timestamp' => REQUEST_TIME - DRUPAL_MAXIMUM_TEMP_FILE_AGE,
+  ));
+  foreach ($result as $row) {
+    if ($file = file_load($row->fid)) {
+      // Delete all usage by Feeds and of type "Fetcher Result" first.
+      db_delete('file_usage')
+        ->condition('fid', $row->fid)
+        ->condition('module', 'feeds')
+        ->condition('type', db_like('FR:') . '%', 'LIKE')
+        ->execute();
+
+      $references = file_usage_list($file);
+      if (empty($references)) {
+        if (!file_delete($file)) {
+          watchdog('file system', 'Could not delete temporary file "%path" during garbage collection', array('%path' => $file->uri), WATCHDOG_ERROR);
+        }
+      }
+      else {
+        watchdog('file system', 'Did not delete temporary file "%path" during garbage collection, because it is in use by the following modules: %modules.', array('%path' => $file->uri, '%modules' => implode(', ', array_keys($references))), WATCHDOG_INFO);
+      }
+    }
+  }
 }
 
 /**
diff --git a/includes/FeedsSource.inc b/includes/FeedsSource.inc
index 1dddbc1..50f7d20 100644
--- a/includes/FeedsSource.inc
+++ b/includes/FeedsSource.inc
@@ -421,6 +421,8 @@ class FeedsSource extends FeedsConfigurable {
       // Fetch.
       if (empty($this->fetcher_result) || FEEDS_BATCH_COMPLETE == $this->progressParsing()) {
         $this->fetcher_result = $this->importer->fetcher->fetch($this);
+        // Set context on the fetcher result.
+        $this->fetcher_result->setContext($this->id, $this->feed_nid);
         // Clean the parser's state, we are parsing an entirely new file.
         unset($this->state[FEEDS_PARSE]);
       }
@@ -656,6 +658,7 @@ class FeedsSource extends FeedsConfigurable {
     }
     $this->state = array();
     $this->fetcher_result = NULL;
+    $this->fid = NULL;
   }
 
   /**
@@ -855,6 +858,9 @@ class FeedsSource extends FeedsConfigurable {
       'source' => $source,
       'state' => isset($this->state) ? $this->state : FALSE,
       'fetcher_result' => isset($this->fetcher_result) ? $this->fetcher_result : FALSE,
+      // Store the file ID so that cron does not garbage collect a file that is
+      // still needed by an import in progress. 0 means "no file".
+      'fid' => ($this->fetcher_result instanceof FeedsFetcherResult) ? (int) $this->fetcher_result->getFileId() : 0,
     );
     if (db_query_range("SELECT 1 FROM {feeds_source} WHERE id = :id AND feed_nid = :nid", 0, 1, array(':id' => $this->id, ':nid' => $this->feed_nid))->fetchField()) {
       drupal_write_record('feeds_source', $object, array('id', 'feed_nid'));
diff --git a/plugins/FeedsFetcher.inc b/plugins/FeedsFetcher.inc
index 718b881..6f18d2e 100644
--- a/plugins/FeedsFetcher.inc
+++ b/plugins/FeedsFetcher.inc
@@ -31,6 +31,20 @@ class FeedsFetcherResult extends FeedsResult {
   protected $fid;
 
   /**
+   * The ID of the importer that uses this fetcher result.
+   *
+   * @var string
+   */
+  protected $importer_id;
+
+  /**
+   * The ID of the feed node.
+   *
+   * @var int
+   */
+  protected $feed_nid;
+
+  /**
    * Constructor.
    */
   public function __construct($raw) {
@@ -52,6 +66,22 @@ class FeedsFetcherResult extends FeedsResult {
   }
 
   /**
+   * Sets the source that uses this fetcher result.
+   *
+   * This context is used to register the usage of a file that is saved
+   * when the import is not completed in one run.
+   *
+   * @param string $importer_id
+   *   The ID of the importer that uses this fetcher result.
+   * @param int $feed_nid
+   *   The ID of the feed node.
+   */
+  public function setContext($importer_id, $feed_nid) {
+    $this->importer_id = $importer_id;
+    $this->feed_nid = $feed_nid;
+  }
+
+  /**
    * Returns the raw content.
    *
    * @return string
@@ -94,6 +124,17 @@ class FeedsFetcherResult extends FeedsResult {
   }
 
   /**
+   * Returns the ID of the temporary file to import.
+   *
+   * @return int|null
+   *   A file ID, if there is one.
+   *   Null otherwise.
+   */
+  public function getFileId() {
+    return $this->fid;
+  }
+
+  /**
    * Returns directory for storing files that are in progress of import.
    *
    * @return string
@@ -195,13 +236,20 @@ class FeedsFetcherResult extends FeedsResult {
     $this->file_path = FALSE;
 
     if ($file = file_save_data($this->getRaw(), $this->constructFilePath())) {
-      $file->status = FILE_STATUS_PERMANENT;
+      $file->status = ($this->importer_id) ? FILE_STATUS_PERMANENT : FILE_STATUS_TEMPORARY;
       file_save($file);
       $this->file_path = $file->uri;
       $this->fid = $file->fid;
 
-      // Clear raw data to save memory, but also to prevent saving the same raw data
-      // to a file again in the same request.
+      // Register file usage, but only if the source context is known.
+      if ($this->importer_id) {
+        // File usage type may only be 64 chars long. "FR" is an abbreviation of
+        // "FetcherResult".
+        file_usage_add($file, 'feeds', 'FR:' . substr($this->importer_id, 0, 61), $this->feed_nid);
+      }
+
+      // Clear raw data to save memory, but also to prevent saving the same raw
+      // data to a file again in the same request.
       $this->raw = NULL;
     }
     else {
@@ -214,6 +262,12 @@ class FeedsFetcherResult extends FeedsResult {
    */
   public function cleanUp() {
     if (isset($this->fid) && $file = file_load($this->fid)) {
+      // Remove file usage, but only if the source context is known.
+      if ($this->importer_id) {
+        // File usage type may only be 64 chars long.
+        file_usage_delete($file, 'feeds', 'FR:' . substr($this->importer_id, 0, 61), $this->feed_nid);
+      }
+
       file_delete($file);
       $this->fid = NULL;
       $this->file_path = NULL;
diff --git a/tests/feeds_fetcher_http.test b/tests/feeds_fetcher_http.test
index b41522d..af4a633 100644
--- a/tests/feeds_fetcher_http.test
+++ b/tests/feeds_fetcher_http.test
@@ -670,12 +670,13 @@ class FeedsFileHTTPTestCase extends FeedsWebTestCase {
     // Assert that 9 nodes have been created in total.
     $this->assertNodeCount(9);
 
+    // Clear PHP file exists cache.
+    clearstatcache();
+
     // Assert that the temporary file no longer exists.
     $this->assertFalse(file_exists($file->uri), format_string('The file @uri no longer exists.', array(
       '@uri' => $file->uri,
     )));
-
-    $this->getInProgressFile();
   }
 
   /**
@@ -733,6 +734,9 @@ class FeedsFileHTTPTestCase extends FeedsWebTestCase {
     // Assert that only 5 nodes have been created, since the import got aborted.
     $this->assertNodeCount(5);
 
+    // Clear PHP file exists cache.
+    clearstatcache();
+
     // Assert that the temporary file no longer exists.
     $this->assertFalse(file_exists($file->uri), format_string('The file @uri no longer exists.', array(
       '@uri' => $file->uri,