diff -r c0085ac96ba8 includes/FeedsSource.inc
--- a/includes/FeedsSource.inc Sat Feb 04 14:02:37 2012 +0400
+++ b/includes/FeedsSource.inc Sat Feb 04 17:29:29 2012 +0400
@@ -343,10 +343,12 @@
 
       // Parse.
       $parser_result = $this->importer->parser->parse($this, $this->fetcher_result);
-      module_invoke_all('feeds_after_parse', $this, $parser_result);
+      if (!empty($parser_result)) {
+        module_invoke_all('feeds_after_parse', $this, $parser_result);
 
-      // Process.
-      $this->importer->processor->process($this, $parser_result);
+        // Process.
+        $this->importer->processor->process($this, $parser_result);
+      }
     }
     catch (Exception $e) {
       // Do nothing.
diff -r c0085ac96ba8 libraries/ParserCSV.inc
--- a/libraries/ParserCSV.inc Sat Feb 04 14:02:37 2012 +0400
+++ b/libraries/ParserCSV.inc Sat Feb 04 17:29:29 2012 +0400
@@ -74,6 +74,8 @@
 
   public function __construct() {
     $this->delimiter = ',';
+    $this->from_encoding = $this->to_encoding = 'UTF-8';
+    $this->check_encoding = FALSE;
     $this->skipFirstLine = FALSE;
     $this->columnNames = FALSE;
     $this->timeout = FALSE;
@@ -92,6 +94,22 @@
   }
 
   /**
+   * Set the encoding of the source file; each line read is converted from it
+   * to UTF-8. By default, UTF-8, in which case no conversion takes place.
+   */
+  public function setEncoding($encoding) {
+    $this->from_encoding = $encoding;
+  }
+
+  /**
+   * Set to TRUE to verify every line against the source encoding and throw an
+   * exception on the first line that does not match. By default, FALSE.
+   */
+  public function setEncodingCheck($check_encoding) {
+    $this->check_encoding = $check_encoding;
+  }
+
+  /**
    * Set this to TRUE if the parser should skip the first line of the CSV text,
    * which might be desired if the first line contains the column names.
    * By default, this is set to FALSE and the first line is not skipped.
@@ -192,9 +210,30 @@
     $linesParsed = 0;
 
     for ($lineIterator->rewind($this->startByte); $lineIterator->valid(); $lineIterator->next()) {
+
+      $line = $lineIterator->current();
+
+      // Optionally verify that the raw line is valid in the declared encoding.
+      if ($this->check_encoding) {
+        if (function_exists('mb_check_encoding')) {
+          if (!mb_check_encoding($line, $this->from_encoding)) {
+            throw new Exception(t('Source file is not in @encoding encoding.', array('@encoding' => $this->from_encoding)));
+          }
+        } else {
+          throw new Exception(t('For encoding check mbstring PHP extension must be available.'));
+        }
+      }
+      // Convert to the target encoding only when it differs from the source.
+      if ($this->from_encoding != $this->to_encoding) {
+        if (function_exists('mb_convert_encoding')) {
+          $line = mb_convert_encoding($line, $this->to_encoding, $this->from_encoding);
+        } else {
+          throw new Exception(t('For encoding conversion mbstring PHP extension must be available.'));
+        }
+      }
 
       // Make really sure we've got lines without trailing newlines.
-      $line = trim($lineIterator->current(), "\r\n");
+      $line = trim($line, "\r\n");
 
       // Skip empty lines.
       if (empty($line)) {
diff -r c0085ac96ba8 plugins/FeedsCSVParser.inc
--- a/plugins/FeedsCSVParser.inc Sat Feb 04 14:02:37 2012 +0400
+++ b/plugins/FeedsCSVParser.inc Sat Feb 04 17:29:29 2012 +0400
@@ -17,6 +17,8 @@
     $parser = new ParserCSV();
     $delimiter = $source_config['delimiter'] == 'TAB' ? "\t" : $source_config['delimiter'];
     $parser->setDelimiter($delimiter);
+    $parser->setEncoding($source_config['encoding']['encoding']);
+    $parser->setEncodingCheck($source_config['encoding']['check_encoding']);
 
     $iterator = new ParserCSVIterator($fetcher_result->getFilePath());
     if (empty($source_config['no_headers'])) {
@@ -101,6 +103,12 @@
   public function sourceDefaults() {
     return array(
       'delimiter' => $this->config['delimiter'],
+      // Nested under 'encoding' because the parser setup reads
+      // $source_config['encoding']['encoding'] and ['check_encoding'].
+      'encoding' => array(
+        'encoding' => $this->config['encoding'],
+        'check_encoding' => $this->config['check_encoding'],
+      ),
       'no_headers' => $this->config['no_headers'],
     );
   }
@@ -145,7 +153,7 @@
       '#description' => t('Check if the imported CSV file does not start with a header row. If checked, mapping sources must be named \'0\', \'1\', \'2\' etc.'),
       '#default_value' => isset($source_config['no_headers']) ? $source_config['no_headers'] : 0,
     );
-    return $form;
+    return $form + $this->configEncodingForm(TRUE, $source_config);
   }
 
   /**
@@ -154,6 +162,8 @@
   public function configDefaults() {
     return array(
       'delimiter' => ',',
+      'encoding' => 'UTF-8',
+      'check_encoding' => FALSE,
       'no_headers' => 0,
     );
   }
@@ -180,6 +190,65 @@
       '#description' => t('Check if the imported CSV file does not start with a header row. If checked, mapping sources must be named \'0\', \'1\', \'2\' etc.'),
       '#default_value' => $this->config['no_headers'],
     );
+    return $form + $this->configEncodingForm();
+  }
+
+  /**
+   * Build the encoding fieldset appended to the importer and source forms.
+   *
+   * @param $sourceForm
+   *   TRUE when called for the per-source form; the fieldset is then always
+   *   rendered collapsed.
+   * @param $source_config
+   *   Optional configuration saved with the source. When it carries an
+   *   'encoding' array, its values are used as form defaults instead of the
+   *   importer configuration.
+   *
+   * @return
+   *   Form array containing the 'encoding' fieldset.
+   */
+  public function configEncodingForm($sourceForm = FALSE, $source_config = array()) {
+    $defaults = $this->configDefaults();
+    $encoding = $this->config['encoding'];
+    $check_encoding = $this->config['check_encoding'];
+    // A saved source keeps its own values one level down, under the fieldset
+    // key; prefer them so re-editing a source does not reset its encoding.
+    if (isset($source_config['encoding']) && is_array($source_config['encoding'])) {
+      $saved = $source_config['encoding'];
+      $encoding = isset($saved['encoding']) ? $saved['encoding'] : $encoding;
+      $check_encoding = isset($saved['check_encoding']) ? $saved['check_encoding'] : $check_encoding;
+    }
+
+    $form = array();
+    $form['encoding'] = array(
+      '#type' => 'fieldset',
+      '#title' => t('Encoding conversion'),
+      '#collapsible' => TRUE,
+      '#collapsed' => $sourceForm || ($this->config['encoding'] == $defaults['encoding'] && $this->config['check_encoding'] == $defaults['check_encoding']),
+    );
+    if (function_exists('mb_list_encodings')) {
+      $options = mb_list_encodings();
+      $options = array_combine($options, $options);
+      $form['encoding']['encoding'] = array(
+        '#type' => 'select',
+        '#title' => t('Source file encoding'),
+        '#description' => t('Performs encoding conversion of a source files to UTF-8. Defaults to UTF-8 — no encoding conversion will happen.'),
+        '#options' => $options,
+        '#default_value' => $encoding,
+      );
+      $form['encoding']['check_encoding'] = array(
+        '#type' => 'checkbox',
+        '#title' => t('Check encoding'),
+        '#description' => t('Checks encoding of a source file and breaks import process if encoding differs.'),
+        '#default_value' => $check_encoding,
+      );
+    } else {
+      // NOTE(review): the wrapper tags were stripped from this copy of the
+      // patch; a plain paragraph is assumed.
+      $form['encoding']['encoding'] = array(
+        '#markup' => '<p>' . t('Encoding conversion is disabled due to the lack of mbstring PHP extension.') . '</p>',
+      );
+    }
     return $form;
   }
 