From 12ab006599ee71c1921e7a3c762224bb0e4470ba Mon Sep 17 00:00:00 2001 From: Florent Torregrosa Date: Tue, 14 Apr 2015 16:36:49 +0200 Subject: [PATCH] Issue #2470553: Allows search_api_attachments to parse txt files. --- .../processor/FilesFieldsProcessorPlugin.php | 67 +++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/src/Plugin/search_api/processor/FilesFieldsProcessorPlugin.php b/src/Plugin/search_api/processor/FilesFieldsProcessorPlugin.php index 8fd68d1..b0d5a7f 100644 --- a/src/Plugin/search_api/processor/FilesFieldsProcessorPlugin.php +++ b/src/Plugin/search_api/processor/FilesFieldsProcessorPlugin.php @@ -6,6 +6,8 @@ use Drupal\Core\TypedData\DataDefinition; use Drupal\field\Entity\FieldConfig; use Drupal\search_api\Datasource\DatasourceInterface; use Drupal\search_api\Processor\ProcessorPluginBase; +use Drupal\Core\StreamWrapper\StreamWrapperInterface; +use Drupal\Component\Utility\Xss; /** * @SearchApiProcessor( @@ -46,11 +48,37 @@ class FilesFieldsProcessorPlugin extends ProcessorPluginBase { if (!($field = $item->getField('search_api_attachments_' . $field_name))) { continue; } - $field->addValue('test test'); + + // Need to retrieve the files. + $entity = $item->getOriginalObject()->getValue(); + $filefield_values = $entity->get($field_name)->getValue(); + + $fids = array(); + foreach ($filefield_values as $filefield_value) { + $fids[] = $filefield_value['target_id']; + } + + // Retrieve the files. + $files = entity_load_multiple('file', $fids); + + // Parse the files. + $value = ''; + foreach ($files as $file) { + $value .= $this->extract_simple($file); + } + + // Add the value in the indexed data. + $field->addValue($value); } } } + /** + * Helper function. + * + * @return array + * An array of file fields. + */ protected function getFileFields() { $file_fields = array(); // Retrieve file fields of indexed bundles. 
@@ -66,4 +94,70 @@ class FilesFieldsProcessorPlugin extends ProcessorPluginBase {
     return $file_fields;
   }
 
+  /**
+   * Extracts the indexable plain text of a text file.
+   *
+   * The content is read from the location resolved by get_realpath(), invalid
+   * UTF-8 sequences are dropped, markup is filtered out and the remaining
+   * text is HTML-escaped and trimmed.
+   *
+   * @param \Drupal\file\FileInterface $file
+   *   The file entity to read.
+   *
+   * @return string
+   *   The sanitized text, or an empty string if the file cannot be read.
+   */
+  protected function extract_simple(\Drupal\file\FileInterface $file) {
+    $path = $this->get_realpath($file->getFileUri());
+    // No usable location (unknown scheme, missing file): nothing to read.
+    if (!is_string($path) || $path === '') {
+      return '';
+    }
+
+    $text = file_get_contents($path);
+    if ($text === FALSE) {
+      return '';
+    }
+
+    // Drop byte sequences that are not valid UTF-8.
+    $text = iconv("UTF-8", "UTF-8//IGNORE", $text);
+    if ($text === FALSE) {
+      return '';
+    }
+
+    // Pad angle brackets with spaces, presumably so that words separated only
+    // by a tag stay apart once Xss::filter() (empty allow-list) strips markup.
+    $text = Xss::filter(str_replace(array('<', '>'), array(' <', '> '), $text), array());
+    // Decode entities, then re-encode the special characters.
+    $text = htmlspecialchars(html_entity_decode($text, ENT_NOQUOTES, 'UTF-8'), ENT_NOQUOTES, 'UTF-8');
+    return trim($text);
+  }
+
+  /**
+   * Resolves a file URI to a location that can be read.
+   *
+   * @param string $uri
+   *   The URI of the file, e.g. public://directory/file.jpg.
+   *
+   * @return string|false
+   *   The real path if the URI scheme belongs to a local stream wrapper, the
+   *   external URL otherwise, or FALSE if no stream wrapper handles the URI.
+   */
+  protected function get_realpath($uri) {
+    $manager = \Drupal::service('stream_wrapper_manager');
+    $wrapper = $manager->getViaUri($uri);
+    // Without a wrapper for this URI there is nothing to resolve.
+    if (!$wrapper) {
+      return FALSE;
+    }
+
+    $scheme = file_uri_scheme($uri);
+    // getWrappers() results are keyed by scheme, so test the key directly.
+    $local_wrappers = $manager->getWrappers(StreamWrapperInterface::LOCAL);
+    if ($scheme !== FALSE && array_key_exists($scheme, $local_wrappers)) {
+      return $wrapper->realpath();
+    }
+    return $wrapper->getExternalUrl();
+  }
+
 }
-- 
1.7.10.4