Index: apachesolr_attachments.admin.inc
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr_attachments/apachesolr_attachments.admin.inc,v
retrieving revision 1.6
diff -u -p -r1.6 apachesolr_attachments.admin.inc
--- apachesolr_attachments.admin.inc	3 Feb 2010 18:34:22 -0000	1.6
+++ apachesolr_attachments.admin.inc	3 Jun 2010 16:48:27 -0000
@@ -203,7 +203,7 @@ function apachesolr_attachements_delete_
     $solr = apachesolr_get_solr();
     $solr->deleteByQuery("entity:file AND hash:". apachesolr_site_hash());
     $solr->commit();
-    apachesolr_index_updated(time());
+    apachesolr_index_set_last_updated(time());
     apachesolr_clear_last_index('apachesolr_attachments');
     return TRUE;
   }
@@ -230,9 +230,13 @@ function apachesolr_attachments_add_docu
   // fids that were added before but no longer present on this node.
 
   $fids = array();
-  $result = db_query("SELECT fid FROM {apachesolr_attachments_files} WHERE nid = %d", $node->nid);
+  $failed = array();
+  $result = db_query("SELECT fid, failed FROM {apachesolr_attachments_files} WHERE nid = %d", $node->nid);
   while ($row = db_fetch_array($result)) {
     $fids[$row['fid']] = $row['fid'];
+    if($row['failed']) {
+      $failed[$row['fid']] = $row['fid'];
+    }
   }
 
   $files = apachesolr_attachments_get_indexable_files($node);
@@ -243,6 +247,8 @@ function apachesolr_attachments_add_docu
     db_query("UPDATE {apachesolr_attachments_files} SET removed = 1 WHERE fid IN (". db_placeholders($missing_fids) .")", $missing_fids);
   }
   $new_files = array_diff_key($files, $fids);
+  // Filter out the files that have failed
+  $new_files = array_diff_key($new_files, $failed);
   // Add new files.
   foreach ($new_files as $file) {
     db_query("INSERT INTO {apachesolr_attachments_files} (fid, nid, removed, sha1) VALUES (%d, %d, 0, '')", $file->fid, $node->nid);
@@ -277,7 +283,7 @@ function apachesolr_attachments_add_docu
       apachesolr_add_taxonomy_to_document($document, $node);
 
       if (module_exists('apachesolr_nodeaccess')) {
-        apachesolr_nodeaccess_apachesolr_update_index($document, $node);
+        apachesolr_nodeaccess_apachesolr_update_index($document, $node, $namespace);
       }
       drupal_alter('apachesolr_attachment_index', $document, $node, $file);
 
@@ -321,6 +327,101 @@ function apachesolr_attachments_get_inde
   return $file_list;
 }
 
+/**
+ * Helper function to define the table headers for the failures form.
+ *
+ * @return Array of header cell definitions, used for theming the table and for table sorting.
+ */
+function apachesolr_attachments_failures_form_headers() {
+  // One entry per column; 'field' names the SQL column used when sorting by that header.
+  $headers = array(
+    array('data' => t('Re-Index'), 'class' => 'checkbox'),
+    array('data' => t('File'), 'field' => 'filepath', 'sort' => 'asc'),
+    array('data' => t('Type'), 'field' => 'filemime', 'sort' => 'asc'),
+    array('data' => t('Attempts'), 'field' => 'attempts', 'sort' => 'desc'),
+    array('data' => t('Last Attempt'), 'field' => 'last_attempt', 'sort' => 'asc'),
+  );
+  return $headers;
+}
+
+/**
+ * Menu callback function to build the form listing all files that failed indexing.
+ *
+ * @return FAPI array holding one group of markup cells per failed file plus the re-index checkboxes.
+ */
+function apachesolr_attachments_failures_form() {
+  $form = array();
+  $options = array();
+  $limit = 10;
+  $files = apachesolr_attachments_get_failed_files($limit);
+  if (!empty($files)) {
+    foreach ($files as $fid => $file) {
+      $options[$fid] = '';
+      // Elements without a #type render #value verbatim, so escape the text here.
+      $form['files'][$fid]['path'] = array(
+        '#value' => check_plain($file->filepath),
+      );
+      $form['files'][$fid]['type'] = array(
+        '#value' => check_plain($file->filemime),
+      );
+      $form['files'][$fid]['attempts'] = array('#value' => $file->attempts);
+      // 24-hour clock with minutes ('i'); 'm' would print the month instead.
+      $form['files'][$fid]['last_attempt'] = array(
+        '#value' => date('Y-m-d H:i:s', $file->last_attempt),
+      );
+    }
+    $form['re-index'] = array(
+      '#type' => 'checkboxes',
+      '#options' => $options,
+    );
+    $form['submit'] = array(
+      '#type' => 'submit',
+      '#value' => t('Re-index Files'),
+      '#description' => t('Send the selected files to Apache Solr to be re-indexed.'),
+    );
+    $form['#redirect'] = 'admin/reports/apachesolr/attachments';
+  }
+  return $form;
+}
+
+function apachesolr_attachments_failures_form_validate($form, $form_state) {
+  // Unchecked checkboxes submit 0, so an empty filter result means nothing was selected.
+  if (!array_filter($form_state['values']['re-index'])) {
+    form_set_error('re-index', t('You must select at least one file to re-index.'));
+  }
+}
+
+function apachesolr_attachments_failures_form_submit($form, $form_state) {
+  // Submit handler: a checked box carries its fid as value, an unchecked one is 0.
+  foreach ($form_state['values']['re-index'] as $fid => $selected) {
+    if ($selected) {
+      // Flag every node this file is attached to as needing to be updated in the index.
+      $result = db_query("SELECT nid FROM {apachesolr_attachments_files} WHERE fid = %d", $fid);
+      while ($row = db_fetch_array($result)) {
+        apachesolr_mark_node($row['nid']);
+      }
+    }
+  }
+  drupal_set_message(t('The selected files will be re-indexed during the next cron run.'));
+}
+
+/**
+ * Helper function to retrieve the files that failed indexing, one pager page at a time.
+ *
+ * @return Associative array of file objects keyed by fid, at most $limit per page.
+ */
+function apachesolr_attachments_get_failed_files($limit=10) {
+  $files = array();
+  $sql = "SELECT f.*, af.attempts, af.last_attempt FROM {files} f LEFT JOIN {apachesolr_attachments_files} af ON af.fid = f.fid WHERE af.failed > 0";
+  $headers = apachesolr_attachments_failures_form_headers();
+  $sql .= tablesort_sql($headers);
+  $result = pager_query($sql, $limit);
+  while($file = db_fetch_object($result)) {
+    $files[$file->fid] = $file;
+  }
+  return $files;
+}
+
 function apachesolr_attachments_default_excluded() {
   $default = array('aif', 'art', 'avi', 'bmp', 'gif', 'ico', 'jpg', 'mov', 'mp3', 'mp4', 'mpg', 'oga', 'ogv', 'png', 'psd', 'ra', 'ram', 'rgb', 'tif',);
   return $default;
@@ -407,15 +508,23 @@ function apachesolr_attachments_get_atta
   }
   else {
     // Extract using Solr.
-    // We allow Solr to throw exceptions - they will be caught
-    // by apachesolr.module.
-    list($text, $metadata) = apachesolr_attachments_extract_using_solr($filepath);
+    try {
+      list($text, $metadata) = apachesolr_attachments_extract_using_solr($filepath);
+    }
+    catch(Exception $e) {
+      $result = db_query("SELECT nid, attempts FROM {apachesolr_attachments_files} WHERE fid = %d", $file->fid);
+      while($row = db_fetch_array($result)) {
+        // An exception occurred, so flag the file as failed and log the attempt
+        db_query("UPDATE {apachesolr_attachments_files} SET failed = %d, attempts = %d, last_attempt = %d WHERE fid = %d AND nid = %d", true, $row['attempts'] + 1, time(), $file->fid, $row['nid']);  
+      }
+      return;      
+    }
   }
   // Strip bad control characters.
   $text = iconv("UTF-8", "UTF-8//IGNORE", $text);
   $text = trim(apachesolr_clean_text($text));
   // Save the extracted, cleaned text to the DB.
-  db_query("UPDATE {apachesolr_attachments_files} SET sha1 = '%s', body = '%s' WHERE fid = %d", $sha1, $text, $file->fid);
+  db_query("UPDATE {apachesolr_attachments_files} SET sha1 = '%s', body = '%s', attempts = 1, last_attempt = %d WHERE fid = %d", $sha1, $text, time(), $file->fid);
 
   return $text;
 }
Index: apachesolr_attachments.install
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr_attachments/apachesolr_attachments.install,v
retrieving revision 1.6
diff -u -p -r1.6 apachesolr_attachments.install
--- apachesolr_attachments.install	20 Dec 2009 02:15:51 -0000	1.6
+++ apachesolr_attachments.install	3 Jun 2010 16:48:27 -0000
@@ -85,11 +85,28 @@ function apachesolr_attachments_schema()
         'type' => 'text',
         'not null' => TRUE,
         'size' => 'big'),
-     ),
+      'failed' => array(
+        'description' => 'flag to indicate if the file was unsuccessfully parsed',
+        'type' => 'int',
+        'unsigned' => TRUE,
+        'default' => '0',
+      ),
+      'attempts' => array(
+        'description' => 'a count of the number of attempts made to index the file.',
+        'type' => 'int',
+        'default' => '0',
+      ),
+      'last_attempt' => array(
+        'description' => 'timestamp indicating the last attempt to index the file.',
+        'type' => 'int',
+        'not null' => TRUE,
+        'default' => 0),
+    ),
     'indexes' => array(
       'nid' => array('nid'),
       'removed' => array('removed'),
-      ),
+      'failed' => array('failed'),
+    ),
     'primary key' => array('fid'),
     );
 
@@ -130,3 +147,34 @@ function apachesolr_attachments_update_6
   apachesolr_clear_last_index('apachesolr_attachments');
   return $ret;
 }
+
+/**
+ * Add failed, attempts, and last_attempt fields to {apachesolr_attachments_files}.
+ */
+function apachesolr_attachments_update_6002() {
+  $ret = array();
+
+  $schema = array(
+    'description' => 'flag to indicate if the file was unsuccessfully parsed',
+    'type' => 'int',
+    'unsigned' => TRUE,
+    'default' => '0',
+  );
+  // Create the 'failed' index declared in hook_schema() along with the column.
+  db_add_field($ret, 'apachesolr_attachments_files', 'failed', $schema, array('indexes' => array('failed' => array('failed'))));
+  $schema = array(
+    'description' => 'a count of the number of attempts made to index the file.',
+    'type' => 'int',
+    'default' => '0',
+  );
+  db_add_field($ret, 'apachesolr_attachments_files', 'attempts', $schema);
+  // A NOT NULL column added to a populated table needs a default for the existing rows.
+  $schema = array(
+    'description' => 'timestamp indicating the last attempt to index the file.',
+    'type' => 'int',
+    'not null' => TRUE,
+    'default' => 0,
+  );
+  db_add_field($ret, 'apachesolr_attachments_files', 'last_attempt', $schema);
+  return $ret;
+}
Index: apachesolr_attachments.module
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr_attachments/apachesolr_attachments.module,v
retrieving revision 1.19
diff -u -p -r1.19 apachesolr_attachments.module
--- apachesolr_attachments.module	3 Feb 2010 18:44:34 -0000	1.19
+++ apachesolr_attachments.module	3 Jun 2010 16:48:27 -0000
@@ -14,7 +14,7 @@ define (EXTRACTING_SERVLET, 'extract/tik
 function apachesolr_attachments_menu() {
   $items = array();
   $items['admin/settings/apachesolr/attachments'] = array(
-    'title' => 'File attachments',
+    'title' => t('File attachments'),
     'description' => 'Administer Apache Solr Attachments.',
     'page callback' => 'apachesolr_attachments_admin_page',
     'access arguments' => array('administer search'),
@@ -22,7 +22,7 @@ function apachesolr_attachments_menu() {
     'type' => MENU_LOCAL_TASK,
   );
   $items['admin/settings/apachesolr/attachments/confirm/reindex'] = array(
-    'title' => 'Reindex all files',
+    'title' => t('Reindex all files'),
     'page callback' => 'drupal_get_form',
     'page arguments' => array('apachesolr_attachments_confirm', 5),
     'access arguments' => array('administer search'),
@@ -30,7 +30,7 @@ function apachesolr_attachments_menu() {
     'type' => MENU_CALLBACK,
   );
   $items['admin/settings/apachesolr/attachments/confirm/delete'] = array(
-    'title' => 'Delete and reindex all files',
+    'title' => t('Delete and reindex all files'),
     'page callback' => 'drupal_get_form',
     'page arguments' => array('apachesolr_attachments_confirm', 5),
     'access arguments' => array('administer search'),
@@ -38,13 +38,21 @@ function apachesolr_attachments_menu() {
     'type' => MENU_CALLBACK,
   );
   $items['admin/settings/apachesolr/attachments/confirm/clear-cache'] = array(
-    'title' => 'Delete the local cache of file text',
+    'title' => t('Delete the local cache of file text'),
     'page callback' => 'drupal_get_form',
     'page arguments' => array('apachesolr_attachments_confirm', 5),
     'access arguments' => array('administer search'),
     'file' => 'apachesolr_attachments.admin.inc',
     'type' => MENU_CALLBACK,
   );
+  $items['admin/reports/apachesolr/attachments'] = array(
+    'title' => 'Apache Solr Attachments',
+    'description' => 'List of files that failed to be indexed',
+    'page callback' => 'drupal_get_form',
+    'page arguments' => array('apachesolr_attachments_failures_form'),
+    'access arguments' => array('administer search'),
+    'file' => 'apachesolr_attachments.admin.inc',
+  );
   return $items;
 }
 
@@ -238,3 +246,36 @@ function apachesolr_attachments_remove_a
   }
 }
 
+/**
+ * Implementation of hook_theme(): registers the theme function for the failures form.
+ */
+function apachesolr_attachments_theme() {
+  return array(
+    'apachesolr_attachments_failures_form' => array(
+      'arguments' => array('form' => NULL),
+    ),
+  );
+}
+
+/**
+ * Theme the failures form as a table with one row per failed file, followed by a pager.
+ */
+function theme_apachesolr_attachments_failures_form($form) {
+  $rows = array();
+  foreach (element_children(empty($form['files']) ? array() : $form['files']) as $key) {
+    if (isset($form['files'][$key]['path'])) {
+      $rows[] = array(
+        array('data' => drupal_render($form['re-index'][$key]), 'class' => 'checkbox'),
+        drupal_render($form['files'][$key]['path']),
+        array('data' => drupal_render($form['files'][$key]['type'])),
+        array('data' => drupal_render($form['files'][$key]['attempts'])),
+        array('data' => drupal_render($form['files'][$key]['last_attempt'])),
+      );
+    }
+  }
+  $output = '<div>'. t('This form lists all of the files attached to nodes that failed to be indexed by Apache Solr.  When the form is submitted, each selected file will have its parent node marked in the database as needing to be re-indexed, which in turn, will cause the attached files to be re-indexed during the next cron run.') .'</div>';
+  $output .= theme('table', apachesolr_attachments_failures_form_headers(), $rows);
+  // Page size must match the limit the form builder passes to the failed-files query.
+  $output .= theme('pager', NULL, 10, 0);
+  return $output . drupal_render($form);
+}
