Index: inform.module
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/inform/inform.module,v
retrieving revision 1.6
diff -u -p -r1.6 inform.module
--- inform.module	26 Oct 2009 09:47:33 -0000	1.6
+++ inform.module	29 Jan 2010 16:58:40 -0000
@@ -28,7 +28,7 @@ function inform_schema_alter(&$schema) {
     'length' => 2,
   );
 }
-/** 
+/**
  * Implementation of hook_cron().
  * If deferred processing is enabled then use cron rather than node_api save
  * to tag the articles.
@@ -37,16 +37,25 @@ function inform_cron() {
   $defer = variable_get('inform_defer_tagging', 0);
   $batch_size = variable_get('inform_batch_size', 50);
   $days = variable_get('inform_tag_age', 50);
-  if ($defer ==1) {
+  $semaphore = variable_get('inform_batch_running',0);
+  if ($defer ==1 && $semaphore == 0) {
+    watchdog("inform","Start cron batch.");
+    variable_set('inform_batch_running',1);
     for ($i = 0; $i < $batch_size; $i++) {
       $node = _inform_get_next_node_to_tag($days, $batch_size);
       // Break out of loop if there is no node to tag.
-      if ($node == NULL) {
+      if (!$node) {
         break;
       }
       _inform_tag_node($node);
     }
+    variable_set('inform_batch_running',0);
+    watchdog("inform","End cron batch.");
+  }
+  elseif ($semaphore == 1) {
+    watchdog("inform","Inform cron run not started as inform batch is already running.");
   }
+
 }
 
 /**
@@ -106,13 +115,14 @@ function _inform_extract_top_level_item(
         if (!$term_tid) {
           $edit = array('vid' => $vid, 'name' => $tag_item->Name, 'parent' => $top_tid);
           $status = taxonomy_save_term($edit);
+          watchdog("inform","Created new term %term.",array('%term' => $tag_item->Name));
           $term_tid = $edit['tid'];
         }
-        db_query('DELETE FROM {term_node} WHERE nid = %d AND vid = %d AND tid = %d', $node->nid, $node->vid, $term_tid);
-        db_query('INSERT INTO {term_node} (nid, vid, tid, inform_score) VALUES (%d, %d, %d, %d)', $node->nid, $node->vid, $term_tid, $tag_item->Score);
+
+        db_query('INSERT INTO {term_node} (nid, vid, tid, inform_score ) VALUES (%d, %d, %d, %d) ON DUPLICATE KEY UPDATE inform_score = %d', $node->nid, $node->vid, $term_tid, $tag_item->Score,$tag_item->Score);
       }
-      db_query('DELETE FROM {inform_node_update} WHERE nid = %d', $node->nid);
-      db_query('INSERT INTO {inform_node_update} (nid,timestamp) VALUES (%d,SYSDATE())', $node->nid);
+
+      db_query('INSERT INTO {inform_node_update} (nid,timestamp) VALUES (%d,SYSDATE()) ON  DUPLICATE KEY UPDATE timestamp=sysdate()', $node->nid);
     }
   }
 }
@@ -141,6 +151,7 @@ function _inform_tag_node($node) {
       );
       $object_result = $client->ExtractAll($params);
       if (!is_soap_fault($object_result)) {
+        watchdog("inform","Tagging Node %nid.",array('%nid' => $node->nid));
         // Clear the Inform tags for the node.
         db_query('DELETE tn FROM {term_node} tn INNER JOIN {term_data} td ON tn.tid = td.tid WHERE nid = %d  AND tn.vid = %d AND td.vid = %d', $node->nid, $node->vid , $vid);
         // Industries
@@ -155,9 +166,7 @@ function _inform_tag_node($node) {
         if (isset($object_result->ExtractAllResult->Entities->Entity)) {
           _inform_extract_top_level_item($node, INFORM_TERM_ENTITY, $object_result->ExtractAllResult->Entities->Entity);
         }
-        db_query('DELETE FROM {inform_node_update} WHERE nid = %d', $node->nid);
-        db_query('INSERT INTO {inform_node_update} (nid,timestamp) VALUES (%d,SYSDATE())', $node->nid);
-        
+      db_query('INSERT INTO {inform_node_update} (nid,timestamp) VALUES (%d,SYSDATE()) ON  DUPLICATE KEY UPDATE timestamp=sysdate()', $node->nid);
       }
       else {
         drupal_set_message(t('Fault contacting Inform web service. faultcode: $fault_code faultstring: $fault_string',
@@ -168,6 +177,7 @@ function _inform_tag_node($node) {
           ),
           'error');
         drupal_set_message(t('Tagging from Inform has not been done.'), 'error');
+        watchdog("inform","Inform web service failed.");
       }
     }
   }
@@ -175,12 +185,12 @@ function _inform_tag_node($node) {
 
 /**
  * Check if a term is blacklisted
- * 
+ *
  *  @param string $name
  *    Name of taxonomy item.
- *    
- *  @return boolean 
- *    TRUE if term is blacklisted  
+ *
+ *  @return boolean
+ *    TRUE if term is blacklisted
  */
 function _inform_check_blacklist($name) {
   //blacklisted items should be in the blacklist array
@@ -232,13 +242,14 @@ function _inform_get_top_level_entity_ti
       'parent' =>  array()
     );
     taxonomy_save_term($new_toplevel_term);
+    watchdog("inform","Created new sub section %term.",array('%term' => $category));
     $toplevel_tid = $new_toplevel_term['tid'];
   }
   return $toplevel_tid;
 }
 
 
-/** 
+/**
  * Validator for batch processing form.
  */
 
@@ -246,15 +257,15 @@ function _inform_valdate_batch($form, &$
   if (strlen($form_state['values']['age_check'])==0 and not(is_numeric($form_state['values']['age_check']))) {
     form_set_error('age_check', t('If selecting a number of days old it should be a whole number or blank.'));
   }
-  
-  
+
+
   if (strlen($form_state['values']['max_batch'])==0 and not(is_numeric($form_state['values']['max_batch']))) {
     form_set_error('max_batch', t('The batch size must be blank or a whole number.'));
   }
 }
 
 /**
- * Submit handler for batch processing form. 
+ * Submit handler for batch processing form.
  */
 
 function _inform_submit_batch($values) {
@@ -268,14 +279,32 @@ function _inform_submit_batch($values) {
       'error_message' => t('Tagging failed.'),
       'progress_message' => '',
       'operations' => array(
-        array('_inform_batch_process', array($max_days, $max_batch)), 
+        array('_inform_batch_process', array($max_days, $max_batch)),
       ),
       'finished' => '_inform_batch_finish',
     );
-    batch_set($batch);
+    if (variable_get('inform_batch_running',0) == 0) {
+      variable_set('inform_batch_running',1);
+      watchdog("inform","Start manual batch run.");
+      batch_set($batch);
+    }
+    else {
+      watchdog("inform","manual batch run terminated as inform batch is already running.");
+    }
 }
 
+/**
+ * Function to work out if an item should be kept in the list of filtered node types.
+ */
 
+function _inform_filter_type($var) {
+  // Only the integer 0 is rejected (strict comparison); every other value,
+  // including the string "0", is kept.
+  if ($var !== 0) {
+    return true;
+  }
+  return false;
+}
 
 /**
  * Get the next nid that requires tagging.
@@ -286,9 +315,8 @@ function _inform_submit_batch($values) {
  *   Clear static variables.
  *
  * @return node
- *   item that requires tagging, NULL if no nodes left to tag.  
+ *   item that requires tagging, FALSE if no nodes left to tag.
  */
-
 function _inform_get_next_node_to_tag($days = NULL, $number_to_fetch = 10, $clear = FALSE) {
   static $result;
   if ($days == NULL) {
@@ -296,27 +324,30 @@ function _inform_get_next_node_to_tag($d
   }
   //Perform the query if needed.
   if ($clear || !$result) {
+    $vid = variable_get('inform_vocabulary', NULL);
+    $disabled_analysis = variable_get('inform_analyze_disabled_node_types', array());
+    $disabled_keys = array_keys(array_filter($disabled_analysis,"_inform_filter_type"));
+    // No easy way to code for empty disabled array.
+    array_push($disabled_keys, "DUMMY_ITEM"); 
+    $value_array = array_merge(array($vid, $days), $disabled_keys , array($vid), $disabled_keys);
     $result = db_query_range("SELECT n.nid FROM {node} n
-                              LEFT OUTER JOIN {inform_node_update} inu 
-                                ON n.nid = inu.nid
-                              INNER JOIN {vocabulary_node_types} vnt 
-                                ON n.type = vnt.type 
-                              WHERE 
-                                (
-                                  (
-                                    disregard is null
-                                  AND
-                                    timestamp is null
-                                  )
-                                OR
-                                  (
-                                    disregard =0 
-                                  AND 
-                                    inu.timestamp < date_sub(sysdate(), interval %d day)
-                                  )
-                                )
-                              ORDER BY n.nid DESC",
-                              array($days), 0, $number_to_fetch);
+                               JOIN {inform_node_update} inu ON n.nid = inu.nid
+                               INNER JOIN {vocabulary_node_types} vnt ON n.type = vnt.type 
+                               WHERE vnt.vid = %d AND
+                               disregard =0 AND
+                               inu.timestamp < date_sub(sysdate(), interval %d day) AND
+                               n.type not in (". db_placeholders($disabled_keys,"text") .")
+                              UNION
+                               SELECT n.nid FROM {node} n
+                               LEFT OUTER JOIN {inform_node_update} inu
+                               ON n.nid = inu.nid
+                               INNER JOIN {vocabulary_node_types} vnt
+                               ON n.type = vnt.type
+                               WHERE vnt.vid = %d AND
+                               n.type not in (". db_placeholders($disabled_keys,"text") .") AND
+                               inu.nid is null
+                               ORDER BY nid desc",
+                              $value_array, 0, $number_to_fetch);
   }
 
   $have_result = FALSE;
@@ -328,40 +359,36 @@ function _inform_get_next_node_to_tag($d
       // if $clear is set and there are no available rows, then there are no
       // rows left in the database, return 0
       if ($clear) {
-        return NULL;
-      } 
+        watchdog("inform","No more records to get for inform, finished tagging (or bug).");
+        return FALSE;
+      }
       else {
-        // If there are no rows set and clear is not set then try to fetch 
-        // some more results. Should only every recurse one level deep.
+        // If there are no rows set and clear is not set then try to fetch
+        // some more results. Should only ever recurse one level deep.
         return _inform_get_next_node_to_tag($days, $number_to_fetch, TRUE);
       }
-      
-    } 
+
+    }
     else {
       // There are rows in the current query we can use
     }
     $node = node_load($row['nid'], NULL, TRUE);
-    // If the node has "problem with headline" for the headline or no body then don't tag and set ignore to true. 
-    if ($node->title == "problem with headline" or $node->body == "") {
-      $inform_node_update_check = db_query("SELECT nid FROM {inform_node_update} WHERE nid = %d", array($row['nid']));
-      if (db_fetch_object($inform_node_update_check)) {
-        // Update the row to now ignore.
-        db_query("UPDATE {inform_node_update} SET disregard = 1 WHERE nid = %d", array($row['nid']));        
-      }
-      else {
-        // Insert a row for ignore.
-        db_query("INSERT INTO {inform_node_update} (nid,timestamp,disregard) VALUES (%d,sysdate(),1)", array($row['nid']));
-      }
+    // If the node has "problem with headline" for the headline or no body
+    // or is a disabled type then don't tag and set ignore to true.
+    if ($node->title == "problem with headline" OR $node->body == "") {
+      watchdog("inform","Setting node %nid to be disregarded by inform.",array('%nid' => $node->nid));
+      // Insert a row for ignore.
+      db_query("INSERT INTO {inform_node_update} (nid,timestamp,disregard) VALUES (%d,sysdate(),1) ON DUPLICATE KEY UPDATE disregard = 1", array($row['nid']));
     }
     else {
       $have_result = TRUE;
-    }  
+    }
   }
-  return $node;    
+  return $node;
 }
 
 /**
- * Batch processing 
+ * Batch processing
  */
 function _inform_batch_process($days, $batch_size, &$context) {
   if (empty($context['sandbox'])) {
@@ -372,7 +399,7 @@ function _inform_batch_process($days, $b
   for ($i = 0; $i < $batch_size; $i++) {
     $node = _inform_get_next_node_to_tag($days, $batch_size);
     // Break out of loop if there is no node to tag.
-    if ($node == NULL) {
+    if (!$node) {
       break;
     }
     _inform_tag_node($node);
@@ -393,6 +420,9 @@ function _inform_batch_finish($success, 
     $message = t('An error occurred while processing');
   }
   drupal_set_message($message);
+  //Turn the batch process semaphore off
+  watchdog("inform","End manual batch run.");
+  variable_set('inform_batch_running',0);
 }
 
 
@@ -402,7 +432,7 @@ function _inform_batch_finish($success, 
  *
  * Will send the data to Inform for processing on save.
  */
-function inform_nodeapi(&$node, $op, $a3 = NULL, $a4 = NULL) {
+function inform_nodeapi($node, $op, $a3 = NULL, $a4 = NULL) {
   switch ($op) {
     case 'insert':
     case 'update':
@@ -412,7 +442,9 @@ function inform_nodeapi(&$node, $op, $a3
       $batch_mode =sizeof(batch_get());
       // If the vocabulary is not assigned to the node type we shouldn't tag
       $assigned_vocab = db_result(db_query("SELECT count(*) FROM {vocabulary_node_types} vnt WHERE vnt.type = '%s'", array($node->type)));
-      if (!$defer && !$batch_mode && $assigned_vocab) {
+      // If the node type has been disabled we shouldn't tag.
+      $disabled_analysis = variable_get('inform_analyze_disabled_node_types', array());
+      if (!$defer && !$batch_mode && $assigned_vocab && empty($disabled_analysis[$node->type])) {
         $status = _inform_tag_node($node);
       }
       break;
@@ -438,7 +470,7 @@ function inform_block($op = 'list', $del
       case INFORM_BLOCK_TAGS:
         $subjects = inform_related_subjects();
         if (sizeof($subjects) > 0) {
-          $block['subject'] = t('Related subjects');
+          $block['subject'] = t('Related topics');
           $block['content'] = theme('item_list', $subjects);
         }
       break;
@@ -467,7 +499,7 @@ function inform_help($path, $arg) {
  * Implementation of hook_menu().
  */
 function inform_menu() {
-  
+
   $items['admin/settings/inform'] = array(
     'title' => 'Inform settings',
     'description' => 'Manage your Inform installation',
@@ -499,57 +531,59 @@ function inform_menu() {
 function inform_related_subjects($object = NULL) {
   if ($object==NULL) {
     $object = menu_get_item();
-    
   }
-  
+
   $cutoff = variable_get('inform_related_minimum_score', NULL);
   $vid = variable_get('inform_vocabulary', NULL);
   $count = variable_get('inform_terms_count', NULL);
-
+  
   //If the object is a menu object for a node then treat it as a node
   if (is_array($object) && isset($object['map']) && $object['map'][0] == 'node') {
     $object = $object['map'][1];
   }
-  
-  if (is_array($object) && isset($object['map']) && $object['map'][0] == 'taxonomy') {
-    $tid = $object['map'][2];
-    $result = db_query_range("SELECT {term_data}.tid as term_id,
-                                    coalesce(ts.name,{term_data}.name) as name,
-                                    sum(nt1.inform_score)
-                             FROM {term_data}
-                             INNER JOIN {term_node} nt1 ON nt1.tid = {term_data}.tid
-                         INNER JOIN {term_node} nt2 on nt1.nid = nt2.nid
-                             LEFT OUTER JOIN term_synonym ts ON ts.tid = term_data.tid
-                             WHERE nt2.tid = %d
-                             AND {term_data}.tid != nt2.tid
-                             GROUP BY {term_data}.tid, coalesce(ts.name,{term_data}.name)
-                             ORDER BY sum(nt1.inform_score) desc", array($tid), 0, $count);
-  }
-  elseif (isset($object->nid)) {
-    $check_type_query = db_query("SELECT n.nid from {node} n JOIN {vocabulary_node_types} vnt on n.type = vnt.type where n.nid = %d", array($object->nid));
-    //If this node type can have inform taxonomy then query and show
-    if (db_fetch_object($check_type_query)) {
-      // Don't use the standard taxonomy functions for this block as
-      // the Inform module has settings which are needed in the query.
-      $result = db_query_range("
-        SELECT td.tid as term_id, coalesce(ts.name,td.name) as name, tn.inform_score
-        FROM {term_data} td
-        JOIN {term_node} tn on tn.tid = td.tid
-        LEFT OUTER JOIN {term_synonym} ts on ts.tid = tn.tid
-        WHERE td.vid = %d
-        AND tn.nid = %d
-        AND tn.inform_score > %d
-        AND tn.inform_score != %d
-        ORDER BY inform_score DESC", array($vid, $object->nid, $cutoff), 0, $count);
 
+  if (isset($object->nid)) {
+    $cache_key = "inform-taxonomy-related-" . $object->nid;
+    if ($cache_result = cache_get($cache_key,"inform")) {
+      $links = $cache_result->data;
+      $set_cache = false;
+    }
+    else
+    {
+      $set_cache = true;
+      $check_type_query = db_query("SELECT n.nid from {node} n JOIN {vocabulary_node_types} vnt on n.type = vnt.type where n.nid = %d", array($object->nid));
+      //If this node type can have inform taxonomy then query and show
+      if (db_fetch_object($check_type_query)) {
+        // Don't use the standard taxonomy functions for this block as
+        // the Inform module has settings which are needed in the query.
+        $sql = "
+          SELECT td.tid as term_id, coalesce(ts.name,td.name) as name, tn.inform_score
+          FROM {term_data} td
+          JOIN {term_node} tn on tn.tid = td.tid
+          LEFT OUTER JOIN {term_synonym} ts on ts.tid = tn.tid
+          WHERE td.vid = %d
+          AND tn.nid = %d
+          AND tn.inform_score > %d
+          AND tn.inform_score != %d
+          ORDER BY inform_score DESC";
+        if (function_exists("db_query_range_slave")) {
+          $result = db_query_range_slave($sql, array($vid, $object->nid, $cutoff), 0, $count);
+         }
+         else {
+           $result = db_query_range($sql, array($vid, $object->nid, $cutoff), 0, $count);
+         }
+         while ($row = db_fetch_array($result)) {
+           $links[] = array('data' => l($row['name'], 'taxonomy/term/' . $row['term_id']));
+         }
+      }
     }
   }
 
-  if (isset($result)) {
-    $links = array();
-    while ($row = db_fetch_array($result)) {
-      $links[] = array(data => l($row['name'], 'taxonomy/term/' . $row['term_id']));
-    }
+  // $set_cache and $links are only assigned on some paths above; guard both.
+  if (!empty($set_cache)) {
+    cache_set($cache_key, isset($links) ? $links : NULL, "inform");
+  } 
+  if (isset($links)) {
     return $links;
   }
 }
@@ -577,8 +611,8 @@ function inform_batch_settings() {
     '#default_value' => variable_get('inform_batch_size', 50),
     '#description' => t('The maxamum size of the batch.')
   );
-  
-  
+
+
   $form['submit'] = array(
     '#type' => 'submit',
     '#value' => t('Save'),
@@ -598,7 +632,7 @@ function inform_admin_settings() {
     '#default_value' => variable_get('inform_url', ''),
     '#description' => t('The URL of the extract web service for your Inform account'),
   );
-  
+
   $form['inform_itoken'] = array(
     '#type' => 'textfield',
     '#title' => t('iToken'),
@@ -629,7 +663,7 @@ function inform_admin_settings() {
     '#size' => 2,
     '#description' => t('The lowest score the related node must score in a category to get compared with the current node'),
   );
-  
+
   $form['inform_defer_tagging'] = array(
     '#type' => 'checkbox',
     '#title' => t('Defer tagging of articles'),
@@ -645,7 +679,7 @@ function inform_admin_settings() {
     '#size' => 2,
     '#description' => t('The age in days to wait before trying to re tag articles, set as 0 to disable re tagging.'),
   );
-  
+
   $form['inform_batch_size'] = array(
     '#type' => 'textfield',
     '#title' => t('Batch size'),
@@ -653,9 +687,36 @@ function inform_admin_settings() {
     '#size' => 2,
     '#description' => t('The default size of batches processed in each cron run. If set to 0 cron mode will be ineffective.'),
   );
-  
-  
-  
+
+  $form['inform_batch_running'] = array(
+    '#type' => 'checkbox',
+    '#title' => t('Unstick Batch'),
+    '#default_value' => variable_get('inform_batch_running', 0),
+    '#size' => 2,
+    '#description' => t('If an inform batch process has failed uncheck this box and submit the form to allow it to start again'),
+  );
+
+  // Figure out which node types are enabled.
+  $vocabulary = taxonomy_vocabulary_load(variable_get('inform_vocabulary', NULL));
+
+  // Only perform inform analysis and hiding of the taxonomy picker for certain node types (default, all where the vocab is enabled).
+  $enabled_types = variable_get('inform_analyze_node_types', FALSE);
+  $anonymous_problems = '';
+  foreach (node_get_types('names') as $type => $name) {
+    if (isset($vocabulary->nodes[$type])) {
+      $checkboxes[$type] = check_plain($name);
+    }
+  }
+
+  $form['inform_analyze_disabled_node_types'] = array(
+    '#type' => 'checkboxes',
+    '#title' => t('Content types to disable for Inform analysis'),
+    '#default_value' => variable_get('inform_analyze_disabled_node_types', array()),
+    '#options' => isset($checkboxes) ? $checkboxes : array(),
+    '#description' => t('Content types that have the vocabulary enabled and are checked here will not be analyzed by Inform and will have the form element visible on node add and node edit screens.'),
+  );
+
+
   $form["#validate"] = array(
     'inform_admin_settings_validate',
   );
@@ -672,15 +733,15 @@ function inform_admin_settings_validate(
     form_set_error('inform_url', t('You must specify the extract URL'));
   }
   else {
-    // Note: This will not fail gracefully with Xdebug on. 
+    // Note: This will not fail gracefully with Xdebug on.
     try {
       $client = new SoapClient(trim($form_state['values']['inform_url']), array());
     }
     catch (Exception $e) {
-      form_set_error('inform_url', t('The URL must be a valid web service'));  
+      form_set_error('inform_url', t('The URL must be a valid web service'));
     }
   }
-  
+
   if (strlen($form_state['values']['inform_itoken'])==0) {
     form_set_error('inform_itoken', t('You must specify the iToken'));
   }
@@ -706,13 +767,14 @@ function inform_admin_settings_validate(
 /**
  * Implementation of hook_form_alter().
  */
- 
+
 function inform_form_alter(&$form, $form_state, $form_id) {
-  // Remove the inform taxonomy as it gets to big to be 
-  // used effectivly.
+  // Remove the inform taxonomy if the admin has automatic analysis enabled
+  //  as it gets too big to be used effectively.
+  $disabled_analysis = variable_get('inform_analyze_disabled_node_types', array());
   $vid = variable_get('inform_vocabulary', NULL);
-  if (isset($form['taxonomy'][$vid])) {
-    unset($form['taxonomy'][$vid]);
+  if (isset($form['taxonomy']['tags'][$vid]) && empty($disabled_analysis[$form['type']['#value']])) {
+    unset($form['taxonomy']['tags'][$vid]);
   }
 }
 
@@ -727,15 +789,15 @@ function inform_form_alter(&$form, $form
 function inform_form_taxonomy_form_term_alter(&$form, $form_state) {
   $vid = variable_get('inform_vocabulary', FALSE);
   if ($form['#vocabulary']['vid'] == $vid) {
-    // Remove the delete button as it will have no effect on iform (terms will
+    // Remove the delete button as it will have no effect on Inform (terms will
     // keep coming back.
     unset($form['delete']);
-    // Add a blacklist button 
+    // Add a blacklist button
     $form['blacklist'] = array(
       '#type' => 'submit',
       '#value' => 'Blacklist',
     );
-    // Add a custom function to the submit action
+    // Add a custom function to the submit action.
     $form['#submit'][] = inform_form_term_submit;
   }
 }
@@ -779,7 +841,6 @@ function inform_form_taxonomy_form_vocab
  * @param $form_state
  * @return unknown_type
  */
-
 function inform_form_term_submit($form, &$form_state) {
   if ($form_state['clicked_button']['#value'] == t('Blacklist')) {
     $blacklist = variable_get('inform_blacklist', array());
@@ -804,8 +865,8 @@ function inform_reset_blacklist($form, &
     foreach ($blacklist_terms as $key => $value) {
       if ($value != "0") {
         $blacklist[$key] = $value;
-      } 
+      }
     }
     variable_set('inform_blacklist', $blacklist);
   }
-}
\ No newline at end of file
+}
