Index: search404.module
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/search404/search404.module,v
retrieving revision 1.14
diff -u -r1.14 search404.module
--- search404.module	20 Feb 2008 12:16:33 -0000	1.14
+++ search404.module	21 Feb 2008 21:52:00 -0000
@@ -1,75 +1,148 @@
 <?php
-/* $Id: search404.module,v 1.14 2008/02/20 12:16:33 zyxware Exp $ */
+/* $Id: search404.module,v 1.13.2.5 2007/04/22 15:12:26 forngren Exp $ */
 
 /**
  * Implementation of hook_menu().
  */
-function search404_menu($may_cache) {
-  $items = array();
+function search404_menu() {
 
-  if ($may_cache) {
-    $items[] = array('path' => 'search404',
-      'title' => t('Page not found'), 'access' => true,
-      'callback' => 'search404_page', 'type' => MENU_CALLBACK);
-    $items[] = array(
-      'path' => 'admin/settings/search404',
-      'title' => t('Search 404 settings'),
-      'description' => t('Administer search 404.'),
-      'callback' => 'drupal_get_form',
-      'callback argument' => array('search404_settings'),
-      'access' => user_access('administer site configuration'),
-      'type' => MENU_NORMAL_ITEM,
-    );
-  }
+  $items['search404'] = array(
+    'title' => 'Page not found', // Menu titles are passed untranslated.
+    'access callback' => TRUE, // Open to all; bare 'access arguments' would run user_access(TRUE).
+    'page callback' => 'search404_page',
+    'type' => MENU_CALLBACK,
+  );
+
+  $items['admin/settings/search404'] = array(
+    'title' => 'Search 404 settings',
+    'description' => 'Administer search 404.',
+    'page callback' => 'drupal_get_form',
+    'page arguments' => array('search404_settings'),
+    'access arguments' => array('administer site configuration'),
+    'type' => MENU_NORMAL_ITEM,
+  );
 
   return $items;
 }
 
 /**
+ * Replacement for search_get_keys(): builds keywords from the not-found path.
+ *
+ * WARNING: the result comes from unsanitized request data (possible SQL
+ * injection/XSS) and must be sanitized before use. Returns false on abort.
+ */
+function search404_get_keys() {
+  // $_REQUEST['destination'] is set by drupal_not_found(); tolerate its absence.
+  $keys = isset($_REQUEST['destination']) ? $_REQUEST['destination'] : '';
+  // Abort query on certain extensions, e.g: gif jpg jpeg png
+  $extensions = variable_get('search404_ignore_query', 'gif jpg jpeg bmp png');
+  $extensions = implode('|', preg_split('/\s+/', $extensions, -1, PREG_SPLIT_NO_EMPTY));
+  if (!empty($extensions) && preg_match("/\.($extensions)$/", $keys)) {
+    return false;
+  }
+  // Ignore certain extensions from query
+  $extensions = variable_get('search404_ignore_extensions', 'htm html php');
+  $extensions = implode('|', preg_split('/\s+/', $extensions, -1, PREG_SPLIT_NO_EMPTY));
+  if (!empty($extensions)) {
+    $keys = preg_replace("/\.($extensions)$/", '', $keys);
+  }
+  // Split into words; empty tokens are dropped so no dangling "OR" is produced.
+  $keys = preg_split('/['. PREG_CLASS_SEARCH_EXCLUDE .']+/u', $keys, -1, PREG_SPLIT_NO_EMPTY);
+  // Ignore certain words
+  $keys = array_diff($keys, explode(' ', variable_get('search404_ignore', 'and or the')));
+
+  $modifier = variable_get('search404_use_or', false) ? ' OR ' : ' ';
+  $keys = trim(implode($modifier, $keys));
+  return $keys;
+}
+
+/**
+ * Detect search from search engine (BETA)
+ *
+ * WARNING: the result is taken from the unsanitized HTTP referer (possible
+ * SQL injection/XSS) and must be sanitized before use. Returns false if none.
+ */
+function search404_search_engine_query() {
+  // Host fragment => query-string parameter looked up for the search terms.
+  $engines = array(
+    'altavista' => 'q',
+    'aol' => 'query',
+    'google' => 'q',
+    'live' => 'q',
+    'lycos' => 'query',
+    'msn' => 'q',
+    'yahoo' => 'p',
+  );
+  // A missing or unparsable referer, or one without host/query, yields nothing.
+  $parsed_url = isset($_SERVER['HTTP_REFERER']) ? parse_url($_SERVER['HTTP_REFERER']) : false;
+  if (empty($parsed_url['host']) || empty($parsed_url['query'])) {
+    return false;
+  }
+  parse_str($parsed_url['query'], $query);
+  foreach ($engines as $host => $key) {
+    if (strpos($parsed_url['host'], $host) !== false && isset($query[$key]) && is_string($query[$key])) {
+      return trim($query[$key]);
+    }
+  }
+
+  return false;
+}
+
+/**
  * Main search function.
- *
- * Updated: Improved by using (stealing) code by Steven
- * - http://drupal.org/node/12668
+ * Started with: http://drupal.org/node/12668
+ * Updated to be more similar to search_view
+ * Beware of messy code
  */
 function search404_page() {
-  $output = t('<p>The page you requested was not found.</p>');
+  $output = '<p>'. t('The page you requested was not found.') .'</p>';
 
-  if (user_access('search content')) {
-    $ignore = explode(' ', variable_get('search404_ignore', 'htm html php'));
-    
-    $q = $_REQUEST['destination'];
-    $keys = split('[^A-Za-z0-9]+', $q);
-    $keys = array_diff($keys, $ignore);
-    
-    $modifier = variable_get('search404_use_or', false) ? ' OR ' : ' ';
-    $keys = trim(implode($modifier, $keys));
+  if (module_exists('search') && user_access('search content')) {
+    $keys = "";
+    if (variable_get('search404_use_search_engine', false)) {
+      $keys = search404_search_engine_query();
+    }
+    if (!$keys) {
+      $keys = search404_get_keys();
+    }
     if ($keys) {
-      // Get search results
+      // TODO: watchdog?
       $results = module_invoke('node', 'search', 'search', $keys);
-
-      if (isset($results) && is_array($results) && count($results)) {
+      if (isset($results) && is_array($results) && count($results) == 1 && variable_get('search404_jump', false)) {
         // First, check to see if there is exactly 1 result
-        if (variable_get('search404_jump', false) && count($results) == 1) {
-          drupal_set_message(t('The page you requested does not exist. A search for &quot;<i>%keys</i>&quot; resulted in this page.', array('%keys' => $keys)));
-          // overwrite $_REQUEST['destination'] because it is set by drupal_not_found()
-          $_REQUEST['destination'] = 'node/' . $results[0]['node']->nid;
-          drupal_goto();
-        }
-
-        // Otherwise, redirect to a search page
-        drupal_set_message(t('The page you requested does not exist. For your convenience, a search was performed using the query &quot;<i>%keys</i>&quot;.', array('%keys' => $keys)));
+        drupal_set_message(t('The page you requested does not exist. A search for %keys resulted in this page.', array('%keys' => check_plain($keys))), 'status');
         // overwrite $_REQUEST['destination'] because it is set by drupal_not_found()
-        $_REQUEST['destination'] = 'search/node/' . $keys;
-        drupal_goto();
+        $_REQUEST['destination'] = 'node/'. $results[0]['node']->nid;
+        drupal_goto('search/node');
       }
-      // If there are not results, tell the user
       else {
-        drupal_set_message(t('A search was performed for &quot;<i>%keys</i>&quot;, but nothing was found. Please use the form below to search all of our site content.', array('%keys' => $keys)));
-        // Redirect to the search page to allow the user to perform a different search.
-        $_REQUEST['destination'] = 'search';
-        drupal_goto();
+        drupal_set_message(t('The page you requested does not exist. For your convenience, a search was performed using the query %keys.', array('%keys' => check_plain($keys))), 'error');
+        if (isset($results) && is_array($results) && count($results) > 0) {
+          drupal_add_css(drupal_get_path('module', 'search') .'/search.css', 'module', 'all', FALSE);
+          // EVIL HAXX!
+            //$oldgetq = $_GET['q'];
+            //$olddestination = $_REQUEST['destination'];
+            //unset($_REQUEST['destination']);
+            $_GET['q'] = "search/node/$keys";
+            $results = theme('search_results', $results, 'node');
+            //$_GET['q'] = $oldgetq;
+            //$_REQUEST['destination'] = $olddestination;
+            $_REQUEST['destination'] = "search/node/$keys";
+            //drupal_goto();
+          // END OF EVIL HAXX!
+          drupal_goto();
+        }
+        else {
+          $results = search_help('search#noresults', drupal_help_arg());
+        }
+        $results = theme('box', t('Search results'), $results);
       }
     }
+
+    // Construct the search form.
+    //$output .= drupal_get_form('search_form', NULL, $keys, 'node');
+    $output = drupal_get_form('search_form', NULL, $keys, 'node');
+    $output .= $results;
   }
 
   return $output;
@@ -84,19 +157,36 @@
     '#title' => t('Jump directly to the search result when there is only one result.'),
     '#default_value' => variable_get('search404_jump', false),
   );
-
   $form['search404_use_or'] = array(
     '#type' => 'checkbox',
     '#title' => t('Use OR between keywords when searching.'),
     '#default_value' => variable_get('search404_use_or', false),
   );
-  
+  $form['search404_use_search_engine'] = array(
+    '#type' => 'checkbox',
+    '#title' => t('Use auto-detection of keywords from search engine referer. BETA! Not for production sites, use at your own risk.'),
+    '#default_value' => variable_get('search404_use_search_engine', false),
+  );
   $form['search404_ignore'] = array(
     '#type' => 'textfield',
     '#title' => t('Words to ignore'),
-    '#description' => t('Separate words with a space, e.g.: "htm html php".'),
-    '#default_value' => variable_get('search404_ignore', 'htm html php'),
+    '#description' => t('These words will be ignored from query. Separate words with a space, e.g.: "and or the".'),
+    '#default_value' => variable_get('search404_ignore', 'and or the'),
+  );
+  $form['search404_ignore_extensions'] = array(
+    '#type' => 'textfield',
+    '#title' => t('Extensions to ignore'),
+    '#description' => t('These extensions will be ignored from query. Separate extensions with a space, e.g.: "htm html php". Do not include leading dot.'),
+    '#default_value' => variable_get('search404_ignore_extensions', 'htm html php'),
+  );
+  $form['search404_ignore_query'] = array(
+    '#type' => 'textfield',
+    '#title' => t('Extensions to abort search'),
+    '#description' => t('A search will not be performed for a query ending in the following extensions. Separate extensions with a space, e.g.: "gif jpg jpeg bmp png". Do not include leading dot.'),
+    '#default_value' => variable_get('search404_ignore_query', 'gif jpg jpeg bmp png'),
   );
 
-   return system_settings_form($form);
-}
+  // Per module settings
+  $form = array_merge($form, module_invoke_all('search404', 'admin'));
+  return system_settings_form($form);
+}
\ No newline at end of file

