Index: test.php
===================================================================
RCS file: test.php
diff -N test.php
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ test.php	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,43 @@
+<?php
+// $Id: index.php,v 1.96 2008/09/20 20:22:23 webchick Exp $
+
+/**
+ * @file
+ * Manual smoke test for the text browser in includes/browser.
+ *
+ * Bootstraps Drupal, submits the form with the 'Log in' button on the page
+ * found at url('') and prints the content of the page that comes back.
+ *
+ * All Drupal code is released under the GNU General Public License.
+ * See COPYRIGHT.txt and LICENSE.txt.
+ */
+
+/**
+ * Root directory of Drupal installation.
+ */
+define('DRUPAL_ROOT', dirname(realpath(__FILE__)));
+
+require_once DRUPAL_ROOT . '/includes/bootstrap.inc';
+drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);
+require_once DRUPAL_ROOT . '/includes/browser/browser.inc';
+
+// Plain GET requests that can be tried instead of the login post:
+//   $response = $browser->get('http://ipchicken.com/');
+//   $response = $browser->get('http://www.archive.org/');
+$browser = Browser::getInstance();
+
+// Values for the fields of the form, keyed by field name.
+$credentials = array();
+$credentials['name'] = 'drupal';
+$credentials['pass'] = 'password';
+$form_url = url('', array('absolute' => TRUE));
+$response = $browser->post($form_url, $credentials, 'Log in');
+
+// The parsed page can be inspected as well after a successful request:
+//   $page = $browser->getPage();
+//   echo ($page ? 'true' : 'false');
+//   print_r($page->xpath('//img[@src="images/ipc.gif"]'));
+
+// Print the content that was fetched, or a marker if the request did not
+// succeed.
+echo $response ? $response['content'] : 'FAIL!';
Index: includes/browser/wrapper.inc
===================================================================
RCS file: includes/browser/wrapper.inc
diff -N includes/browser/wrapper.inc
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ includes/browser/wrapper.inc	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,82 @@
+<?php
+// $Id$
+
+/**
+ * @file
+ * Interface implemented by the HTTP backends of the Drupal text browser.
+ */
+
+/**
+ * Contract between the browser and a backend that performs HTTP requests.
+ */
+interface HttpWrapper {
+
+  /**
+   * Describes the backend.
+   *
+   * @return
+   *   An associative array of backend properties. Browser reads the 'methods'
+   *   entry, a list of the upper-case HTTP method names that are supported.
+   */
+  public static function info();
+
+  /**
+   * Prepares the backend for making requests.
+   */
+  public function open();
+
+  /**
+   * Releases whatever open() set up.
+   */
+  public function close();
+
+  /**
+   * Returns the headers that are sent along with requests.
+   *
+   * @return
+   *   The array last given to setRequestHeaders().
+   */
+  public function getRequestHeaders();
+
+  /**
+   * Replaces the headers that are sent along with requests.
+   *
+   * @param $headers
+   *   An array of header values keyed by header name, e.g. 'User-Agent'.
+   */
+  public function setRequestHeaders($headers = array());
+
+  /**
+   * Performs a GET request.
+   *
+   * @param $url
+   *   The URL to request.
+   * @return
+   *   An associative array with the keys 'url', 'headers' and 'content'. The
+   *   cURL backend returns FALSE instead when the request failed.
+   */
+  public function get($url);
+
+  /**
+   * Performs a POST request.
+   *
+   * @param $url
+   *   The URL to post to.
+   * @param $fields
+   *   An array of values to post, keyed by field name.
+   * @return
+   *   The same structure as returned by get().
+   */
+  public function post($url, array $fields);
+
+  /**
+   * Performs a request with an arbitrary HTTP method.
+   *
+   * NOTE(review): no backend implements this yet, so the parameters beyond
+   * $method and the return value are still to be defined.
+   *
+   * @param $method
+   *   The HTTP method to use.
+   */
+  public function request($method);
+}
Index: includes/browser/stream.inc
===================================================================
RCS file: includes/browser/stream.inc
diff -N includes/browser/stream.inc
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ includes/browser/stream.inc	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,9 @@
+<?php
+// $Id$
+
+/**
+ * @file
+ * Stream implementation of the HTTP wrapper for the browser (placeholder).
+ */
+
+// Best file in Drupal.
Index: includes/browser/curl.inc
===================================================================
RCS file: includes/browser/curl.inc
diff -N includes/browser/curl.inc
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ includes/browser/curl.inc	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,292 @@
+<?php
+// $Id$
+
+/**
+ * @file
+ * cURL implementation of the HTTP wrapper used by the browser.
+ */
+
+/**
+ * Performs the HTTP requests of the browser through the PHP cURL extension.
+ */
+class HttpWrapper_curl implements HttpWrapper {
+
+  /**
+   * The handle of the current cURL connection.
+   *
+   * @var resource
+   */
+  protected $handle;
+
+  /**
+   * The current cookie file used by cURL.
+   *
+   * We do not reuse the cookies in further runs, so we do not need a file
+   * but we still need cookie handling, so we set the jar to NULL.
+   */
+  protected $cookieFile = NULL;
+
+  /**
+   * The headers to send with requests, keyed by header name.
+   *
+   * @var array
+   */
+  protected $request_headers = array();
+
+  /**
+   * The effective URL of the last request.
+   *
+   * @var string
+   */
+  protected $url;
+
+  /**
+   * The raw response header lines collected during the last request.
+   *
+   * @var array
+   */
+  protected $headers = array();
+
+  /**
+   * The result of the last curl_exec() call: the response or FALSE on failure.
+   *
+   * @var string
+   */
+  protected $content;
+
+  /**
+   * Does nothing.
+   *
+   * NOTE(review): the name does not match the class name, so PHP never runs
+   * this as a constructor. It is kept only because it is a public method.
+   */
+  public function Browser_curl() {
+
+  }
+
+  /**
+   * Describes this backend.
+   *
+   * @return
+   *   An associative array with the keys 'name', 'cookies' and 'methods'.
+   */
+  public static function info() {
+    return array(
+      'name' => 'cURL',
+      'cookies' => TRUE,
+      'methods' => array('POST', 'GET'),
+    );
+  }
+
+  /**
+   * Initializes the cURL handle with the default options, unless one is open.
+   */
+  public function open() {
+    if (!isset($this->handle)) {
+      $this->handle = curl_init();
+      curl_setopt_array($this->handle, $this->getDefaultOptions());
+    }
+  }
+
+  /**
+   * Close the cURL handler and unset the handler.
+   */
+  public function close() {
+    if (isset($this->handle)) {
+      curl_close($this->handle);
+      // Reset instead of unset() so the declared property stays in place.
+      $this->handle = NULL;
+    }
+  }
+
+  /**
+   * Builds the options applied to every newly opened cURL handle.
+   *
+   * @return
+   *   An array of option values keyed by CURLOPT_* constant.
+   */
+  protected function getDefaultOptions() {
+    return array(
+      CURLOPT_COOKIEJAR => $this->cookieFile,
+      CURLOPT_FOLLOWLOCATION => TRUE,
+      CURLOPT_HEADERFUNCTION => array($this, 'headerCallback'),
+      CURLOPT_HTTPHEADER => $this->formatRequestHeaders(),
+      CURLOPT_RETURNTRANSFER => TRUE,
+      // NOTE(review): peer and host verification are switched off, so HTTPS
+      // responses are not authenticated. Confirm that this is intended.
+      CURLOPT_SSL_VERIFYPEER => FALSE,
+      CURLOPT_SSL_VERIFYHOST => FALSE,
+      CURLOPT_URL => '/',
+      CURLOPT_USERAGENT => $this->getUserAgent(),
+    );
+  }
+
+  /**
+   * Returns the User-Agent request header.
+   *
+   * @return
+   *   The value of the 'User-Agent' request header, or NULL if it is not set.
+   */
+  protected function getUserAgent() {
+    if (isset($this->request_headers['User-Agent'])) {
+      return $this->request_headers['User-Agent'];
+    }
+    return NULL;
+  }
+
+  /**
+   * Converts the request headers into the format cURL expects.
+   *
+   * The headers are stored keyed by name, whereas CURLOPT_HTTPHEADER takes a
+   * list of complete "Name: value" lines.
+   *
+   * @return
+   *   A list of header lines. Values stored under a numeric key are passed on
+   *   unchanged, on the assumption that they already are complete lines.
+   */
+  protected function formatRequestHeaders() {
+    $lines = array();
+    foreach ($this->request_headers as $name => $value) {
+      $lines[] = is_int($name) ? $value : $name . ': ' . $value;
+    }
+    return $lines;
+  }
+
+  /**
+   * Returns the headers that are sent with requests.
+   *
+   * @return
+   *   The array last given to setRequestHeaders().
+   */
+  public function getRequestHeaders() {
+    return $this->request_headers;
+  }
+
+  /**
+   * Replaces the headers that are sent with requests.
+   *
+   * @param $headers
+   *   An array of header values keyed by header name.
+   */
+  public function setRequestHeaders($headers = array()) {
+    $this->request_headers = $headers;
+
+    // Update request headers if handle is open.
+    if (isset($this->handle)) {
+      curl_setopt($this->handle, CURLOPT_USERAGENT, $this->getUserAgent());
+      curl_setopt($this->handle, CURLOPT_HTTPHEADER, $this->formatRequestHeaders());
+    }
+  }
+
+  /**
+   * Performs a GET request.
+   *
+   * @param $url
+   *   The URL to request.
+   * @return
+   *   An associative array with the keys 'url', 'headers' and 'content', or
+   *   FALSE if the request failed.
+   */
+  public function get($url) {
+    $this->execute(array(
+      CURLOPT_HTTPGET => TRUE,
+      CURLOPT_URL => $url,
+      CURLOPT_NOBODY => FALSE,
+    ));
+
+    return $this->buildRequest();
+  }
+
+  /**
+   * Performs a POST request.
+   *
+   * @param $url
+   *   The URL to post to.
+   * @param $fields
+   *   An array of values to post, keyed by field name.
+   * @return
+   *   An associative array with the keys 'url', 'headers' and 'content', or
+   *   FALSE if the request failed.
+   */
+  public function post($url, array $fields) {
+    // TODO Add upload handling code.
+
+    // Encode according to application/x-www-form-urlencoded. Both names and
+    // values need to be urlencoded, according to
+    // http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1
+    $pairs = array();
+    foreach ($fields as $key => $value) {
+      $pairs[] = urlencode($key) . '=' . urlencode($value);
+    }
+
+    $this->execute(array(
+      CURLOPT_POST => TRUE,
+      CURLOPT_URL => $url,
+      CURLOPT_POSTFIELDS => implode('&', $pairs),
+    ));
+
+    return $this->buildRequest();
+  }
+
+  /**
+   * Performs a request with an arbitrary HTTP method. Not implemented yet.
+   *
+   * @param $method
+   *   The HTTP method to use.
+   */
+  public function request($method) {
+    // TODO CURLOPT_CUSTOMREQUEST
+  }
+
+  /**
+   * Packages the outcome of the last request.
+   *
+   * @return
+   *   An associative array with the keys 'url', 'headers' and 'content', or
+   *   FALSE if the last curl_exec() call failed.
+   */
+  protected function buildRequest() {
+    if ($this->content !== FALSE) {
+      return array(
+        'url' => $this->url,
+        'headers' => $this->headers,
+        'content' => $this->content,
+      );
+    }
+    return FALSE;
+  }
+
+  /**
+   * Performs a cURL exec with the specified options after calling open().
+   *
+   * @param $options
+   *   Changes to the current cURL options.
+   */
+  protected function execute($options) {
+    $this->open();
+
+    // Forget the headers of the previous request; headerCallback() only ever
+    // appends, so they would otherwise pile up across requests.
+    $this->headers = array();
+
+    curl_setopt_array($this->handle, $options);
+    $this->content = curl_exec($this->handle);
+    $this->url = curl_getinfo($this->handle, CURLINFO_EFFECTIVE_URL);
+    // $this->headers has been filled by headerCallback() at this point.
+  }
+
+  /**
+   * Reads headers and stores in $headers array.
+   *
+   * @param $handler
+   *   The cURL handler.
+   * @param $header
+   *   A header line.
+   * @return
+   *   The string length of the header. (required by cURL)
+   */
+  protected function headerCallback($handler, $header) {
+    $this->headers[] = $header;
+    return strlen($header);
+  }
+}
Index: includes/browser/page.inc
===================================================================
RCS file: includes/browser/page.inc
diff -N includes/browser/page.inc
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ includes/browser/page.inc	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,322 @@
+<?php
+// $Id$
+
+/**
+ * @file
+ * BrowserPage class used with Browser.
+ */
+
+/**
+ * Represents a page of content that has been fetched by the Browser. The class
+ * provides a number of convenience methods that relate to page content.
+ */
+class BrowserPage {
+
+  /**
+   * The URL of the current page.
+   *
+   * @var string
+   */
+  protected $url;
+
+  /**
+   * The headers of the current page.
+   *
+   * @var Array
+   */
+  protected $headers;
+
+  /**
+   * The root element of the loaded content, or FALSE if loading failed.
+   *
+   * @var SimpleXMLElement
+   */
+  protected $root;
+
+  /**
+   * Load content into page.
+   *
+   * @param $url
+   *   The URL of the page.
+   * @param $headers
+   *   The headers of the page.
+   * @param $content
+   *   Content to load.
+   */
+  public function __construct($url, $headers, $content) {
+    $this->url = $url;
+    $this->headers = $headers;
+    $this->root = $this->load($content);
+  }
+
+  /**
+   * Load contents into simplexml.
+   *
+   * @param $content
+   *   Content to load.
+   * @return
+   *   Root SimpleXML element or FALSE.
+   */
+  protected function load($content) {
+    // There is nothing to parse in a missing or blank document.
+    if (!is_string($content) || trim($content) === '') {
+      return FALSE;
+    }
+
+    // Use DOM to load HTML soup, and hide warnings: libxml keeps the markup
+    // errors to itself instead of raising a PHP warning for each of them.
+    $previous = libxml_use_internal_errors(TRUE);
+    $dom = new DOMDocument();
+    $loaded = $dom->loadHTML($content);
+    libxml_clear_errors();
+    libxml_use_internal_errors($previous);
+
+    if ($loaded) {
+      $root = simplexml_import_dom($dom);
+      // Check the type rather than the truth value, which is FALSE for an
+      // element that has neither children nor attributes.
+      if ($root instanceof SimpleXMLElement) {
+        return $root;
+      }
+    }
+    return FALSE;
+  }
+
+  /**
+   * Check if the content could be loaded.
+   *
+   * @return
+   *   TRUE if content is loaded, FALSE if content failed to load.
+   */
+  public function isValid() {
+    return ($this->root !== FALSE);
+  }
+
+  /**
+   * Perform an xpath search on the contents of the page. The search is relative
+   * to the root element (HTML tag normally) of the page.
+   *
+   * @param $xpath
+   *   The xpath string to use in the search.
+   * @return
+   *   The return value of the xpath search, or FALSE if the page content could
+   *   not be loaded. For details on the xpath string format and return values
+   *   see the SimpleXML documentation.
+   *   http://us.php.net/manual/function.simplexml-element-xpath.php
+   */
+  public function xpath($xpath) {
+    if (!$this->isValid()) {
+      return FALSE;
+    }
+    return $this->root->xpath($xpath);
+  }
+
+  /**
+   * Get all the form elements contained by the page.
+   *
+   * @return
+   *   An array of form elements.
+   */
+  public function getForms() {
+    return $this->xpath('//form');
+  }
+
+  /**
+   * Get all the input elements contained by the page, or nested within a form
+   * when specified.
+   *
+   * @param $form
+   *   Searched for inputs contained by the form.
+   * @return
+   *   Array of input, textarea and select elements.
+   */
+  public function getInputs($form = NULL) {
+    $query = './/input|.//textarea|.//select';
+    // Check the type rather than the truth value, which is FALSE for a form
+    // element that has neither children nor attributes.
+    if ($form instanceof SimpleXMLElement) {
+      return $form->xpath($query);
+    }
+    return $this->xpath($query);
+  }
+
+  /**
+   * Get a single field of the page. Not implemented yet.
+   */
+  public function getField() {
+    // TODO
+  }
+
+  /**
+   * Get all the options contained by a select, including nested options.
+   *
+   * @param $select
+   *   The select to get the options from.
+   * @return
+   *   Associative array where the keys represent each option value and the
+   *   value is the text contained within the option tag. For example:
+   * @code
+   *   array(
+   *     'option1' => 'Option 1',
+   *     'option2' => 'Option 2',
+   *   )
+   * @endcode
+   */
+  public function getSelectOptions(SimpleXMLElement $select) {
+    $options = array();
+    foreach ($this->getSelectElements($select) as $element) {
+      $options[$this->getOptionValue($element)] = $this->asText($element);
+    }
+    return $options;
+  }
+
+  /**
+   * Get all selected options contained by a select, including nested options.
+   *
+   * @param $select
+   *   The select to get the options from.
+   * @return
+   *   Associative array of selected items in the format described by
+   *   getSelectOptions().
+   */
+  public function getSelectedOptions(SimpleXMLElement $select) {
+    $options = array();
+    foreach ($this->getSelectElements($select) as $element) {
+      // The attribute has to be looked up on the option itself.
+      if (isset($element['selected'])) {
+        $options[$this->getOptionValue($element)] = $this->asText($element);
+      }
+    }
+    return $options;
+  }
+
+  /**
+   * Get all the options contained by a select, including nested options.
+   *
+   * @param $element
+   *   The element to get the options from.
+   * @return
+   *   An array of options contained by the select.
+   */
+  public function getSelectElements(SimpleXMLElement $element) {
+    $options = array();
+
+    // Add all options items.
+    foreach ($element->option as $option) {
+      $options[] = $option;
+    }
+
+    // Search option group children by applying this method to each group.
+    if (isset($element->optgroup)) {
+      foreach ($element->optgroup as $group) {
+        $options = array_merge($options, $this->getSelectElements($group));
+      }
+    }
+    return $options;
+  }
+
+  /**
+   * Get the value an option submits.
+   *
+   * @param $option
+   *   The option element.
+   * @return
+   *   The value attribute of the option, or the text of the option when it has
+   *   no value attribute.
+   */
+  protected function getOptionValue(SimpleXMLElement $option) {
+    if (isset($option['value'])) {
+      return (string) $option['value'];
+    }
+    return $this->asText($option);
+  }
+
+  /**
+   * Convert a URL found on the page into an absolute URL.
+   *
+   * @param $path
+   *   An absolute URL, a path starting with a slash or a path relative to the
+   *   base URL of the page.
+   * @return
+   *   The absolute URL. $path is returned unchanged if it already has a scheme,
+   *   or if it starts with a slash while the base URL has no scheme and host
+   *   to combine it with.
+   */
+  public function getAbsoluteUrl($path) {
+    $path = (string) $path;
+    $parts = @parse_url($path);
+    if (isset($parts['scheme'])) {
+      return $path;
+    }
+
+    $base = $this->getBaseUrl();
+    if ($path === '' || $path[0] != '/') {
+      // Nothing to add, or a path relative to the base URL.
+      return $base . $path;
+    }
+
+    // Lead / then use host as base.
+    $parts = @parse_url($base);
+    if (!isset($parts['scheme']) || !isset($parts['host'])) {
+      return $path;
+    }
+    if (substr($path, 0, 2) == '//') {
+      // A network-path reference only lacks the scheme.
+      return $parts['scheme'] . ':' . $path;
+    }
+    $base = $parts['scheme'] . '://' . $parts['host'];
+    if (isset($parts['port'])) {
+      // Keep a non-default port; parse_url() reports it separately.
+      $base .= ':' . $parts['port'];
+    }
+    return $base . $path;
+  }
+
+  /**
+   * Get the URL that relative paths on the page are appended to.
+   *
+   * @return
+   *   The href of the first base element that has one, otherwise the URL of
+   *   the page without its query string and fragment. The result always ends
+   *   with a slash.
+   */
+  public function getBaseUrl() {
+    $base = '';
+
+    // Check for base element.
+    $elements = $this->xpath('.//base');
+    if ($elements) {
+      // More than one may be specified.
+      foreach ($elements as $element) {
+        if (isset($element['href'])) {
+          $base = (string) $element['href'];
+          break;
+        }
+      }
+    }
+
+    if ($base === '') {
+      // No usable base element: cut the query string and the fragment off
+      // the URL of the page.
+      $base = preg_replace('/[?#].*$/s', '', (string) $this->url);
+    }
+
+    if (substr($base, -1) != '/') {
+      $base .= '/';
+    }
+    return $base;
+  }
+
+  /**
+   * Extract the text contained by the element.
+   *
+   * @param $element
+   *   Element to extract text from.
+   * @return
+   *   Extracted text.
+   */
+  public function asText(SimpleXMLElement $element) {
+    // Decode entities as UTF-8, the encoding the DOM extension works with.
+    return trim(html_entity_decode(strip_tags($element->asXML()), ENT_QUOTES, 'UTF-8'));
+  }
+}
Index: includes/browser/browser.inc
===================================================================
RCS file: includes/browser/browser.inc
diff -N includes/browser/browser.inc
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ includes/browser/browser.inc	1 Jan 1970 00:00:00 -0000
@@ -0,0 +1,588 @@
+<?php
+// $Id$
+
+/**
+ * @file
+ * Text web browser for Drupal.
+ */
+
+/*
+ * TODO
+ *
+ * Hooks for get, post events.
+ * HTTP authentication
+ * Decide on parsing which will allow for meta refresh support
+ * Deal with Drupal-Assertion callback stuff.
+ * Support PUT, DELETE, HEAD, and OPTIONS -chx
+ *   http://moggy.laceous.com/2008/08/01/custom-request-methods-and-php/
+ *   "RESTfully designed web-services use POST, GET, PUT, and DELETE"
+ * Error handling.
+ * Deal with a form containing one button without a name, even no value?
+ * Page support proxy behavior.
+ */
+
+// Resolve the include against this file, not the current working directory.
+require_once dirname(__FILE__) . '/page.inc';
+
+/**
+ * Requests pages through an HTTP wrapper and submits the forms found on them.
+ */
+class Browser {
+
+  /**
+   * The Browser instance.
+   *
+   * @var Browser
+   */
+  protected static $browser;
+
+  /**
+   * The HTTP wrapper that performs the requests.
+   *
+   * @var HttpWrapper
+   */
+  protected $wrapper;
+
+  /**
+   * The information returned by the info() method of the wrapper.
+   *
+   * @var array
+   */
+  protected $info;
+
+  /**
+   * The URL of the current page.
+   *
+   * @var string
+   */
+  protected $url;
+
+  /**
+   * The headers of the current page.
+   *
+   * @var Array
+   */
+  protected $headers;
+
+  /**
+   * The content of the page currently loaded in the internal browser.
+   *
+   * @var string
+   */
+  protected $content;
+
+  /**
+   * The parsed current page, created on demand by getPage().
+   *
+   * @var BrowserPage
+   */
+  protected $page;
+
+  /**
+   * Sets up the HTTP wrapper. Use getInstance() to obtain the browser.
+   */
+  protected function __construct() {
+    // TODO use variable to switch between curl and stream.
+    require_once dirname(__FILE__) . '/wrapper.inc';
+    require_once dirname(__FILE__) . '/curl.inc';
+
+    $this->wrapper = new HttpWrapper_curl();
+    $this->info = HttpWrapper_curl::info();
+    $this->setUserAgent('Drupal (+http://drupal.org/)');
+  }
+
+  /**
+   * Returns the browser, creating it on the first call.
+   *
+   * @return
+   *   The single Browser instance.
+   */
+  final public static function getInstance() {
+    if (!isset(self::$browser)) {
+      self::$browser = new Browser();
+    }
+    return self::$browser;
+  }
+
+  /**
+   * Returns the User-Agent request header.
+   *
+   * @return
+   *   The value of the header, or NULL if it is not set.
+   */
+  public function getUserAgent() {
+    $headers = $this->getRequestHeaders();
+    return isset($headers['User-Agent']) ? $headers['User-Agent'] : NULL;
+  }
+
+  /**
+   * Sets the User-Agent request header, keeping the other request headers.
+   *
+   * @param $agent
+   *   The value of the header.
+   */
+  public function setUserAgent($agent) {
+    $headers = $this->getRequestHeaders();
+    $headers['User-Agent'] = $agent;
+    $this->wrapper->setRequestHeaders($headers);
+  }
+
+  /**
+   * Returns the request headers held by the HTTP wrapper.
+   */
+  public function getRequestHeaders() {
+    return $this->wrapper->getRequestHeaders();
+  }
+
+  /**
+   * Hands a new set of request headers to the HTTP wrapper.
+   *
+   * @param $headers
+   *   An array of header values keyed by header name.
+   */
+  public function setRequestHeaders(array $headers) {
+    $this->wrapper->setRequestHeaders($headers);
+  }
+
+  /**
+   * Make an HTTP GET request to the specified URL.
+   *
+   * @param $url
+   *   Full URL to retrieve.
+   * @return
+   *   The associative array returned by the HTTP wrapper, with the keys 'url',
+   *   'headers' and 'content', or FALSE if the request could not be made.
+   */
+  public function get($url) {
+    if (!$this->isMethodSupported('GET')) {
+      return FALSE;
+    }
+    return $this->finishRequest($this->wrapper->get($url));
+  }
+
+  /**
+   * Submit a form through an HTTP POST request.
+   *
+   * @param $url
+   *   Full URL of the page that contains the form, or an empty value to use
+   *   the page that is currently loaded.
+   * @param $fields
+   *   An array of values for the form, keyed by field name. Fields of the form
+   *   that are not listed are posted with the value found in the HTML.
+   * @param $submit
+   *   The value of the submit button to press; it identifies the form.
+   * @return
+   *   The associative array returned by the HTTP wrapper, or FALSE if the page
+   *   could not be loaded, no form has such a button or the request failed.
+   */
+  public function post($url, array $fields, $submit) {
+    if (!$this->isMethodSupported('POST')) {
+      return FALSE;
+    }
+
+    // If URL is set then request the page, otherwise use the current page.
+    if ($url && $this->get($url) === FALSE) {
+      return FALSE;
+    }
+
+    if (($page = $this->getPage()) === FALSE) {
+      return FALSE;
+    }
+
+    if (($form = $this->findForm($fields, $submit)) === FALSE) {
+      return FALSE;
+    }
+
+    // If form specified action then use that for the post url, otherwise the
+    // form is posted to the URL the page was actually served from.
+    $target = $this->url;
+    if ($form['action'] !== FALSE && $form['action'] !== '') {
+      $target = $page->getAbsoluteUrl($form['action']);
+    }
+
+    return $this->finishRequest($this->wrapper->post($target, $form['post']));
+  }
+
+  /**
+   * Records the outcome of a request made by the HTTP wrapper.
+   *
+   * @param $request
+   *   The value returned by the HTTP wrapper.
+   * @return
+   *   $request if it is an array, FALSE otherwise.
+   */
+  protected function finishRequest($request) {
+    // TODO Error check, look for meta refresh, etc.
+    if (!is_array($request)) {
+      // A failed request leaves no current page behind.
+      $this->setState(NULL, NULL, NULL);
+      return FALSE;
+    }
+    $this->setState($request['url'], $request['headers'], $request['content']);
+    return $request;
+  }
+
+  /**
+   * Find the form on the current page that has the given submit button.
+   *
+   * @param $fields
+   *   An array of values for the form, keyed by field name.
+   * @param $submit
+   *   The value of the submit button to look for.
+   * @return
+   *   An associative array with the keys 'action' (the action attribute of the
+   *   form, or FALSE if it has none) and 'post' (the values to post, keyed by
+   *   field name), or FALSE if no form matches.
+   */
+  protected function findForm(array $fields, $submit) {
+    $page = $this->getPage();
+    if ($page === FALSE) {
+      return FALSE;
+    }
+
+    $forms = $page->getForms();
+    if (!$forms) {
+      return FALSE;
+    }
+    foreach ($forms as $form) {
+      if (($post = $this->processForm($form, $fields, $submit)) !== FALSE) {
+        $action = (isset($form['action']) ? (string) $form['action'] : FALSE);
+        return array(
+          'action' => $action,
+          'post' => $post,
+        );
+      }
+    }
+    return FALSE;
+  }
+
+  /**
+   * Build the values to post for a form, if it has the given submit button.
+   *
+   * @param $form
+   *   The form element.
+   * @param $fields
+   *   An array of values for the form, keyed by field name. Entries that do
+   *   not match a field of the form are ignored.
+   * @param $submit
+   *   The value of the submit button to press.
+   * @return
+   *   An array of values to post, keyed by field name, or FALSE if the form
+   *   has no submit or image button with the value $submit.
+   */
+  protected function processForm($form, $fields, $submit) {
+    $page = $this->getPage();
+    $inputs = $page->getInputs($form);
+    if (!$inputs) {
+      return FALSE;
+    }
+
+    $post = array();
+    // Names of the fields whose outcome was decided by $fields. They must not
+    // fall back to the value in the HTML, even when nothing is posted for
+    // them (unchecked checkbox, upload).
+    $handled = array();
+    $submit_found = FALSE;
+    foreach ($inputs as $input) {
+      $name = (string) $input['name'];
+      $html_value = isset($input['value']) ? (string) $input['value'] : '';
+      $type = $this->getFieldType($input);
+
+      if ($type == 'submit' || $type == 'image') {
+        // Only the button that is pressed becomes part of the post.
+        if ((string) $submit === $html_value) {
+          if ($name !== '') {
+            $post[$name] = $html_value;
+          }
+          $submit_found = TRUE;
+        }
+        continue;
+      }
+
+      // Inputs without a name are never posted.
+      if ($name === '') {
+        continue;
+      }
+
+      if (isset($fields[$name])) {
+        if ($type == 'file') {
+          // TODO deal with upload.
+          $handled[$name] = TRUE;
+          continue;
+        }
+        $processed_value = $this->processField($input, $type, $fields[$name], $html_value);
+        if ($processed_value !== NULL) {
+          // Value may be omitted (checkbox).
+          if (is_array($processed_value)) {
+            $post += $processed_value;
+          }
+          elseif ($processed_value !== FALSE) {
+            $post[$name] = $processed_value;
+          }
+          $handled[$name] = TRUE;
+          continue;
+        }
+      }
+
+      // No value was decided for the field: no post field value specified,
+      // the value does not match the field (radio, select), or the field is
+      // of an unknown type. Use the default value (value in HTML), if any.
+      if (empty($handled[$name]) && !isset($post[$name])) {
+        $default_value = $this->getDefaultFieldValue($input, $type, $html_value);
+        if (is_array($default_value)) {
+          $post += $default_value;
+        }
+        elseif ($default_value !== NULL) {
+          $post[$name] = $default_value;
+        }
+      }
+    }
+
+    return ($submit_found ? $post : FALSE);
+  }
+
+  /**
+   * Determine the type of a form field.
+   *
+   * @param $input
+   *   The input, textarea or select element.
+   * @return
+   *   The lower-case type attribute of the element if it has one, 'text' for
+   *   an input element without one, otherwise the name of the element.
+   */
+  protected function getFieldType($input) {
+    if (isset($input['type'])) {
+      return strtolower((string) $input['type']);
+    }
+    $element = $input->getName();
+    return ($element == 'input' ? 'text' : $element);
+  }
+
+  /**
+   * Determine the value to post for a field that a value was given for.
+   *
+   * @param $input
+   *   The field element.
+   * @param $type
+   *   The type of the field as returned by getFieldType().
+   * @param $new_value
+   *   The value given for the field.
+   * @param $html_value
+   *   The value attribute of the field.
+   * @return
+   *   The value to post, an array of values keyed by the name to post them
+   *   under (select given an array), FALSE if nothing is to be posted for the
+   *   field, or NULL if $new_value does not apply to the field.
+   */
+  protected function processField($input, $type, $new_value, $html_value) {
+    switch ($type) {
+      case 'text':
+      case 'textarea':
+      case 'password':
+        return $new_value;
+      case 'radio':
+        // Compare as strings so that e.g. 0 cannot match a non-numeric value.
+        if (is_scalar($new_value) && (string) $new_value === $html_value) {
+          return $new_value;
+        }
+        return NULL;
+      case 'checkbox':
+        // If $new_value is set to FALSE then omit checkbox value, otherwise
+        // pass original value.
+        if ($new_value === FALSE) {
+          return FALSE;
+        }
+        return $html_value;
+      case 'select':
+        $page = $this->getPage();
+        $multiple = is_array($new_value);
+        $wanted = array_map('strval', (array) $new_value);
+        // Remove the ending [] from multi-select element name.
+        $key = preg_replace('/\[\]$/', '', (string) $input['name']);
+
+        $index = 0;
+        $out = array();
+        foreach ($page->getSelectElements($input) as $option) {
+          $value = $this->getOptionValue($page, $option);
+          if (!in_array($value, $wanted, TRUE)) {
+            continue;
+          }
+          if (!$multiple) {
+            return $new_value;
+          }
+          $out[$key . '[' . $index++ . ']'] = $value;
+        }
+        return ($out ? $out : NULL);
+      default:
+        return NULL;
+    }
+  }
+
+  /**
+   * Get the value an option of a select submits.
+   *
+   * @param $page
+   *   The page the option belongs to.
+   * @param $option
+   *   The option element.
+   * @return
+   *   The value attribute of the option, or the text of the option when it has
+   *   no value attribute.
+   */
+  protected function getOptionValue($page, $option) {
+    if (isset($option['value'])) {
+      return (string) $option['value'];
+    }
+    return $page->asText($option);
+  }
+
+  /**
+   * Determine the value a field posts when no value was given for it.
+   *
+   * @param $input
+   *   The field element.
+   * @param $type
+   *   The type of the field as returned by getFieldType().
+   * @param $html_value
+   *   The value attribute of the field.
+   * @return
+   *   The value to post, an array of values keyed by the name to post them
+   *   under (multiple select), or NULL if nothing is to be posted.
+   */
+  protected function getDefaultFieldValue($input, $type, $html_value) {
+    switch ($type) {
+      case 'textarea':
+        return (string) $input;
+      case 'select':
+        $page = $this->getPage();
+        // Remove the ending [] from multi-select element name.
+        $key = preg_replace('/\[\]$/', '', (string) $input['name']);
+        $single = !isset($input['multiple']);
+
+        $first = TRUE;
+        $index = 0;
+        $out = array();
+        foreach ($page->getSelectElements($input) as $option) {
+          // For single select, we load the first option, if there is a
+          // selected option that will overwrite it later.
+          if (isset($option['selected']) || ($first && $single)) {
+            $value = $this->getOptionValue($page, $option);
+            if ($single) {
+              $out[$key] = $value;
+            }
+            else {
+              $out[$key . '[' . $index++ . ']'] = $value;
+            }
+          }
+          $first = FALSE;
+        }
+        // Decide only after all options have been looked at.
+        if ($single) {
+          return (isset($out[$key]) ? $out[$key] : NULL);
+        }
+        return ($out ? $out : NULL);
+      case 'file':
+      case 'submit':
+      case 'image':
+      case 'button':
+      case 'reset':
+        // Uploads are not supported yet and buttons only post when pressed.
+        return NULL;
+      case 'radio':
+      case 'checkbox':
+        if (!isset($input['checked'])) {
+          return NULL;
+        }
+        // Deliberately no break.
+      default:
+        return $html_value;
+    }
+  }
+
+  /**
+   * Not implemented yet.
+   */
+  protected function processFields($form, $fields) {
+
+  }
+
+  /**
+   * Make a request with an arbitrary HTTP method. Not implemented yet.
+   *
+   * @param $method
+   *   The HTTP method to use.
+   * @return
+   *   FALSE if the HTTP wrapper does not support the method.
+   */
+  public function request($method) {
+    if (!$this->isMethodSupported($method)) {
+      return FALSE;
+    }
+
+    // TODO Support abitrary method.
+  }
+
+  /**
+   * Check whether the HTTP wrapper supports an HTTP method.
+   *
+   * @param $method
+   *   The HTTP method, in any case.
+   * @return
+   *   TRUE if the method is listed by the wrapper, FALSE otherwise.
+   */
+  public function isMethodSupported($method) {
+    return in_array(strtoupper($method), $this->info['methods'], TRUE);
+  }
+
+  /**
+   * Replace the current page.
+   *
+   * @param $url
+   *   The URL of the page.
+   * @param $headers
+   *   The headers of the page.
+   * @param $content
+   *   The content of the page.
+   */
+  protected function setState($url, $headers, $content) {
+    $this->url = $url;
+    $this->headers = $headers;
+    $this->content = $content;
+    // The parsed page belongs to the previous content.
+    $this->page = NULL;
+
+//    module_invoke_all('browser_request', self::$browser); // TODO decide on hooks
+  }
+
+  /**
+   * Gets the current raw HTML of the last requested page.
+   *
+   * @return
+   *   Raw HTML of last requested page.
+   */
+  public function getContent() {
+    return $this->content;
+  }
+
+  /**
+   * Get the parsed version of the current page.
+   *
+   * @return
+   *   The BrowserPage of the current content, or FALSE if there is no content
+   *   or it could not be parsed.
+   */
+  public function getPage() {
+    if (!isset($this->page)) {
+      if (!is_string($this->content) || $this->content === '') {
+        return FALSE;
+      }
+      $page = new BrowserPage($this->url, $this->headers, $this->content);
+      if (!$page->isValid()) {
+        return FALSE;
+      }
+      // Only remember pages that could be parsed.
+      $this->page = $page;
+    }
+    return $this->page;
+  }
+}
