Index: INSTALL.txt =================================================================== RCS file: /cvs/drupal/drupal/INSTALL.txt,v retrieving revision 1.39.2.2 diff -u -p -r1.39.2.2 INSTALL.txt --- INSTALL.txt 26 Jul 2007 05:29:58 -0000 1.39.2.2 +++ INSTALL.txt 10 Jan 2008 20:36:40 -0000 @@ -22,7 +22,7 @@ are created automatically. REQUIREMENTS ------------ -Drupal requires a web server, PHP4 (4.3.3 or greater) or PHP5 +Drupal requires a web server, PHP4 (4.3.5 or greater) or PHP5 (http://www.php.net/) and either MySQL (http://www.mysql.com/) or PostgreSQL (http://www.postgresql.org/). The Apache web server and MySQL database are recommended; other web server and database combinations such as IIS and Index: includes/bootstrap.inc =================================================================== RCS file: /cvs/drupal/drupal/includes/bootstrap.inc,v retrieving revision 1.145.2.7 diff -u -p -r1.145.2.7 bootstrap.inc --- includes/bootstrap.inc 28 Oct 2007 04:53:14 -0000 1.145.2.7 +++ includes/bootstrap.inc 10 Jan 2008 20:50:47 -0000 @@ -626,9 +626,48 @@ function referer_uri() { /** * Encode special characters in a plain-text string for display as HTML. + * + * Uses drupal_validate_utf8 to prevent cross site scripting attacks on + * Internet Explorer 6. */ function check_plain($text) { - return htmlspecialchars($text, ENT_QUOTES); + return drupal_validate_utf8($text) ? htmlspecialchars($text, ENT_QUOTES) : ''; +} + +/** + * Checks whether a string is valid UTF-8. + * + * All functions designed to filter input should use drupal_validate_utf8 + * to ensure they operate on valid UTF-8 strings to prevent bypass of the + * filter. + * + * When text containing an invalid UTF-8 lead byte (0xC0 - 0xFF) is presented + * as UTF-8 to Internet Explorer 6, the program may misinterpret subsequent + * bytes. 
When these subsequent bytes are HTML control characters such as + * quotes or angle brackets, parts of the text that were deemed safe by filters + * end up in locations that are potentially unsafe: an onerror attribute that + * is outside of a tag, and thus deemed safe by a filter, can be interpreted + * by the browser as if it were inside the tag. + * + * This function exploits preg_match behaviour (since PHP 4.3.5) when used + * with the u modifier, as a fast way to find invalid UTF-8. When the matched + * string contains an invalid byte sequence, preg_match will fail silently. + * + * preg_match may not fail on 4 and 5 octet sequences, even though they + * are not supported by the specification. + * + * The specific preg_match behaviour is present since PHP 4.3.5. + * + * @param $text + * The text to check. + * @return + * TRUE if the text is valid UTF-8, FALSE if not. + */ +function drupal_validate_utf8($text) { + if (strlen($text) == 0) { + return TRUE; + } + return (preg_match('/^./us', $text) == 1); } /** Index: modules/filter/filter.module =================================================================== RCS file: /cvs/drupal/drupal/modules/filter/filter.module,v retrieving revision 1.160.2.4 diff -u -p -r1.160.2.4 filter.module --- modules/filter/filter.module 7 Jan 2008 01:17:59 -0000 1.160.2.4 +++ modules/filter/filter.module 10 Jan 2008 20:36:40 -0000 @@ -1268,6 +1268,11 @@ function filter_xss_admin($string) { * The format to use. */ function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) { + // Only operate on valid UTF-8 strings. This is necessary to prevent cross + // site scripting issues on Internet Explorer 6.
+ if (!drupal_validate_utf8($string)) { + return ''; + } // Store the input format _filter_xss_split($allowed_tags, TRUE); // Remove NUL characters (ignored by some browsers) Index: modules/system/system.install =================================================================== RCS file: /cvs/drupal/drupal/modules/system/system.install,v retrieving revision 1.69.2.8 diff -u -p -r1.69.2.8 system.install --- modules/system/system.install 7 Nov 2007 05:53:24 -0000 1.69.2.8 +++ modules/system/system.install 10 Jan 2008 20:43:16 -0000 @@ -1,7 +1,7 @@