diff --git a/sites/all/modules/contrib/cdn/cdn.admin.inc b/sites/all/modules/contrib/cdn/cdn.admin.inc
index 992fbd9..0da3839 100644
--- a/sites/all/modules/contrib/cdn/cdn.admin.inc
+++ b/sites/all/modules/contrib/cdn/cdn.admin.inc
@@ -403,6 +403,26 @@ function cdn_admin_other_settings_form($form, &$form_state) {
     );
   }
 
+  // Fields for SEO-friendly page request redirection. Both settings are keyed
+  // by the CDN_SEO_*_VARIABLE constants, the same names cdn_init() reads back
+  // with variable_get().
+  $form['cdn_seo'] = array(
+    '#type' => 'fieldset',
+    '#title' => t('SEO: Page Request Redirection'),
+    '#description' => t("By default all CDNs will cache full HTML pages if accessed. This means that a second copy of your site will start to appear and be indexed by Google. This is duplicate content and can cause Google to penalize your site in search results. A duplicate site is also confusing to the user and unprofessional. If you enable Page Request Redirection below the module will redirect all Drupal requests (e.g. not assets like JPGs and CSS files that are served by your web server) to your real domain via an SEO-friendly 301 redirect."),
+  );
+  $form['cdn_seo'][CDN_SEO_REDIRECT_PAGE_REQUESTS_VARIABLE] = array(
+    '#type' => 'checkbox',
+    '#title' => t('Enable Page Request Redirection'),
+    '#default_value' => variable_get(CDN_SEO_REDIRECT_PAGE_REQUESTS_VARIABLE, CDN_SEO_REDIRECT_PAGE_REQUESTS_DEFAULT),
+  );
+  $form['cdn_seo'][CDN_SEO_USER_AGENTS_VARIABLE] = array(
+    '#type' => 'textarea',
+    '#title' => t('CDN User-Agents'),
+    '#description' => t('A case-insensitive list of User Agents that identify a request as coming from a CDN. One per line.'),
+    '#default_value' => variable_get(CDN_SEO_USER_AGENTS_VARIABLE, CDN_SEO_USER_AGENTS_DEFAULT),
+  );
+
   return system_settings_form($form);
 }
 
diff --git a/sites/all/modules/contrib/cdn/cdn.module b/sites/all/modules/contrib/cdn/cdn.module
index 52a1aa5..f635915 100644
--- a/sites/all/modules/contrib/cdn/cdn.module
+++ b/sites/all/modules/contrib/cdn/cdn.module
@@ -58,6 +58,12 @@ define('CDN_DAEMON_SYNCED_FILES_DB', 'synced_files.db');
 define('CDN_DAEMON_PERSISTENT_DATA_DB', 'persistent_data.db');
 define('CDN_DAEMON_FSMONITOR_DB', 'fsmonitor.db');
 
+// Variables for SEO redirection: whether Drupal page requests arriving via a
+// CDN are redirected, and the User-Agent strings that identify a CDN.
+define('CDN_SEO_REDIRECT_PAGE_REQUESTS_DEFAULT', TRUE);
+define('CDN_SEO_REDIRECT_PAGE_REQUESTS_VARIABLE', 'cdn_seo_redirect_page_requests');
+define('CDN_SEO_USER_AGENTS_VARIABLE', 'cdn_seo_user_agents');
+define('CDN_SEO_USER_AGENTS_DEFAULT', "Amazon Cloudfront\nAkamai");
 
 //----------------------------------------------------------------------------
 // Drupal core.
@@ -465,6 +471,37 @@ function cdn_init() {
   ) {
     drupal_add_css(drupal_get_path('module', 'cdn') . '/cdn.css', array('every_page' => TRUE));
   }
+
+  // Protect the CDN from returning content pages. We only want the CDN to
+  // return static files like images, CSS files, JavaScript files, etc. By
+  // default it will return anything. Since those static files aren't served
+  // by Drupal, this routine will only kick in for pages on the Drupal site.
+  // @see http://drupal.org/node/1060358
+  if (variable_get(CDN_SEO_REDIRECT_PAGE_REQUESTS_VARIABLE, CDN_SEO_REDIRECT_PAGE_REQUESTS_DEFAULT)) {
+    // Read the User-Agent straight from $_SERVER. getallheaders() is not
+    // available under every SAPI and returns header names in the case the
+    // client sent them, so a lower-cased 'user-agent' key lookup on its
+    // result can silently miss the header.
+    $user_agent = isset($_SERVER['HTTP_USER_AGENT']) ? strtolower($_SERVER['HTTP_USER_AGENT']) : '';
+
+    // Pull and parse the user agents we are checking, as they will be
+    // line-break or comma delimited. Entries are trimmed and lower-cased;
+    // blank entries are dropped so an empty User-Agent can never match.
+    $cdn_user_agents = explode("\n", str_replace(',', "\n", variable_get(CDN_SEO_USER_AGENTS_VARIABLE, CDN_SEO_USER_AGENTS_DEFAULT)));
+    $cdn_user_agents = array_filter(array_map('strtolower', array_map('trim', $cdn_user_agents)), 'strlen');
+
+    if ($user_agent !== '' && in_array($user_agent, $cdn_user_agents, TRUE)) {
+      // A 301 is SEO friendly, as it tells the search engine what the real
+      // URL is for this content. $base_root has no base path and
+      // request_uri() is the complete request path, so nothing is doubled
+      // when Drupal lives in a subdirectory.
+      header('HTTP/1.0 301 Moved Permanently');
+      header('Location: ' . $GLOBALS['base_root'] . request_uri());
+
+      // To ensure this redirect occurs immediately we don't use drupal_exit().
+      exit();
+    }
+  }
 }
 
 /**