diff --git a/modules/linkchecker_taxonomy/linkchecker_taxonomy.info b/modules/linkchecker_taxonomy/linkchecker_taxonomy.info
new file mode 100644
index 0000000..99facba
--- /dev/null
+++ b/modules/linkchecker_taxonomy/linkchecker_taxonomy.info
@@ -0,0 +1,8 @@
+name = Link checker Taxonomy
+description = "Periodically checks for broken links in taxonomies and reports the results."
+configure = admin/config/content/linkchecker
+package = "Link checker"
+core = 7.x
+
+dependencies[] = taxonomy
+dependencies[] = linkchecker
diff --git a/modules/linkchecker_taxonomy/linkchecker_taxonomy.install b/modules/linkchecker_taxonomy/linkchecker_taxonomy.install
new file mode 100644
index 0000000..2c28468
--- /dev/null
+++ b/modules/linkchecker_taxonomy/linkchecker_taxonomy.install
@@ -0,0 +1,53 @@
+    'Stores all link references for taxonomies.',
+    'fields' => array(
+      'tid' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+        'description' => 'Primary Key: Unique {taxonomy}.tid.',
+      ),
+      'lid' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+        'description' => 'Primary Key: Unique {linkchecker_link}.lid.',
+      ),
+    ),
+    'primary key' => array('tid', 'lid'),
+    'foreign keys' => array(
+      'tid' => array('taxonomy' => 'tid'),
+      'lid' => array('linkchecker_link' => 'lid'),
+    ),
+    'indexes' => array('lid' => array('lid')),
+  );
+
+  return $schema;
+}
+
+/**
+ * Implements hook_modules_disabled().
+ *
+ * If the taxonomy core module is disabled, the integration needs to be
+ * disabled as well.
+ *
+ * @param array $modules
+ *   The names of the modules that have been disabled.
+ */
+function linkchecker_taxonomy_modules_disabled($modules) {
+  // Disable link checks for taxonomy.
+  if (in_array('taxonomy', $modules, TRUE)) {
+    // The setting is read as an array of vocabularies everywhere else in this
+    // module, so reset it to an empty array rather than to 0.
+    variable_set('linkchecker_scan_taxonomies', array());
+    drupal_set_message(t('Link checks for taxonomies have been disabled.'));
+  }
+}
diff --git a/modules/linkchecker_taxonomy/linkchecker_taxonomy.module b/modules/linkchecker_taxonomy/linkchecker_taxonomy.module
new file mode 100644
index 0000000..ca00df7
--- /dev/null
+++ b/modules/linkchecker_taxonomy/linkchecker_taxonomy.module
@@ -0,0 +1,437 @@
+machine_name] = $vocabulary->name;
+  }
+  $form['settings']['linkchecker_scan_taxonomies'] = array(
+    '#type' => 'checkboxes',
+    '#prefix' => "
",
+    '#title' => t('Scan taxonomies for links'),
+    '#default_value' => variable_get('linkchecker_scan_taxonomies', array()),
+    '#options' => array_map('check_plain', $taxonomy_names),
+    '#description' => t('Enable link checking for the selected taxonomy vocabulary(ies).'),
+  );
+  $form['settings']['linkchecker_check_links_types']['#weight'] = 10;
+
+  $form['#submit'][] = 'linkchecker_taxonomy_admin_settings_form_submit';
+
+  $form['clear']['linkchecker_analyze']['#submit'][] = 'linkchecker_taxonomy_analyze_links_submit';
+  $form['clear']['linkchecker_clear_analyze']['#submit'][] = 'linkchecker_taxonomy_clear_analyze_links_submit';
+}
+
+/**
+ * Submit callback.
+ *
+ * Analyze fields in all terms of the enabled vocabularies.
+ */
+function linkchecker_taxonomy_analyze_links_submit($form, &$form_state) {
+  // The variable holds the vocabulary selection as an array, which is truthy
+  // as soon as it has any entry, ticked or not. Test the filtered selection.
+  if (array_filter((array) variable_get('linkchecker_scan_taxonomies', array()))) {
+    batch_set(_linkchecker_taxonomy_batch_import_taxonomies());
+  }
+}
+
+/**
+ * Submit callback.
+ *
+ * Clear all taxonomy link references and analyze fields in all terms of the
+ * enabled vocabularies again.
+ */
+function linkchecker_taxonomy_clear_analyze_links_submit($form, &$form_state) {
+  db_truncate('linkchecker_taxonomy')->execute();
+
+  // Same check as in linkchecker_taxonomy_analyze_links_submit(): only start
+  // a scan when at least one vocabulary is actually enabled.
+  if (array_filter((array) variable_get('linkchecker_scan_taxonomies', array()))) {
+    batch_set(_linkchecker_taxonomy_batch_import_taxonomies());
+  }
+}
+
+
+/**
+ * Linkchecker admin_settings_form submit handler.
+ *
+ * Starts a term scan when at least one vocabulary has been newly enabled.
+ */
+function linkchecker_taxonomy_admin_settings_form_submit($form, &$form_state) {
+  // As elsewhere in this module, the enabled vocabularies are the keys that
+  // survive array_filter(). Comparing the two raw arrays with ">" does not
+  // tell whether a vocabulary was added, so compare the enabled lists.
+  $selected = array_keys(array_filter((array) $form_state['values']['linkchecker_scan_taxonomies']));
+  $previous = array_keys(array_filter((array) $form['settings']['linkchecker_scan_taxonomies']['#default_value']));
+
+  if (array_diff($selected, $previous)) {
+    batch_set(_linkchecker_taxonomy_batch_import_taxonomies());
+  }
+}
+
+
+/**
+ * Batch: Scan taxonomies for links.
+ *
+ * @return array
+ *   The batch definition, with one operation per term of every vocabulary
+ *   enabled in the 'linkchecker_scan_taxonomies' variable.
+ */
+function _linkchecker_taxonomy_batch_import_taxonomies() {
+  $vocabularies = array_keys(array_filter((array) variable_get('linkchecker_scan_taxonomies', array())));
+
+  $operations = array();
+  // An empty value list would yield an empty "IN ()" condition, so the terms
+  // are only queried when at least one vocabulary is enabled.
+  if (!empty($vocabularies)) {
+    $query = db_select('taxonomy_term_data', 'ttd')
+      ->fields('ttd', array('tid'))
+      ->orderBy('ttd.tid')
+      ->condition('tv.machine_name', $vocabularies, 'IN');
+    $query->innerJoin('taxonomy_vocabulary', 'tv', 'tv.vid = ttd.vid');
+
+    foreach ($query->execute()->fetchCol() as $tid) {
+      $operations[] = array('_linkchecker_taxonomy_batch_terms_import_op', array($tid));
+    }
+  }
+
+  $batch = array(
+    'finished' => '_linkchecker_taxonomy_batch_terms_import_finished',
+    'operations' => $operations,
+    'title' => t('Scanning for links'),
+  );
+
+  return $batch;
+}
+
+/**
+ * Batch operation: Scan one by one term for links.
+ *
+ * @param int $tid
+ *   The ID of the term to scan.
+ * @param array $context
+ *   The batch context; the scanned term ID is appended to 'results' and
+ *   'message' is set to the term name.
+ */
+function _linkchecker_taxonomy_batch_terms_import_op($tid, &$context) {
+  // Load the term and scan for links.
+  $term = taxonomy_term_load($tid);
+
+  // Loading can fail, e.g. if the term was removed after the batch was built.
+  // Skip it instead of dereferencing a non-object.
+  if (!is_object($term)) {
+    return;
+  }
+
+  _linkchecker_taxonomy_add_taxonomy_links($term);
+
+  // Store results for post-processing in the finished callback.
+  $context['results'][] = $term->tid;
+  $context['message'] = t('Taxonomy term: @title', array('@title' => $term->name));
+}
+
+/**
+ * Implements hook_taxonomy_term_insert().
+ */
+function linkchecker_taxonomy_taxonomy_term_insert($term) {
+  // Only terms of vocabularies enabled for link checking are scanned.
+  if (_linkchecker_taxonomy_scan_taxonomy_vocabulary($term->vocabulary_machine_name)) {
+    _linkchecker_taxonomy_add_taxonomy_links($term);
+  }
+}
+
+/**
+ * Implements hook_taxonomy_term_update().
+ */
+function linkchecker_taxonomy_taxonomy_term_update($term) {
+  // Only terms of vocabularies enabled for link checking are scanned.
+  if (_linkchecker_taxonomy_scan_taxonomy_vocabulary($term->vocabulary_machine_name)) {
+    _linkchecker_taxonomy_add_taxonomy_links($term);
+  }
+}
+
+
+/**
+ * Implements hook_taxonomy_term_delete().
+ */
+function linkchecker_taxonomy_taxonomy_term_delete($term) {
+  // The helper expects a term ID, not the term object. The references are
+  // removed regardless of the current vocabulary settings, so that no rows
+  // pointing to a deleted term stay behind.
+  _linkchecker_taxonomy_delete_taxonomy_links($term->tid);
+}
+
+/**
+ * Add taxonomy links to database.
+ *
+ * @param object $term
+ *   The fully populated taxonomy term object.
+ * @param bool $skip_missing_links_detection
+ *   To prevent endless batch loops the value needs to be TRUE. With FALSE
+ *   the need for content re-scans is detected by the number of missing links.
+ */
+function _linkchecker_taxonomy_add_taxonomy_links($term, $skip_missing_links_detection = FALSE) {
+  // Collect the text of the term fields to scan.
+  $text_items = _linkchecker_parse_fields('taxonomy_term', $term->vocabulary_machine_name, $term);
+
+  // Get the term path for extraction of relative links.
+  $path = url('taxonomy/term/' . $term->tid);
+
+  // Extract all links in term.
+  $links = array_keys(_linkchecker_extract_links(implode(' ', $text_items), $path));
+
+  // Term has links.
+  if (!empty($links)) {
+    // Remove all links from the links array already in the database and only
+    // add missing links to database.
+    $missing_links = _linkchecker_taxonomy_links_missing($term->tid, $links);
+
+    // Only add unique links to database that do not exist.
+    $i = 0;
+    foreach ($missing_links as $url) {
+      $urlhash = drupal_hash_base64($url);
+      $link = db_query('SELECT lid FROM {linkchecker_link} WHERE urlhash = :urlhash', array(':urlhash' => $urlhash))->fetchObject();
+      if (!$link) {
+        $link = new stdClass();
+        $link->urlhash = $urlhash;
+        $link->url = $url;
+        $link->status = _linkchecker_link_check_status_filter($url);
+        drupal_write_record('linkchecker_link', $link);
+      }
+      db_insert('linkchecker_taxonomy')
+        ->fields(array(
+          'tid' => $term->tid,
+          'lid' => $link->lid,
+        ))
+        ->execute();
+
+      // Break processing if max links limit per run has been reached.
+      $i++;
+      if ($i >= LINKCHECKER_SCAN_MAX_LINKS_PER_RUN) {
+        break;
+      }
+    }
+
+    // The first chunk of missing links has now been imported by the above
+    // code. If the number of missing links still exceeds the scan limit
+    // defined in LINKCHECKER_SCAN_MAX_LINKS_PER_RUN, the term needs to be
+    // re-scanned until all links have been collected and saved.
+    //
+    // The above code has already handled up to
+    // LINKCHECKER_SCAN_MAX_LINKS_PER_RUN links, which are subtracted from the
+    // number of missing links to calculate the number of re-scan rounds.
+    //
+    // To prevent endless loops $skip_missing_links_detection needs to be TRUE
+    // when this function is called from a batch job that already knows the
+    // number of required re-scan rounds.
+    $missing_links_count = count($missing_links) - LINKCHECKER_SCAN_MAX_LINKS_PER_RUN;
+    if (!$skip_missing_links_detection && $missing_links_count > 0) {
+      module_load_include('inc', 'linkchecker', 'linkchecker.batch');
+      batch_set(_linkchecker_taxonomy_batch_import_single_term($term->tid, $missing_links_count));
+
+      // If batches were set in the submit handlers, we process them now,
+      // possibly ending execution. We make sure we do not react to the batch
+      // that is already being processed (if a batch operation performs a
+      // drupal_execute).
+      if ($batch = &batch_get() && !isset($batch['current_set'])) {
+        batch_process('taxonomy/term/' . $term->tid);
+      }
+    }
+  }
+
+  // Remove dead link references for cleanup reasons as very last step.
+  _linkchecker_taxonomy_cleanup_term_references($term->tid, $links);
+}
+
+/**
+ * Should the given taxonomy vocabulary be scanned for links?
+ *
+ * @param string $vocabulary_machine_name
+ *   The machine name of the vocabulary to look up in the
+ *   'linkchecker_scan_taxonomies' variable.
+ *
+ * @return bool
+ *   TRUE if taxonomy vocabulary should be scanned, otherwise FALSE.
+ */
+function _linkchecker_taxonomy_scan_taxonomy_vocabulary($vocabulary_machine_name = NULL) {
+  // The cast keeps a non-array value (for example 0) from reaching
+  // array_filter().
+  $vocabularies = array_keys(array_filter((array) variable_get('linkchecker_scan_taxonomies', array())));
+
+  // Loose comparison is kept on purpose: PHP turns numeric-looking array keys
+  // into integers, which a strict check against a string would not match.
+  return in_array($vocabulary_machine_name, $vocabularies);
+}
+
+/**
+ * Cleanup no longer used term references to links.
+ *
+ * @param int $tid
+ *   The term ID whose rows in the linkchecker_taxonomy table are cleaned up.
+ * @param array $links
+ *   The URLs currently found in the term. References to any other link are
+ *   deleted; with an empty list all references of the term are deleted.
+ */
+function _linkchecker_taxonomy_cleanup_term_references($tid = 0, $links = array()) {
+  if (empty($links)) {
+    // Term does not have links. Delete all references if any exist.
+    db_delete('linkchecker_taxonomy')
+      ->condition('tid', $tid)
+      ->execute();
+  }
+  else {
+    // The term still has at least one link, but other links may have been
+    // removed. Links no longer in the content need to be deleted from the
+    // linkchecker_taxonomy reference table.
+    $subquery = db_select('linkchecker_link')
+      ->fields('linkchecker_link', array('lid'))
+      ->condition('urlhash', array_map('drupal_hash_base64', $links), 'IN');
+
+    db_delete('linkchecker_taxonomy')
+      ->condition('tid', $tid)
+      ->condition('lid', $subquery, 'NOT IN')
+      ->execute();
+  }
+}
+
+/**
+ * Returns an array of taxonomy references missing in the linkchecker_taxonomy table.
+ *
+ * @param int $tid
+ *   The term ID whose stored link references are looked up.
+ * @param array $links
+ *   The URLs to check against the stored references.
+ *
+ * @return array
+ *   The entries of $links that are not yet referenced by the term.
+ */
+function _linkchecker_taxonomy_links_missing($tid, $links) {
+  $arguments = array(
+    ':tid' => $tid,
+    ':urlhashes' => array_map('drupal_hash_base64', $links),
+  );
+
+  // URLs of the given links that already have a reference row for this term.
+  $known_urls = db_query('SELECT ll.url FROM {linkchecker_link} ll INNER JOIN {linkchecker_taxonomy} lt ON lt.lid = ll.lid WHERE lt.tid = :tid AND ll.urlhash IN (:urlhashes)', $arguments)->fetchCol();
+
+  return array_diff($links, $known_urls);
+}
+
+/**
+ * Remove all term references to links in the linkchecker_taxonomy table.
+ *
+ * @param int $tid
+ *   The term ID whose reference rows are deleted.
+ */
+function _linkchecker_taxonomy_delete_taxonomy_links($tid) {
+  $delete = db_delete('linkchecker_taxonomy');
+  $delete->condition('tid', $tid);
+  $delete->execute();
+}
+
+/**
+ * Recurring scans of a single term via batch API.
+ *
+ * NOTE(review): the operation and finished callbacks named below are not
+ * defined in the visible part of this module; presumably they are provided
+ * by the included linkchecker.batch.inc - confirm.
+ *
+ * @param int $tid
+ *   The unique term id to scan for links.
+ * @param int $missing_links_count
+ *   The number of links not yet added to linkchecker_links table. By this
+ *   number the re-scan rounds are calculated.
+ *
+ * @return array
+ *   The batch task definition.
+ */
+function _linkchecker_taxonomy_batch_import_single_term($tid, $missing_links_count) {
+  // Queue one scan round per LINKCHECKER_SCAN_MAX_LINKS_PER_RUN links.
+  $operations = array();
+  $offset = 0;
+  while ($offset <= $missing_links_count) {
+    $operations[] = array('_linkchecker_taxonomy_batch_single_term_import_op', array($tid));
+    $offset += (int) LINKCHECKER_SCAN_MAX_LINKS_PER_RUN;
+  }
+
+  return array(
+    'file' => drupal_get_path('module', 'linkchecker') . '/linkchecker.batch.inc',
+    'finished' => '_linkchecker_taxonomy_batch_single_term_import_finished',
+    'operations' => $operations,
+    'title' => t('Scanning for links'),
+    'progress_message' => t('Remaining @remaining of @total scans.'),
+  );
+}
+
+/**
+ * Output term batch result messages.
+ *
+ * @param bool $success
+ *   If scan completed successfully or not.
+ * @param array $results
+ *   The results collected by the batch operations; its element count is
+ *   reported as the number of terms scanned.
+ * @param array $operations
+ *   Array of functions called.
+ */
+function _linkchecker_taxonomy_batch_terms_import_finished($success, $results, $operations) {
+  if ($success) {
+    $message = format_plural(count($results), 'One term has been scanned.', '@count terms have been scanned.');
+  }
+  else {
+    $message = t('Scanning for links in terms have failed with an error.');
+  }
+  drupal_set_message($message);
+}
+
+/**
+ * Returns IDs of terms that contain a link.
+ *
+ * No access filtering is applied: the query sets 'base_table' metadata but
+ * carries no access tag.
+ * NOTE(review): confirm whether per-user term access filtering was intended.
+ *
+ * @param object $link
+ *   An object representing the link to check; its lid is used for the lookup.
+ *
+ * @return array
+ *   An array of term IDs that contain the provided link. Empty if the
+ *   'linkchecker_scan_taxonomies' variable is empty or no term references
+ *   the link.
+ */
+function linkchecker_taxonomy_link_term_ids($link) {
+  // Exit if terms are disabled.
+  if (!variable_get('linkchecker_scan_taxonomies', 0)) {
+    return array();
+  }
+
+  // Get a list of terms containing the link.
+  $query = db_select('taxonomy_term_data', 't');
+  $query->addMetaData('base_table', 'taxonomy_term_data');
+  $query->innerJoin('linkchecker_taxonomy', 'lt', 'lt.tid = t.tid');
+  $query->condition('lt.lid', $link->lid);
+  $query->fields('t', array('tid'));
+  $tids = $query->execute()->fetchCol();
+
+  // Return the array of term IDs.
+  return $tids ? $tids : array();
+}
+
+/**
+ * Implements hook_linkchecker_auto_repair_301_links_alter().
+ *
+ * Replaces a permanently moved link with its redirect target in every term
+ * that references the link.
+ *
+ * @param object $link
+ *   The outdated link; its lid and url are used.
+ * @param object $response
+ *   The check response; its redirect_url is the replacement URL.
+ */
+function linkchecker_taxonomy_linkchecker_auto_repair_301_links_alter($link, $response) {
+  // TAXONOMY: Autorepair all terms having this outdated link.
+  $result = db_query('SELECT tid FROM {linkchecker_taxonomy} WHERE lid = :lid', array(':lid' => $link->lid));
+  foreach ($result as $row) {
+    // Explicitly don't use taxonomy_term_load_multiple() or the module may run
+    // into issues like http://drupal.org/node/1210606. With this logic
+    // terms can be updated until an out of memory occurs and further
+    // updates will be made on the remaining terms only.
+    $term = taxonomy_term_load($row->tid);
+
+    // Has the term object loaded successfully?
+    if (is_object($term)) {
+      $term_original = clone $term;
+
+      // Replace links in the term name.
+      _linkchecker_link_replace($term->name, $link->url, $response->redirect_url);
+
+      // Replace links in fields. The entity type is spelled exactly as in
+      // the _linkchecker_parse_fields() call of this module.
+      $term = _linkchecker_replace_fields('taxonomy_term', $term->vocabulary_machine_name, $term, $link->url, $response->redirect_url);
+
+      // Save the changed term.
+      if ($term_original != $term) {
+        taxonomy_term_save($term);
+        watchdog('linkchecker', 'Changed permanently moved link in term %term from %src to %dst.', array('%term' => $term->tid, '%src' => $link->url, '%dst' => $response->redirect_url), WATCHDOG_INFO);
+      }
+      else {
+        watchdog('linkchecker', 'Link update in term failed. Permanently moved link %src not found in term %term. Manual fix required.', array('%term' => $term->tid, '%src' => $link->url), WATCHDOG_WARNING);
+      }
+    }
+    else {
+      // $term is not an object here, so report the ID that failed to load.
+      watchdog('linkchecker', 'Loading term %term for update failed. Manual fix required.', array('%term' => $row->tid), WATCHDOG_ERROR);
+    }
+  }
+}
+
+/**
+ * Implements hook_linkcheker_main_table().
+ *
+ * NOTE(review): "linkcheker" is kept as is because it is part of the hook
+ * name; confirm that the invoking module uses the same spelling.
+ *
+ * @return array
+ *   The name of the link reference table of this module.
+ */
+function linkchecker_taxonomy_linkcheker_main_table() {
+  return array('linkchecker_taxonomy');
+}
+
+/**
+ * Implements hook_linkchecker_report_page_links_alter().
+ *
+ * Appends an edit link for every term that contains the given link.
+ */
+function linkchecker_taxonomy_linkchecker_report_page_links_alter(&$links, $link) {
+  $tids = module_invoke('linkchecker_taxonomy', 'linkchecker_link_ids', $link);
+
+  // Show links to the terms having this broken link.
+  if (variable_get('linkchecker_scan_taxonomies', 0) && !empty($tids)) {
+    foreach ($tids as $tid) {
+      $links[] = l(t('Edit term @term', array('@term' => $tid)), 'taxonomy/term/' . $tid . '/edit', array('query' => drupal_get_destination()));
+    }
+  }
+}
+
+/**
+ * Implements hook_linkchecker_link_ids().
+ *
+ * @param object $link
+ *   The link object, handed over unchanged to
+ *   linkchecker_taxonomy_link_term_ids().
+ *
+ * @return array
+ *   Whatever linkchecker_taxonomy_link_term_ids() returns for the link.
+ */
+function linkchecker_taxonomy_linkchecker_link_ids($link) {
+  $term_ids = linkchecker_taxonomy_link_term_ids($link);
+
+  return $term_ids;
+}
\ No newline at end of file