commit e562e71abc9f8039d24c20aee8bda6d4591f3956
Author: Benjamin Doherty <bjd@pobox.com>
Date:   Thu Aug 12 03:20:53 2010 -0500

    latest patch handles all extra node term associations.

diff --git modules/simpletest/simpletest.info modules/simpletest/simpletest.info
index 63f61e6..076c606 100644
--- modules/simpletest/simpletest.info
+++ modules/simpletest/simpletest.info
@@ -40,3 +40,4 @@ files[] = tests/update.test
 files[] = tests/xmlrpc.test
 files[] = tests/upgrade/upgrade.test
 files[] = tests/upgrade/upgrade.poll.test
+files[] = tests/upgrade/upgrade.taxonomy.test
diff --git modules/simpletest/tests/upgrade/upgrade.taxonomy.test modules/simpletest/tests/upgrade/upgrade.taxonomy.test
new file mode 100644
index 0000000..f088564
--- /dev/null
+++ modules/simpletest/tests/upgrade/upgrade.taxonomy.test
@@ -0,0 +1,49 @@
+<?php
+// $Id$
+
+/**
+ * Test taxonomy upgrades.
+ */
+class UpgradePathTaxonomyTestCase extends UpgradePathTestCase {
+  public static function getInfo() {
+    return array(
+      'name'  => 'Taxonomy upgrade path',
+      'description'  => 'Taxonomy upgrade path tests.',
+      'group' => 'Upgrade path',
+    );
+  }
+
+  public function setUp() {
+    // Path to the database dump.
+    $this->databaseDumpFile = drupal_get_path('module', 'simpletest') . '/tests/upgrade/drupal-6.filled.database.php';
+    parent::setUp();
+  }
+
+  /**
+   * Basic tests for the taxonomy upgrade.
+   */
+  public function testTaxonomyUpgrade() {
+    $this->assertTrue($this->performUpgrade(), t('The upgrade was completed successfully.'));
+
+    // Visit the front page to assert for PHP warning and errors.
+    $this->drupalGet('');
+
+    // TODO: Check that taxonomy_vocabulary_node_type and taxonomy_term_node
+    // have been removed.
+
+    // TODO: Check that the node type 'page' has been associated to a taxonomy
+    // reference field for each vocabulary.
+
+    // TODO: Check that the node type 'story' has been associated to a taxonomy
+    // reference field for each vocabulary. It was not explicitly in
+    // $vocabulary->nodes but each node of type 'story' was associated to
+    // one or more terms.
+
+    // TODO: Check that the node type 'poll' has been associated to no taxonomy
+    // reference field.
+
+    // TODO: Check that each node of type 'page' and 'story' is associated with
+    // all the terms, except terms 'nid' and term '49 - nid'.
+
+  }
+}
diff --git modules/taxonomy/taxonomy.install modules/taxonomy/taxonomy.install
index 385f556..3c7403f 100644
--- modules/taxonomy/taxonomy.install
+++ modules/taxonomy/taxonomy.install
@@ -394,7 +394,74 @@ function taxonomy_update_7004() {
       field_create_instance($instance);
     }
   }
-  db_drop_table('taxonomy_vocabulary_node_type');
+
+  // Some contrib projects stored term node associations without regard for the
+  // selections in the {taxonomy_vocabulary_node_type} table.
+  $node_types = array();
+  $result = db_query('SELECT DISTINCT td.vid, n.type FROM {taxonomy_term_node} tn JOIN {node} n ON tn.nid = n.nid JOIN {taxonomy_term_data} td ON tn.tid = td.tid');
+  foreach ($result as $record) {
+    if (empty($vocabularies[$record->vid]->nodes[$record->type])) {
+      $node_types[$record->vid][$record->type] = $record->type;
+      unset($record->type);
+      $record->nodes = $node_types[$record->vid];
+      $extra_vocabularies[$record->vid] = $record;
+    }
+  }
+
+  if (!empty($extra_vocabularies)) {
+
+    $node_types = array();
+
+    // The allowed values for this extra vocabs field are all vocabularies not
+    // included by a core node type association.
+    foreach ($extra_vocabularies as $vid => $record) {
+      $allowed_values[] = array(
+        'vid' => $vid,
+        'parent' => 0,
+      );
+      $node_types = array_merge($node_types, $record->nodes);
+    }
+
+    // TODO: Bike shed this field name.
+    $field_name = 'taxonomyextra_upgrade';
+    $field = array(
+      'field_name' => $field_name,
+      'type' => 'taxonomy_term_reference',
+      'cardinality' => FIELD_CARDINALITY_UNLIMITED,
+      'settings' => array(
+        'required' => FALSE,
+        'allowed_values' => $allowed_values,
+      ),
+    );
+    field_create_field($field);
+
+    foreach ($node_types as $bundle) {
+      $instance = array(
+
+        // There is no valid instance.
+        'label' => 'Taxonomy upgrade extras',
+        'field_name' => $field_name,
+        'bundle' => $bundle,
+        'entity_type' => 'node',
+        'description' => 'Debris left over after upgrade from Drupal 6',
+        'widget' => array(
+          'type' => 'taxonomy_autocomplete',
+        ),
+        'display' => array(
+          'default' => array(
+            'type' => 'taxonomy_term_reference_link',
+            'weight' => 10,
+          ),
+          'teaser' => array(
+            'type' => 'taxonomy_term_reference_link',
+            'weight' => 10,
+          ),
+        ),
+      );
+      field_create_instance($instance);
+    }
+  }
+
   $fields = array('help', 'multiple', 'required', 'tags');
   foreach ($fields as $field) {
     db_drop_field('taxonomy_vocabulary', $field);
@@ -405,15 +472,6 @@ function taxonomy_update_7004() {
  * Migrate {taxonomy_term_node} table to field storage.
  */
 function taxonomy_update_7005(&$sandbox) {
-  // Since we are upgrading from Drupal 6, we know that only
-  // field_sql_storage.module will be enabled.
-  $field = field_info_field($field['field_name']);
-  $data_table = _field_sql_storage_tablename($field);
-  $revision_table = _field_sql_storage_revision_tablename($field);
-  $etid = _field_sql_storage_etid('node');
-  $value_column = $field['field_name'] . '_value';
-  $columns = array('etid', 'entity_id', 'revision_id', 'bundle', 'delta', $value_column);
-
   // This is a multi-pass update. On the first call we need to initialize some
   // variables.
   if (!isset($sandbox['total'])) {
@@ -423,22 +481,76 @@ function taxonomy_update_7005(&$sandbox) {
     $query = db_select('taxonomy_term_node', 't');
     $sandbox['total'] = $query->countQuery()->execute()->fetchField();
     $found = (bool) $sandbox['total'];
+    $result = db_query('SELECT v.*, n.type FROM {taxonomy_vocabulary} v LEFT JOIN {taxonomy_vocabulary_node_type} n ON v.vid = n.vid ORDER BY v.weight, v.name');
+
+    $vocabularies = array();
+    foreach ($result as $record) {
+      // If no node types are associated with a vocabulary, the LEFT JOIN will
+      // return a NULL value for type.
+      if (isset($record->type)) {
+        $vocabularies[$record->vid][$record->type] = 'taxonomy_'. $record->machine_name;
+      }
+    }
+
+    $result = db_query('SELECT DISTINCT v.vid, n.type FROM {taxonomy_term_node} tn LEFT JOIN {node} n ON tn.nid = n.nid LEFT JOIN {taxonomy_term_data} td ON tn.tid = td.tid LEFT JOIN {taxonomy_vocabulary} v ON v.vid = td.vid ORDER BY v.weight, v.name');
+    foreach ($result as $record) {
+      if (empty($vocabularies[$record->vid][$record->type])) {
+        $vocabularies[$record->vid][$record->type] = 'taxonomyextra_upgrade';
+      }
+    }
+
+    if (!empty($vocabularies)) {
+      $sandbox['vocabularies'] = $vocabularies;
+    }
   }
   else {
+    // Grab the current (first) vocabulary. When this vocabulary's terms have
+    // all been updated, it will be removed from the sandbox.
+    reset($sandbox['vocabularies']);
+    $vid = key($sandbox['vocabularies']);
+    $type = key($sandbox['vocabularies'][$vid]);
+
+    // Since we are upgrading from Drupal 6, we know that only
+    // field_sql_storage.module will be enabled.
+    $field = field_info_field($sandbox['vocabularies'][$vid][$type]);
+    $data_table = _field_sql_storage_tablename($field);
+    $revision_table = _field_sql_storage_revision_tablename($field);
+    $etid = _field_sql_storage_etid('node');
+    $value_column = $field['field_name'] . '_tid';
+    $columns = array('etid', 'entity_id', 'revision_id', 'bundle', 'language', 'delta', $value_column);
+
     // We do each pass in batches of 1000, this should result in a
     // maximum of 2000 insert queries each operation.
-    $batch = 1000 + $sandbox['last'];
+    $batch = 1000;
+
+    // Track whether any rows were found in this run.
+    $found = FALSE;
 
     // Query and save data for the current revision.
-    $result = db_query_range('SELECT td.tid, tn.nid, td.weight, tn.vid, n2.type, n2.created, n2.sticky FROM {taxonomy_term_data} td INNER JOIN {taxonomy_term_node} tn ON td.tid = tn.tid INNER JOIN {node} n2 ON tn.nid = n2.nid INNER JOIN {node} n ON tn.vid = n.vid AND td.vid = :vocabulary_id ORDER BY td.weight ASC', array(':vocabulary_id' => $vocabulary->vid), $sandbox['last'], $batch);
+    $result = db_query_range('SELECT td.tid, tn.nid, td.weight, tn.vid, n2.type, n2.created, n2.sticky FROM {taxonomy_term_data} td INNER JOIN {taxonomy_term_node} tn ON td.tid = tn.tid INNER JOIN {node} n2 ON tn.nid = n2.nid INNER JOIN {node} n ON tn.vid = n.vid AND n2.type = :node_type AND td.vid = :vocabulary_id ORDER BY td.weight ASC', $sandbox['last'], $batch, array(':vocabulary_id' => $vid, ':node_type' => $type));
+
     $deltas = array();
     foreach ($result as $record) {
       $found = TRUE;
       $sandbox['count'] += 1;
+      if ($field['field_name'] == 'taxonomyextra_upgrade') {
+        $query = db_select($data_table)
+          ->groupBy('etid')
+          ->groupBy('entity_id')
+          ->groupBy('language')
+          ->condition('etid', $etid)
+          ->condition('entity_id', $record->nid)
+          ->condition('language', LANGUAGE_NONE);
+        $query->addExpression('MAX(delta)');
+        $max = $query->execute()->fetchField();
+        if ($max !== FALSE) {
+          $deltas[$record->nid] = $max;
+        }
+      }
       // Start deltas from 0, and increment by one for each
       // term attached to a node.
       $deltas[$record->nid] = isset($deltas[$record->nid]) ? ++$deltas[$record->nid] : 0;
-      $values = array($etid, $record->nid, $record->vid, $record->type, $deltas[$record->nid], $record->tid);
+      $values = array($etid, $record->nid, $record->vid, $record->type, LANGUAGE_NONE, $deltas[$record->nid], $record->tid);
       db_insert($data_table)->fields($columns)->values($values)->execute();
 
       // Update the {taxonomy_index} table.
@@ -449,20 +561,55 @@ function taxonomy_update_7005(&$sandbox) {
     }
 
     // Query and save data for all revisions.
-    $result = db_query('SELECT td.tid, tn.nid, td.weight, tn.vid, n.type FROM {taxonomy_term_data} td INNER JOIN {taxonomy_term_node} tn ON td.tid = tn.tid AND td.vid = :vocabulary_id INNER JOIN {node} n ON tn.nid = n.nid ORDER BY td.weight ASC', array(':vocabulary_id' => $vocabulary->vid), $sandbox['last'][$batch]);
+    $result = db_query_range('SELECT td.tid, tn.nid, td.weight, tn.vid, n.type FROM {taxonomy_term_data} td INNER JOIN {taxonomy_term_node} tn ON td.tid = tn.tid AND td.vid = :vocabulary_id INNER JOIN {node} n ON tn.nid = n.nid AND n.type = :node_type ORDER BY td.weight ASC', $sandbox['last'], $batch, array(':vocabulary_id' => $vid, ':node_type' => $type));
     $deltas = array();
     foreach ($result as $record) {
       $found = TRUE;
       $sandbox['count'] += 1;
+      if ($field['field_name'] == 'taxonomyextra_upgrade') {
+        $query = db_select($revision_table)
+          ->groupBy('etid')
+          ->groupBy('revision_id')
+          ->groupBy('language')
+          ->condition('etid', $etid)
+          ->condition('revision_id', $record->vid)
+          ->condition('language', LANGUAGE_NONE);
+        $query->addExpression('MAX(delta)');
+        $max = $query->execute()->fetchField();
+        if ($max !== FALSE) {
+          $deltas[$record->vid] = $max;
+        }
+      }
       // Start deltas at 0, and increment by one for each term attached to a revision.
       $deltas[$record->vid] = isset($deltas[$record->vid]) ? ++$deltas[$record->vid] : 0;
-      $values = array($etid, $record->nid, $record->vid, $record->type, $deltas[$record->vid], $record->tid);
+      $values = array($etid, $record->nid, $record->vid, $record->type, LANGUAGE_NONE, $deltas[$record->vid], $record->tid);
       db_insert($revision_table)->fields($columns)->values($values)->execute();
     }
-    $sandbox['last'] = $batch;
+
+    $sandbox['last'] += $batch;
+
+    // If no rows were returned, we're finished with the current vocabulary and
+    // node type pair. Remove it and reset the batch offset, or fall through
+    // and finish if there are no vocabularies left.
+    if (!$found) {
+      unset($sandbox['vocabularies'][$vid][$type]);
+      if (empty($sandbox['vocabularies'][$vid])) {
+        unset($sandbox['vocabularies'][$vid]);
+      }
+      if (!empty($sandbox['vocabularies'])) {
+        $found = TRUE;
+        $sandbox['last'] = 0;
+      }
+    }
   }
   if (!$found) {
-   db_drop_table('taxonomy_term_node');
+    db_drop_table('taxonomy_vocabulary_node_type');
+    db_drop_table('taxonomy_term_node');
+    // If there are no vocabs, we're done.
+    $sandbox['#finished'] = TRUE;
+  }
+  else {
+    $sandbox['#finished'] = FALSE;
   }
 }
 
