Index: skos_format.inc
===================================================================
RCS file: skos_format.inc
diff -N skos_format.inc
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ skos_format.inc	26 Aug 2009 18:55:00 -0000
@@ -0,0 +1,701 @@
+<?php
+/* $Id: rdf_format.inc,v 1.1.4.5 2008/09/28 15:02:10 dman Exp $ */
+/**
+ * 2009 addition to handle SKOS by Remzi Celebi on behalf of DFKI.de <remzi.celebi@dfki.de>
+ * Adapted version of rdf_format.inc 
+ * @file Include routines for RDF parsing and taxonomy/term creation.
+ */
+// RDF core namespace, the full rdf:type predicate URI, and the array key used to bucket resources that declare no type.
+define('TAXONOMY_XML_RDF_NS', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
+define('TAXONOMY_XML_TYPE', TAXONOMY_XML_RDF_NS .'type');
+define('TAXONOMY_XML_UNTYPED', 'UNTYPED');
+// RDF Schema namespace (source of rdfs:Class).
+define('TAXONOMY_XML_RDFS_NS', 'http://www.w3.org/2000/01/rdf-schema#');
+// W3C content label namespace and its Category class.
+// See http://www.w3.org/2004/12/q/doc/rdf-labels.html
+define('TAXONOMY_XML_CONTENTLABEL_NS', 'http://www.w3.org/2004/12/q/contentlabel#');
+define('TAXONOMY_XML_CATEGORY', TAXONOMY_XML_CONTENTLABEL_NS .'Category');
+// NOTE(review): this file says it was adapted from rdf_format.inc; if that file define()s the same names, loading both re-defines them - confirm.
+// OWL - Web Ontology Language - Formalized Meaning and Logic
+define('TAXONOMY_XML_OWL_NS', 'http://www.w3.org/2002/07/owl#');
+// W3C WordNet (wn20) namespace and its schema sub-namespace.
+define('TAXONOMY_XML_W3C_WN', 'http://www.w3.org/2006/03/wn/wn20/');
+define('TAXONOMY_XML_W3C_WN_SCHEMA', TAXONOMY_XML_W3C_WN .'schema/');
+// NOTE(review): TAXONOMY_XML_NAME is used further down this file but is not defined here - presumably defined elsewhere in the module; confirm.
+// Dublin Core - Metadata standards
+define('TAXONOMY_XML_DC_NS', 'http://purl.org/dc/elements/1.1/');
+// Simple Knowledge Organization System - Structural information management
+define('TAXONOMY_XML_SKOS_NS', 'http://www.w3.org/2004/02/skos/core#');
+// Taxonomic Database Working Group - Biodiversity Information Standards (LSIDs etc)
+define('TAXONOMY_XML_TDWG_NS', 'http://rs.tdwg.org/ontology/voc/Collection#');
+
+/**
+ * Read in SKOS/RDF taxonomies and vocabularies. Create vocabs and terms as needed.
+ * See formats.html readme for information about the SKOS/RDF input supported.
+ *
+ * Targets include SKOS (http://www.w3.org/2004/02/skos/), ICRA content rating
+ * (http://www.icra.org/vocabulary/), WordNet (http://wordnet.princeton.edu/),
+ * SUMO (http://www.ontologyportal.org/) and the ontologies found at
+ * http://www.schemaweb.info/ that implement parts of RDF Schema (eg Classes with subclassOf).
+ *
+ * @param $data RDF/XML source text, handed to the ARC parser.
+ * @param $vid Target vocabulary id; 0 means build the vocabulary from definitions found in the source.
+ * @param $url Optional identifier of the requested resource; an untyped resource with this id is imported as a term.
+ * @return Array of term objects keyed by identifier; nothing is returned when parsing fails.
+ */
+function taxonomy_xml_skos_parse(&$data, $vid, $url = NULL) {
+
+  // Use ARC parser
+  include_once("arc/ARC_rdfxml_parser.php");
+  $parser_args=array(
+    "bnode_prefix" => "genid",
+    "base" => "",
+  );
+  $parser = new ARC_rdfxml_parser($parser_args);
+  $triples = $parser->parse_data($data);
+  if (! is_array($triples)) {
+    drupal_set_message(t("Problem parsing input %message", array('%message' => $triples)), 'error');
+    return;
+  }
+
+  drupal_set_message(t("%count data triples (atomic statements) found in the source RDF doc", array('%count' => count($triples))));
+  # dpm($triples);
+
+  // The RDF input may come in several flavours,
+  // Resources of the following 'types' may be cast into taxonomy terms for our purposes.
+  // That is, an rdf:Class is a Drupal:term
+  //
+  // Add to this list as needed
+  //
+  $term_types = array(
+   TAXONOMY_XML_RDF_NS .'Property',
+   TAXONOMY_XML_DC_NS .'subject',
+   TAXONOMY_XML_RDFS_NS .'Class',
+   TAXONOMY_XML_W3C_WN_SCHEMA .'Word',
+   TAXONOMY_XML_W3C_WN_SCHEMA .'NounWordSense',
+   TAXONOMY_XML_W3C_WN_SCHEMA .'NounSynset',
+   TAXONOMY_XML_CONTENTLABEL_NS .'Category',
+   TAXONOMY_XML_SKOS_NS .'Concept',
+   'urn:lsid:ubio.org:classificationbank',
+  );
+
+  // A Drupal 'vocabulary' is represented by an owl:Ontology
+  // or other similar shaped constructs
+  $vocabulary_types = array(
+    TAXONOMY_XML_OWL_NS .'Ontology',
+    TAXONOMY_XML_RDF_NS .'Description',
+    'http://www.w3.org/2001/12/Glossary',
+    TAXONOMY_XML_TDWG_NS . 'Collection',
+    TAXONOMY_XML_SKOS_NS .'ConceptScheme'
+  );
+
+  $resources_by_type = taxonomy_xml_convert_triples_to_sorted_objects_skos($triples);
+  foreach ($resources_by_type as $uri => $res_by_type) {
+    if(isset($res_by_type))
+    drupal_set_message(t("Resource %count resources in type of %type", array('%count' => count($res_by_type),'%type' => $uri)));
+  }
+
+  # dpm($resources_by_type);
+
+  // The resources are all initialized as data objects.
+  // Resource types we expect to be dealing with are just vocabs and terms.
+  drupal_set_message(t("Found %count different <strong>kinds</strong> of resources in the input : %types", array('%count' => count($resources_by_type), '%types' => join(', ', array_keys($resources_by_type)))));
+  #dpm($resources_by_type);
+
+  if ($vid == 0) {
+    // We've been asked to use the vocab described in the source file.
+    // If the vid has already been set, we ignore vocab definitions found in the file
+
+    // Scan the sorted objects for vocabulary definitions
+    // Hopefully there's only one vocab per file, but loop anyway
+    $vocabularies = array();
+    foreach ($vocabulary_types as $vocabulary_type) {
+      if (isset($resources_by_type[$vocabulary_type]) && is_array($resources_by_type[$vocabulary_type])) {
+        foreach ($resources_by_type[$vocabulary_type] as $uri => &$vocabulary_handle) {
+          $vocabularies[$uri] = &$vocabulary_handle;
+        }
+      }
+    }
+    drupal_set_message(t("Found %count resources to be used as vocabulary definitions", array('%count' => count($vocabularies))));
+
+    if(! $vocabularies) {
+      // Create a placeholder.
+      $vocabularies[] = array('name' => 'Imported Vocabulary');
+    }
+    $vid = taxonomy_xml_absorb_vocabulary_definitions($vocabularies);
+    // $vocabularies now contains a keyed array of target vocabularies the terms may be put into
+    // $vid is the default one (most common is one vocab per input file) to be used unless otherwise defined per-term.
+  }
+  else {
+  // Else using a form-selected vocab.
+    $vocabularies[$vid] = taxonomy_vocabulary_load($vid);
+  }
+
+#dpm(array('vocabs are' => $vocabularies));
+
+  // Gather the resources that will become terms.
+  // Slightly long way (not using array_merge), as I need to merge indexed and by reference
+  $terms = array();
+  foreach ($term_types as $term_type) {
+    if (isset($resources_by_type[$term_type]) && is_array($resources_by_type[$term_type])) {
+      foreach ($resources_by_type[$term_type] as $uri => &$term_handle) {
+        // Grab name/label early for debugging and indexing.
+        // Literal values are keyed by their own text rather than by 0, so take the first element whatever its key.
+        $predicates = $term_handle->predicates;
+        if (!empty($predicates['label'])) {
+          $term_handle->name = reset($predicates['label']);
+        }
+        $terms[$uri] = &$term_handle;
+      }
+    }
+  }
+
+  // Some of the RDF documents I've been fed DO NOT DEFINE A TYPE for their primary subject.
+  // Neither
+  // http://www.ubio.org/authority/metadata.php nor
+  // http://biocol.org/ nor
+  // http://lsid.tdwg.org/
+  // return RDF that says WHAT the data is. Those that use LSIDs have a type encoded in the Identifier itself :-/
+
+  // I end up with a collection of data but no idea what it's really talking about.
+  // But IF an entity is rdf:about="THIS URL" then we will take a leap and assume that is our target lump of data (this worked for biocol input).
+  $untyped_resources = isset($resources_by_type[TAXONOMY_XML_UNTYPED]) ? $resources_by_type[TAXONOMY_XML_UNTYPED] : array();
+  foreach ($untyped_resources as $identifier => $untyped_lump) {
+    if ($identifier == $url) {
+      // Looks like this was the specific thing we were looking for
+      drupal_set_message(t("Untyped Found %type  resources to be used as vocabulary definitions", array('%type' => $identifier )));
+      $terms[$identifier] = $untyped_lump;
+      // %type above receives the identifier: the resource itself is an object and cannot be rendered as text.
+    }
+  }
+
+  drupal_set_message(t("Found %count resources to be imported as terms into vocabulary %vid", array('%count' => count($terms), '%vid' => $vid)));
+
+  // $predicate_synonyms is a translation array to match rdf-speak with Drupal concepts
+  $predicate_synonyms = taxonomy_xml_relationship_synonyms();
+
+  //
+  // START MAKING TERMS
+  //
+  foreach ($terms as $identifier => &$term) {
+    drupal_set_message(t("Reviewing term %uri '%name' and analyzing its properties", array('%uri' => $identifier, '%name' => isset($term->name) ? $term->name : '')));
+
+    if (!isset($term->vid)) {
+      // This is just a default fallback. Imported terms should really have already chosen their vid.
+      $term->vid = $vid;
+    }
+    if (!isset($term->identifier)) {
+      $term->identifier = $identifier;
+    }
+
+    #dpm($term);
+
+    // Build term from data
+    // (no call-time '&' on the argument: that syntax is a fatal error since PHP 5.4)
+    // Convert all input predicates into attributes on the object
+    // the taxonomy.module will understand
+    taxonomy_xml_canonicize_predicates($term) ;
+
+    // Ensure name is valid
+    if (empty($term->name)) {
+
+      // Look, if we don't even have a name, creating a term is a waste of time.
+      // RDF feeds commonly consist of a bunch of pointers, we can't invent placeholders until we know a little more.
+      // Let's not do this.
+      drupal_set_message(t("Not enough information yet to create a term referred to as %identifier. Not creating it yet.", array('%identifier' => $identifier)));
+      continue;
+      /*
+      // Fallback to a name, identifier derived (roughly) from the URI identifier - not always meaningful, but all we have in some contexts.
+      $term->name = basename($identifier);
+      drupal_set_message(t("Problem, we were unable to find a specific label for the term referred to as %identifier. Guessing that %name will be good enough.", array('%identifier' => $identifier, '%name' => $term->name)));
+      */
+    }
+    # dpm(array('data to merge' => $term));
+
+    // See if a definition already exists in the DB. Build on that.
+    $existing_term = _taxonomy_xml_get_term_placeholder($term->name, $vid);
+    // Merge the old term objects properties into this one. Really just want its tid, but there may be more info I should not lose.
+    // New input takes precedence over older data
+    foreach ((array)$existing_term as $key => $value) {
+      if (! isset($term->$key)) {
+        $term->$key = $value;
+      }
+    }
+    // The term object is now as tidy as it can be as a self-contained entity.
+    # dpm($term);
+
+    if (variable_get('taxonomy_xml_reuseids', FALSE)) {
+      // MAINTAIN IDS
+      // Because this is likely to be used with a site-cloning set-up, it would help if we tried to match IDs
+      // OTOH, doing so could be very messy for other situations.
+      // So,
+      //  iff there is no pre-existing term with this id,
+      //  create this one as a clone with the old ID.
+      // This requires a little DB sneakiness.
+      if (!empty($term->internal_id) && ! taxonomy_get_term($term->internal_id)) {
+        $term->tid = $term->internal_id;
+        drupal_set_message(t("Doing sneaky import of %term_name re-using the internal id = %term_id", array('%term_name' => $term->name, '%term_id' => $term->internal_id)));
+        db_query("INSERT INTO {term_data} (tid, name, description, vid) VALUES (%d, '%s', '%s', %d)", $term->tid, $term->name, $term->description, $term->vid);
+        // (four columns, four placeholders, four arguments)
+        # sequences is gone in D6. Will inserting beyond the auto-increment self-correct?
+        $current_id = db_last_insert_id('term_data', 'tid');
+        if ($current_id < $term->tid) {
+          // This is probably now MYSQL specific.
+          db_query("ALTER TABLE {term_data} AUTO_INCREMENT = %d;", $term->tid);
+        }
+
+      }
+    }
+
+    # Here's where last-minute data storage done by other modules gets set up
+    module_invoke_all('taxonomy_term_presave', $term);
+
+    #dpm(array("ready to save" => $term));
+    $save_term = (array)$term;
+    $status = taxonomy_save_term($save_term);
+
+    // Re-retrieve the new term definition, just in case anything extra happened to it during processing
+    $new_term = taxonomy_xml_get_term_by_name_from_vocab($term->name, $term->vid);
+    if (! $new_term) {
+      drupal_set_message(t("It seems like we failed to create and retrieve a term called %term_name", array('%term_name' => $term->name)), 'error');
+    }
+    // Merge retrieved values back over our main definition so the handles are up-to-date
+    foreach ((array)$new_term as $key => $value) {
+      $term->$key = $value;
+    }
+
+    if ( $status == SAVED_NEW ) {
+      // Just remember this is fresh - for useful feedback messages.
+      $term->taxonomy_xml_new_term = TRUE;
+    }
+
+    // It's possible that not all the referenced items were available in the current document/loop
+    // Add referred items to the import queue for later processing
+    taxonomy_xml_add_all_children_to_queue($term);
+    taxonomy_xml_add_all_parents_to_queue($term);
+    $term->taxonomy_xml_presaved = TRUE; // A flag to avoid double-processing
+  } // end term-construction loop;
+
+  #dpm(array("Saved all, now linking!" => $terms));
+  // Now the terms are all happily created, create their relationships
+  // Couldn't do so until they had all been given tids.
+  taxonomy_xml_set_term_relations($terms);
+
+  #dpm(array('After re-linking, we now have all terms set' => $terms));
+
+  return $terms;
+}
+
+/**
+ * Compile triple statements into information objects again.
+ *
+ * Returns a nested array, grouped by type and then indexed on URI/id, holding
+ * references to one object per resource (so callers can change them).
+ * Each subject object carries a ->predicates array keyed by short predicate name.
+ * Resources with no rdf:type are filed under the type deduced from their LSID
+ * when taxonomy_xml_parse_lsid() recognises the identifier, otherwise under
+ * TAXONOMY_XML_UNTYPED. Processed entries are unset from $triples to save memory.
+ *
+ * @param $triples Array of statements, each with 's', 'p' and 'o' entries; emptied as it is consumed.
+ */
+function taxonomy_xml_convert_triples_to_sorted_objects_skos(&$triples) {  
+  // Triples are boringly granular bits of information; merge them into one object per subject.
+  // NOTE(review): $count below is initialised but never read in this function.
+  $resources = array();
+  $resources_by_type = array();
+  $count = 0;
+  foreach ($triples as $triplenum => $statement) {
+    
+    // Shape of a statement as read by the code below:
+    //   $statement['s'] - subject; 'type' selects which field names it:
+    //                     'uri' => ['uri'], 'bnode' => ['bnode_id'], else ['val'].
+    //   $statement['p'] - predicate, passed whole to the shortname helper.
+    //   $statement['o'] - object; 'type' is compared against 'uri', 'bnode'
+    //                     and 'literal', with values read from ['uri'],
+    //                     ['bnode_id'] and ['val'] respectively.
+    // NOTE(review): this layout is inferred from the accesses in this loop,
+    // presumably matching what ARC_rdfxml_parser emits - confirm against ARC.
+    //
+
+     // Resolve the subject identifier from the field that matches its type:
+     // 'uri' for type 'uri', 'bnode_id' for type 'bnode', otherwise 'val'.
+     // by Remzi Celebi
+      switch ($statement['s']['type']) {
+      
+        case 'uri' :
+           $subject_val = $statement['s']['uri'];
+            break;
+        case 'bnode':
+          $subject_val = trim($statement['s']['bnode_id']);
+          break;
+        default :
+          $subject_val = trim($statement['s']['val']);
+    
+      }
+      
+    if (! isset($resources[$subject_val]) ) {
+            $resources[$subject_val] = (object)array(); 
+     }
+    $subject = &$resources[$subject_val];
+      
+    // $subject is now a reference into $resources, so the writes below update the shared object.
+      
+    switch ($statement['o']['type']) {
+      
+      case 'uri' :
+        $object_uri = $statement['o']['uri'];
+
+        // Also make a placeholder for the object, for convenience
+        // It's not much fun referring to something that doesn't exist.
+        if (! isset($resources[$object_uri]) ) {
+          $resources[$object_uri] = (object)array(); 
+        }
+       
+
+        $object_val = $object_uri;
+      break;
+      default :
+        $object_val = trim($statement['o']['val']);
+        // NOTE(review): 'bnode' objects also take this branch and read ['val'] - confirm that key exists for them.
+    }
+   
+    // Placeholders ready, now add this statements info
+    // Namespaces are boring, Simplify the predicates
+    // TODO - revisit if namespaces are needed
+    $predicate = taxonomy_xml_rdf_shortname_skos($statement['p']);
+    
+    if (! isset($subject->predicates[$predicate]) ) {
+      $subject->predicates[$predicate] = array();
+    }
+
+    // All values are gathered here; choosing between them (eg by language) happens later.
+    // Blank-node objects are stored keyed by their bnode_id, literals keyed by their own
+    // text (so repeats collapse), anything else is appended with a numeric key if not yet present.
+    if( $statement['o']['type'] == 'bnode'){
+      if(isset($statement['o']['bnode_id'] ))
+            $subject->predicates[$predicate][$statement['o']['bnode_id']] = $statement['o']['bnode_id'];
+    }
+    else if ($statement['o']['type'] == 'literal' ) {
+        if(!isset($subject->predicates[$predicate][$object_val] ))
+            $subject->predicates[$predicate][$object_val] = $object_val;
+    }
+    else {
+      // Only add uniques, Keeps clutter down
+      if (! in_array($object_val, $subject->predicates[$predicate])) {
+        $subject->predicates[$predicate][] = $object_val;
+      }
+    }
+    // Two predicates additionally set a property directly on the subject object.
+    // NOTE(review): TAXONOMY_XML_NAME (used below) is not defined in this file - presumably defined elsewhere in the module.
+    if ($predicate == 'type') {
+      // Very important info!
+      $subject->type = $object_val;
+      // Sort it! (by reference)
+      $resources_by_type[$subject->type][$subject_val] = &$subject;
+       // ->type keeps the most recent type seen; the resource stays filed under every type it declared.
+    }
+    if ($predicate == TAXONOMY_XML_NAME) {
+      $subject->name = $object_val;
+    }
+    
+    // This is very memory-intensive for big vocabs. Try to clean up :(
+    unset($triples[$triplenum]);
+  }
+
+  // Scan the full array for any lost (untyped) data, 
+  // Make some guesses if we can, and collect the rest into a catch-all 'untyped' list.
+  $unknown_resources = array();
+  foreach ($resources as $uri => &$subject) {
+
+    if (! isset($subject->type)) {
+      
+      // A special work-around for irregular data.
+      // This entity didn't declare what TYPE it is.
+      // If the identifier of this resource is an 'LSID'
+      // then the type is sort of embedded in the string as the 'namespace'.
+      // See if we can extract it.
+      
+      if ($lsid = taxonomy_xml_parse_lsid($uri)) {
+        $resources_by_type[$lsid['type']][$uri] = &$subject;
+        
+      }
+      else {
+        // Nope, it's a total UFO
+        $unknown_resources[$uri] = &$subject;
+      }
+    }
+
+    // While we are looping, 
+    // Make a guess at its original, internal ID:
+    // split the identifier on every non-digit and look at the final piece,
+    // eg '1' from '#vocab/1' or '#vocabulary:1', '33' from '#term33'.
+    // The final piece is only numeric when the identifier ends in digits.
+    $parts = preg_split('|[^\d]|', $uri);
+    $last_num = array_pop($parts);
+    if (is_numeric($last_num)) {
+      $subject->internal_id = $last_num;
+    }
+    // Not really used Much yet.
+  }
+  if ($unknown_resources) {
+    drupal_set_message(t("Found %count Unsorted (untyped) resources. They are entities that are the subject of a statement, but I don't know what <em>type</em> of thing they are. Not sure what I'll do with these. They are things that have had statements made about them .. that I don't recognise. Probably just extra data found in the input and ignored. %unknown", array('%count' => count($unknown_resources), '%unknown' => join(', ', array_keys($unknown_resources))) ));
+    $resources_by_type[TAXONOMY_XML_UNTYPED] = $unknown_resources;
+  }
+  
+  return $resources_by_type;
+}
+
+/**
+ * Choose a string from an array of language-tagged possibilities.
+ * Scalars are trimmed; for arrays the sole value, else a non-empty 'en' entry, else the last value is used.
+ * @param $values String, or array of strings (keyed by language code where tagged). @return Trimmed string.
+ */
+function taxonomy_xml_get_literal_string($values) {
+  if (! is_array($values)) return trim($values);
+  // May need to choose language
+  if (count($values) == 1) {
+    $out = array_pop($values);
+  }
+  else {
+    // TODO add language selector
+    if (!empty($values['en'])) { // guarded: there may be no 'en' entry at all
+      $out = $values['en'];
+    }
+    else { // fine, whatever
+      $out = array_pop($values);
+    }
+  }
+  return trim($out);
+}
+
+/**
+ * Return the shorthand label of a potentially long RDF URI.
+ *
+ * EG, for http://www.w3.org/1999/02/22-rdf-syntax-ns#Property return 'Property'.
+ * The fragment is preferred, then the query string, then the last path segment.
+ * LSIDs in the 'predicates' namespace are flattened to their identifier part.
+ *
+ * @param $uri URI (or LSID) string to shorten.
+ * @return Short name string; empty if the URI has no fragment, query or path.
+ */
+function taxonomy_xml_rdf_shortname_skos($uri) {
+
+  // For LSID simplification, flatten assorted RDF-LSID-Predicates (from any authority) into their simple name
+  if (($lsid = taxonomy_xml_parse_lsid($uri)) && ($lsid['namespace'] == 'predicates') ) {
+    return $lsid['identifier'];
+  }
+
+  $parts = parse_url($uri);
+  // parse_url() omits absent components (and returns FALSE for malformed input), so test before reading an index.
+  if (!empty($parts['fragment'])) { return $parts['fragment']; }
+  if (!empty($parts['query'])) { return $parts['query']; }
+  $shortname = isset($parts['path']) ? basename($parts['path']) : '';
+  // The proper method for guessing simple names is probably documented elsewhere.
+  // ... this does the trick for now.
+  return $shortname;
+}
+
+/**
+ * Build the SKOS (RDF/XML) serialisation of one vocabulary and return it as text.
+ *
+ * The ARC library is not used for output here; the document is assembled
+ * node by node with PHP's DOM classes instead.
+ *
+ * Steps:
+ *  - load the vocabulary,
+ *  - create the rdf:RDF container via taxonomy_xml_skos_document(),
+ *  - add the vocabulary description via taxonomy_xml_add_vocab_as_skos(),
+ *  - add the terms returned by the taxonomy module's get_tree as siblings
+ *    of that description via taxonomy_xml_add_terms_as_skos(),
+ *  - serialise, then insert line breaks between tags for readability.
+ *
+ * Preamble should look something like:
+ *
+ * <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ *   xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+ *   xmlns:skos="http://www.w3.org/2004/02/skos/core#"
+ *
+ * @param $vid, $parent, $depth, $max_depth Passed through to get_tree ($vid also selects the vocabulary).
+ * @return The XML document as a string.
+ */
+function taxonomy_xml_skos_create($vid, $parent = 0, $depth = -1, $max_depth = NULL) {
+  $vocab = taxonomy_vocabulary_load($vid);
+
+  // Empty rdf:RDF container; everything else is appended inside it.
+  $rdf_root = taxonomy_xml_skos_document();
+  #dpm(array(rdf_root => $rdf_root));
+
+  // define the vocab
+  taxonomy_xml_add_vocab_as_skos($rdf_root, $vocab);
+
+  // Now add the terms.
+  // They are listed as siblings, not children, of the vocabulary node.
+  $term_tree = module_invoke('taxonomy', 'get_tree', $vid, $parent, $depth, $max_depth);
+  taxonomy_xml_add_terms_as_skos($rdf_root, $term_tree);
+
+  $xml = $rdf_root->ownerDocument->saveXML();
+
+  // Minor layout tweak for readability:
+  //  first a newline after every tag that contains a slash,
+  $xml = preg_replace('|(<[^<]*/[^>]*>)|', "$1\n", $xml);
+  //  then a newline between any tags still directly adjacent.
+  $xml = preg_replace('|><|', ">\n<", $xml);
+
+  return $xml;
+}
+
+/**
+ * Set up an SKOS/RDF document preamble.
+ * Creates a DOM document holding a header comment, an xml-stylesheet
+ * processing instruction and an rdf:RDF root element with dc, rdfs and skos prefixes.
+ * @return The rdf:RDF element content should land in; the document itself is its ->ownerDocument.
+ */
+
+function taxonomy_xml_skos_document() {
+  $dom = new DOMDocument('1.0', 'UTF-8');
+  // The Id keyword's leading dollar is escaped: in double quotes PHP would otherwise interpolate an undefined variable and drop the keyword.
+  $dom->appendChild($dom->createComment("
+    This file was created by Drupal taxonomy_xml import/export tool. 
+    http://drupal.org/project/taxonomy_xml
+    /* \$Id: skos_format.inc,v 1.1.4.6 2009/02/25 15:02:10 dman Exp $ */
+
+    The RDF schema in this file is intended to follow the Working Draft
+    described at http://www.w3.org/TR/wordnet-rdf/ for the notation of
+    thesauri and taxonomies.
+    "
+  ));
+  $dom->appendChild($dom->createProcessingInstruction('xml-stylesheet', 'href="render-taxonomy-rdf.xsl" type="text/xsl"' ));
+
+  $domcontainer = $dom->createElementNS(TAXONOMY_XML_RDF_NS, 'rdf:RDF');
+
+  $domcontainer->setAttribute('xmlns:dc', TAXONOMY_XML_DC_NS);
+  $domcontainer->setAttribute('xmlns:rdfs', TAXONOMY_XML_RDFS_NS);
+
+
+  /* by Remzi Celebi: the skos namespace is declared as a plain attribute, which is a bit of a hack */
+  $domcontainer->setAttribute( 'xmlns:skos', TAXONOMY_XML_SKOS_NS);
+
+  $dom->appendChild($domcontainer);
+
+  return $domcontainer;
+}
+
+/**
+ * Create a SKOS vocabulary definition (just the def, not its terms) and insert it
+ * into the given document element as a skos:ConceptScheme.
+ *
+ * @param $domcontainer DOM element to append to; new nodes are created on its ownerDocument.
+ * @param $vocabulary a vocab object; ->vid is read, ->name and ->description are used when set.
+ */
+
+
+function taxonomy_xml_add_vocab_as_skos(&$domcontainer, $vocabulary) {
+  $dom = $domcontainer->ownerDocument;
+
+  // Describe the vocabulary itself
+  $vocabnode = $dom->createElement('skos:ConceptScheme');
+  $domcontainer->appendChild($vocabnode);
+
+  // If this was a canonical vocab, we would use a full URI as identifiers
+  //$vocabnode->setattribute('rdf:nodeID', 'schemenode-'. $vocabulary->vid );
+  $vocabnode->setAttribute('rdf:about', url('taxonomy_xml/'. $vocabulary->vid .'/skos', array( 'absolute' => TRUE) ) );
+  // Element text is run through htmlspecialchars(): the DOM leaves '&' unescaped, and htmlentities() would emit HTML-only entities undefined in XML.
+  if (isset($vocabulary->name)) {
+    $vocabnode->appendChild(
+      $dom->createElementNS(TAXONOMY_XML_DC_NS, 'dc:title', htmlspecialchars($vocabulary->name, ENT_NOQUOTES, 'UTF-8'))
+    );
+  }
+
+  if (!empty($vocabulary->description)) {
+    $vocabnode->appendChild(
+      $dom->createElementNS(TAXONOMY_XML_DC_NS, 'dc:description', htmlspecialchars($vocabulary->description, ENT_NOQUOTES, 'UTF-8'))
+    );
+  }
+
+
+}
+
+
+/**
+ * Given a list of terms, append definitions of them to the passed DOM container.
+ *
+ * Following SKOS examples (though not any explicit instructions),
+ * taxonomy terms are modelled as skos:Concept elements.
+ *
+ * Sample of the structure produced:
+ *
+ *  <skos:Concept rdf:nodeID="term-450">
+ *     <skos:prefLabel>Countryside</skos:prefLabel>
+ *     <skos:definition>...</skos:definition>
+ *     <skos:related rdf:nodeID="term-565"/>
+ *     <skos:altLabel>Hedgerows</skos:altLabel>
+ *     <skos:altLabel>National parks</skos:altLabel>
+ *     <skos:broader rdf:nodeID="term-496"/>
+ *     <skos:narrower rdf:nodeID="term-446"/>
+ *  </skos:Concept>
+ *
+ * Related terms, synonyms, parents and children are looked up per term through
+ * the taxonomy_get_related/synonyms/parents/children() functions.
+ *
+ * @param $domcontainer DOM element the skos:Concept nodes are appended to.
+ * @param $termlist a FLAT array of term objects (->tid, ->name and optionally
+ *   ->description are read); nothing is done when it is empty.
+ * @return nothing
+ */
+
+function taxonomy_xml_add_terms_as_skos(&$domcontainer, $termlist) {
+  if (! $termlist) { return; }
+  $dom = $domcontainer->ownerDocument;
+
+  foreach ($termlist as $term) {
+    $termnode = $dom->createElement('skos:Concept');
+    $termnode->setAttribute( 'rdf:nodeID', 'term-'. $term->tid );
+    $domcontainer->appendChild($termnode);
+    // Element text is run through htmlspecialchars(): createElement() leaves '&' unescaped, and htmlentities() would emit HTML-only entities undefined in XML.
+    $termnode->appendChild(
+      $dom->createElement( 'skos:prefLabel', htmlspecialchars($term->name, ENT_NOQUOTES, 'UTF-8'))
+    );
+
+    if (!empty($term->description)) {
+      $termnode->appendChild(
+        $dom->createElement( 'skos:definition', htmlspecialchars($term->description, ENT_NOQUOTES, 'UTF-8') )
+      );
+    }
+
+    foreach ((array) taxonomy_get_related($term->tid) as $relatedid => $relatedterm) {
+      $related_node = $dom->createElement( 'skos:related' );
+      $related_node->setAttribute( 'rdf:nodeID', 'term-'. $relatedid );
+      $termnode->appendChild($related_node);
+    }
+
+    $synonyms = taxonomy_get_synonyms($term->tid);
+
+    foreach ((array) $synonyms as $synonymname) {
+      $synonymnode = $dom->createElement( 'skos:altLabel', htmlspecialchars($synonymname, ENT_NOQUOTES, 'UTF-8') );
+      $termnode->appendChild($synonymnode);
+    }
+      // add parents of the term as broader term
+    $parentlist = taxonomy_get_parents($term->tid);
+    foreach ( $parentlist as $parent) {
+      if (is_object($parent)) {
+        $parent_node = $dom->createElement('skos:broader');
+        $parent_node->setAttribute( 'rdf:nodeID', 'term-'. $parent->tid );
+        $termnode->appendChild($parent_node);
+      }
+    }
+    // add children of the term as narrower term
+    $childrenlist = taxonomy_get_children($term->tid);
+    foreach ( $childrenlist as $child) {
+      if (is_object($child)) {
+        $child_node = $dom->createElement('skos:narrower');
+        $child_node->setAttribute( 'rdf:nodeID', 'term-'. $child->tid );
+        $termnode->appendChild($child_node);
+      }
+    }
+
+    // workaround for large vocabs - restart the execution timer so each term gets a fresh 10 seconds
+    set_time_limit(10);
+  }
+  // Done all terms in list
+}
+
