--- C:/Documents and Settings/jrivero/Escritorio/oai/oai_pmh.module Fri Jun 25 17:42:44 2010 +++ C:/wamp/www/drupal_timbo/sites/default/modules/oai/oai_pmh.module Mon Nov 01 12:49:22 2010 @@ -182,7 +182,19 @@ '#description' => t("The time of the last item fetched by the OAI-PMH module. Change only if you know what you're doing."), '#required' => FALSE, ); - + #'oai_pmh_'. $repo .'_last_fetch' + if(module_exists("biblio")) + $disabled=FALSE; + else + $disabled=TRUE; + $form[$repo]['oai_pmh_'. $repo .'_to_biblio'] = array( + '#type' => 'checkbox', + '#title' => t('Import to Biblio'), + '#default_value' => variable_get('oai_pmh_'. $repo .'_to_biblio', FALSE), + '#description' => t("Import all records to Biblio catalog insted of OAI nodes."), + '#required' => FALSE, + '#disabled' => $disabled, + ); /* $form['oai_pmh_'. $repo .'_coverimage_baseurl'] = array( '#type' => 'textfield', @@ -591,7 +603,6 @@ } $output = array(); - // Get current time before we start fetching. Fetching shouldnt take // long, but in theory there could be a really small window in which // a record could be created in and not fetched, and man that would @@ -618,14 +629,17 @@ // Get the last fetch date, if we cant get the earliest date stamp // and if we cant do that, die. $from = variable_get('oai_pmh_'. $repo .'_last_fetch', - variable_get('oai_pmh_'. $repo .'_earliest_datestamp', FALSE)); + variable_get('oai_pmh_'. $repo .'_earliest_datestamp',FALSE)); + if(!isset($from) || $from=="") + $from = variable_get('oai_pmh_'. $repo .'_earliest_datestamp',FALSE); } - // Define the request limit. 
$day_span_to_harvest = 60; // TODO: Remove hardcoded option $until = date($date_format, strtotime($from) + ($day_span_to_harvest * 3600 * 24)); - - if (!$from) { + //Validate start and end dates to avoid infinite fetching + if($until>$now) + $until=$now; + if (!$from && $from>=$now) { #die('Unable to get a date to fetch from.'); return; } @@ -652,7 +666,6 @@ } } else {*/ - $output[] = t('Processing @repository, dates @start - @end', array('@repository' => $repo, '@start' => $from, '@end' => $until)); $request = '?verb=ListRecords&from='. _oai_pmh_clean_url($from) . '&until=' . _oai_pmh_clean_url($until) . @@ -716,7 +729,6 @@ } else { $s = simplexml_load_string($xml); - if (!$s) { die('SimpleXML load string error: '. $xml); } @@ -788,9 +800,9 @@ $update = false; $records_new++; } - $dc_node = $item->metadata->children('http://www.openarchives.org/OAI/2.0/oai_dc/')->children('http://purl.org/dc/elements/1.1/'); - + //Save headers + $dc_node_header = $item->header->children(); $node->type = variable_get('oai_pmh_'. 
$repo .'_node_type',''); $node->title = (string)$dc_node->title; $node->uid = 0; @@ -814,10 +826,10 @@ if ((isset($dc_node->subject)) && ($vid != '-1')) { foreach ($dc_node->subject as $term) { // Remove all numbers - $numbers = array("1", "2", "3", "4", "5", "6", "7", "8", "9", "0"); - $term = str_replace($numbers, '', $term); + //$numbers = array("1", "2", "3", "4", "5", "6", "7", "8", "9", "0"); + //$term = str_replace($numbers, '', $term); // Escape quotes - $term = str_replace('"', '\\"', $term); + /*$term = str_replace('"', '\\"', $term); //Here strip all the tags that the user would like to remove $term = str_replace($strip_tags, '', $term); @@ -836,7 +848,8 @@ $term = str_replace('::', '","', $term); $term = str_replace('--', '","', $term); $term = str_replace('-', '","', $term); - $term = str_replace(':', '","', $term); + $term = str_replace(':', '","', $term);*/ + $term = str_replace('DoajSubjectTerm: ', '', $term); $term = trim($term); if ($term != "") { $term = '"'. $term . '"'; @@ -866,8 +879,8 @@ $vid = variable_get('oai_pmh_'. $repo .'_creator',''); $term=""; $tags_this_vid=""; - if ((isset($dc_node->creator))&&($vid!='-1')) { - foreach ($dc_node->creator as $term) { + if ((isset($dc_node->publisher))&&($vid!='-1')) { + foreach ($dc_node->publisher as $term) { //Here strip all the tags that the user would like to remove for ($i = 0, $size = sizeof($strip_tags); $i < $size; ++$i) { $term = str_replace($strip_tags[$i], '', $term); @@ -898,14 +911,15 @@ $vid = variable_get('oai_pmh_'. 
$repo .'_date',''); $term = ""; $tags_this_vid = ""; - $last_item_date = str_replace(array('', ''), '', $dc_node->date->asXML()); + //Save the datestamp of the header of the request + $last_item_date = str_replace(array('', ''), '', $dc_node_header->datestamp->asXML()); if ((isset($dc_node->date))&&($vid!='-1')) { foreach ($dc_node->date as $term) { //Here strip all the tags that the user would like to remove for ($i = 0, $size = sizeof($strip_tags); $i < $size; ++$i) { $term = str_replace($strip_tags[$i], '', $term); } - $term = oai_pmh_extract_date($term); + //$term = oai_pmh_extract_date($term); $term = '"'. $term. '"'; $tags_this_vid = $tags_this_vid . ($tags_this_vid ? "," : "") . $term; @@ -940,6 +954,10 @@ ); } else { + if(variable_get('oai_pmh_'. $repo .'_to_biblio', FALSE)==TRUE) + oai_pmh_biblio_save($dc_node); + else + { $node = node_submit($node); node_save($node); $result = db_query( @@ -956,6 +974,7 @@ ); } } + } }//endif // if a resumption token is set, and it is non-null. Requests with // resumptionTokens come back with an empty self closing tag @@ -1044,7 +1063,7 @@ //$interval = variable_get('oai_pmh_'. 
$repo .'_interval', 21600); //} //else if ($gran == 'days') { - $interval = 432000; + //$interval = 432000; $interval = 0; //$interval = 240; //} @@ -1359,4 +1378,94 @@ $stats['last_updated_date'] = $data->min; return $stats;
+}
+
+/**
+ * Saves an imported OAI-PMH record as a Biblio node.
+ *
+ * Copies the Dublin Core elements of a harvested record into the array that
+ * is handed to biblio_save_node().
+ *
+ * @param $node
+ *   The record's Dublin Core elements. The properties read here are title,
+ *   subject, identifier, publisher, type, creator, contributor, description
+ *   and date; each value is cast to string.
+ *
+ * @return
+ *   The value returned by biblio_save_node() (presumably the ID of the saved
+ *   node; confirm against the Biblio module).
+ */
+function oai_pmh_biblio_save($node) {
+  // Load the Biblio library required to save the node.
+  module_load_include('inc', 'biblio', 'biblio.import.export');
+
+  $biblio_node = array();
+  $biblio_node['title'] = (string) $node->title[0];
+
+  // Use the OAI subject terms as keywords: skip terms that contain an LCC
+  // code and remove the DOAJ tag from the remaining ones.
+  foreach ($node->subject as $term) {
+    $term = (string) $term;
+    if (strpos($term, 'LCC') === FALSE) {
+      $biblio_node['biblio_keywords'][] = str_replace('DoajSubjectTerm: ', '', $term);
+    }
+  }
+
+  // Use the record identifiers to set the URL and the ISSN. strpos() is
+  // compared with FALSE explicitly because a match at offset 0 (for example
+  // an identifier starting with "issn: ") is still a match.
+  foreach ($node->identifier as $term) {
+    $term = (string) $term;
+    if (strpos($term, '//') !== FALSE) {
+      $biblio_node['biblio_url'] = $term;
+    }
+    elseif (strpos($term, 'issn') !== FALSE) {
+      $biblio_node['biblio_issn'] = str_replace('issn: ', '', $term);
+    }
+  }
+
+  $biblio_node['biblio_publisher'] = (string) $node->publisher;
+  // TODO: better type import, may not map to any RIS or biblio types.
+  $biblio_node['biblio_type'] = oai_pmh_type_map((string) $node->type);
+
+  // Add the author in position 1. The author type is looked up with the
+  // biblio type mapped above, not with an attribute of the XML element.
+  $biblio_node['biblio_contributors'][1][] = array(
+    'name' => (string) $node->creator,
+    'auth_type' => _biblio_get_auth_type(1, $biblio_node['biblio_type']),
+  );
+  // Contributors go after the author in the array.
+  // NOTE(review): these entries are appended one level above the author
+  // entry ([1][]); confirm this is the layout biblio_save_node() expects.
+  foreach ($node->contributor as $term) {
+    $biblio_node['biblio_contributors'][] = array('name' => (string) $term, 'auth_type' => 0);
+  }
+
+  $biblio_node['biblio_abst_e'] = (string) $node->description;
+  $biblio_node['biblio_year'] = (string) $node->date;
+
+  return biblio_save_node($biblio_node);
+}
+
+/**
+ * Maps a type name to a biblio type using the RIS type map of Biblio.
+ *
+ * Based on ris_parser.inc of the biblio module.
+ *
+ * @param $type
+ *   The type name to look up in the map returned by
+ *   biblio_get_type_map('ris').
+ *
+ * @return
+ *   The mapped biblio type, or 1001 (Journal) when the type is not in the map.
+ *   A matching RIS Journal mapping must be added in biblio for that fallback.
+ */
+function oai_pmh_type_map($type) {
+  static $map = array();
+  if (empty($map)) {
+    // Load the mapper library and the RIS map; the static keeps the map for
+    // later calls.
+    module_load_include('inc', 'biblio', 'biblio.type.mapper');
+    $map = biblio_get_type_map('ris');
+  }
+  return isset($map[$type]) ? $map[$type] : 1001;
 }
\ No newline at end of file