--- C:/Documents and Settings/jrivero/Escritorio/oai/oai_pmh.module Fri Jun 25 17:42:44 2010
+++ C:/wamp/www/drupal_timbo/sites/default/modules/oai/oai_pmh.module Mon Nov 01 12:49:22 2010
@@ -182,7 +182,19 @@
'#description' => t("The time of the last item fetched by the OAI-PMH module. Change only if you know what you're doing."),
'#required' => FALSE,
);
-
+  // The Biblio import checkbox is greyed out unless the Biblio module is enabled.
+  if (module_exists("biblio"))
+    $disabled = FALSE;
+  else
+    $disabled = TRUE;
+  $form[$repo]['oai_pmh_'. $repo .'_to_biblio'] = array(
+    '#type' => 'checkbox',
+    '#title' => t('Import to Biblio'),
+    '#default_value' => variable_get('oai_pmh_'. $repo .'_to_biblio', FALSE),
+    '#description' => t("Import all records to Biblio catalog instead of OAI nodes."),
+    '#required' => FALSE,
+    '#disabled' => $disabled,
+  );
/*
$form['oai_pmh_'. $repo .'_coverimage_baseurl'] = array(
'#type' => 'textfield',
@@ -591,7 +603,6 @@
}
$output = array();
-
// Get current time before we start fetching. Fetching shouldnt take
// long, but in theory there could be a really small window in which
// a record could be created in and not fetched, and man that would
@@ -618,14 +629,17 @@
// Get the last fetch date, if we cant get the earliest date stamp
// and if we cant do that, die.
$from = variable_get('oai_pmh_'. $repo .'_last_fetch',
- variable_get('oai_pmh_'. $repo .'_earliest_datestamp', FALSE));
+ variable_get('oai_pmh_'. $repo .'_earliest_datestamp',FALSE));
+ if(!isset($from) || $from=="")
+ $from = variable_get('oai_pmh_'. $repo .'_earliest_datestamp',FALSE);
}
-
// Define the request limit.
$day_span_to_harvest = 60; // TODO: Remove hardcoded option
$until = date($date_format, strtotime($from) + ($day_span_to_harvest * 3600 * 24));
-
- if (!$from) {
+ //Validate start and end dates to avoid infinite fetching: cap $until at $now, and stop when $from is missing OR not before $now
+ if ($until > $now)
+ $until = $now;
+ if (!$from || $from >= $now) {
#die('Unable to get a date to fetch from.');
return;
}
@@ -652,7 +666,6 @@
}
}
else {*/
-
$output[] = t('Processing @repository, dates @start - @end', array('@repository' => $repo, '@start' => $from, '@end' => $until));
$request = '?verb=ListRecords&from='. _oai_pmh_clean_url($from) .
'&until=' . _oai_pmh_clean_url($until) .
@@ -716,7 +729,6 @@
}
else {
$s = simplexml_load_string($xml);
-
if (!$s) {
die('SimpleXML load string error: '. $xml);
}
@@ -788,9 +800,9 @@
$update = false;
$records_new++;
}
-
$dc_node = $item->metadata->children('http://www.openarchives.org/OAI/2.0/oai_dc/')->children('http://purl.org/dc/elements/1.1/');
-
+ //Save headers
+ $dc_node_header = $item->header->children();
$node->type = variable_get('oai_pmh_'. $repo .'_node_type','');
$node->title = (string)$dc_node->title;
$node->uid = 0;
@@ -814,10 +826,10 @@
if ((isset($dc_node->subject)) && ($vid != '-1')) {
foreach ($dc_node->subject as $term) {
// Remove all numbers
- $numbers = array("1", "2", "3", "4", "5", "6", "7", "8", "9", "0");
- $term = str_replace($numbers, '', $term);
+ //$numbers = array("1", "2", "3", "4", "5", "6", "7", "8", "9", "0");
+ //$term = str_replace($numbers, '', $term);
// Escape quotes
- $term = str_replace('"', '\\"', $term);
+ /*$term = str_replace('"', '\\"', $term);
//Here strip all the tags that the user would like to remove
$term = str_replace($strip_tags, '', $term);
@@ -836,7 +848,8 @@
$term = str_replace('::', '","', $term);
$term = str_replace('--', '","', $term);
$term = str_replace('-', '","', $term);
- $term = str_replace(':', '","', $term);
+ $term = str_replace(':', '","', $term);*/
+ $term = str_replace('DoajSubjectTerm: ', '', $term);
$term = trim($term);
if ($term != "") {
$term = '"'. $term . '"';
@@ -866,8 +879,8 @@
$vid = variable_get('oai_pmh_'. $repo .'_creator','');
$term="";
$tags_this_vid="";
- if ((isset($dc_node->creator))&&($vid!='-1')) {
- foreach ($dc_node->creator as $term) {
+ if ((isset($dc_node->publisher))&&($vid!='-1')) {
+ foreach ($dc_node->publisher as $term) {
//Here strip all the tags that the user would like to remove
for ($i = 0, $size = sizeof($strip_tags); $i < $size; ++$i) {
$term = str_replace($strip_tags[$i], '', $term);
@@ -898,14 +911,15 @@
$vid = variable_get('oai_pmh_'. $repo .'_date','');
$term = "";
$tags_this_vid = "";
- $last_item_date = str_replace(array('', ''), '', $dc_node->date->asXML());
+ //Save the datestamp of the header of the request
+ $last_item_date = str_replace(array('', ''), '', $dc_node_header->datestamp->asXML());
if ((isset($dc_node->date))&&($vid!='-1')) {
foreach ($dc_node->date as $term) {
//Here strip all the tags that the user would like to remove
for ($i = 0, $size = sizeof($strip_tags); $i < $size; ++$i) {
$term = str_replace($strip_tags[$i], '', $term);
}
- $term = oai_pmh_extract_date($term);
+ //$term = oai_pmh_extract_date($term);
$term = '"'. $term. '"';
$tags_this_vid = $tags_this_vid . ($tags_this_vid ? "," : "") . $term;
@@ -940,6 +954,10 @@
);
}
else {
+ if(variable_get('oai_pmh_'. $repo .'_to_biblio', FALSE)==TRUE)
+ oai_pmh_biblio_save($dc_node);
+ else
+ {
$node = node_submit($node);
node_save($node);
$result = db_query(
@@ -956,6 +974,7 @@
);
}
}
+ }
}//endif
// if a resumption token is set, and it is non-null. Requests with
// resumptionTokens come back with an empty self closing tag
@@ -1044,7 +1063,7 @@
//$interval = variable_get('oai_pmh_'. $repo .'_interval', 21600);
//}
//else if ($gran == 'days') {
- $interval = 432000;
+ //$interval = 432000;
$interval = 0;
//$interval = 240;
//}
@@ -1359,4 +1378,56 @@
$stats['last_updated_date'] = $data->min;
return $stats;
+}
+
+/**
+ * Saves an imported OAI Dublin Core record as a Biblio node.
+ * @param $node
+ *   Dublin Core record element; its child elements are read as fields.
+ * @return
+ *   The value returned by biblio_save_node() for the assembled record.
+ */
+function oai_pmh_biblio_save($node) {
+  module_load_include('inc', 'biblio', 'biblio.import.export');
+  $biblio_node = array('title' => (string) $node->title[0]);
+  // Subjects become keywords; skip LCC codes and strip the DOAJ prefix.
+  foreach ($node->subject as $term) {
+    $term = (string) $term;
+    if (strpos($term, "LCC") === FALSE) {
+      $biblio_node['biblio_keywords'][] = str_replace("DoajSubjectTerm: ", "", $term);
+    }
+  }
+  // Identifiers give URL and ISSN; strpos() returns 0 (falsy) for a match at offset 0, so test !== FALSE.
+  foreach ($node->identifier as $term) {
+    $term = (string) $term;
+    if (strpos($term, "//") !== FALSE) {
+      $biblio_node['biblio_url'] = $term;
+    }
+    elseif (strpos($term, "issn") !== FALSE) {
+      $biblio_node['biblio_issn'] = str_replace('issn: ', '', $term);
+    }
+  }
+  $biblio_node['biblio_publisher'] = (string) $node->publisher;
+  //TODO: better type import, may not map to any RIS or biblio types
+  $biblio_node['biblio_type'] = oai_pmh_type_map((string) $node->type);
+  // Author goes in position 1 (auth type from the mapped Biblio type); contributors follow it in the array.
+  $biblio_node['biblio_contributors'][1][] = array('name' => (string) $node->creator, 'auth_type' => _biblio_get_auth_type(1, $biblio_node['biblio_type']));
+  foreach ($node->contributor as $term) {
+    $biblio_node['biblio_contributors'][] = array('name' => (string) $term, 'auth_type' => 0);
+  }
+  $biblio_node['biblio_abst_e'] = (string) $node->description;
+  $biblio_node['biblio_year'] = (string) $node->date;
+  return biblio_save_node($biblio_node);
+}
+
+/**
+ * Looks up $type in Biblio's 'ris' type map (as ris_parser.inc of the biblio module does); unmapped types yield 1001, Journal (must be added and mapped to RIS Journal in biblio).
+ */
+function oai_pmh_type_map($type) {
+  static $ris_types = array();
+  if (!$ris_types) {
+    module_load_include('inc', 'biblio', 'biblio.type.mapper');
+    $ris_types = biblio_get_type_map('ris');
+  }
+  return isset($ris_types[$type]) ? $ris_types[$type] : 1001;
}
\ No newline at end of file