Changeset 4124 for exist


Ignore:
Timestamp:
15/08/08 14:27:36 (11 years ago)
Author:
sdonegan
Message:

Updated the XQuery to handle multiple MOLES spatial and temporal elements, as well as empty elements.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • exist/trunk/xquery/moles2mdip.xq

    r3841 r4124  
    1 (: Create an MDIP record for moles documents in TargetCollection that have RepositoryID and LocalID :) 
    2 import module namespace voclib='http://ndg.nerc.ac.uk/xquery/lib/vocab' at 'xmldb:exist:///db/xqueryLib/Vocabs/vocab_xquery_lib.xquery'; 
    3 import module namespace utillib='http://ndg.nerc.ac.uk/xquery/lib/utilities' at 'xmldb:exist:///db/xqueryLib/Utilities/utility_xquery_lib.xquery'; 
     1(: Create an MDIP record for moles documents in /misc/humid1/cbyrom/eclipseWorkspace/ingestAutomation-upgrade2/OAIBatch/data/badc/discovery/moles/ that have badc.nerc.ac.uk and test_file2 :) 
     2import module namespace voclib='http://ndg.nerc.ac.uk/xquery/lib/vocab' at 'vocab_xquery_lib.xquery'; 
     3import module namespace utillib='http://ndg.nerc.ac.uk/xquery/lib/utilities' at 'utility_xquery_lib.xquery'; 
    44 
    55declare default element namespace 'http://www.oceannet.org/mdip/xml'; 
     
    88declare namespace xsi='http://www.w3.org/2001/XMLSchema-instance';  
    99 
     10(:declare variable $targetCollection as xs:string := '.';:) 
    1011declare variable $targetCollection as xs:string := 'TargetCollection'; 
    1112declare variable $repositoryIdentifier as xs:string := 'RepositoryID'; 
     
    1516 
    1617for $DE in collection($targetCollection)/moles:dgMetadata/moles:dgMetadataRecord[ 
    17 exists('moles:dgDataEntity')  
    18 and moles:dgMetadataID/moles:schemeIdentifier='NDG-B0'  
    19 and moles:dgMetadataID/moles:repositoryIdentifier=$repositoryIdentifier 
    20 and moles:dgMetadataID/moles:localIdentifier=$localIdentifier] 
     18    exists('moles:dgDataEntity')  
     19    and moles:dgMetadataID/moles:schemeIdentifier='NDG-B0'  
     20    and moles:dgMetadataID/moles:repositoryIdentifier=$repositoryIdentifier 
     21    and moles:dgMetadataID/moles:localIdentifier=$localIdentifier] 
    2122return 
    2223element Metadata { 
    23 element Title {data($DE/moles:name)}, 
     24    attribute xsi:schemaLocation {'http://www.oceannet.org/mdip/xml N:\DATAMA~1\METADA~2\MDIP\MDIP_Metadata_v1_3.xsd'}, (:remove for production :) 
     25    element Title {data($DE/moles:name)}, 
     26 
    2427if (exists($DE/moles:dgDataEntity/moles:dgDataSummary/moles:dgDatasetLanguage)) then 
    25 for $lang in $DE/moles:dgDataEntity/moles:dgDataSummary/moles:dgDatasetLanguage 
     28    for $lang in $DE/moles:dgDataEntity/moles:dgDataSummary/moles:dgDatasetLanguage 
    2629return 
    27 element Language{ 
    28 element LanguageName {data($lang/moles:dgValidTerm)}, 
    29 element LanguageVocab {data($lang/moles:dgValidTermID/moles:ParentListID)}, 
    30 element LanguageCode {data($lang/moles:dgValidTermID/moles:TermID)} 
    31 } 
    32  
    33 else (), 
    34  
    35 element Abstract {string($DE/moles:dgMetadataDescription/moles:abstract/moles:abstractText)}, 
    36 for $isoTopic in $DE/moles:dgStructuredKeyword[voclib:spot-vocab($voclib:iso_topic_list, moles:dgValidTermID/moles:ParentListID)]  
    37 return element TopicCategory { 
    38 element TopicCategoryName {data($isoTopic/moles:dgValidTerm)}, 
    39 element TopicCategoryVocab {data($isoTopic/moles:dgValidTermID/moles:ParentListID)}, 
    40 element TopicCategoryCode {data($isoTopic/moles:dgValidTermID/moles:TermID)} 
     30    element Language{ 
     31        element LanguageName {data($lang/moles:dgValidTerm)}, 
     32        element LanguageVocab {data($lang/moles:dgValidTermID/moles:ParentListID)}, 
     33        element LanguageCode {data($lang/moles:dgValidTermID/moles:TermID)} 
     34} 
     35(: put default in to keep schema happy.. :) 
     36else element Language{ 
     37        element LanguageName {data("English")}, 
     38        element LanguageVocab {data("UKGemini_LanguageCategories")}, 
     39        element LanguageCode {data("eng")} 
    4140}, 
    42 for $subject in ($DE/moles:dgDataEntity/moles:dgDataSummary/moles:dgParameterSummary/moles:dgStdParameterMeasured[ 
    43 voclib:spot-vocab($voclib:gcmd_science_valids, moles:dgValidTermID/moles:ParentListID)  
    44 or voclib:spot-vocab($voclib:gcmd_project_valids, moles:dgValidTermID/moles:ParentListID) 
    45 or voclib:spot-vocab($voclib:gcmd_science_valids_categories, moles:dgValidTermID/moles:ParentListID) 
    46 or voclib:spot-vocab($voclib:bodc_parameter_usage_vocab, moles:dgValidTermID/moles:ParentListID) 
    47 or voclib:spot-vocab($voclib:bodc_parameter_discovery_vocab, moles:dgValidTermID/moles:ParentListID) 
    48 ])  
    49 return (element Subject { 
    50 element SubjectName {data($subject/moles:dgValidTerm)}, 
    51 element SubjectVocab {data($subject/moles:dgValidTermID/moles:ParentListID)}, 
    52 element SubjectCode {data($subject/moles:dgValidTermID/moles:TermID)} 
     41 
     42(:check for abstract text:) 
     43if (string-length($DE/moles:dgMetadataDescription/moles:abstract/moles:abstractText)  < 16)  then  
     44    element Abstract {data("There was either no or insufficient info in MOLES abstract to produce an MDIP abstract!")} 
     45else 
     46    element Abstract {string($DE/moles:dgMetadataDescription/moles:abstract/moles:abstractText)}, 
     47 
     48(:check for topicCategory stuff in moles :) 
     49if (count($DE/moles:dgStructuredKeyword[voclib:spot-vocab($voclib:iso_topic_list, moles:dgValidTermID/moles:ParentListID)] ) < 1 ) then 
     50    (: put padding in to produce valid mdip :) 
     51     element TopicCategory { 
     52        element TopicCategoryName {data("geoscientificInformation")}, 
     53        element TopicCategoryVocab {data("ISO_19115:2003_MD_TopicCategoryCodes")}, 
     54        element TopicCategoryCode {data("001")} 
     55    } 
     56else 
     57    for $isoTopic in $DE/moles:dgStructuredKeyword[voclib:spot-vocab($voclib:iso_topic_list, moles:dgValidTermID/moles:ParentListID)]  
     58    return element TopicCategory { 
     59        element TopicCategoryName {data($isoTopic/moles:dgValidTerm)}, 
     60        element TopicCategoryVocab {data($isoTopic/moles:dgValidTermID/moles:ParentListID)}, 
     61        element TopicCategoryCode {data($isoTopic/moles:dgValidTermID/moles:TermID)} 
     62    }, 
     63     
     64(: Subject :) 
     65    (: standard parameter terms :) 
     66if (count($DE/moles:dgDataEntity/moles:dgDataSummary/moles:dgParameterSummary/moles:dgStdParameterMeasured[ 
     67    voclib:spot-vocab($voclib:gcmd_science_valids, moles:dgValidTermID/moles:ParentListID)  
     68    or voclib:spot-vocab($voclib:gcmd_project_valids, moles:dgValidTermID/moles:ParentListID) 
     69    or voclib:spot-vocab($voclib:gcmd_science_valids_categories, moles:dgValidTermID/moles:ParentListID) 
     70    or voclib:spot-vocab($voclib:bodc_parameter_usage_vocab, moles:dgValidTermID/moles:ParentListID) 
     71    or voclib:spot-vocab($voclib:bodc_parameter_discovery_vocab, moles:dgValidTermID/moles:ParentListID) 
     72    ]) < 1)  then 
     73    element Subject { 
     74        element SubjectName {data("No subject identiified")}, 
     75        element SubjectVocab {data("Other")}, 
     76        element SubjectCode {data("00")} 
     77} 
     78else 
     79    for $subject in ($DE/moles:dgDataEntity/moles:dgDataSummary/moles:dgParameterSummary/moles:dgStdParameterMeasured[ 
     80    voclib:spot-vocab($voclib:gcmd_science_valids, moles:dgValidTermID/moles:ParentListID)  
     81    or voclib:spot-vocab($voclib:gcmd_project_valids, moles:dgValidTermID/moles:ParentListID) 
     82    or voclib:spot-vocab($voclib:gcmd_science_valids_categories, moles:dgValidTermID/moles:ParentListID) 
     83    or voclib:spot-vocab($voclib:bodc_parameter_usage_vocab, moles:dgValidTermID/moles:ParentListID) 
     84    or voclib:spot-vocab($voclib:bodc_parameter_discovery_vocab, moles:dgValidTermID/moles:ParentListID) 
     85    ])  
     86    return (element Subject { 
     87        element SubjectName {data($subject/moles:dgValidTerm)}, 
     88        element SubjectVocab {data($subject/moles:dgValidTermID/moles:ParentListID)}, 
     89        element SubjectCode {data($subject/moles:dgValidTermID/moles:TermID)} 
    5390}, 
     91 
     92    (: standard parameter SUB terms :) 
    5493for $subterm in $subject//moles:dgValidSubterm 
    5594return (element Subject { 
    56 element SubjectName {data($subterm/moles:dgValidTerm)}, 
    57 element SubjectVocab {data($subterm/moles:dgValidTermID/moles:ParentListID)}, 
    58 element SubjectCode {data($subterm/moles:dgValidTermID/moles:TermID)} 
    59 } 
     95    element SubjectName {data($subterm/moles:dgValidTerm)}, 
     96    element SubjectVocab {data($subterm/moles:dgValidTermID/moles:ParentListID)}, 
     97    element SubjectCode {data($subterm/moles:dgValidTermID/moles:TermID)} 
     98    } 
    6099) 
    61100), 
    62 if (exists($DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange))) then  
    63 element Date { 
    64 element DatasetStartDate { 
    65 let $dateStart:=min($DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange/moles:DateRangeStart) cast as xs:date) 
    66 return  
    67 if (empty($dateStart)) then ()  
    68 else data($dateStart) 
    69 }, 
    70 element DatasetEndDate { 
    71 let $dateEnd:=max($DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange/moles:DateRangeEnd) cast as xs:date) 
    72 return 
    73 if (empty($dateEnd)) then ()  
    74 else data($dateEnd) 
    75 } 
    76 } 
    77 else (), 
     101 
     102(:Date :) 
     103 
     104(: check empty stuff :) 
     105 
     106if (exists($DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange))) then 
     107 
     108    (:Note MOLES can have any number of dateRanges but MDIP only 1.. :) 
     109    if (count($DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange)) = 1) then  
     110        element Date { 
     111         
     112             if ($DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange/moles:DateRangeStart) != '') then 
     113                element DatasetStartDate { 
     114                    let $dateStart:=min($DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange/moles:DateRangeStart) cast as xs:date) 
     115                    return  
     116                        if (empty($dateStart)) then ()  
     117                            else data($dateStart) 
     118                             
     119                 } 
     120             else element DatasetStartDate {data('')}, 
     121              
     122            if ($DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange/moles:DateRangeEnd) != '') then 
     123                element DatasetEndDate{ 
     124                  let $dateEnd:=max($DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange/moles:DateRangeEnd) cast as xs:date) 
     125                  return 
     126                    if (empty($dateEnd)) then ()  
     127                        else data($dateEnd)  
     128                } 
     129            else element DatasetEndDate {data('')} 
     130    } 
     131     
     132    else  
     133  
     134    (: take first start date element and last end date element from the moles to bound :) 
     135        element Date { 
     136         
     137         if ($DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange[1]/moles:DateRangeStart) != '') then 
     138            element DatasetStartDate { 
     139                    let $dateStart:=$DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange[1]/moles:DateRangeStart) cast as xs:date 
     140                    return  
     141                        if (empty($dateStart)) then ()  
     142                            else data($dateStart) 
     143                } 
     144             else element DatasetStartDate {data('')}, 
     145              
     146             if ($DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange[count($DE//moles:dgTemporalCoverage/moles:DateRange)]/moles:DateRangeEnd) != '') then 
     147                element DatasetEndDate { 
     148                    let $dateEnd:=$DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange[count($DE//moles:dgTemporalCoverage/moles:DateRange)]/moles:DateRangeEnd) cast as xs:date 
     149                    return 
     150                        if (empty($dateEnd)) then ()  
     151                            else data($dateEnd) 
     152                } 
     153                  else element DatasetEndDate {data('')}                                 
     154            (: 
     155            element DatasetStartDate {data($DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange[1]/moles:DateRangeStart))}, 
     156            element DatasetEndDate {data($DE//moles:dgTemporalCoverage/(moles:DateSingle | moles:DateRange[count($DE//moles:dgTemporalCoverage/moles:DateRange)]/moles:DateRangeEnd))} 
     157            :) 
     158        } 
     159else (), (: if no date not a lot can do about it :) 
     160 
     161(:Coordinates, boundingBoxes etc :) 
     162(: MOLES, dif can have any number of boundingBoxes but MDIP only 1.. so check how many and produce an agregate box -wow -XPATH2 works here!:) 
     163if ( (count($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox) ) > 1)  then  
     164    let $minLon := min(for $bbox in ($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox) return $bbox/moles:LimitWest) return element WestCoOrdinate {data ($minLon) } 
     165else  
     166    element WestCoOrdinate {data($DE/moles:dgDataEntity/moles:dgDataSummary/moles:BoundingBox/moles:LimitWest)}, 
     167     
     168if ( (count($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox) ) > 1)  then  
     169    let $maxLon := max(for $bbox in ($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox) return $bbox/moles:LimitEast) return element EastCoOrdinate {data ($maxLon) } 
     170else  
     171    element EastCoOrdinate {data($DE/moles:dgDataEntity/moles:dgDataSummary/moles:BoundingBox/moles:LimitEast)}, 
     172     
     173if ( (count($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox) ) > 1)  then  
     174    let $maxLat := max(for $bbox in ($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox) return $bbox/moles:LimitNorth) return element NorthCoOrdinate {data ($maxLat) } 
     175else  
     176    element NorthCoOrdinate {data($DE/moles:dgDataEntity/moles:dgDataSummary/moles:BoundingBox/moles:LimitNorth)}, 
     177     
     178if ( (count($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox) ) > 1)  then  
     179    let $minLat := min(for $bbox in ($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox) return $bbox/moles:LimitSouth) return element SouthCoOrdinate {data ($minLat) } 
     180else  
     181    element SouthCoOrdinate {data($DE/moles:dgDataEntity/moles:dgDataSummary/moles:BoundingBox/moles:LimitSouth)}, 
     182     
     183 
     184(: 
    78185let $west := min($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox/moles:LimitWest cast as xs:decimal) 
    79186return element WestCoOrdinate {data($west)}, 
     
    83190return element NorthCoOrdinate {data($north)}, 
    84191let $south := min($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox/moles:LimitSouth cast as xs:decimal) 
    85 return element SouthCoOrdinate {data($south)}, 
     192return element SouthCoOrdinate {data($south)} 
     193:) 
     194(:metadata language - hardcode this.. :) 
     195element MetadataLanguage { 
     196    element LanguageName {"English"}, 
     197    element LanguageVocab {"UKGemini_LanguageCategories"}, 
     198    element LanguageCode {"eng"} 
     199}, 
     200 
     201(:DatasetIdentifier:) 
    86202element DatasetIdentifier {concat($DE/moles:dgMetadataID/moles:repositoryIdentifier, $utillib:moles_id_separator, 'MDIP', $utillib:moles_id_separator, $DE/moles:dgMetadataID/moles:localIdentifier)}, 
     203 
     204(:Target:) 
    87205for $target in $DE/moles:dgStructuredKeyword[voclib:spot-vocab($voclib:ndg_data_provider_vocab, moles:dgValidTermID/moles:ParentListID) ] 
    88206return element Target { 
     
    91209element TargetCode {data($target/moles:dgValidTermID/moles:TermID)} 
    92210} 
    93     } 
     211 
     212} 
Note: See TracChangeset for help on using the changeset viewer.