Changeset 4435


Ignore:
Timestamp:
17/11/08 17:37:05 (11 years ago)
Author:
cbyrom
Message:

Extend atom2DIF export - adding and expanding out new data:

Related_URL data - extracted directly from links
Parameters - extracted from category elements - via a vocab lookup
Keywords - add standard ones

  • add these in the xquery, for universal data, and in the discovery record creation script, for BADC/NEODC specific data.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • exist/trunk/xquery/atom2DIF.xq

    r4430 r4435  
    1 declare default element namespace 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/'; 
     1declare default element namespace 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/'; 
    22declare namespace atom = 'http://www.w3.org/2005/Atom'; 
    33declare namespace dif = 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/'; 
    4 declare namespace xsi='http://www.w3.org/2001/XMLSchema-instance';  
     4declare namespace xsi='http://www.w3.org/2001/XMLSchema-instance'; 
    55declare namespace moles="http://ndg.nerc.ac.uk/schema/moles2alpha"; 
    66declare namespace f='http://ndg.nerc.ac.uk/moles/localfunctions'; 
     
    1212import module namespace utillib='http://ndg.nerc.ac.uk/xquery/lib/utilities' at 'xmldb:exist:///db/xqueryLib/Utilities/utility_xquery_lib.xquery'; 
    1313 
    14 (: Keep this in official lib version  
    15 declare variable $targetCollection as xs:string := 'TargetCollection/Published/data_entities/RepositoryID'; 
    16 declare variable $deploymentsCollection as xs:string := 'TargetCollection/Published/deployments/RepositoryID'; 
    17 declare variable $repositoryIdentifier as xs:string := 'RepositoryID'; 
    18 declare variable $localIdentifier as xs:string := 'LocalID'; 
     14(: Keep this in official lib version 
     15declare variable $targetCollection as xs:string := '/db/atoms/Published'; 
     16declare variable $deploymentsDir as xs:string := '/deployments'; 
     17declare variable $dataEntitiesDir as xs:string := '/data_entities'; 
     18declare variable $repositoryIdentifier as xs:string := 'badc.nerc.ac.uk'; 
     19declare variable $localIdentifier as xs:string := 'dataent_active'; 
    1920declare variable $dptTerm as xs:string := 'DPTTerm'; 
    2021declare variable $obsTerm as xs:string :=  'OBSTerm'; 
     
    2526 
    2627declare variable $targetCollection as xs:string := 'TargetCollection'; 
    27 declare variable $deploymentsCollection as xs:string := '/db/atoms'; 
    2828declare variable $repositoryIdentifier as xs:string := 'RepositoryID'; 
    2929declare variable $localIdentifier as xs:string := 'LocalID'; 
     
    3535declare variable $uriTerm as xs:string :=  'URI'; 
    3636 
    37 for $DE in collection($targetCollection)/atom:entry[matches(atom:id, concat('__ATOM__',$localIdentifier,'$'))]  
    38 return  
     37(: These two are unlikely to change :) 
     38declare variable $dataEntitiesDir as xs:string := 'data_entities'; 
     39declare variable $deploymentsDir as xs:string := 'deployments'; 
     40declare variable $deploymentDataDir as xs:string := 'deployment_data'; 
     41 
     42for $DE in collection(string-join(($targetCollection, $dataEntitiesDir, $repositoryIdentifier), '/'))/atom:entry[matches(atom:id, concat('__ATOM__',$localIdentifier,'$'))] 
     43return 
    3944element DIF { 
    40         attribute xsi:schemaLocation {'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/ http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif_v9.4.xsd'}, 
    41         element Entry_ID {concat(string($DE/moles:entity/moles:molesISO/moles:providerID),':DIF:', tokenize(string($DE/atom:id), '__ATOM__')[2])}, 
    42         element Entry_Title {string($DE/atom:title)}, 
    43         element Data_Set_Citation { 
    44             element Dataset_Creator {string($DE/atom:author/atom:name)}, 
    45             element Dataset_Title {string($DE/atom:title)} 
    46             element Dataset_Publisher {string($DE/moles:entity/moles:molesISO/moles:providerID)} 
    47         }, 
    48         element Personnel { 
    49             element Role {''}, 
    50             element Last_Name {''} 
    51         }, 
    52         element Discipline { element Discipline_Name {''} }, 
    53         element Parameters { element Category {''}, element Topic {''}, element Term {''} }, 
     45        attribute xsi:schemaLocation {'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/ http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif_v9.4.xsd'}, 
     46        element Entry_ID {concat(string($DE/moles:entity/moles:molesISO/moles:providerID),':DIF:', tokenize(string($DE/atom:id), '__ATOM__')[2])}, 
     47        element Entry_Title {string($DE/atom:title)}, 
     48        element Data_Set_Citation { 
     49            element Dataset_Creator {string($DE/atom:author/atom:name)}, 
     50            element Dataset_Title {string($DE/atom:title)}, 
     51            element Dataset_Publisher {string($DE/moles:entity/moles:molesISO/moles:providerID)} 
     52        }, 
     53        element Personnel { 
     54            element Role {''}, 
     55            element Last_Name {''} 
     56        }, 
     57        element Discipline { element Discipline_Name {''} }, 
     58        for $category in ($DE/atom:category) 
     59        return 
     60                element Parameters {  
     61                        element Category {'Earth Science'},  
     62                element Topic {string-join((data($category/@scheme), data($category/@term)), '---')}, 
     63                        element Term {data($category/@label)}  
     64                        }, 
    5465 
    5566   (:Top level loop over all link data in the atom that refers to deployment atoms:) 
    5667   (: NB, slightly convoluted way of doing things to avoid item duplication :) 
    57         for $val in distinct-values( 
    58                 for $deploymentID in data($DE/atom:link[@rel = $deploymentTerm]/@href) 
    59         return 
    60         (:Now look up these deployment atoms and retrieve their links - removing duplicates:) 
    61                 for $title in (data(collection($deploymentsCollection)/atom:entry[atom:link[@href = $deploymentID] and atom:category[@label = 'Deployment']]/atom:link[@rel = $dptTerm]/@title)) 
    62                 return 
    63                                 $title 
    64                         ) 
    65         return  
    66                 element Sensor_Name { 
    67                 element Long_Name {$val} 
    68         }, 
    69          
    70         for $val in distinct-values( 
    71                 for $deploymentID in data($DE/atom:link[@rel = $deploymentTerm]/@href) 
    72         return 
    73                 for $title in (data(collection($deploymentsCollection)/atom:entry[atom:link[@href = $deploymentID] and atom:category[@label = 'Deployment']]/atom:link[@rel = $obsTerm]/@title)) 
    74                 return 
    75                                 $title 
    76                         ) 
    77     return 
    78             element Source_Name { 
    79            element Long_Name {$val} 
    80             }, 
     68        for $val in distinct-values( 
     69                for $deploymentID in data($DE/atom:link[@rel = $deploymentTerm]/@href) 
     70        return 
     71        (:Now look up these deployment atoms and retrieve their links - removing duplicates:) 
     72        for $href in (data(collection(string-join(($targetCollection, $deploymentsDir), '/'))/atom:entry[atom:link[@href = $deploymentID] and atom:category[@label = 'Deployment']]/atom:link[@rel = $dptTerm]/@href)) 
     73                return 
     74                                $href 
     75                        ) 
     76        return 
     77                for $atom in (collection(string-join(($targetCollection, $deploymentDataDir), '/'))/atom:entry[atom:link[@href = $val]]) 
     78                return 
     79                    element Sensor_Name { 
     80                        element Short_Name {data($atom/moles:entity/moles:molesISO/moles:abbreviation)}, 
     81                        element Long_Name {data($atom/atom:title)} 
     82                    }, 
     83 
     84        for $val in distinct-values( 
     85                for $deploymentID in data($DE/atom:link[@rel = $deploymentTerm]/@href) 
     86        return 
     87                for $href in (data(collection(string-join(($targetCollection, $deploymentsDir), '/'))/atom:entry[atom:link[@href = $deploymentID] and atom:category[@label = 'Deployment']]/atom:link[@rel = $obsTerm]/@href)) 
     88                return 
     89                                $href 
     90                        ) 
     91        return 
     92                for $atom in (collection(string-join(($targetCollection, $deploymentDataDir), '/'))/atom:entry[atom:link[@href = $val]]) 
     93                return 
     94                    element Source_Name { 
     95                        element Short_Name {data($atom/moles:entity/moles:molesISO/moles:abbreviation)}, 
     96                        element Long_Name {data($atom/atom:title)} 
     97                    }, 
     98 
    8199   for $TemporalRange in $DE/moles:temporalRange 
    82         return 
    83         element Temporal_Coverage 
     100        return 
     101        element Temporal_Coverage 
    84102        { 
    85103        element Start_Date {tokenize(string($DE/moles:temporalRange), '/')[1]}, 
    86104        element Stop_Date {tokenize(string($DE/moles:temporalRange), '/')[2]} 
    87105        }, 
    88             
    89         for $BoundingBox in $DE//../gml:Envelope 
    90         return  
    91             element Spatial_Coverage { 
    92                     element Southernmost_Latitude {tokenize(string($BoundingBox/gml:lowerCorner), ' ')[2]}, 
    93                     element Northernmost_Latitude {tokenize(string($BoundingBox/gml:upperCorner), ' ')[2]}, 
    94                     element Westernmost_Longitude {tokenize(string($BoundingBox/gml:lowerCorner), ' ')[1]}, 
    95                     element Easternmost_Longitude {tokenize(string($BoundingBox/gml:upperCorner), ' ')[1]} 
    96                 }, 
    97           
    98         element Data_Resolution {''}, 
    99          
    100         for $val in distinct-values( 
    101                 for $deploymentID in data($DE/atom:link[@rel = $deploymentTerm]/@href) 
    102         return 
    103                 for $title in (data(collection($deploymentsCollection)/atom:entry[atom:link[@href = $deploymentID] and atom:category[@label = 'Deployment']]/atom:link[@rel = $activityTerm]/@title)) 
    104                 return 
    105                                 $title 
    106                         ) 
    107     return 
    108             element Project { 
    109            element Long_Name {$val} 
    110             }, 
    111         element Quality {''}, 
    112         element Use_Constraints {''}, 
    113         element Data_Set_Language {''}, 
    114         element Distribution {''}, 
    115         element Multimedia_Sample {''}, 
    116         element Reference {''}, 
    117         element Summary {string($DE/atom:summary)}, 
    118     for $link in ($DE/atom:link[@rel = $granuleTerm]) 
     106 
     107        for $BoundingBox in $DE//../gml:Envelope 
     108        return 
     109            element Spatial_Coverage { 
     110                    element Southernmost_Latitude {tokenize(string($BoundingBox/gml:lowerCorner), ' ')[2]}, 
     111                    element Northernmost_Latitude {tokenize(string($BoundingBox/gml:upperCorner), ' ')[2]}, 
     112                    element Westernmost_Longitude {tokenize(string($BoundingBox/gml:lowerCorner), ' ')[1]}, 
     113                    element Easternmost_Longitude {tokenize(string($BoundingBox/gml:upperCorner), ' ')[1]} 
     114                }, 
     115 
     116        element Data_Resolution {''}, 
     117 
     118        for $val in distinct-values( 
     119                for $deploymentID in data($DE/atom:link[@rel = $deploymentTerm]/@href) 
     120        return 
     121                for $href in (data(collection(string-join(($targetCollection, $deploymentsDir), '/'))/atom:entry[atom:link[@href = $deploymentID] and atom:category[@label = 'Deployment']]/atom:link[@rel = $activityTerm]/@href)) 
     122                return 
     123                                $href 
     124                        ) 
     125        return 
     126                for $atom in (collection(string-join(($targetCollection, $deploymentDataDir), '/'))/atom:entry[atom:link[@href = $val]]) 
     127                return 
     128                    element Project { 
     129                        element Short_Name {data($atom/moles:entity/moles:molesISO/moles:abbreviation)}, 
     130                        element Long_Name {data($atom/atom:title)} 
     131                    }, 
     132 
     133        element Quality {''}, 
     134        element Use_Constraints {''}, 
     135        element Data_Set_Language {''}, 
     136        element Distribution {''}, 
     137        element Multimedia_Sample {''}, 
     138        element Reference {''}, 
     139        element Summary {string($DE/atom:summary)}, 
     140        (: NB, deployment data is already included in the Project, Source_Name and Sensor_Name elements :) 
     141   for $link in ($DE/atom:link[@rel != $deploymentTerm]) 
    119142       return 
    120143            element Related_URL { 
    121                     element URL {data($link[@href])}, 
    122                        element Description {data($link[@title])}  
    123                        }, 
    124  
    125    for $link in ($DE/atom:link[@rel = $uriTerm]) 
    126        return 
    127             element Related_URL { 
    128                     element URL {data($link[@href])}, 
    129                        element Description {data($link[@title])} 
    130                        }, 
    131         element Parent_DIF {''}, 
    132         element IDN_Node { element Short_Name {''} }, 
    133         element Originating_Metadata_Node {''},  
    134         element Metadata_Name {'[CEOS IDN DIF]'}, 
    135         element Metadata_Version {'9.4'}, 
    136         if  (exists($DE/moles:entity/moles:molesISO/moles:created)) then  
    137                 element DIF_Creation_Date {data($DE/moles:entity/moles:molesISO/moles:created)} 
    138         else (), 
    139         if  (exists($DE/atom:updated)) then  
    140                 element DIF_Revision_History {data($DE/atom:updated)}  
    141         else () 
    142         }  
     144                    element URL {data($link/@href)}, 
     145                    element Description {data($link/@title)}, 
     146                    element URL_Content_Type{ 
     147                                element Type{data($link/@rel)} 
     148                    } 
     149            }, 
     150        element Parent_DIF {''}, 
     151        element IDN_Node { element Short_Name {''} }, 
     152        element Originating_Metadata_Node {''}, 
     153        element Metadata_Name {'[CEOS IDN DIF]'}, 
     154        element Metadata_Version {'9.4'}, 
     155        if  (exists($DE/moles:entity/moles:molesISO/moles:created)) then 
     156                element DIF_Creation_Date {data($DE/moles:entity/moles:molesISO/moles:created)} 
     157        else (), 
     158        if  (exists($DE/atom:updated)) then 
     159                element DIF_Revision_History {data($DE/atom:updated)} 
     160        else () 
     161        } 
Note: See TracChangeset for help on using the changeset viewer.