source: exist/trunk/xquery/atom2DIF.xq @ 4435

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/xquery/atom2DIF.xq@4435
Revision 4435, 8.7 KB checked in by cbyrom, 12 years ago (diff)

Extend atom2DIF export - adding and expanding out new data:

Related_URL data - extracted direct from links
Parameters - extracted from category elements - via a vocab lookup
Keywords - add standard ones

  • add these in the XQuery for universal data, and in the discovery record creation script for BADC/NEODC-specific data.

Line 
(:~
 : atom2DIF - export data entity Atom entries as DIF 9.4 records.
 :
 : For every atom:entry found in the data entities collection whose atom:id
 : ends with '__ATOM__' followed by $localIdentifier, one DIF element is
 : returned. Sensor_Name, Source_Name and Project elements are derived by
 : following the entry's deployment links - see f:deployment-linked-atoms.
 :
 : NOTE(review): the variable values below such as 'TargetCollection',
 : 'RepositoryID' and 'LocalID' look like placeholders that a calling script
 : substitutes before the query is run - confirm against the caller.
 :)
declare default element namespace 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/';
declare namespace atom = 'http://www.w3.org/2005/Atom';
declare namespace dif = 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/';
declare namespace xsi='http://www.w3.org/2001/XMLSchema-instance';
declare namespace moles="http://ndg.nerc.ac.uk/schema/moles2alpha";
declare namespace f='http://ndg.nerc.ac.uk/moles/localfunctions';
declare namespace georss = "http://www.georss.org/georss";
declare namespace gml = "http://www.opengis.net/gml";
declare namespace fn = "http://www.w3.org/2005/02/xpath-functions";

import module namespace voclib='http://ndg.nerc.ac.uk/xquery/lib/vocab' at 'xmldb:exist:///db/xqueryLib/Vocabs/vocab_xquery_lib.xquery';
import module namespace utillib='http://ndg.nerc.ac.uk/xquery/lib/utilities' at 'xmldb:exist:///db/xqueryLib/Utilities/utility_xquery_lib.xquery';

(: Keep this in official lib version
declare variable $targetCollection as xs:string := '/db/atoms/Published';
declare variable $deploymentsDir as xs:string := '/deployments';
declare variable $dataEntitiesDir as xs:string := '/data_entities';
declare variable $repositoryIdentifier as xs:string := 'badc.nerc.ac.uk';
declare variable $localIdentifier as xs:string := 'dataent_active';
declare variable $dptTerm as xs:string := 'DPTTerm';
declare variable $obsTerm as xs:string :=  'OBSTerm';
declare variable $activityTerm as xs:string :=  'ACTIVITYTerm';
declare variable $uriTerm as xs:string :=  'URITerm';
declare variable $granuleTerm as xs:string :=  'GranuleTerm';
:)

declare variable $targetCollection as xs:string := 'TargetCollection';
declare variable $repositoryIdentifier as xs:string := 'RepositoryID';
declare variable $localIdentifier as xs:string := 'LocalID';
declare variable $dptTerm as xs:string := 'DPT - NOT YET SET UP/None/DPT';
declare variable $obsTerm as xs:string :=  'OBS - NOT YET SET UP/None/OBS';
declare variable $activityTerm as xs:string :=  'Activity - NOT YET SET UP/None/ACTIVITY';
declare variable $granuleTerm as xs:string :=  'GRAN - NOT YET SET UP/None/GRANULE';
declare variable $deploymentTerm as xs:string := 'NOT YET SET UP/None/Deployment';
declare variable $uriTerm as xs:string :=  'URI';

(: These two are unlikely to change :)
declare variable $dataEntitiesDir as xs:string := 'data_entities';
declare variable $deploymentsDir as xs:string := 'deployments';
declare variable $deploymentDataDir as xs:string := 'deployment_data';

(:~
 : Resolve the deployment data atoms reachable from a data entity for one
 : link relation.
 :
 : Steps:
 :   1. take the href of every link on $dataEntity whose rel is $deploymentTerm;
 :   2. in the deployments collection, find entries that carry a link with that
 :      href and a category labelled 'Deployment', and collect the hrefs of
 :      their links whose rel is $linkRel;
 :   3. de-duplicate those hrefs, so a target shared by several deployments is
 :      only looked up once;
 :   4. for each distinct href return the entries in the deployment data
 :      collection that carry a link with that href.
 :
 : @param $dataEntity the data entity atom entry being exported
 : @param $linkRel    the rel value selecting which deployment links to follow
 : @return the matching deployment data entries, grouped by distinct href
 :)
declare function f:deployment-linked-atoms($dataEntity as element(atom:entry), $linkRel as xs:string) as element(atom:entry)*
{
    let $deploymentsCollection := string-join(($targetCollection, $deploymentsDir), '/')
    let $deploymentDataCollection := string-join(($targetCollection, $deploymentDataDir), '/')
    for $href in distinct-values(
        for $deploymentID in data($dataEntity/atom:link[@rel = $deploymentTerm]/@href)
        return
            data(collection($deploymentsCollection)/atom:entry[atom:link[@href = $deploymentID] and atom:category[@label = 'Deployment']]/atom:link[@rel = $linkRel]/@href)
    )
    return
        collection($deploymentDataCollection)/atom:entry[atom:link[@href = $href]]
};

(:~
 : Build the Short_Name / Long_Name pair shared by the Sensor_Name,
 : Source_Name and Project elements.
 :
 : @param $atom a deployment data atom entry
 : @return Short_Name taken from the moles abbreviation, Long_Name from the atom title
 :)
declare function f:short-and-long-name($atom as element(atom:entry)) as element()*
{
    element Short_Name {data($atom/moles:entity/moles:molesISO/moles:abbreviation)},
    element Long_Name {data($atom/atom:title)}
};

for $DE in collection(string-join(($targetCollection, $dataEntitiesDir, $repositoryIdentifier), '/'))/atom:entry[matches(atom:id, concat('__ATOM__',$localIdentifier,'$'))]
let $providerID := string($DE/moles:entity/moles:molesISO/moles:providerID)
return
element DIF {
        attribute xsi:schemaLocation {'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/ http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif_v9.4.xsd'},
        (: Entry_ID is the provider ID, ':DIF:' and the part of the atom id after '__ATOM__' :)
        element Entry_ID {concat($providerID, ':DIF:', tokenize(string($DE/atom:id), '__ATOM__')[2])},
        element Entry_Title {string($DE/atom:title)},
        element Data_Set_Citation {
            element Dataset_Creator {string($DE/atom:author/atom:name)},
            element Dataset_Title {string($DE/atom:title)},
            element Dataset_Publisher {$providerID}
        },
        element Personnel {
            element Role {''},
            element Last_Name {''}
        },
        element Discipline { element Discipline_Name {''} },

        (: One Parameters element per atom category :)
        for $category in ($DE/atom:category)
        return
            element Parameters {
                element Category {'Earth Science'},
                element Topic {string-join((data($category/@scheme), data($category/@term)), '---')},
                element Term {data($category/@label)}
            },

        (: Deployment derived elements - the link lookups are de-duplicated inside the helper :)
        for $atom in f:deployment-linked-atoms($DE, $dptTerm)
        return
            element Sensor_Name { f:short-and-long-name($atom) },

        for $atom in f:deployment-linked-atoms($DE, $obsTerm)
        return
            element Source_Name { f:short-and-long-name($atom) },

        (: Use the loop variable so that each temporal range yields its own
           element - reading $DE/moles:temporalRange here instead would fail
           in string() as soon as an entry holds more than one range :)
        for $TemporalRange in $DE/moles:temporalRange
        let $dates := tokenize(string($TemporalRange), '/')
        return
            element Temporal_Coverage {
                element Start_Date {$dates[1]},
                element Stop_Date {$dates[2]}
            },

        (: Corners are read as 'first-value second-value' with the latitude in
           second position; every gml:Envelope below the entry is exported :)
        for $BoundingBox in $DE//gml:Envelope
        let $lowerCorner := tokenize(string($BoundingBox/gml:lowerCorner), ' ')
        let $upperCorner := tokenize(string($BoundingBox/gml:upperCorner), ' ')
        return
            element Spatial_Coverage {
                element Southernmost_Latitude {$lowerCorner[2]},
                element Northernmost_Latitude {$upperCorner[2]},
                element Westernmost_Longitude {$lowerCorner[1]},
                element Easternmost_Longitude {$upperCorner[1]}
            },

        element Data_Resolution {''},

        for $atom in f:deployment-linked-atoms($DE, $activityTerm)
        return
            element Project { f:short-and-long-name($atom) },

        element Quality {''},
        element Use_Constraints {''},
        element Data_Set_Language {''},
        element Distribution {''},
        element Multimedia_Sample {''},
        element Reference {''},
        element Summary {string($DE/atom:summary)},

        (: NB, deployment data is already included in the Project, Source_Name and Sensor_Name elements :)
        (: NOTE(review): '!=' is false when @rel is absent, so links without a rel attribute are skipped as well :)
        for $link in ($DE/atom:link[@rel != $deploymentTerm])
        return
            element Related_URL {
                element URL {data($link/@href)},
                element Description {data($link/@title)},
                element URL_Content_Type {
                    element Type {data($link/@rel)}
                }
            },

        element Parent_DIF {''},
        element IDN_Node { element Short_Name {''} },
        element Originating_Metadata_Node {''},
        element Metadata_Name {'[CEOS IDN DIF]'},
        element Metadata_Version {'9.4'},

        (: The date elements are only emitted when the source data is present :)
        if (exists($DE/moles:entity/moles:molesISO/moles:created)) then
            element DIF_Creation_Date {data($DE/moles:entity/moles:molesISO/moles:created)}
        else (),
        if (exists($DE/atom:updated)) then
            element DIF_Revision_History {data($DE/atom:updated)}
        else ()
}
Note: See TracBrowser for help on using the repository browser.