source: exist/trunk/xquery/dif2moles.xq @ 3841

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/xquery/dif2moles.xq@4326
Revision 3841, 13.2 KB checked in by cbyrom, 11 years ago (diff)

Various updates to the xquery files - to remove usage of invalid specs
and dependency on obsolete function calls. Also add the inputParse_xquery_lib.xquery
file - referenced from the xquery files but not currently included in codebase.
NB, changes made to allow use of Saxon java library to do xqueries - to
remove the need for the discovery service eXist DB.

  • NB, the changes were also tested by comparing the Saxon transforms to the eXist ones; these were found to be identical.

Line 
(: This query produces one MOLES data entity plus one organisation entry for a given DIF instance.
   Inputs are:
     TargetCollection   - the collection where the DIF exists;
     RepositoryID       - where the existing DIF lies, and expected to be the output RepositoryID
                          (fix it in the output xml if it's wrong);
     Input_Entry_ID     - the DIF Entry_ID of the record to convert;
     LocalID            - the output localID for the data entity;
     repository_localid - the localID for the organisation entity (used as the curator).
   Output is a single dgMetadata element per matching DIF, holding one dgMetadataRecord followed by
   one dgOrganisation per distinct Dataset_Creator / Originating_Center value.
   :)
(: Note algorithm for creating non-pre-existing organisations :)
(: dgPersons are not created as one can't tell automatically which are people and which are orgs, and orgs are simpler :)

import module namespace voclib='http://ndg.nerc.ac.uk/xquery/lib/vocab' at 'xmldb:exist:///db/xqueryLib/Vocabs/vocab_xquery_lib.xquery';
import module namespace inputParse='http://ndg.nerc.ac.uk/xquery/lib/inputParse' at 'xmldb:exist:///db/xqueryLib/Utilities/inputParse_xquery_lib.xquery';

declare default element namespace 'http://ndg.nerc.ac.uk/moles';
declare namespace dif='http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/';
declare namespace xsi='http://www.w3.org/2001/XMLSchema-instance';

(: Replaceable parameters.
   The string literals below are placeholders named after the inputs listed in the header comment;
   presumably the caller substitutes real values before running the query - TODO confirm how.
   NOTE(review): the guards further down compare $output_local_id with 'Output_LocalID', whereas the
   placeholder declared here is 'LocalID'. Confirm which literal the substituting caller leaves
   behind; as written, an unsubstituted 'LocalID' is treated as a real identifier. :)
declare variable $targetCollection as xs:string := 'TargetCollection';
declare variable $input_repository as xs:string := 'RepositoryID';
declare variable $input_repository_local as xs:string := 'repository_localid';
declare variable $input_entry_id as xs:string := 'Input_Entry_ID';
declare variable $output_local_id as xs:string := 'LocalID';

(: One dgMetadata document is produced for every DIF in the target collection whose Entry_ID
   equals the requested entry id. :)
for $DIF in collection($targetCollection)/dif:DIF[dif:Entry_ID=$input_entry_id]
return
element dgMetadata {
  element dgMetadataRecord {
    (: Identifier of the data entity itself: the supplied local id, or the DIF Entry_ID as fallback. :)
    element dgMetadataID {
      element schemeIdentifier {'NDG-B0'},
      element repositoryIdentifier {$input_repository},
      if ($output_local_id != 'Output_LocalID') then
        element localIdentifier {$output_local_id}
      else
        element localIdentifier {data($DIF/dif:Entry_ID)}
    },
    (: Description block: generated id, today's date, the DIF Summary as abstract, and one
       descriptionSection per Related_URL. :)
    element dgMetadataDescription {
      element metadataDescriptionID {
        element schemeIdentifier {'NDG-B0'},
        element repositoryIdentifier {$input_repository},
        if ($output_local_id != 'Output_LocalID') then
          element localIdentifier {concat('generated_desc-', $output_local_id)}
        else
          element localIdentifier {concat('generated_desc-', encode-for-uri(string($DIF/dif:Entry_ID)))}
      },
      element metadataDescriptionLastUpdated {current-date()},
      element abstract {
        element abstractText {string($DIF/dif:Summary)}
      },
      for $descOnline in $DIF/dif:Related_URL
      return
        element descriptionSection {
          element descriptionOnlineReference {
            (: NOTE(review): encode-for-uri also escapes ':' and '/', so a full URL is emitted in
               fully percent-encoded form - confirm consumers expect that. :)
            element dgSimpleLink {encode-for-uri(data($descOnline/dif:URL))}
          },
          if (exists($descOnline/dif:URL_Content_Type)) then
            element dgReferenceName {data($descOnline/dif:URL_Content_Type)}
          else ()
        }
    },
    (: The DIF Entry_Title is used for both the name and the abbreviation. :)
    element name {string($DIF/dif:Entry_Title)},
    element abbreviation {string($DIF/dif:Entry_Title)},
    element dgDataEntity {
      element dgDataSetType {''},
      element dgDataSummary {
        (: One dgParameterSummary per DIF Parameters element. :)
        for $parameter in $DIF/dif:Parameters
        return
          element dgParameterSummary {
            (: The DIF carries no value/unit here, so a fixed dummy unit is written. :)
            element dgParameterValue {
              element dgValueDataParameter {
                element Value {''},
                element dgStandardUnit {
                  element dgValidTerm {'dummy'},
                  element dgValidTermID {
                    element ParentListID {$voclib:unknown_vocab_id},
                    element TermID {encode-for-uri('dummy unit')}
                  }
                }
              }
            },
            (: Category > Topic > Term > Variable > Detailed_Variable is written as nested
               dgValidSubterm elements; each level closes with its ListLevel (0 = Category).
               Category, Topic and Term are always emitted ('unknown' when empty); Variable and
               Detailed_Variable only when present and non-empty. :)
            element dgStdParameterMeasured {
              if ($parameter/dif:Category!='') then element dgValidTerm {string($parameter/dif:Category)}
              else element dgValidTerm {'unknown'},
              element dgValidTermID {
                element ParentListID {concat($voclib:gcmd_science_valids_categories, '/current')},
                element TermID {encode-for-uri($parameter/dif:Category)}
              },
              element dgValidSubterm {
                if ($parameter/dif:Topic!='') then element dgValidTerm {string($parameter/dif:Topic)}
                else element dgValidTerm {'unknown'},
                element dgValidTermID {
                  element ParentListID {concat($voclib:gcmd_science_valids_topics, '/current')},
                  element TermID {encode-for-uri($parameter/dif:Topic)}
                },
                element dgValidSubterm {
                  if ($parameter/dif:Term!='') then element dgValidTerm {string($parameter/dif:Term)}
                  else element dgValidTerm {'unknown'},
                  element dgValidTermID {
                    element ParentListID {concat($voclib:gcmd_science_valids_terms, '/current')},
                    element TermID {encode-for-uri($parameter/dif:Term)}
                  },
                  if (exists($parameter/dif:Variable) and $parameter/dif:Variable!='') then
                    element dgValidSubterm {
                      element dgValidTerm {string($parameter/dif:Variable)},
                      element dgValidTermID {
                        element ParentListID {concat($voclib:gcmd_science_valids_variables, '/current')},
                        element TermID {encode-for-uri($parameter/dif:Variable)}
                      },
                      if (exists($parameter/dif:Detailed_Variable) and $parameter/dif:Detailed_Variable!='') then
                        element dgValidSubterm {
                          element dgValidTerm {string($parameter/dif:Detailed_Variable)},
                          element dgValidTermID {
                            element ParentListID {$voclib:unknown_vocab_id},
                            element TermID {encode-for-uri($parameter/dif:Detailed_Variable)}
                          },
                          element ListLevel {4}
                        }
                      else (),
                      element ListLevel {3}
                    }
                  else (),
                  element ListLevel {2}
                },
                element ListLevel {1}
              },
              element ListLevel {0}
            },
            (: Human-readable path built from the same hierarchy; optional levels are appended
               as separate items of the content sequence. :)
            element ParameterName {
              concat (string($parameter/dif:Category), ' > ', string($parameter/dif:Topic), ' > ', string($parameter/dif:Term)),
              if (exists($parameter/dif:Variable) and $parameter/dif:Variable!='') then
                concat(' > ', string($parameter/dif:Variable))
              else (),
              if (exists($parameter/dif:Detailed_Variable) and $parameter/dif:Detailed_Variable!='') then
                concat(' > ', string($parameter/dif:Detailed_Variable))
              else ()
            },
            (: The abbreviation is built exactly like ParameterName. :)
            element ParameterAbbreviation {
              concat (string($parameter/dif:Category), ' > ', string($parameter/dif:Topic), ' > ', string($parameter/dif:Term)),
              if (exists($parameter/dif:Variable) and $parameter/dif:Variable!='') then
                concat(' > ', string($parameter/dif:Variable))
              else (),
              if (exists($parameter/dif:Detailed_Variable) and $parameter/dif:Detailed_Variable!='') then
                concat(' > ', string($parameter/dif:Detailed_Variable))
              else ()
            }
          },
        (: Coverage is emitted when any spatial or temporal source element exists.
           Chronostratigraphic_Unit is part of this guard because the temporal section below
           converts it; without it a DIF carrying only chronostratigraphic units lost its coverage. :)
        if (exists($DIF/dif:Spatial_Coverage)
            or exists($DIF/dif:Paleo_Temporal_Coverage)
            or exists($DIF/dif:Location)
            or exists($DIF/dif:Temporal_Coverage)
            or exists($DIF/dif:Chronostratigraphic_Unit)) then
          element dgDataCoverage {
            if (exists($DIF/dif:Spatial_Coverage) or exists($DIF/dif:Location)) then
              element dgSpatialCoverage {
                (: Only Spatial_Coverage elements with all four limits become a BoundingBox. :)
                for $boundingbox in $DIF/dif:Spatial_Coverage[exists(dif:Northernmost_Latitude)
                                                              and exists(dif:Southernmost_Latitude)
                                                              and exists(dif:Easternmost_Longitude)
                                                              and exists(dif:Westernmost_Longitude)]
                return
                  element BoundingBox {
                    element LimitNorth {data(inputParse:fix-coord($boundingbox/dif:Northernmost_Latitude))},
                    element LimitSouth {data(inputParse:fix-coord($boundingbox/dif:Southernmost_Latitude))},
                    element LimitWest {data(inputParse:fix-coord($boundingbox/dif:Westernmost_Longitude))},
                    element LimitEast {data(inputParse:fix-coord($boundingbox/dif:Easternmost_Longitude))}
                  },
                for $location in $DIF/dif:Location
                return
                  element dgArea {
                    element dgValidTerm {string($location)},
                    element dgValidTermID {
                      element ParentListID {concat($voclib:gcmd_location_valids, '/current')},
                      element TermID {encode-for-uri($location)}
                    }
                  }
              }
            else (),
            if (exists($DIF/dif:Temporal_Coverage) or exists($DIF/dif:Paleo_Temporal_Coverage) or exists($DIF/dif:Chronostratigraphic_Unit)) then
              element dgTemporalCoverage {
                (: A DateRange needs at least a start date; the stop date may be empty. :)
                for $temporalcoverage in $DIF/dif:Temporal_Coverage[exists(dif:Start_Date)]
                return
                  element DateRange {
                    element DateRangeStart {string($temporalcoverage/dif:Start_Date)},
                    element DateRangeEnd {string($temporalcoverage/dif:Stop_Date)}
                  },
                for $paleotemporalcoverage in $DIF/dif:Paleo_Temporal_Coverage[exists(dif:Paleo_Start_Date)]
                return
                  element DateRange {
                    element DateRangeStart {string($paleotemporalcoverage/dif:Paleo_Start_Date)},
                    element DateRangeEnd {string($paleotemporalcoverage/dif:Paleo_Stop_Date)}
                  },
                for $chronostratigraphic in $DIF/dif:Chronostratigraphic_Unit
                return
                  element dgChronostratigraphicTerm {
                    element dgValidTerm {string($chronostratigraphic)},
                    element dgValidTermID {
                      element ParentListID {concat($voclib:gcmd_chronostratigraphic_valids, '/current')},
                      element TermID {encode-for-uri($chronostratigraphic)}
                    }
                  }
              }
            else ()
          }
        else ()
      },
      element dgDataRoles {
        (: Creator role: taken from Data_Set_Citation/Dataset_Creator when present, otherwise from
           Originating_Center, otherwise omitted. Every role holder points at a dgOrganisation
           generated at the end of this query, using the same 'generated_orgcit-' identifier. :)
        if (exists($DIF/dif:Data_Set_Citation/dif:Dataset_Creator)) then
          element dgDataCreator {
            element dgMetadataID {
              element schemeIdentifier {'NDG-B0'},
              element repositoryIdentifier {$input_repository},
              if ($output_local_id != 'Output_LocalID') then
                element localIdentifier {concat('generated_creator-', $output_local_id)}
              else
                element localIdentifier {concat('generated_creator-', encode-for-uri(string($DIF/dif:Entry_ID)))}
            },
            element roleName {'Data Creator'},
            element abbreviation {'Creator'},
            for $creatorID in $DIF/dif:Data_Set_Citation/dif:Dataset_Creator
            return
              element dgRoleHolder {
                (: dgOrganisationID, matching the Originating_Center and curator branches: the
                   identifier refers to a generated dgOrganisation. :)
                element dgOrganisationID {
                  element schemeIdentifier {'NDG-B0'},
                  element repositoryIdentifier {$input_repository},
                  if ($output_local_id != 'Output_LocalID') then
                    element localIdentifier {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', $output_local_id))}
                  else
                    element localIdentifier {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', data($DIF/dif:Entry_ID)))}
                },
                element startDate {current-date()}
              }
          }
        else if (exists($DIF/dif:Originating_Center)) then
          element dgDataCreator {
            element dgMetadataID {
              element schemeIdentifier {'NDG-B0'},
              element repositoryIdentifier {$input_repository},
              if ($output_local_id != 'Output_LocalID') then
                element localIdentifier {encode-for-uri(concat('generated_creator-', $output_local_id))}
              else
                element localIdentifier {encode-for-uri(concat('generated_creator-', data($DIF/dif:Entry_ID)))}
            },
            element roleName {'Data Creator'},
            element abbreviation {'Creator'},
            for $creatorID in $DIF/dif:Originating_Center
            return
              element dgRoleHolder {
                element dgOrganisationID {
                  element schemeIdentifier {'NDG-B0'},
                  element repositoryIdentifier {$input_repository},
                  if ($output_local_id != 'Output_LocalID') then
                    element localIdentifier {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', $output_local_id))}
                  else
                    element localIdentifier {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', data($DIF/dif:Entry_ID)))}
                },
                element startDate {current-date()}
              }
          }
        else (),
        (: Curator role: always emitted; the holder is the organisation identified by the
           repository_localid parameter. :)
        element dgDataCurator {
          element dgMetadataID {
            element schemeIdentifier {'NDG-B0'},
            element repositoryIdentifier {$input_repository},
            if ($output_local_id != 'Output_LocalID') then
              element localIdentifier {encode-for-uri(concat('generated_curator-', $output_local_id))}
            else
              element localIdentifier {encode-for-uri(concat('generated_curator-', data($DIF/dif:Entry_ID)))}
          },
          element roleName {'Data Curator'},
          element abbreviation {'Curator'},
          element dgRoleHolder {
            element dgOrganisationID {
              element schemeIdentifier {'NDG-B0'},
              element repositoryIdentifier {$input_repository},
              element localIdentifier {$input_repository_local}
            },
            element startDate {current-date()}
          }
        }
      }
    },
    (: Fixed keyword tagging every record produced by this conversion. :)
    element dgStructuredKeyword {
      element dgValidTerm {'d2b converted record'},
      element dgValidTermID {
        element ParentListID {$voclib:unknown_vocab_id},
        element TermID {'d2b'}
      }
    },
    (: DIF Keyword values: four known values are attributed to the data-provider vocabulary,
       everything else to the unknown vocabulary. :)
    for $structuredKeywords in $DIF/dif:Keyword
    return
      element dgStructuredKeyword {
        element dgValidTerm {string($structuredKeywords)},
        element dgValidTermID {
          if ($structuredKeywords='MDIP' or $structuredKeywords='NERC' or $structuredKeywords='NERC_DDC' or $structuredKeywords='DPPP') then
            element ParentListID {concat($voclib:ndg_data_provider_vocab, '/current')}
          else
            element ParentListID {$voclib:unknown_vocab_id},
          element TermID {encode-for-uri($structuredKeywords)}
        }
      },
    for $structuredKeywords in $DIF/dif:ISO_Topic_Category
    return
      element dgStructuredKeyword {
        element dgValidTerm {string($structuredKeywords)},
        element dgValidTermID {
          element ParentListID {concat($voclib:iso_topic_list, '/current')},
          element TermID {encode-for-uri($structuredKeywords)}
        }
      },
    (: Provenance is only written when the DIF carries a creation or a revision date. :)
    if (exists($DIF/dif:DIF_Creation_Date) or exists($DIF/dif:Last_DIF_Revision_Date)) then
      element dgMetadataProvenance {
        if (exists($DIF/dif:DIF_Creation_Date)) then
          element RecordCreation {
            element CreatedDate {
              (: Values that are not castable as xs:date fall back to today's date. :)
              if (string($DIF/dif:DIF_Creation_Date) castable as xs:date) then
                string($DIF/dif:DIF_Creation_Date) cast as xs:date
              else (current-date())
            },
            element CreatedBy {$input_repository}
          }
        else
          element RecordCreation {
            element CreatedDate {current-date()},
            element CreatedBy {'MOLES Import'}
          },
        if (exists($DIF/dif:Last_DIF_Revision_Date)) then
          element RecordUpdate {
            element UpdateDate {
              (: Accept a full dateTime, or a date-only value promoted to midnight, so that a
                 valid revision date is kept rather than replaced by the current time. Only
                 values that are neither fall back to the current dateTime. :)
              if (string($DIF/dif:Last_DIF_Revision_Date) castable as xs:dateTime) then
                string($DIF/dif:Last_DIF_Revision_Date) cast as xs:dateTime
              else if (string($DIF/dif:Last_DIF_Revision_Date) castable as xs:date) then
                xs:dateTime(xs:date(string($DIF/dif:Last_DIF_Revision_Date)))
              else (current-dateTime())
            },
            element UpdatedBy {$input_repository}
          }
        else ()
      }
    else ()
  } (: </dgMetadataRecord>:),
  (: One generated organisation per distinct creator / originating-centre string; the local
     identifier is built the same way as in the dgRoleHolder references above. :)
  for $creator in distinct-values($DIF/(dif:Data_Set_Citation/dif:Dataset_Creator | dif:Originating_Center))
  return
    element dgOrganisation {
      element dgMetadataID {
        element schemeIdentifier {'NDG-B0'},
        element repositoryIdentifier {$input_repository},
        if ($output_local_id != 'Output_LocalID') then
          element localIdentifier {encode-for-uri(concat('generated_orgcit-', string($creator), '-', $output_local_id))}
        else
          element localIdentifier {encode-for-uri(concat('generated_orgcit-', string($creator), '-', data($DIF/dif:Entry_ID)))}
      },
      element name {string($creator)},
      element abbreviation {string($creator)},
      element contactDetails {''}
    }
} (:    </dgMetadata> :)
Note: See TracBrowser for help on using the repository browser.