source: exist/trunk/xquery/dif2moles.xq @ 3151

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/xquery/dif2moles.xq@3158
Revision 3151, 13.8 KB checked in by lawrence, 12 years ago (diff)

Fixing the xqueries to support the "right" browse query
in the ndgUtils egg.

Line 
1(: This query produces one MOLES data entity plus one organisation entry for a given DIF instance.
2   Inputs are TargetCollection (the collection where the DIF exists); RepositoryID (the repository
3   where the existing DIF lies, which is also expected to be the output RepositoryID - fix it in the
4   output XML if it's wrong); Input_Entry_ID (the DIF Entry_ID); and LocalID (the output localID for
5   the data entity). We also need the localID for the organisation entity, which appears as
6   repository_localid. :)
7(: Note algorithm for creating non-pre-existing organisations :)
8(: dgPersons are not created as one can't tell automatically which are people and which are orgs, and orgs are simpler :) 
9import module namespace voclib='http://ndg.nerc.ac.uk/xquery/lib/vocab' at 'xmldb:exist:///db/xqueryLib/Vocabs/vocab_xquery_lib.xquery';
10import module namespace inputParse='http://ndg.nerc.ac.uk/xquery/lib/inputParse' at 'xmldb:exist:///db/xqueryLib/Utilities/inputParse_xquery_lib.xquery';
11declare default element namespace 'http://ndg.nerc.ac.uk/moles';
12declare namespace dif='http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/';
13declare namespace xsi='http://www.w3.org/2001/XMLSchema-instance';
14
15(: Keep as an example, but note the entryID in the dif is an
16   NDG format ID, so the example isn't as obvious ....
17declare variable $targetCollection as xs:string {'/db/discovery/original/ndg.noc.soton.ac.uk'};
18declare variable $input_repository as xs:string {'ndg.noc.soton.ac.uk'};
19declare variable $input_repository_local as xs:string {'nocs'};
20declare variable $input_entry_id as xs:string {'ndg.noc.soton.ac.uk__DIF__NOCSDAT110'};
21declare variable $output_local_id as xs:string {'NOCS_DAT110'};
22:)
23(: Replaceable parameters: each quoted value below is a placeholder token
    (TargetCollection, RepositoryID, repository_localid, Input_Entry_ID, LocalID)
    standing in for a real input; the commented-out example above shows
    concrete values.
    NOTE(review): presumably the caller substitutes these tokens textually
    before running the query - confirm against the caller.
    NOTE(review): the query body tests $output_local_id against
    'Output_LocalID', not the 'LocalID' placeholder declared here - confirm
    which value is intended. :)
24declare variable $targetCollection as xs:string {'TargetCollection'};
25declare variable $input_repository as xs:string {'RepositoryID'};
26declare variable $input_repository_local as xs:string {'repository_localid'};
27declare variable $input_entry_id as xs:string {'Input_Entry_ID'};
28declare variable $output_local_id as xs:string {'LocalID'};
29
(: Main query.
   For every dif:DIF in $targetCollection whose dif:Entry_ID equals $input_entry_id,
   build a dgMetadata element containing:
     - one dgMetadataRecord (IDs, description, name, a dgDataEntity with parameter
       summaries, coverage and roles, structured keywords, optional provenance), and
     - one dgOrganisation per distinct Dataset_Creator / Originating_Center value.
   Reads the prolog variables $targetCollection, $input_repository,
   $input_repository_local, $input_entry_id and $output_local_id, the $voclib:*
   vocabulary identifiers and inputParse:fix-coord().
   NOTE(review): the sentinel tested below is 'Output_LocalID' while the prolog
   placeholder for $output_local_id is 'LocalID' - confirm which is intended. :)
for $DIF in collection($targetCollection)/dif:DIF[dif:Entry_ID=$input_entry_id]
(: True when $output_local_id differs from the sentinel, i.e. the supplied local ID is
   used to build identifiers; otherwise identifiers fall back to the DIF Entry_ID. :)
let $useOutputId := $output_local_id != 'Output_LocalID'
return
element dgMetadata {
  element dgMetadataRecord {
    element dgMetadataID {
      element schemeIdentifier {'NDG-B0'},
      element repositoryIdentifier {$input_repository},
      if ($useOutputId) then
        element localIdentifier {$output_local_id}
      else
        element localIdentifier {data($DIF/dif:Entry_ID)}
    },
    element dgMetadataDescription {
      element metadataDescriptionID {
        element schemeIdentifier {'NDG-B0'},
        element repositoryIdentifier {$input_repository},
        if ($useOutputId) then
          element localIdentifier {concat('generated_desc-', $output_local_id)}
        else
          element localIdentifier {concat('generated_desc-', escape-uri(string($DIF/dif:Entry_ID), true()))}
      },
      element metadataDescriptionLastUpdated {current-date()},
      element abstract {
        element abstractText {string($DIF/dif:Summary)}
      },
      (: One descriptionSection per Related_URL; the content type, when present,
         becomes the reference name. :)
      for $descOnline in $DIF/dif:Related_URL
      return
        element descriptionSection {
          element descriptionOnlineReference {
            element dgSimpleLink {escape-uri(data($descOnline/dif:URL), true())}
          },
          if (exists($descOnline/dif:URL_Content_Type)) then
            element dgReferenceName {data($descOnline/dif:URL_Content_Type)}
          else ()
        }
    },
    element name {string($DIF/dif:Entry_Title)},
    element abbreviation {string($DIF/dif:Entry_Title)},
    element dgDataEntity {
      element dgDataSetType {''},
      element dgDataSummary {
        for $parameter in $DIF/dif:Parameters
        (: "Category > Topic > Term[ > Variable][ > Detailed_Variable]".
           Built with string-join and an empty separator: placing the pieces directly
           in element content would insert an extra space between adjacent atomic
           values, yielding "Term  > Variable". :)
        let $parameterLabel :=
          string-join((
            concat(string($parameter/dif:Category), ' > ', string($parameter/dif:Topic), ' > ', string($parameter/dif:Term)),
            if (exists($parameter/dif:Variable) and $parameter/dif:Variable!='') then
              concat(' > ', string($parameter/dif:Variable))
            else (),
            if (exists($parameter/dif:Detailed_Variable) and $parameter/dif:Detailed_Variable!='') then
              concat(' > ', string($parameter/dif:Detailed_Variable))
            else ()
          ), '')
        return
          element dgParameterSummary {
            (: DIF carries no value/unit here, so a dummy unit from the unknown vocabulary is emitted. :)
            element dgParameterValue {
              element dgValueDataParameter {
                element Value {''},
                element dgStandardUnit {
                  element dgValidTerm {'dummy'},
                  element dgValidTermID {
                    element ParentListID {$voclib:unknown_vocab_id},
                    element TermID {escape-uri('dummy unit', true())}
                  }
                }
              }
            },
            (: Nested term hierarchy: Category (level 0) > Topic (1) > Term (2)
               > Variable (3, optional) > Detailed_Variable (4, optional). :)
            element dgStdParameterMeasured {
              if ($parameter/dif:Category!='') then element dgValidTerm {string($parameter/dif:Category)}
              else element dgValidTerm {'unknown'},
              element dgValidTermID {
                element ParentListID {concat($voclib:gcmd_science_valids_categories, '/current')},
                element TermID {escape-uri($parameter/dif:Category, true())}
              },
              element dgValidSubterm {
                if ($parameter/dif:Topic!='') then element dgValidTerm {string($parameter/dif:Topic)}
                else element dgValidTerm {'unknown'},
                element dgValidTermID {
                  element ParentListID {concat($voclib:gcmd_science_valids_topics, '/current')},
                  element TermID {escape-uri($parameter/dif:Topic, true())}
                },
                element dgValidSubterm {
                  if ($parameter/dif:Term!='') then element dgValidTerm {string($parameter/dif:Term)}
                  else element dgValidTerm {'unknown'},
                  element dgValidTermID {
                    element ParentListID {concat($voclib:gcmd_science_valids_terms, '/current')},
                    element TermID {escape-uri($parameter/dif:Term, true())}
                  },
                  if (exists($parameter/dif:Variable) and $parameter/dif:Variable!='') then
                    element dgValidSubterm {
                      element dgValidTerm {string($parameter/dif:Variable)},
                      element dgValidTermID {
                        element ParentListID {concat($voclib:gcmd_science_valids_variables, '/current')},
                        element TermID {escape-uri($parameter/dif:Variable, true())}
                      },
                      if (exists($parameter/dif:Detailed_Variable) and $parameter/dif:Detailed_Variable!='') then
                        element dgValidSubterm {
                          element dgValidTerm {string($parameter/dif:Detailed_Variable)},
                          element dgValidTermID {
                            element ParentListID {$voclib:unknown_vocab_id},
                            element TermID {escape-uri($parameter/dif:Detailed_Variable, true())}
                          },
                          element ListLevel {4}
                        }
                      else (),
                      element ListLevel {3}
                    }
                  else (),
                  element ListLevel {2}
                },
                element ListLevel {1}
              },
              element ListLevel {0}
            },
            element ParameterName {$parameterLabel},
            element ParameterAbbreviation {$parameterLabel}
          },
        (: Coverage is emitted when any spatial, location, temporal, paleo-temporal or
           chronostratigraphic information exists. Chronostratigraphic_Unit is included
           in this outer guard so that the dgChronostratigraphicTerm loop below is
           reachable for records that carry only that kind of coverage. :)
        if (exists($DIF/dif:Spatial_Coverage)
            or exists($DIF/dif:Paleo_Temporal_Coverage)
            or exists($DIF/dif:Location)
            or exists($DIF/dif:Temporal_Coverage)
            or exists($DIF/dif:Chronostratigraphic_Unit)) then
          element dgDataCoverage {
            if (exists($DIF/dif:Spatial_Coverage) or exists($DIF/dif:Location)) then
              element dgSpatialCoverage {
                (: Only Spatial_Coverage entries with all four limits become bounding boxes. :)
                for $boundingbox in $DIF/dif:Spatial_Coverage[exists(dif:Northernmost_Latitude)
                                                              and exists(dif:Southernmost_Latitude)
                                                              and exists(dif:Easternmost_Longitude)
                                                              and exists(dif:Westernmost_Longitude)]
                return
                  element BoundingBox {
                    element LimitNorth {data(inputParse:fix-coord($boundingbox/dif:Northernmost_Latitude))},
                    element LimitSouth {data(inputParse:fix-coord($boundingbox/dif:Southernmost_Latitude))},
                    element LimitWest {data(inputParse:fix-coord($boundingbox/dif:Westernmost_Longitude))},
                    element LimitEast {data(inputParse:fix-coord($boundingbox/dif:Easternmost_Longitude))}
                  },
                for $location in $DIF/dif:Location
                return
                  element dgArea {
                    element dgValidTerm {string($location)},
                    element dgValidTermID {
                      element ParentListID {concat($voclib:gcmd_location_valids, '/current')},
                      element TermID {escape-uri($location, true())}
                    }
                  }
              }
            else (),
            if (exists($DIF/dif:Temporal_Coverage) or exists($DIF/dif:Paleo_Temporal_Coverage) or exists($DIF/dif:Chronostratigraphic_Unit)) then
              element dgTemporalCoverage {
                (: Date ranges require a start date; the stop date is copied as-is (possibly empty). :)
                for $temporalcoverage in $DIF/dif:Temporal_Coverage[exists(dif:Start_Date)]
                return
                  element DateRange {
                    element DateRangeStart {string($temporalcoverage/dif:Start_Date)},
                    element DateRangeEnd {string($temporalcoverage/dif:Stop_Date)}
                  },
                for $paleotemporalcoverage in $DIF/dif:Paleo_Temporal_Coverage[exists(dif:Paleo_Start_Date)]
                return
                  element DateRange {
                    element DateRangeStart {string($paleotemporalcoverage/dif:Paleo_Start_Date)},
                    element DateRangeEnd {string($paleotemporalcoverage/dif:Paleo_Stop_Date)}
                  },
                for $chronostratigraphic in $DIF/dif:Chronostratigraphic_Unit
                return
                  element dgChronostratigraphicTerm {
                    element dgValidTerm {string($chronostratigraphic)},
                    element dgValidTermID {
                      element ParentListID {concat($voclib:gcmd_chronostratigraphic_valids, '/current')},
                      element TermID {escape-uri($chronostratigraphic, true())}
                    }
                  }
              }
            else ()
          }
        else ()
      },
      element dgDataRoles {
        (: Creator role: prefer the citation's Dataset_Creator entries, otherwise fall
           back to Originating_Center; no creator role is emitted if neither exists. :)
        if (exists($DIF/dif:Data_Set_Citation/dif:Dataset_Creator)) then
          element dgDataCreator {
            element dgMetadataID {
              element schemeIdentifier {'NDG-B0'},
              element repositoryIdentifier {$input_repository},
              if ($useOutputId) then
                element localIdentifier {concat('generated_creator-', $output_local_id)}
              else
                element localIdentifier {concat('generated_creator-', escape-uri(string($DIF/dif:Entry_ID), true()))}
            },
            element roleName {'Data Creator'},
            element abbreviation {'Creator'},
            for $creatorID in $DIF/dif:Data_Set_Citation/dif:Dataset_Creator
            return
              element dgRoleHolder {
                (: The holder is referenced through dgOrganisationID, matching the
                   Originating_Center and curator role holders below; the identifier
                   value matches the generated dgOrganisation for the same creator. :)
                element dgOrganisationID {
                  element schemeIdentifier {'NDG-B0'},
                  element repositoryIdentifier {$input_repository},
                  if ($useOutputId) then
                    element localIdentifier {escape-uri(concat('generated_orgcit-', string($creatorID), '-', $output_local_id), true())}
                  else
                    element localIdentifier {escape-uri(concat('generated_orgcit-', string($creatorID), '-', data($DIF/dif:Entry_ID)), true())}
                },
                element startDate {current-date()}
              }
          }
        else if (exists($DIF/dif:Originating_Center)) then
          element dgDataCreator {
            element dgMetadataID {
              element schemeIdentifier {'NDG-B0'},
              element repositoryIdentifier {$input_repository},
              if ($useOutputId) then
                element localIdentifier {escape-uri(concat('generated_creator-', $output_local_id), true())}
              else
                element localIdentifier {escape-uri(concat('generated_creator-', data($DIF/dif:Entry_ID)), true())}
            },
            element roleName {'Data Creator'},
            element abbreviation {'Creator'},
            for $creatorID in $DIF/dif:Originating_Center
            return
              element dgRoleHolder {
                element dgOrganisationID {
                  element schemeIdentifier {'NDG-B0'},
                  element repositoryIdentifier {$input_repository},
                  if ($useOutputId) then
                    element localIdentifier {escape-uri(concat('generated_orgcit-', string($creatorID), '-', $output_local_id), true())}
                  else
                    element localIdentifier {escape-uri(concat('generated_orgcit-', string($creatorID), '-', data($DIF/dif:Entry_ID)), true())}
                },
                element startDate {current-date()}
              }
          }
        else (),
        (: The curator role is always emitted and points at the repository's own
           organisation entity ($input_repository_local). :)
        element dgDataCurator {
          element dgMetadataID {
            element schemeIdentifier {'NDG-B0'},
            element repositoryIdentifier {$input_repository},
            if ($useOutputId) then
              element localIdentifier {escape-uri(concat('generated_curator-', $output_local_id), true())}
            else
              element localIdentifier {escape-uri(concat('generated_curator-', data($DIF/dif:Entry_ID)), true())}
          },
          element roleName {'Data Curator'},
          element abbreviation {'Curator'},
          element dgRoleHolder {
            element dgOrganisationID {
              element schemeIdentifier {'NDG-B0'},
              element repositoryIdentifier {$input_repository},
              element localIdentifier {$input_repository_local}
            },
            element startDate {current-date()}
          }
        }
      }
    },
    (: Fixed keyword tagging every record produced by this conversion. :)
    element dgStructuredKeyword {
      element dgValidTerm {'d2b converted record'},
      element dgValidTermID {
        element ParentListID {$voclib:unknown_vocab_id},
        element TermID {'d2b'}
      }
    },
    (: Free keywords; the four listed values are attributed to the NDG data provider
       vocabulary, everything else to the unknown vocabulary. :)
    for $structuredKeywords in $DIF/dif:Keyword
    return
      element dgStructuredKeyword {
        element dgValidTerm {string($structuredKeywords)},
        element dgValidTermID {
          if ($structuredKeywords='MDIP' or $structuredKeywords='NERC' or $structuredKeywords='NERC_DDC' or $structuredKeywords='DPPP') then
            element ParentListID {concat($voclib:ndg_data_provider_vocab, '/current')}
          else
            element ParentListID {$voclib:unknown_vocab_id}
          ,
          element TermID {escape-uri($structuredKeywords, true())}
        }
      },
    for $structuredKeywords in $DIF/dif:ISO_Topic_Category
    return
      element dgStructuredKeyword {
        element dgValidTerm {string($structuredKeywords)},
        element dgValidTermID {
          element ParentListID {concat($voclib:iso_topic_list, '/current')},
          element TermID {escape-uri($structuredKeywords, true())}
        }
      },
    (: Provenance is emitted only when the DIF carries a creation or revision date.
       Dates that cannot be cast fall back to the current date / dateTime. :)
    if (exists($DIF/dif:DIF_Creation_Date) or exists($DIF/dif:Last_DIF_Revision_Date)) then
      element dgMetadataProvenance {
        if (exists($DIF/dif:DIF_Creation_Date)) then
          element RecordCreation {
            element CreatedDate {
              if (string($DIF/dif:DIF_Creation_Date) castable as xs:date) then
                string($DIF/dif:DIF_Creation_Date) cast as xs:date
              else (current-date())
            },
            element CreatedBy {$input_repository}
          }
        else
          element RecordCreation {
            element CreatedDate {current-date()},
            element CreatedBy {'MOLES Import'}
          },
        if (exists($DIF/dif:Last_DIF_Revision_Date)) then
          element RecordUpdate {
            element UpdateDate {
              if (string($DIF/dif:Last_DIF_Revision_Date) castable as xs:dateTime) then
                string($DIF/dif:Last_DIF_Revision_Date) cast as xs:dateTime
              else (current-dateTime())
            },
            element UpdatedBy {$input_repository}
          }
        else ()
      }
    else ()
  } (: </dgMetadataRecord> :),
  (: One organisation entity per distinct creator / originating centre name; its
     localIdentifier is built the same way as the role-holder references above. :)
  for $creator in distinct-values($DIF/(dif:Data_Set_Citation/dif:Dataset_Creator | dif:Originating_Center))
  return
    element dgOrganisation {
      element dgMetadataID {
        element schemeIdentifier {'NDG-B0'},
        element repositoryIdentifier {$input_repository},
        if ($useOutputId) then
          element localIdentifier {escape-uri(concat('generated_orgcit-', string($creator), '-', $output_local_id), true())}
        else
          element localIdentifier {escape-uri(concat('generated_orgcit-', string($creator), '-', data($DIF/dif:Entry_ID)), true())}
      },
      element name {string($creator)},
      element abbreviation {string($creator)},
      element contactDetails {''}
    }
} (: </dgMetadata> :)
Note: See TracBrowser for help on using the repository browser.