source: ndgCommon/trunk/ndg/common/xmldb/xquery/dif2moles.xq @ 4938

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/xmldb/xquery/dif2moles.xq@4938
Revision 4938, 19.2 KB checked in by cbyrom, 11 years ago (diff)

Update xqueries:

  • make more precise to improve search performance
  • add in full paths to libs to ease use in eXist
  • create new xquery for searching for DIF docs
Line 
1(: This query produces one MOLES data entity plus one organisation entry for a given DIF instance.
2   Inputs are TargetCollection (the collection where the DIF exists); RepositoryID (the repository where
3   the existing DIF lies, also expected to be the output RepositoryID - fix it in the output XML if it is wrong);
4   Input_Entry_ID (the DIF Entry_ID); and LocalID (the output localID for the data entity). We also
5   need the localID for the organisation entity, which appears as repository_localid.
6   :)
7(: Note algorithm for creating non-pre-existing organisations :)
8(: dgPersons are not created, as one can't tell automatically which are people and which are orgs, and orgs are simpler :)
9
10import module namespace voclib='http://ndg.nerc.ac.uk/xquery/lib/vocab' at 'xmldb:exist:///db/xqueryLib/Vocabs/vocab_xquery_lib.xquery';
11import module namespace inputParse='http://ndg.nerc.ac.uk/xquery/lib/inputParse' at 'xmldb:exist:///db/xqueryLib/Utilities/inputParse_xquery_lib.xquery';
12
13declare default element namespace 'http://ndg.nerc.ac.uk/moles';
14declare namespace dif='http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/';
15declare namespace xsi='http://www.w3.org/2001/XMLSchema-instance';
16
17(: Replaceable parameters :)
18declare variable $targetCollection as xs:string := 'TargetCollection'; (: collection searched for the DIF document :)
19declare variable $input_repository as xs:string := 'RepositoryID'; (: emitted as the repositoryIdentifier of the generated IDs :)
20declare variable $input_repository_local as xs:string := 'repository_localid'; (: only referenced from a commented-out line in the query body :)
21declare variable $input_entry_id as xs:string := 'Input_Entry_ID'; (: compared with dif:Entry_ID to select the DIF :)
22declare variable $output_local_id as xs:string := 'LocalID'; (: local id for the output; NOTE(review): the body tests it against 'Output_LocalID', not this 'LocalID' default - confirm which sentinel the caller substitutes :)
23
24
25(:declare variable $input_entry_id as xs:string := 'badc.nerc.ac.uk:DIF:dataent_ukmo-midas';
26declare variable $targetCollection := doc("badc.nerc.ac.uk__DIF__dataent_ukmo-midas.xml";:)
27
28
29(: SJD: take the current date and keep only its first ten characters (YYYY-MM-DD), as the raw current-date() value is not usable in MOLES :)
30declare variable $currentDate as xs:string := substring(current-date() cast as xs:string,1,10);
31(:declare variable $extractDate as xs:string := substring($fullDate,1,10);:)
32
33(:for $DIF in collection($targetCollection)/dif:DIF[dif:Entry_ID=$input_entry_id]:)
34(: Select the DIF whose Entry_ID equals the requested one and wrap its MOLES translation in dgMetadata. :)
35for $DIF in collection($targetCollection)/dif:DIF[dif:Entry_ID = $input_entry_id] return
36element dgMetadata {
37  element dgMetadataRecord {
38    (: Identifier of the generated record. :)
39    element dgMetadataID {
40      element schemeIdentifier {'NDG-B0'},
41      element repositoryIdentifier {$input_repository},
42      (: Use the supplied local id unless it equals the 'Output_LocalID' sentinel; then fall back to the DIF Entry_ID. :)
43      element localIdentifier {
44        if ($output_local_id = 'Output_LocalID') then data($DIF/dif:Entry_ID) else $output_local_id
45      }
46    },
47    element dgMetadataDescription {
48      element metadataDescriptionID {
49        element schemeIdentifier {'NDG-B0'},
50        element repositoryIdentifier {$input_repository},
51        element localIdentifier {
52          concat('generated_desc-',
53            if ($output_local_id = 'Output_LocalID') then encode-for-uri(string($DIF/dif:Entry_ID)) else $output_local_id)
54        }
55      },
56      element metadataDescriptionLastUpdated {$currentDate},
57      (: The DIF Summary becomes the abstract text. :)
58      element abstract {
59        element abstractText {string($DIF/dif:Summary)} },
60    (: Emit one descriptionSection per DIF Related_URL, each holding a dgSimpleLink. :)
61    for $descOnline in $DIF/dif:Related_URL
62    return
63        element descriptionSection {
64            element descriptionOnlineReference {
65                element dgSimpleLink {
66                    (: Name the link after the DIF Description when there is one, otherwise
67                       fall back to the literal 'URL'.  One if/else inside the constructor
68                       replaces the former pair of "then a, b else ()" tests: a then-branch
69                       is a single ExprSingle, so an unparenthesised comma sequence there
70                       is not valid XQuery. :)
71                    element name {
72                        if (exists($descOnline/dif:Description)) then data($descOnline/dif:Description)
73                        else 'URL'
74                    },
75                    element URL {data($descOnline/dif:URL) cast as xs:anyURI}
76                }
77            } }
78        }, (: end of dgMetadataDescription :)
79        element name {string($DIF/dif:Entry_Title)},
80        element abbreviation {string($DIF/dif:Entry_Title)}, (: the DIF Entry_Title supplies both name and abbreviation :)
81        element dgDataEntity {
82        element dgDataSetType {''},
83        element dgDataSummary {
84        for $parameter in $DIF/dif:Parameters (: one dgParameterSummary per DIF Parameters element :)
85        return
86            element dgParameterSummary {
87                element dgParameterValue { (: placeholder value block: empty Value and a 'dummy' unit :)
88                    element dgValueDataParameter {
89                        element Value {''},
90                        element dgStandardUnit {
91                            element dgValidTerm {'dummy'},
92                            element dgValidTermID {
93                                element ParentListID {$voclib:unknown_vocab_id},
94                                element TermID {encode-for-uri('dummy unit')}
95                            }
96                        }
97                    }
98                },
99            element dgStdParameterMeasured { (: nested keyword levels: Category (ListLevel 0) > Topic (1) > Term (2) > Variable (3) > Detailed_Variable (4) :)
100                if ($parameter/dif:Category!='') then element dgValidTerm {string($parameter/dif:Category)}
101                else  element dgValidTerm {'unknown'}, (: 'unknown' when Category is absent or empty :)
102                    element dgValidTermID {
103                        element ParentListID {concat($voclib:gcmd_science_valids_categories, '/current')},
104                        element TermID {encode-for-uri($parameter/dif:Category)}
105                    },
106                    element dgValidSubterm { (: Topic level :)
107                        if ($parameter/dif:Topic!='') then element dgValidTerm {string($parameter/dif:Topic)}
108                        else  element dgValidTerm {'unknown'},
109                            element dgValidTermID {
110                                element ParentListID {concat($voclib:gcmd_science_valids_topics, '/current')},
111                                element TermID {encode-for-uri($parameter/dif:Topic)}
112                            },
113                    element dgValidSubterm { (: Term level, nested inside the Topic subterm :)
114                        if ($parameter/dif:Term!='') then element dgValidTerm {string($parameter/dif:Term)}
115                        else  element dgValidTerm {'unknown'},
116                            element dgValidTermID {
117                                element ParentListID {concat($voclib:gcmd_science_valids_terms, '/current')},
118                                element TermID {encode-for-uri($parameter/dif:Term)}
119                            },
120                        if (exists($parameter/dif:Variable) and $parameter/dif:Variable!='') then (: Variable level is optional :)
121                            element dgValidSubterm {
122                                element dgValidTerm {string($parameter/dif:Variable)},
123                                element dgValidTermID {
124                                    element ParentListID {concat($voclib:gcmd_science_valids_variables, '/current')},
125                                    element TermID {encode-for-uri($parameter/dif:Variable)}
126                                },
127                        if (exists($parameter/dif:Detailed_Variable) and $parameter/dif:Detailed_Variable!='') then (: only reached when a Variable was written :)
128                            element dgValidSubterm {
129                                element dgValidTerm {string($parameter/dif:Detailed_Variable)},
130                                element dgValidTermID {
131                                    element ParentListID {$voclib:unknown_vocab_id},
132                                    element TermID {encode-for-uri($parameter/dif:Detailed_Variable)}
133                                },
134                        element ListLevel {4}}
135                        else (),
136                        element ListLevel {3}
137                        } (: end of the Variable subterm :)
138                        else (),
139                        element ListLevel {2}
140                        }, (: end of the Term subterm :)
141                        element ListLevel {1}
142                        }, (: end of the Topic subterm :)
143                        element ListLevel {0}
144                        }, (: end of dgStdParameterMeasured :)
145                (: ParameterName and ParameterAbbreviation receive the same text, the keyword path
146                   "Category > Topic > Term", extended with " > Variable" and " > Detailed_Variable"
147                   when those elements exist and are non-empty.  The path is assembled by a single
148                   concat() and bound once: listing the parts as a comma-separated sequence inside
149                   the constructor made the processor insert an extra space between adjacent
150                   strings, so the optional parts came out as "Term  > Variable". :)
151                let $parameterPath :=
152                    concat(
153                        string($parameter/dif:Category), ' > ',
154                        string($parameter/dif:Topic), ' > ',
155                        string($parameter/dif:Term),
156                        if (exists($parameter/dif:Variable) and $parameter/dif:Variable!='')
157                            then concat(' > ', string($parameter/dif:Variable)) else '',
158                        if (exists($parameter/dif:Detailed_Variable) and $parameter/dif:Detailed_Variable!='')
159                            then concat(' > ', string($parameter/dif:Detailed_Variable)) else '')
160                return (
161                    element ParameterName {$parameterPath},
162                    element ParameterAbbreviation {$parameterPath} )
163            }, (: end of dgParameterSummary :)
164(: Coverage is emitted only when the DIF has spatial or temporal information.  The test includes
165   Chronostratigraphic_Unit because the temporal branch below renders it; it was missing here, so
166   a DIF holding only chronostratigraphic units produced no dgDataCoverage at all. :)
167if (exists($DIF/dif:Spatial_Coverage) or exists($DIF/dif:Location) or exists($DIF/dif:Temporal_Coverage)
168    or exists($DIF/dif:Paleo_Temporal_Coverage) or exists($DIF/dif:Chronostratigraphic_Unit)) then
169element dgDataCoverage {
170    if (exists($DIF/dif:Spatial_Coverage) or exists($DIF/dif:Location)) then
171    element dgSpatialCoverage {
172        (: A BoundingBox is written only for a Spatial_Coverage that has all four limits. :)
173        for $boundingbox in $DIF/dif:Spatial_Coverage[exists(dif:Northernmost_Latitude)
174            and exists(dif:Southernmost_Latitude)
175            and exists(dif:Easternmost_Longitude)
176            and exists(dif:Westernmost_Longitude)]
177        return element BoundingBox {
178            element LimitNorth {data(inputParse:fix-coord($boundingbox/dif:Northernmost_Latitude))},
179            element LimitSouth {data(inputParse:fix-coord($boundingbox/dif:Southernmost_Latitude))},
180            element LimitWest {data(inputParse:fix-coord($boundingbox/dif:Westernmost_Longitude))},
181            element LimitEast {data(inputParse:fix-coord($boundingbox/dif:Easternmost_Longitude))}
182        },
183        for $location in $DIF/dif:Location
184        return element dgArea {
185            element dgValidTerm {string($location)},
186            element dgValidTermID {
187                element ParentListID {concat($voclib:gcmd_location_valids, '/current')},
188                element TermID {encode-for-uri($location)}
189            }
190        }
191    }
192    else (),
193    if (exists($DIF/dif:Temporal_Coverage) or exists($DIF/dif:Paleo_Temporal_Coverage) or exists($DIF/dif:Chronostratigraphic_Unit)) then
194    element dgTemporalCoverage {
195        for $temporalcoverage in $DIF/dif:Temporal_Coverage[exists(dif:Start_Date)]
196        return element DateRange {
197            element DateRangeStart {string($temporalcoverage/dif:Start_Date)},
198            element DateRangeEnd {string($temporalcoverage/dif:Stop_Date)}
199        },
200        for $paleotemporalcoverage in $DIF/dif:Paleo_Temporal_Coverage[exists(dif:Paleo_Start_Date)]
201        return element DateRange {
202            element DateRangeStart {string($paleotemporalcoverage/dif:Paleo_Start_Date)},
203            element DateRangeEnd {string($paleotemporalcoverage/dif:Paleo_Stop_Date)}
204        },
205        for $chronostratigraphic in $DIF/dif:Chronostratigraphic_Unit
206        return element dgChronostratigraphicTerm {
207            element dgValidTerm {string($chronostratigraphic)},
208            element dgValidTermID {
209                element ParentListID {concat($voclib:gcmd_chronostratigraphic_valids, '/current')},
210                element TermID {encode-for-uri($chronostratigraphic)}
211            }
212        }
213    }
214    else ()
215}
216else ()
217}, (: end of dgDataSummary :)
218(: dgDataRoles: an optional dgDataCreator followed by a dgDataCurator.  Every startDate uses $currentDate
219   rather than current-date(), whose raw value the note on $currentDate describes as not usable in MOLES. :)
220element dgDataRoles {
221if (exists($DIF/dif:Data_Set_Citation/dif:Dataset_Creator)) then (: creators named in the citation take precedence :)
222element dgDataCreator {
223element dgMetadataID {
224element schemeIdentifier {'NDG-B0'},
225element repositoryIdentifier {$input_repository},
226if ($output_local_id != 'Output_LocalID') then
227element localIdentifier {concat('generated_creator-', $output_local_id)}
228else
229element localIdentifier {concat('generated_creator-', encode-for-uri(string($DIF/dif:Entry_ID)))}
230},
231element roleName {'Data Creator'},
232element abbreviation {'Creator'},
233for $creatorID in $DIF/dif:Data_Set_Citation/dif:Dataset_Creator return (: one role holder per creator :)
234element dgRoleHolder {
235(: SJD changed this to dgOrganisationID from dgMetadataID - assuming simple error :)
236    element dgOrganisationID {
237        element schemeIdentifier {'NDG-B0'},
238        element repositoryIdentifier {$input_repository},
239        if ($output_local_id != 'Output_LocalID') then
240            element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', $output_local_id))}
241        else
242            element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', data($DIF/dif:Entry_ID)))}
243        },
244       element startDate {$currentDate}
245    }
246}
247else if (exists($DIF/dif:Originating_Center)) then (: no citation creator: use the originating centre(s) instead :)
248element dgDataCreator {
249element dgMetadataID {
250element schemeIdentifier {'NDG-B0'},
251element repositoryIdentifier {$input_repository},
252if ($output_local_id != 'Output_LocalID') then (: NOTE(review): this branch URI-encodes $output_local_id, the citation branch does not - confirm which is intended :)
253element localIdentifier  {encode-for-uri(concat('generated_creator-', $output_local_id))}
254else
255element localIdentifier  {encode-for-uri(concat('generated_creator-', data($DIF/dif:Entry_ID)))}
256},
257element roleName {'Data Creator'},
258element abbreviation {'Creator'},
259for $creatorID in $DIF/dif:Originating_Center return
260element dgRoleHolder {
261element dgOrganisationID {
262element schemeIdentifier {'NDG-B0'},
263element repositoryIdentifier {$input_repository},
264if ($output_local_id != 'Output_LocalID') then
265element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', $output_local_id))}
266else
267element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', data($DIF/dif:Entry_ID)))}
268},
269element startDate {$currentDate}
270}
271}
272else (),
273element dgDataCurator { (: always emitted :)
274    element dgMetadataID {
275        element schemeIdentifier {'NDG-B0'},
276        element repositoryIdentifier {$input_repository},
277        if ($output_local_id != 'Output_LocalID') then
278            element localIdentifier  {encode-for-uri(concat('generated_curator-', $output_local_id))}
279        else
280            element localIdentifier  {encode-for-uri(concat('generated_curator-', data($DIF/dif:Entry_ID)))}
281    },
282    element roleName {'Data Curator'},
283    element abbreviation {'Curator'},
284    element dgRoleHolder {
285        element dgOrganisationID {
286            element schemeIdentifier {'NDG-B0'},
287            element repositoryIdentifier {$input_repository},
288            (:SJD this not valid - just use same localIdentifier as above..:)
289            (:element localIdentifier {$input_repository_local}:)
290            if ($output_local_id != 'Output_LocalID') then
291            element localIdentifier  {encode-for-uri(concat('generated_curator-', $output_local_id))}
292            else
293            element localIdentifier  {encode-for-uri(concat('generated_curator-', data($DIF/dif:Entry_ID)))}
294        },
295    element startDate {$currentDate}
296} (: end of dgRoleHolder :)
297} (: end of dgDataCurator :)
298} (: end of dgDataRoles :)
299}, (: end of dgDataEntity :)
300(: Fixed keyword that tags the record with the 'd2b converted record' term. :)
301element dgStructuredKeyword {
302    element dgValidTerm {'d2b converted record'},
303    element dgValidTermID {
304        element ParentListID {$voclib:unknown_vocab_id},
305        element TermID {'d2b'}
306    }
307},
308(: DIF keywords: the four listed data-provider names are filed under the NDG data provider
309   vocabulary, every other keyword under the unknown vocabulary. :)
310for $keyword in $DIF/dif:Keyword
311return element dgStructuredKeyword {
312    element dgValidTerm {string($keyword)},
313    element dgValidTermID {
314        element ParentListID {
315            if ($keyword = ('MDIP', 'NERC', 'NERC_DDC', 'DPPP'))
316            then concat($voclib:ndg_data_provider_vocab, '/current')
317            else $voclib:unknown_vocab_id
318        },
319        element TermID {encode-for-uri($keyword)}
320    }
321},
322(: ISO topic categories always use the ISO topic list vocabulary. :)
323for $topic in $DIF/dif:ISO_Topic_Category
324return element dgStructuredKeyword {
325    element dgValidTerm {string($topic)},
326    element dgValidTermID {
327        element ParentListID {concat($voclib:iso_topic_list, '/current')},
328        element TermID {encode-for-uri($topic)} } },
329(: Provenance is written only when the DIF has a creation or revision date.  Fallback creation dates use
330   $currentDate instead of current-date(), whose raw value the $currentDate note calls not usable in MOLES. :)
331if (exists($DIF/dif:DIF_Creation_Date) or exists($DIF/dif:Last_DIF_Revision_Date)) then
332element dgMetadataProvenance {
333    if (exists($DIF/dif:DIF_Creation_Date)) then
334        element RecordCreation {
335            element CreatedDate {
336                if (string($DIF/dif:DIF_Creation_Date) castable as xs:date) then string($DIF/dif:DIF_Creation_Date) cast as xs:date
337                else $currentDate (: the DIF value is not a valid xs:date :)
338            },
339            element CreatedBy {$input_repository}
340        }
341    else (: reached when only a revision date exists :)
342        element RecordCreation {
343            element CreatedDate {$currentDate},
344            element CreatedBy {'MOLES Import'}
345        },
346    if (exists($DIF/dif:Last_DIF_Revision_Date)) then
347        element RecordUpdate {
348            element UpdateDate {
349                if (string($DIF/dif:Last_DIF_Revision_Date) castable as xs:dateTime) then string($DIF/dif:Last_DIF_Revision_Date) cast as xs:dateTime
350                else current-dateTime() (: NOTE(review): a date-only revision value is not castable to xs:dateTime and ends up here - confirm this is intended :)
351            },
352            element UpdatedBy {$input_repository}
353        }
354    else ()
355}
356else ()
357} (: </dgMetadataRecord>:),
358
359(: Had to add some stuff here so originating data centre stuff is picked up
360for $bum in distinct-values($DIF/(dif:Data_Set_Citation/dif:Dataset_Creator | dif:Originating_Center | dif:Data_Center))
361return
362    element snooze {data('arse')},
363
364:)
365
366(: Organisation records.  Every distinct Dataset_Creator / Originating_Center value yields a
367   dgOrganisation with a 'generated_orgcit-' id; when the DIF names neither, one organisation
368   is built from the Data_Center names instead. :)
369let $creators := distinct-values($DIF/(dif:Data_Set_Citation/dif:Dataset_Creator | dif:Originating_Center))
370return
371    if (exists($creators)) then
372        for $creator in $creators
373        return element dgOrganisation {
374            element dgMetadataID {
375                element schemeIdentifier {'NDG-B0'},
376                element repositoryIdentifier {$input_repository},
377                element localIdentifier {
378                    encode-for-uri(concat('generated_orgcit-', string($creator), '-',
379                        if ($output_local_id = 'Output_LocalID') then data($DIF/dif:Entry_ID) else $output_local_id))
380                }
381            },
382            element name {string($creator)},
383            element abbreviation {string($creator)},
384            element contactDetails { element URI {data($DIF/dif:Data_Center/dif:Data_Center_URL)} }
385        }
386    else
387        element dgOrganisation {
388            element dgMetadataID {
389                element schemeIdentifier {'NDG-B0'},
390                element repositoryIdentifier {$input_repository},
391                element localIdentifier {
392                    concat('generated_desc-',
393                        if ($output_local_id = 'Output_LocalID') then encode-for-uri(string($DIF/dif:Entry_ID)) else $output_local_id)
394                }
395            },
396            element name {data($DIF/dif:Data_Center/dif:Data_Center_Name/dif:Long_Name)},
397            element abbreviation {data($DIF/dif:Data_Center/dif:Data_Center_Name/dif:Short_Name)},
398            element contactDetails { element URI {data($DIF/dif:Data_Center/dif:Data_Center_URL) cast as xs:anyURI} }
399        }
400
401} (:    </dgMetadata> :)
402
Note: See TracBrowser for help on using the repository browser.