source: exist/branches/proglue_production_rev4605_xquery4884/python/build/lib/ndgUtils/xquery/dif2moles.xq @ 4886

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/branches/proglue_production_rev4605_xquery4884/python/build/lib/ndgUtils/xquery/dif2moles.xq@4886
Revision 4886, 19.4 KB checked in by sdonegan, 10 years ago (diff)

Creating operational branch for ndgUtils as used on proglue

Line 
1(: This query produces one MOLES data entity plus one organisation entry for a given DIF instance.
2   Inputs: TargetCollection (the collection where the DIF exists); RepositoryID (the repository of the
3   existing DIF, also expected to be the output RepositoryID - fix it in the output XML if it is wrong);
4   Input_Entry_ID (the DIF Entry_ID); and LocalID (the output localID for the data entity). We also
5   need the localID for the organisation entity, which appears as repository_localid.
6   :)
7(: Note algorithm for creating non-pre-existing organisations :)
8(: dgPersons are not created as one can't tell automatically which are people and which are orgs, and orgs are simpler :) 
9
10(:
11import module namespace voclib='http://ndg.nerc.ac.uk/xquery/lib/vocab' at 'xmldb:exist:///db/xqueryLib/Vocabs/vocab_xquery_lib.xquery';
12import module namespace inputParse='http://ndg.nerc.ac.uk/xquery/lib/inputParse' at 'xmldb:exist:///db/xqueryLib/Utilities/inputParse_xquery_lib.xquery';
13:)
14import module namespace voclib='http://ndg.nerc.ac.uk/xquery/lib/vocab' at 'vocab_xquery_lib.xquery';
15import module namespace inputParse='http://ndg.nerc.ac.uk/xquery/lib/inputParse' at 'inputParse_xquery_lib.xquery';
16
17
18declare default element namespace 'http://ndg.nerc.ac.uk/moles';
19declare namespace dif='http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/';
20declare namespace xsi='http://www.w3.org/2001/XMLSchema-instance';
21
22(: Replaceable parameters: each string literal below is a placeholder; presumably the caller substitutes real values into the query text before running it - TODO confirm :)
23declare variable $targetCollection as xs:string := 'TargetCollection'; (: passed to collection() to locate the DIF :)
24declare variable $input_repository as xs:string := 'RepositoryID'; (: written out as repositoryIdentifier, CreatedBy and UpdatedBy :)
25declare variable $input_repository_local as xs:string := 'repository_localid'; (: in this file only referenced from a commented-out line :)
26declare variable $input_entry_id as xs:string := 'Input_Entry_ID'; (: compared with dif:Entry_ID to select the DIF :)
27declare variable $output_local_id as xs:string := 'LocalID'; (: NOTE(review): the query body tests this against 'Output_LocalID', not the 'LocalID' placeholder used here - confirm which token the caller substitutes :)
28
29
30(:declare variable $input_entry_id as xs:string := 'badc.nerc.ac.uk:DIF:dataent_ukmo-midas';
31declare variable $targetCollection := doc("badc.nerc.ac.uk__DIF__dataent_ukmo-midas.xml";:)
32
33
34(: SJD: take the first 10 characters of current-date() as a string, which drops anything after the YYYY-MM-DD part; the raw current-date() value is not usable in MOLES :)
35declare variable $currentDate as xs:string := substring(current-date() cast as xs:string,1,10);
36(:declare variable $extractDate as xs:string := substring($fullDate,1,10);:)
37
38(:for $DIF in collection($targetCollection)/dif:DIF[dif:Entry_ID=$input_entry_id]:)
39for $DIF in collection($targetCollection)/dif:DIF[dif:Entry_ID=$input_entry_id]
40return
41element dgMetadata {
42element dgMetadataRecord {
43element dgMetadataID { (: identifier triple (scheme, repository, local id) of the generated dgMetadataRecord :)
44element schemeIdentifier {'NDG-B0'},
45element repositoryIdentifier {$input_repository},
46if ($output_local_id != 'Output_LocalID') then (: any value other than the literal 'Output_LocalID' is used as the local identifier :)
47element localIdentifier {$output_local_id}
48else
49element localIdentifier {data($DIF/dif:Entry_ID)} (: otherwise fall back to the DIF Entry_ID :)
50},
51element dgMetadataDescription { (: generated description ID, last-updated date, abstract, and one descriptionSection per DIF Related_URL :)
52element metadataDescriptionID {
53element schemeIdentifier {'NDG-B0'},
54element repositoryIdentifier {$input_repository},
55if ($output_local_id != 'Output_LocalID') then
56element localIdentifier {concat('generated_desc-', $output_local_id)}
57else
58element localIdentifier {concat('generated_desc-', encode-for-uri(string($DIF/dif:Entry_ID)))}
59},
60element metadataDescriptionLastUpdated {$currentDate},
61    element abstract {
62        element abstractText {string($DIF/dif:Summary)}
63    }
64    ,
65    for $descOnline in $DIF/dif:Related_URL
66    return
67        element descriptionSection {
68            element descriptionOnlineReference {
69            element dgSimpleLink {
70                (: Link name: the DIF Description when present, otherwise the literal 'URL'. :)
71                (: The earlier form used two complementary tests, each written as :)
72                (: "then element name {...}, element URL {...} else ()". A comma sequence is not :)
73                (: an ExprSingle, so it cannot follow "then" without parentheses; choosing the :)
74                (: name content with a single if/else avoids that and the duplicated URL element. :)
75                element name {
76                    if (exists($descOnline/dif:Description)) then data($descOnline/dif:Description)
77                    else 'URL'
78                },
79                (: URL is emitted as xs:anyURI; an earlier revision wrapped it in encode-for-uri instead. :)
80                element URL {data($descOnline/dif:URL) cast as xs:anyURI}
81                } }
82            }
83        },
84        element name {string($DIF/dif:Entry_Title)},
85        element abbreviation {string($DIF/dif:Entry_Title)},
86        element dgDataEntity {
87        element dgDataSetType {''},
88        element dgDataSummary {
89        for $parameter in $DIF/dif:Parameters (: one dgParameterSummary per DIF Parameters element :)
90        return
91            element dgParameterSummary {
92                element dgParameterValue { (: placeholder content: an empty Value with a 'dummy' unit from the unknown vocabulary :)
93                    element dgValueDataParameter {
94                        element Value {''},
95                        element dgStandardUnit {
96                            element dgValidTerm {'dummy'},
97                            element dgValidTermID {
98                                element ParentListID {$voclib:unknown_vocab_id},
99                                element TermID {encode-for-uri('dummy unit')}
100                            }
101                        }
102                    }
103                },
104            element dgStdParameterMeasured { (: nested dgValidSubterm chain: Category > Topic > Term > Variable > Detailed_Variable :)
105                if ($parameter/dif:Category!='') then element dgValidTerm {string($parameter/dif:Category)}
106                else  element dgValidTerm {'unknown'}, (: 'unknown' is used when Category is absent or empty :)
107                    element dgValidTermID {
108                        element ParentListID {concat($voclib:gcmd_science_valids_categories, '/current')},
109                        element TermID {encode-for-uri($parameter/dif:Category)} (: NOTE(review): built from the raw Category even when dgValidTerm fell back to 'unknown' :)
110                    },
111                    element dgValidSubterm { (: Topic level, same fallback pattern as Category :)
112                        if ($parameter/dif:Topic!='') then element dgValidTerm {string($parameter/dif:Topic)}
113                        else  element dgValidTerm {'unknown'},
114                            element dgValidTermID {
115                                element ParentListID {concat($voclib:gcmd_science_valids_topics, '/current')},
116                                element TermID {encode-for-uri($parameter/dif:Topic)}
117                            },
118                    element dgValidSubterm { (: Term level, same fallback pattern as Category :)
119                        if ($parameter/dif:Term!='') then element dgValidTerm {string($parameter/dif:Term)}
120                        else  element dgValidTerm {'unknown'},
121                            element dgValidTermID {
122                                element ParentListID {concat($voclib:gcmd_science_valids_terms, '/current')},
123                                element TermID {encode-for-uri($parameter/dif:Term)}
124                            },
125                        if (exists($parameter/dif:Variable) and $parameter/dif:Variable!='') then (: Variable level is optional: emitted only when present and non-empty :)
126                            element dgValidSubterm {
127                                element dgValidTerm {string($parameter/dif:Variable)},
128                                element dgValidTermID {
129                                    element ParentListID {concat($voclib:gcmd_science_valids_variables, '/current')},
130                                    element TermID {encode-for-uri($parameter/dif:Variable)}
131                                },
132                        if (exists($parameter/dif:Detailed_Variable) and $parameter/dif:Detailed_Variable!='') then (: only reachable inside the Variable level; uses the unknown vocabulary :)
133                            element dgValidSubterm {
134                                element dgValidTerm {string($parameter/dif:Detailed_Variable)},
135                                element dgValidTermID {
136                                    element ParentListID {$voclib:unknown_vocab_id},
137                                    element TermID {encode-for-uri($parameter/dif:Detailed_Variable)}
138                                },
139                        element ListLevel {4}}
140                        else (),
141                        element ListLevel {3}
142                        }
143                        else (),
144                        element ListLevel {2}
145                        },
146                        element ListLevel {1}
147                        },
148                        element ListLevel {0} (: ListLevel records nesting depth: 0 for Category down to 4 for Detailed_Variable :)
149                        },
150                element ParameterName { (: 'Category > Topic > Term', plus ' > Variable' and ' > Detailed_Variable' when those are present and non-empty :)
151                    concat (string($parameter/dif:Category), ' > ', string($parameter/dif:Topic), ' > ', string($parameter/dif:Term)),
152                    if (exists($parameter/dif:Variable) and $parameter/dif:Variable!='') then
153                    concat(' > ', string($parameter/dif:Variable)) (: NOTE(review): adjacent atomic values in element content are joined with a space, so this piece follows an extra space - confirm that is acceptable :)
154                    else (),
155                    if (exists($parameter/dif:Detailed_Variable) and $parameter/dif:Detailed_Variable!='') then (: NOTE(review): appended even when Variable is missing, unlike the nested terms above :)
156                    concat(' > ', string($parameter/dif:Detailed_Variable))
157                    else ()
158                },
159                element ParameterAbbreviation { (: content is built exactly as for ParameterName :)
160                    concat (string($parameter/dif:Category), ' > ', string($parameter/dif:Topic), ' > ', string($parameter/dif:Term)),
161                    if (exists($parameter/dif:Variable) and $parameter/dif:Variable!='') then
162                    concat(' > ', string($parameter/dif:Variable))
163                    else (),
164                    if (exists($parameter/dif:Detailed_Variable) and $parameter/dif:Detailed_Variable!='') then
165                    concat(' > ', string($parameter/dif:Detailed_Variable))
166                    else ()
167                }
168            }, (: end of dgParameterSummary :)
169if (exists($DIF/dif:Spatial_Coverage) or exists($DIF/dif:Location) or exists($DIF/dif:Temporal_Coverage) or exists($DIF/dif:Paleo_Temporal_Coverage) or exists($DIF/dif:Chronostratigraphic_Unit)) then (: guard covers every input used below, including Chronostratigraphic_Unit, so a DIF carrying only chronostratigraphic units still gets its coverage block :)
170element dgDataCoverage { (: optional spatial coverage followed by optional temporal coverage :)
171    if (exists($DIF/dif:Spatial_Coverage) or exists($DIF/dif:Location)) then
172    element dgSpatialCoverage {
173        for $boundingbox in $DIF/dif:Spatial_Coverage[exists(dif:Northernmost_Latitude)
174            and exists(dif:Southernmost_Latitude)
175            and exists(dif:Easternmost_Longitude)
176            and exists(dif:Westernmost_Longitude)] (: only Spatial_Coverage entries with all four limits yield a BoundingBox :)
177        return
178            element BoundingBox {
179                element LimitNorth {data(inputParse:fix-coord($boundingbox/dif:Northernmost_Latitude))},
180                element LimitSouth {data(inputParse:fix-coord($boundingbox/dif:Southernmost_Latitude))},
181                element LimitWest {data(inputParse:fix-coord($boundingbox/dif:Westernmost_Longitude))},
182                element LimitEast {data(inputParse:fix-coord($boundingbox/dif:Easternmost_Longitude))}
183            },
184        for $location in $DIF/dif:Location (: one dgArea per DIF Location :)
185        return
186            element dgArea {
187                element dgValidTerm {string($location)},
188                element dgValidTermID {
189                    element ParentListID {concat($voclib:gcmd_location_valids, '/current')},
190                    element TermID {encode-for-uri($location)}
191                }
192            }
193    }
194    else (),
195    if (exists($DIF/dif:Temporal_Coverage) or exists($DIF/dif:Paleo_Temporal_Coverage) or exists($DIF/dif:Chronostratigraphic_Unit)) then
196    element dgTemporalCoverage {
197        for $temporalcoverage in $DIF/dif:Temporal_Coverage[exists(dif:Start_Date)] (: entries without a Start_Date are skipped :)
198        return
199            element DateRange {
200                element DateRangeStart {string($temporalcoverage/dif:Start_Date)},
201                element DateRangeEnd {string($temporalcoverage/dif:Stop_Date)}
202            },
203        for $paleotemporalcoverage in $DIF/dif:Paleo_Temporal_Coverage[exists(dif:Paleo_Start_Date)] (: entries without a Paleo_Start_Date are skipped :)
204        return
205            element DateRange {
206                element DateRangeStart {string($paleotemporalcoverage/dif:Paleo_Start_Date)},
207                element DateRangeEnd {string($paleotemporalcoverage/dif:Paleo_Stop_Date)}
208            },
209        for $chronostratigraphic in $DIF/dif:Chronostratigraphic_Unit (: one dgChronostratigraphicTerm per unit :)
210        return
211            element dgChronostratigraphicTerm {
212                element dgValidTerm {string($chronostratigraphic)},
213                element dgValidTermID {
214                    element ParentListID {concat($voclib:gcmd_chronostratigraphic_valids, '/current')},
215                    element TermID {encode-for-uri($chronostratigraphic)}
216                }
217            }
218    }
219    else ()
220}
221else ()
222},
223element dgDataRoles { (: at most one dgDataCreator role (from Dataset_Creator, else Originating_Center) followed by a dgDataCurator role that is always emitted :)
224if (exists($DIF/dif:Data_Set_Citation/dif:Dataset_Creator)) then (: Dataset_Creator takes precedence; Originating_Center is consulted only when no Dataset_Creator exists :)
225element dgDataCreator {
226element dgMetadataID {
227element schemeIdentifier {'NDG-B0'},
228element repositoryIdentifier {$input_repository},
229if ($output_local_id != 'Output_LocalID') then
230element localIdentifier {concat('generated_creator-', $output_local_id)} (: NOTE(review): not passed through encode-for-uri, unlike the Originating_Center branch below :)
231else
232element localIdentifier {concat('generated_creator-', encode-for-uri(string($DIF/dif:Entry_ID)))}
233},
234element roleName {'Data Creator'},
235element abbreviation {'Creator'},
236for $creatorID in $DIF/dif:Data_Set_Citation/dif:Dataset_Creator (: one dgRoleHolder per Dataset_Creator element :)
237return
238element dgRoleHolder {
239(: SJD changed this to dgOrganisationID from dgMetadataID - assuming simple error :)
240    element dgOrganisationID { (: same 'generated_orgcit-' id construction as the dgOrganisation entries generated at the end of the query :)
241        element schemeIdentifier {'NDG-B0'},
242        element repositoryIdentifier {$input_repository},
243        if ($output_local_id != 'Output_LocalID') then
244            element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', $output_local_id))}
245        else
246            element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', data($DIF/dif:Entry_ID)))}
247        },
248       element startDate {current-date()} (: NOTE(review): raw current-date() rather than $currentDate - confirm MOLES accepts this form here :)
249    }
250}
251else if (exists($DIF/dif:Originating_Center)) then
252element dgDataCreator {
253element dgMetadataID {
254element schemeIdentifier {'NDG-B0'},
255element repositoryIdentifier {$input_repository},
256if ($output_local_id != 'Output_LocalID') then
257element localIdentifier  {encode-for-uri(concat('generated_creator-', $output_local_id))}
258else
259element localIdentifier  {encode-for-uri(concat('generated_creator-', data($DIF/dif:Entry_ID)))}
260},
261element roleName {'Data Creator'},
262element abbreviation {'Creator'},
263for $creatorID in $DIF/dif:Originating_Center (: one dgRoleHolder per Originating_Center element :)
264return
265element dgRoleHolder {
266element dgOrganisationID {
267element schemeIdentifier {'NDG-B0'},
268element repositoryIdentifier {$input_repository},
269if ($output_local_id != 'Output_LocalID') then
270element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', $output_local_id))}
271else
272element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', data($DIF/dif:Entry_ID)))}
273},
274element startDate {current-date()}
275}
276}
277else (),
278element dgDataCurator { (: emitted unconditionally :)
279    element dgMetadataID {
280        element schemeIdentifier {'NDG-B0'},
281        element repositoryIdentifier {$input_repository},
282        if ($output_local_id != 'Output_LocalID') then
283            element localIdentifier  {encode-for-uri(concat('generated_curator-', $output_local_id))}
284        else
285            element localIdentifier  {encode-for-uri(concat('generated_curator-', data($DIF/dif:Entry_ID)))}
286    },
287    element roleName {'Data Curator'},
288    element abbreviation {'Curator'},
289    element dgRoleHolder {
290        element dgOrganisationID { (: NOTE(review): reuses the 'generated_curator-' id of the role itself; no dgOrganisation with this id is generated in this query - confirm it resolves elsewhere :)
291            element schemeIdentifier {'NDG-B0'},
292            element repositoryIdentifier {$input_repository},
293            (:SJD this not valid - just use same localIdentifier as above..:)
294            (:element localIdentifier {$input_repository_local}:)
295            if ($output_local_id != 'Output_LocalID') then
296            element localIdentifier  {encode-for-uri(concat('generated_curator-', $output_local_id))}
297            else
298            element localIdentifier  {encode-for-uri(concat('generated_curator-', data($DIF/dif:Entry_ID)))}
299        },
300    element startDate {current-date()}
301}
302}
303}
304},
305element dgStructuredKeyword { (: fixed keyword added to every generated record, marking it as 'd2b converted' :)
306element dgValidTerm {'d2b converted record'},
307element dgValidTermID {
308element ParentListID {$voclib:unknown_vocab_id},
309element TermID {'d2b'}
310}
311},
312for $structuredKeywords in $DIF/dif:Keyword (: one dgStructuredKeyword per DIF Keyword :)
313return
314element dgStructuredKeyword {
315element dgValidTerm {string($structuredKeywords)},
316element dgValidTermID {
317if ($structuredKeywords='MDIP' or $structuredKeywords='NERC' or $structuredKeywords='NERC_DDC' or $structuredKeywords='DPPP') then (: these four keywords are assigned to the data-provider vocabulary; all others to the unknown vocabulary :)
318element ParentListID {concat($voclib:ndg_data_provider_vocab, '/current')}
319else
320element ParentListID {$voclib:unknown_vocab_id}
321,
322element TermID {encode-for-uri($structuredKeywords)}
323}
324},
325for $structuredKeywords in $DIF/dif:ISO_Topic_Category (: one dgStructuredKeyword per ISO_Topic_Category, always against the ISO topic list :)
326return
327element dgStructuredKeyword {
328element dgValidTerm {string($structuredKeywords)},
329element dgValidTermID {
330element ParentListID {concat($voclib:iso_topic_list, '/current')},
331element TermID {encode-for-uri($structuredKeywords)}
332}
333},
334if (exists($DIF/dif:DIF_Creation_Date) or exists($DIF/dif:Last_DIF_Revision_Date)) then (: provenance is emitted only when the DIF carries a creation or revision date :)
335element dgMetadataProvenance {
336if (exists($DIF/dif:DIF_Creation_Date)) then
337element RecordCreation {
338element CreatedDate {
339if (string($DIF/dif:DIF_Creation_Date) castable as xs:date) then
340string($DIF/dif:DIF_Creation_Date) cast as xs:date
341else (current-date()) (: a creation date that is not a valid xs:date is replaced by today's date :)
342},
343element CreatedBy {$input_repository}
344}
345else (: reached when only Last_DIF_Revision_Date exists: creation is stamped with today's date and 'MOLES Import' :)
346element RecordCreation {
347element CreatedDate {current-date()},
348element CreatedBy {'MOLES Import'}
349},
350if (exists($DIF/dif:Last_DIF_Revision_Date)) then
351element RecordUpdate {
352element UpdateDate {
353if (string($DIF/dif:Last_DIF_Revision_Date) castable as xs:dateTime) then (: NOTE(review): a date-only value with no time part is not castable as xs:dateTime, so it falls through to current-dateTime() - confirm that is intended :)
354string($DIF/dif:Last_DIF_Revision_Date) cast as xs:dateTime
355else (current-dateTime())
356},
357element UpdatedBy {$input_repository}
358}
359else ()
360}
361else ()
362} (: </dgMetadataRecord>:),
363
364(: Had to add some stuff here so originating data centre stuff is picked up
365for $bum in distinct-values($DIF/(dif:Data_Set_Citation/dif:Dataset_Creator | dif:Originating_Center | dif:Data_Center))
366return
367    element snooze {data('arse')},
368
369:)
370
371if (count(distinct-values($DIF/(dif:Data_Set_Citation/dif:Dataset_Creator | dif:Originating_Center ))) !=0) then (: at least one creator or originating centre value exists :)
372    for $creator in distinct-values($DIF/(dif:Data_Set_Citation/dif:Dataset_Creator | dif:Originating_Center )) (: one dgOrganisation per distinct value :)
373    return
374        element dgOrganisation{
375            element dgMetadataID { (: same 'generated_orgcit-' id construction as the dgRoleHolder references in dgDataCreator :)
376                element schemeIdentifier {'NDG-B0'},
377                element repositoryIdentifier {$input_repository},
378                if ($output_local_id != 'Output_LocalID') then
379                    element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creator), '-', $output_local_id))}
380                else
381                    element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creator), '-', data($DIF/dif:Entry_ID)))}
382                },
383                element name {string($creator)},
384                element abbreviation {string($creator)},
385                element contactDetails { 
386                    element URI {data($DIF/dif:Data_Center/dif:Data_Center_URL)} (: every generated organisation gets the DIF Data_Center URL; NOTE(review): not cast to xs:anyURI, unlike the else branch :)
387                }
388        }
389 else (: no creator or originating centre: emit a single organisation described by the DIF Data_Center :)
390     element dgOrganisation{
391         element dgMetadataID {
392                element schemeIdentifier {'NDG-B0'},
393                element repositoryIdentifier {$input_repository},               
394               if ($output_local_id != 'Output_LocalID') then
395                   element localIdentifier {concat('generated_desc-', $output_local_id)} (: NOTE(review): same 'generated_desc-' id as metadataDescriptionID earlier in the record - confirm the reuse is intended :)
396               else
397                   element localIdentifier {concat('generated_desc-', encode-for-uri(string($DIF/dif:Entry_ID)))}
398               },
399                element name {data($DIF/dif:Data_Center/dif:Data_Center_Name/dif:Long_Name)},
400                element abbreviation {data($DIF/dif:Data_Center/dif:Data_Center_Name/dif:Short_Name)},
401                element contactDetails { 
402                    element URI {data($DIF/dif:Data_Center/dif:Data_Center_URL) cast as xs:anyURI}
403                }
404     }
405 
406} (:    </dgMetadata> :)
407
Note: See TracBrowser for help on using the repository browser.