source: exist/trunk/xquery/dif2moles.xq @ 4338

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/xquery/dif2moles.xq@4338
Revision 4338, 19.2 KB checked in by sdonegan, 11 years ago (diff)

Updated these dif2moles2dif conversions to not use encoded URIS as not needed in non-Exist installation??

Line 
1(: This query produces one MOLES data entity plus one organisation entry for a given DIF instance
2   Input is TargetCollection (where the DIF exists), RepositoryID, where the existing DIF lies, and
3   expected to be the output RepositoryID (fix it in the output xml if it's wrong), and Input_EntryID
4   which is the DIF entryID, and LocalID which is the output localID for the data entity. We also
5   need the localID for the organisation entity, which appears as repository_localid
6   :)
7(: Note algorithm for creating non-pre-existing organisations :)
8(: dgPersons are not created as one can't tell automatically which are people and which are orgs, and orgs are simpler :) 
9
10(:
11import module namespace voclib='http://ndg.nerc.ac.uk/xquery/lib/vocab' at 'xmldb:exist:///db/xqueryLib/Vocabs/vocab_xquery_lib.xquery';
12import module namespace inputParse='http://ndg.nerc.ac.uk/xquery/lib/inputParse' at 'xmldb:exist:///db/xqueryLib/Utilities/inputParse_xquery_lib.xquery';
13:)
14import module namespace voclib='http://ndg.nerc.ac.uk/xquery/lib/vocab' at 'vocab_xquery_lib.xquery';
15import module namespace inputParse='http://ndg.nerc.ac.uk/xquery/lib/inputParse' at 'inputParse_xquery_lib.xquery';
16
17
18declare default element namespace 'http://ndg.nerc.ac.uk/moles';
19declare namespace dif='http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/';
20declare namespace xsi='http://www.w3.org/2001/XMLSchema-instance';
21
22(: Replaceable parameters :)
23declare variable $targetCollection as xs:string := 'TargetCollection';
24declare variable $input_repository as xs:string := 'RepositoryID';
25declare variable $input_repository_local as xs:string := 'repository_localid';
26declare variable $input_entry_id as xs:string := 'Input_Entry_ID';
27declare variable $output_local_id as xs:string := 'LocalID';
28
29
30(:declare variable $input_entry_id as xs:string := 'badc.nerc.ac.uk:DIF:dataent_ukmo-midas';
31declare variable $targetCollection := doc("badc.nerc.ac.uk__DIF__dataent_ukmo-midas.xml";:)
32
33
34(: SJD get current date and manipulate to correct format as raw current-date not useable in MOLES:)
35declare variable $currentDate as xs:string := substring(current-date() cast as xs:string,1,10);
36(:declare variable $extractDate as xs:string := substring($fullDate,1,10);:)
37
38(:for $DIF in collection($targetCollection)/dif:DIF[dif:Entry_ID=$input_entry_id]:)
39for $DIF in collection($targetCollection)/dif:DIF[dif:Entry_ID=$input_entry_id]
40return
41element dgMetadata {
42element dgMetadataRecord {
43element dgMetadataID {
44element schemeIdentifier {'NDG-B0'},
45element repositoryIdentifier {$input_repository},
46if ($output_local_id != 'Output_LocalID') then
47element localIdentifier {$output_local_id}
48else
49element localIdentifier {data($DIF/dif:Entry_ID)}
50},
51element dgMetadataDescription {
52element metadataDescriptionID {
53element schemeIdentifier {'NDG-B0'},
54element repositoryIdentifier {$input_repository},
55if ($output_local_id != 'Output_LocalID') then
56element localIdentifier {concat('generated_desc-', $output_local_id)}
57else
58element localIdentifier {concat('generated_desc-', encode-for-uri(string($DIF/dif:Entry_ID)))}
59},
60element metadataDescriptionLastUpdated {$currentDate}                   ,
61    element abstract {
62        element abstractText {string($DIF/dif:Summary)}
63    }
64    ,
65    for $descOnline in $DIF/dif:Related_URL
66    return
67        element descriptionSection {
68            element descriptionOnlineReference {
69            element dgSimpleLink {
70                (: One link per dif:Related_URL. The link name is the DIF Description when present, :)
71                (: otherwise the literal 'URL'. The URL element is emitted in both cases, unencoded. :)
72                if (exists($descOnline/dif:Description)) then
73                    element name {data($descOnline/dif:Description)}
74                else
75                    element name {data('URL')},
76                (: [1] plus 'xs:anyURI?' stop a missing or repeated dif:URL from raising a cast error :)
77                element URL {data(($descOnline/dif:URL)[1]) cast as xs:anyURI?}
78                } }             
79            }   
80        },
81        element name {string($DIF/dif:Entry_Title)},
82        element abbreviation {string($DIF/dif:Entry_Title)},
83        element dgDataEntity {
84        element dgDataSetType {''},
85        element dgDataSummary {
86        for $parameter in $DIF/dif:Parameters
87        return
88            element dgParameterSummary {
89                element dgParameterValue {
90                    element dgValueDataParameter {
91                        element Value {''},
92                        element dgStandardUnit {
93                            element dgValidTerm {'dummy'},
94                            element dgValidTermID {
95                                element ParentListID {$voclib:unknown_vocab_id},
96                                element TermID {encode-for-uri('dummy unit')}
97                            }
98                        }
99                    }
100                },
101            element dgStdParameterMeasured { (: nests Category > Topic > Term > Variable > Detailed_Variable as dgValidSubterm levels 0..4 :)
102                if ($parameter/dif:Category!='') then element dgValidTerm {string($parameter/dif:Category)}
103                else  element dgValidTerm {'unknown'}, (: absent or empty Category is reported as 'unknown' :)
104                    element dgValidTermID {
105                        element ParentListID {concat($voclib:gcmd_science_valids_categories, '/current')},
106                        element TermID {encode-for-uri($parameter/dif:Category)}
107                    },
108                    element dgValidSubterm { (: level 1: Topic :)
109                        if ($parameter/dif:Topic!='') then element dgValidTerm {string($parameter/dif:Topic)}
110                        else  element dgValidTerm {'unknown'},
111                            element dgValidTermID {
112                                element ParentListID {concat($voclib:gcmd_science_valids_topics, '/current')},
113                                element TermID {encode-for-uri($parameter/dif:Topic)}
114                            },
115                    element dgValidSubterm { (: level 2: Term :)
116                        if ($parameter/dif:Term!='') then element dgValidTerm {string($parameter/dif:Term)}
117                        else  element dgValidTerm {'unknown'},
118                            element dgValidTermID {
119                                element ParentListID {concat($voclib:gcmd_science_valids_terms, '/current')},
120                                element TermID {encode-for-uri($parameter/dif:Term)}
121                            },
122                        if (exists($parameter/dif:Variable) and $parameter/dif:Variable!='') then
123                            element dgValidSubterm { (: level 3: Variable, only when present and non-empty :)
124                                element dgValidTerm {string($parameter/dif:Variable)},
125                                element dgValidTermID {
126                                    element ParentListID {concat($voclib:gcmd_science_valids_variables, '/current')},
127                                    element TermID {encode-for-uri($parameter/dif:Variable)}
128                                },
129                        if (exists($parameter/dif:Detailed_Variable) and $parameter/dif:Detailed_Variable!='') then
130                            element dgValidSubterm { (: level 4: Detailed_Variable, reachable only inside the Variable branch :)
131                                element dgValidTerm {string($parameter/dif:Detailed_Variable)},
132                                element dgValidTermID {
133                                    element ParentListID {$voclib:unknown_vocab_id},
134                                    element TermID {encode-for-uri($parameter/dif:Detailed_Variable)}
135                                },
136                        element ListLevel {4}}
137                        else (),
138                        element ListLevel {3}
139                        } (: closes the Variable subterm :)
140                        else (),
141                        element ListLevel {2}
142                        }, (: closes the Term subterm :)
143                        element ListLevel {1}
144                        }, (: closes the Topic subterm :)
145                        element ListLevel {0}
146                        }, (: closes dgStdParameterMeasured :)
147                element ParameterName { (: 'Category > Topic > Term[ > Variable[ > Detailed_Variable]]' built as ONE string :)
148                    string-join((concat (string($parameter/dif:Category), ' > ', string($parameter/dif:Topic), ' > ', string($parameter/dif:Term)),
149                    if (exists($parameter/dif:Variable) and $parameter/dif:Variable!='') then
150                    concat(' > ', string($parameter/dif:Variable))
151                    else (),
152                    if (exists($parameter/dif:Detailed_Variable) and $parameter/dif:Detailed_Variable!='') then
153                    concat(' > ', string($parameter/dif:Detailed_Variable))
154                    else ()), '') (: joined with '' because a bare sequence of strings in element content is space-separated, which gave '  > ' :)
155                },
156                element ParameterAbbreviation { (: same text as ParameterName :)
157                    string-join((concat (string($parameter/dif:Category), ' > ', string($parameter/dif:Topic), ' > ', string($parameter/dif:Term)),
158                    if (exists($parameter/dif:Variable) and $parameter/dif:Variable!='') then
159                    concat(' > ', string($parameter/dif:Variable))
160                    else (),
161                    if (exists($parameter/dif:Detailed_Variable) and $parameter/dif:Detailed_Variable!='') then
162                    concat(' > ', string($parameter/dif:Detailed_Variable))
163                    else ()), '') (: joined with '' to avoid the doubled space before optional parts :)
164                }
165            }, (: end of dgParameterSummary :)
166if (exists($DIF/dif:Spatial_Coverage) or exists($DIF/dif:Paleo_Temporal_Coverage) or exists($DIF/dif:Location) or exists($DIF/dif:Temporal_Coverage) or exists($DIF/dif:Chronostratigraphic_Unit)) then
167element dgDataCoverage { (: the guard above now also covers Chronostratigraphic_Unit, which the temporal branch below handles :)
168    if (exists($DIF/dif:Spatial_Coverage)  or exists($DIF/dif:Location))  then
169    element dgSpatialCoverage {
170        for $boundingbox in $DIF/dif:Spatial_Coverage[exists(dif:Northernmost_Latitude)
171            and exists(dif:Southernmost_Latitude)
172            and exists(dif:Easternmost_Longitude)
173            and exists(dif:Westernmost_Longitude)]
174            return (: only Spatial_Coverage entries with all four limits yield a BoundingBox :)
175                element BoundingBox {
176                    element LimitNorth {data(inputParse:fix-coord($boundingbox/dif:Northernmost_Latitude))},
177                    element LimitSouth {data(inputParse:fix-coord($boundingbox/dif:Southernmost_Latitude))},
178                    element LimitWest {data(inputParse:fix-coord($boundingbox/dif:Westernmost_Longitude))},
179                    element LimitEast {data(inputParse:fix-coord($boundingbox/dif:Easternmost_Longitude))}
180                },
181        for $location in $DIF/dif:Location
182        return (: one dgArea per dif:Location :)
183            element dgArea {
184                element dgValidTerm {string($location)},
185                element dgValidTermID {
186                element ParentListID {concat($voclib:gcmd_location_valids, '/current')},
187                element TermID {encode-for-uri($location)}
188            }
189        }
190    }
191    else (),
192    if (exists($DIF/dif:Temporal_Coverage) or exists($DIF/dif:Paleo_Temporal_Coverage) or exists($DIF/dif:Chronostratigraphic_Unit)) then
193        element dgTemporalCoverage {
194               for $temporalcoverage in $DIF/dif:Temporal_Coverage[exists(dif:Start_Date)]
195        return (: Temporal_Coverage without a Start_Date is skipped :)
196            element DateRange {
197                element DateRangeStart {string($temporalcoverage/dif:Start_Date)},
198                element DateRangeEnd {string($temporalcoverage/dif:Stop_Date)}
199            },
200            for $paleotemporalcoverage in $DIF/dif:Paleo_Temporal_Coverage[exists(dif:Paleo_Start_Date)]
201            return
202                element DateRange {
203                element DateRangeStart {string($paleotemporalcoverage/dif:Paleo_Start_Date)},
204                element DateRangeEnd {string($paleotemporalcoverage/dif:Paleo_Stop_Date)}
205                },
206            for $chronostratigraphic in $DIF/dif:Chronostratigraphic_Unit
207            return
208                element dgChronostratigraphicTerm {
209                    element dgValidTerm {string($chronostratigraphic)},
210                        element dgValidTermID {
211                            element ParentListID {concat($voclib:gcmd_chronostratigraphic_valids, '/current')},
212                            element TermID {encode-for-uri($chronostratigraphic)}
213                        }
214                     }
215                }
216            else ()
217        }
218    else ()
219},
220element dgDataRoles {
221if (exists($DIF/dif:Data_Set_Citation/dif:Dataset_Creator)) then (: first choice: creators named in the DIF citation :)
222element dgDataCreator {
223element dgMetadataID {
224element schemeIdentifier {'NDG-B0'},
225element repositoryIdentifier {$input_repository},
226if ($output_local_id != 'Output_LocalID') then
227element localIdentifier {concat('generated_creator-', $output_local_id)}
228else
229element localIdentifier {concat('generated_creator-', encode-for-uri(string($DIF/dif:Entry_ID)))}
230},
231element roleName {'Data Creator'},
232element abbreviation {'Creator'},
233for $creatorID in $DIF/dif:Data_Set_Citation/dif:Dataset_Creator
234return
235element dgRoleHolder {
236(: SJD changed this to dgOrganisationID from dgMetadataID - assuming simple error :)
237    element dgOrganisationID {
238        element schemeIdentifier {'NDG-B0'},
239        element repositoryIdentifier {$input_repository},
240        if ($output_local_id != 'Output_LocalID') then
241            element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', $output_local_id))}
242        else
243            element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', data($DIF/dif:Entry_ID)))}
244        },
245       element startDate {$currentDate} (: $currentDate, not raw current-date(): the prolog notes the raw value is not usable in MOLES :)
246    }
247}
248else if (exists($DIF/dif:Originating_Center)) then (: fallback: use the originating centre(s) as creator :)
249element dgDataCreator {
250element dgMetadataID {
251element schemeIdentifier {'NDG-B0'},
252element repositoryIdentifier {$input_repository},
253if ($output_local_id != 'Output_LocalID') then
254element localIdentifier  {encode-for-uri(concat('generated_creator-', $output_local_id))}
255else
256element localIdentifier  {encode-for-uri(concat('generated_creator-', data($DIF/dif:Entry_ID)))}
257},
258element roleName {'Data Creator'},
259element abbreviation {'Creator'},
260for $creatorID in $DIF/dif:Originating_Center
261return
262element dgRoleHolder {
263element dgOrganisationID {
264element schemeIdentifier {'NDG-B0'},
265element repositoryIdentifier {$input_repository},
266if ($output_local_id != 'Output_LocalID') then
267element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', $output_local_id))}
268else
269element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creatorID), '-', data($DIF/dif:Entry_ID)))}
270},
271element startDate {$currentDate} (: same date form as metadataDescriptionLastUpdated :)
272}
273}
274else (),
275element dgDataCurator { (: always emitted; role ID and role-holder ID share the same 'generated_curator-' identifier :)
276    element dgMetadataID {
277        element schemeIdentifier {'NDG-B0'},
278        element repositoryIdentifier {$input_repository},
279        if ($output_local_id != 'Output_LocalID') then
280            element localIdentifier  {encode-for-uri(concat('generated_curator-', $output_local_id))}
281        else
282            element localIdentifier  {encode-for-uri(concat('generated_curator-', data($DIF/dif:Entry_ID)))}
283    },
284    element roleName {'Data Curator'},
285    element abbreviation {'Curator'},
286    element dgRoleHolder { (: NOTE(review): this query creates no dgOrganisation with a 'generated_curator-' identifier - confirm the target record exists :)
287        element dgOrganisationID {
288            element schemeIdentifier {'NDG-B0'},
289            element repositoryIdentifier {$input_repository},
290            (:SJD this not valid - just use same localIdentifier as above..:)
291            (:element localIdentifier {$input_repository_local}:)
292            if ($output_local_id != 'Output_LocalID') then
293            element localIdentifier  {encode-for-uri(concat('generated_curator-', $output_local_id))}
294            else
295            element localIdentifier  {encode-for-uri(concat('generated_curator-', data($DIF/dif:Entry_ID)))}
296        },
297    element startDate {$currentDate} (: $currentDate, not raw current-date(): the prolog notes the raw value is not usable in MOLES :)
298}
299}
300}
301},
302element dgStructuredKeyword { (: constant marker keyword added to every converted record :)
303    element dgValidTerm {'d2b converted record'},
304    element dgValidTermID {
305        element ParentListID {$voclib:unknown_vocab_id},
306        element TermID {'d2b'}
307    }
308},
309(: one keyword per dif:Keyword; the four provider codes use the NDG data-provider vocabulary, all others the unknown list :)
310for $keyword in $DIF/dif:Keyword
311return element dgStructuredKeyword {
312    element dgValidTerm {string($keyword)},
313    element dgValidTermID {
314        element ParentListID {
315            if ($keyword = ('MDIP', 'NERC', 'NERC_DDC', 'DPPP'))
316            then concat($voclib:ndg_data_provider_vocab, '/current')
317            else $voclib:unknown_vocab_id
318        },
319        element TermID {encode-for-uri($keyword)}
320    }
321},
322(: one keyword per dif:ISO_Topic_Category, always against the ISO topic list :)
323for $topic in $DIF/dif:ISO_Topic_Category
324return element dgStructuredKeyword {
325    element dgValidTerm {string($topic)},
326    element dgValidTermID {
327        element ParentListID {concat($voclib:iso_topic_list, '/current')},
328        element TermID {encode-for-uri($topic)}
329    }
330},
331if (exists($DIF/dif:DIF_Creation_Date) or exists($DIF/dif:Last_DIF_Revision_Date)) then
332element dgMetadataProvenance { (: only emitted when the DIF carries a creation or a revision date :)
333if (exists($DIF/dif:DIF_Creation_Date)) then
334element RecordCreation {
335element CreatedDate {
336if (string($DIF/dif:DIF_Creation_Date) castable as xs:date) then
337string($DIF/dif:DIF_Creation_Date) cast as xs:date
338else ($currentDate) (: unparseable creation date: fall back to today in the date form the prolog prepares for MOLES :)
339},
340element CreatedBy {$input_repository}
341}
342else
343element RecordCreation {
344element CreatedDate {$currentDate},
345element CreatedBy {'MOLES Import'}
346},
347if (exists($DIF/dif:Last_DIF_Revision_Date)) then
348element RecordUpdate {
349element UpdateDate { (: accept a full dateTime or a date-only value; a plain date becomes midnight of that day instead of being replaced by 'now' :)
350if (string($DIF/dif:Last_DIF_Revision_Date) castable as xs:dateTime) then string($DIF/dif:Last_DIF_Revision_Date) cast as xs:dateTime
351else if (string($DIF/dif:Last_DIF_Revision_Date) castable as xs:date) then (string($DIF/dif:Last_DIF_Revision_Date) cast as xs:date) cast as xs:dateTime
352else (current-dateTime())
353},
354element UpdatedBy {$input_repository}
355}
356else ()
357}
358else ()
359} (: </dgMetadataRecord>:),
360
361(: Had to add some stuff here so originating data centre stuff is picked up
362for $bum in distinct-values($DIF/(dif:Data_Set_Citation/dif:Dataset_Creator | dif:Originating_Center | dif:Data_Center))
363return
364    element snooze {data('arse')},
365
366:)
367
368if (count(distinct-values($DIF/(dif:Data_Set_Citation/dif:Dataset_Creator | dif:Originating_Center ))) !=0) then
369    for $creator in distinct-values($DIF/(dif:Data_Set_Citation/dif:Dataset_Creator | dif:Originating_Center ))
370    return (: one organisation per distinct creator / originating-centre name :)
371        element dgOrganisation{
372            element dgMetadataID {
373                element schemeIdentifier {'NDG-B0'},
374                element repositoryIdentifier {$input_repository},
375                if ($output_local_id != 'Output_LocalID') then
376                    element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creator), '-', $output_local_id))}
377                else
378                    element localIdentifier  {encode-for-uri(concat('generated_orgcit-', string($creator), '-', data($DIF/dif:Entry_ID)))}
379                },
380                element name {string($creator)},
381                element abbreviation {string($creator)},
382                element contactDetails { 
383                    element URI {data($DIF/dif:Data_Center/dif:Data_Center_URL)}
384                }
385        }
386 else (: no creator or originating centre: describe the DIF data centre instead :)
387     element dgOrganisation{
388         element dgMetadataID { (: NOTE(review): reuses the 'generated_desc-' prefix of the metadata description ID - confirm this is intended :)
389                element schemeIdentifier {'NDG-B0'},
390                element repositoryIdentifier {$input_repository},               
391               if ($output_local_id != 'Output_LocalID') then
392                   element localIdentifier {concat('generated_desc-', $output_local_id)}
393               else
394                   element localIdentifier {concat('generated_desc-', encode-for-uri(string($DIF/dif:Entry_ID)))}
395               },
396                element name {data($DIF/dif:Data_Center/dif:Data_Center_Name/dif:Long_Name)},
397                element abbreviation {data($DIF/dif:Data_Center/dif:Data_Center_Name/dif:Short_Name)},
398                element contactDetails { (: [1] plus 'xs:anyURI?' stop a missing or repeated Data_Center_URL from raising a cast error :)
399                    element URI {data(($DIF/dif:Data_Center/dif:Data_Center_URL)[1]) cast as xs:anyURI?}
400                }
401     }
402 
403} (:    </dgMetadata> :)
404
Note: See TracBrowser for help on using the repository browser.