source: ndgCommon/trunk/ndg/common/xmldb/xquery/atom2NERCiso.xq @ 7729

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/xmldb/xquery/atom2NERCiso.xq@7729
Revision 7729, 24.9 KB checked in by sdonegan, 10 years ago (diff)

get rid of windows char

Line 
1(: CEDA ATOM to NERC Discovery Metadata Standard conversion :)
2
3(: Originally written to convert to DMS v0.6 (October 2010); note $MetadataStandardVersion below is now set to '0.7' :)
4
5(:
6Steve Donegan, CEDA RALSpace, 20/10/2010
7
8Notes:
9    1. Note georss and gml namespaces changed to match those present in CEDA ATOMs in bora.badc.rl.ac.uk eXist database
10:)
11(: eXist library module supplying the isolib:* helper functions called from the query body :)
12import module namespace isolib='http://ndg.nerc.ac.uk/xquery/lib/iso_utilities' at 'xmldb:exist:///db/xquery/lib/Utilities/isoUtilities_xquery_lib.xquery';
13(: Unprefixed element names in this module resolve to the ISO gmd namespace :)
14declare default element namespace 'http://www.isotc211.org/2005/gmd';
15
16declare namespace atom = 'http://www.w3.org/2005/Atom';
17declare namespace moles="http://ndg.nerc.ac.uk/schema/moles2beta";
18declare namespace f='http://ndg.nerc.ac.uk/moles/localfunctions';
19declare namespace georss = "http://www.georss.org/georss/10";
20declare namespace fn = "http://www.w3.org/2005/02/xpath-functions";
21declare namespace xhtml="http://www.w3.org/1999/xhtml";
22
23(: need the dif namespace as we are picking up organisationInfo from dif based record currently in Bora db :)
24declare namespace dif = 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/';
25
26(: other namespaces we need in order to produce the NERC ISO output :)
27declare namespace gmd='http://www.isotc211.org/2005/gmd';
28declare namespace xsi='http://www.w3.org/2001/XMLSchema-instance';
29declare namespace gco='http://www.isotc211.org/2005/gco';
30declare namespace gmx='http://www.isotc211.org/2005/gmx';
31declare namespace gml='http://www.opengis.net/gml';
32declare namespace xlink='http://www.w3.org/1999/xlink';
33
34(: Keep this in official lib version
35declare variable $targetCollection as xs:string := '/db/atoms/Published';
36declare variable $deploymentsDir as xs:string := '/deployments';
37declare variable $dataEntitiesDir as xs:string := '/data_entities';
38declare variable $repositoryIdentifier as xs:string := 'badc.nerc.ac.uk';
39declare variable $localIdentifier as xs:string := 'badc.nerc.ac.uk__ATOM__dataent_active';
40declare variable $dptTerm as xs:string := 'DPTTerm';
41declare variable $obsTerm as xs:string :=  'OBSTerm';
42declare variable $activityTerm as xs:string :=  'ACTIVITYTerm';
43declare variable $uriTerm as xs:string :=  'URITerm';
44declare variable $granuleTerm as xs:string :=  'GranuleTerm';
45:)
46
47
48(: Define the name of the metadata standard to be used in converted document :)
49declare variable $MetadataStandardName as xs:string := 'NERC Discovery Metadata Standard';
50declare variable $MetadataStandardCEDAPrefix as xs:string := 'NERC_DMS_0.7';
51declare variable $MetadataStandardVersion as xs:string := '0.7';
52
53(: CEDA eXist directory containing the data center organisation DIF snippets :)
54declare variable $organisationInfoDir as xs:string := '/db/atoms/resources';
55
56(: Development use only
57declare variable $targetCollection as xs:string := '/db/atoms/published/';
58declare variable $repositoryIdentifier as xs:string := 'neodc.nerc.ac.uk';
59declare variable $localIdentifier as xs:string := 'tag:badc.nerc.ac.uk,2009-12-10:/view/neodc.nerc.ac.uk__ATOM__dataent_12447304699327717'; :)
60
61(: Production use only.  NOTE(review): the literal values below look like placeholder tokens that are substituted before the query is executed - confirm against the calling code before editing them :)
62declare variable $targetCollection as xs:string := 'TargetCollection';
63declare variable $repositoryIdentifier as xs:string := 'RepositoryID';
64declare variable $localIdentifier as xs:string := 'RepositoryID__ATOM__LocalID$';
65(: The *Term variables below are declared here but are not referenced by the query body in this file :)
66declare variable $dptTerm as xs:string := 'DPT - NOT YET SET UP/None/DPT';
67declare variable $obsTerm as xs:string :=  'OBS - NOT YET SET UP/None/OBS';
68declare variable $activityTerm as xs:string :=  'Activity - NOT YET SET UP/None/ACTIVITY';
69declare variable $granuleTerm as xs:string :=  'GRAN - NOT YET SET UP/None/GRANULE';
70declare variable $deploymentTerm as xs:string := 'NOT YET SET UP/None/Deployment';
71declare variable $uriTerm as xs:string :=  'URI';
72
73(: These directory names are unlikely to change :)
74declare variable $dataEntitiesDir as xs:string := 'data_entities';
75declare variable $deploymentsDir as xs:string := 'deployments';
76declare variable $deploymentDataDir as xs:string := 'deployment_data';
77(: The collection path is a plain concatenation: nothing is inserted between $targetCollection and $dataEntitiesDir, so $targetCollection has to supply its own trailing '/' (as the development value above does) :)
78(:declare variable $atomCollection as xs:string := concat('TargetCollection', $dataEntitiesDir, '/', 'RepositoryID'); :)
79declare variable $atomCollection as xs:string := concat($targetCollection, $dataEntitiesDir, '/', $repositoryIdentifier    );
80(: Current date reduced to the first 10 characters of its xs:string form :)
81declare variable $currentDate as xs:string := substring(current-date() cast as xs:string,1,10);
82
83for $DE in collection($atomCollection)/atom:entry[matches(atom:id, $localIdentifier)]
84(: Emits one gmd:MD_Metadata document per atom:entry whose atom:id matches the regular expression held in $localIdentifier :)
85(:sort out complex variables here as easier to do here than embedded in the xQuery! :)
86
87(: set unique identifier as a variable: providerID, standard prefix and the part of atom:id after '__ATOM__', joined by ':' :)
88let $identifier := concat(string($DE/moles:entity/moles:molesISO/moles:providerID),':',$MetadataStandardCEDAPrefix,':', tokenize(string($DE/atom:id), '__ATOM__')[2])
89
90(: extract the CEDA Data Center ID :)
91let $cedaIdentifier := $DE/moles:entity/moles:molesISO/moles:providerID
92
93(:define string variable for publication and creation dates comprised of comma delimited list ("<date>=publication,<date>=creation") from ATOM elements:)
94let $publicationCreationDates := concat(data($DE/atom:published),"=",string('publication'),",",data($DE/moles:entity/moles:molesISO/moles:created),"=",string("creation"))
95
96return
97
98element gmd:MD_Metadata {
99   
100    element gmd:fileIdentifier {
101        element gco:CharacterString {
102            (: for CEDA purposes use main unique resource identifier here - element 6
103            concat(string($DE/moles:entity/moles:molesISO/moles:providerID),':NERC_v0.6:', tokenize(string($DE/atom:id), '__ATOM__')[2]):)
104            data($identifier)
105        }       
106     },
107     
108     (:Metadata_Language element 29:)
109     element gmd:language {
110     
111         (: TODO: CEDA supports some iso elements in atom, but needs provision for codelist values.  In this case "eng" :)
112         element gmd:LanguageCode {
113             attribute codeList {'http://www.loc.gov/standards/iso639-2/php/code_list.php'},
114             attribute codeListValue {'eng'},
115             data($DE/moles:entity/moles:molesISO/moles:metadataLanguage)         
116         }
117     },
118     
119     (: Resource Type element 4 :)
120     element gmd:hierarchyLevel {
121         element gmd:MD_ScopeCode {         
122             attribute codeList {'http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_19139_Schemas/resources/Codelist/gmxCodelists.xml#MD_ScopeCode'},
123             attribute codeListValue {'series'},
124             string('series')
125         
126         }
127     },
128
129    (: Element 22 Responsible Party - Metadata point of contact.  For CEDA purposes this will be the relevant helpdesk contact :)
130   
131     (: TODO: get this info from ATOM :)
132   
133    element gmd:contact
134    {
135        (:isolib:CI_ResponsibleParty(string("CEDA"), element individualName {string("")}, element positionName{string("CEDA Data Scientist")}, element role {string("pointOfContact")}, element phone {string("01235 778123")}, element addressLine {string("RALSpace, HSIC, Rutherford Appleton Laboratory")}, element city {string("Didcot")}, element postCode {string("OX11 0QX")}, element email{string("ceda@stfc.ac.uk")}):)
136        isolib:cedaDataCenter(string("pointOfContact"), $organisationInfoDir, $cedaIdentifier)
137    },
138   
139    (: Date of update of metadata - Element 26 :)
140    (: For CEDA this translates to the timestamp of the last update in the MOLES db, rather than the datestamp of the application of this conversion... :)
141    element gmd:dateStamp {
142        element gco:Date {
143            data($DE/moles:entity/moles:molesISO/moles:created)
144        }
145    },
146   
147    (: Metadata Standard Name - Element  27 :)
148    (: For CEDA, this means the version of the NERC Discovery Metadata Standard this conversion is mapped to :)
149    element gmd:metadataStandardName {
150        element gco:CharacterString { data($MetadataStandardName)}
151    },
152   
153    (: Metadata Standard Version - Element 28 :)
154    element gmd:metadataStandardVersion {
155        element gco:CharacterString { data($MetadataStandardVersion)}
156    },
157   
158    (: Spatial Reference System - Element 15 :)
159    (: TODO: does CEDA need to include this in MOLES database? :)
160    element gmd:referenceSystemInfo {
161        element gmd:MD_ReferenceSystem {
162            element gmd:referenceSystemIdentifier {
163                element gmd:RS_Identifier {
164                    element gmd:code {
165                        (: TODO: find correct code! :)
166                        element gco:CharacterString { string("urn:ogc:def:crs:EPSG::27700")}
167                    }
168                }
169            }
170        }
171    },
172   
173    (: identificationInfo :)
174    element gmd:identificationInfo {
175        element gmd:MD_DataIdentification {
176            element gmd:citation {
177           
178                (: NOTE following elements handled by CI_Citation function :)
179           
180                (: Resource Title - Element 1 :)
181                (: Temporal Reference Date - Element 16 :)
182                (: TODO: check CEDA ATOM publication date has the same definition :)
183                (:Publication - Element 16.2 :)
184                (:Creation - Element 16.4 :)
185                (: Identifier - Element 6 :)
186                isolib:CI_Citation(data($DE/atom:title),$publicationCreationDates,$identifier,$DE/moles:entity/moles:molesISO/moles:providerID)
187               
188            },
189           
190            (: Abstract - Element 3 :)
191            element gmd:abstract {
192                element gco:CharacterString {data($DE/atom:summary)}                   
193            },
194           
195            (: Responsible party - Element 22 :)
196           
197            (: TODO: CEDA will need to better delineate this info as may need "originator" in addition to "dataPointOfContact" & metadataPointOfContact :)
198           
199            (:TODO: for CEDA purposes is organisationName sufficient?  Depends on type of pointOfContact as well as whats available :)
200           
201            (:local:pointOfContact ($organisationName, $individualName, $positionName, $role, $phone, $deliveryPoint, $city, $postalCode, $electronicalMailAddress):)
202           
203            (:
204            element gmd:pointOfContact
205            {
206                local:pointOfContact (string("CEDA"), string(""), string("CEDA Data Scientist"), string("pointOfContact"), string("01235 778123"), string("RALSpace, HSIC, Rutherford Appleton Laboratory"), string("Didcot"), string("OX11 0QX"), string("ceda@stfc.ac.uk"))
207            },               
208            :)
209            (:dataPointOfContact:)
210           
211            (:metadataDataPointOfContact:)
212           
213            (:distributor - NOTE - goes in distributionInfo bit :)
214           
215            (:originator:)
216            element gmd:pointOfContact
217            {               
218                (:isolib:CI_ResponsibleParty(string("CEDA"), element individualName {string("")}, element positionName{string("CEDA Data Scientist")}, element role {string("originator")}, element phone {string("01235 778123")}, element addressLine {string("RALSpace, HSIC, Rutherford Appleton Laboratory")}, element city {string("Didcot")}, element postCode {string("OX11 0QX")}, element email{string("ceda@stfc.ac.uk")}):)
219                isolib:cedaDataCenter(string("originator"), $organisationInfoDir, $cedaIdentifier)
220            },
221           
222            (:resourceMaintenance - Element # :)
223           
224            (:TODO: CEDA needs to record this resourceMaintenance info plus other INSPIRE related stuff HARDCODE for now! :)
225            element gmd:resourceMaintenance {
226                element gmd:MD_MaintenanceInformation {
227                    element gmd:maintenanceAndUpdateFrequency {
228                        element gmd:MD_MaintenanceFrequencyCode {
229                            attribute codeList {'http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_19139_Schemas/resources/Codelist/gmxCodelists.xml#MD_MaintenanceFrequencyCode'},
230                            attribute codeListValue {'annually'},
231                            string('annually')                         
232                        }                   
233                    }
234                }
235            },
236           
237            (: Data Format :- Element #:)
238            (:TODO: need to get CEDA to evaluate this properly - codelist/vocab?  :)
239            element gmd:resourceFormat {
240                element gmd:MD_Format {
241                    element gmd:name {
242                        element gco:CharacterString { string("Dataset") }
243                    }
244                }
245            },
246           
247            (: Keywords - Element 11 :)
248           
249            (: Ensure NERC_DDC is in there :)
250           
251            (: TODO: CEDA/NERC - is this keyword in a vocab or codelist?/ :)
252            element gmd:descriptiveKeywords {
253                element gmd:MD_Keywords {
254                    element gmd:keyword {
255                        element gco:CharacterString { string("NERC_DDC") }
256                    }
257                }
258            },
259           
260            (: Vertical Extent - Element 14:)
261           
262            (: TODO - CEDA have to implement vertical extent keywords - must take from L131 list on NDG vocab server.  Optional element so can leave for now :)
263           
264            (: INSPIRE keywords :)
265           
266            (: TODO: CEDA - need to have info on INSPIRE themes available in the CEDA MOLES db - hardwire for now :)
267           
268            element gmd:descriptiveKeywords {
269                element gmd:MD_Keywords {
270                    element gmd:keyword {
271                        element gco:CharacterString { string("Elevation") }
272                    },
273                    element gmd:thesaurusName {                   
274                        isolib:CI_Citation(string('GEMET - INSPIRE themes, version 1.0'),concat($currentDate,"=",string('publication')),string(""),string(""))
275                    }
276                }
277            },
278           
279            (: TODO: CEDA - need to properly quantify keywords/parameters to actual vocabs.  For now map category attribute "label" to  keyword value in gmx:Anchor and the scheme to xlink:href attribute and term to xlink:title. 
280            follow medin example for nerc harvesting keyword.  This seriously needs tidying up :)
281            for $keyword in $DE/atom:category
282            return
283            element gmd:descriptiveKeywords {
284                element gmd:MD_Keywords {
285                    element gmd:keyword{
286                        element gmx:Anchor {
287                            attribute xlink:href {data($keyword/@scheme)},
288                            attribute xlink:title {data($keyword/@term)},
289                            data($keyword/@label)
290                        }                           
291                    }
292                }
293            },
294           
295            (: Conditions applying to access and use - Element 21 :)
296            (: TODO: CEDA doesnt record this in ATOM & its Mandatory.  Hardcode for now :)
297            element gmd:resourceConstraints {
298                element gmd:MD_Constraints {
299                    element gmd:useLimitation {
300                        (:TODO: CEDA - this is free text :)
301                        element gco:CharacterString { string("Information not yet available in this format.  Please contact ceda@stfc.ac.uk")}
302                    }
303                }
304            },
305           
306            (: Limitations on Public Access - Element 20 :)           
307            (: TODO: CEDA doesnt record this in ATOM - & its Mandatory.  Hardcode for now :)
308            element gmd:resourceConstraints {
309                element gmd:MD_LegalConstraints {
310                    element gmd:accessConstraints {
311                        element gmd:MD_RestrictionCode {
312                           
313                             (:TODO: CEDA will need link to this codelist for this element :)
314                            attribute codeList {'http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_19139_Schemas/resources/Codelist/gmxCodelists.xml#MD_RestrictionCode'},
315                            attribute codeListValue {'TBA'},
316                            string("TO BE FINALISED (contact ceda@stfc.ac.uk)")
317                       
318                        }
319                    }
320                }
321            },
322           
323            (: Spatial Resolution - Element 18 :)
324           
325            (: TODO: CEDA doesnt record this in ATOM yet & its Mandatory: Hardcode for now to value of "0"... :)
326            element gmd:spatialResolution {
327                element gmd:MD_Resolution {
328                    element gmd:distance {
329                        element gco:Distance {
330                            string("0")
331                        }
332                    }
333                }           
334            },
335           
336            (: Resource Language - Element 8 :)
337            (:TODO: CEDA will need to include this element if any textual resource for the dataset - hardcode to english for now :)
338            element gmd:language {
339                element gmd:LanguageCode {
340                    attribute codeList {'http://www.loc.gov/standards/iso639-2/php/code_list.php'},
341                    attribute codeListValue {'eng'},
342                    string("English")
343                }
344            },
345           
346            (: Topic Category - Element 9:)
347            (:TODO: CEDA will also need to include this information as mandatory for datasets.  For now, hardcoded to "Geoscientific Information" :)
348            element gmd:topicCategory {
349                element gmd:MD_TopicCategoryCode {string("GeoscientificInformation")}
350            },
351           
352           
353           
354            (: TODO: Many CEDA datasets don't have boundingBox info - mandatory for compliance.. :)
355             element gmd:extent {
356                element gmd:EX_Extent {
357               
358                     (:Extent - Bounding Box - Element 12 (note difference from an actual extent - i.e. ICES sea areas) :)
359                     (: first token of each gml corner is written as longitude, second token as latitude :)
360                    if (exists($DE/georss:where)) then
361                        for $boundingBox in $DE/georss:where
362                        return
363                            element gmd:geographicElement {
364                                element gmd:EX_GeographicBoundingBox {                                                   
365                                    element gmd:westBoundLongitude {
366                                        element gco:Decimal {
367                                            data(tokenize(string($boundingBox/gml:Envelope/gml:lowerCorner), ' ')[1])
368                                        }
369                                    },
370                                    element gmd:eastBoundLongitude {
371                                        element gco:Decimal {
372                                            data(tokenize(string($boundingBox/gml:Envelope/gml:upperCorner), ' ')[1])
373                                        }
374                                    },
375                                    element gmd:southBoundLatitude {
376                                        element gco:Decimal {
377                                            data(tokenize(string($boundingBox/gml:Envelope/gml:lowerCorner), ' ')[2])
378                                        }
379                                    },
380                                    element gmd:northBoundLatitude {
381                                        element gco:Decimal {
382                                            data(tokenize(string($boundingBox/gml:Envelope/gml:upperCorner), ' ')[2])
383                                        }
384                                    }
385                                }
386                            }
387                        else (),
388                       
389                        (: Temporal Reference - Element 16.1 :)
390                        (: TODO: CEDA - mandatory element yet not all dataEntities have this recorded...
391                            AND - ATOM doesnt distinguish a single date as either start or end!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!  blimey.
392                        :)
393                        (: each moles:temporalRange is split on '/' into begin and end; the loop variable is used so that an entry with several ranges does not pass a multi-item sequence to string() :)
394                        element gmd:temporalElement {
395                            element gmd:EX_TemporalExtent {
396                                if (exists($DE/moles:temporalRange)) then
397                                    for $dates in $DE/moles:temporalRange
398                                    return
399                                        element gmd:extent {
400                                            element gml:TimePeriod {
401                                                element gml:beginPosition {
402                                                    data (tokenize(string($dates), '/')[1])
403                                                },
404                                                element gml:endPosition {
405                                                    data (tokenize(string($dates), '/')[2])
406                                                }
407                                            }
408                                        }
409                                else ()
410                            }                       
411                        }
412                    }                       
413             } (: /extent:)
414          }   
415    },  (: /identificationInfo :)
416   
417    (: distributionInfo :)
418    element gmd:distributionInfo {
419        element gmd:MD_Distribution {
420   
421            (: The ISO 19115 Constraints require this element! :)
422            element gmd:distributionFormat {
423                attribute gco:nilReason {"inapplicable"}       
424            },
425       
426            (: Responsible Party - Element 22 - Mandatory for datasets! NOTE role=distributor so ok to hardwire as CEDA :)
427            element gmd:distributor {
428                element gmd:MD_Distributor{
429                    element gmd:distributorContact {                                               
430                        isolib:cedaDataCenter(string("distributor"), $organisationInfoDir, $cedaIdentifier)
431                    }
432                }           
433            },
434           
435            (: Resource Locator - Element 5.  NOTE for NERC onlineResource must be provided for datasets:)
436           
437            (: TODO: For CEDA purposes - ensure link to dataset browser is explicitly included in info is in ATOM - indicated by attribute val "Data Directory" :)
438           
439            (:TODO: CEDA - must specify correct choice of onlineresource from vocab/codelist - hardwired for "download" right now. :)
440           
441            for $link in $DE/atom:link
442                let $url:= $link/@href
443                let $relation:= $link/@rel
444                let $title:= $link/@title
445                return               
446                    if ($title = "Data Directory") then
447                        (: For NERC DMS, download must only be used where online data is actually available.  So, until CEDA can link to the correct codelist and get these vals into the ATOM we must do this.. :)
448                        isolib:transferOptions (string($url), string($relation), string($title), string("download"))
449                    else
450                        (:otherwise treat everything else as "information" now :)
451                        isolib:transferOptions (string($url), string($relation), string($title), string("information"))
452               
453        } (: /MD_Distribution :)
454    }, (: /distributionInfo :)
455    (: Lineage and INSPIRE conformity elements - Element 25.  Emitted as a sibling of distributionInfo, directly under MD_Metadata :)
456    element gmd:dataQualityInfo {
457        element gmd:DQ_DataQuality {
458           
459            (: Scope -required by ISO19115; the level code is wrapped in gmd:DQ_Scope :)
460            element gmd:scope {
461                element gmd:DQ_Scope {
462                    element gmd:level {
463                        element gmd:MD_ScopeCode {
464                            (: TODO: as part of CEDA creep towards INSPIRE conformity, this codelist should be listed:)
465                            attribute codeList {'http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_19139_Schemas/resources/Codelist/gmxCodelists.xml#MD_ScopeCode'},
466                            attribute codeListValue {'dataset'},
467                            string("dataset")
468                        }
469                    }
470                }
471            },
472            (: Lineage - Element 17 :)
473           
474            (: TODO: CEDA : doesnt currently record this information, so hardwire for now - BUT can use ATOM content with CEDA basic strapline  :)               
475            element gmd:lineage {
476                element gmd:LI_Lineage {
477                    element gmd:statement {
478                        element gco:CharacterString {
479                            data(concat(string("This Dataset has been acquired by CEDA and is hosted by agreement from the data originator.")," ",$DE/atom:content/xhtml:div))
480                        }
481                    }
482                }
483            }
484           
485           
486        }       
487    } (: /dataQualityInfo :)
488
489} (: /MD_Metadata :)
490   
491
Note: See TracBrowser for help on using the repository browser.