source: exist/trunk/xquery/moles2mdip.xq @ 4673

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/xquery/moles2mdip.xq@4673
Revision 4673, 11.4 KB checked in by sdonegan, 13 years ago (diff)

Removed the casts to xs:date because the date values found in actual MDIP records are unreliable. Validation at ingest should ultimately sort this out.

Line 
1(: Create an MDIP record for moles documents in /misc/humid1/cbyrom/eclipseWorkspace/ingestAutomation-upgrade2/OAIBatch/data/badc/discovery/moles/ that have badc.nerc.ac.uk and test_file2 :)
2import module namespace voclib='http://ndg.nerc.ac.uk/xquery/lib/vocab' at 'vocab_xquery_lib.xquery';
3import module namespace utillib='http://ndg.nerc.ac.uk/xquery/lib/utilities' at 'utility_xquery_lib.xquery';
4
5declare default element namespace 'http://www.oceannet.org/mdip/xml';
6declare namespace moles='http://ndg.nerc.ac.uk/moles';
7declare namespace gco='http://www.isotc211.org/2005/gco';
8declare namespace xsi='http://www.w3.org/2001/XMLSchema-instance';
9
10(:declare variable collection($targetCollection) := doc("mdip2moles_OP.xml");:)
11
12declare variable $targetCollection as xs:string := 'TargetCollection';
13declare variable $repositoryIdentifier as xs:string := 'RepositoryID';
14declare variable $localIdentifier as xs:string := 'BGSSEA412';
15
16declare variable $ISO_639-2_ns as xs:string := $voclib:ISO_639-2_ns;
17declare variable $ISO_3166_ns as xs:string := $voclib:ISO_3166_ns;
18
(: Select the MOLES metadata record that carries a data entity and whose NDG-B0
   identifier matches the requested repository / local identifier pair. :)
for $DE in collection($targetCollection)/moles:dgMetadata/moles:dgMetadataRecord[
    (: test the child element itself: a quoted 'moles:dgDataEntity' is a string
       literal, which always exists and therefore filters nothing :)
    exists(moles:dgDataEntity)
    and moles:dgMetadataID/moles:schemeIdentifier = 'NDG-B0'
    and moles:dgMetadataID/moles:repositoryIdentifier = $repositoryIdentifier
    and moles:dgMetadataID/moles:localIdentifier = $localIdentifier]
24return
25element Metadata {   
26    element Title {data($DE/moles:name)},
27
28if (exists($DE/moles:dgDataEntity/moles:dgDataSummary/moles:dgDatasetLanguage)) then
29    for $lang in $DE/moles:dgDataEntity/moles:dgDataSummary/moles:dgDatasetLanguage
30return
31    element Language{
32        element LanguageName {data($lang/moles:dgValidTerm)},
33        element LanguageVocab {data($lang/moles:dgValidTermID/moles:ParentListID)},
34        element LanguageCode {data($lang/moles:dgValidTermID/moles:TermID)}
35}
36(: put default in to keep schema happy.. :)
37else element Language{
38        element LanguageName {data("English")},
39        element LanguageVocab {data("UKGemini_LanguageCategories")},
40        element LanguageCode {data("eng")}
41},
42
(: Abstract: a MOLES abstract shorter than 16 characters (or missing altogether)
   is replaced by a fixed notice; otherwise its string value is copied :)
let $abstractText := $DE/moles:dgMetadataDescription/moles:abstract/moles:abstractText
return
    element Abstract {
        if (string-length($abstractText) >= 16) then
            string($abstractText)
        else
            "There was either no or insufficient info in MOLES abstract to produce an MDIP abstract!"
    },
48
(: TopicCategory: copy every structured keyword accepted by voclib:spot-vocab for
   the ISO topic list (presumably a vocabulary-membership test; confirm in
   vocab_xquery_lib.xquery).  With no such keyword a padding entry is written so
   that the MDIP output stays valid. :)
let $isoTopics := $DE/moles:dgStructuredKeyword[
        voclib:spot-vocab($voclib:iso_topic_list, moles:dgValidTermID/moles:ParentListID)]
return
    if (empty($isoTopics)) then
        element TopicCategory {
            element TopicCategoryName {"geoscientificInformation"},
            element TopicCategoryVocab {"ISO_19115:2003_MD_TopicCategoryCodes"},
            element TopicCategoryCode {"001"}
        }
    else
        for $topic in $isoTopics
        let $termId := $topic/moles:dgValidTermID
        return
            element TopicCategory {
                element TopicCategoryName {data($topic/moles:dgValidTerm)},
                element TopicCategoryVocab {data($termId/moles:ParentListID)},
                element TopicCategoryCode {data($termId/moles:TermID)}
            },
64   
(: Subject :)
(: Standard parameters whose ParentListID is accepted by voclib:spot-vocab for any
   of the GCMD / BODC vocabularies listed below.  The filter is evaluated once and
   reused for both the "nothing found" test and the loop. :)
let $subjects := $DE/moles:dgDataEntity/moles:dgDataSummary/moles:dgParameterSummary/moles:dgStdParameterMeasured[
        voclib:spot-vocab($voclib:gcmd_science_valids, moles:dgValidTermID/moles:ParentListID)
        or voclib:spot-vocab($voclib:gcmd_project_valids, moles:dgValidTermID/moles:ParentListID)
        or voclib:spot-vocab($voclib:gcmd_science_valids_categories, moles:dgValidTermID/moles:ParentListID)
        or voclib:spot-vocab($voclib:bodc_parameter_usage_vocab, moles:dgValidTermID/moles:ParentListID)
        or voclib:spot-vocab($voclib:bodc_parameter_discovery_vocab, moles:dgValidTermID/moles:ParentListID)]
return
    if (empty($subjects)) then
        (: placeholder entry when no recognised parameter term is present :)
        element Subject {
            element SubjectName {"No subject identiified"},
            element SubjectVocab {"Other"},
            element SubjectCode {"00"}
        }
    else
        for $subject in $subjects
        return (
            (: the standard parameter term itself :)
            element Subject {
                element SubjectName {data($subject/moles:dgValidTerm)},
                element SubjectVocab {data($subject/moles:dgValidTermID/moles:ParentListID)},
                element SubjectCode {data($subject/moles:dgValidTermID/moles:TermID)}
            },
            (: followed by one Subject per sub-term found anywhere below it :)
            for $subterm in $subject//moles:dgValidSubterm
            return
                element Subject {
                    element SubjectName {data($subterm/moles:dgValidTerm)},
                    element SubjectVocab {data($subterm/moles:dgValidTermID/moles:ParentListID)},
                    element SubjectCode {data($subterm/moles:dgValidTermID/moles:TermID)}
                }
        ),
102
(: Date :)
(: MOLES may hold any number of DateSingle / DateRange entries, possibly spread
   over several dgTemporalCoverage elements, whereas MDIP takes one start/end
   pair.  The record is bounded by the first non-empty start and the last
   non-empty end in document order.
   Positions are taken over the whole selection rather than per parent element,
   so each date element receives at most one value; values are copied as plain
   strings, with no numeric or date cast that malformed dates could break. :)
let $coverage := $DE//moles:dgTemporalCoverage
let $startDates := ($coverage/(moles:DateSingle | moles:DateRange/moles:DateRangeStart))[. != '']
let $endDates := ($coverage/(moles:DateSingle | moles:DateRange/moles:DateRangeEnd))[. != '']
return
    if (exists($coverage/(moles:DateSingle | moles:DateRange))) then
        element Date {
            (: an empty selection yields an empty element, as before :)
            element DatasetStartDate {data($startDates[1])},
            element DatasetEndDate {data($endDates[last()])}
        }
    else (), (: no temporal coverage in MOLES: nothing can be emitted :)
161
(: Coordinates, bounding boxes etc. :)
(: MOLES can hold any number of bounding boxes but MDIP only one.  With several
   boxes an aggregate box is built from the min/max of the limits; with a single
   box its limits are copied unchanged.
   The same descendant selection drives both the count and the values, so a lone
   box nested below dgDataSummary is no longer counted yet left unread. :)
let $boxes := $DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox
let $aggregate := count($boxes) > 1
return (
    element WestCoOrdinate {
        if ($aggregate) then min($boxes/moles:LimitWest) else data($boxes/moles:LimitWest)
    },
    element EastCoOrdinate {
        if ($aggregate) then max($boxes/moles:LimitEast) else data($boxes/moles:LimitEast)
    },
    element NorthCoOrdinate {
        if ($aggregate) then max($boxes/moles:LimitNorth) else data($boxes/moles:LimitNorth)
    },
    element SouthCoOrdinate {
        if ($aggregate) then min($boxes/moles:LimitSouth) else data($boxes/moles:LimitSouth)
    }
),
183   
184
185(:
186let $west := min($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox/moles:LimitWest cast as xs:decimal)
187return element WestCoOrdinate {data($west)},
188let $east := max($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox/moles:LimitEast cast as xs:decimal)
189return element EastCoOrdinate {data($east)},
190let $north := max($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox/moles:LimitNorth cast as xs:decimal)
191return element NorthCoOrdinate {data($north)},
192let $south := min($DE/moles:dgDataEntity/moles:dgDataSummary//moles:BoundingBox/moles:LimitSouth cast as xs:decimal)
193return element SouthCoOrdinate {data($south)}
194:)
(: MetadataLanguage: hard-coded, the generated record is always described in English :)
element MetadataLanguage {
    element LanguageName {"English"},
    element LanguageVocab {"UKGemini_LanguageCategories"},
    element LanguageCode {"eng"}
},

(: DatasetIdentifier: repository identifier, the literal 'MDIP' and the local
   identifier, joined with the separator defined in the utilities library :)
let $metadataId := $DE/moles:dgMetadataID
let $separator := $utillib:moles_id_separator
return
    element DatasetIdentifier {
        concat($metadataId/moles:repositoryIdentifier, $separator, 'MDIP', $separator, $metadataId/moles:localIdentifier)
    },

(: Target: one element per structured keyword accepted by voclib:spot-vocab for
   the NDG data-provider vocabulary :)
for $provider in $DE/moles:dgStructuredKeyword[
        voclib:spot-vocab($voclib:ndg_data_provider_vocab, moles:dgValidTermID/moles:ParentListID)]
let $termId := $provider/moles:dgValidTermID
return
    element Target {
        element TargetName {data($provider/moles:dgValidTerm)},
        element TargetVocab {data($termId/moles:ParentListID)},
        element TargetCode {data($termId/moles:TermID)}
    }
212
213}
Note: See TracBrowser for help on using the repository browser.