1 | ''' |
---|
2 | Created on 15 Nov 2011 |
---|
3 | |
---|
4 | @author: mnagni |
---|
5 | ''' |
---|
6 | from ea_model.moles3_4.utilities.mo_responsiblepartyinfo import MO_ResponsiblePartyInfo |
---|
7 | from ea_model.iso_19115_2006_metadata_corrigendum.reference_system_information.md_identifier import MD_Identifier |
---|
8 | from httplib import HTTPConnection |
---|
9 | from xml.etree.ElementTree import XML, tostring |
---|
10 | import time, datetime |
---|
11 | from ea_model.upcomingiso.ci_organisation import CI_Organisation |
---|
12 | from ea_model.upcomingiso.ci_party import CI_Party |
---|
13 | from ea_model.upcomingiso.ci_individual import CI_Individual |
---|
14 | from libs.migration.exception.exceptions import NoDataLineage |
---|
15 | from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_citation import CI_Citation |
---|
16 | from ea_model.iso_19115_2006_metadata_corrigendum.data_quality_information.dq_element import DQ_Element |
---|
17 | from ea_model.iso_19115_2006_metadata_corrigendum.data_quality_information.dq_conformanceresult import DQ_ConformanceResult |
---|
18 | from hashlib import md5 |
---|
19 | from xml.sax.saxutils import unescape, escape |
---|
20 | import html5lib |
---|
21 | from html5lib import treebuilders |
---|
22 | from ea_model.ceda_metadatamodel.ceda_result.ceda_result import CEDA_Result |
---|
23 | from ea_model.moles3_4.result.mo_onlineresource import MO_OnlineResource |
---|
24 | from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.url import URL |
---|
25 | from ea_model.iso_19115_2006_metadata_corrigendum.extent_information.ex_geographicboundingbox import EX_GeographicBoundingBox |
---|
26 | from ea_model.ceda_metadatamodel.ceda_computation.ceda_processing import CEDA_Processing |
---|
27 | from ea_model.ceda_metadatamodel.ceda_acquisition.ceda_instrument import CEDA_Instrument |
---|
28 | from ea_model.ceda_metadatamodel.ceda_observationprocess.ceda_compositeprocess import CEDA_CompositeProcess |
---|
29 | from ea_model.ceda_metadatamodel.ceda_acquisition.ceda_acquisition import CEDA_Acquisition |
---|
30 | from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_date import CI_Date |
---|
31 | from ea_model.iso_19103_2005_schema_language.basic_types.primitive.date_and_time.datetime import DateTime |
---|
32 | from ea_model.iso_19103_2005_schema_language.basic_types.primitive.date_and_time.date import Date |
---|
33 | from ea_model.iso_19108_2006_temporal_schema.temporal_reference_system.tm_position import TM_Position |
---|
34 | from ea_model.iso_19108_2006_temporal_schema.temporal_objects.tm_instant import TM_Instant |
---|
35 | from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_contact import CI_Contact |
---|
36 | from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_address import CI_Address |
---|
37 | from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_onlineresource import CI_OnlineResource |
---|
38 | from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_telephone import CI_Telephone |
---|
39 | from ea_model.iso_19108_2006_temporal_schema.temporal_objects.tm_period import TM_Period |
---|
40 | from ea_model.iso_19115_2006_metadata_corrigendum.constraint_information.md_constraints import MD_Constraints |
---|
41 | from ea_model.iso_19115_2006_metadata_corrigendum.constraint_information.md_legalconstraints import MD_LegalConstraints |
---|
42 | from ea_model.ceda_metadatamodel.ceda_project.ceda_project import CEDA_Project |
---|
43 | from ea_model.iso_19115_2006_metadata_corrigendum.metadata_entity_set_information.md_metadata import MD_Metadata |
---|
44 | from ea_model.ceda_metadatamodel.ceda_utilities.ceda_review import CEDA_Review |
---|
45 | from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_responsibleparty import CI_ResponsibleParty |
---|
46 | from ea_model.iso_19115_2006_metadata_corrigendum.maintenance_information.md_maintenancefrequencycode import MD_MaintenanceFrequencyCode |
---|
47 | |
---|
# Root path of the atom collections (eXist REST interface)
base = '/exist/rest/atoms'

# Document status values accepted by the buildExist*Path functions
DS_pUBLISHED = 'published'
DS_WORKING = 'working'
DS_PUBLISHED = 'Published'
docStatus = (DS_pUBLISHED, DS_WORKING, DS_PUBLISHED)

# Document types (collection names below a status collection)
DT_DEPLOYMENTS = 'deployments'
DT_DATA_ENTITIES = 'data_entities'
DT_DEPLOYMENT_DATA = 'deployment_data'
DT_DATA_GRANULES = 'data_granules'
docTypes = (DT_DEPLOYMENTS, DT_DATA_ENTITIES, DT_DEPLOYMENT_DATA, DT_DATA_GRANULES)

# Document owners
DO_BADC = 'badc.nerc.ac.uk'
DO_NEODC = 'neodc.nerc.ac.uk'
DO_UKSSDC = 'ukssdc.ac.uk'

CEDA = 'Centre for Environmental Data Archive'
docOwners = (DO_BADC, DO_NEODC, DO_UKSSDC)

# XML namespaces in the '{uri}' form used by ElementTree paths
atomNS = "{http://www.w3.org/2005/Atom}"
existNS = "{http://exist.sourceforge.net/NS/exist}"
molesNS = "{http://ndg.nerc.ac.uk/schema/moles2beta}"
htmlNS = "{http://www.w3.org/1999/xhtml}"
georssNS = "{http://www.georss.org/georss/10}"
gmlNS = "{http://www.opengis.net/gml}"

# strptime formats used by the date parsing functions of this module
date_format = '%Y-%m-%d'
datetime_format = date_format + 'T%H:%M:%SZ'

# Host and port used by _getDocument
ihost = 'bora.badc.rl.ac.uk'
iport = '8080'

# Suffixes looked for in the 'rel' attribute of the atom <link> elements
linkMarkers = ['Deployment', 'DOWNLOAD', 'DOCUMENTATION', 'ACCESS', 'LOGO', 'ACTIVITY', 'DPT', 'OBS']

#MD_Identifier codes
MD_CODE_MOLES2_CITATION = 'ceda_moles2_citation'
---|
83 | |
---|
# Module-level html5lib parser building ElementTree nodes without the XHTML namespace
htmlParser = html5lib.HTMLParser(
    tree=treebuilders.getTreeBuilder("etree"),
    namespaceHTMLElements=False)
---|
85 | |
---|
def calculateHash(text):
    """
    Returns an md5 hexadecimal representation of the given text
    @param text: the string to encode; a unicode string is encoded as UTF-8 before being hashed
    @return: the hexadecimal md5 value of the given text
    """
    # md5 digests bytes: encode explicitly so that non-ASCII unicode text is
    # hashed instead of failing on the implicit ascii conversion
    if not isinstance(text, bytes):
        text = text.encode('utf-8')
    return md5(text).hexdigest()
---|
95 | |
---|
def buildExistDocPath(docStatus, docType, docOwner, docName):
    '''
        Builds the path of a document: the owner path followed by '/' and the document name.
        @param docStatus: one value from commons.docStatus
        @param docType: one value from commons.docTypes
        @param docOwner: one value from commons.docOwners
        @param docName: the name of the document
        @return: the document path as a string
    '''
    ownerPath = buildExistOwnerPath(docStatus, docType, docOwner)
    return '%s/%s' % (ownerPath, docName)
---|
104 | |
---|
def buildExistOwnerPath(docStatus, docType, docOwner):
    '''
        Builds the path of an owner collection: the type path followed by '/' and the owner.
        @param docStatus: one value from commons.docStatus
        @param docType: one value from commons.docTypes
        @param docOwner: one value from commons.docOwners
        @return: the owner path as a string
    '''
    typePath = buildExistTypePath(docStatus, docType)
    return '%s/%s' % (typePath, docOwner)
---|
112 | |
---|
def buildExistTypePath(docStatus, docType):
    '''
        Builds the path of a type collection: the status path followed by '/' and the type.
        @param docStatus: one value from commons.docStatus
        @param docType: one value from commons.docTypes
        @return: the type path as a string
    '''
    statusPath = buildExistStatusPath(docStatus)
    return '%s/%s' % (statusPath, docType)
---|
119 | |
---|
def buildExistStatusPath(docStatus):
    '''
        Builds the path of a status collection below '/exist/rest/atoms'.
        @param docStatus: one value from commons.docStatus
        @return: the status path as a string
    '''
    statusTemplate = '/exist/rest/atoms/%s'
    return statusTemplate % (docStatus)
---|
125 | |
---|
def getAtomDocumentByMO(migrationObject):
    """
    Retrieves the atom document described by a migration object.
    The document type is selected by the class name of the object, which has to be
    'DeploymentsMigration', 'DataEntityMigration' or 'DeploymentDataMigration'.
    @param migrationObject: an object exposing doc_status, doc_owner and doc_name
    @return: the result of getAtomDocumentAsElementtree for that document
    @raise Exception: if migrationObject is None
    """
    if migrationObject is None:
        raise Exception("migrationObject is None")
    typeByClassName = {
        'DeploymentsMigration': DT_DEPLOYMENTS,
        'DataEntityMigration': DT_DATA_ENTITIES,
        'DeploymentDataMigration': DT_DEPLOYMENT_DATA,
    }
    status = migrationObject.doc_status
    docType = typeByClassName[type(migrationObject).__name__]
    owner = migrationObject.doc_owner
    name = migrationObject.doc_name
    return getAtomDocumentAsElementtree(status, docType, owner, name)
---|
131 | |
---|
def getAtomDocumentHashByMO(migrationObject):
    """
    Calculates the hash of the atom document described by a migration object.
    The document type is selected by the class name of the object, which has to be
    'DeploymentsMigration', 'DataEntityMigration' or 'DeploymentDataMigration'.
    @param migrationObject: an object exposing doc_status, doc_owner and doc_name
    @return: the result of calculateHash applied to the document text
    @raise Exception: if migrationObject is None
    """
    if migrationObject is None:
        raise Exception("migrationObject is None")
    typeByClassName = {
        'DeploymentsMigration': DT_DEPLOYMENTS,
        'DataEntityMigration': DT_DATA_ENTITIES,
        'DeploymentDataMigration': DT_DEPLOYMENT_DATA,
    }
    status = migrationObject.doc_status
    docType = typeByClassName[type(migrationObject).__name__]
    owner = migrationObject.doc_owner
    name = migrationObject.doc_name
    return calculateHash(_getAtomDocumentAsText(status, docType, owner, name))
---|
138 | |
---|
def getAtomDocumentHash(docStatus, docType, docOwner, docName):
    """
    Calculates the hash of the document identified by the given path components.
    @param docStatus: one value from commons.docStatus
    @param docType: one value from commons.docTypes
    @param docOwner: one value from commons.docOwners
    @param docName: the name of the document
    @return: the result of calculateHash applied to the retrieved document
    """
    documentText = _getDocument(buildExistDocPath(docStatus, docType, docOwner, docName))
    return calculateHash(documentText)
---|
143 | |
---|
def hasAtomDocumentSameHash(migrationObject):
    """
    Compares the hash of the retrieved document with the one stored in the migration object.
    @param migrationObject: an object accepted by getAtomDocumentHashByMO and exposing doc_hash
    @return: True if the two hashes are equal, False otherwise
    """
    currentHash = getAtomDocumentHashByMO(migrationObject)
    return currentHash == migrationObject.doc_hash
---|
146 | |
---|
def getAtomDocumentByType(migrationObject, docType):
    """
    Retrieves the atom document described by a migration object, using an explicit document type.
    @param migrationObject: an object exposing doc_status, doc_owner and doc_name
    @param docType: one value from commons.docTypes
    @return: the result of getAtomDocumentAsElementtree for that document
    @raise Exception: if migrationObject is None
    """
    if migrationObject is None:
        raise Exception("migrationObject is None")
    status = migrationObject.doc_status
    owner = migrationObject.doc_owner
    name = migrationObject.doc_name
    return getAtomDocumentAsElementtree(status, docType, owner, name)
---|
151 | |
---|
def _getAtomDocumentAsText(docStatus, docType, docOwner, docName):
    """
    Retrieves a document, as returned by _getDocument, given its path components.
    @param docStatus: one value from commons.docStatus
    @param docType: one value from commons.docTypes
    @param docOwner: one value from commons.docOwners
    @param docName: the name of the document
    @return: the document body
    """
    return _getDocument(buildExistDocPath(docStatus, docType, docOwner, docName))
---|
155 | |
---|
def getAtomDocumentAsElementtree(docStatus, docType, docOwner, docName):
    """
    Retrieves and parses a document given its path components.
    @param docStatus: one value from commons.docStatus
    @param docType: one value from commons.docTypes
    @param docOwner: one value from commons.docOwners
    @param docName: the name of the document
    @return: the parsed document, as returned by _getXMLDocument
    """
    return _getXMLDocument(buildExistDocPath(docStatus, docType, docOwner, docName))
---|
159 | |
---|
def _getXMLDocument(source):
    """
    Retrieves the document at the given path and parses it with ElementTree.
    @param source: the path of the document
    @return: the root Element of the parsed document
    """
    documentText = _getDocument(source)
    return XML(documentText)
---|
162 | |
---|
def stringToTimestamp(timestring):
    '''
        Returns a naive datetime.datetime instance holding the date and time fields of the given string.
        @param timestring: a time string formatted as '%Y-%m-%dT%H:%M:%SZ'
        @return: a datetime.datetime instance (not a float such as time.time() returns)
        @raise ValueError: if timestring does not match the format
    '''
    # Parse straight into a datetime: going through time.mktime() and
    # fromtimestamp() interprets the fields as local time, which alters values
    # falling in a DST gap and may fail for dates preceding the epoch.
    return datetime.datetime.strptime(timestring, datetime_format)
---|
169 | |
---|
def isoDateStringToTimeDate(datestring):
    '''
        Parses a date string into a datetime.datetime instance.
        @param datestring: a date string formatted as '%Y-%m-%d'
        @return: a datetime.datetime instance
    '''
    parse = datetime.datetime.strptime
    return parse(datestring, date_format)
---|
176 | |
---|
def isoDateTimeStringToTimeDate(timestring):
    '''
        Parses a date-time string into a datetime.datetime instance.
        @param timestring: a time string formatted as '%Y-%m-%dT%H:%M:%SZ'
        @return: a datetime.datetime instance
    '''
    parse = datetime.datetime.strptime
    return parse(timestring, datetime_format)
---|
183 | |
---|
def _getDocument(source):
    """
    Retrieves with an HTTP GET the body of a resource from the ihost:iport server.
    The HTTP status of the response is not checked: the body is returned whatever the status is.
    @param source: the path of the resource on the server
    @return: the response body as returned by the server
    """
    conn = HTTPConnection(host = ihost, port = iport)
    try:
        conn.connect()
        conn.request('GET', source)
        return conn.getresponse().read()
    finally:
        # release the connection even if the request or the read fails
        conn.close()
---|
195 | |
---|
196 | def _returnNotNoneText(element): |
---|
197 | if element is None: |
---|
198 | return None |
---|
199 | return element.text |
---|
200 | |
---|
def findMolesCreationDate(resourceXML):
    """
    Returns the text of the moles entity/molesISO/created element.
    @param resourceXML: the Element to search in
    @return: the element text, or None if the element is missing
    """
    createdPath = '%sentity/%smolesISO/%screated' % ((molesNS,) * 3)
    return _returnNotNoneText(resourceXML.find(createdPath))
---|
204 | |
---|
def findMolesLineage(dataEntityMigration):
    """
    Returns the text of the moles entity/molesISO/lineage element of the migration document.
    @param dataEntityMigration: a migration object accepted by getAtomDocumentByMO
    @return: the text of the lineage element
    @raise NoDataLineage: if the document has no lineage element
    """
    lineagePath = '%sentity/%smolesISO/%slineage' % ((molesNS,) * 3)
    lineage = getAtomDocumentByMO(dataEntityMigration).find(lineagePath)
    if lineage is not None:
        return lineage.text
    raise NoDataLineage(dataEntityMigration)
---|
211 | |
---|
def extractMolesProviderID(migrationObject):
    """
    Returns the text of the moles entity/molesISO/providerID element of the migration document.
    @param migrationObject: a migration object accepted by getAtomDocumentByMO
    @return: the element text, or None if the element is missing
    """
    providerPath = '%sentity/%smolesISO/%sproviderID' % ((molesNS,) * 3)
    document = getAtomDocumentByMO(migrationObject)
    return _returnNotNoneText(document.find(providerPath))
---|
216 | |
---|
def extractMolesQuality(migrationObject):
    """
    Returns the text of the moles entity/molesISO/quality element of the migration document.
    @param migrationObject: a migration object accepted by getAtomDocumentByMO
    @return: the element text, or None if the element is missing
    """
    qualityPath = '%sentity/%smolesISO/%squality' % ((molesNS,) * 3)
    document = getAtomDocumentByMO(migrationObject)
    return _returnNotNoneText(document.find(qualityPath))
---|
221 | |
---|
def extractMolesTemporalRange(migrationObject):
    """
    Returns the text of the moles temporalRange element, a direct child of the document root.
    @param migrationObject: a migration object accepted by getAtomDocumentByMO
    @return: the element text, or None if the element is missing
    """
    document = getAtomDocumentByMO(migrationObject)
    return _returnNotNoneText(document.find(molesNS + 'temporalRange'))
---|
226 | |
---|
def extractMolesCreationDate(migrationObject):
    """
    Returns the moles creation date of the migration document.
    @param migrationObject: a migration object accepted by getAtomDocumentByMO
    @return: the result of findMolesCreationDate on the retrieved document
    """
    return findMolesCreationDate(getAtomDocumentByMO(migrationObject))
---|
230 | |
---|
def extractQuality(dataEntityMigration):
    """
    Returns the text of the moles entity/molesISO/quality element of the migration document.
    @param dataEntityMigration: a migration object accepted by getAtomDocumentByMO
    @return: the element text, or None if the element is missing
    """
    qualityPath = '%sentity/%smolesISO/%squality' % ((molesNS,) * 3)
    document = getAtomDocumentByMO(dataEntityMigration)
    return _returnNotNoneText(document.find(qualityPath))
---|
235 | |
---|
def extractUpdateFrequency(dataEntityMigration):
    """
    Returns the text of the moles entity/molesISO/updateFrequency element of the migration document.
    @param dataEntityMigration: a migration object accepted by getAtomDocumentByMO
    @return: the element text, or None if the element is missing
    """
    frequencyPath = '%sentity/%smolesISO/%supdateFrequency' % ((molesNS,) * 3)
    document = getAtomDocumentByMO(dataEntityMigration)
    return _returnNotNoneText(document.find(frequencyPath))
---|
240 | |
---|
def extractContent(dataEntityMigration):
    """
    Collects the div elements, found in the atom <content> element, whose
    'property' attribute starts with 'cedacat'.
    @param dataEntityMigration: a migration object accepted by getAtomDocumentByMO
    @return: a dictionary mapping the part of the property following the first ':'
    to the escaped serialization of the div; it is empty if the content has no text
    """
    document = getAtomDocumentByMO(dataEntityMigration)
    text = _returnNotNoneText(document.find(atomNS + 'content'))
    contentDict = {}
    if not text:
        return contentDict
    # the content text is escaped HTML: unescape it before parsing
    parsed = htmlParser.parse(unescape(text))
    for div in parsed.findall('body//div'):
        prop = div.get('property')
        if not prop or not prop.startswith('cedacat'):
            continue
        contentDict[prop.split(':')[1]] = escape(tostring(div))
    return contentDict
---|
258 | |
---|
259 | def _extractAuthors(authorsCSV): |
---|
260 | if authorsCSV is None: |
---|
261 | return [] |
---|
262 | authors = authorsCSV.split(',') |
---|
263 | for index in range(len(authors)): |
---|
264 | authors[index] = authors[index].strip() |
---|
265 | if len(authors[index]) == 0: |
---|
266 | authors.remove(authors[index]) |
---|
267 | return authors |
---|
268 | |
---|
def findAuthorsInResource(resourceMigration):
    '''
        Returns a dictionary with the following keys:
        'authors': the result of findAuthorInResource on the migration document
        'contributors': the result of findContributorInResource on the migration document
        @param resourceMigration: a migration object accepted by getAtomDocumentByMO
    '''
    document = getAtomDocumentByMO(resourceMigration)
    return {
        'authors': findAuthorInResource(document),
        'contributors': findContributorInResource(document),
    }
---|
280 | |
---|
def findAuthorInResource(resourceXML):
    """
    Returns the text of the first atom author/name element.
    @param resourceXML: the Element to search in
    @return: the element text, or None if the element is missing
    """
    namePath = '/'.join((atomNS + 'author', atomNS + 'name'))
    return _returnNotNoneText(resourceXML.find(namePath))
---|
284 | |
---|
def findContributorInResource(resourceXML):
    """
    Returns the text of the first atom contributor/name element.
    @param resourceXML: the Element to search in
    @return: the element text, or None if the element is missing
    """
    namePath = '/'.join((atomNS + 'contributor', atomNS + 'name'))
    return _returnNotNoneText(resourceXML.find(namePath))
---|
288 | |
---|
def findPublishedDate(resourceMigration):
    """
    Returns the text of the atom <published> element of the migration document.
    @param resourceMigration: a migration object accepted by getAtomDocumentByMO
    @return: the element text, or None if the element is missing
    """
    document = getAtomDocumentByMO(resourceMigration)
    return _returnNotNoneText(document.find(atomNS + 'published'))
---|
293 | |
---|
def findUpdatedDate(resourceMigration):
    """
    Returns the text of the atom <updated> element of the migration document.
    @param resourceMigration: a migration object accepted by getAtomDocumentByMO
    @return: the element text, or None if the element is missing
    """
    document = getAtomDocumentByMO(resourceMigration)
    return _returnNotNoneText(document.find(atomNS + 'updated'))
---|
298 | |
---|
def findSummary(resourceMigration):
    """
    Returns the text of the atom <summary> element of the migration document.
    @param resourceMigration: a MigrationObject instance
    @return: the <summary> text, or None if the element is missing
    """
    document = getAtomDocumentByMO(resourceMigration)
    return _returnNotNoneText(document.find(atomNS + 'summary'))
---|
308 | |
---|
def findID(dataEntXML):
    """
    Returns the text of the atom <id> element.
    @param dataEntXML: the Element to search in
    @return: the element text, or None if the element is missing
    """
    return _returnNotNoneText(dataEntXML.find(atomNS + 'id'))
---|
312 | |
---|
313 | def _updateLinksDict(linksDict, link, linkMarker): |
---|
314 | if not linksDict.has_key(linkMarker): |
---|
315 | linksDict[linkMarker] = [] |
---|
316 | rel = link.get('rel') |
---|
317 | if rel and rel.endswith('/' + linkMarker): |
---|
318 | linksDict[linkMarker].append({'href': link.get('href'), 'title': link.get('title')}) |
---|
319 | |
---|
def _extractLinks(dataEntXML, markers):
    """
    Groups by marker the atom <link> elements which are direct children of the given element.
    @param dataEntXML: the Element to search in
    @param markers: the markers each link is checked against
    @return: the dictionary filled by _updateLinksDict; it is empty if no link is found
    """
    collected = {}
    for atomLink in dataEntXML.findall(atomNS + 'link'):
        for marker in markers:
            _updateLinksDict(collected, atomLink, marker)
    return collected
---|
327 | |
---|
def findLinksInMigrationDocument(dataEntityMigration):
    """
    Groups by marker (see linkMarkers) the links of the migration document.
    @param dataEntityMigration: a migration object accepted by getAtomDocumentByMO
    @return: the dictionary returned by _extractLinks
    """
    return _extractLinks(getAtomDocumentByMO(dataEntityMigration), linkMarkers)
---|
332 | |
---|
333 | |
---|
334 | |
---|
def findDownloadLinksInMigrationDocument(migrationObject):
    """
    Return a list of dictionaries describing a <link rel="...DOWNLOAD..."> tag type
    Each dictionary has two keys: 'href' and 'title'
    @param migrationObject: the migration instance to retrieve and parse
    @return: a list of dictionaries, empty if the document has no such link
    """
    # default to an empty list, as documented, rather than to an empty dictionary
    return findLinksInMigrationDocument(migrationObject).get('DOWNLOAD', [])
---|
346 | |
---|
def findAccessLinksInMigrationDocument(migrationObject):
    """
    Return a list of dictionaries describing a <link rel="...ACCESS..."> tag type
    Each dictionary has two keys: 'href' and 'title'
    @param migrationObject: the migration instance to retrieve and parse
    @return: a list of dictionaries, empty if the document has no such link
    """
    # default to an empty list, as documented, rather than to an empty dictionary
    return findLinksInMigrationDocument(migrationObject).get('ACCESS', [])
---|
358 | |
---|
def findDocumentationInMigrationDocument(migrationObject):
    """
    Return a list of dictionaries describing a <link rel="...DOCUMENTATION..."> tag type
    Each dictionary has two keys: 'href' and 'title'
    @param migrationObject: the migration instance to retrieve and parse
    @return: a list of dictionaries, empty if the document has no such link
    """
    # default to an empty list, as documented, rather than to an empty dictionary
    return findLinksInMigrationDocument(migrationObject).get('DOCUMENTATION', [])
---|
370 | |
---|
def findDOIInMigrationDocument(migrationObject):
    """
    Return the first <link rel="...DOCUMENTATION..."> description whose href starts with 'doi:'
    The dictionary has two keys: 'href' and 'title'
    @param migrationObject: the migration instance to retrieve and parse
    @return: a dictionary relative to the DOI, None otherwise
    """
    for link in findDocumentationInMigrationDocument(migrationObject):
        href = link['href']
        # href is None when the <link> element has no such attribute: skip it
        if href and href.startswith('doi:'):
            return link
    return None
---|
382 | |
---|
def findDeploymentsInDE(dataEntityMigration):
    """
    Returns the document names of the 'Deployment' links of the migration document.
    @param dataEntityMigration: a migration object accepted by getAtomDocumentByMO
    @return: a list of the names extracted by _extractLinksByMarker, each followed by '.atom'
    """
    linksDict = findLinksInMigrationDocument(dataEntityMigration)
    documentNames = []
    for depName in _extractLinksByMarker(linksDict, 'Deployment'):
        documentNames.append(depName + '.atom')
    return documentNames
---|
387 | |
---|
def findSubTypeInDPT(resourceMigration):
    """
    Returns the label of the first atom <category> element whose term is "ATOM_SUBTYPE".
    @param resourceMigration: a migration object accepted by getAtomDocumentByMO
    @return: the value of the 'label' attribute, or None if no such category exists
    """
    document = getAtomDocumentByMO(resourceMigration)
    for category in document.findall(atomNS + 'category'):
        if category.get("term") != "ATOM_SUBTYPE":
            continue
        return category.get("label")
    return None
---|
394 | |
---|
def extractTitle(deploymentMigration):
    """
    Returns the text of the atom <title> element of the migration document.
    @param deploymentMigration: a migration object accepted by getAtomDocumentByMO
    @return: the element text, or None if the element is missing
    """
    document = getAtomDocumentByMO(deploymentMigration)
    return _returnNotNoneText(document.find(atomNS + 'title'))
---|
399 | |
---|
def extractSummary(deploymentMigration, dataEntityMigration):
    """
    Returns the atom <summary> text of the deployment document or, if that is
    missing or empty, the one of the data entity document.
    @param deploymentMigration: a migration object accepted by getAtomDocumentByMO
    @param dataEntityMigration: a migration object accepted by getAtomDocumentByMO
    @return: the summary text; may be None or empty if both documents lack one
    """
    summaryTag = atomNS + 'summary'
    text = _returnNotNoneText(getAtomDocumentByMO(deploymentMigration).find(summaryTag))
    if not text:
        # the data entity document is retrieved only when needed
        text = _returnNotNoneText(getAtomDocumentByMO(dataEntityMigration).find(summaryTag))
    return text
---|
409 | |
---|
def extractGeographicExtentInMigrationDocument(migrationObject):
    """
    Extracts, if existing, the georss:where/gml:Envelope/upperCorner and lowerCorner elements.
    The first two whitespace separated values of upperCorner are read as east and north,
    those of lowerCorner as west and south.
    @param migrationObject: a migration object to retrieve to parse for data
    @return: None if no data are found, otherwise a dictionary with keys: 'east', 'north', 'west', 'south' where
    the values are float
    """
    resourceXML = getAtomDocumentByMO(migrationObject)
    envelopePath = '%swhere/%sEnvelope/' % (georssNS, gmlNS)
    upperCorner = resourceXML.find(envelopePath + gmlNS + 'upperCorner')
    lowerCorner = resourceXML.find(envelopePath + gmlNS + 'lowerCorner')
    if upperCorner is None or lowerCorner is None:
        return None
    # an empty corner element carries no data: report it as missing instead
    # of failing on None.split()
    if not upperCorner.text or not lowerCorner.text:
        return None
    upperCornerData = upperCorner.text.split()
    lowerCornerData = lowerCorner.text.split()
    return {'east': float(upperCornerData[0]), 'north': float(upperCornerData[1]),
            'west': float(lowerCornerData[0]), 'south': float(lowerCornerData[1])}
---|
426 | |
---|
def findLinksInDeployment(migrationObject):
    """
    Returns, for each marker in linkMarkers, the names extracted from the links of the migration document.
    @param migrationObject: a MigrationObject instance
    @return: a dictionary mapping each marker to the list returned by _extractLinksByMarker
    """
    linksDict = findLinksInMigrationDocument(migrationObject)
    return dict((marker, _extractLinksByMarker(linksDict, marker)) for marker in linkMarkers)
---|
438 | |
---|
439 | def _extractLinksByMarker(linksDict, marker): |
---|
440 | dpt = [] |
---|
441 | if linksDict.has_key(marker): |
---|
442 | for link in linksDict[marker]: |
---|
443 | try: |
---|
444 | linkLongName = link['href'].split('/')[-1] |
---|
445 | linkName = linkLongName.rsplit('__ATOM__')[1] |
---|
446 | dpt.append(linkName) |
---|
447 | except Exception as ex: |
---|
448 | print "WARN - Cannot extractLinksByMarker %s" % (link) |
---|
449 | return dpt |
---|
450 | |
---|
451 | |
---|
def getResourceRefs(deploymentRefs):
    '''
        Returns the list of the exist collection/resource elements of the document at the given path
        @param deploymentRefs: the path of the document to retrieve
    '''
    resourcePath = '/'.join((existNS + 'collection', existNS + 'resource'))
    return _getXMLDocument(deploymentRefs).findall(resourcePath)
---|
459 | |
---|
def getOwnerRefs(docStatus, docType, docOwner):
    '''
        Returns the list of the exist collection/resource elements of an owner collection
        @param docStatus: one value from commons.docStatus
        @param docType: one value from commons.docTypes
        @param docOwner: one value from commons.docOwners
    '''
    resourcePath = '/'.join((existNS + 'collection', existNS + 'resource'))
    ownerDocument = _getXMLDocument(buildExistOwnerPath(docStatus, docType, docOwner))
    return ownerDocument.findall(resourcePath)
---|
467 | |
---|
def getTypeRefs(docStatus, docType):
    '''
        Returns the list of the exist collection/resource elements of a type collection
        @param docStatus: one value from commons.docStatus
        @param docType: one value from commons.docTypes
    '''
    resourcePath = '/'.join((existNS + 'collection', existNS + 'resource'))
    typeDocument = _getXMLDocument(buildExistTypePath(docStatus, docType))
    return typeDocument.findall(resourcePath)
---|
475 | |
---|
476 | |
---|
477 | |
---|
def getCollectionRefs(publishedRefs):
    '''
        Returns the list of the exist collection/collection elements of the document at the given path
        @param publishedRefs: the path of the document to retrieve
    '''
    collectionPath = '/'.join((existNS + 'collection', existNS + 'collection'))
    return _getXMLDocument(publishedRefs).findall(collectionPath)
---|
485 | |
---|
def getResource(source, resourceName):
    """
    Retrieves and parses the resource named resourceName below the source path.
    @param source: the path containing the resource
    @param resourceName: the name of the resource
    @return: the root Element of the parsed resource
    """
    return XML(_getDocument('%s/%s' % (source, resourceName)))
---|
490 | |
---|
def createDateTime(datetime):
    '''
        Creates a new DateTime instance copying the fields of the given value.
        The century is calculated as (year // 100) + 1.
        @param datetime: a datetime.datetime instance
        @return: the new DateTime instance
    '''
    dateTime = DateTime()
    # explicit floor division: same result the '/' operator gives on Python 2 integers
    dateTime.century = (datetime.year // 100) + 1
    for field in ('year', 'month', 'day', 'hour', 'minute', 'second'):
        setattr(dateTime, field, getattr(datetime, field))
    dateTime.timeZone = datetime.tzinfo
    return dateTime
---|
506 | |
---|
def createDate(date):
    '''
        Creates a new Date instance copying the fields of the given value.
        The century is calculated as (year // 100) + 1.
        @param date: a datetime.datetime instance
        @return: the new Date instance
    '''
    idate = Date()
    # explicit floor division: same result the '/' operator gives on Python 2 integers
    idate.century = (date.year // 100) + 1
    for field in ('year', 'month', 'day'):
        setattr(idate, field, getattr(date, field))
    return idate
---|
518 | |
---|
def createTM_Position(anyOther = None, date8601 = None, dateTime8601 = None, time8601 = None):
    '''
        Creates a new TM_Position instance; only the parameters having a true value are assigned
        @param anyOther: a TM_TemporalPosition instance
        @param date8601: a Date instance
        @param dateTime8601: a DateTime instance
        @param time8601: a Time instance
    '''
    tm_position = TM_Position()
    candidates = (('anyOther', anyOther), ('date8601', date8601),
                  ('dateTime8601', dateTime8601), ('time8601', time8601))
    for attribute, value in candidates:
        if value:
            setattr(tm_position, attribute, value)
    return tm_position
---|
537 | |
---|
def createTM_Instant(position):
    '''
        Creates a new TM_Instant instance
        @param position: a TM_Position instance
    '''
    instant = TM_Instant()
    setattr(instant, 'position', position)
    return instant
---|
546 | |
---|
def createTM_Period(begin, end):
    '''
        Creates a new TM_Period instance
        @param begin: the value assigned to the 'begin' attribute of the period
        @param end: the value assigned to the 'end' attribute of the period
    '''
    period = TM_Period()
    period.begin, period.end = begin, end
    return period
---|
556 | |
---|
def createCI_Address(deliveryPoint = None, electronicMailAddress = None, city = None, country = None, postalCode = None):
    '''
        Creates a new CI_Address instance; only the parameters having a true value are assigned
        @param deliveryPoint: an array of Strings
        @param electronicMailAddress: an array of Strings
        @param city: a string
        @param country: a string
        @param postalCode: a string
    '''
    ci_address = CI_Address()
    if deliveryPoint:
        ci_address.deliveryPoint = deliveryPoint
    if electronicMailAddress:
        ci_address.electronicMailAddress = electronicMailAddress
    if city:
        ci_address.city = city
    if country:
        ci_address.country = country
    # postalCode is now assigned once; the check was previously duplicated
    if postalCode:
        ci_address.postalCode = postalCode
    return ci_address
---|
580 | |
---|
def createCI_OnlineResource(linkage, name=None):
    '''
        Creates a new CI_OnlineResource instance
        @param linkage: a string (the URL class is associated with a String)
        @param name: a String, assigned only if it has a true value
    '''
    resource = CI_OnlineResource()
    resource.linkage = linkage
    if not name:
        return resource
    resource.name = name
    return resource
---|
592 | |
---|
def createCI_Telephone(voice=None, facsimile=None):
    '''
        Creates a new CI_Telephone instance; only the parameters having a true value are assigned
        @param voice: an array of strings
        @param facsimile: an array of strings
    '''
    ci_telephone = CI_Telephone()
    if voice:
        ci_telephone.voice = voice
    if facsimile:
        # the facsimile numbers, not the voice ones, belong here
        ci_telephone.facsimile = facsimile
    return ci_telephone
---|
605 | |
---|
def createCI_Contact(phone, address = None, onlineResource = None):
    '''
        Creates a new CI_Contact instance; phone is always assigned, the other
        parameters only if they have a true value
        @param phone: a CI_Telephone instance
        @param address: a CI_Address instance
        @param onlineResource: a CI_OnlineResource instance
    '''
    contact = CI_Contact()
    contact.phone = phone
    for attribute, value in (('address', address), ('onlineResource', onlineResource)):
        if value:
            setattr(contact, attribute, value)
    return contact
---|
620 | |
---|
def createCI_Individual(name = None, contactInfo = None):
    '''
        Creates a new CI_Individual instance; only the parameters having a true value are assigned
        @param name: a String
        @param contactInfo: an array of CI_Contact
    '''
    individual = CI_Individual()
    for attribute, value in (('name', name), ('contactInfo', contactInfo)):
        if value:
            setattr(individual, attribute, value)
    return individual
---|
633 | |
---|
def createCI_Organization(name = None, contactInfo = None):
    '''
        Creates a new CI_Organisation instance; only the parameters having a true value are assigned
        @param name: a String
        @param contactInfo: an array of CI_Contact
    '''
    organisation = CI_Organisation()
    for attribute, value in (('name', name), ('contactInfo', contactInfo)):
        if value:
            setattr(organisation, attribute, value)
    return organisation
---|
646 | |
---|
def createMO_Organization(name = None, contactInfo = None):
    '''
        Creates a new MO_Organization instance.
        TEMPORARILY RETURNS A CI_ORGANIZATION BECAUSE A DB SCHEMA PROBLEM!!!!
        Only the parameters having a true value are assigned
        @param name: a String
        @param contactInfo: an array of CI_Contact
    '''
    organisation = CI_Organisation()
    for attribute, value in (('name', name), ('contactInfo', contactInfo)):
        if value:
            setattr(organisation, attribute, value)
    return organisation
---|
660 | |
---|
def createMO_ResponsiblePartyInfo(role, i_party):
    """
    Builds an MO_ResponsiblePartyInfo for the given role and parties.
    @param role: a CI_RoleCode/MO_RoleValue assigned to this ResponsibleParty
    @param i_party: a list of MO_Organization/CI_Individual instances, added to
                    the party collection of the new instance
    @return: the new MO_ResponsiblePartyInfo instance
    """
    responsible_info = MO_ResponsiblePartyInfo()
    responsible_info.role = role
    responsible_info.party.extend(i_party)
    return responsible_info
670 | |
---|
671 | |
---|
def createCI_Date(dateType, date=None):
    """
    Builds a CI_Date of the given type.
    @param dateType: a CI_DateTypeCode value; always assigned
    @param date: a DateTime instance; assigned only when truthy
    @return: the new CI_Date instance
    """
    new_date = CI_Date()
    new_date.dateType = dateType
    if not date:
        return new_date
    new_date.date = date
    return new_date
683 | |
---|
def createCI_Citation(title, date=None, citedResponsibleParty=None):
    """
    Creates a new CI_Citation
    @param title: the CI_Citation title
    @param date: a list of CI_Date instances; ignored unless it is a non-empty list
    @param citedResponsibleParty: a list of CI_ResponsibleParty instances; ignored
                                  when empty or None
    @return: the new CI_Citation instance
    """
    ci_citation = CI_Citation()
    ci_citation.title = title
    # isinstance also accepts list subclasses, which the former exact type
    # comparison silently dropped.
    if date and isinstance(date, list):
        ci_citation.date = date
    if citedResponsibleParty:
        # Add the parties to the citedResponsibleParty collection; the previous
        # code called extend() on the citation object itself.
        # NOTE(review): assumes the model initialises citedResponsibleParty as a
        # list, as MO_ResponsiblePartyInfo.party appears to be -- confirm.
        ci_citation.citedResponsibleParty.extend(citedResponsibleParty)
    return ci_citation
698 | |
---|
def createMD_Constraints(useLimitation=None):
    """
    Builds an MD_Constraints instance.
    @param useLimitation: a string array; assigned only when it is a
                          non-empty object whose type is exactly list
    @return: the new MD_Constraints instance
    """
    constraints = MD_Constraints()
    # Anything that is empty or not a plain list leaves the instance untouched.
    if not useLimitation or type(useLimitation) is not list:
        return constraints
    constraints.useLimitation = useLimitation
    return constraints
708 | |
---|
def createMD_LegalConstraints(useLimitation=None, accessConstrains=None):
    """
    Creates a new MD_LegalConstraints
    @param useLimitation: a string array; ignored unless it is a non-empty list
    @param accessConstrains: an MD_RestrictionCode array; ignored unless it is a
                             non-empty list (the parameter keeps its historical
                             spelling so existing keyword callers still work)
    @return: the new MD_LegalConstraints instance
    """
    md_legalconstraints = MD_LegalConstraints()
    if useLimitation and isinstance(useLimitation, list):
        md_legalconstraints.useLimitation = useLimitation
    if accessConstrains and isinstance(accessConstrains, list):
        # ISO 19115 names this attribute accessConstraints; the previous code
        # wrote to the misspelt 'accessConstrains' instead.
        # NOTE(review): assumes the generated model follows the ISO spelling -- confirm.
        md_legalconstraints.accessConstraints = accessConstrains
    return md_legalconstraints
721 | |
---|
def createMD_Identifier(code, authority=None):
    """
    Builds an MD_Identifier for the given code.
    @param code: a String; always assigned
    @param authority: a CI_Citation instance; assigned only when truthy
    @return: the new MD_Identifier instance
    """
    identifier = MD_Identifier()
    identifier.code = code
    if not authority:
        return identifier
    identifier.authority = authority
    return identifier
733 | |
---|
def createCI_ResponsibleParty(role, organizationName=None, individualName=None):
    """
    Builds a CI_ResponsibleParty with the given role.
    @param role: a CI_RoleCode; always assigned
    @param organizationName: stored in the organisationName attribute when truthy
    @param individualName: stored in the individualName attribute when truthy
    @return: the new CI_ResponsibleParty instance
    """
    party = CI_ResponsibleParty()
    party.role = role
    # Note the spelling: the parameter uses 'z', the model attribute uses 's'.
    optional_names = (
        ("organisationName", organizationName),
        ("individualName", individualName),
    )
    for attribute, value in optional_names:
        if value:
            setattr(party, attribute, value)
    return party
746 | |
---|
def createMD_Metadata(date_stamp, contact, language=None):
    """
    Builds an MD_Metadata instance.
    @param date_stamp: a Date instance; stored as dateStamp
    @param contact: an iterable of CI_ResponsibleParty instances; each item is
                    appended to the contact collection of the new instance
    @param language: a string; assigned only when truthy
    @return: the new MD_Metadata instance
    """
    metadata = MD_Metadata()
    metadata.dateStamp = date_stamp
    for responsible_party in contact:
        metadata.contact.append(responsible_party)
    if not language:
        return metadata
    metadata.language = language
    return metadata
761 | |
---|
def createMO_OnlineResource(linkage, name=None, function=None, description=None, applicationProfile=None):
    """
    Builds an MO_OnlineResource.
    @param linkage: the MO_OnlineResource.linkage field; always assigned
    @param name: the MO_OnlineResource.name field
    @param function: the MO_OnlineResource.function field
    @param description: the MO_OnlineResource.description field
    @param applicationProfile: the MO_OnlineResource.applicationProfile field
    @return: the new MO_OnlineResource instance
    """
    resource = MO_OnlineResource()
    resource.linkage = linkage
    # Every optional field is copied only when a truthy value was supplied.
    optional_fields = (
        ("name", name),
        ("function", function),
        ("description", description),
        ("applicationProfile", applicationProfile),
    )
    for attribute, value in optional_fields:
        if value:
            setattr(resource, attribute, value)
    return resource
782 | |
---|
def createCEDA_Result(curation_category, internal_path, source=None):
    """
    Builds a CEDA_Result.
    @param curation_category: a CEDA_CurationValue instance; stored as curationCategory
    @param internal_path: a String; stored as internalPath
    @param source: an array of MO_OnlineResource instances; assigned only when truthy
    @return: the new CEDA_Result instance
    """
    result = CEDA_Result()
    result.curationCategory = curation_category
    result.internalPath = internal_path
    if not source:
        return result
    result.source = source
    return result
796 | |
---|
797 | |
---|
798 | |
---|
def createDQ_ConformanceResult(explanation, pass_, specification):
    """
    Builds a DQ_ConformanceResult; all three arguments are assigned unconditionally.
    @param explanation: a String
    @param pass_: a boolean value
    @param specification: a CI_Citation instance
    @return: the new DQ_ConformanceResult instance
    """
    conformance = DQ_ConformanceResult()
    conformance.explanation = explanation
    conformance.pass_ = pass_
    conformance.specification = specification
    return conformance
811 | |
---|
def createDQ_Element(result):
    """
    Builds a DQ_Element.
    @param result: a DQ_Result array (min 1, max 2 items); it is assigned only
                   when it is a plain list holding one or two items
    @return: the new DQ_Element instance
    """
    element = DQ_Element()
    if result and type(result) is list and 1 <= len(result) <= 2:
        element.result = result
    return element
821 | |
---|
def createEX_GeographicBoundingBox(east, north, west, south):
    """
    Builds an EX_GeographicBoundingBox from its four bounds.
    @param east: the eastBoundLongitude attribute as float
    @param north: the northBoundLatitude attribute as float
    @param west: the westBoundLongitude attribute as float
    @param south: the southBoundLatitude attribute as float
    @return: the new EX_GeographicBoundingBox instance
    """
    bounding_box = EX_GeographicBoundingBox()
    bounding_box.eastBoundLongitude = east
    bounding_box.northBoundLatitude = north
    bounding_box.westBoundLongitude = west
    bounding_box.southBoundLatitude = south
    return bounding_box
836 | |
---|
def createCEDA_Processing():
    """
    Returns a new CEDA_Processing instance built with no arguments.
    """
    return CEDA_Processing()
840 | |
---|
841 | |
---|
def createCEDA_Instrument():
    """
    Returns a new CEDA_Instrument instance built with no arguments.
    """
    return CEDA_Instrument()
845 | |
---|
def createCEDA_CompositeProcess():
    """
    Returns a new CEDA_CompositeProcess instance built with no arguments.
    """
    return CEDA_CompositeProcess()
849 | |
---|
def createCEDA_Acquisition():
    """
    Returns a new CEDA_Acquisition instance built with no arguments.
    """
    return CEDA_Acquisition()
853 | |
---|
def createCEDA_Review(reviewer, reviewFrequency, reviewStatus):
    """
    Builds a CEDA_Review; all three arguments are assigned unconditionally.
    @param reviewer: an MO_ResponsibilityPartyInfo
    @param reviewFrequency: a CEDA_ReviewFrequencyValue
    @param reviewStatus: a CEDA_ReviewStatusValue
    @return: the new CEDA_Review instance
    """
    review = CEDA_Review()
    review.reviewer = reviewer
    review.reviewFrequency = reviewFrequency
    review.reviewStatus = reviewStatus
    return review
866 | |
---|
def createCEDA_Project(abstract=None, publication_state=None, documentation=None, project_resource=None):
    """
    Builds a CEDA_Project, setting only the fields that were supplied.
    @param abstract: stored as abstract when truthy
    @param publication_state: stored as publicationState when truthy
    @param documentation: stored as documentation only when it is a non-empty plain list
    @param project_resource: stored as projectResource only when it is a non-empty plain list
    @return: the new CEDA_Project instance
    """
    project = CEDA_Project()
    if abstract:
        project.abstract = abstract
    if publication_state:
        project.publicationState = publication_state
    # The collection-valued fields are accepted only as non-empty plain lists.
    list_fields = (("documentation", documentation), ("projectResource", project_resource))
    for attribute, items in list_fields:
        if items and type(items) is list:
            setattr(project, attribute, items)
    return project