source: mauRepo/MolesManager/trunk/src/libs/migration/processor/commons.py @ 8477

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/mauRepo/MolesManager/trunk/src/libs/migration/processor/commons.py@8477
Revision 8477, 39.0 KB checked in by mnagni, 8 years ago (diff)

Implemented adapters for TM_Instant, TM_Period and partially for CEDA_Observation

Added a Sphinx documentation folder

Line 
1'''
2BSD Licence
3Copyright (c) 2012, Science & Technology Facilities Council (STFC)
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without modification,
7are permitted provided that the following conditions are met:
8
9    * Redistributions of source code must retain the above copyright notice,
10        this list of conditions and the following disclaimer.
11    * Redistributions in binary form must reproduce the above copyright notice,
12        this list of conditions and the following disclaimer in the documentation
13        and/or other materials provided with the distribution.
14    * Neither the name of the Science & Technology Facilities Council (STFC)
15        nor the names of its contributors may be used to endorse or promote
16        products derived from this software without specific prior written permission.
17
18THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
22BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
23OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29Created on 15 Nov 2011
30
31@author: mnagni
32'''
33from ea_model.moles3_4.utilities.mo_responsiblepartyinfo import MO_ResponsiblePartyInfo
34from httplib import HTTPConnection
35from xml.etree.ElementTree import XML, tostring
36import time, datetime
37from libs.migration.exception.exceptions import NoDataLineage
38from hashlib import md5
39from xml.sax.saxutils import unescape, escape
40import html5lib
41from html5lib import treebuilders
42from ea_model.moles3_4.result.mo_onlineresource import MO_OnlineResource
43from ea_model.iso_19115_2006_metadata_corrigendum.reference_system_information.md_identifier import MD_Identifier
44from ea_model.iso_19115_2006_metadata_corrigendum.identification_information.md_keywords import MD_Keywords
45from ea_model.iso_19103_2005_schema_language.basic_types.primitive.date_and_time.datetime import DateTime
46from ea_model.iso_19103_2005_schema_language.basic_types.primitive.date_and_time.date import Date
47from ea_model.iso_19108_2006_temporal_schema.temporal_reference_system.tm_position import TM_Position
48from ea_model.iso_19108_2006_temporal_schema.temporal_objects.tm_instant import TM_Instant
49from ea_model.iso_19108_2006_temporal_schema.temporal_objects.tm_period import TM_Period
50from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_address import CI_Address
51from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_onlineresource import CI_OnlineResource
52from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_telephone import CI_Telephone
53from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_contact import CI_Contact
54from ea_model.moles3_4.utilities.mo_individual import MO_Individual
55from ea_model.moles3_4.utilities.mo_organisation import MO_Organisation
56from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_date import CI_Date
57from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_citation import CI_Citation
58from ea_model.iso_19115_2006_metadata_corrigendum.constraint_information.md_constraints import MD_Constraints
59from ea_model.iso_19115_2006_metadata_corrigendum.constraint_information.md_legalconstraints import MD_LegalConstraints
60from ea_model.iso_19115_2006_metadata_corrigendum.citation_and_responsible_party_information.ci_responsibleparty import CI_ResponsibleParty
61from ea_model.iso_19115_2006_metadata_corrigendum.metadata_entity_set_information.md_metadata import MD_Metadata
62from ea_model.iso_19115_2006_metadata_corrigendum.data_quality_information.dq_conformanceresult import DQ_ConformanceResult
63from ea_model.iso_19115_2006_metadata_corrigendum.extent_information.ex_geographicboundingbox import EX_GeographicBoundingBox
64from ea_model.iso_19115_2006_metadata_corrigendum.data_quality_information.dq_element import DQ_Element
65from ea_model.ceda_metadatamodel.ceda_project.ceda_project import CEDA_Project
66from ea_model.ceda_metadatamodel.ceda_utilities.ceda_review import CEDA_Review
67from ea_model.ceda_metadatamodel.ceda_acquisition.ceda_acquisition import CEDA_Acquisition
68from ea_model.ceda_metadatamodel.ceda_observationprocess.ceda_compositeprocess import CEDA_CompositeProcess
69from ea_model.ceda_metadatamodel.ceda_acquisition.ceda_instrument import CEDA_Instrument
70from ea_model.ceda_metadatamodel.ceda_computation.ceda_processing import CEDA_Processing
71from ea_model.ceda_metadatamodel.ceda_result.ceda_result import CEDA_Result
72import datetime
73
# Root of the atom collections in the eXist REST interface
base = '/exist/rest/atoms'

# Document status values: the collection level directly below the atoms root.
# NOTE(review): both a lower-case and a capitalised 'published' are listed;
# presumably both collection names exist on the server - confirm.
DS_pUBLISHED = 'published'
DS_WORKING = 'working'
DS_PUBLISHED = 'Published'
DOC_STATUS = (
    DS_pUBLISHED,
    DS_WORKING,
    DS_PUBLISHED,
)

# Document types: the collection level below the status
DT_DEPLOYMENTS = 'deployments'
DT_DATA_ENTITIES = 'data_entities'
DT_DEPLOYMENT_DATA = 'deployment_data'
DT_DATA_GRANULES = 'data_granules'
DOC_TYPES = (
    DT_DEPLOYMENTS,
    DT_DATA_ENTITIES,
    DT_DEPLOYMENT_DATA,
    DT_DATA_GRANULES,
)

# Document owners: the collection level below the type
DO_BADC = 'badc.nerc.ac.uk'
DO_NEODC = 'neodc.nerc.ac.uk'
DO_UKSSDC = 'ukssdc.ac.uk'
docOwners = (
    DO_BADC,
    DO_NEODC,
    DO_UKSSDC,
)

CEDA = 'Centre for Environmental Data Archive'

# XML namespaces written in the ElementTree '{uri}' notation
atomNS = "{http://www.w3.org/2005/Atom}"
existNS = "{http://exist.sourceforge.net/NS/exist}"
molesNS = "{http://ndg.nerc.ac.uk/schema/moles2beta}"
htmlNS = "{http://www.w3.org/1999/xhtml}"
georssNS = "{http://www.georss.org/georss/10}"
gmlNS = "{http://www.opengis.net/gml}"

# strptime patterns for dates and for date-times
date_format = '%Y-%m-%d'
datetime_format = date_format + 'T%H:%M:%SZ'

# Host and port of the eXist server queried by _getDocument
ihost = 'bora.badc.rl.ac.uk'
iport = '8080'

# Trailing segments of the atom <link rel="..."> values used to classify links
linkMarkers = [
    'Deployment',
    'DOWNLOAD',
    'DOCUMENTATION',
    'ACCESS',
    'LOGO',
    'ACTIVITY',
    'DPT',
    'OBS',
]

#MD_Identifier codes
MD_CODE_MOLES2_CITATION = 'ceda_moles2_citation'
109
# Module-wide html5lib parser: builds "etree" trees and is configured with
# namespaceHTMLElements=False
htmlParser = html5lib.HTMLParser(
    tree=treebuilders.getTreeBuilder("etree"),
    namespaceHTMLElements=False
)
111
def calculateHash(text):
    """
        Computes the MD5 digest of the given text.
        @param text: the (byte) string to digest
        @return: the digest as a string of hexadecimal digits
    """
    return md5(text).hexdigest()
121
def buildExistDocPath(docStatus, docType, docOwner, docName):
    """
        Builds the eXist REST path of a single document, that is the owner
        collection path followed by '/' and the document name.
        @param docStatus: a document status (see DOC_STATUS)
        @param docType: a document type (see DOC_TYPES)
        @param docOwner: a document owner (see docOwners)
        @param docName: the name of the document
        @return: the path of the document
    """
    ownerPath = buildExistOwnerPath(docStatus, docType, docOwner)
    return '%s/%s' % (ownerPath, docName)
130
def buildExistOwnerPath(docStatus, docType, docOwner):
    """
        Builds the eXist REST path of an owner collection, that is the type
        collection path followed by '/' and the owner.
        @param docStatus: a document status (see DOC_STATUS)
        @param docType: a document type (see DOC_TYPES)
        @param docOwner: a document owner (see docOwners)
        @return: the path of the owner collection
    """
    typePath = buildExistTypePath(docStatus, docType)
    return '%s/%s' % (typePath, docOwner)
138
def buildExistTypePath(docStatus, docType):
    """
        Builds the eXist REST path of a type collection, that is the status
        collection path followed by '/' and the type.
        @param docStatus: a document status (see DOC_STATUS)
        @param docType: a document type (see DOC_TYPES)
        @return: the path of the type collection
    """
    statusPath = buildExistStatusPath(docStatus)
    return '%s/%s' % (statusPath, docType)
145
def buildExistStatusPath(docStatus):
    """
        Builds the eXist REST path of a status collection.
        @param docStatus: a document status (see DOC_STATUS)
        @return: '/exist/rest/atoms/' followed by the status
    """
    statusPath = '/exist/rest/atoms/%s' % docStatus
    return statusPath
151
def getAtomDocumentByMO(migrationObject):
    """
        Retrieves and parses the atom document referenced by a migration object.
        The document type is chosen from the class name of the instance.
        @param migrationObject: a DeploymentsMigration, DataEntityMigration or
        DeploymentDataMigration instance
        @return: the parsed document, as returned by getAtomDocumentAsElementtree
        @raise Exception: if migrationObject is None
        @raise KeyError: if the class name of the instance is not one of the above
    """
    if migrationObject is None:
        raise Exception("migrationObject is None")
    docTypeByClassName = {
        'DeploymentsMigration': DT_DEPLOYMENTS,
        'DataEntityMigration': DT_DATA_ENTITIES,
        'DeploymentDataMigration': DT_DEPLOYMENT_DATA,
    }
    docStatus = migrationObject.doc_status
    docType = docTypeByClassName[type(migrationObject).__name__]
    return getAtomDocumentAsElementtree(docStatus, docType, migrationObject.doc_owner, migrationObject.doc_name)
157
def getAtomDocumentHashByMO(migrationObject):
    """
        Retrieves the atom document referenced by a migration object and
        returns the hash of its text.
        The document type is chosen from the class name of the instance.
        @param migrationObject: a DeploymentsMigration, DataEntityMigration or
        DeploymentDataMigration instance
        @return: the value returned by calculateHash for the document text
        @raise Exception: if migrationObject is None
        @raise KeyError: if the class name of the instance is not one of the above
    """
    if migrationObject is None:
        raise Exception("migrationObject is None")
    docTypeByClassName = {
        'DeploymentsMigration': DT_DEPLOYMENTS,
        'DataEntityMigration': DT_DATA_ENTITIES,
        'DeploymentDataMigration': DT_DEPLOYMENT_DATA,
    }
    docStatus = migrationObject.doc_status
    docType = docTypeByClassName[type(migrationObject).__name__]
    documentText = _getAtomDocumentAsText(docStatus, docType, migrationObject.doc_owner, migrationObject.doc_name)
    return calculateHash(documentText)
164
def getAtomDocumentHash(docStatus, docType, docOwner, docName):
    """
        Retrieves a document from eXist and returns the hash of its text.
        @param docStatus: a document status (see DOC_STATUS)
        @param docType: a document type (see DOC_TYPES)
        @param docOwner: a document owner (see docOwners)
        @param docName: the name of the document
        @return: the value returned by calculateHash for the document text
    """
    docPath = buildExistDocPath(docStatus, docType, docOwner, docName)
    return calculateHash(_getDocument(docPath))
169
def hasMOBeenProcessed(migrationObject):
    """
        Tells whether a migration object has already been processed, that is
        whether it carries a 'doc_hash' attribute which is not None.
        @param migrationObject: an instance of DataEntityMigration or DeploymentsMigration
        @return: True if it has been processed, False otherwise (also when
        migrationObject is None)
    """
    # A missing attribute and a None object both resolve to the None default
    return getattr(migrationObject, 'doc_hash', None) is not None
186
def hasMOSameHash(migrationObject):
    """
        Tells whether the document referenced by a migration object is unchanged.
        @param migrationObject: an instance of DataEntityMigration or
        DeploymentsMigration or DeploymentDataMigration
        @return: True if the migration object has been processed and the hash of
        the current document equals its doc_hash, otherwise False
    """
    if hasMOBeenProcessed(migrationObject):
        currentHash = getAtomDocumentHashByMO(migrationObject)
        return currentHash == migrationObject.doc_hash
    return False
196
def getAtomDocumentByType(migrationObject, docType):
    """
        Retrieves and parses the atom document referenced by a migration
        object, looking it up under an explicitly given document type.
        @param migrationObject: an object exposing doc_status, doc_owner and doc_name
        @param docType: a document type (see DOC_TYPES)
        @return: the parsed document, as returned by getAtomDocumentAsElementtree
        @raise Exception: if migrationObject is None
    """
    if migrationObject is None:
        raise Exception("migrationObject is None")
    docStatus = migrationObject.doc_status
    docOwner = migrationObject.doc_owner
    return getAtomDocumentAsElementtree(docStatus, docType, docOwner, migrationObject.doc_name)
201
def _getAtomDocumentAsText(docStatus, docType, docOwner, docName):
    """
        Retrieves a document from eXist as raw text.
        @param docStatus: a document status (see DOC_STATUS)
        @param docType: a document type (see DOC_TYPES)
        @param docOwner: a document owner (see docOwners)
        @param docName: the name of the document
        @return: the response body returned by _getDocument
    """
    return _getDocument(buildExistDocPath(docStatus, docType, docOwner, docName))
205
def getAtomDocumentAsElementtree(docStatus, docType, docOwner, docName):
    """
        Retrieves a document from eXist and parses it as XML.
        @param docStatus: a document status (see DOC_STATUS)
        @param docType: a document type (see DOC_TYPES)
        @param docOwner: a document owner (see docOwners)
        @param docName: the name of the document
        @return: the root element returned by _getXMLDocument
    """
    return _getXMLDocument(buildExistDocPath(docStatus, docType, docOwner, docName))
209
def _getXMLDocument(source):
    """
        Retrieves the document at the given eXist path and parses it.
        @param source: the path of the document
        @return: the root element produced by xml.etree.ElementTree.XML
    """
    rawDocument = _getDocument(source)
    return XML(rawDocument)
212
def stringToTimestamp(timestring):
    """
        Converts a time string to a datetime.datetime instance.
        The string is parsed with datetime_format, turned into seconds since
        the epoch by time.mktime and converted back by datetime.fromtimestamp.
        @param timestring: a time string formatted as '%Y-%m-%dT%H:%M:%SZ'
        @return: a datetime.datetime instance
    """
    parsedTime = time.strptime(timestring, datetime_format)
    secondsSinceEpoch = time.mktime(parsedTime)
    return datetime.datetime.fromtimestamp(secondsSinceEpoch)
219
def isoDateStringToTimeDate(datestring):
    """
        Parses a date string with date_format.
        @param datestring: a date string formatted as '%Y-%m-%d'
        @return: a datetime.datetime instance
    """
    parsedDate = datetime.datetime.strptime(datestring, date_format)
    return parsedDate
226
def isoDateTimeStringToTimeDate(timestring):
    """
        Parses a time string with datetime_format.
        @param timestring: a time string formatted as '%Y-%m-%dT%H:%M:%SZ'
        @return: a datetime.datetime instance, or None if timestring cannot be
        parsed (wrong format or not a string)
    """
    try:
        return datetime.datetime.strptime(timestring, datetime_format)
    except (ValueError, TypeError):
        # Best effort: an unparsable value yields None instead of an error.
        # Only parsing failures are swallowed, so unrelated errors still surface.
        return None
236
def _getDocument(source):
    """
        Performs an HTTP GET on the eXist server (ihost, iport) and returns
        the response body.
        NOTE(review): the HTTP status of the response is not checked, so the
        body of an error response is returned like any other.
        @param source: the path of the resource to retrieve
        @return: the raw response body
    """
    conn = HTTPConnection(host = ihost, port = iport)
    try:
        conn.connect()
        conn.request('GET', source)
        return conn.getresponse().read()
    finally:
        # Release the connection even when the request or the read fails
        conn.close()
248
249def _returnNotNoneText(element):
250    if element is None:
251        return None
252    return element.text
253
def findMolesCreationDate(resourceXML):
    """
        Reads the moles entity/molesISO/created element of a parsed document.
        @param resourceXML: the root element of the document
        @return: the text of the element, or None if the element is missing
    """
    createdPath = '/'.join(molesNS + tag for tag in ('entity', 'molesISO', 'created'))
    return _returnNotNoneText(resourceXML.find(createdPath))
257
def findMolesPublishedDate(resourceXML):
    """
        Reads the moles entity/molesISO/published element of a parsed document.
        @param resourceXML: the root element of the document
        @return: the text of the element, or None if the element is missing
    """
    publishedPath = '/'.join(molesNS + tag for tag in ('entity', 'molesISO', 'published'))
    return _returnNotNoneText(resourceXML.find(publishedPath))
261
def findMolesLineage(dataEntityMigration):
    """
        Reads the moles entity/molesISO/lineage element of the atom document
        referenced by a migration object.
        @param dataEntityMigration: the migration instance to retrieve and parse
        @return: the text of the lineage element
        @raise NoDataLineage: if the document has no lineage element
    """
    lineagePath = '/'.join(molesNS + tag for tag in ('entity', 'molesISO', 'lineage'))
    lineage = getAtomDocumentByMO(dataEntityMigration).find(lineagePath)
    if lineage is not None:
        return lineage.text
    raise NoDataLineage(dataEntityMigration)
268
def extractMolesProviderID(migrationObject):
    """
        Reads the moles entity/molesISO/providerID element of the atom
        document referenced by a migration object.
        @param migrationObject: the migration instance to retrieve and parse
        @return: the text of the element, or None if the element is missing
    """
    document = getAtomDocumentByMO(migrationObject)
    providerPath = '/'.join(molesNS + tag for tag in ('entity', 'molesISO', 'providerID'))
    return _returnNotNoneText(document.find(providerPath))
273
def extractMolesQuality(migrationObject):
    """
        Reads the moles entity/molesISO/quality element of the atom document
        referenced by a migration object.
        @param migrationObject: the migration instance to retrieve and parse
        @return: the text of the element, or None if the element is missing
    """
    document = getAtomDocumentByMO(migrationObject)
    qualityPath = '/'.join(molesNS + tag for tag in ('entity', 'molesISO', 'quality'))
    return _returnNotNoneText(document.find(qualityPath))
278
def extractMolesTemporalRange(migrationObject):
    """
        Reads the moles temporalRange element, looked up as a direct child of
        the root of the atom document referenced by a migration object.
        @param migrationObject: the migration instance to retrieve and parse
        @return: the text of the element, or None if the element is missing
    """
    document = getAtomDocumentByMO(migrationObject)
    return _returnNotNoneText(document.find(molesNS + 'temporalRange'))
283
def extractMolesCreationDate(migrationObject):
    """
        Retrieves the atom document referenced by a migration object and
        returns its moles creation date.
        @param migrationObject: the migration instance to retrieve and parse
        @return: the value returned by findMolesCreationDate
    """
    return findMolesCreationDate(getAtomDocumentByMO(migrationObject))
287
def extractMolesPublishedDate(migrationObject):
    """
        Retrieves the atom document referenced by a migration object and
        returns its moles published date.
        @param migrationObject: the migration instance to retrieve and parse
        @return: the value returned by findMolesPublishedDate
    """
    return findMolesPublishedDate(getAtomDocumentByMO(migrationObject))
291
def extractQuality(dataEntityMigration):
    """
        Reads the moles entity/molesISO/quality element of the atom document
        referenced by a migration object.
        @param dataEntityMigration: the migration instance to retrieve and parse
        @return: the text of the element, or None if the element is missing
    """
    document = getAtomDocumentByMO(dataEntityMigration)
    qualityPath = '/'.join(molesNS + tag for tag in ('entity', 'molesISO', 'quality'))
    return _returnNotNoneText(document.find(qualityPath))
296
def extractUpdateFrequency(dataEntityMigration):
    """
        Reads the moles entity/molesISO/updateFrequency element of the atom
        document referenced by a migration object.
        @param dataEntityMigration: the migration instance to retrieve and parse
        @return: the text of the element, or None if the element is missing
    """
    document = getAtomDocumentByMO(dataEntityMigration)
    frequencyPath = '/'.join(molesNS + tag for tag in ('entity', 'molesISO', 'updateFrequency'))
    return _returnNotNoneText(document.find(frequencyPath))
301
def extractContent(dataEntityMigration):
    """
        Collects the 'cedacat' <div> elements found in the atom <content>
        element of the document referenced by a migration object.
        The content text is unescaped and parsed as HTML; every <div> below
        <body> whose 'property' attribute starts with 'cedacat' is stored,
        serialized and escaped, under the part of the property which follows
        the first ':'.
        @param dataEntityMigration: the migration instance to retrieve and parse
        @return: a dictionary, empty if the document has no content text
    """
    document = getAtomDocumentByMO(dataEntityMigration)
    text = _returnNotNoneText(document.find(atomNS + 'content'))
    contentDict = {}
    if not text:
        return contentDict
    parsedContent = htmlParser.parse(unescape(text))
    for div in parsedContent.findall('body//div'):
        prop = div.get('property')
        if not prop or not prop.startswith('cedacat'):
            continue
        contentDict[prop.split(':')[1]] = escape(tostring(div))
    return contentDict
319
320def _extractAuthors(authorsCSV):
321    if authorsCSV is None:
322        return []
323    authors = authorsCSV.split(',')
324    for index in range(len(authors)):
325        authors[index] = authors[index].strip()
326        if len(authors[index]) == 0:
327            authors.remove(authors[index])
328    return authors
329
def findAuthorsInResource(resourceMigration):
    """
        Retrieves the atom document referenced by a migration object and
        returns its author and contributor names.
        @param resourceMigration: the migration instance to retrieve and parse
        @return: a dictionary with the keys
        'authors': the value returned by findAuthorInResource
        'contributors': the value returned by findContributorInResource
    """
    document = getAtomDocumentByMO(resourceMigration)
    return {
        'authors': findAuthorInResource(document),
        'contributors': findContributorInResource(document),
    }
341
def findAuthorInResource(resourceXML):
    """
        Reads the first atom author/name element of a parsed document.
        @param resourceXML: the root element of the document
        @return: the text of the element, or None if the element is missing
    """
    namePath = '/'.join(atomNS + tag for tag in ('author', 'name'))
    return _returnNotNoneText(resourceXML.find(namePath))
345
def findContributorInResource(resourceXML):
    """
        Reads the first atom contributor/name element of a parsed document.
        @param resourceXML: the root element of the document
        @return: the text of the element, or None if the element is missing
    """
    namePath = '/'.join(atomNS + tag for tag in ('contributor', 'name'))
    return _returnNotNoneText(resourceXML.find(namePath))
349
def findPublishedDate(resourceMigration):
    """
        Reads the atom <published> element of the document referenced by a
        migration object.
        @param resourceMigration: the migration instance to retrieve and parse
        @return: the text of the element, or None if the element is missing
    """
    document = getAtomDocumentByMO(resourceMigration)
    return _returnNotNoneText(document.find(atomNS + 'published'))
354
def findUpdatedDate(resourceMigration):
    """
        Reads the atom <updated> element of the document referenced by a
        migration object.
        @param resourceMigration: the migration instance to retrieve and parse
        @return: the text of the element, or None if the element is missing
    """
    document = getAtomDocumentByMO(resourceMigration)
    return _returnNotNoneText(document.find(atomNS + 'updated'))
359
def findSummary(resourceMigration):
    """
        Reads the atom <summary> element of the document referenced by a
        migration object.
        @param resourceMigration: a MigrationObject instance
        @return: the text of the element, or None if the element is missing
    """
    document = getAtomDocumentByMO(resourceMigration)
    return _returnNotNoneText(document.find(atomNS + 'summary'))
369
def findID(dataEntXML):
    """
        Reads the atom <id> element of a parsed document.
        @param dataEntXML: the root element of the document
        @return: the text of the element, or None if the element is missing
    """
    return _returnNotNoneText(dataEntXML.find(atomNS + 'id'))
373
374def _updateLinksDict(linksDict, link, linkMarker):
375    if not linksDict.has_key(linkMarker):
376        linksDict[linkMarker] = []
377    rel = link.get('rel')
378    if rel and rel.endswith('/' + linkMarker):
379        linksDict[linkMarker].append({'href': link.get('href'), 'title': link.get('title')}) 
380
def _extractLinks(dataEntXML, markers):
    """
        Classifies the atom <link> children of a parsed document by marker.
        @param dataEntXML: the root element of the document
        @param markers: the markers to test each link against
        @return: the dictionary filled by _updateLinksDict; it is empty if the
        document has no <link> element
    """
    linksByMarker = {}
    for atomLink in dataEntXML.findall(atomNS + 'link'):
        for linkMarker in markers:
            _updateLinksDict(linksByMarker, atomLink, linkMarker)
    return linksByMarker
388
def findLinksInMigrationDocument(dataEntityMigration):
    """
        Retrieves the atom document referenced by a migration object and
        classifies its links by the markers listed in linkMarkers.
        @param dataEntityMigration: the migration instance to retrieve and parse
        @return: the dictionary returned by _extractLinks
    """
    return _extractLinks(getAtomDocumentByMO(dataEntityMigration), linkMarkers)
393
394
395
def findDownloadLinksInMigrationDocument(migrationObject):
    """
        Return a list of dictionaries describing a <link rel="...DOWNLOAD..."> tag type
        Each dictionary has two keys: 'href' and 'title'
        @param migrationObject: the migration instance to retrieve and parse
        @return: a list of dictionaries, empty if the document has no such link
    """
    # The fallback is an empty list, as documented, rather than an empty dict;
    # dict.get also replaces has_key(), which does not exist in Python 3
    return findLinksInMigrationDocument(migrationObject).get('DOWNLOAD', [])
407
def findAccessLinksInMigrationDocument(migrationObject):
    """
        Return a list of dictionaries describing a <link rel="...ACCESS..."> tag type
        Each dictionary has two keys: 'href' and 'title'
        @param migrationObject: the migration instance to retrieve and parse
        @return: a list of dictionaries, empty if the document has no such link
    """
    # The fallback is an empty list, as documented, rather than an empty dict;
    # dict.get also replaces has_key(), which does not exist in Python 3
    return findLinksInMigrationDocument(migrationObject).get('ACCESS', [])
419
def findDocumentationInMigrationDocument(migrationObject):
    """
        Return a list of dictionaries describing a <link rel="...DOCUMENTATION..."> tag type
        Each dictionary has two keys: 'href' and 'title'
        @param migrationObject: the migration instance to retrieve and parse
        @return: a list of dictionaries, empty if the document has no such link
    """
    # The fallback is an empty list, as documented, rather than an empty dict;
    # dict.get also replaces has_key(), which does not exist in Python 3
    return findLinksInMigrationDocument(migrationObject).get('DOCUMENTATION', [])
431
def findDOIInMigrationDocument(migrationObject):
    """
        Return the first <link rel="...DOCUMENTATION..."> of the document
        whose href starts with 'http://dx.doi.org/doi:'
        The dictionary has two keys: 'href' and 'title'
        @param migrationObject: the migration instance to retrieve and parse
        @return: a dictionary relative to the DOI, None otherwise
    """
    for link in findDocumentationInMigrationDocument(migrationObject):
        # 'href' is None for a <link> without that attribute: skip such links
        # instead of failing on None.startswith
        href = link.get('href')
        if href and href.startswith('http://dx.doi.org/doi:'):
            return link
    return None
443
def findDeploymentsInDE(dataEntityMigration):
    """
        Lists the documents referenced by the 'Deployment' links of the atom
        document referenced by a migration object.
        @param dataEntityMigration: the migration instance to retrieve and parse
        @return: a list with the names returned by _extractLinksByMarker, each
        followed by '.atom'
    """
    linksDict = findLinksInMigrationDocument(dataEntityMigration)
    deploymentNames = []
    for depName in _extractLinksByMarker(linksDict, 'Deployment'):
        deploymentNames.append(depName + '.atom')
    return deploymentNames
448
def findSubTypeInDPT(resourceMigration):
    """
        Looks, in the atom document referenced by a migration object, for the
        first <category> element whose 'term' attribute is "ATOM_SUBTYPE".
        @param resourceMigration: the migration instance to retrieve and parse
        @return: the 'label' attribute of that category, or None if there is
        no such category
    """
    document = getAtomDocumentByMO(resourceMigration)
    for atomCategory in document.findall(atomNS + 'category'):
        if atomCategory.get("term") != "ATOM_SUBTYPE":
            continue
        return atomCategory.get("label")
    return None
455       
def extractTitle(deploymentMigration):
    """
        Reads the atom <title> element of the document referenced by a
        migration object.
        @param deploymentMigration: the migration instance to retrieve and parse
        @return: the text of the element, or None if the element is missing
    """
    document = getAtomDocumentByMO(deploymentMigration)
    return _returnNotNoneText(document.find(atomNS + 'title'))
460
def extractSummary(migrationObject):
    """
        Reads the atom <summary> element of the document referenced by a
        migration object.
        @param migrationObject: the migration instance to retrieve and parse
        @return: the text of the element, or None if the element is missing
    """
    document = getAtomDocumentByMO(migrationObject)
    return _returnNotNoneText(document.find(atomNS + 'summary'))
465
def extractGeographicExtentInMigrationDocument(migrationObject):
    """
        Extracts, if existing, the georss:where/gml:Envelope upperCorner and
        lowerCorner elements.
        The first two whitespace separated values of upperCorner are read as
        east and north, those of lowerCorner as west and south.
        @param migrationObject: a migration object to retrieve and parse for data
        @return: None if either corner is missing, otherwise a dictionary with
        keys: 'east', 'north', 'west', 'south' where the values are float
    """
    resourceXML = getAtomDocumentByMO(migrationObject)
    upperCorner = resourceXML.find('%swhere/%sEnvelope/%supperCorner' % (georssNS, gmlNS, gmlNS))
    lowerCorner = resourceXML.find('%swhere/%sEnvelope/%slowerCorner' % (georssNS, gmlNS, gmlNS))
    # Identity test: 'is None' is the reliable way to detect a missing element
    if upperCorner is None or lowerCorner is None:
        return None
    upperCornerData = upperCorner.text.split()
    lowerCornerData = lowerCorner.text.split()
    return {'east': float(upperCornerData[0]), 'north': float(upperCornerData[1]),
            'west': float(lowerCornerData[0]), 'south': float(lowerCornerData[1])}
482
def findLinksInDeployment(migrationObject):
    """
        Returns, for every marker in linkMarkers, the names of the documents
        linked by the atom document referenced by a migration object.
        @param migrationObject: a MigrationObject instance
        @return: a dictionary mapping each marker to the list returned by
        _extractLinksByMarker
    """
    linksDict = findLinksInMigrationDocument(migrationObject)
    return dict((marker, _extractLinksByMarker(linksDict, marker)) for marker in linkMarkers)
494
495def _extractLinksByMarker(linksDict, marker):
496    dpt = []
497    if linksDict.has_key(marker):
498        for link in linksDict[marker]:
499            try:
500                linkLongName = link['href'].split('/')[-1]
501                if '__ATOM__' in linkLongName:
502                    linkName = linkLongName.rsplit('__ATOM__')[1]
503                else:
504                    linkName = linkLongName
505                dpt.append(linkName)
506            except Exception as ex:
507                print "WARN - Cannot extractLinksByMarker %s" % (link)
508    return dpt
509
510
def getResourceRefs(deploymentRefs):
    """
        Retrieves an eXist collection listing and returns its resource items.
        @param deploymentRefs: the path of the eXist collection
        @return: the list of exist:collection/exist:resource elements of the listing
    """
    listing = _getXMLDocument(deploymentRefs)
    resourcePath = '/'.join(existNS + tag for tag in ('collection', 'resource'))
    return listing.findall(resourcePath)
518
def getOwnerRefs(docStatus, docType, docOwner):
    """
        Retrieves the listing of an owner collection and returns its resource items.
        @param docStatus: a document status (see DOC_STATUS)
        @param docType: a document type (see DOC_TYPES)
        @param docOwner: a document owner (see docOwners)
        @return: the list of exist:collection/exist:resource elements of the listing
    """
    listing = _getXMLDocument(buildExistOwnerPath(docStatus, docType, docOwner))
    resourcePath = '/'.join(existNS + tag for tag in ('collection', 'resource'))
    return listing.findall(resourcePath)
526
def getTypeRefs(docStatus, docType):
    """
        Retrieves the listing of a type collection and returns its resource items.
        @param docStatus: a document status (see DOC_STATUS)
        @param docType: a document type (see DOC_TYPES)
        @return: the list of exist:collection/exist:resource elements of the listing
    """
    listing = _getXMLDocument(buildExistTypePath(docStatus, docType))
    resourcePath = '/'.join(existNS + tag for tag in ('collection', 'resource'))
    return listing.findall(resourcePath)
534
535
536
def getCollectionRefs(publishedRefs):
    """
        Retrieves an eXist collection listing and returns its sub-collection items.
        @param publishedRefs: the path of the eXist collection
        @return: the list of exist:collection/exist:collection elements of the listing
    """
    listing = _getXMLDocument(publishedRefs)
    collectionPath = '/'.join(existNS + tag for tag in ('collection', 'collection'))
    return listing.findall(collectionPath)
544
def getResource(source, resourceName):
    """
        Retrieves and parses a resource of an eXist collection.
        @param source: the path of the collection
        @param resourceName: the name of the resource inside the collection
        @return: the root element produced by xml.etree.ElementTree.XML
    """
    return XML(_getDocument('%s/%s' % (source, resourceName)))
549
def createMD_Keywords(keywords, k_type=None, thesaurusName=None):
    """
        Creates a new MD_Keywords instance.
        @param keywords: a String array, appended to the instance keyword list
        @param k_type: the value for the 'type' attribute, set only if not empty
        @param thesaurusName: the value for the 'thesaurusName' attribute, set
        only if not empty
        @return: the new MD_Keywords instance
    """
    instance = MD_Keywords()
    instance.keyword.extend(keywords)
    if k_type:
        instance.type = k_type
    if thesaurusName:
        instance.thesaurusName = thesaurusName
    return instance
562   
563
def createDateTime(datetime):
    """
        Creates a new DateTime instance.
        The century is computed as (year // 100) + 1; the time zone is taken
        from the tzinfo of the given instance.
        NOTE: inside this function the parameter shadows the datetime module.
        @param datetime: a datetime.datetime instance, or None
        @return: the new DateTime instance, or None if datetime is None
    """
    if datetime is None:
        return None
    dateTime = DateTime()
    # Floor division keeps the century an integer under Python 3 as well
    dateTime.century = (datetime.year // 100) + 1
    dateTime.year = datetime.year
    dateTime.month = datetime.month
    dateTime.day = datetime.day
    dateTime.hour = datetime.hour
    dateTime.minute = datetime.minute
    dateTime.second = datetime.second
    dateTime.timeZone = datetime.tzinfo
    return dateTime
581       
def createDate(date):
    """
        Creates a new Date instance.
        The century is computed as (year // 100) + 1.
        @param date: a datetime.datetime instance (only year, month and day are read)
        @return: the new Date instance
    """
    idate = Date()
    # Floor division keeps the century an integer under Python 3 as well
    idate.century = (date.year // 100) + 1
    idate.year = date.year
    idate.month = date.month
    idate.day = date.day
    return idate
593       
def createTM_Position(anyOther = None, date8601 = None, dateTime8601 = None, time8601 = None):
    """
        Creates a new TM_Position instance.
        Each attribute is set only if the matching parameter is not empty.
        @param anyOther: a TM_TemporalPosition instance
        @param date8601: a Date instance
        @param dateTime8601: a DateTime instance
        @param time8601: a Time instance
        @return: the new TM_Position instance
    """
    tm_position = TM_Position()
    optionalValues = (
        ('anyOther', anyOther),
        ('date8601', date8601),
        ('dateTime8601', dateTime8601),
        ('time8601', time8601),
    )
    for attributeName, value in optionalValues:
        if value:
            setattr(tm_position, attributeName, value)
    return tm_position
612
def createTM_Instant(position):
    """
        Creates a new TM_Instant instance.
        @param position: a TM_Position instance
        @return: the new TM_Instant instance, with its position set
    """
    instant = TM_Instant()
    instant.position = position
    return instant
621
def createTM_Period(begin, end):
    """
        Creates a new TM_Period instance.
        @param begin: the value for the 'begin' attribute
        @param end: the value for the 'end' attribute
        @return: the new TM_Period instance
    """
    period = TM_Period()
    period.begin, period.end = begin, end
    return period
631
def createCI_Address(deliveryPoint = None, electronicMailAddress = None, city = None, country = None, postalCode = None):
    """
        Creates a new CI_Address instance.
        Each attribute is set only if the matching parameter is not empty.
        @param deliveryPoint: an array of Strings
        @param electronicMailAddress: an array of Strings
        @param city: a string
        @param country: a string
        @param postalCode: a string
        @return: the new CI_Address instance
    """
    ci_address = CI_Address()
    if deliveryPoint:
        ci_address.deliveryPoint = deliveryPoint
    if electronicMailAddress:
        ci_address.electronicMailAddress = electronicMailAddress
    # postalCode is assigned once: a second identical branch was redundant
    if postalCode:
        ci_address.postalCode = postalCode
    if city:
        ci_address.city = city
    if country:
        ci_address.country = country
    return ci_address
655
def createCI_OnlineResource(linkage, name=None):
    """
        Creates a new CI_OnlineResource instance.
        @param linkage: a string (the URL class is associated with a String)
        @param name: a String, set only if not empty
        @return: the new CI_OnlineResource instance
    """
    onlineResource = CI_OnlineResource()
    onlineResource.linkage = linkage
    if not name:
        return onlineResource
    onlineResource.name = name
    return onlineResource
667
def createCI_Telephone(voice=None, facsimile=None):
    """
        Creates a new CI_Telephone instance.
        Each attribute is set only if the matching parameter is not empty.
        @param voice: an array of strings
        @param facsimile: an array of strings
        @return: the new CI_Telephone instance
    """
    ci_telephone = CI_Telephone()
    if voice:
        ci_telephone.voice = voice
    if facsimile:
        # Assign the facsimile numbers themselves, not the voice numbers
        ci_telephone.facsimile = facsimile
    return ci_telephone
680
def createCI_Contact(phone, address = None, onlineResource = None):
    """
        Creates a new CI_Contact instance.
        @param phone: a CI_Telephone instance, always assigned
        @param address: a CI_Address instance, set only if not empty
        @param onlineResource: a CI_OnlineResource instance, set only if not empty
        @return: the new CI_Contact instance
    """
    contact = CI_Contact()
    contact.phone = phone
    for attributeName, value in (('address', address), ('onlineResource', onlineResource)):
        if value:
            setattr(contact, attributeName, value)
    return contact
695
def createMO_Individual(name = None, contactInfo = None):
    """
        Creates a new MO_Individual instance.
        @param name: a String, set only if not empty
        @param contactInfo: an array of CI_Contact, assigned to the
        'contactInfo' attribute only if not empty
        @return: the new MO_Individual instance
    """
    individual = MO_Individual()
    for attributeName, value in (('name', name), ('contactInfo', contactInfo)):
        if value:
            setattr(individual, attributeName, value)
    return individual
708
def createMO_Organization(name = None, contactInfo = None):
    """
        Creates a new MO_Organisation instance.
        @param name: a String, set only if not empty
        @param contactInfo: an array of CI_Contact, appended to the
        'contactInfo' list of the instance only if not empty
        @return: the new MO_Organisation instance
    """
    organisation = MO_Organisation()
    if name:
        organisation.name = name
    if not contactInfo:
        return organisation
    organisation.contactInfo.extend(contactInfo)
    return organisation
721
def createMO_ResponsiblePartyInfo(role, i_party):
    """
        Creates a new MO_ResponsiblePartyInfo instance.
        @param role: a CI_RoleCode/MO_RoleValue assigned to this ResponsibleParty
        @param i_party: a list of MO_Organization/CI_Individual instances,
        appended to the 'party' list of the instance
        @return: the new MO_ResponsiblePartyInfo instance
    """
    partyInfo = MO_ResponsiblePartyInfo()
    partyInfo.role = role
    partyInfo.party.extend(i_party)
    return partyInfo
731
732
def createCI_Date(dateType, date=None):
    """
        Builds a CI_Date of the given type.
        @param dateType: a CI_DateTypeCode value; always assigned
        @param date: a DateTime instance; assigned only when truthy
        @return: the newly created CI_Date
    """
    new_date = CI_Date()
    new_date.dateType = dateType
    if not date:
        # Nothing to attach: return the instance with only its type set.
        return new_date
    new_date.date = date
    return new_date
744
def createCI_Citation(title, date=None, icitedResponsibleParty=None):
    """
        Builds a CI_Citation with the given title.
        @param title: the CI_Citation title; always assigned
        @param date: an array of CI_Date instances; used only when it is a
            non-empty object of exactly type list, otherwise silently ignored
        @param icitedResponsibleParty: a list of CI_ResponsibleParty instances;
            used only when truthy
        @return: the newly created CI_Citation
    """
    citation = CI_Citation()
    citation.title = title
    date_is_usable = date and type(date) is list
    if date_is_usable:
        citation.date.extend(date)
    if icitedResponsibleParty:
        citation.citedResponsibleParty.extend(icitedResponsibleParty)
    return citation
759
def createMD_Constraints(useLimitation=None):
    """
        Builds an MD_Constraints instance.
        @param useLimitation: a string array; assigned only when it is a
            non-empty object of exactly type list, otherwise silently ignored
        @return: the newly created MD_Constraints
    """
    constraints = MD_Constraints()
    limitation_is_usable = useLimitation and type(useLimitation) is list
    if limitation_is_usable:
        constraints.useLimitation = useLimitation
    return constraints
769
def createMD_LegalConstraints(useLimitation=None, accessConstrains=None):
    """
        Builds an MD_LegalConstraints instance.
        @param useLimitation: a string array; assigned only when it is a
            non-empty object of exactly type list
        @param accessConstrains: an MD_RestrictionCode array; assigned only
            when it is a non-empty object of exactly type list
        @return: the newly created MD_LegalConstraints
    """
    legal = MD_LegalConstraints()
    # NOTE(review): the second target attribute is spelled 'accessConstrains'
    # (no 't'); ISO 19115 names it 'accessConstraints' - confirm against the
    # MD_LegalConstraints class that the value lands on the intended attribute.
    candidates = (('useLimitation', useLimitation),
                  ('accessConstrains', accessConstrains))
    for field_name, field_value in candidates:
        if field_value and type(field_value) is list:
            setattr(legal, field_name, field_value)
    return legal
782
def createMD_Identifier(code, authority=None):
    """
        Creates a new MD_Identifier
        @param code: a String; mandatory, must not be None
        @param authority: a CI_Citation instance; assigned only when truthy
        @return: the newly created MD_Identifier
        @raise NoNullableElement: if code is None
    """
    # Validate the mandatory argument first, so a rejected call does not
    # instantiate an MD_Identifier that would be thrown away. Identity
    # comparison is used because 'code == None' can be fooled by objects
    # overriding __eq__.
    if code is None:
        raise NoNullableElement()
    md_identifier = MD_Identifier()
    md_identifier.code = code
    if authority:
        md_identifier.authority = authority
    return md_identifier
796
def createCI_ResponsibleParty(role, organizationName=None, individualName=None):
    """
        Builds a CI_ResponsibleParty with the given role.
        @param role: a CI_RoleCode; always assigned
        @param organizationName: stored in the organisationName attribute,
            only when truthy
        @param individualName: stored in the individualName attribute,
            only when truthy
        @return: the newly created CI_ResponsibleParty
    """
    party = CI_ResponsibleParty()
    party.role = role
    # Note the spelling change: the parameter is 'organizationName' while the
    # attribute it feeds is 'organisationName'.
    optional_fields = (('organisationName', organizationName),
                       ('individualName', individualName))
    for field_name, field_value in optional_fields:
        if field_value:
            setattr(party, field_name, field_value)
    return party
809
def createMD_Metadata(date_stamp, contact, language=None):
    """
        Builds an MD_Metadata instance.
        @param date_stamp: a Date instance; always assigned to dateStamp
        @param contact: an iterable of CI_ResponsibleParty instances; every
            item is added to the contact collection of the new instance
        @param language: a string; assigned only when truthy
        @return: the newly created MD_Metadata
    """
    metadata = MD_Metadata()
    metadata.dateStamp = date_stamp
    metadata.contact.extend(contact)
    if language:
        metadata.language = language
    return metadata
824
def createMO_OnlineResource(linkage, instance = None, name = None, function = None, description = None, applicationProfile = None):
    """
        Creates a new MO_OnlineResource or updates the given one
        @param linkage: the MO_OnlineResource.linkage field; always assigned
        @param instance: an existing MO_OnlineResource to update in place;
            if None a new MO_OnlineResource is created
        @param name: the MO_OnlineResource.name field
        @param function: the MO_OnlineResource.function field
        @param description: the MO_OnlineResource.description field
        @param applicationProfile: the MO_OnlineResource.applicationProfile field
        @return: the created (or updated) MO_OnlineResource
    """
    # Previously the local was bound only when no instance was supplied, so
    # passing an existing instance raised UnboundLocalError on the next line.
    if instance is None:
        on_line_resource = MO_OnlineResource()
    else:
        on_line_resource = instance

    on_line_resource.linkage = linkage
    # Each optional field is written only when a truthy value is given AND it
    # differs from the current one, so an unchanged instance is not touched.
    if name and on_line_resource.name != name:
        on_line_resource.name = name
    if function and on_line_resource.function != function:
        on_line_resource.function = function
    if description and on_line_resource.description != description:
        on_line_resource.description = description
    if applicationProfile and on_line_resource.applicationProfile != applicationProfile:
        on_line_resource.applicationProfile = applicationProfile
    return on_line_resource
847
def createCEDA_Result(curation_category, internal_path, source=None):
    """
        Builds a CEDA_Result instance.
        @param curation_category: a CEDA_CurationValue instance; always
            assigned to curationCategory
        @param internal_path: a String; always assigned to internalPath
        @param source: an array of MO_OnlineResource instances; assigned
            only when truthy
        @return: the newly created CEDA_Result
    """
    result = CEDA_Result()
    result.curationCategory = curation_category
    result.internalPath = internal_path
    if not source:
        return result
    result.source = source
    return result
861
862
863
def createDQ_ConformanceResult(explanation, pass_, specification):
    """
        Builds a DQ_ConformanceResult; all three arguments are assigned
        unconditionally.
        @param explanation: a String
        @param pass_: a boolean value
        @param specification: a CI_Citation instance
        @return: the newly created DQ_ConformanceResult
    """
    conformance = DQ_ConformanceResult()
    assignments = (('explanation', explanation),
                   ('pass_', pass_),
                   ('specification', specification))
    for field_name, field_value in assignments:
        setattr(conformance, field_name, field_value)
    return conformance
876
def createDQ_Element(result):
    """
        Builds a DQ_Element instance.
        @param result: a DQ_Result array (min 1, max 2 items); it is assigned
            only when it is an object of exactly type list holding one or two
            items, otherwise it is silently ignored
        @return: the newly created DQ_Element
    """
    element = DQ_Element()
    if result and type(result) is list and 1 <= len(result) <= 2:
        element.result = result
    return element
886
def createEX_GeographicBoundingBox(east, north, west, south):
    """
        Builds an EX_GeographicBoundingBox from its four bounds.
        @param east: the eastBoundLongitude attribute as float
        @param north: the northBoundLatitude attribute as float
        @param west: the westBoundLongitude attribute as float
        @param south: the southBoundLatitude attribute as float
        @return: the newly created EX_GeographicBoundingBox
    """
    bounding_box = EX_GeographicBoundingBox()
    bounds = (('eastBoundLongitude', east),
              ('northBoundLatitude', north),
              ('westBoundLongitude', west),
              ('southBoundLatitude', south))
    for bound_name, bound_value in bounds:
        setattr(bounding_box, bound_name, bound_value)
    return bounding_box
901   
def createCEDA_Processing():
    """
        Returns a new, unpopulated CEDA_Processing instance.
    """
    return CEDA_Processing()
905
906
def createCEDA_Instrument():
    """
        Returns a new, unpopulated CEDA_Instrument instance.
    """
    return CEDA_Instrument()
910
def createCEDA_CompositeProcess():
    """
        Returns a new, unpopulated CEDA_CompositeProcess instance.
    """
    return CEDA_CompositeProcess()
914
def createCEDA_Acquisition():
    """
        Returns a new, unpopulated CEDA_Acquisition instance.
    """
    return CEDA_Acquisition()
918
def createCEDA_Review(reviewer, reviewFrequency, reviewStatus):
    """
        Builds a CEDA_Review; all three arguments are assigned unconditionally.
        @param reviewer: an MO_ResponsibilityPartyInfo
        @param reviewFrequency: a CEDA_ReviewFrequencyValue
        @param reviewStatus: a CEDA_ReviewStatusValue
        @return: the newly created CEDA_Review
    """
    review = CEDA_Review()
    assignments = (('reviewer', reviewer),
                   ('reviewFrequency', reviewFrequency),
                   ('reviewStatus', reviewStatus))
    for field_name, field_value in assignments:
        setattr(review, field_name, field_value)
    return review
931
def createCEDA_Project(abstract=None, publication_state=None, documentation=None, project_resource=None):
    """
        Builds a CEDA_Project, setting only the attributes for which a usable
        value is given.
        @param abstract: assigned to abstract when truthy
        @param publication_state: assigned to publicationState when truthy
        @param documentation: assigned to documentation only when it is a
            non-empty object of exactly type list
        @param project_resource: assigned to projectResource only when it is a
            non-empty object of exactly type list
        @return: the newly created CEDA_Project
    """
    project = CEDA_Project()

    # Plain values: any truthy value is accepted.
    for field_name, field_value in (('abstract', abstract),
                                    ('publicationState', publication_state)):
        if field_value:
            setattr(project, field_name, field_value)

    # Collection values: must additionally be real lists.
    for field_name, field_value in (('documentation', documentation),
                                    ('projectResource', project_resource)):
        if field_value and type(field_value) is list:
            setattr(project, field_name, field_value)

    return project
943
def fromDateStringToPhenomenonTime(doc_phenomenon_time):
    """
        Converts a date string into a temporal object: '2002-07-22'
        (startDate) becomes a TM_Instant, '2002-07-22/2011-08-06'
        (start/endDate) becomes a TM_Period.
        @param doc_phenomenon_time: a date string in the expected format
        @return: the object built by createTM_Instant or createTM_Period, or
            None when doc_phenomenon_time is empty or None
    """
    if not doc_phenomenon_time:
        return None

    def _to_instant(date_string):
        # date string -> date -> TM_Position -> TM_Instant
        iso_date = createDate(isoDateStringToTimeDate(date_string))
        position = createTM_Position(date8601 = iso_date)
        return createTM_Instant(position)

    if '/' not in doc_phenomenon_time:
        return _to_instant(doc_phenomenon_time)

    # Only the first two '/'-separated parts are used as begin and end.
    parts = doc_phenomenon_time.split('/')
    begin_instant = _to_instant(parts[0])
    end_instant = _to_instant(parts[1])
    return createTM_Period(begin_instant, end_instant)
967
def fromPhenomenonTimeToString(phenomenonTime):
    """
        Converts a TM_Instant into a date string like '2002-07-22' (startDate)
        or a TM_Period into a pair of such strings (start/endDate).
        @param phenomenonTime: a TM_Instant or a TM_Period instance
        @return: a pair (startDate, endDate); (startDate, None) for a
            TM_Instant; (None, None) for any other non-None object. Note that
            a None input returns a bare None, not a pair.
    """
    if phenomenonTime is None:
        return None
    if isinstance(phenomenonTime, TM_Instant):
        return _tm_InstantToString(phenomenonTime), None
    if isinstance(phenomenonTime, TM_Period):
        begin_text = _tm_InstantToString(phenomenonTime.begin)
        end_text = _tm_InstantToString(phenomenonTime.end)
        return begin_text, end_text
    return None, None
985
def comparePhenomenonTimes(p1, p2):
    """
        Tells whether two phenomenon times describe the same start/end dates.
        @param p1: a TM_Instant, a TM_Period or None
        @param p2: a TM_Instant, a TM_Period or None
        @return: True if both convert to the same (startDate, endDate) pair,
            or if both are None; False otherwise
    """
    # fromPhenomenonTimeToString returns None (not a pair) for a None input,
    # so the results are compared as a whole rather than by index; indexing
    # raised TypeError whenever one of the arguments was None. For two pairs
    # this is the same element-wise equality as before.
    return fromPhenomenonTimeToString(p1) == fromPhenomenonTimeToString(p2)
990
991def _tm_InstantToString(tm_instant):
992    idate = tm_instant.position.date8601
993    return str(datetime.date(int(idate.year), int(idate.month), int(idate.day)))
994
def fromGeographicBoundingBoxToString(gbb):
    """
        Renders a bounding box as the string 'east north,west south'.
        @param gbb: an object exposing eastBoundLongitude, northBoundLatitude,
            westBoundLongitude and southBoundLatitude, or None
        @return: the formatted string, or None when gbb is None
    """
    if gbb is None:
        return None
    corners = (gbb.eastBoundLongitude,
               gbb.northBoundLatitude,
               gbb.westBoundLongitude,
               gbb.southBoundLatitude)
    return '{0} {1},{2} {3}'.format(*corners)
999   
def compareGeographicBoundingBoxes(gb1, gb2):
    """
        Tells whether two bounding boxes have the same string form as produced
        by fromGeographicBoundingBoxToString.
        @param gb1: the first bounding box, or None
        @param gb2: the second bounding box, or None
        @return: True when both string forms are equal, False otherwise
    """
    first_text = fromGeographicBoundingBoxToString(gb1)
    second_text = fromGeographicBoundingBoxToString(gb2)
    return first_text == second_text
Note: See TracBrowser for help on using the repository browser.