source: exist/trunk/python/ndgUtils/models/Atom.py @ 4512

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/models/Atom.py@4512
Revision 4512, 44.6 KB checked in by cbyrom, 12 years ago (diff)

Fix problem with retaining empty category data + improve output error logging + improve robustness of exist file retrieval.

Line 
1'''
2 Class representing data in  atom format - allowing moles data to be stored and accessed in a web feed compatible way
3 
4 @author: C Byrom, Tessella Jun 2008
5'''
6try: #python 2.5
7    from xml.etree import cElementTree as ET
8except ImportError:
9    try:
10        # if you've installed it yourself it comes this way
11        import cElementTree as ET
12    except ImportError:
13        # if you've egged it this is the way it comes
14        from ndgUtils.elementtree import cElementTree as ET
15import sys, logging, re, datetime
16from ndgUtils.eXistConnector import eXistConnector
17from ndgUtils.ETxmlView import et2text
18from ndgUtils.lib.utilities import getTripleData, escapeSpecialCharacters, \
19    tidyUpParameters, getISO8601Date, normaliseLongitude, formatDateYYYYMMDD
20from ndgUtils.vocabtermdata import VocabTermData as VTD
21from ndgUtils.models import MolesEntity as ME
22import csml.parser as CsmlParser
23from ndgUtils.models import Deployment as Deployment
24
25
class AtomError(Exception):
    """
    Error type raised by the atom model code.

    Creating the exception also writes the message to the error log, so
    callers do not need to log it again.
    """
    def __init__(self, msg):
        # log first, so the message is recorded even if the exception is swallowed
        logging.error(msg)
        super(AtomError, self).__init__(msg)
33
34
class Person(object):
    '''
    Class representing atom author type data - with name, uri and role attributes

    The person type decides which element the object is written out as
    (see ELEMENT_NAMES) and which tag holds the uri value.
    '''
    AUTHOR_TYPE = 0
    CONTRIBUTOR_TYPE = 1
    RESPONSIBLE_PARTY_TYPE = 2
    # element names - indexed by the person type constants above
    ELEMENT_NAMES = ["author", "contributor", "responsibleParty"]

    def __init__(self, personType = AUTHOR_TYPE, namespace = None):
        '''
        @keyword personType: one of the *_TYPE constants (default, AUTHOR_TYPE)
        @keyword namespace: namespace prefix to use for the output elements;
        NB, this is overridden with 'moles' for responsible party data
        '''
        self.type = personType
        self.ns = namespace
        self.name = ""
        self.uri = ""
        self.role = ""

        # NB, the atom format specifies slightly different data contents
        self.uriTagName = "email"
        # NB, responsible party data is always stored in the moles section
        if self.type == self.RESPONSIBLE_PARTY_TYPE:
            self.ns = 'moles'
            self.uriTagName = "uri"

    def __str__(self):
        '''
        @return: 'name | uri | role' triple string - or an empty string if no
        data is set
        '''
        if self.name or self.uri or self.role:
            return self.name + " | " + self.uri + " | " + self.role
        return ""

    def fromString(self, personString):
        '''
        Set the person data from a triple string
        @param personString: string of format, 'name | uri | role'
        '''
        (self.name, self.uri, self.role) = getTripleData(personString)

    def fromETElement(self, personTag):
        '''
        Set the person data from an elementtree element
        @param personTag: element with name, role and email/uri sub elements;
        any missing values are set to empty strings
        '''
        self.name = personTag.findtext('name') or ""
        self.role = personTag.findtext('role') or ""
        self.uri = personTag.findtext(self.uriTagName) or ""
        logging.debug("Added name: '%s', role: '%s', %s: '%s'" \
                      %(self.name, self.role, self.uriTagName, self.uri))

    def toXML(self):
        '''
        Create the XML element for the person - only including sub elements
        for the values that are actually set
        @return: elementtree Element representing the person
        '''
        prefix = ""
        if self.ns:
            prefix = self.ns + ':'

        author = ET.Element(prefix + self.ELEMENT_NAMES[self.type])

        if self.name:
            name = ET.SubElement(author, prefix + "name")
            name.text = self.name

        if self.uri:
            uri = ET.SubElement(author, prefix + self.uriTagName)
            uri.text = self.uri

        if self.role:
            role = ET.SubElement(author, prefix + "role")
            role.text = self.role

        return author

    def __eq__(self, person1):
        '''
        Value equality - allows proper object comparison when checking
        if Person objects are in an array already - i.e. if person in personArray...
        NB, __cmp__ alone is not enough for this, since it is ignored by python 3
        @return: True if uri, name, role and type all match; False otherwise
        '''
        if not person1:
            return False

        if self is person1:
            return True

        try:
            return self.uri == person1.uri and self.name == person1.name and \
                self.role == person1.role and self.type == person1.type
        except AttributeError:
            # not a person like object - so cannot be equal
            return False

    def __ne__(self, person1):
        # python 2 does not derive this from __eq__, so define it explicitly
        return not self.__eq__(person1)

    # NB, keep the identity based hash the class has always had - defining
    # __eq__ would otherwise make instances unhashable under python 3
    __hash__ = object.__hash__

    def __cmp__(self, person1):
        '''
        Legacy comparison (python 2 only) - retained for backwards compatibility
        @return: 0 if the objects are equal, -1 if person1 is empty, 1 otherwise
        '''
        if not person1:
            return -1

        if self.__eq__(person1):
            return 0
        return 1
108
109
class Link(object):
    '''
    Class representing an atom link - with href, title and rel attributes
    '''

    def __init__(self):
        self.href = ""
        self.title = ""
        self.rel = ""

    def fromString(self, linkString):
        '''
        Set the link data from a triple string
        @param linkString: string of format, 'href | title | rel'
        '''
        (self.href, self.title, self.rel) = getTripleData(linkString)

    def fromETElement(self, linkTag):
        '''
        Set the link data from the attributes of an elementtree element
        @param linkTag: <link> element; missing attributes are set to empty strings
        '''
        self.href = linkTag.attrib.get('href') or ""
        self.rel = linkTag.attrib.get('rel') or ""
        self.title = linkTag.attrib.get('title') or ""

    def toXML(self):
        '''
        @return: elementtree <link> Element with href, title and rel attributes
        '''
        link = ET.Element("link")
        link.attrib["href"] = self.href
        link.attrib["title"] = self.title
        link.attrib["rel"] = self.rel
        return link

    def hasValue(self):
        '''
        @return: True if the link has a href or title set; False otherwise
        '''
        # NB, just a rel on its own is meaningless - so ignore
        if self.href or self.title:
            return True
        return False

    def __str__(self):
        '''
        @return: 'href | title | rel' triple string - or an empty string if no
        data is set
        '''
        if self.href or self.title or self.rel:
            return self.href + " | " + self.title + " | " + self.rel
        return ""

    def isChildAtom(self):
        '''
        Determines whether the link refers to another atom - e.g. a link to
        a data granule
        @return True, if so; False otherwise
        '''
        # NB, endswith accepts a tuple of alternatives - so one call covers all terms
        return self.rel.endswith((VTD.GRANULE_TERM, VTD.DEPLOYMENT_TERM, \
                                  VTD.ACTIVITY_TERM, VTD.DPT_TERM, VTD.OBS_TERM))

    def __eq__(self, link1):
        '''
        Value equality - allows proper object comparison when checking
        if Link objects are in an array already - i.e. if link in linkArray...
        NB, __cmp__ alone is not enough for this, since it is ignored by python 3
        @return: True if href, title and rel all match; False otherwise
        '''
        if not link1:
            return False

        if self is link1:
            return True

        try:
            return self.href == link1.href and self.title == link1.title and \
                self.rel == link1.rel
        except AttributeError:
            # not a link like object - so cannot be equal
            return False

    def __ne__(self, link1):
        # python 2 does not derive this from __eq__, so define it explicitly
        return not self.__eq__(link1)

    # NB, keep the identity based hash the class has always had - defining
    # __eq__ would otherwise make instances unhashable under python 3
    __hash__ = object.__hash__

    def __cmp__(self, link1):
        '''
        Legacy comparison (python 2 only) - retained for backwards compatibility
        @return: 0 if the objects are equal, -1 if link1 is empty, 1 otherwise
        '''
        if not link1:
            return -1

        if self.__eq__(link1):
            return 0
        return 1
175
176
class Category(object):
    '''
    Atom category data holder - a term, a scheme and a label, all of which
    default to empty strings
    '''
    def __init__(self):
        self.term = self.scheme = self.label = ""

    def fromString(self, linkString, escapeSpecialCharacters=True):
        '''
        Populate the category from a triple string of format,
        'label | scheme | term'
        @param linkString: triple string holding the category data
        @keyword escapeSpecialCharacters: passed on to getTripleData as its
        doEscape keyword (default, True)
        '''
        triple = getTripleData(linkString, doEscape=escapeSpecialCharacters)
        self.label, self.scheme, self.term = triple

    def fromETElement(self, linkTag):
        '''
        Populate the category from the attributes of an elementtree element
        @param linkTag: <category> element; missing attributes become empty strings
        '''
        attributes = linkTag.attrib
        self.term = attributes.get('term') or ""
        self.label = attributes.get('label') or ""
        self.scheme = attributes.get('scheme') or ""

    def toXML(self):
        '''
        @return: elementtree <category> Element with term, scheme and label attributes
        '''
        element = ET.Element("category")
        element.set("term", self.term)
        element.set("scheme", self.scheme)
        element.set("label", self.label)
        return element

    def hasValue(self):
        '''
        @return: True if any of scheme, label or term is set; False otherwise
        '''
        return bool(self.scheme or self.label or self.term)
212
213
class Atom(object):
    '''
    Class representing data in atom format - allowing moles data to be
    stored and accessed in a web feed compatible way
    '''

    # labels for use with the atom categories - these are written to/read from
    # the 'term' attribute of the category elements holding the atom type data
    ATOM_TYPE = "ATOM_TYPE"
    ATOM_SUBTYPE = "ATOM_SUBTYPE"

    # labels for use with the templates to set/extract specific inputs
    # NOTE(review): the code using these labels is outside this part of the file
    ONLINE_REF_LABEL = "online_ref"
    PARAMETER_LABEL = "parameter"
    ATOM_REF_LABEL = "atom_ref"
    DELIMITER = "---"
    REMOVE_LABEL = "remove"
   
    # format to use for t1-t2 date range
    YEAR_FORMAT = '%Y-%m-%d'
229
230    def __init__(self, atomType = None, vocabTermData = None, ndgObject = None, \
231                 xmlString = None, state = eXistConnector.WORKING_COLLECTION_PATH, **inputs):
232        '''
233        Constructor - initialise the atom variables
234        '''
235        logging.info("Initialising atom")
236        if atomType:
237            logging.info(" - of type '%s'" %atomType)
238        self.atomTypeID = atomType
239
240        # some data have further subtypes specified
241        self.subtypeID = None # this should be the termID
242        self.subtype = None # and this should be the fully formed vocab URL
243       
244        self.ndgObject = ndgObject
245
246        self.atomName = None
247        self.files = []
248        self.author = None
249        self.contributors = []
250        self.atomAuthors = []
251        self.parameters = []
252        self.spatialData = []
253        self.temporalData = []
254        self.relatedLinks = []
255        self.summary = []
256        self.content = []
257        # NB, this deployments data duplicates other atom data - and is only used for a
258        # convenient way to collect the info (by lookupDeploymentsInfo()) for use in templates
259        self.deployments = []   
260        self.csmlFile = None
261        self.cdmlFile = None
262        # general variable to use for setting the atom content - NB, if a csmlFile is specified
263        # (either directly or via a cdmlFile specification), this will be the content by default
264        # for this purpose
265        self.contentFile = None     
266        self.title = None
267        self.datasetID = None        # NB, the dataset id ends up in the atomName - <path><datasetID>.atom
268        self.atomID = None
269   
270        # boundary box info - to replace spatial/temporalData?
271        self.minX = None
272        self.minY = None
273        self.maxX = None
274        self.maxY = None
275        self.t1 = None
276        self.t2 = None
277
278        self.ME = ME.MolesEntity()
279       
280        # date when the atom was first ingested
281        self.publishedDate = None
282
283        # last update date
284        self.updatedDate = None
285
286        # assume atom in working state by default - this is used to define what collection
287        # in eXist the atom is stored in
288        self.state = state
289       
290        # additional, non standard atom data can be included in the molesExtra element
291        if vocabTermData:
292            self.VTD = vocabTermData
293        else:
294            self.VTD = VTD()
295       
296        if xmlString:
297            self.fromString(xmlString)
298
299        # if inputs passed in as dict, add these now
300        if inputs:
301            logging.info("Adding info to atom from input dict")
302            logging.debug(inputs)
303            self.__dict__.update(inputs)
304           
305            # NB, this doesn't trigger the Summary Property, so do this
306            # explicitly, if need be
307            if inputs.has_key('Summary'):
308                self.Summary = inputs.get('Summary')
309            if inputs.has_key('Content'):
310                self.Content = inputs.get('Content')
311           
312            # also pass any moles data up to the moles entity object
313            if inputs.get('providerID'):
314                self.ME.providerID = inputs.get('providerID')
315               
316            if inputs.get('abbreviation'):
317                self.ME.abbreviation = inputs.get('abbreviation')
318
319        if self.atomTypeID:
320            self.atomTypeName = self.VTD.TERM_DATA[self.atomTypeID].title
321
322        logging.info("Atom initialised")
323
324
325    def addOnlineReferences(self, links):
326        '''
327        Add online reference data associated with the atom
328        - NB, care needs to be taken here since this data is stored in the atom
329        link elements and these are also used for the various atom associations
330        @param links: a Link or array of Links to add to the relatedLinks attribute
331        '''
332        logging.debug("Adding online references")
333        if not links:
334            return
335       
336        if type(links) is not list:
337            links = [links]
338       
339        # firstly clear out any online refs data from the existing related links
340        newLinks = []
341        for link in self.relatedLinks:
342            if link.isChildAtom():
343                newLinks.append(link)
344       
345        newLinks.extend(links)
346        self.relatedLinks = newLinks
347        logging.debug("Online references added")
348
349
350    def addUniqueRelatedLinks(self, links):
351        '''
352        Add links to relatedLinks array - if they are not already included
353        @param links: a Link or array of Links to add to the relatedLinks attribute
354        '''
355        self.addUniqueLinks(self.relatedLinks, links)
356       
357
358    def removeRelatedLinks(self, linksToDelete):
359        '''
360        Remove any links in the input list from the atom's related links list
361        @param linksToDelete: array of Link objects to remove from atom
362        '''
363        logging.debug("Removing related links from atom")
364        if not linksToDelete:
365            return
366       
367        if type(linksToDelete) is not list:
368            linksToDelete = [linksToDelete]
369       
370        updatedLinks = []
371        for link in self.relatedLinks:
372            if type(link) is not Link:
373                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
374                continue
375            if link in linksToDelete:
376                logging.debug("- found link to remove")
377            else:
378                updatedLinks.append(link)
379
380        self.relatedLinks = updatedLinks
381        logging.debug("Links removed")
382       
383
384    def getDefaultCollectionPath(self):
385        '''
386        Determine the correct collection to use for the atom in eXist
387        '''
388        collectionPath = eXistConnector.BASE_COLLECTION_PATH + self.state
389       
390        if self.atomTypeID == VTD.DE_TERM:
391            collectionPath += eXistConnector.DE_COLLECTION_PATH
392        elif self.atomTypeID == VTD.GRANULE_TERM:
393            collectionPath += eXistConnector.GRANULE_COLLECTION_PATH
394        elif self.atomTypeID == VTD.ACTIVITY_TERM and \
395            self.subtypeID == VTD.DEPLOYMENT_TERM:
396            collectionPath += eXistConnector.DEPLOYMENTS_COLLECTION_PATH
397        else:
398            collectionPath += eXistConnector.DEPLOYMENT_COLLECTION_PATH
399       
400        if not self.ME.providerID:
401            raise AtomError("Error: cannot determine atom collection path because " + \
402                            "the provider ID is not defined")
403           
404        collectionPath += self.ME.providerID + "/"
405        return collectionPath
406
407
408    def __addAtomTypeDataXML(self, root):
409        '''
410        Add the atom type, and subtype data, if available, to atom categories
411        - and lookup and add the appropriate vocab term data
412        '''
413        if self.atomTypeID:
414            logging.info("Adding atom type info to XML output")
415            category = Category()
416            category.label = self.atomTypeID
417            # look up the appropriate vocab term data
418            category.scheme = self.VTD.getTermCurrentVocabURL(self.atomTypeID)
419            category.term = self.ATOM_TYPE
420            root.append(category.toXML())
421
422        if self.subtypeID:
423            logging.info("Adding atom subtype info to XML output")
424            # NB subtypes not all defined, so leave this out for the moment
425            category.label = self.subtypeID
426            # look up the appropriate vocab term data
427            category.scheme = self.VTD.getTermCurrentVocabURL(self.subtypeID)
428            category.term = self.ATOM_SUBTYPE
429            root.append(category.toXML())
430
431
432    def addMolesEntityData(self, abbreviation, provider_id, object_creation_time):
433        '''
434        Add data to include in the moles entity element
435        '''
436        logging.debug('Adding moles entity information')
437        self.ME.abbreviation = abbreviation
438        self.ME.providerID = provider_id
439        self.ME.createdDate = getISO8601Date(object_creation_time)
440        logging.debug('Moles entity information added')
441
442
443    def addAuthors(self, authors):
444        '''
445        Add author data appropriately to the atom
446        NB, these will overwrite any existing authors of the same type
447        @param authors: list of Person objects with the author data
448        '''
449        logging.debug('Adding authors data to Atom')
450        isFirstAuthor = {}
451        authorArray = None
452        for author in authors:
453            # NB, we're only allowed one atom author
454            if author.type == Person.AUTHOR_TYPE:
455                self.author = author
456                if isFirstAuthor.has_key(author.type):
457                    raise AtomError("Error: an atom can only have one author specified")
458                isFirstAuthor[author.type] = 1
459                continue
460            elif author.type == Person.CONTRIBUTOR_TYPE:
461                authorArray = self.contributors
462            elif author.type == Person.RESPONSIBLE_PARTY_TYPE:
463                authorArray = self.ME.responsibleParties
464               
465            # check if this is the first addition - if so, clear out the
466            # array in advance
467            if not isFirstAuthor.has_key(author.type):
468                logging.debug("Clearing out author array")
469                # NB, need to be careful to clear the array, not create a ref
470                # to a new array
471                del authorArray[:]
472                isFirstAuthor[author.type] = 1
473
474            if str(author) != "" and author not in authorArray:
475                logging.debug("Adding author (type:'%s', name:'%s', uri:'%s', role:'%s')" \
476                              %(author.type, author.name, author.uri, author.role))
477                authorArray.append(author)
478
479        logging.debug('Finished adding authors data')
480
481
482    def _isNewParameter(self, param):
483        '''
484        Check if a parameter is already specified in the atom, return False if
485        so, otherwise return True
486        '''
487        for p in self.parameters:
488            if p.term == param.term and \
489                p.scheme == param.scheme and \
490                p.label == param.label:
491                return False
492        return True
493
494
495    def addRelatedLinks(self, linkVals):
496        '''
497        Add related links in string format - converting to Link objects
498        @param linkVals: string of format, 'uri | title | vocabServerURL'
499        '''
500        self.relatedLinks.append(self.objectify(linkVals, 'relatedLinks'))
501
502
503    def addParameters(self, params):
504        '''
505        Add a parameter to list - ensuring it is unique and has been formatted and tidied appropriately
506        @params param: parameter, as string array, to add to atom parameters collection
507        '''
508        # avoid strings being parsed character by character
509        if type(params) is str:
510            params = [params]
511           
512        for param in params:
513            # firstly tidy parameter
514            param = tidyUpParameters(param)
515            category = Category()
516            # NB, data already tidied up here, so set keyword to avoid this happening again
517            category.fromString(param, escapeSpecialCharacters=True)
518
519            # now check for uniqueness
520            if self._isNewParameter(category):
521                logging.debug("Adding new parameter: %s" %param)
522                self.parameters.append(category)
523   
524   
525    def _linksToXML(self, root):
526        '''
527        Add required links to the input element
528        @param root: element to add links to - NB, should be the root element of the atom
529        '''
530        selfLink = ET.SubElement(root, "link")
531        selfLink.attrib["href"] = self.atomBrowseURL
532        selfLink.attrib["rel"] = "self"
533        if self.subtypeID != VTD.DEPLOYMENT_TERM:
534            molesLink = ET.SubElement(root, "link")
535            molesDoc = re.sub('ATOM','NDG-B1', self.atomBrowseURL)
536            molesLink.attrib["href"] = molesDoc
537            molesLink.attrib["rel"] = 'related'
538       
539        for relatedLink in self.relatedLinks:
540            if relatedLink.hasValue():
541                root.append(relatedLink.toXML())
542   
543    def toXML(self):
544        '''
545        Convert the atom into XML representation and return this
546        @return: xml version of atom
547        '''
548        logging.info("Creating formatted XML version of Atom")
549        root = ET.Element("entry")
550        root.attrib["xmlns"] = "http://www.w3.org/2005/Atom"
551        root.attrib["xmlns:moles"] = "http://ndg.nerc.ac.uk/schema/moles2beta"
552        root.attrib["xmlns:georss"] = "http://www.georss.org/georss/10"
553        root.attrib["xmlns:gml"] = "http://www.opengis.net/gml"
554        id = ET.SubElement(root, "id")
555        id.text = self.atomID
556        title = ET.SubElement(root, "title")
557        title.text = self.title
558        self._linksToXML(root)
559
560        # NB, the author tag is mandatory for atoms - so if an explicit
561        # author has not been set, just take the author to be the provider
562        if not self.author:
563            author = Person()
564            author.name = self.ME.providerID
565            #author.uri = self.ME.providerID
566            self.author = author
567
568        root.append(self.author.toXML())
569           
570        for contributor in self.contributors:
571            root.append(contributor.toXML())
572
573        # add the moles entity section, if it is required
574        if self.ME:
575            root.append(self.ME.toXML())
576
577        # add parameters data
578        for param in self.parameters:
579            if param.hasValue():
580                root.append(param.toXML())
581
582        # add the type and subtype data
583        self.__addAtomTypeDataXML(root)
584                   
585        summary = ET.SubElement(root, "summary")
586        summary.text = self.Summary
587                   
588        # add link to content, if required - NB, can only have one content element in atom
589        # - and this is mandatory
590        content = ET.SubElement(root, "content")
591        contentFile = self.contentFile or self.csmlFile or self.cdmlFile
592        if contentFile:
593            content.attrib["type"] = "application/xml"
594            content.attrib["src"] = contentFile
595        else:
596            content.text = self.Content
597            content.attrib["type"] = "xhtml"
598       
599        # if there's a published date already defined, assume we're doing an update now
600        # NB, update element is mandatory
601        currentDate = datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%SZ")
602        if not self.publishedDate:
603            self.publishedDate = currentDate
604
605        updated = ET.SubElement(root, "updated")
606        if not self.updatedDate:
607            self.updatedDate = currentDate
608        updated.text = self.updatedDate
609
610        published = ET.SubElement(root, "published")
611        published.text = self.publishedDate
612
613        # add temporal range data, if available
614        temporalRange = ET.SubElement(root, "moles:temporalRange")
615        if self.t1:
616            temporalRange.text = self.t1
617            if self.t2:
618                temporalRange.text += "/" + self.t2
619
620        # add spatial range data, if available
621        self._addSpatialData(root)
622
623        tree = ET.ElementTree(root)
624        logging.info("XML version of Atom created")
625        return tree
626
627
628    def __getSummary(self):
629        logging.debug("Getting summary data")
630        summaryString = ""
631        for summary_line in self.summary:
632            summaryString += summary_line + "\n"
633
634        return summaryString
635
636    def __setSummary(self, summary):
637        logging.debug("Adding summary data")
638        self.summary = []
639        for summary_line in summary.split('\n'):
640            self.summary.append(escapeSpecialCharacters(summary_line))
641           
642    Summary = property(fset=__setSummary, fget=__getSummary, doc="Atom summary")
643
644
645    def __getContent(self):
646        logging.debug("Getting content data")
647        contentString = ""
648        # NB, there must be content specified in an atom
649        if not self.content:
650            return "Metadata document"
651       
652        for content_line in self.content:
653            contentString += content_line + "\n"
654
655        return contentString
656
657    def __setContent(self, content):
658        logging.debug("Adding content data")
659        self.content = []
660        for content_line in content.split('\n'):
661            self.content.append(content_line)
662           
663    Content = property(fset=__setContent, fget=__getContent, doc="Atom content")
664
665           
666    def fromString(self, xmlString):
667        '''
668        Initialise Atom object using an xmlString
669        @param xmlString: representation of atom as an XML string
670        '''
671        logging.info("Ingesting data from XML string")
672       
673        # firstly, remove any namespaces used - to avoid problems with elementtree
674        logging.debug("Stripping moles namespace from string to allow easy handling with elementtree")
675        xmlString = xmlString.replace('moles:', '')
676        xmlString = xmlString.replace('georss:', '')
677        xmlString = xmlString.replace('gml:', '')
678        xmlString = xmlString.replace('xmlns="http://www.w3.org/2005/Atom"', '')
679        xmlString = xmlString.replace('default:', '')
680
681        # now create elementtree with the XML string
682        logging.debug("Create elementtree instance with XML string")
683        tree = ET.fromstring(xmlString)
684       
685        title = tree.findtext('title')
686        if title:
687            logging.debug("Adding title data")
688            self.title = title
689
690        summary = tree.findtext('summary')
691        if summary:
692            self.Summary = summary#.decode('unicode_escape')
693
694        authorElement = tree.find('author')
695        logging.debug("Adding author data")
696        author = Person()
697        author.fromETElement(authorElement)
698        self.author = author
699
700        contributorElements = tree.findall('contributor')
701        for contributorElement in contributorElements:
702            logging.debug("Adding contributor data")
703            contributor = Person(personType = Person.CONTRIBUTOR_TYPE)
704            contributor.fromETElement(contributorElement)
705            self.contributors.append(contributor)
706
707        molesElement = tree.find('entity')
708        if molesElement:
709            self.ME.fromET(molesElement)
710               
711        self.atomID = tree.findtext('id')
712
713        self._parseCategoryData(tree.findall('category'))
714
715        self._parseLinksData(tree.findall('link'))
716           
717        contentTag = tree.find('content')
718        if contentTag != None:
719            logging.debug("Found content tag - checking for CSML/CDML file data")
720            file = contentTag.attrib.get('src')
721            if file:
722                # NB, the path will reveal more reliably whether we're dealing with CSML and CDML files
723                if file.upper().find('CSML') > -1:
724                    logging.debug("Adding CSML file data")
725                    self.csmlFile = file
726                elif file.upper().find('CDML') > -1:
727                    logging.debug("Adding CDML file data")
728                    self.cdmlFile = file
729                self.contentFile = file
730            else:
731                logging.debug("No file data - adding contents of element instead")
732                self.Content = contentTag.text
733       
734        range = tree.findtext('temporalRange')
735        if range:
736            logging.debug("Adding temporal range data")
737            timeData = range.split('/')
738            self.t1 = timeData[0]
739            if len(timeData) > 1:
740                self.t2 = timeData[1]
741       
742        # NB, this parser won't mind if we're dealing with Envelope or EnvelopeWithTimePeriod
743        minBBox = tree.findall('.//lowerCorner')
744        if minBBox:
745            logging.debug("Adding min spatial range data")
746            minBBox = minBBox[0]
747            spatialData = minBBox.text.split()
748            self.minX = spatialData[0]
749            if len(spatialData) > 1:
750                self.minY = spatialData[1]
751       
752        maxBBox = tree.findall('.//upperCorner')
753        if maxBBox:
754            maxBBox = maxBBox[0]
755            logging.debug("Adding max spatial range data")
756            spatialData = maxBBox.text.split()
757            self.maxX = spatialData[0]
758            if len(spatialData) > 1:
759                self.maxY = spatialData[1]
760               
761        publishedDate = tree.findtext('published')
762        if publishedDate:
763            logging.debug("Adding published date")
764            self.publishedDate = publishedDate
765               
766        updatedDate = tree.findtext('updated')
767        if updatedDate:
768            logging.debug("Adding updated date")
769            self.updatedDate = updatedDate
770           
771        logging.info("Completed data ingest")
772   
773   
774    def _parseCategoryData(self, categories):
775        logging.debug("Adding category/parameters data")
776        for category in categories:
777            cat = Category()
778            cat.fromETElement(category)
779           
780            if cat.term == self.ATOM_TYPE:
781                logging.debug("Found atom type data")
782                self.atomTypeID = cat.label
783                self.atomTypeName = self.VTD.TERM_DATA[cat.label].title
784                continue
785            elif cat.term == self.ATOM_SUBTYPE:
786                logging.debug("Found atom subtype data")
787                self.subtypeID = cat.label
788                self.subtype = cat.scheme
789                continue
790
791            self.parameters.append(cat)
792   
793
794    def setDatasetID(self, datasetID):
795        '''
796        Set the dataset ID for the atom - and generate an appropriate atom name using this
797        @param datasetID: ID to set for the atom
798        '''
799        self.datasetID = datasetID
800        self._generateAtomName(datasetID) 
801        self.atomID = self.createAtomID(datasetID)
802
803
804    def createAtomID(self, datasetID):
805        '''
806        Create a unique ID, conforming to atom standards, for atom
807        NB, see http://diveintomark.org/archives/2004/05/28/howto-atom-id
808        @param datasetID: ID of atom's dataset
809        @return: unique ID
810        '''
811        logging.info("Creating unique ID for atom")
812        if not self.atomBrowseURL:
813            self._generateAtomName(datasetID)
814        urlBit = self.atomBrowseURL.split('://')[1]
815        urlBit = urlBit.replace('#', '')
816        urlBits = urlBit.split('/')
817        dateBit = datetime.datetime.today().strftime("%Y-%m-%d")
818       
819        id = "tag:" + urlBits[0] + "," + dateBit + ":/" + "/".join(urlBits[1:])
820        logging.info("- unique ID created for atom")
821        logging.debug(" - '%s'" %id)
822        return id
823       
824       
825    def _generateAtomName(self, datasetID):
826        '''
827        Generate a consistent name for the atom - with full eXist doc path
828        @param datasetID: ID of atom's dataset
829        '''
830        self.atomName = datasetID + ".atom"
831        self.ndgURI = self.ME.providerID + "__ATOM__" + datasetID
832        self.atomBrowseURL = VTD.BROWSE_ROOT_URL + self.ndgURI
833
834
835    def _parseLinksData(self, links):
836        '''
837        Extract links and atom data from array of link elements in the XML representation of the atom
838        @param links: an array of <link> elements
839        '''
840        # firstly, get all data to start with, so we can properly process it afterwards
841        linkData = {}
842        logging.debug("Getting link data")
843        for linkTag in links:
844            link = Link()
845            link.fromETElement(linkTag)
846
847            if not linkData.has_key(link.rel):
848                linkData[link.rel] = []
849           
850            linkData[link.rel].append(link)
851
852        # there should be one self referencing link - which will provide info on the atom itself
853        if not linkData.has_key('self'):
854            errorMessage = "Atom does not have self referencing link - " + \
855                "cannot ascertain datasetID without this - please fix"
856            logging.error(errorMessage)
857            raise ValueError(errorMessage)
858       
859        # this is the link describing the atom itself
860        self.atomBrowseURL = linkData['self'][0].href
861       
862        self.datasetID = self.atomBrowseURL.split("__ATOM__")[-1]
863        self.atomName = self.datasetID + ".atom"
864        self.ndgURI = self.atomBrowseURL.split(VTD.BROWSE_ROOT_URL)[1]
865       
866        # now remove this value and the associated moles doc link
867        del linkData['self']
868        molesDoc = self.atomBrowseURL.replace('ATOM', 'NDG-B1')
869        if linkData.has_key('related'):
870            relatedLinks = []
871            for link in linkData['related']:
872                if link.href != molesDoc:
873                    relatedLinks.append(link)
874           
875            linkData['related'] = relatedLinks
876               
877        # now add the remaining links to the atom
878        for key in linkData:
879            for link in linkData[key]:
880                logging.debug("Adding link data")
881                self.relatedLinks.append(link)
882       
883
884    def _addSpatialData(self, element):
885        '''
886        Add spatial coverage element to an input element
887        @param element: element to add coverage data to
888        '''
889        logging.info("Adding spatial data to Atom")
890        if not self.minX:
891            logging.info("No spatial data specified")
892            return
893        bbox = ET.SubElement(element, "georss:where")
894        envelope = ET.SubElement(bbox, "gml:Envelope")
895        lc = ET.SubElement(envelope, "gml:lowerCorner")
896        lc.text = str(self.minX) + " " + str(self.minY)
897        uc = ET.SubElement(envelope, "gml:upperCorner")
898        uc.text = str(self.maxX) + " " + str(self.maxY)
899
900       
901    def setAttribute(self, attributeName, attributeValue):
902        '''
903        Set the value of an atom attribute - and do some basic tidying up of the string content
904        - to escape any XML unfriendly characters
905        @param attributeName: name of the attribute whose value to set
906        @param attributeValue: value to set the attribute to 
907        '''
908        logging.debug("Setting attribute, %s, to %s" %(attributeName, attributeValue))
909        origValue = attributeValue
910       
911        # escape any special characters if a value has been specified
912        # NB, need to cope with both single values and arrays
913        if attributeValue:
914            if type(attributeValue) is list:
915                newVals = []
916                for val in attributeValue:
917                    newVals.append(self.objectify(escapeSpecialCharacters(val), attributeName))
918                attributeValue = newVals
919                   
920            else:
921                attributeValue = self.objectify(escapeSpecialCharacters(attributeValue), attributeName)
922
923        # handle the special case of authors; only one author is allowed per atom
924        # - the others should be treated as contributors
925        if attributeName == "authors":
926            setattr(self, "author", attributeValue[0])
927            if len(attributeValue) > 1:
928                setattr(self, "contributors", attributeValue[1:])
929        elif attributeName == "atomAuthors":
930            self.ME.responsibleParties.append(attributeValue)
931        else:
932            setattr(self, attributeName, attributeValue)
933
934
935    def objectify(self, objectVals, attributeName):
936        '''
937        Some inputs are specified as strings but need to be converted into
938        objects - do this here
939        @param objectVals: a '|' delimited string of values
940        @param attributeName: name of attribute the values belong to
941        '''
942        obj = None
943        if type(objectVals) != str:
944            return objectVals
945       
946        if attributeName == "relatedLinks":
947            obj = Link()
948        elif attributeName == "atomAuthors" or attributeName == "authors":
949            obj = Person()
950
951        if obj:
952            obj.fromString(objectVals)
953            return obj
954       
955        return objectVals
956
957
958    def toPrettyXML(self):
959        '''
960        Returns nicely formatted XML as string
961        '''
962        atomXML = self.toXML()
963
964        # create the string
965        logging.debug("Converting the elementtree object into a string")
966        prettyXML = et2text(atomXML.getroot())
967
968        # add XML version tag
969        prettyXML = "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n" + prettyXML
970        logging.info("Created formatted version of XML object")
971        return prettyXML
972
973
974    def getLinksOfType(self, termID):
975        '''
976        Returns links in the atom related links attribute which match the specified
977        term ID
978        @param termID: the termID to look for in the related links - NB, this is
979        matched to the end of the link.rel value
980        @return links: array of Link objects with matching term type
981        '''
982        logging.debug("Getting atom links of type, '%s'" %termID)
983        matchingLinks = []
984        for link in self.relatedLinks:
985            # firstly, handle special case where we only want the online ref type links
986            # returned
987            if termID == self.ONLINE_REF_LABEL:
988                if not link.isChildAtom():
989                    logging.debug("- found link with matching term type")
990                    matchingLinks.append(link)
991               
992            elif link and link.rel and link.rel.lower().endswith(termID.lower()):
993                logging.debug("- found link with matching term type")
994                matchingLinks.append(link)
995               
996        logging.debug("Returning matched links")
997        return matchingLinks
998       
999       
1000    def getLogos(self):
1001        '''
1002        Return related links that are logos
1003        @return: array of Links containing the logos for the atom
1004        '''
1005        logos = []
1006        for link in self.relatedLinks:
1007            if link.rel.lower().endswith(VTD.LOGO_TERM.lower()):
1008                logos.append(link)
1009               
1010        return logos
1011   
1012   
1013    def isGranule(self):
1014        if self.atomTypeID == VTD.GRANULE_TERM:
1015            return True
1016        return False
1017   
1018   
1019    def isDE(self):
1020        if self.atomTypeID == VTD.DE_TERM:
1021            return True
1022        return False
1023   
1024    def isDeployment(self):
1025        if self.subtypeID and self.subtypeID == VTD.DEPLOYMENT_TERM:
1026            return True
1027        return False
1028   
1029    def isDeployable(self):
1030        if (self.atomTypeID == VTD.ACTIVITY_TERM and self.subtypeID != VTD.DEPLOYMENT_TERM) or \
1031            self.atomTypeID == VTD.DPT_TERM or \
1032            self.atomTypeID == VTD.OBS_TERM:
1033            return True
1034        return False
1035
1036       
1037    def addCSMLData(self, csmlName, csmlContent, aggregateCoverage=False, useCSMLID=False):
1038        '''
1039        Parse CSML data and add extracted info to the atom
1040        @param csmlName: name of the csml file
1041        @param csmlContent: content of the csml file - NB, if this is set to None and the
1042        file, csmlName, is available locally, CsmlParser.Dataset will read in the file
1043        directly
1044        @keyword aggregateCoverage: if set to True, only coverage data that extends the
1045        @keyword useCSMLID: if True, use the CSML doc ID as the dataset ID - NB,
1046        this should only be True if creating a new atom - e.g. from a granulite
1047        atom coverage data will be added
1048        @return csmlDoc: the CsmlParser.Dataset object with the csml data in
1049        '''
1050        logging.info("Creating CSML data model")
1051        self.csmlFile = csmlName
1052        self.contentFile = csmlName
1053        content = csmlContent or csmlName
1054        csmlDoc = CsmlParser.Dataset(file=content)
1055       
1056        logging.info("Extracting info from CSML file")
1057        logging.debug("Got dataset ID: %s" %csmlDoc.id)
1058        if useCSMLID:
1059            logging.debug(" - using this ID for the atom")
1060            self.setDatasetID(VTD.GRANULE_TERM + '_' + csmlDoc.id)
1061       
1062        title = csmlDoc.name.CONTENT
1063        logging.debug("Got dataset name (title): '%s'" %title)
1064        # NB, if a title is specified (and not as the default value), it automatically is used in
1065        # place of anything in the granulite file
1066        if title and title != "NAME OF DATASET GOES HERE":
1067            logging.info("Title, '%s', extracted from CSML file" %title)
1068            if self.title:
1069                logging.info("- NB, this will override the title specified in the granulite file ('%s')" \
1070                             %self.title)
1071            self.title = title
1072               
1073        bbox1 = csmlDoc.getBoundingBox()
1074        bbox2 = csmlDoc.getCSMLBoundingBox()
1075        time = bbox2.getTimeLimits()
1076   
1077        # now check for other parameters to add to granule
1078        # Firstly, extract the bounding envelope
1079        if bbox1:
1080            w, e = normaliseLongitude(bbox1[0],bbox1[2])
1081            n, s = (bbox1[3], bbox1[1])
1082   
1083            if not aggregateCoverage or (not self.maxY or float(n) > float(self.maxY)):
1084                self.maxY = n
1085               
1086            if not aggregateCoverage or (not self.minY or float(s) < float(self.minY)):
1087                self.minY = s
1088           
1089            if not aggregateCoverage or (not self.minX or float(w) < float(self.minX)):
1090                self.minX = w
1091   
1092            if not aggregateCoverage or (not self.maxX or float(e) > float(self.maxX)):
1093                self.maxX = e
1094           
1095            logging.debug("Got bounding box data from file: (%s, %s) , (%s, %s)" \
1096                          %(w, s, e, n))
1097           
1098            logging.debug("Updated atom bounding box data: (%s, %s) , (%s, %s)" \
1099                          %(self.minX, self.minY, self.maxX, self.maxY))
1100        else:
1101            logging.debug("No valid bounding box data found")
1102   
1103        if time:
1104            t1 = formatDateYYYYMMDD(time[0])
1105            if not aggregateCoverage or \
1106                (not self.t1 or datetime.datetime.strptime(t1, YEAR_FORMAT) < \
1107                    datetime.datetime.strptime(self.t1, YEAR_FORMAT)):
1108                self.t1 = t1
1109   
1110            t2 = time[1]
1111            if t2 and t2 != 'None':
1112                t2 = formatDateYYYYMMDD(t2)
1113                if not aggregateCoverage or \
1114                    (not self.t2 or datetime.datetime.strptime(t2, YEAR_FORMAT) > \
1115                        datetime.datetime.strptime(self.t2, YEAR_FORMAT)):
1116                    self.t2 = t2
1117           
1118            logging.debug("Got time range: %s -> %s" %(self.t1, self.t2))
1119        else:
1120            logging.debug("No valid time range data found")
1121   
1122        #create parameter summaries:
1123        #set up list to hold the parameters data
1124        parameters = []
1125        for feature in csmlDoc.featureCollection.featureMembers:
1126            if hasattr(feature.parameter, 'href'):
1127                paramTriple = ""
1128                if hasattr(feature, 'description'):
1129                    paramTriple = feature.description.CONTENT
1130                    paramTriple += " | " + feature.parameter.href
1131                   
1132                    term = ""
1133                    if hasattr(feature, 'name'):
1134                        term = feature.name.CONTENT
1135   
1136                    paramTriple += " | " + term
1137                   
1138                    logging.debug("Got parameter info: %s" %paramTriple)
1139                    parameters.append(paramTriple)
1140       
1141        # update the atom with the extracted parameters
1142        logging.info("Adding CSML parameters to granule atom")
1143        self.addParameters(parameters)
1144        logging.info("Finished adding CSML data")
1145        return csmlDoc
1146
1147
1148    def lookupDeploymentsInfo(self, dr, lookupIndirectReferences=False):
1149        '''
1150        Check through the atom links and retrieve any associated deployments
1151        data
1152        @param dr: Instance of DocumentRetrieve object - NB, this requires eXist
1153        config details which are not available to the Atom object
1154        @keyword lookupIndirectReferences: if True, the atom ID is used to search
1155        defined deployments to find those which reference it, otherwise only
1156        deployments data featured in the atom related links are processed
1157        '''
1158        logging.info("Looking up deployments info")
1159        self.deployments = []
1160        self.allActivities = []
1161        self.allObs = []
1162        self.allDpts = []
1163
1164        if lookupIndirectReferences:
1165            logging.info("Looking up references to this atom from other deployments")
1166            doc = dr.get(self.ME.providerID, dr.ATOM_DEPLOYMENTS, self.atomBrowseURL, \
1167                                        targetCollection='/db/atoms/')
1168            # now need to turn this results set into actual atoms
1169            tree = ET.fromstring(doc)
1170            links = []
1171            for atom in tree:
1172                logging.debug("- found reference in deployment")
1173                links.append(ET.tostring(atom))
1174           
1175        else:
1176            links = self.getLinksOfType(self.VTD.DEPLOYMENT_TERM)
1177
1178        for link in links:
1179            if lookupIndirectReferences:
1180                deploymentAtom = link
1181            else:
1182                localID = link.href.split("__ATOM__")[-1]
1183                deploymentAtom = dr.get(self.ME.providerID, 'ATOM', localID, \
1184                                        targetCollection='/db/atoms/')
1185
1186            deployment = Deployment.Deployment(Atom(xmlString=str(deploymentAtom)))
1187            self.deployments.append(deployment)
1188           
1189            self.addUniqueLinks(self.allActivities, deployment.activities)
1190            self.addUniqueLinks(self.allObs, deployment.obs)
1191            self.addUniqueLinks(self.allDpts, deployment.dpts)
1192       
1193        logging.info("Finished looking up deployments info")
1194
1195
1196    def addUniqueLinks(self, dataArray, links):
1197        '''
1198        Add links to specified array - if they are not already included
1199        @param dataArray: a list, potentially arlready containing links
1200        @param links: a Link or array of Links to add to the dataArray
1201        '''
1202        logging.debug("Adding new links")
1203        if not links:
1204            return
1205       
1206        if type(links) is not list:
1207            links = [links]
1208       
1209        for link in links:
1210            if type(link) is not Link:
1211                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
1212                continue
1213            if link not in dataArray:
1214                logging.debug("- adding unique link")
1215                dataArray.append(link)
1216        logging.debug("Finished adding links")
Note: See TracBrowser for help on using the repository browser.