source: exist/trunk/python/ndgUtils/models/Atom.py @ 4490

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/models/Atom.py@4490
Revision 4490, 43.9 KB checked in by cbyrom, 12 years ago (diff)

Add methods to the eXist DB client to ingest the required atom xsd docs + to allow validation of atoms against these schemae. Add ability to validate docs already in eXist and also to temporarily ingest them to allow validation.

Line 
1'''
2 Class representing data in  atom format - allowing moles data to be stored and accessed in a web feed compatible way
3 
4 @author: C Byrom, Tessella Jun 2008
5'''
6try: #python 2.5
7    from xml.etree import cElementTree as ET
8except ImportError:
9    try:
10        # if you've installed it yourself it comes this way
11        import cElementTree as ET
12    except ImportError:
13        # if you've egged it this is the way it comes
14        from ndgUtils.elementtree import cElementTree as ET
15import sys, logging, re, datetime
16from ndgUtils.eXistConnector import eXistConnector
17from ndgUtils.ETxmlView import et2text
18from ndgUtils.lib.utilities import getTripleData, escapeSpecialCharacters, \
19    tidyUpParameters, getISO8601Date, normaliseLongitude
20from ndgUtils.vocabtermdata import VocabTermData as VTD
21from ndgUtils.models import MolesEntity as ME
22import csml.parser as CsmlParser
23from ndgUtils.models import Deployment as Deployment
24
25
class AtomError(Exception):
    """
    Error raised by the atom model code.

    Creating the exception writes the message to the error log, so the
    problem is recorded at the point the exception is constructed.
    """
    def __init__(self, msg):
        # record the problem first, then initialise the standard exception
        logging.error(msg)
        super(AtomError, self).__init__(msg)
33
34
class Person(object):
    '''
    Class representing atom author type data - with name, uri and role attributes
    '''
    AUTHOR_TYPE = 0
    CONTRIBUTOR_TYPE = 1
    RESPONSIBLE_PARTY_TYPE = 2
    # element names to use in toXML() - indexed by the type constants above
    ELEMENT_NAMES = ["author", "contributor", "responsibleParty"]

    def __init__(self, personType = AUTHOR_TYPE, namespace = None):
        '''
        @keyword personType: one of AUTHOR_TYPE, CONTRIBUTOR_TYPE or
        RESPONSIBLE_PARTY_TYPE (default is AUTHOR_TYPE)
        @keyword namespace: prefix to add to the element names produced by
        toXML(); NB, this is always overridden with 'moles' for responsible parties
        '''
        self.type = personType
        self.ns = namespace
        self.name = ""
        self.uri = ""
        self.role = ""

        # NB, the atom format specifies slightly different data contents
        self.uriTagName = "email"
        # NB, responsible party data is always stored in the moles section
        if self.type == self.RESPONSIBLE_PARTY_TYPE:
            self.ns = 'moles'
            self.uriTagName = "uri"

    def __str__(self):
        '''
        @return: 'name | uri | role' triple string - or an empty string if
        none of these values are set
        '''
        if self.name or self.uri or self.role:
            return self.name + " | " + self.uri + " | " + self.role
        return ""

    def fromString(self, personString):
        '''
        Set name, uri and role - in that order - from the values returned
        by getTripleData for the input string
        @param personString: triple string to extract the data from
        '''
        (self.name, self.uri, self.role) = getTripleData(personString)

    def fromETElement(self, personTag):
        '''
        Set name, role and uri from the child elements of an elementtree element
        - missing or empty children result in empty strings
        @param personTag: element with 'name', 'role' and uri/email children
        '''
        self.name = personTag.findtext('name') or ""
        self.role = personTag.findtext('role') or ""
        self.uri = personTag.findtext(self.uriTagName) or ""
        logging.debug("Added name: '%s', role: '%s', %s: '%s'",
                      self.name, self.role, self.uriTagName, self.uri)

    def toXML(self):
        '''
        Create an element for the person - with child elements for each of
        name, uri/email and role which has a value
        @return: elementtree Element representing the person
        '''
        prefix = ""
        if self.ns:
            prefix = self.ns + ':'

        author = ET.Element(prefix + self.ELEMENT_NAMES[self.type])

        if self.name:
            name = ET.SubElement(author, prefix + "name")
            name.text = self.name

        if self.uri:
            uri = ET.SubElement(author, prefix + self.uriTagName)
            uri.text = self.uri

        if self.role:
            role = ET.SubElement(author, prefix + "role")
            role.text = self.role

        return author

    def __eq__(self, other):
        '''
        Two Person objects are equal if their uri, name, role and type match.
        NB, this is what makes 'if person in personArray...' work by value;
        __cmp__ alone is ignored by python 3.
        '''
        if self is other:
            return True
        if not isinstance(other, Person):
            return NotImplemented
        return self.uri == other.uri and self.name == other.name and \
            self.role == other.role and self.type == other.type

    def __ne__(self, other):
        # python 2 does not derive != from __eq__, so define it explicitly
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __cmp__(self, person1):
        '''
        Override comparison to allow proper object comparison when checking
        if Person objects are in an array already - i.e. if person in personArray...
        NB, only used by python 2; returns 0 for equal objects, -1 if the
        other object is empty and 1 otherwise
        '''
        if not person1:
            return -1

        # call __eq__ directly - using '==' here could fall back to __cmp__
        if self.__eq__(person1) is True:
            return 0
        return 1
108
109
class Link(object):
    '''
    Class representing an atom link - with href, title and rel attributes
    '''

    def __init__(self):
        self.href = ""
        self.title = ""
        self.rel = ""

    def fromString(self, linkString):
        '''
        Set href, title and rel - in that order - from the values returned
        by getTripleData for the input string
        @param linkString: triple string to extract the data from
        '''
        (self.href, self.title, self.rel) = getTripleData(linkString)

    def fromETElement(self, linkTag):
        '''
        Set href, rel and title from the attributes of an elementtree element
        - missing attributes result in empty strings
        @param linkTag: element to get the attributes from
        '''
        self.href = linkTag.attrib.get('href') or ""
        self.rel = linkTag.attrib.get('rel') or ""
        self.title = linkTag.attrib.get('title') or ""

    def toXML(self):
        '''
        @return: 'link' Element with href, title and rel attributes set
        '''
        link = ET.Element("link")
        link.attrib["href"] = self.href
        link.attrib["title"] = self.title
        link.attrib["rel"] = self.rel
        return link

    def hasValue(self):
        '''
        @return True if the link has a href or a title; False otherwise
        '''
        # NB, just a rel on its own is meaningless - so ignore
        return bool(self.href or self.title)

    def __str__(self):
        '''
        @return: 'href | title | rel' triple string - or an empty string if
        none of these values are set
        '''
        if self.href or self.title or self.rel:
            return self.href + " | " + self.title + " | " + self.rel
        return ""

    def isChildAtom(self):
        '''
        Determines whether the link refers to another atom - e.g. a link to
        a data granule
        @return True, if so; False otherwise
        '''
        # NB, endswith accepts a tuple of alternatives
        return self.rel.endswith((VTD.GRANULE_TERM, VTD.DEPLOYMENT_TERM,
                                  VTD.ACTIVITY_TERM, VTD.DPT_TERM,
                                  VTD.OBS_TERM))

    def __eq__(self, other):
        '''
        Two Link objects are equal if their href, title and rel match.
        NB, this is what makes 'if link in linkArray...' work by value;
        __cmp__ alone is ignored by python 3.
        '''
        if self is other:
            return True
        if not isinstance(other, Link):
            return NotImplemented
        return self.href == other.href and self.title == other.title and \
            self.rel == other.rel

    def __ne__(self, other):
        # python 2 does not derive != from __eq__, so define it explicitly
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __cmp__(self, link1):
        '''
        Override comparison to allow proper object comparison when checking
        if Link objects are in an array already - i.e. if link in linkArray...
        NB, only used by python 2; returns 0 for equal objects, -1 if the
        other object is empty and 1 otherwise
        '''
        if not link1:
            return -1

        # call __eq__ directly - using '==' here could fall back to __cmp__
        if self.__eq__(link1) is True:
            return 0
        return 1
175
176
class Category(object):
    '''
    An atom category element, holding term, scheme and label values
    '''
    def __init__(self):
        # all values default to empty strings
        self.term = ""
        self.scheme = ""
        self.label = ""

    def fromString(self, linkString, escapeSpecialCharacters=True):
        '''
        Populate the category from a triple string, 'label | scheme | term'
        @param linkString: triple string holding the category data
        @keyword escapeSpecialCharacters: passed on to getTripleData as its
        doEscape keyword (default True)
        '''
        newLabel, newScheme, newTerm = getTripleData(
            linkString, doEscape=escapeSpecialCharacters)
        self.label = newLabel
        self.scheme = newScheme
        self.term = newTerm

    def fromETElement(self, linkTag):
        '''
        Populate the category from the attributes of an elementtree element
        - any attribute not present is stored as an empty string
        @param linkTag: element to read term, label and scheme from
        '''
        attributes = linkTag.attrib
        self.term = attributes.get('term') or ""
        self.label = attributes.get('label') or ""
        self.scheme = attributes.get('scheme') or ""

    def toXML(self):
        '''
        @return: 'category' Element with term, scheme and label attributes
        '''
        element = ET.Element("category")
        for key, value in (("term", self.term),
                           ("scheme", self.scheme),
                           ("label", self.label)):
            element.attrib[key] = value
        return element

    def hasValue(self):
        '''
        @return: True if any of scheme, label or term is set; False otherwise
        '''
        return bool(self.scheme or self.label or self.term)
212
213
class Atom(object):
    '''
    Class representing a single atom document - it can be populated from
    keyword inputs or from an XML string and converted back to XML
    '''

    # labels for use with the atom categories
    ATOM_TYPE = "ATOM_TYPE"
    ATOM_SUBTYPE = "ATOM_SUBTYPE"

    # labels for use with the templates to set/extract specific inputs
    ONLINE_REF_LABEL = "online_ref"
    PARAMETER_LABEL = "parameter"
    ATOM_REF_LABEL = "atom_ref"
    DELIMITER = "---"
    REMOVE_LABEL = "remove"
   
    # format to use for t1-t2 date range
    YEAR_FORMAT = '%Y-%m-%d'
229
230    def __init__(self, atomType = None, vocabTermData = None, ndgObject = None, \
231                 xmlString = None, state = eXistConnector.WORKING_COLLECTION_PATH, **inputs):
232        '''
233        Constructor - initialise the atom variables
234        '''
235        logging.info("Initialising atom")
236        if atomType:
237            logging.info(" - of type '%s'" %atomType)
238        self.atomTypeID = atomType
239
240        # some data have further subtypes specified
241        self.subtypeID = None # this should be the termID
242        self.subtype = None # and this should be the fully formed vocab URL
243       
244        self.ndgObject = ndgObject
245
246        self.atomName = None
247        self.files = []
248        self.author = None
249        self.contributors = []
250        self.atomAuthors = []
251        self.parameters = []
252        self.spatialData = []
253        self.temporalData = []
254        self.relatedLinks = []
255        self.summary = []
256        self.content = []
257        # NB, this deployments data duplicates other atom data - and is only used for a
258        # convenient way to collect the info (by lookupDeploymentsInfo()) for use in templates
259        self.deployments = []   
260        self.csmlFile = None
261        self.cdmlFile = None
262        # general variable to use for setting the atom content - NB, if a csmlFile is specified
263        # (either directly or via a cdmlFile specification), this will be the content by default
264        # for this purpose
265        self.contentFile = None     
266        self.title = None
267        self.datasetID = None        # NB, the dataset id ends up in the atomName - <path><datasetID>.atom
268        self.atomID = None
269   
270        # boundary box info - to replace spatial/temporalData?
271        self.minX = None
272        self.minY = None
273        self.maxX = None
274        self.maxY = None
275        self.t1 = None
276        self.t2 = None
277
278        self.ME = ME.MolesEntity()
279       
280        # date when the atom was first ingested
281        self.publishedDate = None
282
283        # last update date
284        self.updatedDate = None
285
286        # assume atom in working state by default - this is used to define what collection
287        # in eXist the atom is stored in
288        self.state = state
289       
290        # additional, non standard atom data can be included in the molesExtra element
291        if vocabTermData:
292            self.VTD = vocabTermData
293        else:
294            self.VTD = VTD()
295       
296        if xmlString:
297            self.fromString(xmlString)
298
299        # if inputs passed in as dict, add these now
300        if inputs:
301            logging.info("Adding info to atom from input dict")
302            logging.debug(inputs)
303            self.__dict__.update(inputs)
304           
305            # NB, this doesn't trigger the Summary Property, so do this
306            # explicitly, if need be
307            if inputs.has_key('Summary'):
308                self.Summary = inputs.get('Summary')
309            if inputs.has_key('Content'):
310                self.Content = inputs.get('Content')
311           
312            # also pass any moles data up to the moles entity object
313            if inputs.get('providerID'):
314                self.ME.providerID = inputs.get('providerID')
315               
316            if inputs.get('abbreviation'):
317                self.ME.abbreviation = inputs.get('abbreviation')
318
319        if self.atomTypeID:
320            self.atomTypeName = self.VTD.TERM_DATA[self.atomTypeID].title
321
322        logging.info("Atom initialised")
323
324
325    def addOnlineReferences(self, links):
326        '''
327        Add online reference data associated with the atom
328        - NB, care needs to be taken here since this data is stored in the atom
329        link elements and these are also used for the various atom associations
330        @param links: a Link or array of Links to add to the relatedLinks attribute
331        '''
332        logging.debug("Adding online references")
333        if not links:
334            return
335       
336        if type(links) is not list:
337            links = [links]
338       
339        # firstly clear out any online refs data from the existing related links
340        newLinks = []
341        for link in self.relatedLinks:
342            if link.isChildAtom():
343                newLinks.append(link)
344       
345        newLinks.extend(links)
346        self.relatedLinks = newLinks
347        logging.debug("Online references added")
348
349
350    def addUniqueRelatedLinks(self, links):
351        '''
352        Add links to relatedLinks array - if they are not already included
353        @param links: a Link or array of Links to add to the relatedLinks attribute
354        '''
355        self.addUniqueLinks(self.relatedLinks, links)
356       
357
358    def removeRelatedLinks(self, linksToDelete):
359        '''
360        Remove any links in the input list from the atom's related links list
361        @param linksToDelete: array of Link objects to remove from atom
362        '''
363        logging.debug("Removing related links from atom")
364        if not linksToDelete:
365            return
366       
367        if type(linksToDelete) is not list:
368            linksToDelete = [linksToDelete]
369       
370        updatedLinks = []
371        for link in self.relatedLinks:
372            if type(link) is not Link:
373                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
374                continue
375            if link in linksToDelete:
376                logging.debug("- found link to remove")
377            else:
378                updatedLinks.append(link)
379
380        self.relatedLinks = updatedLinks
381        logging.debug("Links removed")
382       
383
384    def getDefaultCollectionPath(self):
385        '''
386        Determine the correct collection to use for the atom in eXist
387        '''
388        collectionPath = eXistConnector.BASE_COLLECTION_PATH + self.state
389       
390        if self.atomTypeID == VTD.DE_TERM:
391            collectionPath += eXistConnector.DE_COLLECTION_PATH
392        elif self.atomTypeID == VTD.GRANULE_TERM:
393            collectionPath += eXistConnector.GRANULE_COLLECTION_PATH
394        elif self.atomTypeID == VTD.ACTIVITY_TERM and \
395            self.subtypeID == VTD.DEPLOYMENT_TERM:
396            collectionPath += eXistConnector.DEPLOYMENTS_COLLECTION_PATH
397        else:
398            collectionPath += eXistConnector.DEPLOYMENT_COLLECTION_PATH
399       
400        if not self.ME.providerID:
401            raise AtomError("Error: cannot determine atom collection path because " + \
402                            "the provider ID is not defined")
403           
404        collectionPath += self.ME.providerID + "/"
405        return collectionPath
406
407
408    def __addAtomTypeDataXML(self, root):
409        '''
410        Add the atom type, and subtype data, if available, to atom categories
411        - and lookup and add the appropriate vocab term data
412        '''
413        if self.atomTypeID:
414            logging.info("Adding atom type info to XML output")
415            category = Category()
416            category.label = self.atomTypeID
417            # look up the appropriate vocab term data
418            category.scheme = self.VTD.getTermCurrentVocabURL(self.atomTypeID)
419            category.term = self.ATOM_TYPE
420            root.append(category.toXML())
421
422        if self.subtypeID:
423            logging.info("Adding atom subtype info to XML output")
424            # NB subtypes not all defined, so leave this out for the moment
425            category.label = self.subtypeID
426            # look up the appropriate vocab term data
427            category.scheme = self.VTD.getTermCurrentVocabURL(self.subtypeID)
428            category.term = self.ATOM_SUBTYPE
429            root.append(category.toXML())
430
431
432    def addMolesEntityData(self, abbreviation, provider_id, object_creation_time):
433        '''
434        Add data to include in the moles entity element
435        '''
436        logging.debug('Adding moles entity information')
437        self.ME.abbreviation = abbreviation
438        self.ME.providerID = provider_id
439        self.ME.createdDate = getISO8601Date(object_creation_time)
440        logging.debug('Moles entity information added')
441
442
443    def addAuthors(self, authors):
444        '''
445        Add author data appropriately to the atom
446        NB, these will overwrite any existing authors of the same type
447        @param authors: list of Person objects with the author data
448        '''
449        logging.debug('Adding authors data to Atom')
450        isFirstAuthor = {}
451        authorArray = None
452        for author in authors:
453            # NB, we're only allowed one atom author
454            if author.type == Person.AUTHOR_TYPE:
455                self.author = author
456                if isFirstAuthor.has_key(author.type):
457                    raise AtomError("Error: an atom can only have one author specified")
458                isFirstAuthor[author.type] = 1
459                continue
460            elif author.type == Person.CONTRIBUTOR_TYPE:
461                authorArray = self.contributors
462            elif author.type == Person.RESPONSIBLE_PARTY_TYPE:
463                authorArray = self.ME.responsibleParties
464               
465            # check if this is the first addition - if so, clear out the
466            # array in advance
467            if not isFirstAuthor.has_key(author.type):
468                logging.debug("Clearing out author array")
469                # NB, need to be careful to clear the array, not create a ref
470                # to a new array
471                del authorArray[:]
472                isFirstAuthor[author.type] = 1
473
474            if str(author) != "" and author not in authorArray:
475                logging.debug("Adding author (type:'%s', name:'%s', uri:'%s', role:'%s')" \
476                              %(author.type, author.name, author.uri, author.role))
477                authorArray.append(author)
478
479        logging.debug('Finished adding authors data')
480
481
482    def _isNewParameter(self, param):
483        '''
484        Check if a parameter is already specified in the atom, return False if
485        so, otherwise return True
486        '''
487        for p in self.parameters:
488            if p.term == param.term and \
489                p.scheme == param.scheme and \
490                p.label == param.label:
491                return False
492        return True
493
494
495    def addRelatedLinks(self, linkVals):
496        '''
497        Add related links in string format - converting to Link objects
498        @param linkVals: string of format, 'uri | title | vocabServerURL'
499        '''
500        self.relatedLinks.append(self.objectify(linkVals, 'relatedLinks'))
501
502
503    def addParameters(self, params):
504        '''
505        Add a parameter to list - ensuring it is unique and has been formatted and tidied appropriately
506        @params param: parameter, as string array, to add to atom parameters collection
507        '''
508        # avoid strings being parsed character by character
509        if type(params) is str:
510            params = [params]
511           
512        for param in params:
513            # firstly tidy parameter
514            param = tidyUpParameters(param)
515            category = Category()
516            # NB, data already tidied up here, so set keyword to avoid this happening again
517            category.fromString(param, escapeSpecialCharacters=False)
518
519            # now check for uniqueness
520            if self._isNewParameter(category):
521                logging.debug("Adding new parameter: %s" %param)
522                self.parameters.append(category)
523   
524   
525    def _linksToXML(self, root):
526        '''
527        Add required links to the input element
528        @param root: element to add links to - NB, should be the root element of the atom
529        '''
530        selfLink = ET.SubElement(root, "link")
531        selfLink.attrib["href"] = self.atomBrowseURL
532        selfLink.attrib["rel"] = "self"
533        if self.subtypeID != VTD.DEPLOYMENT_TERM:
534            molesLink = ET.SubElement(root, "link")
535            molesDoc = re.sub('ATOM','NDG-B1', self.atomBrowseURL)
536            molesLink.attrib["href"] = molesDoc
537            molesLink.attrib["rel"] = 'related'
538       
539        for relatedLink in self.relatedLinks:
540            if relatedLink.hasValue():
541                import pdb
542                pdb.set_trace()
543                root.append(relatedLink.toXML())
544   
545    def toXML(self):
546        '''
547        Convert the atom into XML representation and return this
548        @return: xml version of atom
549        '''
550        logging.info("Creating formatted XML version of Atom")
551        root = ET.Element("entry")
552        root.attrib["xmlns"] = "http://www.w3.org/2005/Atom"
553        root.attrib["xmlns:moles"] = "http://ndg.nerc.ac.uk/schema/moles2beta"
554        root.attrib["xmlns:georss"] = "http://www.georss.org/georss/10"
555        root.attrib["xmlns:gml"] = "http://www.opengis.net/gml"
556        id = ET.SubElement(root, "id")
557        id.text = self.atomID
558        title = ET.SubElement(root, "title")
559        title.text = self.title
560        self._linksToXML(root)
561
562        # NB, the author tag is mandatory for atoms - so if an explicit
563        # author has not been set, just take the author to be the provider
564        if not self.author:
565            author = Person()
566            author.name = self.ME.providerID
567            #author.uri = self.ME.providerID
568            self.author = author
569
570        root.append(self.author.toXML())
571           
572        for contributor in self.contributors:
573            root.append(contributor.toXML())
574
575        # add the moles entity section, if it is required
576        if self.ME:
577            root.append(self.ME.toXML())
578
579        # add parameters data
580        for param in self.parameters:
581            if param.hasValue():
582                root.append(param.toXML())
583
584        # add the type and subtype data
585        self.__addAtomTypeDataXML(root)
586                   
587        summary = ET.SubElement(root, "summary")
588        summary.text = self.Summary
589                   
590        # add link to content, if required - NB, can only have one content element in atom
591        # - and this is mandatory
592        content = ET.SubElement(root, "content")
593        if self.contentFile:
594            content.attrib["type"] = "application/xml"
595            content.attrib["src"] = self.contentFile
596        else:
597            content.text = self.Content
598            content.attrib["type"] = "xhtml"
599       
600        # if there's a published date already defined, assume we're doing an update now
601        # NB, update element is mandatory
602        currentDate = datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%SZ")
603        if not self.publishedDate:
604            self.publishedDate = currentDate
605
606        updated = ET.SubElement(root, "updated")
607        if not self.updatedDate:
608            self.updatedDate = currentDate
609        updated.text = self.updatedDate
610
611        published = ET.SubElement(root, "published")
612        published.text = self.publishedDate
613
614        # add temporal range data, if available
615        temporalRange = ET.SubElement(root, "moles:temporalRange")
616        if self.t1:
617            temporalRange.text = self.t1
618            if self.t2:
619                temporalRange.text += "/" + self.t2
620
621        # add spatial range data, if available
622        self._addSpatialData(root)
623
624        tree = ET.ElementTree(root)
625        logging.info("XML version of Atom created")
626        return tree
627
628
629    def __getSummary(self):
630        logging.debug("Getting summary data")
631        summaryString = ""
632        for summary_line in self.summary:
633            summaryString += summary_line + "\n"
634
635        return summaryString
636
637    def __setSummary(self, summary):
638        logging.debug("Adding summary data")
639        self.summary = []
640        for summary_line in summary.split('\n'):
641            self.summary.append(escapeSpecialCharacters(summary_line))
642           
643    Summary = property(fset=__setSummary, fget=__getSummary, doc="Atom summary")
644
645
646    def __getContent(self):
647        logging.debug("Getting content data")
648        contentString = ""
649        # NB, there must be content specified in an atom
650        if not self.content:
651            return "Metadata document"
652       
653        for content_line in self.content:
654            contentString += content_line + "\n"
655
656        return contentString
657
658    def __setContent(self, content):
659        logging.debug("Adding content data")
660        self.content = []
661        for content_line in content.split('\n'):
662            self.content.append(content_line)
663           
664    Content = property(fset=__setContent, fget=__getContent, doc="Atom content")
665
666           
667    def fromString(self, xmlString):
668        '''
669        Initialise Atom object using an xmlString
670        @param xmlString: representation of atom as an XML string
671        '''
672        logging.info("Ingesting data from XML string")
673       
674        # firstly, remove any namespaces used - to avoid problems with elementtree
675        logging.debug("Stripping moles namespace from string to allow easy handling with elementtree")
676        xmlString = xmlString.replace('moles:', '')
677        xmlString = xmlString.replace('georss:', '')
678        xmlString = xmlString.replace('gml:', '')
679        xmlString = xmlString.replace('xmlns="http://www.w3.org/2005/Atom"', '')
680        xmlString = xmlString.replace('default:', '')
681
682        # now create elementtree with the XML string
683        logging.debug("Create elementtree instance with XML string")
684        tree = ET.fromstring(xmlString)
685       
686        title = tree.findtext('title')
687        if title:
688            logging.debug("Adding title data")
689            self.title = title
690
691        summary = tree.findtext('summary')
692        if summary:
693            self.Summary = summary#.decode('unicode_escape')
694
695        authorElement = tree.find('author')
696        logging.debug("Adding author data")
697        author = Person()
698        author.fromETElement(authorElement)
699        self.author = author
700
701        contributorElements = tree.findall('contributor')
702        for contributorElement in contributorElements:
703            logging.debug("Adding contributor data")
704            contributor = Person(personType = Person.CONTRIBUTOR_TYPE)
705            contributor.fromETElement(contributorElement)
706            self.contributors.append(contributor)
707
708        molesElement = tree.find('entity')
709        if molesElement:
710            self.ME.fromET(molesElement)
711               
712        self.atomID = tree.findtext('id')
713
714        self._parseCategoryData(tree.findall('category'))
715
716        self._parseLinksData(tree.findall('link'))
717           
718        contentTag = tree.find('content')
719        if contentTag != None:
720            logging.debug("Found content tag - checking for CSML/CDML file data")
721            file = contentTag.attrib.get('src')
722            if file:
723                # NB, the path will reveal more reliably whether we're dealing with CSML and CDML files
724                if file.upper().find('CSML') > -1:
725                    logging.debug("Adding CSML file data")
726                    self.csmlFile = file
727                elif file.upper().find('CDML') > -1:
728                    logging.debug("Adding CDML file data")
729                    self.cdmlFile = file
730                self.contentFile = file
731            else:
732                logging.debug("No file data - adding contents of element instead")
733                self.Content = contentTag.text
734       
735        range = tree.findtext('temporalRange')
736        if range:
737            logging.debug("Adding temporal range data")
738            timeData = range.split('/')
739            self.t1 = timeData[0]
740            if len(timeData) > 1:
741                self.t2 = timeData[1]
742       
743        # NB, this parser won't mind if we're dealing with Envelope or EnvelopeWithTimePeriod
744        minBBox = tree.findall('.//lowerCorner')
745        if minBBox:
746            logging.debug("Adding min spatial range data")
747            minBBox = minBBox[0]
748            spatialData = minBBox.text.split()
749            self.minX = spatialData[0]
750            if len(spatialData) > 1:
751                self.minY = spatialData[1]
752       
753        maxBBox = tree.findall('.//upperCorner')
754        if maxBBox:
755            maxBBox = maxBBox[0]
756            logging.debug("Adding max spatial range data")
757            spatialData = maxBBox.text.split()
758            self.maxX = spatialData[0]
759            if len(spatialData) > 1:
760                self.maxY = spatialData[1]
761               
762        publishedDate = tree.findtext('published')
763        if publishedDate:
764            logging.debug("Adding published date")
765            self.publishedDate = publishedDate
766               
767        updatedDate = tree.findtext('updated')
768        if updatedDate:
769            logging.debug("Adding updated date")
770            self.updatedDate = updatedDate
771           
772        logging.info("Completed data ingest")
773   
774   
775    def _parseCategoryData(self, categories):
776        logging.debug("Adding category/parameters data")
777        for category in categories:
778            cat = Category()
779            cat.fromETElement(category)
780           
781            if cat.term == self.ATOM_TYPE:
782                logging.debug("Found atom type data")
783                self.atomTypeID = cat.label
784                self.atomTypeName = self.VTD.TERM_DATA[cat.label].title
785                continue
786            elif cat.term == self.ATOM_SUBTYPE:
787                logging.debug("Found atom subtype data")
788                self.subtypeID = cat.label
789                self.subtype = cat.scheme
790                continue
791
792            self.parameters.append(cat)
793   
794
795    def setDatasetID(self, datasetID):
796        '''
797        Set the dataset ID for the atom - and generate an appropriate atom name using this
798        @param datasetID: ID to set for the atom
799        '''
800        self.datasetID = datasetID
801        self._generateAtomName(datasetID) 
802        self.atomID = self.createAtomID(datasetID)
803
804
805    def createAtomID(self, datasetID):
806        '''
807        Create a unique ID, conforming to atom standards, for atom
808        NB, see http://diveintomark.org/archives/2004/05/28/howto-atom-id
809        @param datasetID: ID of atom's dataset
810        @return: unique ID
811        '''
812        logging.info("Creating unique ID for atom")
813        if not self.atomBrowseURL:
814            self._generateAtomName(datasetID)
815        urlBit = self.atomBrowseURL.split('://')[1]
816        urlBit = urlBit.replace('#', '')
817        urlBits = urlBit.split('/')
818        dateBit = datetime.datetime.today().strftime("%Y-%m-%d")
819       
820        id = "tag:" + urlBits[0] + "," + dateBit + ":/" + "/".join(urlBits[1:])
821        logging.info("- unique ID created for atom")
822        logging.debug(" - '%s'" %id)
823        return id
824       
825       
826    def _generateAtomName(self, datasetID):
827        '''
828        Generate a consistent name for the atom - with full eXist doc path
829        @param datasetID: ID of atom's dataset
830        '''
831        self.atomName = datasetID + ".atom"
832        self.ndgURI = self.ME.providerID + "__ATOM__" + datasetID
833        self.atomBrowseURL = VTD.BROWSE_ROOT_URL + self.ndgURI
834
835
836    def _parseLinksData(self, links):
837        '''
838        Extract links and atom data from array of link elements in the XML representation of the atom
839        @param links: an array of <link> elements
840        '''
841        # firstly, get all data to start with, so we can properly process it afterwards
842        linkData = {}
843        logging.debug("Getting link data")
844        for linkTag in links:
845            link = Link()
846            link.fromETElement(linkTag)
847
848            if not linkData.has_key(link.rel):
849                linkData[link.rel] = []
850           
851            linkData[link.rel].append(link)
852
853        # there should be one self referencing link - which will provide info on the atom itself
854        if not linkData.has_key('self'):
855            errorMessage = "Atom does not have self referencing link - " + \
856                "cannot ascertain datasetID without this - please fix"
857            logging.error(errorMessage)
858            raise ValueError(errorMessage)
859       
860        # this is the link describing the atom itself
861        self.atomBrowseURL = linkData['self'][0].href
862       
863        self.datasetID = self.atomBrowseURL.split("__ATOM__")[-1]
864        self.atomName = self.datasetID + ".atom"
865        self.ndgURI = self.atomBrowseURL.split(VTD.BROWSE_ROOT_URL)[1]
866       
867        # now remove this value and the associated moles doc link
868        del linkData['self']
869        molesDoc = self.atomBrowseURL.replace('ATOM', 'NDG-B1')
870        if linkData.has_key('related'):
871            relatedLinks = []
872            for link in linkData['related']:
873                if link.href != molesDoc:
874                    relatedLinks.append(link)
875           
876            linkData['related'] = relatedLinks
877               
878        # now add the remaining links to the atom
879        for key in linkData:
880            for link in linkData[key]:
881                logging.debug("Adding link data")
882                self.relatedLinks.append(link)
883       
884
885    def _addSpatialData(self, element):
886        '''
887        Add spatial coverage element to an input element
888        @param element: element to add coverage data to
889        '''
890        logging.info("Adding spatial data to Atom")
891        if not self.minX:
892            logging.info("No spatial data specified")
893            return
894        bbox = ET.SubElement(element, "georss:where")
895        envelope = ET.SubElement(bbox, "gml:Envelope")
896        lc = ET.SubElement(envelope, "gml:lowerCorner")
897        lc.text = self.minX + " " + self.minY
898        uc = ET.SubElement(envelope, "gml:upperCorner")
899        uc.text = self.maxX + " " + self.maxY
900
901       
902    def setAttribute(self, attributeName, attributeValue):
903        '''
904        Set the value of an atom attribute - and do some basic tidying up of the string content
905        - to escape any XML unfriendly characters
906        @param attributeName: name of the attribute whose value to set
907        @param attributeValue: value to set the attribute to 
908        '''
909        logging.debug("Setting attribute, %s, to %s" %(attributeName, attributeValue))
910        origValue = attributeValue
911       
912        # escape any special characters if a value has been specified
913        # NB, need to cope with both single values and arrays
914        if attributeValue:
915            if type(attributeValue) is list:
916                newVals = []
917                for val in attributeValue:
918                    newVals.append(self.objectify(escapeSpecialCharacters(val), attributeName))
919                attributeValue = newVals
920                   
921            else:
922                attributeValue = self.objectify(escapeSpecialCharacters(attributeValue), attributeName)
923
924        # handle the special case of authors; only one author is allowed per atom
925        # - the others should be treated as contributors
926        if attributeName == "authors":
927            setattr(self, "author", attributeValue[0])
928            if len(attributeValue) > 1:
929                setattr(self, "contributors", attributeValue[1:])
930        elif attributeName == "atomAuthors":
931            self.ME.responsibleParties.append(attributeValue)
932        else:
933            setattr(self, attributeName, attributeValue)
934
935
936    def objectify(self, objectVals, attributeName):
937        '''
938        Some inputs are specified as strings but need to be converted into
939        objects - do this here
940        @param objectVals: a '|' delimited string of values
941        @param attributeName: name of attribute the values belong to
942        '''
943        obj = None
944        if type(objectVals) != str:
945            return objectVals
946       
947        if attributeName == "relatedLinks":
948            obj = Link()
949        elif attributeName == "atomAuthors" or attributeName == "authors":
950            obj = Person()
951
952        if obj:
953            obj.fromString(objectVals)
954            return obj
955       
956        return objectVals
957
958
959    def toPrettyXML(self):
960        '''
961        Returns nicely formatted XML as string
962        '''
963        atomXML = self.toXML()
964
965        # create the string
966        logging.debug("Converting the elementtree object into a string")
967        prettyXML = et2text(atomXML.getroot())
968
969        # add XML version tag
970        prettyXML = "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n" + prettyXML
971        logging.info("Created formatted version of XML object")
972        return prettyXML
973
974
975    def getLinksOfType(self, termID):
976        '''
977        Returns links in the atom related links attribute which match the specified
978        term ID
979        @param termID: the termID to look for in the related links - NB, this is
980        matched to the end of the link.rel value
981        @return links: array of Link objects with matching term type
982        '''
983        logging.debug("Getting atom links of type, '%s'" %termID)
984        matchingLinks = []
985        for link in self.relatedLinks:
986            # firstly, handle special case where we only want the online ref type links
987            # returned
988            if termID == self.ONLINE_REF_LABEL:
989                if not link.isChildAtom():
990                    logging.debug("- found link with matching term type")
991                    matchingLinks.append(link)
992               
993            elif link and link.rel and link.rel.lower().endswith(termID.lower()):
994                logging.debug("- found link with matching term type")
995                matchingLinks.append(link)
996               
997        logging.debug("Returning matched links")
998        return matchingLinks
999       
1000       
1001    def getLogos(self):
1002        '''
1003        Return related links that are logos
1004        @return: array of Links containing the logos for the atom
1005        '''
1006        logos = []
1007        for link in self.relatedLinks:
1008            if link.rel.lower().endswith(VTD.LOGO_TERM.lower()):
1009                logos.append(link)
1010               
1011        return logos
1012   
1013   
1014    def isGranule(self):
1015        if self.atomTypeID == VTD.GRANULE_TERM:
1016            return True
1017        return False
1018   
1019   
1020    def isDE(self):
1021        if self.atomTypeID == VTD.DE_TERM:
1022            return True
1023        return False
1024   
1025    def isDeployment(self):
1026        if self.subtypeID and self.subtypeID == VTD.DEPLOYMENT_TERM:
1027            return True
1028        return False
1029   
1030    def isDeployable(self):
1031        if (self.atomTypeID == VTD.ACTIVITY_TERM and self.subtypeID != VTD.DEPLOYMENT_TERM) or \
1032            self.atomTypeID == VTD.DPT_TERM or \
1033            self.atomTypeID == VTD.OBS_TERM:
1034            return True
1035        return False
1036
1037       
    def addCSMLData(self, csml, aggregateCoverage=False):
        '''
        Parse CSML data and add extracted info to the atom - i.e. the dataset ID,
        title, bounding box, time range and parameters
        @param csml: object providing the csml data - NB, this needs to have a
        'filename' attribute, which is stored as the atom csmlFile, and a 'value'
        attribute, which is passed to the CSML parser
        @keyword aggregateCoverage: if set to True, only coverage data that extends the
        atom coverage data will be added; if False, the atom coverage data is
        always replaced by the values from the CSML data
        '''
        logging.info("Creating CSML data model")
        self.csmlFile = csml.filename
        csmlDoc = CsmlParser.Dataset(file=csml.value)
       
        logging.info("Extracting info from CSML file")
        logging.debug("Got dataset ID: %s" %csmlDoc.id)
        # NB, this also sets up the atom name and atom ID
        self.setDatasetID(csmlDoc.id)
       
        title = csmlDoc.name.CONTENT
        logging.debug("Got dataset name (title): '%s'" %title)
        # NB, if a title is specified (and not as the default value), it automatically is used in
        # place of anything in the granulite file
        if title and title != "NAME OF DATASET GOES HERE":
            logging.info("Title, '%s', extracted from CSML file" %title)
            if self.title:
                logging.info("- NB, this will override the title specified in the granulite file ('%s')" \
                             %self.title)
            self.title = title
               
        bbox1 = csmlDoc.getBoundingBox()
        bbox2 = csmlDoc.getCSMLBoundingBox()
        # NOTE(review): bbox2 is used without checking it has a value - confirm
        # getCSMLBoundingBox always returns an object
        time = bbox2.getTimeLimits()
   
        # now check for other parameters to add to granule
        # Firstly, extract the bounding envelope
        if bbox1:
            # NB, bbox1 is read here as [west, south, east, north]
            w, e = normaliseLongitude(bbox1[0],bbox1[2])
            n, s = (bbox1[3], bbox1[1])
   
            # each limit is replaced if not aggregating, if it is not yet set on
            # the atom, or if the new value extends the existing coverage
            if not aggregateCoverage or (not self.maxY or float(n) > float(self.maxY)):
                self.maxY = n
               
            if not aggregateCoverage or (not self.minY or float(s) < float(self.minY)):
                self.minY = s
           
            if not aggregateCoverage or (not self.minX or float(w) < float(self.minX)):
                self.minX = w
   
            if not aggregateCoverage or (not self.maxX or float(e) > float(self.maxX)):
                self.maxX = e
           
            logging.debug("Got bounding box data from file: (%s, %s) , (%s, %s)" \
                          %(w, s, e, n))
           
            logging.debug("Updated atom bounding box data: (%s, %s) , (%s, %s)" \
                          %(self.minX, self.minY, self.maxX, self.maxY))
        else:
            logging.debug("No valid bounding box data found")
   
        if time:
            # NOTE(review): formatDateYYYYMMDD and YEAR_FORMAT are not amongst the
            # names imported at the top of this file - confirm they are defined
            # at module level
            # the start time is replaced if not aggregating, if it is not yet set
            # on the atom, or if the new value is earlier than the existing one
            t1 = formatDateYYYYMMDD(time[0])
            if not aggregateCoverage or \
                (not self.t1 or datetime.datetime.strptime(t1, YEAR_FORMAT) < \
                    datetime.datetime.strptime(self.t1, YEAR_FORMAT)):
                self.t1 = t1
   
            # the end time is optional - NB, it is also ignored if it is the string, 'None';
            # otherwise it is replaced under the same rules - but only if it is later
            t2 = time[1]
            if t2 and t2 != 'None':
                t2 = formatDateYYYYMMDD(t2)
                if not aggregateCoverage or \
                    (not self.t2 or datetime.datetime.strptime(t2, YEAR_FORMAT) > \
                        datetime.datetime.strptime(self.t2, YEAR_FORMAT)):
                    self.t2 = t2
           
            logging.debug("Got time range: %s -> %s" %(self.t1, self.t2))
        else:
            logging.debug("No valid time range data found")
   
        #create parameter summaries:
        #set up list to hold the parameters data
        parameters = []
        for feature in csmlDoc.featureCollection.featureMembers:
            if hasattr(feature.parameter, 'href'):
                paramTriple = ""
                # NB, features without a description are skipped - the parameter
                # info is only added inside this check
                if hasattr(feature, 'description'):
                    # build up a string of format, 'description | href | name' - with
                    # the name left blank if the feature does not have one
                    paramTriple = feature.description.CONTENT
                    paramTriple += " | " + feature.parameter.href
                   
                    term = ""
                    if hasattr(feature, 'name'):
                        term = feature.name.CONTENT
   
                    paramTriple += " | " + term
                   
                    logging.debug("Got parameter info: %s" %paramTriple)
                    parameters.append(paramTriple)
       
        # update the atom with the extracted parameters
        logging.info("Adding CSML parameters to granule atom")
        self.addParameters(parameters)
        logging.info("Finished adding CSML data")
1136
1137
1138    def lookupDeploymentsInfo(self, dr, lookupIndirectReferences=False):
1139        '''
1140        Check through the atom links and retrieve any associated deployments
1141        data
1142        @param dr: Instance of DocumentRetrieve object - NB, this requires eXist
1143        config details which are not available to the Atom object
1144        @keyword lookupIndirectReferences: if True, the atom ID is used to search
1145        defined deployments to find those which reference it, otherwise only
1146        deployments data featured in the atom related links are processed
1147        '''
1148        logging.info("Looking up deployments info")
1149        self.deployments = []
1150        self.allActivities = []
1151        self.allObs = []
1152        self.allDpts = []
1153
1154        if lookupIndirectReferences:
1155            logging.info("Looking up references to this atom from other deployments")
1156            doc = dr.get(self.ME.providerID, dr.ATOM_DEPLOYMENTS, self.atomBrowseURL, \
1157                                        targetCollection='/db/atoms/')
1158            # now need to turn this results set into actual atoms
1159            tree = ET.fromstring(doc)
1160            links = []
1161            for atom in tree:
1162                logging.debug("- found reference in deployment")
1163                links.append(ET.tostring(atom))
1164           
1165        else:
1166            links = self.getLinksOfType(self.VTD.DEPLOYMENT_TERM)
1167
1168        for link in links:
1169            if lookupIndirectReferences:
1170                deploymentAtom = link
1171            else:
1172                localID = link.href.split("__ATOM__")[-1]
1173                deploymentAtom = dr.get(self.ME.providerID, 'ATOM', localID, \
1174                                        targetCollection='/db/atoms/')
1175
1176            deployment = Deployment.Deployment(Atom(xmlString=str(deploymentAtom)))
1177            self.deployments.append(deployment)
1178           
1179            self.addUniqueLinks(self.allActivities, deployment.activities)
1180            self.addUniqueLinks(self.allObs, deployment.obs)
1181            self.addUniqueLinks(self.allDpts, deployment.dpts)
1182       
1183        logging.info("Finished looking up deployments info")
1184
1185
1186    def addUniqueLinks(self, dataArray, links):
1187        '''
1188        Add links to specified array - if they are not already included
1189        @param dataArray: a list, potentially arlready containing links
1190        @param links: a Link or array of Links to add to the dataArray
1191        '''
1192        logging.debug("Adding new links")
1193        if not links:
1194            return
1195       
1196        if type(links) is not list:
1197            links = [links]
1198       
1199        for link in links:
1200            if type(link) is not Link:
1201                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
1202                continue
1203            if link not in dataArray:
1204                logging.debug("- adding unique link")
1205                dataArray.append(link)
1206        logging.debug("Finished adding links")
Note: See TracBrowser for help on using the repository browser.