source: exist/trunk/python/ndgUtils/models/Atom.py @ 4535

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/models/Atom.py@4535
Revision 4535, 47.1 KB checked in by cbyrom, 11 years ago (diff)

Generalise existing methods to allow data entities to be looked up with
the same code as associated deployments data + add new templates to
display the DEs + extend the javascript to allow multiple callbacks
to retrieve the info + various tidy ups and fixes.

Line 
1'''
2 Class representing data in  atom format - allowing moles data to be stored and accessed in a web feed compatible way
3 
4 @author: C Byrom, Tessella Jun 2008
5'''
6try: #python 2.5
7    from xml.etree import cElementTree as ET
8except ImportError:
9    try:
10        # if you've installed it yourself it comes this way
11        import cElementTree as ET
12    except ImportError:
13        # if you've egged it this is the way it comes
14        from ndgUtils.elementtree import cElementTree as ET
15import sys, logging, re, datetime
16from ndgUtils.eXistConnector import eXistConnector
17from ndgUtils.ETxmlView import et2text
18from ndgUtils.lib.utilities import getTripleData, escapeSpecialCharacters, \
19    tidyUpParameters, getISO8601Date, normaliseLongitude, formatDateYYYYMMDD
20from ndgUtils.vocabtermdata import VocabTermData as VTD
21from ndgUtils.models import MolesEntity as ME
22import csml.parser as CsmlParser
23from ndgUtils.models import Deployment as Deployment
24
25
class AtomError(Exception):
    """
    Error type raised by the atom model classes.
    NB, the message is written to the error log as soon as the exception
    object is created - i.e. even if the exception is subsequently swallowed.
    """
    def __init__(self, msg):
        # log first, then let the standard Exception machinery store the message
        logging.error(msg)
        super(AtomError, self).__init__(msg)
33
34
class Person(object):
    '''
    Class representing atom author type data - with name, uri and role attributes.
    The person type selects the element name produced by toXML(); responsible
    party data is always placed in the 'moles' namespace and keeps its uri in a
    'uri' tag, rather than the 'email' tag used by the other types.
    '''
    AUTHOR_TYPE = 0
    CONTRIBUTOR_TYPE = 1
    RESPONSIBLE_PARTY_TYPE = 2
    # element names - indexed by the type constants above
    ELEMENT_NAMES = ["author", "contributor", "responsibleParty"]

    def __init__(self, personType = AUTHOR_TYPE, namespace = None):
        '''
        @keyword personType: one of the *_TYPE constants (default AUTHOR_TYPE)
        @keyword namespace: namespace prefix to put on the generated element
        names; ignored for responsible parties, which always use 'moles'
        '''
        self.type = personType
        self.ns = namespace
        self.name = ""
        self.uri = ""
        self.role = ""

        # NB, the atom format specifies slightly different data contents
        self.uriTagName = "email"
        # NB, responsible party data is always stored in the moles section
        if self.type == self.RESPONSIBLE_PARTY_TYPE:
            self.ns = 'moles'
            self.uriTagName = "uri"

    def __str__(self):
        '''
        @return: 'name | uri | role' triple string - or an empty string if
        none of the three values are set
        '''
        if self.name or self.uri or self.role:
            return self.name + " | " + self.uri + " | " + self.role
        return ""

    def fromString(self, personString):
        '''
        Set the name, uri and role from a triple string
        @param personString: triple string, as handled by getTripleData
        '''
        (self.name, self.uri, self.role) = getTripleData(personString)

    def fromETElement(self, personTag):
        '''
        Set the name, role and uri from the child elements of an elementtree
        element; missing children leave the value as an empty string
        NB, un-prefixed child tag names are looked up
        @param personTag: element holding the person data
        '''
        self.name = personTag.findtext('name') or ""
        self.role = personTag.findtext('role') or ""
        self.uri = personTag.findtext(self.uriTagName) or ""
        logging.debug("Added name: '%s', role: '%s', %s: '%s'" \
                      %(self.name, self.role, self.uriTagName, self.uri))

    def toXML(self):
        '''
        Create an elementtree element for the person - only including child
        elements for the values that are actually set
        @return: element named according to the person type
        '''
        prefix = ""
        if self.ns:
            prefix = self.ns + ':'

        author = ET.Element(prefix + self.ELEMENT_NAMES[self.type])

        if self.name:
            name = ET.SubElement(author, prefix + "name")
            name.text = self.name

        if self.uri:
            uri = ET.SubElement(author, prefix + self.uriTagName)
            uri.text = self.uri

        if self.role:
            role = ET.SubElement(author, prefix + "role")
            role.text = self.role

        return author

    def __cmp__(self, person1):
        '''
        Override comparison to allow proper object comparison when checking
        if Person objects are in an array already - i.e. if person in personArray...
        NB, this only distinguishes equal (0) from not equal - it does not
        define a meaningful ordering
        '''
        if not person1:
            return -1

        if self is person1:
            return 0
        elif self.uri == person1.uri and self.name == person1.name and \
                self.role == person1.role and self.type == person1.type:
            return 0
        return 1

    # NB, python 3 ignores __cmp__, which would reduce 'person in personArray'
    # to an identity check - so expose the same comparison via the rich
    # comparison methods, too.  No __hash__ is defined here, so under python 3
    # instances become unhashable.
    def __eq__(self, other):
        '''
        @return True if other holds the same uri, name, role and type
        '''
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        '''
        @return True if other differs in uri, name, role or type
        '''
        return not self.__eq__(other)
108
109
class Link(object):
    '''
    Class representing an atom link - with href, title and rel attributes
    '''

    def __init__(self):
        self.href = ""
        self.title = ""
        self.rel = ""

    def fromString(self, linkString):
        '''
        Set the href, title and rel from a triple string
        @param linkString: triple string, as handled by getTripleData
        '''
        (self.href, self.title, self.rel) = getTripleData(linkString)

    def fromETElement(self, linkTag):
        '''
        Set the href, rel and title from the attributes of an elementtree
        element; missing attributes leave the value as an empty string
        @param linkTag: <link> element to read
        '''
        self.href = linkTag.attrib.get('href') or ""
        self.rel = linkTag.attrib.get('rel') or ""
        self.title = linkTag.attrib.get('title') or ""

    def toXML(self):
        '''
        @return: <link> element with href, title and rel attributes set
        '''
        link = ET.Element("link")
        link.attrib["href"] = self.href
        link.attrib["title"] = self.title
        link.attrib["rel"] = self.rel
        return link

    def hasValue(self):
        '''
        @return True if the link has an href or a title; False otherwise
        '''
        # NB, just a rel on its own is meaningless - so ignore
        return bool(self.href or self.title)

    def __str__(self):
        '''
        @return: 'href | title | rel' triple string - or an empty string if
        none of the three values are set
        '''
        if self.href or self.title or self.rel:
            return self.href + " | " + self.title + " | " + self.rel
        return ""

    def isChildAtom(self):
        '''
        Determines whether the link refers to another atom - e.g. a link to
        a data granule
        @return True, if so; False otherwise
        '''
        atomTerms = (VTD.GRANULE_TERM, VTD.DEPLOYMENT_TERM, VTD.ACTIVITY_TERM, \
                     VTD.DPT_TERM, VTD.OBS_TERM)
        for term in atomTerms:
            if self.rel.endswith(term):
                return True

        return False

    def __cmp__(self, link1):
        '''
        Override comparison to allow proper object comparison when checking
        if Link objects are in an array already - i.e. if link in linkArray...
        NB, this only distinguishes equal (0) from not equal - it does not
        define a meaningful ordering
        '''
        if not link1:
            return -1

        if self is link1:
            return 0
        elif self.href == link1.href and self.title == link1.title and \
                self.rel == link1.rel:
            return 0
        return 1

    # NB, python 3 ignores __cmp__, which would reduce 'link in linkArray' to
    # an identity check - so expose the same comparison via the rich comparison
    # methods, too.  No __hash__ is defined here, so under python 3 instances
    # become unhashable.
    def __eq__(self, other):
        '''
        @return True if other holds the same href, title and rel
        '''
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        '''
        @return True if other differs in href, title or rel
        '''
        return not self.__eq__(other)
175
176
class Category(object):
    '''
    Atom category data holder - a term, a scheme and a label
    '''
    def __init__(self):
        self.label = ""
        self.scheme = ""
        self.term = ""

    def fromString(self, linkString, escapeSpecialCharacters=True):
        '''
        Populate the category from a triple string, 'label | scheme | term'
        @param linkString: triple string holding the category data
        @keyword escapeSpecialCharacters: passed on to getTripleData as its
        doEscape keyword (default True)
        '''
        triple = getTripleData(linkString, doEscape=escapeSpecialCharacters)
        (self.label, self.scheme, self.term) = triple

    def fromETElement(self, linkTag):
        '''
        Populate the category from the attributes of an elementtree element;
        absent attributes result in empty strings
        @param linkTag: <category> element to read
        '''
        attributes = linkTag.attrib
        self.term = attributes.get('term') or ""
        self.label = attributes.get('label') or ""
        self.scheme = attributes.get('scheme') or ""

    def toXML(self):
        '''
        @return: <category> element with term, scheme and label attributes
        '''
        element = ET.Element("category")
        element.set("term", self.term)
        element.set("scheme", self.scheme)
        element.set("label", self.label)
        return element

    def hasValue(self):
        '''
        @return True if any of scheme, label or term is set; False otherwise
        '''
        return bool(self.scheme or self.label or self.term)
212
213
214class Atom(object):
215
216    # labels for use with the atom categories
217    ATOM_TYPE = "ATOM_TYPE"
218    ATOM_SUBTYPE = "ATOM_SUBTYPE"
219
220    # labels for use with the templates to set/extract specific inputs
221    ONLINE_REF_LABEL = "online_ref"
222    PARAMETER_LABEL = "parameter"
223    ATOM_REF_LABEL = "atom_ref"
224    DELIMITER = "---"
225    REMOVE_LABEL = "remove"
226   
227    # format to use for t1-t2 date range
228    YEAR_FORMAT = '%Y-%m-%d'
229
230    def __init__(self, atomType = None, vocabTermData = None, ndgObject = None, \
231                 xmlString = None, state = eXistConnector.WORKING_COLLECTION_PATH, **inputs):
232        '''
233        Constructor - initialise the atom variables
234        '''
235        logging.info("Initialising atom")
236        if atomType:
237            logging.info(" - of type '%s'" %atomType)
238        self.atomTypeID = atomType
239
240        # some data have further subtypes specified
241        self.subtypeID = None # this should be the termID
242        self.subtype = None # and this should be the fully formed vocab URL
243       
244        self.ndgObject = ndgObject
245
246        self.atomName = None
247        self.files = []
248        self.author = None
249        self.contributors = []
250        self.atomAuthors = []
251        self.parameters = []
252        self.spatialData = []
253        self.temporalData = []
254        self.relatedLinks = []
255        self.summary = []
256        self.content = []
257        # NB, this deployments data duplicates other atom data - and is only used for a
258        # convenient way to collect the info (by lookupAssociatedData()) for use in templates
259        self.deployments = []
260        # ditto for the following field
261        self.dataEntities = []
262           
263        self.csmlFile = None
264        self.cdmlFile = None
265        # general variable to use for setting the atom content - NB, if a csmlFile is specified
266        # (either directly or via a cdmlFile specification), this will be the content by default
267        # for this purpose
268        self.contentFile = None     
269        self.title = None
270        self.datasetID = None        # NB, the dataset id ends up in the atomName - <path><datasetID>.atom
271        self.atomID = None
272   
273        # boundary box info - to replace spatial/temporalData?
274        self.minX = None
275        self.minY = None
276        self.maxX = None
277        self.maxY = None
278        self.t1 = None
279        self.t2 = None
280
281        self.ME = ME.MolesEntity()
282       
283        # date when the atom was first ingested
284        self.publishedDate = None
285
286        # last update date
287        self.updatedDate = None
288
289        # assume atom in working state by default - this is used to define what collection
290        # in eXist the atom is stored in
291        self.state = state
292       
293        # additional, non standard atom data can be included in the molesExtra element
294        if vocabTermData:
295            self.VTD = vocabTermData
296        else:
297            self.VTD = VTD()
298       
299        if xmlString:
300            self.fromString(xmlString)
301
302        # if inputs passed in as dict, add these now
303        if inputs:
304            logging.info("Adding info to atom from input dict")
305            logging.debug(inputs)
306            self.__dict__.update(inputs)
307           
308            # NB, this doesn't trigger the Summary Property, so do this
309            # explicitly, if need be
310            if inputs.has_key('Summary'):
311                self.Summary = inputs.get('Summary')
312            if inputs.has_key('Content'):
313                self.Content = inputs.get('Content')
314           
315            # also pass any moles data up to the moles entity object
316            if inputs.get('providerID'):
317                self.ME.providerID = inputs.get('providerID')
318               
319            if inputs.get('abbreviation'):
320                self.ME.abbreviation = inputs.get('abbreviation')
321
322        if self.atomTypeID:
323            self.atomTypeName = self.VTD.TERM_DATA[self.atomTypeID].title
324
325        logging.info("Atom initialised")
326
327
328    def addOnlineReferences(self, links):
329        '''
330        Add online reference data associated with the atom
331        - NB, care needs to be taken here since this data is stored in the atom
332        link elements and these are also used for the various atom associations
333        @param links: a Link or array of Links to add to the relatedLinks attribute
334        '''
335        logging.debug("Adding online references")
336        if not links:
337            return
338       
339        if type(links) is not list:
340            links = [links]
341       
342        # firstly clear out any online refs data from the existing related links
343        newLinks = []
344        for link in self.relatedLinks:
345            if link.isChildAtom():
346                newLinks.append(link)
347       
348        newLinks.extend(links)
349        self.relatedLinks = newLinks
350        logging.debug("Online references added")
351
352
353    def addUniqueRelatedLinks(self, links):
354        '''
355        Add links to relatedLinks array - if they are not already included
356        @param links: a Link or array of Links to add to the relatedLinks attribute
357        '''
358        self.addUniqueLinks(self.relatedLinks, links)
359       
360
361    def removeRelatedLinks(self, linksToDelete):
362        '''
363        Remove any links in the input list from the atom's related links list
364        @param linksToDelete: array of Link objects to remove from atom
365        '''
366        logging.debug("Removing related links from atom")
367        if not linksToDelete:
368            return
369       
370        if type(linksToDelete) is not list:
371            linksToDelete = [linksToDelete]
372       
373        updatedLinks = []
374        for link in self.relatedLinks:
375            if type(link) is not Link:
376                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
377                continue
378            if link in linksToDelete:
379                logging.debug("- found link to remove")
380            else:
381                updatedLinks.append(link)
382
383        self.relatedLinks = updatedLinks
384        logging.debug("Links removed")
385       
386
387    def getDefaultCollectionPath(self):
388        '''
389        Determine the correct collection to use for the atom in eXist
390        '''
391        collectionPath = eXistConnector.BASE_COLLECTION_PATH + self.state
392       
393        if self.atomTypeID == VTD.DE_TERM:
394            collectionPath += eXistConnector.DE_COLLECTION_PATH
395        elif self.atomTypeID == VTD.GRANULE_TERM:
396            collectionPath += eXistConnector.GRANULE_COLLECTION_PATH
397        elif self.atomTypeID == VTD.ACTIVITY_TERM and \
398            self.subtypeID == VTD.DEPLOYMENT_TERM:
399            collectionPath += eXistConnector.DEPLOYMENTS_COLLECTION_PATH
400        else:
401            collectionPath += eXistConnector.DEPLOYMENT_COLLECTION_PATH
402       
403        if not self.ME.providerID:
404            raise AtomError("Error: cannot determine atom collection path because " + \
405                            "the provider ID is not defined")
406           
407        collectionPath += self.ME.providerID + "/"
408        return collectionPath
409
410
411    def __addAtomTypeDataXML(self, root):
412        '''
413        Add the atom type, and subtype data, if available, to atom categories
414        - and lookup and add the appropriate vocab term data
415        '''
416        if self.atomTypeID:
417            logging.info("Adding atom type info to XML output")
418            category = Category()
419            category.label = self.atomTypeID
420            # look up the appropriate vocab term data
421            category.scheme = self.VTD.getTermCurrentVocabURL(self.atomTypeID)
422            category.term = self.ATOM_TYPE
423            root.append(category.toXML())
424
425        if self.subtypeID:
426            logging.info("Adding atom subtype info to XML output")
427            # NB subtypes not all defined, so leave this out for the moment
428            category.label = self.subtypeID
429            # look up the appropriate vocab term data
430            category.scheme = self.VTD.getTermCurrentVocabURL(self.subtypeID)
431            category.term = self.ATOM_SUBTYPE
432            root.append(category.toXML())
433
434
435    def addMolesEntityData(self, abbreviation, provider_id, object_creation_time):
436        '''
437        Add data to include in the moles entity element
438        '''
439        logging.debug('Adding moles entity information')
440        self.ME.abbreviation = abbreviation
441        self.ME.providerID = provider_id
442        self.ME.createdDate = getISO8601Date(object_creation_time)
443        logging.debug('Moles entity information added')
444
445
446    def addAuthors(self, authors):
447        '''
448        Add author data appropriately to the atom
449        NB, these will overwrite any existing authors of the same type
450        @param authors: list of Person objects with the author data
451        '''
452        logging.debug('Adding authors data to Atom')
453        isFirstAuthor = {}
454        authorArray = None
455        for author in authors:
456            # NB, we're only allowed one atom author
457            if author.type == Person.AUTHOR_TYPE:
458                self.author = author
459                if isFirstAuthor.has_key(author.type):
460                    raise AtomError("Error: an atom can only have one author specified")
461                isFirstAuthor[author.type] = 1
462                continue
463            elif author.type == Person.CONTRIBUTOR_TYPE:
464                authorArray = self.contributors
465            elif author.type == Person.RESPONSIBLE_PARTY_TYPE:
466                authorArray = self.ME.responsibleParties
467               
468            # check if this is the first addition - if so, clear out the
469            # array in advance
470            if not isFirstAuthor.has_key(author.type):
471                logging.debug("Clearing out author array")
472                # NB, need to be careful to clear the array, not create a ref
473                # to a new array
474                del authorArray[:]
475                isFirstAuthor[author.type] = 1
476
477            if str(author) != "" and author not in authorArray:
478                logging.debug("Adding author (type:'%s', name:'%s', uri:'%s', role:'%s')" \
479                              %(author.type, author.name, author.uri, author.role))
480                authorArray.append(author)
481
482        logging.debug('Finished adding authors data')
483
484
485    def _isNewParameter(self, param):
486        '''
487        Check if a parameter is already specified in the atom, return False if
488        so, otherwise return True
489        '''
490        for p in self.parameters:
491            if p.term == param.term and \
492                p.scheme == param.scheme and \
493                p.label == param.label:
494                return False
495        return True
496
497
498    def addRelatedLinks(self, linkVals):
499        '''
500        Add related links in string format - converting to Link objects
501        @param linkVals: string of format, 'uri | title | vocabServerURL'
502        '''
503        self.relatedLinks.append(self.objectify(linkVals, 'relatedLinks'))
504
505
506    def addParameters(self, params):
507        '''
508        Add a parameter to list - ensuring it is unique and has been formatted and tidied appropriately
509        @params param: parameter, as string array, to add to atom parameters collection
510        '''
511        # avoid strings being parsed character by character
512        if type(params) is str:
513            params = [params]
514           
515        for param in params:
516            # firstly tidy parameter
517            param = tidyUpParameters(param)
518            category = Category()
519            # NB, data already tidied up here, so set keyword to avoid this happening again
520            category.fromString(param, escapeSpecialCharacters=True)
521
522            # now check for uniqueness
523            if self._isNewParameter(category):
524                logging.debug("Adding new parameter: %s" %param)
525                self.parameters.append(category)
526   
527   
528    def _linksToXML(self, root):
529        '''
530        Add required links to the input element
531        @param root: element to add links to - NB, should be the root element of the atom
532        '''
533        selfLink = ET.SubElement(root, "link")
534        selfLink.attrib["href"] = self.atomBrowseURL
535        selfLink.attrib["rel"] = "self"
536        if self.subtypeID != VTD.DEPLOYMENT_TERM:
537            molesLink = ET.SubElement(root, "link")
538            molesDoc = re.sub('ATOM','NDG-B1', self.atomBrowseURL)
539            molesLink.attrib["href"] = molesDoc
540            molesLink.attrib["rel"] = 'related'
541       
542        for relatedLink in self.relatedLinks:
543            if relatedLink.hasValue():
544                root.append(relatedLink.toXML())
545   
546    def toXML(self):
547        '''
548        Convert the atom into XML representation and return this
549        @return: xml version of atom
550        '''
551        logging.info("Creating formatted XML version of Atom")
552        root = ET.Element("entry")
553        root.attrib["xmlns"] = "http://www.w3.org/2005/Atom"
554        root.attrib["xmlns:moles"] = "http://ndg.nerc.ac.uk/schema/moles2beta"
555        root.attrib["xmlns:georss"] = "http://www.georss.org/georss/10"
556        root.attrib["xmlns:gml"] = "http://www.opengis.net/gml"
557        id = ET.SubElement(root, "id")
558        id.text = self.atomID
559        title = ET.SubElement(root, "title")
560        title.text = self.title
561        self._linksToXML(root)
562
563        # NB, the author tag is mandatory for atoms - so if an explicit
564        # author has not been set, just take the author to be the provider
565        if not self.author:
566            author = Person()
567            author.name = self.ME.providerID
568            #author.uri = self.ME.providerID
569            self.author = author
570
571        root.append(self.author.toXML())
572           
573        for contributor in self.contributors:
574            root.append(contributor.toXML())
575
576        # add the moles entity section, if it is required
577        if self.ME:
578            root.append(self.ME.toXML())
579
580        # add parameters data
581        for param in self.parameters:
582            if param.hasValue():
583                root.append(param.toXML())
584
585        # add the type and subtype data
586        self.__addAtomTypeDataXML(root)
587                   
588        summary = ET.SubElement(root, "summary")
589        summary.text = self.Summary
590                   
591        # add link to content, if required - NB, can only have one content element in atom
592        # - and this is mandatory
593        content = ET.SubElement(root, "content")
594        contentFile = self.contentFile or self.csmlFile or self.cdmlFile
595        if contentFile:
596            content.attrib["type"] = "application/xml"
597            content.attrib["src"] = contentFile
598        else:
599            content.text = self.Content
600            content.attrib["type"] = "xhtml"
601       
602        # if there's a published date already defined, assume we're doing an update now
603        # NB, update element is mandatory
604        currentDate = datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%SZ")
605        if not self.publishedDate:
606            self.publishedDate = currentDate
607
608        updated = ET.SubElement(root, "updated")
609        if not self.updatedDate:
610            self.updatedDate = currentDate
611        updated.text = self.updatedDate
612
613        published = ET.SubElement(root, "published")
614        published.text = self.publishedDate
615
616        # add temporal range data, if available
617        temporalRange = ET.SubElement(root, "moles:temporalRange")
618        if self.t1:
619            temporalRange.text = self.t1
620            if self.t2:
621                temporalRange.text += "/" + self.t2
622
623        # add spatial range data, if available
624        self._addSpatialData(root)
625
626        tree = ET.ElementTree(root)
627        logging.info("XML version of Atom created")
628        return tree
629
630
631    def __getSummary(self):
632        logging.debug("Getting summary data")
633        summaryString = ""
634        for summary_line in self.summary:
635            summaryString += summary_line + "\n"
636
637        return summaryString
638
639    def __setSummary(self, summary):
640        logging.debug("Adding summary data")
641        self.summary = []
642        for summary_line in summary.split('\n'):
643            self.summary.append(escapeSpecialCharacters(summary_line))
644           
645    Summary = property(fset=__setSummary, fget=__getSummary, doc="Atom summary")
646
647
648    def __getContent(self):
649        logging.debug("Getting content data")
650        contentString = ""
651        # NB, there must be content specified in an atom
652        if not self.content:
653            return "Metadata document"
654       
655        for content_line in self.content:
656            contentString += content_line + "\n"
657
658        return contentString
659
660    def __setContent(self, content):
661        logging.debug("Adding content data")
662        self.content = []
663        for content_line in content.split('\n'):
664            self.content.append(content_line)
665           
666    Content = property(fset=__setContent, fget=__getContent, doc="Atom content")
667
668           
669    def fromString(self, xmlString):
670        '''
671        Initialise Atom object using an xmlString
672        @param xmlString: representation of atom as an XML string
673        '''
674        logging.info("Ingesting data from XML string")
675       
676        # firstly, remove any namespaces used - to avoid problems with elementtree
677        logging.debug("Stripping moles namespace from string to allow easy handling with elementtree")
678        xmlString = xmlString.replace('moles:', '')
679        xmlString = xmlString.replace('georss:', '')
680        xmlString = xmlString.replace('gml:', '')
681        xmlString = xmlString.replace('xmlns="http://www.w3.org/2005/Atom"', '')
682        xmlString = xmlString.replace('default:', '')
683
684        # now create elementtree with the XML string
685        logging.debug("Create elementtree instance with XML string")
686        tree = ET.fromstring(xmlString)
687       
688        title = tree.findtext('title')
689        if title:
690            logging.debug("Adding title data")
691            self.title = title
692
693        summary = tree.findtext('summary')
694        if summary:
695            self.Summary = summary#.decode('unicode_escape')
696
697        authorElement = tree.find('author')
698        logging.debug("Adding author data")
699        author = Person()
700        author.fromETElement(authorElement)
701        self.author = author
702
703        contributorElements = tree.findall('contributor')
704        for contributorElement in contributorElements:
705            logging.debug("Adding contributor data")
706            contributor = Person(personType = Person.CONTRIBUTOR_TYPE)
707            contributor.fromETElement(contributorElement)
708            self.contributors.append(contributor)
709
710        molesElement = tree.find('entity')
711        if molesElement:
712            self.ME.fromET(molesElement)
713               
714        self.atomID = tree.findtext('id')
715
716        self._parseCategoryData(tree.findall('category'))
717
718        self._parseLinksData(tree.findall('link'))
719           
720        contentTag = tree.find('content')
721        if contentTag != None:
722            logging.debug("Found content tag - checking for CSML/CDML file data")
723            file = contentTag.attrib.get('src')
724            if file:
725                # NB, the path will reveal more reliably whether we're dealing with CSML and CDML files
726                if file.upper().find('CSML') > -1:
727                    logging.debug("Adding CSML file data")
728                    self.csmlFile = file
729                elif file.upper().find('CDML') > -1:
730                    logging.debug("Adding CDML file data")
731                    self.cdmlFile = file
732                self.contentFile = file
733            else:
734                logging.debug("No file data - adding contents of element instead")
735                self.Content = contentTag.text
736       
737        range = tree.findtext('temporalRange')
738        if range:
739            logging.debug("Adding temporal range data")
740            timeData = range.split('/')
741            self.t1 = timeData[0]
742            if len(timeData) > 1:
743                self.t2 = timeData[1]
744       
745        # NB, this parser won't mind if we're dealing with Envelope or EnvelopeWithTimePeriod
746        minBBox = tree.findall('.//lowerCorner')
747        if minBBox:
748            logging.debug("Adding min spatial range data")
749            minBBox = minBBox[0]
750            spatialData = minBBox.text.split()
751            self.minX = spatialData[0]
752            if len(spatialData) > 1:
753                self.minY = spatialData[1]
754       
755        maxBBox = tree.findall('.//upperCorner')
756        if maxBBox:
757            maxBBox = maxBBox[0]
758            logging.debug("Adding max spatial range data")
759            spatialData = maxBBox.text.split()
760            self.maxX = spatialData[0]
761            if len(spatialData) > 1:
762                self.maxY = spatialData[1]
763               
764        publishedDate = tree.findtext('published')
765        if publishedDate:
766            logging.debug("Adding published date")
767            self.publishedDate = publishedDate
768               
769        updatedDate = tree.findtext('updated')
770        if updatedDate:
771            logging.debug("Adding updated date")
772            self.updatedDate = updatedDate
773           
774        logging.info("Completed data ingest")
775   
776   
777    def _parseCategoryData(self, categories):
778        logging.debug("Adding category/parameters data")
779        for category in categories:
780            cat = Category()
781            cat.fromETElement(category)
782           
783            if cat.term == self.ATOM_TYPE:
784                logging.debug("Found atom type data")
785                self.atomTypeID = cat.label
786                self.atomTypeName = self.VTD.TERM_DATA[cat.label].title
787                continue
788            elif cat.term == self.ATOM_SUBTYPE:
789                logging.debug("Found atom subtype data")
790                self.subtypeID = cat.label
791                self.subtype = cat.scheme
792                continue
793
794            self.parameters.append(cat)
795   
796
797    def setDatasetID(self, datasetID):
798        '''
799        Set the dataset ID for the atom - and generate an appropriate atom name using this
800        @param datasetID: ID to set for the atom
801        '''
802        self.datasetID = datasetID
803        self._generateAtomName(datasetID) 
804        self.atomID = self.createAtomID(datasetID)
805
806
807    def createAtomID(self, datasetID):
808        '''
809        Create a unique ID, conforming to atom standards, for atom
810        NB, see http://diveintomark.org/archives/2004/05/28/howto-atom-id
811        @param datasetID: ID of atom's dataset
812        @return: unique ID
813        '''
814        logging.info("Creating unique ID for atom")
815        if not self.atomBrowseURL:
816            self._generateAtomName(datasetID)
817        urlBit = self.atomBrowseURL.split('://')[1]
818        urlBit = urlBit.replace('#', '')
819        urlBits = urlBit.split('/')
820        dateBit = datetime.datetime.today().strftime("%Y-%m-%d")
821       
822        id = "tag:" + urlBits[0] + "," + dateBit + ":/" + "/".join(urlBits[1:])
823        logging.info("- unique ID created for atom")
824        logging.debug(" - '%s'" %id)
825        return id
826       
827       
828    def _generateAtomName(self, datasetID):
829        '''
830        Generate a consistent name for the atom - with full eXist doc path
831        @param datasetID: ID of atom's dataset
832        '''
833        self.atomName = datasetID + ".atom"
834        self.ndgURI = self.ME.providerID + "__ATOM__" + datasetID
835        self.atomBrowseURL = VTD.BROWSE_ROOT_URL + self.ndgURI
836
837
838    def _parseLinksData(self, links):
839        '''
840        Extract links and atom data from array of link elements in the XML representation of the atom
841        @param links: an array of <link> elements
842        '''
843        # firstly, get all data to start with, so we can properly process it afterwards
844        linkData = {}
845        logging.debug("Getting link data")
846        for linkTag in links:
847            link = Link()
848            link.fromETElement(linkTag)
849
850            if not linkData.has_key(link.rel):
851                linkData[link.rel] = []
852           
853            linkData[link.rel].append(link)
854
855        # there should be one self referencing link - which will provide info on the atom itself
856        if not linkData.has_key('self'):
857            errorMessage = "Atom does not have self referencing link - " + \
858                "cannot ascertain datasetID without this - please fix"
859            logging.error(errorMessage)
860            raise ValueError(errorMessage)
861       
862        # this is the link describing the atom itself
863        self.atomBrowseURL = linkData['self'][0].href
864       
865        self.datasetID = self.atomBrowseURL.split("__ATOM__")[-1]
866        self.atomName = self.datasetID + ".atom"
867        self.ndgURI = self.atomBrowseURL.split(VTD.BROWSE_ROOT_URL)[1]
868       
869        # now remove this value and the associated moles doc link
870        del linkData['self']
871        molesDoc = self.atomBrowseURL.replace('ATOM', 'NDG-B1')
872        if linkData.has_key('related'):
873            relatedLinks = []
874            for link in linkData['related']:
875                if link.href != molesDoc:
876                    relatedLinks.append(link)
877           
878            linkData['related'] = relatedLinks
879               
880        # now add the remaining links to the atom
881        for key in linkData:
882            for link in linkData[key]:
883                logging.debug("Adding link data")
884                self.relatedLinks.append(link)
885       
886
887    def _addSpatialData(self, element):
888        '''
889        Add spatial coverage element to an input element
890        @param element: element to add coverage data to
891        '''
892        logging.info("Adding spatial data to Atom")
893        if not self.minX:
894            logging.info("No spatial data specified")
895            return
896        bbox = ET.SubElement(element, "georss:where")
897        envelope = ET.SubElement(bbox, "gml:Envelope")
898        lc = ET.SubElement(envelope, "gml:lowerCorner")
899        lc.text = str(self.minX) + " " + str(self.minY)
900        uc = ET.SubElement(envelope, "gml:upperCorner")
901        uc.text = str(self.maxX) + " " + str(self.maxY)
902
903       
904    def setAttribute(self, attributeName, attributeValue):
905        '''
906        Set the value of an atom attribute - and do some basic tidying up of the string content
907        - to escape any XML unfriendly characters
908        @param attributeName: name of the attribute whose value to set
909        @param attributeValue: value to set the attribute to 
910        '''
911        logging.debug("Setting attribute, %s, to %s" %(attributeName, attributeValue))
912        origValue = attributeValue
913       
914        # escape any special characters if a value has been specified
915        # NB, need to cope with both single values and arrays
916        if attributeValue:
917            if type(attributeValue) is list:
918                newVals = []
919                for val in attributeValue:
920                    newVals.append(self.objectify(escapeSpecialCharacters(val), attributeName))
921                attributeValue = newVals
922                   
923            else:
924                attributeValue = self.objectify(escapeSpecialCharacters(attributeValue), attributeName)
925
926        # handle the special case of authors; only one author is allowed per atom
927        # - the others should be treated as contributors
928        if attributeName == "authors":
929            setattr(self, "author", attributeValue[0])
930            if len(attributeValue) > 1:
931                setattr(self, "contributors", attributeValue[1:])
932        elif attributeName == "atomAuthors":
933            if isinstance(attributeValue, list):
934                for val in attributeValue:
935                    self.ME.responsibleParties.append(val)
936            else:
937                self.ME.responsibleParties.append(attributeValue)
938        else:
939            setattr(self, attributeName, attributeValue)
940
941
942    def objectify(self, objectVals, attributeName):
943        '''
944        Some inputs are specified as strings but need to be converted into
945        objects - do this here
946        @param objectVals: a '|' delimited string of values
947        @param attributeName: name of attribute the values belong to
948        '''
949        obj = None
950        if type(objectVals) != str:
951            return objectVals
952       
953        if attributeName == "relatedLinks":
954            obj = Link()
955        elif attributeName == "atomAuthors" or attributeName == "authors":
956            obj = Person()
957
958        if obj:
959            obj.fromString(objectVals)
960            return obj
961       
962        return objectVals
963
964
965    def toPrettyXML(self):
966        '''
967        Returns nicely formatted XML as string
968        '''
969        atomXML = self.toXML()
970
971        # create the string
972        logging.debug("Converting the elementtree object into a string")
973        prettyXML = et2text(atomXML.getroot())
974
975        # add XML version tag
976        prettyXML = "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n" + prettyXML
977        logging.info("Created formatted version of XML object")
978        return prettyXML
979
980
981    def getLinksOfType(self, termID):
982        '''
983        Returns links in the atom related links attribute which match the specified
984        term ID
985        @param termID: the termID to look for in the related links - NB, this is
986        matched to the end of the link.rel value
987        @return links: array of Link objects with matching term type
988        '''
989        logging.debug("Getting atom links of type, '%s'" %termID)
990        matchingLinks = []
991        for link in self.relatedLinks:
992            # firstly, handle special case where we only want the online ref type links
993            # returned
994            if termID == self.ONLINE_REF_LABEL:
995                if not link.isChildAtom():
996                    logging.debug("- found link with matching term type")
997                    matchingLinks.append(link)
998               
999            elif link and link.rel and link.rel.lower().endswith(termID.lower()):
1000                logging.debug("- found link with matching term type")
1001                matchingLinks.append(link)
1002               
1003        logging.debug("Returning matched links")
1004        return matchingLinks
1005       
1006       
1007    def getLogos(self):
1008        '''
1009        Return related links that are logos
1010        @return: array of Links containing the logos for the atom
1011        '''
1012        logos = []
1013        for link in self.relatedLinks:
1014            if link.rel.lower().endswith(VTD.LOGO_TERM.lower()):
1015                logos.append(link)
1016               
1017        return logos
1018   
1019   
1020    def isGranule(self):
1021        if self.atomTypeID == VTD.GRANULE_TERM:
1022            return True
1023        return False
1024   
1025   
1026    def isDE(self):
1027        if self.atomTypeID == VTD.DE_TERM:
1028            return True
1029        return False
1030   
1031    def isDeployment(self):
1032        if self.subtypeID and self.subtypeID == VTD.DEPLOYMENT_TERM:
1033            return True
1034        return False
1035   
1036    def isDeployable(self):
1037        if (self.atomTypeID == VTD.ACTIVITY_TERM and self.subtypeID != VTD.DEPLOYMENT_TERM) or \
1038            self.atomTypeID == VTD.DPT_TERM or \
1039            self.atomTypeID == VTD.OBS_TERM:
1040            return True
1041        return False
1042
1043       
1044    def addCSMLData(self, csmlName, csmlContent, aggregateCoverage=False, useCSMLID=False):
1045        '''
1046        Parse CSML data and add extracted info to the atom
1047        @param csmlName: name of the csml file
1048        @param csmlContent: content of the csml file - NB, if this is set to None and the
1049        file, csmlName, is available locally, CsmlParser.Dataset will read in the file
1050        directly
1051        @keyword aggregateCoverage: if set to True, only coverage data that extends the
1052        @keyword useCSMLID: if True, use the CSML doc ID as the dataset ID - NB,
1053        this should only be True if creating a new atom - e.g. from a granulite
1054        atom coverage data will be added
1055        @return csmlDoc: the CsmlParser.Dataset object with the csml data in
1056        '''
1057        logging.info("Creating CSML data model")
1058        self.csmlFile = csmlName
1059        self.contentFile = csmlName
1060        content = csmlContent or csmlName
1061        csmlDoc = CsmlParser.Dataset(file=content)
1062       
1063        logging.info("Extracting info from CSML file")
1064        logging.debug("Got dataset ID: %s" %csmlDoc.id)
1065        if useCSMLID:
1066            logging.debug(" - using this ID for the atom")
1067            self.setDatasetID(VTD.GRANULE_TERM + '_' + csmlDoc.id)
1068       
1069        title = csmlDoc.name.CONTENT
1070        logging.debug("Got dataset name (title): '%s'" %title)
1071        # NB, if a title is specified (and not as the default value), it automatically is used in
1072        # place of anything in the granulite file
1073        if title and title != "NAME OF DATASET GOES HERE":
1074            logging.info("Title, '%s', extracted from CSML file" %title)
1075            if self.title:
1076                logging.info("- NB, this will override the title specified in the granulite file ('%s')" \
1077                             %self.title)
1078            self.title = title
1079               
1080        bbox1 = csmlDoc.getBoundingBox()
1081        bbox2 = csmlDoc.getCSMLBoundingBox()
1082        time = bbox2.getTimeLimits()
1083   
1084        # now check for other parameters to add to granule
1085        # Firstly, extract the bounding envelope
1086        if bbox1:
1087            w, e = normaliseLongitude(bbox1[0],bbox1[2])
1088            n, s = (bbox1[3], bbox1[1])
1089   
1090            if not aggregateCoverage or (not self.maxY or float(n) > float(self.maxY)):
1091                self.maxY = n
1092               
1093            if not aggregateCoverage or (not self.minY or float(s) < float(self.minY)):
1094                self.minY = s
1095           
1096            if not aggregateCoverage or (not self.minX or float(w) < float(self.minX)):
1097                self.minX = w
1098   
1099            if not aggregateCoverage or (not self.maxX or float(e) > float(self.maxX)):
1100                self.maxX = e
1101           
1102            logging.debug("Got bounding box data from file: (%s, %s) , (%s, %s)" \
1103                          %(w, s, e, n))
1104           
1105            logging.debug("Updated atom bounding box data: (%s, %s) , (%s, %s)" \
1106                          %(self.minX, self.minY, self.maxX, self.maxY))
1107        else:
1108            logging.debug("No valid bounding box data found")
1109   
1110        if time:
1111            t1 = formatDateYYYYMMDD(time[0])
1112            if not aggregateCoverage or \
1113                (not self.t1 or datetime.datetime.strptime(t1, YEAR_FORMAT) < \
1114                    datetime.datetime.strptime(self.t1, YEAR_FORMAT)):
1115                self.t1 = t1
1116   
1117            t2 = time[1]
1118            if t2 and t2 != 'None':
1119                t2 = formatDateYYYYMMDD(t2)
1120                if not aggregateCoverage or \
1121                    (not self.t2 or datetime.datetime.strptime(t2, YEAR_FORMAT) > \
1122                        datetime.datetime.strptime(self.t2, YEAR_FORMAT)):
1123                    self.t2 = t2
1124           
1125            logging.debug("Got time range: %s -> %s" %(self.t1, self.t2))
1126        else:
1127            logging.debug("No valid time range data found")
1128   
1129        #create parameter summaries:
1130        #set up list to hold the parameters data
1131        parameters = []
1132        for feature in csmlDoc.featureCollection.featureMembers:
1133            if hasattr(feature.parameter, 'href'):
1134                paramTriple = ""
1135                if hasattr(feature, 'description'):
1136                    paramTriple = feature.description.CONTENT
1137                    paramTriple += " | " + feature.parameter.href
1138                   
1139                    term = ""
1140                    if hasattr(feature, 'name'):
1141                        term = feature.name.CONTENT
1142   
1143                    paramTriple += " | " + term
1144                   
1145                    logging.debug("Got parameter info: %s" %paramTriple)
1146                    parameters.append(paramTriple)
1147       
1148        # update the atom with the extracted parameters
1149        logging.info("Adding CSML parameters to granule atom")
1150        self.addParameters(parameters)
1151        logging.info("Finished adding CSML data")
1152        return csmlDoc
1153
1154
1155    def lookupAssociatedData(self, type, dr, lookupIndirectReferences=False):
1156        '''
1157        Check through the atom links and retrieve any associated data of the
1158        specified type
1159        @param type: type of associated data to lookup - currently VTD.DEPLOYMENT_TERM
1160        or VTD.DE_TERM
1161        @param dr: Instance of DocumentRetrieve object - NB, this requires eXist
1162        config details which are not available to the Atom object
1163        @keyword lookupIndirectReferences: if True, the atom ID is used to search
1164        defined deployments to find those which reference it, otherwise only
1165        deployments data featured in the atom related links are processed
1166        '''
1167        logging.info("Looking up %s info" %type)
1168       
1169        self.allActivities = []
1170        self.allObs = []
1171        self.allDpts = []
1172
1173        if type != VTD.DE_TERM and type != VTD.DEPLOYMENT_TERM:
1174            raise ValueError('Unrecognised associated data type: %s' %type)
1175       
1176        # avoid duplicating lookup effort
1177        if (type == VTD.DEPLOYMENT_TERM and self.deployments) or \
1178            (type == VTD.DE_TERM and self.dataEntities):
1179            logging.info("- this info has already been looked up - returning")
1180            return
1181
1182        # firstly, collect all the references to the info required
1183        if lookupIndirectReferences:
1184            logging.info("Looking up indirect references")
1185           
1186            # if we're looking up DE data for deployments data, need to have the
1187            # deployments info looked up first
1188            if type == VTD.DE_TERM and self.isDeployable() and not self.deployments:
1189                self.lookupAssociatedData(VTD.DEPLOYMENT_TERM, dr, lookupIndirectReferences)
1190           
1191            logging.info("Looking up references to this atom from other %s" %type)
1192           
1193            # NB, if we're looking up deployments info, we only look up references
1194            # to this atom - if we're looking up DEs, we need to look up references
1195            # to the deployments referenced by this atom
1196            urls = [self.atomBrowseURL]
1197           
1198            if type == VTD.DE_TERM and self.isDeployable():
1199                urls = []
1200                for dep in self.deployments:
1201                    urls.append(dep.browseURL)
1202                   
1203            links = []
1204            for url in urls:
1205                doc = dr.get(type, dr.ATOM_TYPE, url, \
1206                                            targetCollection='/db/atoms/')
1207                # now need to turn this results set into actual atoms
1208                tree = ET.fromstring(doc)
1209                for atom in tree:
1210                    logging.debug("- found reference in %s" %type)
1211                    links.append(ET.tostring(atom))
1212                   
1213            logging.info("Finished looking up indirect references")
1214        else:
1215            links = self.getLinksOfType(self.VTD.DEPLOYMENT_TERM)
1216
1217        # now retrieve the references and extract the required data
1218        logging.info("Retrieving info from %s references" %type)
1219        if type == VTD.DEPLOYMENT_TERM:
1220            self.deployments = []
1221            for link in links:
1222                if lookupIndirectReferences:
1223                    deploymentAtom = link
1224                else:
1225                    localID = link.href.split("__ATOM__")[-1]
1226                    deploymentAtom = dr.get(self.ME.providerID, 'ATOM', localID, \
1227                                            targetCollection='/db/atoms/')
1228   
1229                deployment = Deployment.Deployment(Atom(xmlString=str(deploymentAtom)))
1230                self.deployments.append(deployment)
1231               
1232                self.addUniqueLinks(self.allActivities, deployment.activities)
1233                self.addUniqueLinks(self.allObs, deployment.obs)
1234                self.addUniqueLinks(self.allDpts, deployment.dpts)
1235        else:
1236            # for DE data, just store the title + link in a Link object
1237            self.dataEntities = []
1238            for data in links:
1239                atom = Atom(xmlString=str(data))
1240                link = Link()
1241                link.title = atom.title
1242                link.href = atom.atomBrowseURL
1243               
1244                # NB, different deployments may be used by the same DE - so
1245                # avoid duplication
1246                self.addUniqueLinks(self.dataEntities, link)
1247           
1248        logging.info("Finished looking up %s info" %type)
1249
1250
1251    def addUniqueLinks(self, dataArray, links):
1252        '''
1253        Add links to specified array - if they are not already included
1254        @param dataArray: a list, potentially arlready containing links
1255        @param links: a Link or array of Links to add to the dataArray
1256        '''
1257        logging.debug("Adding new links")
1258        if not links:
1259            return
1260       
1261        if type(links) is not list:
1262            links = [links]
1263       
1264        for link in links:
1265            if type(link) is not Link:
1266                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
1267                continue
1268            if link not in dataArray:
1269                logging.debug("- adding unique link")
1270                dataArray.append(link)
1271        logging.debug("Finished adding links")
Note: See TracBrowser for help on using the repository browser.