source: exist/trunk/python/ndgUtils/models/Atom.py @ 4314

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/models/Atom.py@4314
Revision 4314, 36.2 KB checked in by cbyrom, 11 years ago (diff)

Add new utility methods for adding/removing/retrieving online reference data
+ improve compare methods on atom subtype objects + extend test suite to test
new methods.

Line 
'''
 Classes representing data in atom format - allowing moles data to be stored and accessed in a web feed compatible way

 @author: C Byrom, Tessella Jun 2008
'''
6try: #python 2.5
7    from xml.etree import cElementTree as ET
8except ImportError:
9    try:
10        # if you've installed it yourself it comes this way
11        import cElementTree as ET
12    except ImportError:
13        # if you've egged it this is the way it comes
14        from ndgUtils.elementtree import cElementTree as ET
15import sys, logging, re, datetime
16from ndgUtils.eXistConnector import eXistConnector
17from ndgUtils.ETxmlView import et2text
18from utilities import getTripleData, escapeSpecialCharacters, \
19    tidyUpParameters, getISO8601Date
20from ndgUtils.vocabtermdata import VocabTermData as VTD
21from ndgUtils.models import MolesEntity as ME
22
23
class AtomError(Exception):
    """
    Error type used by the Atom model classes.
    NB, the message is written to the error log as soon as the exception
    object is created.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        logging.error(msg)
31
32
class Person(object):
    '''
    Class representing atom author type data - with name, uri and role attributes
    NB, two Person objects compare equal when their uri, name, role and type
    all match - this is what 'person in personArray' checks rely on
    '''
    AUTHOR_TYPE = 0
    CONTRIBUTOR_TYPE = 1
    RESPONSIBLE_PARTY_TYPE = 2
    # element names, indexed by the person type constants above
    ELEMENT_NAMES = ["author", "contributor", "responsibleParty"]

    def __init__(self, personType = AUTHOR_TYPE, namespace = None):
        '''
        @param personType: one of the *_TYPE constants of this class
        @param namespace: optional prefix to apply to the element names
        produced by toXML
        '''
        self.type = personType
        self.ns = namespace
        self.name = ""
        self.uri = ""
        self.role = ""

        # NB, the atom format specifies slightly different data contents
        self.uriTagName = "email"
        # NB, responsible party data is always stored in the moles section
        if self.type == self.RESPONSIBLE_PARTY_TYPE:
            self.ns = 'moles'
            self.uriTagName = "uri"

    def __str__(self):
        '''
        @return: 'name | uri | role', or an empty string if none of these is set
        '''
        if self.name or self.uri or self.role:
            return self.name + " | " + self.uri + " | " + self.role
        return ""

    def fromString(self, personString):
        '''
        Set name, uri and role from the triple returned by getTripleData
        @param personString: string holding the person data
        '''
        (self.name, self.uri, self.role) = getTripleData(personString)

    def fromETElement(self, personTag):
        '''
        Set name, role and uri from the child elements of an elementtree element
        - missing children are stored as empty strings
        @param personTag: element with 'name', 'role' and uri/email children
        '''
        self.name = personTag.findtext('name') or ""
        self.role = personTag.findtext('role') or ""
        self.uri = personTag.findtext(self.uriTagName) or ""
        logging.debug("Added name: '%s', role: '%s', %s: '%s'" \
                      %(self.name, self.role, self.uriTagName, self.uri))

    def toXML(self):
        '''
        Create an element for the person - child elements are only added
        for the attributes that have a value
        @return: elementtree element named after the person type
        '''
        prefix = ""
        if self.ns:
            prefix = self.ns + ':'

        author = ET.Element(prefix + self.ELEMENT_NAMES[self.type])

        if self.name:
            name = ET.SubElement(author, prefix + "name")
            name.text = self.name

        if self.uri:
            uri = ET.SubElement(author, prefix + self.uriTagName)
            uri.text = self.uri

        if self.role:
            role = ET.SubElement(author, prefix + "role")
            role.text = self.role

        return author

    def _compareKey(self):
        '''
        @return: tuple of the attributes that define the identity of a person
        '''
        return (self.uri, self.name, self.role, self.type)

    def __eq__(self, other):
        '''
        Value comparison - needed because __cmp__ is ignored by python 3,
        which would otherwise fall back to comparing object identity
        '''
        if self is other:
            return True
        if not isinstance(other, Person):
            return NotImplemented
        return self._compareKey() == other._compareKey()

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # keep hashing consistent with __eq__; NB, the value changes if the
        # attributes are modified after the object has been put in a set/dict
        return hash(self._compareKey())

    def __cmp__(self, person1):
        '''
        Override comparison to allow proper object comparison when checking
        if Person objects are in an array already - i.e. if person in personArray...
        NB, only used by python 2; retained for backward compatibility
        @return: 0 if equal, -1 if person1 is empty, 1 otherwise
        '''
        if not person1:
            return -1

        # call __eq__ directly - using '==' here could recurse back into __cmp__
        if self.__eq__(person1) is True:
            return 0
        return 1
106
107
class Link(object):
    '''
    Class representing an atom link - with href, title and rel attributes
    NB, two Link objects compare equal when their href, title and rel all
    match - this is what 'link in linkArray' checks rely on
    '''

    def __init__(self):
        self.href = ""
        self.title = ""
        self.rel = ""

    def fromString(self, linkString):
        '''
        Set href, title and rel from the triple returned by getTripleData
        @param linkString: string holding the link data
        '''
        (self.href, self.title, self.rel) = getTripleData(linkString)

    def fromETElement(self, linkTag):
        '''
        Set href, rel and title from the attributes of an elementtree element
        - missing attributes are stored as empty strings
        @param linkTag: a <link> element
        '''
        self.href = linkTag.attrib.get('href') or ""
        self.rel = linkTag.attrib.get('rel') or ""
        self.title = linkTag.attrib.get('title') or ""

    def toXML(self):
        '''
        @return: 'link' element with href, title and rel attributes
        '''
        link = ET.Element("link")
        link.attrib["href"] = self.href
        link.attrib["title"] = self.title
        link.attrib["rel"] = self.rel
        return link

    def hasValue(self):
        '''
        @return True, if the link has an href or a title; False otherwise
        '''
        # NB, just a rel on its own is meaningless - so ignore
        return bool(self.href or self.title)

    def __str__(self):
        '''
        @return: 'href | title | rel', or an empty string if none of these is set
        '''
        if self.href or self.title or self.rel:
            return self.href + " | " + self.title + " | " + self.rel
        return ""

    def isChildAtom(self):
        '''
        Determines whether the link refers to another atom - e.g. a link to
        a data granule
        @return True, if so; False otherwise
        '''
        return bool(self.rel.endswith(VTD.GRANULE_TERM) or \
            self.rel.endswith(VTD.DEPLOYMENT_TERM) or \
            self.rel.endswith(VTD.ACTIVITY_TERM) or \
            self.rel.endswith(VTD.DPT_TERM) or \
            self.rel.endswith(VTD.OBS_TERM))

    def _compareKey(self):
        '''
        @return: tuple of the attributes that define the identity of a link
        '''
        return (self.href, self.title, self.rel)

    def __eq__(self, other):
        '''
        Value comparison - needed because __cmp__ is ignored by python 3,
        which would otherwise fall back to comparing object identity
        '''
        if self is other:
            return True
        if not isinstance(other, Link):
            return NotImplemented
        return self._compareKey() == other._compareKey()

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # keep hashing consistent with __eq__; NB, the value changes if the
        # attributes are modified after the object has been put in a set/dict
        return hash(self._compareKey())

    def __cmp__(self, link1):
        '''
        Override comparison to allow proper object comparison when checking
        if Link objects are in an array already - i.e. if link in linkArray...
        NB, only used by python 2; retained for backward compatibility
        @return: 0 if equal, -1 if link1 is empty, 1 otherwise
        '''
        if not link1:
            return -1

        # call __eq__ directly - using '==' here could recurse back into __cmp__
        if self.__eq__(link1) is True:
            return 0
        return 1
173
174
class Category(object):
    '''
    Holder for the data of an atom category element - i.e. its term, scheme
    and label attributes
    '''
    def __init__(self):
        self.term = self.scheme = self.label = ""

    def fromString(self, linkString):
        '''
        Populate the category from the triple returned by getTripleData
        - NB, the values are taken in the order label, scheme, term
        @param linkString: string holding the category data
        '''
        values = getTripleData(linkString)
        (self.label, self.scheme, self.term) = values

    def fromETElement(self, linkTag):
        '''
        Populate the category from the attributes of an elementtree element
        - missing attributes end up as empty strings
        @param linkTag: a <category> element
        '''
        attributes = linkTag.attrib
        self.term = attributes.get('term') or ""
        self.label = attributes.get('label') or ""
        self.scheme = attributes.get('scheme') or ""

    def toXML(self):
        '''
        @return: 'category' element with term, scheme and label attributes
        '''
        categoryElement = ET.Element("category")
        for attributeName, value in (("term", self.term),
                                     ("scheme", self.scheme),
                                     ("label", self.label)):
            categoryElement.attrib[attributeName] = value
        return categoryElement

    def hasValue(self):
        '''
        @return True, if any of scheme, label or term is set; False otherwise
        '''
        return bool(self.scheme or self.label or self.term)
203
204
class Atom(object):
    '''
    Holds the data of a single atom document and converts it to and from
    its XML representation (see the toXML and fromString methods)
    '''

    # labels for use with the atom categories - used as the 'term' of the
    # category elements holding the atom type and subtype
    ATOM_TYPE = "ATOM_TYPE"
    ATOM_SUBTYPE = "ATOM_SUBTYPE"

    # labels for use with the templates to set/extract specific inputs
    ONLINE_REF_LABEL = "online_ref"
    PARAMETER_LABEL = "parameter"
    ATOM_REF_LABEL = "atom_ref"
    DELIMITER = "---"
    REMOVE_LABEL = "remove"
217
218
219    def __init__(self, atomType = None, vocabTermData = None, ndgObject = None, \
220                 xmlString = None, state = eXistConnector.WORKING_COLLECTION_PATH, **inputs):
221        '''
222        Constructor - initialise the atom variables
223        '''
224        logging.info("Initialising atom")
225        if atomType:
226            logging.info(" - of type '%s'" %atomType)
227        self.atomTypeID = atomType
228
229        # some data have further subtypes specified
230        self.subtypeID = None # this should be the termID
231        self.subtype = None # and this should be the fully formed vocab URL
232       
233        self.ndgObject = ndgObject
234
235        self.atomName = None
236        self.files = []
237        self.author = None
238        self.contributors = []
239        self.atomAuthors = []
240        self.parameters = []
241        self.spatialData = []
242        self.temporalData = []
243        self.relatedLinks = []
244        self.summary = []
245        self.content = []
246        self.csmlFile = None
247        self.cdmlFile = None
248        # general variable to use for setting the atom content - NB, if a csmlFile is specified
249        # (either directly or via a cdmlFile specification), this will be the content by default
250        # for this purpose
251        self.contentFile = None     
252        self.logos = []
253        self.title = None
254        self.datasetID = None        # NB, the dataset id ends up in the atomName - <path><datasetID>.atom
255        self.atomID = None
256   
257        # boundary box info - to replace spatial/temporalData?
258        self.minX = None
259        self.minY = None
260        self.maxX = None
261        self.maxY = None
262        self.t1 = None
263        self.t2 = None
264
265        self.ME = ME.MolesEntity()
266       
267        # date when the atom was first ingested
268        self.publishedDate = None
269
270        # last update date
271        self.updatedDate = None
272
273        # assume atom in working state by default - this is used to define what collection
274        # in eXist the atom is stored in
275        self.state = state
276       
277        # additional, non standard atom data can be included in the molesExtra element
278        if vocabTermData:
279            self.VTD = vocabTermData
280        else:
281            self.VTD = VTD()
282       
283        if xmlString:
284            self.fromString(xmlString)
285
286        # if inputs passed in as dict, add these now
287        if inputs:
288            logging.info("Adding info to atom from input dict")
289            logging.debug(inputs)
290            self.__dict__.update(inputs)
291           
292            # NB, this doesn't trigger the Summary Property, so do this
293            # explicitly, if need be
294            if inputs.has_key('Summary'):
295                self.Summary = inputs.get('Summary')
296            if inputs.has_key('Content'):
297                self.Content = inputs.get('Content')
298           
299            # also pass any moles data up to the moles entity object
300            if inputs.get('providerID'):
301                self.ME.providerID = inputs.get('providerID')
302               
303            if inputs.get('abbreviation'):
304                self.ME.abbreviation = inputs.get('abbreviation')
305
306        if self.atomTypeID:
307            self.atomTypeName = self.VTD.TERM_DATA[self.atomTypeID].title
308
309        logging.info("Atom initialised")
310
311
312    def addOnlineReferences(self, links):
313        '''
314        Add online reference data associated with the atom
315        - NB, care needs to be taken here since this data is stored in the atom
316        link elements and these are also used for the various atom associations
317        @param links: a Link or array of Links to add to the relatedLinks attribute
318        '''
319        logging.debug("Adding online references")
320        if not links:
321            return
322       
323        if type(links) is not list:
324            links = [links]
325       
326        # firstly clear out any online refs data from the existing related links
327        newLinks = []
328        for link in self.relatedLinks:
329            if link.isChildAtom():
330                newLinks.append(link)
331       
332        newLinks.extend(links)
333        self.relatedLinks = newLinks
334        logging.debug("Online references added")
335
336
337
338    def addUniqueRelatedLinks(self, links):
339        '''
340        Add links to relatedLinks array - if they are not already included
341        @param links: a Link or array of Links to add to the relatedLinks attribute
342        '''
343        logging.debug("Adding new related links")
344        if not links:
345            return
346       
347        if type(links) is not list:
348            links = [links]
349       
350        for link in links:
351            if type(link) is not Link:
352                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
353                continue
354            if link not in self.relatedLinks:
355                logging.debug("- adding unique link")
356                self.relatedLinks.append(link)
357        logging.debug("Finished adding links")
358       
359
360    def removeRelatedLinks(self, linksToDelete):
361        '''
362        Remove any links in the input list from the atom's related links list
363        @param linksToDelete: array of Link objects to remove from atom
364        '''
365        logging.debug("Removing related links from atom")
366        if not linksToDelete:
367            return
368       
369        if type(linksToDelete) is not list:
370            linksToDelete = [linksToDelete]
371       
372        updatedLinks = []
373        for link in self.relatedLinks:
374            if type(link) is not Link:
375                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
376                continue
377            if link in linksToDelete:
378                logging.debug("- found link to remove")
379            else:
380                updatedLinks.append(link)
381
382        self.relatedLinks = updatedLinks
383        logging.debug("Links removed")
384       
385
386    def getDefaultCollectionPath(self):
387        '''
388        Determine the correct collection to use for the atom in eXist
389        '''
390        collectionPath = eXistConnector.BASE_COLLECTION_PATH + self.state
391       
392        if self.atomTypeID == VTD.DE_TERM:
393            collectionPath += eXistConnector.DE_COLLECTION_PATH
394        elif self.atomTypeID == VTD.GRANULE_TERM:
395            collectionPath += eXistConnector.GRANULE_COLLECTION_PATH
396        elif self.atomTypeID == VTD.ACTIVITY_TERM and \
397            self.subtypeID == VTD.DEPLOYMENT_TERM:
398            collectionPath += eXistConnector.DEPLOYMENTS_COLLECTION_PATH
399        else:
400            collectionPath += eXistConnector.DEPLOYMENT_COLLECTION_PATH
401       
402        if not self.ME.providerID:
403            raise AtomError("Error: cannot determine atom collection path because " + \
404                            "the provider ID is not defined")
405           
406        collectionPath += self.ME.providerID + "/"
407        return collectionPath
408           
409
410    def __addAtomTypeDataXML(self, root):
411        '''
412        Add the atom type, and subtype data, if available, to atom categories
413        - and lookup and add the appropriate vocab term data
414        '''
415        if self.atomTypeID:
416            logging.info("Adding atom type info to XML output")
417            category = Category()
418            category.label = self.atomTypeID
419            # look up the appropriate vocab term data
420            category.scheme = self.VTD.getTermCurrentVocabURL(self.atomTypeID)
421            category.term = self.ATOM_TYPE
422            root.append(category.toXML())
423
424        if self.subtypeID:
425            logging.info("Adding atom subtype info to XML output")
426            # NB subtypes not all defined, so leave this out for the moment
427            category.label = self.subtypeID
428            # look up the appropriate vocab term data
429            category.scheme = self.VTD.getTermCurrentVocabURL(self.subtypeID)
430            category.term = self.ATOM_SUBTYPE
431            root.append(category.toXML())
432
433
434    def addMolesEntityData(self, abbreviation, provider_id, object_creation_time):
435        '''
436        Add data to include in the moles entity element
437        '''
438        logging.debug('Adding moles entity information')
439        self.ME.abbreviation = abbreviation
440        self.ME.providerID = provider_id
441        self.ME.createdDate = getISO8601Date(object_creation_time)
442        logging.debug('Moles entity information added')
443
444
445    def addAuthors(self, authors):
446        '''
447        Add author data appropriately to the atom
448        NB, these will overwrite any existing authors of the same type
449        @param authors: list of Person objects with the author data
450        '''
451        logging.debug('Adding authors data to Atom')
452        isFirstAuthor = {}
453        authorArray = None
454        for author in authors:
455            # NB, we're only allowed one atom author
456            if author.type == Person.AUTHOR_TYPE:
457                self.author = author
458                if isFirstAuthor.has_key(author.type):
459                    raise AtomError("Error: an atom can only have one author specified")
460                isFirstAuthor[author.type] = 1
461                continue
462            elif author.type == Person.CONTRIBUTOR_TYPE:
463                authorArray = self.contributors
464            elif author.type == Person.RESPONSIBLE_PARTY_TYPE:
465                authorArray = self.ME.responsibleParties
466               
467            # check if this is the first addition - if so, clear out the
468            # array in advance
469            if not isFirstAuthor.has_key(author.type):
470                logging.debug("Clearing out author array")
471                # NB, need to be careful to clear the array, not create a ref
472                # to a new array
473                del authorArray[:]
474                isFirstAuthor[author.type] = 1
475
476            if str(author) != "" and author not in authorArray:
477                logging.debug("Adding author (type:'%s', name:'%s', uri:'%s', role:'%s')" \
478                              %(author.type, author.name, author.uri, author.role))
479                authorArray.append(author)
480
481        logging.debug('Finished adding authors data')
482
483
484    def _isNewParameter(self, param):
485        '''
486        Check if a parameter is already specified in the atom, return False if
487        so, otherwise return True
488        '''
489        for p in self.parameters:
490            if p.term == param.term and \
491                p.scheme == param.scheme and \
492                p.label == param.label:
493                return False
494        return True
495
496
497    def addRelatedLinks(self, linkVals):
498        '''
499        Add related links in string format - converting to Link objects
500        @param linkVals: string of format, 'uri | title | vocabServerURL'
501        '''
502        self.relatedLinks.append(self.objectify(linkVals, 'relatedLinks'))
503
504
505    def addLogos(self, logoVals):
506        '''
507        Add related logos in string format - converting to Link objects
508        @param linkVals: string of format, 'uri | title | vocabServerURL'
509        '''
510        self.relatedLinks.append(self.objectify(logoVals, 'logo'))
511
512
513    def addParameters(self, params):
514        '''
515        Add a parameter to list - ensuring it is unique and has been formatted and tidied appropriately
516        @params param: parameter, as string array, to add to atom parameters collection
517        '''
518        # avoid strings being parsed character by character
519        if type(params) is str:
520            params = [params]
521           
522        for param in params:
523            # firstly tidy parameter
524            param = tidyUpParameters(param)
525            category = Category()
526            category.fromString(param)
527
528            # now check for uniqueness
529            if self._isNewParameter(category):
530                logging.debug("Adding new parameter: %s" %param)
531                self.parameters.append(category)
532   
533   
534    def _linksToXML(self, root):
535        '''
536        Add required links to the input element
537        @param root: element to add links to - NB, should be the root element of the atom
538        '''
539        selfLink = ET.SubElement(root, "link")
540        selfLink.attrib["href"] = self.atomBrowseURL
541        selfLink.attrib["rel"] = "self"
542        if self.subtypeID != VTD.DEPLOYMENT_TERM:
543            molesLink = ET.SubElement(root, "link")
544            molesDoc = re.sub('ATOM','NDG-B1', self.atomBrowseURL)
545            molesLink.attrib["href"] = molesDoc
546            molesLink.attrib["rel"] = "related"
547       
548        for relatedLink in self.relatedLinks:
549            if relatedLink.hasValue():
550                root.append(relatedLink.toXML())
551       
552        for logo in self.logos:
553            if logo.hasValue():
554                root.append(logo.toXML())
555   
556    def toXML(self):
557        '''
558        Convert the atom into XML representation and return this
559        @return: xml version of atom
560        '''
561        logging.info("Creating formatted XML version of Atom")
562        root = ET.Element("entry")
563        root.attrib["xmlns"] = "http://www.w3.org/2005/Atom"
564        root.attrib["xmlns:moles"] = "http://ndg.nerc.ac.uk/schema/moles2alpha"
565        root.attrib["xmlns:georss"] = "http://www.georss.org/georss"
566        root.attrib["xmlns:gml"] = "http://www.opengis.net/gml"
567        id = ET.SubElement(root, "id")
568        id.text = self.atomID
569        title = ET.SubElement(root, "title")
570        title.text = self.title
571        self._linksToXML(root)
572
573        # NB, the author tag is mandatory for atoms - so if an explicit
574        # author has not been set, just take the author to be the provider
575        if not self.author:
576            author = Person()
577            author.name = self.ME.providerID
578            author.uri = self.ME.providerID
579            self.author = author
580
581        root.append(self.author.toXML())
582           
583        for contributor in self.contributors:
584            root.append(contributor.toXML())
585
586        # add the moles entity section, if it is required
587        if self.ME:
588            root.append(self.ME.toXML())
589
590        # add parameters data
591        for param in self.parameters:
592            if param.hasValue():
593                root.append(param.toXML())
594
595        # add the type and subtype data
596        self.__addAtomTypeDataXML(root)
597                   
598        summary = ET.SubElement(root, "summary")
599        summary.text = escapeSpecialCharacters(self.Summary)
600                   
601        # add link to content, if required - NB, can only have one content element in atom
602        # - and this is mandatory
603        content = ET.SubElement(root, "content")
604        if self.contentFile:
605            content.attrib["type"] = "application/xml"
606            content.attrib["src"] = self.contentFile
607        else:
608            content.text = escapeSpecialCharacters(self.Content)
609       
610        # if there's a published date already defined, assume we're doing an update now
611        # NB, update element is mandatory
612        currentDate = datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%SZ")
613        if not self.publishedDate:
614            self.publishedDate = currentDate
615
616        updated = ET.SubElement(root, "updated")
617        if not self.updatedDate:
618            self.updatedDate = currentDate
619        updated.text = self.updatedDate
620
621        published = ET.SubElement(root, "published")
622        published.text = self.publishedDate
623
624        # add temporal range data, if available
625        temporalRange = ET.SubElement(root, "moles:temporalRange")
626        if self.t1:
627            temporalRange.text = self.t1
628            if self.t2:
629                temporalRange.text += "/" + self.t2
630
631        # add spatial range data, if available
632        self._addSpatialData(root)
633
634        tree = ET.ElementTree(root)
635        logging.info("XML version of Atom created")
636        return tree
637
638
639    def __getSummary(self):
640        logging.debug("Getting summary data")
641        summaryString = ""
642        for summary_line in self.summary:
643            summaryString += summary_line + "\n"
644
645        return summaryString
646
647    def __setSummary(self, summary):
648        logging.debug("Adding summary data")
649        self.summary = []
650        for summary_line in summary.split('\n'):
651            self.summary.append(summary_line)
652           
653    Summary = property(fset=__setSummary, fget=__getSummary, doc="Atom summary")
654
655
656    def __getContent(self):
657        logging.debug("Getting content data")
658        contentString = ""
659        # NB, there must be content specified in an atom
660        if not self.content:
661            return "Metadata document"
662       
663        for content_line in self.content:
664            contentString += content_line + "\n"
665
666        return contentString
667
668    def __setContent(self, content):
669        logging.debug("Adding content data")
670        self.content = []
671        for content_line in content.split('\n'):
672            self.content.append(content_line)
673           
674    Content = property(fset=__setContent, fget=__getContent, doc="Atom content")
675
676           
677    def fromString(self, xmlString):
678        '''
679        Initialise Atom object using an xmlString
680        @param xmlString: representation of atom as an XML string
681        '''
682        logging.info("Ingesting data from XML string")
683       
684        # firstly, remove any namespaces used - to avoid problems with elementtree
685        logging.debug("Stripping moles namespace from string to allow easy handling with elementtree")
686        xmlString = xmlString.replace('moles:', '')
687        xmlString = xmlString.replace('georss:', '')
688        xmlString = xmlString.replace('gml:', '')
689        xmlString = xmlString.replace('xmlns="http://www.w3.org/2005/Atom"', '')
690
691        # now create elementtree with the XML string
692        logging.debug("Create elementtree instance with XML string")
693        tree = ET.fromstring(xmlString)
694       
695        title = tree.findtext('title')
696        if title:
697            logging.debug("Adding title data")
698            self.title = title
699
700        summary = tree.findtext('summary')
701        if summary:
702            self.Summary = summary
703
704        authorElement = tree.find('author')
705        logging.debug("Adding author data")
706        author = Person()
707        author.fromETElement(authorElement)
708        self.author = author
709
710        contributorElements = tree.findall('contributor')
711        for contributorElement in contributorElements:
712            logging.debug("Adding contributor data")
713            contributor = Person(personType = Person.CONTRIBUTOR_TYPE)
714            contributor.fromETElement(contributorElement)
715            self.contributors.append(contributor)
716
717        molesElement = tree.find('entity')
718        if molesElement:
719            self.ME.fromET(molesElement)
720               
721        self.atomID = tree.findtext('id')
722
723        self._parseCategoryData(tree.findall('category'))
724
725        self._parseLinksData(tree.findall('link'))
726           
727        contentTag = tree.find('content')
728        if contentTag != None:
729            logging.debug("Found content tag - checking for CSML/CDML file data")
730            file = contentTag.attrib.get('src')
731            if file:
732                # NB, the path will reveal more reliably whether we're dealing with CSML and CDML files
733                if file.upper().find('CSML') > -1:
734                    logging.debug("Adding CSML file data")
735                    self.csmlFile = file
736                elif file.upper().find('CDML') > -1:
737                    logging.debug("Adding CDML file data")
738                    self.cdmlFile = file
739                self.contentFile = file
740            else:
741                logging.debug("No file data - adding contents of element instead")
742                self.Content = contentTag.text
743       
744        range = tree.findtext('temporalRange')
745        if range:
746            logging.debug("Adding temporal range data")
747            timeData = range.split('/')
748            self.t1 = timeData[0]
749            if len(timeData) > 1:
750                self.t2 = timeData[1]
751       
752        # NB, this parser won't mind if we're dealing with Envelope or EnvelopeWithTimePeriod
753        minBBox = tree.findall('.//lowerCorner')
754        if minBBox:
755            logging.debug("Adding min spatial range data")
756            minBBox = minBBox[0]
757            spatialData = minBBox.text.split()
758            self.minX = spatialData[0]
759            if len(spatialData) > 1:
760                self.minY = spatialData[1]
761       
762        maxBBox = tree.findall('.//upperCorner')
763        if maxBBox:
764            maxBBox = maxBBox[0]
765            logging.debug("Adding max spatial range data")
766            spatialData = maxBBox.text.split()
767            self.maxX = spatialData[0]
768            if len(spatialData) > 1:
769                self.maxY = spatialData[1]
770               
771        publishedDate = tree.findtext('published')
772        if publishedDate:
773            logging.debug("Adding published date")
774            self.publishedDate = publishedDate
775               
776        updatedDate = tree.findtext('updated')
777        if updatedDate:
778            logging.debug("Adding updated date")
779            self.updatedDate = updatedDate
780           
781        logging.info("Completed data ingest")
782   
783   
784    def _parseCategoryData(self, categories):
785        logging.debug("Adding category/parameters data")
786        for category in categories:
787            cat = Category()
788            cat.fromETElement(category)
789           
790            if cat.term == self.ATOM_TYPE:
791                logging.debug("Found atom type data")
792                self.atomTypeID = cat.label
793                self.atomTypeName = self.VTD.TERM_DATA[cat.label].title
794                continue
795            elif cat.term == self.ATOM_SUBTYPE:
796                logging.debug("Found atom subtype data")
797                self.subtypeID = cat.label
798                self.subtype = cat.scheme
799                continue
800
801            self.parameters.append(cat)
802   
803
804    def setDatasetID(self, datasetID):
805        '''
806        Set the dataset ID for the atom - and generate an appropriate atom name using this
807        @param datasetID: ID to set for the atom
808        '''
809        self.datasetID = datasetID
810        self._generateAtomName(datasetID) 
811        self.atomID = self.createAtomID(datasetID)
812
813
814    def createAtomID(self, datasetID):
815        '''
816        Create a unique ID, conforming to atom standards, for atom
817        NB, see http://diveintomark.org/archives/2004/05/28/howto-atom-id
818        @param datasetID: ID of atom's dataset
819        @return: unique ID
820        '''
821        logging.info("Creating unique ID for atom")
822        if not self.atomBrowseURL:
823            self._generateAtomName(datasetID)
824        urlBit = self.atomBrowseURL.split('://')[1]
825        urlBit = urlBit.replace('#', '')
826        urlBits = urlBit.split('/')
827        dateBit = datetime.datetime.today().strftime("%Y-%m-%d")
828       
829        id = "tag:" + urlBits[0] + "," + dateBit + ":/" + "/".join(urlBits[1:])
830        logging.info("- unique ID created for atom")
831        logging.debug(" - '%s'" %id)
832        return id
833       
834       
835    def _generateAtomName(self, datasetID):
836        '''
837        Generate a consistent name for the atom - with full eXist doc path
838        @param datasetID: ID of atom's dataset
839        '''
840        self.atomName = datasetID + ".atom"
841        self.ndgURI = self.ME.providerID + "__ATOM__" + datasetID
842        self.atomBrowseURL = VTD.BROWSE_ROOT_URL + self.ndgURI
843
844
845    def _parseLinksData(self, links):
846        '''
847        Extract links and atom data from array of link elements in the XML representation of the atom
848        @param links: an array of <link> elements
849        '''
850        # firstly, get all data to start with, so we can properly process it afterwards
851        linkData = {}
852        logging.debug("Getting link data")
853        for linkTag in links:
854            link = Link()
855            link.fromETElement(linkTag)
856
857            if not linkData.has_key(link.rel):
858                linkData[link.rel] = []
859               
860            if link.title == VTD.TERM_DATA[VTD.LOGO_TERM].title:
861                self.logos.append(link)
862            else:
863                linkData[link.rel].append(link)
864
865        # there should be one self referencing link - which will provide info on the atom itself
866        if not linkData.has_key('self'):
867            errorMessage = "Atom does not have self referencing link - " + \
868                "cannot ascertain datasetID without this - please fix"
869            logging.error(errorMessage)
870            raise ValueError(errorMessage)
871       
872        # this is the link describing the atom itself
873        self.atomBrowseURL = linkData['self'][0].href
874       
875        self.datasetID = self.atomBrowseURL.split("__ATOM__")[-1]
876        self.atomName = self.datasetID + ".atom"
877        self.ndgURI = self.atomBrowseURL.split(VTD.BROWSE_ROOT_URL)[1]
878       
879        # now remove this value and the associated moles doc link
880        del linkData['self']
881        molesDoc = self.atomBrowseURL.replace('ATOM', 'NDG-B1')
882        if linkData.has_key('related'):
883            relatedLinks = []
884            for link in linkData['related']:
885                if link.href != molesDoc:
886                    relatedLinks.append(link)
887           
888            linkData['related'] = relatedLinks
889               
890        # now add the remaining links to the atom
891        for key in linkData:
892            for link in linkData[key]:
893                logging.debug("Adding link data")
894                self.relatedLinks.append(link)
895       
896
897    def _addSpatialData(self, element):
898        '''
899        Add spatial coverage element to an input element
900        @param element: element to add coverage data to
901        '''
902        logging.info("Adding spatial data to Atom")
903        bbox = ET.SubElement(element, "georss:where")
904        if not self.minX:
905            logging.info("No spatial data specified")
906            return
907       
908        envelope = ET.SubElement(bbox, "gml:Envelope")
909        lc = ET.SubElement(envelope, "gml:lowerCorner")
910        lc.text = self.minX + " " + self.minY
911        uc = ET.SubElement(envelope, "gml:upperCorner")
912        uc.text = self.maxX + " " + self.maxY
913
914       
915    def setAttribute(self, attributeName, attributeValue):
916        '''
917        Set the value of an atom attribute - and do some basic tidying up of the string content
918        - to escape any XML unfriendly characters
919        @param attributeName: name of the attribute whose value to set
920        @param attributeValue: value to set the attribute to 
921        '''
922        logging.debug("Setting attribute, %s, to %s" %(attributeName, attributeValue))
923        origValue = attributeValue
924       
925        # escape any special characters if a value has been specified
926        # NB, need to cope with both single values and arrays
927        if attributeValue:
928            if type(attributeValue) is list:
929                newVals = []
930                for val in attributeValue:
931                    newVals.append(objectify(escapeSpecialCharacters(val)), attributeName)
932                attributeValue = newVals
933                   
934            else:
935                attributeValue = objectify(escapeSpecialCharacters(attributeValue), attributeName)
936
937        # handle the special case of authors; only one author is allowed per atom
938        # - the others should be treated as contributors
939        if attributeName == "authors":
940            setattr(self, "author", attributeValue[0])
941            if len(attributeValue) > 1:
942                setattr(self, "contributors", attributeValue[1:])
943        elif attributeName == "atomAuthors":
944            self.ME.responsibleParties.append(attributeValue)
945        else:
946            setattr(self, attributeName, attributeValue)
947
948
949    def objectify(self, objectVals, attributeName):
950        '''
951        Some inputs are specified as strings but need to be converted into
952        objects - do this here
953        @param objectVals: a '|' delimited string of values
954        @param attributeName: name of attribute the values belong to
955        '''
956        obj = None
957        if type(objectVals) != str:
958            return objectVals
959       
960        if attributeName == "relatedLinks" or attributeName == "logo":
961            obj = Link()
962        elif attributeName == "atomAuthors" or attributeName == "authors":
963            obj = Person()
964
965        if obj:
966            obj.fromString(objectVals)
967            return obj
968       
969        return objectVals
970
971
972    def toPrettyXML(self):
973        '''
974        Returns nicely formatted XML as string
975        '''
976        atomXML = self.toXML()
977
978        # create the string
979        logging.debug("Converting the elementtree object into a string")
980        prettyXML = et2text(atomXML.getroot())
981
982        # add XML version tag
983        prettyXML = "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n" + prettyXML
984        logging.info("Created formatted version of XML object")
985        return prettyXML
986
987
988    def getLinksOfType(self, termID):
989        '''
990        Returns links in the atom related links attribute which match the specified
991        term ID
992        @param termID: the termID to look for in the related links - NB, this is
993        matched to the end of the link.rel value
994        @return links: array of Link objects with matching term type
995        '''
996        logging.debug("Getting atom links of type, '%s'" %termID)
997        matchingLinks = []
998        for link in self.relatedLinks:
999            # firstly, handle special case where we only want the online ref type links
1000            # returned
1001            if termID == self.ONLINE_REF_LABEL:
1002                if not link.isChildAtom():
1003                    logging.debug("- found link with matching term type")
1004                    matchingLinks.append(link)
1005               
1006            elif link and link.rel and link.rel.lower().endswith(termID.lower()):
1007                logging.debug("- found link with matching term type")
1008                matchingLinks.append(link)
1009               
1010        logging.debug("Returning matched links")
1011        return matchingLinks
1012   
1013   
1014    def validate(self):
1015        '''
1016        Check the various values of the various atom attributes; if an error with any of
1017        these is found, raise a ValueError
1018        @raise ValueError: if any atom attributes have a problem
1019        '''
1020        logging.info("Validating the atom data model")
1021       
1022        logging.info("Atom model validated successfully")
Note: See TracBrowser for help on using the repository browser.