source: exist/trunk/python/ndgUtils/models/Atom.py @ 4347

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/models/Atom.py@4347
Revision 4347, 39.4 KB checked in by cbyrom, 11 years ago (diff)

Add validate() method to atom model to allow basic checks on atom data
integrity + add custom exception to allow packing of multiple error messages
into one exception + remove atom logos field - all logos data now stored
solely in the related links field to simplify things.

Line 
1'''
2 Class representing data in  atom format - allowing moles data to be stored and accessed in a web feed compatible way
3 
4 @author: C Byrom, Tessella Jun 2008
5'''
6try: #python 2.5
7    from xml.etree import cElementTree as ET
8except ImportError:
9    try:
10        # if you've installed it yourself it comes this way
11        import cElementTree as ET
12    except ImportError:
13        # if you've egged it this is the way it comes
14        from ndgUtils.elementtree import cElementTree as ET
15import sys, logging, re, datetime
16from ndgUtils.eXistConnector import eXistConnector
17from ndgUtils.ETxmlView import et2text
18from utilities import getTripleData, escapeSpecialCharacters, \
19    tidyUpParameters, getISO8601Date
20from ndgUtils.vocabtermdata import VocabTermData as VTD
21from ndgUtils.models import MolesEntity as ME
22
23
class AtomError(Exception):
    """
    Error raised by the Atom model.
    NB, the message is written to the error log when the exception is
    created - i.e. not when it is raised or caught.
    """
    def __init__(self, msg):
        logging.error(msg)
        super(AtomError, self).__init__(msg)
31
32
class ValidationError(Exception):
    """
    Exception used to report data validation problems - allows multiple
    error messages to be packed into a single exception.
    """
    def __init__(self, errorDict):
        '''
        Log a generic validation message, then each individual error
        @param errorDict: dict whose values are logged as the individual
        error messages; the dict itself is available via unpack_errors()
        '''
        msg = "Data validation error"
        logging.error(msg)
        Exception.__init__(self, msg)
        # NB, use values() rather than itervalues() since the latter only
        # exists in python 2
        for val in errorDict.values():
            logging.error(val)
        self._errorDict = errorDict

    def unpack_errors(self):
        '''
        @return: the error dict that the exception was created with
        '''
        return self._errorDict
47
48
class Person(object):
    '''
    Atom person data (author, contributor or responsible party) - holding
    name, uri and role values
    '''
    AUTHOR_TYPE = 0
    CONTRIBUTOR_TYPE = 1
    RESPONSIBLE_PARTY_TYPE = 2
    # NB, element names are indexed by the type constants above
    ELEMENT_NAMES = ["author", "contributor", "responsibleParty"]

    def __init__(self, personType = AUTHOR_TYPE, namespace = None):
        '''
        @param personType: one of the *_TYPE constants
        @param namespace: namespace prefix to use for the XML output - NB, this
        is ignored for responsible parties, which always use 'moles'
        '''
        self.type = personType
        self.name = ""
        self.uri = ""
        self.role = ""

        # NB, responsible party data is always stored in the moles section and
        # uses a 'uri' element where the other person types use 'email'
        if self.type == self.RESPONSIBLE_PARTY_TYPE:
            self.ns = 'moles'
            self.uriTagName = "uri"
        else:
            self.ns = namespace
            self.uriTagName = "email"

    def __str__(self):
        '''
        @return: 'name | uri | role' - or an empty string if none are set
        '''
        if not (self.name or self.uri or self.role):
            return ""
        return self.name + " | " + self.uri + " | " + self.role

    def fromString(self, personString):
        '''
        Set the name, uri and role - in that order - from the triple
        extracted from the input string by getTripleData
        '''
        triple = getTripleData(personString)
        (self.name, self.uri, self.role) = triple

    def fromETElement(self, personTag):
        '''
        Set the person data from the child elements of the input element;
        missing or empty elements give empty strings
        '''
        self.name = personTag.findtext('name') or ""
        self.role = personTag.findtext('role') or ""
        self.uri = personTag.findtext(self.uriTagName) or ""
        details = (self.name, self.role, self.uriTagName, self.uri)
        logging.debug("Added name: '%s', role: '%s', %s: '%s'" %details)

    def toXML(self):
        '''
        @return: element for the person, with a sub-element for each of the
        name, uri and role values which is set
        '''
        if self.ns:
            prefix = self.ns + ':'
        else:
            prefix = ""

        person = ET.Element(prefix + self.ELEMENT_NAMES[self.type])

        # NB, only values which are set are output
        for tagName, value in (("name", self.name),
                               (self.uriTagName, self.uri),
                               ("role", self.role)):
            if value:
                ET.SubElement(person, prefix + tagName).text = value

        return person

    def __cmp__(self, person1):
        '''
        Compare by value rather than identity - so that checks of the form
        'if person in personArray' work as expected
        @return: 0 if the uri, name, role and type all match, -1 if the input
        is not set, 1 otherwise
        '''
        if not person1:
            return -1

        if self is person1 or \
                (self.uri == person1.uri and self.name == person1.name and
                 self.role == person1.role and self.type == person1.type):
            return 0
        return 1
122
123
class Link(object):
    '''
    Atom link data - holding href, title and rel values
    '''

    def __init__(self):
        self.href = ""
        self.title = ""
        self.rel = ""

    def fromString(self, linkString):
        '''
        Set the href, title and rel - in that order - from the triple
        extracted from the input string by getTripleData
        '''
        triple = getTripleData(linkString)
        (self.href, self.title, self.rel) = triple

    def fromETElement(self, linkTag):
        '''
        Set the link data from the attributes of the input element; missing
        attributes give empty strings
        '''
        attributes = linkTag.attrib
        self.href = attributes.get('href') or ""
        self.rel = attributes.get('rel') or ""
        self.title = attributes.get('title') or ""

    def toXML(self):
        '''
        @return: <link> element with href, title and rel attributes
        '''
        linkTag = ET.Element("link")
        for key in ("href", "title", "rel"):
            linkTag.set(key, getattr(self, key))
        return linkTag

    def hasValue(self):
        '''
        @return: True if the href or title are set, False otherwise
        '''
        # NB, just a rel on its own is meaningless - so ignore
        return bool(self.href or self.title)

    def __str__(self):
        '''
        @return: 'href | title | rel' - or an empty string if none are set
        '''
        if not (self.href or self.title or self.rel):
            return ""
        return self.href + " | " + self.title + " | " + self.rel

    def isChildAtom(self):
        '''
        Determines whether the link refers to another atom - e.g. a link to
        a data granule - by checking if the rel ends with one of the granule,
        deployment, activity, dpt or obs vocab terms
        @return True, if so; False otherwise
        '''
        childAtomTerms = (VTD.GRANULE_TERM, VTD.DEPLOYMENT_TERM,
                          VTD.ACTIVITY_TERM, VTD.DPT_TERM, VTD.OBS_TERM)
        return self.rel.endswith(childAtomTerms)

    def __cmp__(self, link1):
        '''
        Compare by value rather than identity - so that checks of the form
        'if link in linkArray' work as expected
        @return: 0 if the href, title and rel all match, -1 if the input is
        not set, 1 otherwise
        '''
        if not link1:
            return -1

        if self is link1 or \
                (self.href == link1.href and self.title == link1.title and
                 self.rel == link1.rel):
            return 0
        return 1
189
190
class Category(object):
    '''
    Atom category data - holding term, scheme and label values
    '''
    def __init__(self):
        self.term = ""
        self.scheme = ""
        self.label = ""

    def fromString(self, linkString):
        '''
        Set the category data from the triple extracted from the input string
        by getTripleData - NB, the values are taken in the order label, scheme, term
        '''
        triple = getTripleData(linkString)
        (self.label, self.scheme, self.term) = triple

    def fromETElement(self, linkTag):
        '''
        Set the category data from the attributes of the input element;
        missing attributes give empty strings
        '''
        attributes = linkTag.attrib
        self.term = attributes.get('term') or ""
        self.label = attributes.get('label') or ""
        self.scheme = attributes.get('scheme') or ""

    def toXML(self):
        '''
        @return: <category> element with term, scheme and label attributes
        '''
        categoryTag = ET.Element("category")
        for key in ("term", "scheme", "label"):
            categoryTag.set(key, getattr(self, key))
        return categoryTag

    def hasValue(self):
        '''
        @return: True if any of the scheme, label or term are set
        '''
        return bool(self.scheme or self.label or self.term)
219
220
221class Atom(object):
222
223    # labels for use with the atom categories
224    ATOM_TYPE = "ATOM_TYPE"
225    ATOM_SUBTYPE = "ATOM_SUBTYPE"
226
227    # labels for use with the templates to set/extract specific inputs
228    ONLINE_REF_LABEL = "online_ref"
229    PARAMETER_LABEL = "parameter"
230    ATOM_REF_LABEL = "atom_ref"
231    DELIMITER = "---"
232    REMOVE_LABEL = "remove"
233   
234    # format to use for t1-t2 date range
235    YEAR_FORMAT = '%Y-%m-%d'
236
237    def __init__(self, atomType = None, vocabTermData = None, ndgObject = None, \
238                 xmlString = None, state = eXistConnector.WORKING_COLLECTION_PATH, **inputs):
239        '''
240        Constructor - initialise the atom variables
241        '''
242        logging.info("Initialising atom")
243        if atomType:
244            logging.info(" - of type '%s'" %atomType)
245        self.atomTypeID = atomType
246
247        # some data have further subtypes specified
248        self.subtypeID = None # this should be the termID
249        self.subtype = None # and this should be the fully formed vocab URL
250       
251        self.ndgObject = ndgObject
252
253        self.atomName = None
254        self.files = []
255        self.author = None
256        self.contributors = []
257        self.atomAuthors = []
258        self.parameters = []
259        self.spatialData = []
260        self.temporalData = []
261        self.relatedLinks = []
262        self.summary = []
263        self.content = []
264        self.csmlFile = None
265        self.cdmlFile = None
266        # general variable to use for setting the atom content - NB, if a csmlFile is specified
267        # (either directly or via a cdmlFile specification), this will be the content by default
268        # for this purpose
269        self.contentFile = None     
270        self.title = None
271        self.datasetID = None        # NB, the dataset id ends up in the atomName - <path><datasetID>.atom
272        self.atomID = None
273   
274        # boundary box info - to replace spatial/temporalData?
275        self.minX = None
276        self.minY = None
277        self.maxX = None
278        self.maxY = None
279        self.t1 = None
280        self.t2 = None
281
282        self.ME = ME.MolesEntity()
283       
284        # date when the atom was first ingested
285        self.publishedDate = None
286
287        # last update date
288        self.updatedDate = None
289
290        # assume atom in working state by default - this is used to define what collection
291        # in eXist the atom is stored in
292        self.state = state
293       
294        # additional, non standard atom data can be included in the molesExtra element
295        if vocabTermData:
296            self.VTD = vocabTermData
297        else:
298            self.VTD = VTD()
299       
300        if xmlString:
301            self.fromString(xmlString)
302
303        # if inputs passed in as dict, add these now
304        if inputs:
305            logging.info("Adding info to atom from input dict")
306            logging.debug(inputs)
307            self.__dict__.update(inputs)
308           
309            # NB, this doesn't trigger the Summary Property, so do this
310            # explicitly, if need be
311            if inputs.has_key('Summary'):
312                self.Summary = inputs.get('Summary')
313            if inputs.has_key('Content'):
314                self.Content = inputs.get('Content')
315           
316            # also pass any moles data up to the moles entity object
317            if inputs.get('providerID'):
318                self.ME.providerID = inputs.get('providerID')
319               
320            if inputs.get('abbreviation'):
321                self.ME.abbreviation = inputs.get('abbreviation')
322
323        if self.atomTypeID:
324            self.atomTypeName = self.VTD.TERM_DATA[self.atomTypeID].title
325
326        logging.info("Atom initialised")
327
328
329    def addOnlineReferences(self, links):
330        '''
331        Add online reference data associated with the atom
332        - NB, care needs to be taken here since this data is stored in the atom
333        link elements and these are also used for the various atom associations
334        @param links: a Link or array of Links to add to the relatedLinks attribute
335        '''
336        logging.debug("Adding online references")
337        if not links:
338            return
339       
340        if type(links) is not list:
341            links = [links]
342       
343        # firstly clear out any online refs data from the existing related links
344        newLinks = []
345        for link in self.relatedLinks:
346            if link.isChildAtom():
347                newLinks.append(link)
348       
349        newLinks.extend(links)
350        self.relatedLinks = newLinks
351        logging.debug("Online references added")
352
353
354
355    def addUniqueRelatedLinks(self, links):
356        '''
357        Add links to relatedLinks array - if they are not already included
358        @param links: a Link or array of Links to add to the relatedLinks attribute
359        '''
360        logging.debug("Adding new related links")
361        if not links:
362            return
363       
364        if type(links) is not list:
365            links = [links]
366       
367        for link in links:
368            if type(link) is not Link:
369                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
370                continue
371            if link not in self.relatedLinks:
372                logging.debug("- adding unique link")
373                self.relatedLinks.append(link)
374        logging.debug("Finished adding links")
375       
376
377    def removeRelatedLinks(self, linksToDelete):
378        '''
379        Remove any links in the input list from the atom's related links list
380        @param linksToDelete: array of Link objects to remove from atom
381        '''
382        logging.debug("Removing related links from atom")
383        if not linksToDelete:
384            return
385       
386        if type(linksToDelete) is not list:
387            linksToDelete = [linksToDelete]
388       
389        updatedLinks = []
390        for link in self.relatedLinks:
391            if type(link) is not Link:
392                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
393                continue
394            if link in linksToDelete:
395                logging.debug("- found link to remove")
396            else:
397                updatedLinks.append(link)
398
399        self.relatedLinks = updatedLinks
400        logging.debug("Links removed")
401       
402
403    def getDefaultCollectionPath(self):
404        '''
405        Determine the correct collection to use for the atom in eXist
406        '''
407        collectionPath = eXistConnector.BASE_COLLECTION_PATH + self.state
408       
409        if self.atomTypeID == VTD.DE_TERM:
410            collectionPath += eXistConnector.DE_COLLECTION_PATH
411        elif self.atomTypeID == VTD.GRANULE_TERM:
412            collectionPath += eXistConnector.GRANULE_COLLECTION_PATH
413        elif self.atomTypeID == VTD.ACTIVITY_TERM and \
414            self.subtypeID == VTD.DEPLOYMENT_TERM:
415            collectionPath += eXistConnector.DEPLOYMENTS_COLLECTION_PATH
416        else:
417            collectionPath += eXistConnector.DEPLOYMENT_COLLECTION_PATH
418       
419        if not self.ME.providerID:
420            raise AtomError("Error: cannot determine atom collection path because " + \
421                            "the provider ID is not defined")
422           
423        collectionPath += self.ME.providerID + "/"
424        return collectionPath
425           
426
427    def __addAtomTypeDataXML(self, root):
428        '''
429        Add the atom type, and subtype data, if available, to atom categories
430        - and lookup and add the appropriate vocab term data
431        '''
432        if self.atomTypeID:
433            logging.info("Adding atom type info to XML output")
434            category = Category()
435            category.label = self.atomTypeID
436            # look up the appropriate vocab term data
437            category.scheme = self.VTD.getTermCurrentVocabURL(self.atomTypeID)
438            category.term = self.ATOM_TYPE
439            root.append(category.toXML())
440
441        if self.subtypeID:
442            logging.info("Adding atom subtype info to XML output")
443            # NB subtypes not all defined, so leave this out for the moment
444            category.label = self.subtypeID
445            # look up the appropriate vocab term data
446            category.scheme = self.VTD.getTermCurrentVocabURL(self.subtypeID)
447            category.term = self.ATOM_SUBTYPE
448            root.append(category.toXML())
449
450
451    def addMolesEntityData(self, abbreviation, provider_id, object_creation_time):
452        '''
453        Add data to include in the moles entity element
454        '''
455        logging.debug('Adding moles entity information')
456        self.ME.abbreviation = abbreviation
457        self.ME.providerID = provider_id
458        self.ME.createdDate = getISO8601Date(object_creation_time)
459        logging.debug('Moles entity information added')
460
461
462    def addAuthors(self, authors):
463        '''
464        Add author data appropriately to the atom
465        NB, these will overwrite any existing authors of the same type
466        @param authors: list of Person objects with the author data
467        '''
468        logging.debug('Adding authors data to Atom')
469        isFirstAuthor = {}
470        authorArray = None
471        for author in authors:
472            # NB, we're only allowed one atom author
473            if author.type == Person.AUTHOR_TYPE:
474                self.author = author
475                if isFirstAuthor.has_key(author.type):
476                    raise AtomError("Error: an atom can only have one author specified")
477                isFirstAuthor[author.type] = 1
478                continue
479            elif author.type == Person.CONTRIBUTOR_TYPE:
480                authorArray = self.contributors
481            elif author.type == Person.RESPONSIBLE_PARTY_TYPE:
482                authorArray = self.ME.responsibleParties
483               
484            # check if this is the first addition - if so, clear out the
485            # array in advance
486            if not isFirstAuthor.has_key(author.type):
487                logging.debug("Clearing out author array")
488                # NB, need to be careful to clear the array, not create a ref
489                # to a new array
490                del authorArray[:]
491                isFirstAuthor[author.type] = 1
492
493            if str(author) != "" and author not in authorArray:
494                logging.debug("Adding author (type:'%s', name:'%s', uri:'%s', role:'%s')" \
495                              %(author.type, author.name, author.uri, author.role))
496                authorArray.append(author)
497
498        logging.debug('Finished adding authors data')
499
500
501    def _isNewParameter(self, param):
502        '''
503        Check if a parameter is already specified in the atom, return False if
504        so, otherwise return True
505        '''
506        for p in self.parameters:
507            if p.term == param.term and \
508                p.scheme == param.scheme and \
509                p.label == param.label:
510                return False
511        return True
512
513
514    def addRelatedLinks(self, linkVals):
515        '''
516        Add related links in string format - converting to Link objects
517        @param linkVals: string of format, 'uri | title | vocabServerURL'
518        '''
519        self.relatedLinks.append(self.objectify(linkVals, 'relatedLinks'))
520
521
522    def addParameters(self, params):
523        '''
524        Add a parameter to list - ensuring it is unique and has been formatted and tidied appropriately
525        @params param: parameter, as string array, to add to atom parameters collection
526        '''
527        # avoid strings being parsed character by character
528        if type(params) is str:
529            params = [params]
530           
531        for param in params:
532            # firstly tidy parameter
533            param = tidyUpParameters(param)
534            category = Category()
535            category.fromString(param)
536
537            # now check for uniqueness
538            if self._isNewParameter(category):
539                logging.debug("Adding new parameter: %s" %param)
540                self.parameters.append(category)
541   
542   
543    def _linksToXML(self, root):
544        '''
545        Add required links to the input element
546        @param root: element to add links to - NB, should be the root element of the atom
547        '''
548        selfLink = ET.SubElement(root, "link")
549        selfLink.attrib["href"] = self.atomBrowseURL
550        selfLink.attrib["rel"] = "self"
551        if self.subtypeID != VTD.DEPLOYMENT_TERM:
552            molesLink = ET.SubElement(root, "link")
553            molesDoc = re.sub('ATOM','NDG-B1', self.atomBrowseURL)
554            molesLink.attrib["href"] = molesDoc
555            molesLink.attrib["rel"] = 'related'
556       
557        for relatedLink in self.relatedLinks:
558            if relatedLink.hasValue():
559                root.append(relatedLink.toXML())
560   
561    def toXML(self):
562        '''
563        Convert the atom into XML representation and return this
564        @return: xml version of atom
565        '''
566        logging.info("Creating formatted XML version of Atom")
567        root = ET.Element("entry")
568        root.attrib["xmlns"] = "http://www.w3.org/2005/Atom"
569        root.attrib["xmlns:moles"] = "http://ndg.nerc.ac.uk/schema/moles2alpha"
570        root.attrib["xmlns:georss"] = "http://www.georss.org/georss"
571        root.attrib["xmlns:gml"] = "http://www.opengis.net/gml"
572        id = ET.SubElement(root, "id")
573        id.text = self.atomID
574        title = ET.SubElement(root, "title")
575        title.text = self.title
576        self._linksToXML(root)
577
578        # NB, the author tag is mandatory for atoms - so if an explicit
579        # author has not been set, just take the author to be the provider
580        if not self.author:
581            author = Person()
582            author.name = self.ME.providerID
583            author.uri = self.ME.providerID
584            self.author = author
585
586        root.append(self.author.toXML())
587           
588        for contributor in self.contributors:
589            root.append(contributor.toXML())
590
591        # add the moles entity section, if it is required
592        if self.ME:
593            root.append(self.ME.toXML())
594
595        # add parameters data
596        for param in self.parameters:
597            if param.hasValue():
598                root.append(param.toXML())
599
600        # add the type and subtype data
601        self.__addAtomTypeDataXML(root)
602                   
603        summary = ET.SubElement(root, "summary")
604        summary.text = self.Summary
605                   
606        # add link to content, if required - NB, can only have one content element in atom
607        # - and this is mandatory
608        content = ET.SubElement(root, "content")
609        if self.contentFile:
610            content.attrib["type"] = "application/xml"
611            content.attrib["src"] = self.contentFile
612        else:
613            content.text = self.Content
614            content.attrib["type"] = "xhtml"
615       
616        # if there's a published date already defined, assume we're doing an update now
617        # NB, update element is mandatory
618        currentDate = datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%SZ")
619        if not self.publishedDate:
620            self.publishedDate = currentDate
621
622        updated = ET.SubElement(root, "updated")
623        if not self.updatedDate:
624            self.updatedDate = currentDate
625        updated.text = self.updatedDate
626
627        published = ET.SubElement(root, "published")
628        published.text = self.publishedDate
629
630        # add temporal range data, if available
631        temporalRange = ET.SubElement(root, "moles:temporalRange")
632        if self.t1:
633            temporalRange.text = self.t1
634            if self.t2:
635                temporalRange.text += "/" + self.t2
636
637        # add spatial range data, if available
638        self._addSpatialData(root)
639
640        tree = ET.ElementTree(root)
641        logging.info("XML version of Atom created")
642        return tree
643
644
645    def __getSummary(self):
646        logging.debug("Getting summary data")
647        summaryString = ""
648        for summary_line in self.summary:
649            summaryString += summary_line + "\n"
650
651        return summaryString
652
653    def __setSummary(self, summary):
654        logging.debug("Adding summary data")
655        self.summary = []
656        for summary_line in summary.split('\n'):
657            self.summary.append(escapeSpecialCharacters(summary_line))
658           
659    Summary = property(fset=__setSummary, fget=__getSummary, doc="Atom summary")
660
661
662    def __getContent(self):
663        logging.debug("Getting content data")
664        contentString = ""
665        # NB, there must be content specified in an atom
666        if not self.content:
667            return "Metadata document"
668       
669        for content_line in self.content:
670            contentString += content_line + "\n"
671
672        return contentString
673
674    def __setContent(self, content):
675        logging.debug("Adding content data")
676        self.content = []
677        for content_line in content.split('\n'):
678            self.content.append(escapeSpecialCharacters(content_line))
679           
680    Content = property(fset=__setContent, fget=__getContent, doc="Atom content")
681
682           
683    def fromString(self, xmlString):
684        '''
685        Initialise Atom object using an xmlString
686        @param xmlString: representation of atom as an XML string
687        '''
688        logging.info("Ingesting data from XML string")
689       
690        # firstly, remove any namespaces used - to avoid problems with elementtree
691        logging.debug("Stripping moles namespace from string to allow easy handling with elementtree")
692        xmlString = xmlString.replace('moles:', '')
693        xmlString = xmlString.replace('georss:', '')
694        xmlString = xmlString.replace('gml:', '')
695        xmlString = xmlString.replace('xmlns="http://www.w3.org/2005/Atom"', '')
696
697        # now create elementtree with the XML string
698        logging.debug("Create elementtree instance with XML string")
699        tree = ET.fromstring(xmlString)
700       
701        title = tree.findtext('title')
702        if title:
703            logging.debug("Adding title data")
704            self.title = title
705
706        summary = tree.findtext('summary')
707        if summary:
708            self.Summary = summary
709
710        authorElement = tree.find('author')
711        logging.debug("Adding author data")
712        author = Person()
713        author.fromETElement(authorElement)
714        self.author = author
715
716        contributorElements = tree.findall('contributor')
717        for contributorElement in contributorElements:
718            logging.debug("Adding contributor data")
719            contributor = Person(personType = Person.CONTRIBUTOR_TYPE)
720            contributor.fromETElement(contributorElement)
721            self.contributors.append(contributor)
722
723        molesElement = tree.find('entity')
724        if molesElement:
725            self.ME.fromET(molesElement)
726               
727        self.atomID = tree.findtext('id')
728
729        self._parseCategoryData(tree.findall('category'))
730
731        self._parseLinksData(tree.findall('link'))
732           
733        contentTag = tree.find('content')
734        if contentTag != None:
735            logging.debug("Found content tag - checking for CSML/CDML file data")
736            file = contentTag.attrib.get('src')
737            if file:
738                # NB, the path will reveal more reliably whether we're dealing with CSML and CDML files
739                if file.upper().find('CSML') > -1:
740                    logging.debug("Adding CSML file data")
741                    self.csmlFile = file
742                elif file.upper().find('CDML') > -1:
743                    logging.debug("Adding CDML file data")
744                    self.cdmlFile = file
745                self.contentFile = file
746            else:
747                logging.debug("No file data - adding contents of element instead")
748                self.Content = contentTag.text
749       
750        range = tree.findtext('temporalRange')
751        if range:
752            logging.debug("Adding temporal range data")
753            timeData = range.split('/')
754            self.t1 = timeData[0]
755            if len(timeData) > 1:
756                self.t2 = timeData[1]
757       
758        # NB, this parser won't mind if we're dealing with Envelope or EnvelopeWithTimePeriod
759        minBBox = tree.findall('.//lowerCorner')
760        if minBBox:
761            logging.debug("Adding min spatial range data")
762            minBBox = minBBox[0]
763            spatialData = minBBox.text.split()
764            self.minX = spatialData[0]
765            if len(spatialData) > 1:
766                self.minY = spatialData[1]
767       
768        maxBBox = tree.findall('.//upperCorner')
769        if maxBBox:
770            maxBBox = maxBBox[0]
771            logging.debug("Adding max spatial range data")
772            spatialData = maxBBox.text.split()
773            self.maxX = spatialData[0]
774            if len(spatialData) > 1:
775                self.maxY = spatialData[1]
776               
777        publishedDate = tree.findtext('published')
778        if publishedDate:
779            logging.debug("Adding published date")
780            self.publishedDate = publishedDate
781               
782        updatedDate = tree.findtext('updated')
783        if updatedDate:
784            logging.debug("Adding updated date")
785            self.updatedDate = updatedDate
786           
787        logging.info("Completed data ingest")
788   
789   
790    def _parseCategoryData(self, categories):
791        logging.debug("Adding category/parameters data")
792        for category in categories:
793            cat = Category()
794            cat.fromETElement(category)
795           
796            if cat.term == self.ATOM_TYPE:
797                logging.debug("Found atom type data")
798                self.atomTypeID = cat.label
799                self.atomTypeName = self.VTD.TERM_DATA[cat.label].title
800                continue
801            elif cat.term == self.ATOM_SUBTYPE:
802                logging.debug("Found atom subtype data")
803                self.subtypeID = cat.label
804                self.subtype = cat.scheme
805                continue
806
807            self.parameters.append(cat)
808   
809
810    def setDatasetID(self, datasetID):
811        '''
812        Set the dataset ID for the atom - and generate an appropriate atom name using this
813        @param datasetID: ID to set for the atom
814        '''
815        self.datasetID = datasetID
816        self._generateAtomName(datasetID) 
817        self.atomID = self.createAtomID(datasetID)
818
819
820    def createAtomID(self, datasetID):
821        '''
822        Create a unique ID, conforming to atom standards, for atom
823        NB, see http://diveintomark.org/archives/2004/05/28/howto-atom-id
824        @param datasetID: ID of atom's dataset
825        @return: unique ID
826        '''
827        logging.info("Creating unique ID for atom")
828        if not self.atomBrowseURL:
829            self._generateAtomName(datasetID)
830        urlBit = self.atomBrowseURL.split('://')[1]
831        urlBit = urlBit.replace('#', '')
832        urlBits = urlBit.split('/')
833        dateBit = datetime.datetime.today().strftime("%Y-%m-%d")
834       
835        id = "tag:" + urlBits[0] + "," + dateBit + ":/" + "/".join(urlBits[1:])
836        logging.info("- unique ID created for atom")
837        logging.debug(" - '%s'" %id)
838        return id
839       
840       
841    def _generateAtomName(self, datasetID):
842        '''
843        Generate a consistent name for the atom - with full eXist doc path
844        @param datasetID: ID of atom's dataset
845        '''
846        self.atomName = datasetID + ".atom"
847        self.ndgURI = self.ME.providerID + "__ATOM__" + datasetID
848        self.atomBrowseURL = VTD.BROWSE_ROOT_URL + self.ndgURI
849
850
851    def _parseLinksData(self, links):
852        '''
853        Extract links and atom data from array of link elements in the XML representation of the atom
854        @param links: an array of <link> elements
855        '''
856        # firstly, get all data to start with, so we can properly process it afterwards
857        linkData = {}
858        logging.debug("Getting link data")
859        for linkTag in links:
860            link = Link()
861            link.fromETElement(linkTag)
862
863            if not linkData.has_key(link.rel):
864                linkData[link.rel] = []
865           
866            linkData[link.rel].append(link)
867
868        # there should be one self referencing link - which will provide info on the atom itself
869        if not linkData.has_key('self'):
870            errorMessage = "Atom does not have self referencing link - " + \
871                "cannot ascertain datasetID without this - please fix"
872            logging.error(errorMessage)
873            raise ValueError(errorMessage)
874       
875        # this is the link describing the atom itself
876        self.atomBrowseURL = linkData['self'][0].href
877       
878        self.datasetID = self.atomBrowseURL.split("__ATOM__")[-1]
879        self.atomName = self.datasetID + ".atom"
880        self.ndgURI = self.atomBrowseURL.split(VTD.BROWSE_ROOT_URL)[1]
881       
882        # now remove this value and the associated moles doc link
883        del linkData['self']
884        molesDoc = self.atomBrowseURL.replace('ATOM', 'NDG-B1')
885        if linkData.has_key('related'):
886            relatedLinks = []
887            for link in linkData['related']:
888                if link.href != molesDoc:
889                    relatedLinks.append(link)
890           
891            linkData['related'] = relatedLinks
892               
893        # now add the remaining links to the atom
894        for key in linkData:
895            for link in linkData[key]:
896                logging.debug("Adding link data")
897                self.relatedLinks.append(link)
898       
899
900    def _addSpatialData(self, element):
901        '''
902        Add spatial coverage element to an input element
903        @param element: element to add coverage data to
904        '''
905        logging.info("Adding spatial data to Atom")
906        bbox = ET.SubElement(element, "georss:where")
907        if not self.minX:
908            logging.info("No spatial data specified")
909            return
910       
911        envelope = ET.SubElement(bbox, "gml:Envelope")
912        lc = ET.SubElement(envelope, "gml:lowerCorner")
913        lc.text = self.minX + " " + self.minY
914        uc = ET.SubElement(envelope, "gml:upperCorner")
915        uc.text = self.maxX + " " + self.maxY
916
917       
918    def setAttribute(self, attributeName, attributeValue):
919        '''
920        Set the value of an atom attribute - and do some basic tidying up of the string content
921        - to escape any XML unfriendly characters
922        @param attributeName: name of the attribute whose value to set
923        @param attributeValue: value to set the attribute to 
924        '''
925        logging.debug("Setting attribute, %s, to %s" %(attributeName, attributeValue))
926        origValue = attributeValue
927       
928        # escape any special characters if a value has been specified
929        # NB, need to cope with both single values and arrays
930        if attributeValue:
931            if type(attributeValue) is list:
932                newVals = []
933                for val in attributeValue:
934                    newVals.append(objectify(escapeSpecialCharacters(val)), attributeName)
935                attributeValue = newVals
936                   
937            else:
938                attributeValue = objectify(escapeSpecialCharacters(attributeValue), attributeName)
939
940        # handle the special case of authors; only one author is allowed per atom
941        # - the others should be treated as contributors
942        if attributeName == "authors":
943            setattr(self, "author", attributeValue[0])
944            if len(attributeValue) > 1:
945                setattr(self, "contributors", attributeValue[1:])
946        elif attributeName == "atomAuthors":
947            self.ME.responsibleParties.append(attributeValue)
948        else:
949            setattr(self, attributeName, attributeValue)
950
951
952    def objectify(self, objectVals, attributeName):
953        '''
954        Some inputs are specified as strings but need to be converted into
955        objects - do this here
956        @param objectVals: a '|' delimited string of values
957        @param attributeName: name of attribute the values belong to
958        '''
959        obj = None
960        if type(objectVals) != str:
961            return objectVals
962       
963        if attributeName == "relatedLinks":
964            obj = Link()
965        elif attributeName == "atomAuthors" or attributeName == "authors":
966            obj = Person()
967
968        if obj:
969            obj.fromString(objectVals)
970            return obj
971       
972        return objectVals
973
974
975    def toPrettyXML(self):
976        '''
977        Returns nicely formatted XML as string
978        '''
979        atomXML = self.toXML()
980
981        # create the string
982        logging.debug("Converting the elementtree object into a string")
983        prettyXML = et2text(atomXML.getroot())
984
985        # add XML version tag
986        prettyXML = "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n" + prettyXML
987        logging.info("Created formatted version of XML object")
988        return prettyXML
989
990
991    def getLinksOfType(self, termID):
992        '''
993        Returns links in the atom related links attribute which match the specified
994        term ID
995        @param termID: the termID to look for in the related links - NB, this is
996        matched to the end of the link.rel value
997        @return links: array of Link objects with matching term type
998        '''
999        logging.debug("Getting atom links of type, '%s'" %termID)
1000        matchingLinks = []
1001        for link in self.relatedLinks:
1002            # firstly, handle special case where we only want the online ref type links
1003            # returned
1004            if termID == self.ONLINE_REF_LABEL:
1005                if not link.isChildAtom():
1006                    logging.debug("- found link with matching term type")
1007                    matchingLinks.append(link)
1008               
1009            elif link and link.rel and link.rel.lower().endswith(termID.lower()):
1010                logging.debug("- found link with matching term type")
1011                matchingLinks.append(link)
1012               
1013        logging.debug("Returning matched links")
1014        return matchingLinks
1015   
1016   
1017    def validate(self):
1018        '''
1019        Check the various values of the various atom attributes; if an error with any of
1020        these is found, raise a ValueError
1021        @raise ValueError: if any atom attributes have a problem
1022        '''
1023        logging.info("Validating the atom data model")
1024        errors = {}
1025        if not self.title:
1026            errors['title'] = "Title attribute cannot be empty"
1027           
1028        if self.minX or self.maxX or self.minY or self.maxY:
1029            missingVals = False
1030            incorrectFormat = False 
1031            for val in [self.minX, self.maxX, self.minY, self.maxY]:
1032                if val == '':
1033                    missingVals = True
1034                else:
1035                    try:
1036                        float(val)
1037                    except:
1038                        incorrectFormat = True
1039           
1040            if missingVals or incorrectFormat:
1041                errors['spatialcoverage'] = ""
1042            if missingVals:
1043                errors['spatialcoverage'] += "Incomplete spatial coverage data.\n"
1044            if incorrectFormat:
1045                errors['spatialcoverage'] += "Spatial coverage data not in numerical format."
1046
1047        if self.t1 or self.t2:
1048            timeErrors = ''
1049            d1 = None
1050            d2 = None
1051            if self.t1:
1052                try:
1053                    d1 = datetime.datetime.strptime(self.t1, self.YEAR_FORMAT)
1054                except:
1055                    timeErrors += "Incorrect start date format - '%s' - c.f. '2008-04-12. \n'" %self.t1
1056            if self.t2:
1057                try:
1058                    d2 = datetime.datetime.strptime(self.t2, self.YEAR_FORMAT)
1059                except:
1060                    timeErrors += "Incorrect end date format - '%s' - c.f. '2008-04-12. \n'" %self.t2
1061
1062            if d1 and d2:
1063                if d1 > d2 or d2 < d1:
1064                    timeErrors += "Inconsistent date range - '%s' is not before '%s'" \
1065                        %(d1.strftime(self.YEAR_FORMAT), d2.strftime(self.YEAR_FORMAT))
1066
1067            if timeErrors:
1068                errors['temporalrange'] = timeErrors
1069
1070           
1071        # do a quick recursion over all the attributes to look for ascii characters
1072        for key, val in self.__dict__.items():
1073            if val:
1074                if type(val) == str:
1075                    try:
1076                        # NB, the latin coding accepts unicode up to 255
1077                        correctedString = val.decode('latin-1')
1078                    except:
1079                        if not errors.has_key(key):
1080                            errors[key] = ''
1081                        errors[key] += "Illegal unicode found in string: '%s'.\n" %val
1082               
1083        if errors:
1084            logging.warning("Errors found in atom data: %s" %errors)
1085            raise ValidationError(errors)
1086        logging.info("Atom model validated successfully")
1087       
1088       
1089    def getLogos(self):
1090        '''
1091        Return related links that are logos
1092        @return: array of Links containing the logos for the atom
1093        '''
1094        logos = []
1095        for link in self.relatedLinks:
1096            if link.rel.lower().endswith(VTD.LOGO_TERM.lower()):
1097                logos.append(link)
1098               
1099        return logos
1100   
1101   
1102    def isGranule(self):
1103        if self.atomTypeID == VTD.GRANULE_TERM:
1104            return True
1105        return False
1106   
1107   
1108    def isDE(self):
1109        if self.atomTypeID == VTD.DE_TERM:
1110            return True
1111        return False
1112   
1113    def isDeployment(self):
1114        if self.subtypeID and self.subtypeID == VTD.DEPLOYMENT_TERM:
1115            return True
1116        return False
1117
Note: See TracBrowser for help on using the repository browser.