source: exist/trunk/python/ndgUtils/models/Atom.py @ 4296

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/models/Atom.py@4296
Revision 4296, 32.1 KB checked in by cbyrom, 11 years ago (diff)

Add Content property, with getter+setter, to Atom model + add method to
ensure uniqueness of any relatedLinks data - extending the Links object
to allow this easily.

Line 
'''
Class representing data in atom format - allowing MOLES data to be stored and
accessed in a web feed compatible way

@author: C Byrom, Tessella Jun 2008
'''
6try: #python 2.5
7    from xml.etree import cElementTree as ET
8except ImportError:
9    try:
10        # if you've installed it yourself it comes this way
11        import cElementTree as ET
12    except ImportError:
13        # if you've egged it this is the way it comes
14        from ndgUtils.elementtree import cElementTree as ET
15import sys, logging, re, datetime
16from ndgUtils.eXistConnector import eXistConnector
17from ndgUtils.ETxmlView import et2text
18from utilities import getTripleData, escapeSpecialCharacters, \
19    tidyUpParameters, getISO8601Date
20from ndgUtils.vocabtermdata import VocabTermData as VTD
21from ndgUtils.models import MolesEntity as ME
22
23
class AtomError(Exception):
    """
    Error raised for problems encountered while building or handling an Atom.

    The message is written to the error log when the exception object is
    created, and is then passed on to the base Exception.
    """
    def __init__(self, msg):
        logging.error(msg)
        super(AtomError, self).__init__(msg)
31
32
class Person(object):
    '''
    Class representing atom author type data - with name, uri and role attributes

    The person type (author, contributor or responsible party) selects the
    element name used in the XML output.  Responsible parties are always
    placed in the 'moles' namespace and use a 'uri' child element; the other
    types use an 'email' child element.
    '''
    AUTHOR_TYPE = 0
    CONTRIBUTOR_TYPE = 1
    RESPONSIBLE_PARTY_TYPE = 2
    # NB, indexed by the type constants above
    ELEMENT_NAMES = ["author", "contributor", "responsibleParty"]

    def __init__(self, personType = AUTHOR_TYPE, namespace = None):
        '''
        @keyword personType: one of the *_TYPE constants of this class
        @keyword namespace: namespace prefix to use for the output elements
        - NB, this is overridden with 'moles' for responsible parties
        '''
        self.type = personType
        self.ns = namespace
        self.name = ""
        self.uri = ""
        self.role = ""

        # NB, the atom format specifies slightly different data contents
        self.uriTagName = "email"
        # NB, responsible party data is always stored in the moles section
        if self.type == self.RESPONSIBLE_PARTY_TYPE:
            self.ns = 'moles'
            self.uriTagName = "uri"

    def __str__(self):
        '''
        @return: 'name | uri | role' - or an empty string if none of these are set
        '''
        if self.name or self.uri or self.role:
            return self.name + " | " + self.uri + " | " + self.role
        return ""

    def fromString(self, personString):
        '''
        Set name, uri and role from a string
        @param personString: string holding the three values - presumably in
        the 'name | uri | role' format produced by __str__; the parsing itself
        is done by utilities.getTripleData
        '''
        (self.name, self.uri, self.role) = getTripleData(personString)

    def fromETElement(self, personTag):
        '''
        Set name, role and uri from the child elements of an elementtree element
        - NB, missing children result in empty strings
        @param personTag: element with 'name', 'role' and uri/email children
        '''
        self.name = personTag.findtext('name') or ""
        self.role = personTag.findtext('role') or ""
        self.uri = personTag.findtext(self.uriTagName) or ""
        logging.debug("Added name: '%s', role: '%s', %s: '%s'" \
                      %(self.name, self.role, self.uriTagName, self.uri))

    def toXML(self):
        '''
        @return: elementtree element representing the person - child elements
        are only added for the values which are set
        '''
        prefix = ""
        if self.ns:
            prefix = self.ns + ':'

        author = ET.Element(prefix + self.ELEMENT_NAMES[self.type])

        if self.name:
            name = ET.SubElement(author, prefix + "name")
            name.text = self.name

        if self.uri:
            uri = ET.SubElement(author, prefix + self.uriTagName)
            uri.text = self.uri

        if self.role:
            role = ET.SubElement(author, prefix + "role")
            role.text = self.role

        return author

    def __eq__(self, other):
        '''
        Two Person objects are equal if their uri, name, role and type match.
        NB, rich comparison is needed as well as __cmp__ since python 3 ignores
        __cmp__ - so 'if person in personArray' would otherwise fall back to
        an identity check there.
        NB, __hash__ is deliberately not defined here: python 2 keeps the
        identity based hash it always had for this class, whilst python 3
        treats these (mutable) objects as unhashable.
        '''
        if self is other:
            return True
        if not isinstance(other, Person):
            return NotImplemented
        return self.uri == other.uri and self.name == other.name and \
            self.role == other.role and self.type == other.type

    def __ne__(self, other):
        # NB, python 2 does not derive != from == automatically
        isEqual = self.__eq__(other)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    def __cmp__(self, person1):
        '''
        Override comparison to allow proper object comparison when checking
        if Person objects are in an array already - i.e. if person in personArray...
        NB, only equality is meaningful here - 1 is returned for any mismatch
        (including comparison with non-Person objects)
        '''
        if self == person1:
            return 0
        return 1
103
104
class Link(object):
    '''
    Class representing an atom link - with href, title and rel attributes
    '''

    def __init__(self):
        self.href = ""
        self.title = ""
        self.rel = ""

    def fromString(self, linkString):
        '''
        Set href, title and rel from a string
        @param linkString: string holding the three values - presumably in
        the 'href | title | rel' format produced by __str__; the parsing itself
        is done by utilities.getTripleData
        '''
        (self.href, self.title, self.rel) = getTripleData(linkString)

    def fromETElement(self, linkTag):
        '''
        Set href, rel and title from the attributes of an elementtree element
        - NB, missing attributes result in empty strings
        @param linkTag: <link> element to read
        '''
        self.href = linkTag.attrib.get('href') or ""
        self.rel = linkTag.attrib.get('rel') or ""
        self.title = linkTag.attrib.get('title') or ""

    def toXML(self):
        '''
        @return: <link> elementtree element with href, title and rel attributes
        '''
        link = ET.Element("link")
        link.attrib["href"] = self.href
        link.attrib["title"] = self.title
        link.attrib["rel"] = self.rel
        return link

    def hasValue(self):
        '''
        @return: True if the link has a href or a title, False otherwise
        '''
        # NB, just a rel on its own is meaningless - so ignore
        return bool(self.href or self.title)

    def __str__(self):
        '''
        @return: 'href | title | rel' - or an empty string if none of these are set
        '''
        if self.href or self.title or self.rel:
            return self.href + " | " + self.title + " | " + self.rel
        return ""

    def __eq__(self, other):
        '''
        Two Link objects are equal if their href, title and rel match.
        NB, rich comparison is needed as well as __cmp__ since python 3 ignores
        __cmp__ - so 'if link in linkArray' would otherwise fall back to an
        identity check there.
        NB, __hash__ is deliberately not defined here: python 2 keeps the
        identity based hash it always had for this class, whilst python 3
        treats these (mutable) objects as unhashable.
        '''
        if self is other:
            return True
        if not isinstance(other, Link):
            return NotImplemented
        return self.href == other.href and self.title == other.title and \
            self.rel == other.rel

    def __ne__(self, other):
        # NB, python 2 does not derive != from == automatically
        isEqual = self.__eq__(other)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    def __cmp__(self, link1):
        '''
        Override comparison to allow proper object comparison when checking
        if Link objects are in an array already - i.e. if link in linkArray...
        NB, only equality is meaningful here - 1 is returned for any mismatch
        (including comparison with non-Link objects)
        '''
        if self == link1:
            return 0
        return 1
153
154
class Category(object):
    '''
    An atom category entry, described by its term, scheme and label values
    - all of which start out as empty strings
    '''
    def __init__(self):
        self.term = self.scheme = self.label = ""

    def fromString(self, linkString):
        '''
        Populate the category from a string
        @param linkString: string which utilities.getTripleData turns into
        three values - these are taken, in order, as label, scheme and term
        '''
        triple = getTripleData(linkString)
        self.label, self.scheme, self.term = triple

    def fromETElement(self, linkTag):
        '''
        Populate the category from the attributes of an elementtree element
        - any attribute which is absent ends up as an empty string
        @param linkTag: <category> element to read
        '''
        attributes = linkTag.attrib
        self.term = attributes.get('term') or ""
        self.label = attributes.get('label') or ""
        self.scheme = attributes.get('scheme') or ""

    def toXML(self):
        '''
        @return: <category> elementtree element carrying the term, scheme and
        label attributes
        '''
        element = ET.Element("category")
        for attributeName in ("term", "scheme", "label"):
            element.attrib[attributeName] = getattr(self, attributeName)
        return element
178
179
180class Atom(object):
181
182    # labels for use with the atom categories
183    ATOM_TYPE = "ATOM_TYPE"
184    ATOM_SUBTYPE = "ATOM_SUBTYPE"
185
186    # labels for use with the templates to set/extract specific inputs
187    ONLINE_REF_LABEL = "online_ref"
188    ATOM_REF_LABEL = "atom_ref"
189    DELIMITER = "---"
190    REMOVE_LABEL = "remove"
191
192
193    def __init__(self, atomType = None, vocabTermData = None, ndgObject = None, \
194                 xmlString = None, state = eXistConnector.WORKING_COLLECTION_PATH, **inputs):
195        '''
196        Constructor - initialise the atom variables
197        '''
198        logging.info("Initialising atom")
199        if atomType:
200            logging.info(" - of type '%s'" %atomType)
201        self.atomTypeID = atomType
202
203        # some data have further subtypes specified
204        self.subtypeID = None # this should be the termID
205        self.subtype = None # and this should be the fully formed vocab URL
206       
207        self.ndgObject = ndgObject
208
209        self.atomName = None
210        self.files = []
211        self.author = None
212        self.contributors = []
213        self.atomAuthors = []
214        self.parameters = []
215        self.spatialData = []
216        self.temporalData = []
217        self.relatedLinks = []
218        self.summary = []
219        self.content = []
220        self.csmlFile = None
221        self.cdmlFile = None
222        # general variable to use for setting the atom content - NB, if a csmlFile is specified
223        # (either directly or via a cdmlFile specification), this will be the content by default
224        # for this purpose
225        self.contentFile = None     
226        self.logos = []
227        self.title = None
228        self.datasetID = None        # NB, the dataset id ends up in the atomName - <path><datasetID>.atom
229        self.atomID = None
230   
231        # boundary box info - to replace spatial/temporalData?
232        self.minX = None
233        self.minY = None
234        self.maxX = None
235        self.maxY = None
236        self.t1 = None
237        self.t2 = None
238
239        self.ME = ME.MolesEntity()
240       
241        # date when the atom was first ingested
242        self.publishedDate = None
243
244        # last update date
245        self.updatedDate = None
246
247        # assume atom in working state by default - this is used to define what collection
248        # in eXist the atom is stored in
249        self.state = state
250       
251        # additional, non standard atom data can be included in the molesExtra element
252        if vocabTermData:
253            self.VTD = vocabTermData
254        else:
255            self.VTD = VTD()
256       
257        if xmlString:
258            self.fromString(xmlString)
259
260        # if inputs passed in as dict, add these now
261        if inputs:
262            logging.info("Adding info to atom from input dict")
263            logging.debug(inputs)
264            self.__dict__.update(inputs)
265           
266            # NB, this doesn't trigger the Summary Property, so do this
267            # explicitly, if need be
268            if inputs.has_key('Summary'):
269                self.Summary = inputs.get('Summary')
270            if inputs.has_key('Content'):
271                self.Content = inputs.get('Content')
272           
273            # also pass any moles data up to the moles entity object
274            if inputs.get('providerID'):
275                self.ME.providerID = inputs.get('providerID')
276               
277            if inputs.get('abbreviation'):
278                self.ME.abbreviation = inputs.get('abbreviation')
279
280        if self.atomTypeID:
281            self.atomTypeName = self.VTD.TERM_DATA[self.atomTypeID].title
282
283        logging.info("Atom initialised")
284
285
286    def addUniqueRelatedlinks(self, links):
287        '''
288        Add links to relatedLinks array - if they are not already included
289        @param links: a Link or array of Links to add to the relatedLinks attribute
290        '''
291        logging.debug("Adding new related links")
292        if not links:
293            return
294       
295        if type(links) is str:
296            links = [links]
297       
298        for link in links:
299            if link not in self.relatedLinks:
300                logging.debug("- adding unique link")
301                self.relatedLinks.append(link)
302        logging.debug("Finished adding links")
303       
304       
305
306    def getDefaultCollectionPath(self):
307        '''
308        Determine the correct collection to use for the atom in eXist
309        '''
310        collectionPath = eXistConnector.BASE_COLLECTION_PATH + self.state
311       
312        if self.atomTypeID == VTD.DE_TERM:
313            collectionPath += eXistConnector.DE_COLLECTION_PATH
314        elif self.atomTypeID == VTD.GRANULE_TERM:
315            collectionPath += eXistConnector.GRANULE_COLLECTION_PATH
316        elif self.atomTypeID == VTD.ACTIVITY_TERM and \
317            self.subtypeID == VTD.DEPLOYMENT_TERM:
318            collectionPath += eXistConnector.DEPLOYMENTS_COLLECTION_PATH
319        else:
320            collectionPath += eXistConnector.DEPLOYMENT_COLLECTION_PATH
321       
322        if not self.ME.providerID:
323            raise AtomError("Error: cannot determine atom collection path because " + \
324                            "the provider ID is not defined")
325           
326        collectionPath += self.ME.providerID + "/"
327        return collectionPath
328           
329
330    def __addAtomTypeDataXML(self, root):
331        '''
332        Add the atom type, and subtype data, if available, to atom categories
333        - and lookup and add the appropriate vocab term data
334        '''
335        if self.atomTypeID:
336            logging.info("Adding atom type info to XML output")
337            category = Category()
338            category.label = self.atomTypeID
339            # look up the appropriate vocab term data
340            category.scheme = self.VTD.getTermCurrentVocabURL(self.atomTypeID)
341            category.term = self.ATOM_TYPE
342            root.append(category.toXML())
343
344        if self.subtypeID:
345            logging.info("Adding atom subtype info to XML output")
346            # NB subtypes not all defined, so leave this out for the moment
347            category.label = self.subtypeID
348            # look up the appropriate vocab term data
349            category.scheme = self.VTD.getTermCurrentVocabURL(self.subtypeID)
350            category.term = self.ATOM_SUBTYPE
351            root.append(category.toXML())
352
353
354    def addMolesEntityData(self, abbreviation, provider_id, object_creation_time):
355        '''
356        Add data to include in the moles entity element
357        '''
358        logging.debug('Adding moles entity information')
359        self.ME.abbreviation = abbreviation
360        self.ME.providerID = provider_id
361        self.ME.createdDate = getISO8601Date(object_creation_time)
362        logging.debug('Moles entity information added')
363
364
365    def addAuthors(self, authors):
366        '''
367        Add author data appropriately to the atom
368        NB, these will overwrite any existing authors of the same type
369        @param authors: list of Person objects with the author data
370        '''
371        logging.debug('Adding authors data to Atom')
372        isFirstAuthor = {}
373        authorArray = None
374        for author in authors:
375            # NB, we're only allowed one atom author
376            if author.type == Person.AUTHOR_TYPE:
377                self.author = author
378                if isFirstAuthor.has_key(author.type):
379                    raise AtomError("Error: an atom can only have one author specified")
380                isFirstAuthor[author.type] = 1
381                continue
382            elif author.type == Person.CONTRIBUTOR_TYPE:
383                authorArray = self.contributors
384            elif author.type == Person.RESPONSIBLE_PARTY_TYPE:
385                authorArray = self.ME.responsibleParties
386               
387            # check if this is the first addition - if so, clear out the
388            # array in advance
389            if not isFirstAuthor.has_key(author.type):
390                logging.debug("Clearing out author array")
391                # NB, need to be careful to clear the array, not create a ref
392                # to a new array
393                del authorArray[:]
394                isFirstAuthor[author.type] = 1
395
396            if str(author) != "" and author not in authorArray:
397                logging.debug("Adding author (type:'%s', name:'%s', uri:'%s', role:'%s')" \
398                              %(author.type, author.name, author.uri, author.role))
399                authorArray.append(author)
400
401        logging.debug('Finished adding authors data')
402
403
404    def _isNewParameter(self, param):
405        '''
406        Check if a parameter is already specified in the atom, return False if
407        so, otherwise return True
408        '''
409        for p in self.parameters:
410            if p.term == param.term and \
411                p.scheme == param.scheme and \
412                p.label == param.label:
413                return False
414        return True
415
416
417    def addRelatedLinks(self, linkVals):
418        '''
419        Add related links in string format - converting to Link objects
420        @param linkVals: string of format, 'uri | title | vocabServerURL'
421        '''
422        self.relatedLinks.append(self.objectify(linkVals, 'relatedLinks'))
423
424
425    def addLogos(self, logoVals):
426        '''
427        Add related logos in string format - converting to Link objects
428        @param linkVals: string of format, 'uri | title | vocabServerURL'
429        '''
430        self.relatedLinks.append(self.objectify(logoVals, 'logo'))
431
432
433    def addParameters(self, params):
434        '''
435        Add a parameter to list - ensuring it is unique and has been formatted and tidied appropriately
436        @params param: parameter, as string array, to add to atom parameters collection
437        '''
438        # avoid strings being parsed character by character
439        if type(params) is str:
440            params = [params]
441           
442        for param in params:
443            # firstly tidy parameter
444            param = tidyUpParameters(param)
445            category = Category()
446            category.fromString(param)
447
448            # now check for uniqueness
449            if self._isNewParameter(category):
450                logging.debug("Adding new parameter: %s" %param)
451                self.parameters.append(category)
452   
453   
454    def _linksToXML(self, root):
455        '''
456        Add required links to the input element
457        @param root: element to add links to - NB, should be the root element of the atom
458        '''
459        selfLink = ET.SubElement(root, "link")
460        selfLink.attrib["href"] = self.atomBrowseURL
461        selfLink.attrib["rel"] = "self"
462        if self.subtypeID != VTD.DEPLOYMENT_TERM:
463            molesLink = ET.SubElement(root, "link")
464            molesDoc = re.sub('ATOM','NDG-B1', self.atomBrowseURL)
465            molesLink.attrib["href"] = molesDoc
466            molesLink.attrib["rel"] = "related"
467       
468        for relatedLink in self.relatedLinks:
469            if relatedLink.hasValue():
470                root.append(relatedLink.toXML())
471       
472        for logo in self.logos:
473            if logo.hasValue():
474                root.append(logo.toXML())
475   
476    def toXML(self):
477        '''
478        Convert the atom into XML representation and return this
479        @return: xml version of atom
480        '''
481        logging.info("Creating formatted XML version of Atom")
482        root = ET.Element("entry")
483        root.attrib["xmlns"] = "http://www.w3.org/2005/Atom"
484        root.attrib["xmlns:moles"] = "http://ndg.nerc.ac.uk/schema/moles2alpha"
485        root.attrib["xmlns:georss"] = "http://www.georss.org/georss"
486        root.attrib["xmlns:gml"] = "http://www.opengis.net/gml"
487        id = ET.SubElement(root, "id")
488        id.text = self.atomID
489        title = ET.SubElement(root, "title")
490        title.text = self.title
491        self._linksToXML(root)
492
493        # NB, the author tag is mandatory for atoms - so if an explicit
494        # author has not been set, just take the author to be the provider
495        if not self.author:
496            author = Person()
497            author.name = self.ME.providerID
498            author.uri = self.ME.providerID
499            self.author = author
500
501        root.append(self.author.toXML())
502           
503        for contributor in self.contributors:
504            root.append(contributor.toXML())
505
506        # add the moles entity section, if it is required
507        if self.ME:
508            root.append(self.ME.toXML())
509
510        # add parameters data
511        for param in self.parameters:
512            root.append(param.toXML())
513
514        # add the type and subtype data
515        self.__addAtomTypeDataXML(root)
516                   
517        summary = ET.SubElement(root, "summary")
518        summary.text = escapeSpecialCharacters(self.Summary)
519                   
520        # add link to content, if required - NB, can only have one content element in atom
521        # - and this is mandatory
522        content = ET.SubElement(root, "content")
523        if self.contentFile:
524            content.attrib["type"] = "application/xml"
525            content.attrib["src"] = self.contentFile
526        else:
527            content.text = escapeSpecialCharacters(self.Content)
528       
529        # if there's a published date already defined, assume we're doing an update now
530        # NB, update element is mandatory
531        currentDate = datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%SZ")
532        if not self.publishedDate:
533            self.publishedDate = currentDate
534
535        updated = ET.SubElement(root, "updated")
536        if not self.updatedDate:
537            self.updatedDate = currentDate
538        updated.text = self.updatedDate
539
540        published = ET.SubElement(root, "published")
541        published.text = self.publishedDate
542
543        # add temporal range data, if available
544        temporalRange = ET.SubElement(root, "moles:temporalRange")
545        if self.t1:
546            temporalRange.text = self.t1
547            if self.t2:
548                temporalRange.text += "/" + self.t2
549
550        # add spatial range data, if available
551        self._addSpatialData(root)
552
553        tree = ET.ElementTree(root)
554        logging.info("XML version of Atom created")
555        return tree
556
557
558    def __getSummary(self):
559        logging.debug("Getting summary data")
560        summaryString = ""
561        for summary_line in self.summary:
562            summaryString += summary_line + "\n"
563
564        return summaryString
565
566    def __setSummary(self, summary):
567        logging.debug("Adding summary data")
568        self.summary = []
569        for summary_line in summary.split('\n'):
570            self.summary.append(summary_line)
571           
572    Summary = property(fset=__setSummary, fget=__getSummary, doc="Atom summary")
573
574
575    def __getContent(self):
576        logging.debug("Getting content data")
577        contentString = ""
578        # NB, there must be content specified in an atom
579        if not self.content:
580            return "Metadata document"
581       
582        for content_line in self.content:
583            contentString += content_line + "\n"
584
585        return contentString
586
587    def __setContent(self, content):
588        logging.debug("Adding content data")
589        self.content = []
590        for content_line in content.split('\n'):
591            self.content.append(content_line)
592           
593    Content = property(fset=__setContent, fget=__getContent, doc="Atom content")
594
595           
596    def fromString(self, xmlString):
597        '''
598        Initialise Atom object using an xmlString
599        @param xmlString: representation of atom as an XML string
600        '''
601        logging.info("Ingesting data from XML string")
602       
603        # firstly, remove any namespaces used - to avoid problems with elementtree
604        logging.debug("Stripping moles namespace from string to allow easy handling with elementtree")
605        xmlString = xmlString.replace('moles:', '')
606        xmlString = xmlString.replace('georss:', '')
607        xmlString = xmlString.replace('gml:', '')
608        xmlString = xmlString.replace('xmlns="http://www.w3.org/2005/Atom"', '')
609
610        # now create elementtree with the XML string
611        logging.debug("Create elementtree instance with XML string")
612        tree = ET.fromstring(xmlString)
613       
614        title = tree.findtext('title')
615        if title:
616            logging.debug("Adding title data")
617            self.title = title
618
619        summary = tree.findtext('summary')
620        if summary:
621            self.Summary = summary
622
623        authorElement = tree.find('author')
624        logging.debug("Adding author data")
625        author = Person()
626        author.fromETElement(authorElement)
627        self.author = author
628
629        contributorElements = tree.findall('contributor')
630        for contributorElement in contributorElements:
631            logging.debug("Adding contributor data")
632            contributor = Person(personType = Person.CONTRIBUTOR_TYPE)
633            contributor.fromETElement(contributorElement)
634            self.contributors.append(contributor)
635
636        molesElement = tree.find('entity')
637        if molesElement:
638            self.ME.fromET(molesElement)
639               
640        self.atomID = tree.findtext('id')
641
642        self._parseCategoryData(tree.findall('category'))
643
644        self._parseLinksData(tree.findall('link'))
645           
646        contentTag = tree.find('content')
647        if contentTag != None:
648            logging.debug("Found content tag - checking for CSML/CDML file data")
649            file = contentTag.attrib.get('src')
650            if file:
651                # NB, the path will reveal more reliably whether we're dealing with CSML and CDML files
652                if file.upper().find('CSML') > -1:
653                    logging.debug("Adding CSML file data")
654                    self.csmlFile = file
655                elif file.upper().find('CDML') > -1:
656                    logging.debug("Adding CDML file data")
657                    self.cdmlFile = file
658                self.contentFile = file
659            else:
660                logging.debug("No file data - adding contents of element instead")
661                self.Content = contentTag.text
662       
663        range = tree.findtext('temporalRange')
664        if range:
665            logging.debug("Adding temporal range data")
666            timeData = range.split('/')
667            self.t1 = timeData[0]
668            if len(timeData) > 1:
669                self.t2 = timeData[1]
670       
671        # NB, this parser won't mind if we're dealing with Envelope or EnvelopeWithTimePeriod
672        minBBox = tree.findall('.//lowerCorner')
673        if minBBox:
674            logging.debug("Adding min spatial range data")
675            minBBox = minBBox[0]
676            spatialData = minBBox.text.split()
677            self.minX = spatialData[0]
678            if len(spatialData) > 1:
679                self.minY = spatialData[1]
680       
681        maxBBox = tree.findall('.//upperCorner')
682        if maxBBox:
683            maxBBox = maxBBox[0]
684            logging.debug("Adding max spatial range data")
685            spatialData = maxBBox.text.split()
686            self.maxX = spatialData[0]
687            if len(spatialData) > 1:
688                self.maxY = spatialData[1]
689               
690        publishedDate = tree.findtext('published')
691        if publishedDate:
692            logging.debug("Adding published date")
693            self.publishedDate = publishedDate
694               
695        updatedDate = tree.findtext('updated')
696        if updatedDate:
697            logging.debug("Adding updated date")
698            self.updatedDate = updatedDate
699           
700        logging.info("Completed data ingest")
701   
702   
703    def _parseCategoryData(self, categories):
704        logging.debug("Adding category/parameters data")
705        for category in categories:
706            cat = Category()
707            cat.fromETElement(category)
708           
709            if cat.term == self.ATOM_TYPE:
710                logging.debug("Found atom type data")
711                self.atomTypeID = cat.label
712                self.atomTypeName = self.VTD.TERM_DATA[cat.label].title
713                continue
714            elif cat.term == self.ATOM_SUBTYPE:
715                logging.debug("Found atom subtype data")
716                self.subtypeID = cat.label
717                self.subtype = cat.scheme
718                continue
719
720            self.parameters.append(cat)
721   
722
723    def setDatasetID(self, datasetID):
724        '''
725        Set the dataset ID for the atom - and generate an appropriate atom name using this
726        @param datasetID: ID to set for the atom
727        '''
728        self.datasetID = datasetID
729        self._generateAtomName(datasetID) 
730        self.atomID = self.createAtomID(datasetID)
731
732
733    def createAtomID(self, datasetID):
734        '''
735        Create a unique ID, conforming to atom standards, for atom
736        NB, see http://diveintomark.org/archives/2004/05/28/howto-atom-id
737        @param datasetID: ID of atom's dataset
738        @return: unique ID
739        '''
740        logging.info("Creating unique ID for atom")
741        if not self.atomBrowseURL:
742            self._generateAtomName(datasetID)
743        urlBit = self.atomBrowseURL.split('://')[1]
744        urlBit = urlBit.replace('#', '')
745        urlBits = urlBit.split('/')
746        dateBit = datetime.datetime.today().strftime("%Y-%m-%d")
747       
748        id = "tag:" + urlBits[0] + "," + dateBit + ":/" + "/".join(urlBits[1:])
749        logging.info("- unique ID created for atom")
750        logging.debug(" - '%s'" %id)
751        return id
752       
753       
754    def _generateAtomName(self, datasetID):
755        '''
756        Generate a consistent name for the atom - with full eXist doc path
757        @param datasetID: ID of atom's dataset
758        '''
759        self.atomName = datasetID + ".atom"
760        self.ndgURI = self.ME.providerID + "__ATOM__" + datasetID
761        self.atomBrowseURL = VTD.BROWSE_ROOT_URL + self.ndgURI
762
763
764    def _parseLinksData(self, links):
765        '''
766        Extract links and atom data from array of link elements in the XML representation of the atom
767        @param links: an array of <link> elements
768        '''
769        # firstly, get all data to start with, so we can properly process it afterwards
770        linkData = {}
771        logging.debug("Getting link data")
772        for linkTag in links:
773            link = Link()
774            link.fromETElement(linkTag)
775
776            if not linkData.has_key(link.rel):
777                linkData[link.rel] = []
778               
779            if link.title == VTD.TERM_DATA[VTD.LOGO_TERM].title:
780                self.logos.append(link)
781            else:
782                linkData[link.rel].append(link)
783
784        # there should be one self referencing link - which will provide info on the atom itself
785        if not linkData.has_key('self'):
786            errorMessage = "Atom does not have self referencing link - " + \
787                "cannot ascertain datasetID without this - please fix"
788            logging.error(errorMessage)
789            raise ValueError(errorMessage)
790       
791        # this is the link describing the atom itself
792        self.atomBrowseURL = linkData['self'][0].href
793       
794        self.datasetID = self.atomBrowseURL.split("__ATOM__")[-1]
795        self.atomName = self.datasetID + ".atom"
796        self.ndgURI = self.atomBrowseURL.split(VTD.BROWSE_ROOT_URL)[1]
797       
798        # now remove this value and the associated moles doc link
799        del linkData['self']
800        molesDoc = self.atomBrowseURL.replace('ATOM', 'NDG-B1')
801        if linkData.has_key('related'):
802            relatedLinks = []
803            for link in linkData['related']:
804                if link.href != molesDoc:
805                    relatedLinks.append(link)
806           
807            linkData['related'] = relatedLinks
808               
809        # now add the remaining links to the atom
810        for key in linkData:
811            for link in linkData[key]:
812                logging.debug("Adding link data")
813                self.relatedLinks.append(link)
814       
815
816    def _addSpatialData(self, element):
817        '''
818        Add spatial coverage element to an input element
819        @param element: element to add coverage data to
820        '''
821        logging.info("Adding spatial data to Atom")
822        bbox = ET.SubElement(element, "georss:where")
823        if not self.minX:
824            logging.info("No spatial data specified")
825            return
826       
827        envelope = ET.SubElement(bbox, "gml:Envelope")
828        lc = ET.SubElement(envelope, "gml:lowerCorner")
829        lc.text = self.minX + " " + self.minY
830        uc = ET.SubElement(envelope, "gml:upperCorner")
831        uc.text = self.maxX + " " + self.maxY
832
833       
834    def setAttribute(self, attributeName, attributeValue):
835        '''
836        Set the value of an atom attribute - and do some basic tidying up of the string content
837        - to escape any XML unfriendly characters
838        @param attributeName: name of the attribute whose value to set
839        @param attributeValue: value to set the attribute to 
840        '''
841        logging.debug("Setting attribute, %s, to %s" %(attributeName, attributeValue))
842        origValue = attributeValue
843       
844        # escape any special characters if a value has been specified
845        # NB, need to cope with both single values and arrays
846        if attributeValue:
847            if type(attributeValue) is list:
848                newVals = []
849                for val in attributeValue:
850                    newVals.append(objectify(escapeSpecialCharacters(val)), attributeName)
851                attributeValue = newVals
852                   
853            else:
854                attributeValue = objectify(escapeSpecialCharacters(attributeValue), attributeName)
855
856        # handle the special case of authors; only one author is allowed per atom
857        # - the others should be treated as contributors
858        if attributeName == "authors":
859            setattr(self, "author", attributeValue[0])
860            if len(attributeValue) > 1:
861                setattr(self, "contributors", attributeValue[1:])
862        elif attributeName == "atomAuthors":
863            self.ME.responsibleParties.append(attributeValue)
864        else:
865            setattr(self, attributeName, attributeValue)
866
867
868    def objectify(self, objectVals, attributeName):
869        '''
870        Some inputs are specified as strings but need to be converted into
871        objects - do this here
872        @param objectVals: a '|' delimited string of values
873        @param attributeName: name of attribute the values belong to
874        '''
875        obj = None
876        if type(objectVals) != str:
877            return objectVals
878       
879        if attributeName == "relatedLinks" or attributeName == "logo":
880            obj = Link()
881        elif attributeName == "atomAuthors" or attributeName == "authors":
882            obj = Person()
883
884        if obj:
885            obj.fromString(objectVals)
886            return obj
887       
888        return objectVals
889
890
891    def toPrettyXML(self):
892        '''
893        Returns nicely formatted XML as string
894        '''
895        atomXML = self.toXML()
896
897        # create the string
898        logging.debug("Converting the elementtree object into a string")
899        prettyXML = et2text(atomXML.getroot())
900
901        # add XML version tag
902        prettyXML = "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n" + prettyXML
903        logging.info("Created formatted version of XML object")
904        return prettyXML
Note: See TracBrowser for help on using the repository browser.