source: ndgCommon/trunk/ndg/common/src/models/Atom.py @ 5212

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/models/Atom.py@5212
Revision 5212, 54.2 KB checked in by cbyrom, 12 years ago (diff)

Adjust the Atom and MolesEntity data models to store their data
internally, 'as is' - and to escape/unescape XML unfriendly characters
on data export/import. Adjust templates and inputs processing to
cope with this change - which allows the input of most characters
without problem (NB, old method caused unfriendly errors when illegal
characters were detected).

Line 
1'''
2 Class representing data in  atom format - allowing moles data to be stored and accessed in a web feed compatible way
3 
4 @author: C Byrom, Tessella Jun 2008
5'''
6import sys, logging, re, datetime
7from xml.sax.saxutils import escape, unescape
8from xml.etree import cElementTree as ET
9import csml.parser as CsmlParser
10import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
11from ndg.common.src.lib.ETxmlView import et2text
12import ndg.common.src.lib.utilities as utilities
13from ndg.common.src.models.vocabtermdata import VocabTermData as VTD
14from ndg.common.src.models import MolesEntity as ME
15from ndg.common.src.models import Deployment as Deployment
16from ndg.common.src.models import AtomState
17from ndg.common.src.models.ndgObject import ndgObject
18
class AtomError(Exception):
    """
    Error type raised by the Atom model code.

    The message is written to the error log as soon as the exception object
    is constructed.
    """
    def __init__(self, msg):
        # log first, so the message is recorded even if the exception is swallowed
        logging.error(msg)
        super(AtomError, self).__init__(msg)
26
27
class Person(object):
    '''
    Class representing atom author type data - with name, uri and role attributes
    @keyword personType: Type of person to create - specified using the
    Person.*_TYPE values.  Default is AUTHOR_TYPE.
    @keyword namespace: a two value array of format, ['short_namespace_name', 'full_namespace_name']
    - e.g. ['moles', 'http://ndg.nerc.ac.uk/schema/moles2beta']
    '''
    AUTHOR_TYPE = 0
    CONTRIBUTOR_TYPE = 1
    RESPONSIBLE_PARTY_TYPE = 2
    # NB, indexed by the *_TYPE values above
    ELEMENT_NAMES = ["author", "contributor", "responsibleParty"]

    def __init__(self, personType = AUTHOR_TYPE, namespace = None):
        self.type = personType
        if namespace:
            self.ns_shortname = namespace[0]
            self.ns_fullname = namespace[1]
        else:
            self.ns_shortname = ""
            self.ns_fullname = ndgObject.ATOM_NS

        self.name = ""
        self.uri = ""
        self.role = ""

        # NB, the atom format specifies slightly different data contents
        self.uriTagName = "email"
        # NB, responsible party data is always stored in the moles section
        # - so any namespace passed in is overridden for this type
        if self.type == self.RESPONSIBLE_PARTY_TYPE:
            self.ns_shortname = 'moles'
            self.ns_fullname = ndgObject.MOLES_NS
            self.uriTagName = "uri"

    def __str__(self):
        '''
        @return: 'name | uri | role' triple string, or an empty string if
        none of the three values are set
        '''
        if self.name or self.uri or self.role:
            return self.name + " | " + self.uri + " | " + self.role
        return ""

    def hasValue(self):
        '''
        @return: True if any of name, uri or role is set; False otherwise
        '''
        return bool(self.name or self.uri or self.role)

    def fromString(self, personString):
        '''
        Populate name, uri and role from a triple string
        @param personString: string passed to utilities.getTripleData
        '''
        (self.name, self.uri, self.role) = utilities.getTripleData(personString)

    def fromETElement(self, personTag):
        '''
        Populate name, role and uri from the child elements of an element
        - unescaping the text on the way in
        @param personTag: ElementTree element holding the person data
        '''
        self.name = unescape(personTag.findtext('{%s}name' %self.ns_fullname) or "")
        self.role = unescape(personTag.findtext('{%s}role' %self.ns_fullname) or "")
        self.uri = unescape(personTag.findtext('{%s}%s' %(self.ns_fullname, self.uriTagName)) or "")
        logging.debug("Added name: '%s', role: '%s', %s: '%s'" \
                      %(self.name, self.role, self.uriTagName, self.uri))

    def toXML(self):
        '''
        Create an element for the person, with a child element for each of
        name, uri and role that is set - escaping the text on the way out
        @return: ElementTree element representing the person
        '''
        prefix = ""
        if self.ns_shortname:
            prefix = self.ns_shortname + ':'

        author = ET.Element(prefix + self.ELEMENT_NAMES[self.type])

        if self.name:
            name = ET.SubElement(author, prefix + "name")
            name.text = escape(self.name)

        if self.uri:
            uri = ET.SubElement(author, prefix + self.uriTagName)
            uri.text = escape(self.uri)

        if self.role:
            role = ET.SubElement(author, prefix + "role")
            role.text = escape(self.role)

        return author

    def _comparisonKey(self):
        '''
        @return: tuple of the values that define the identity of a person
        '''
        return (self.uri, self.name, self.role, self.type)

    def __eq__(self, person1):
        '''
        Value based equality - to allow proper object comparison when checking
        if Person objects are in an array already - i.e. if person in personArray...
        NB, __cmp__ alone is not sufficient for this, since python 3 ignores it;
        also, anything which is not a Person is simply treated as not equal
        '''
        if self is person1:
            return True
        if not isinstance(person1, Person):
            return False
        return self._comparisonKey() == person1._comparisonKey()

    def __ne__(self, person1):
        # NB, python 2 does not derive this from __eq__
        return not self.__eq__(person1)

    def __hash__(self):
        # keep instances hashable and consistent with __eq__ - NB, this means
        # a Person should not be modified whilst it is in a set or used as a dict key
        return hash(self._comparisonKey())

    def __cmp__(self, person1):
        '''
        Retained for python 2 callers using cmp() or ordering
        @return: 0 if the objects are equal, -1 if person1 is empty,
        1 otherwise
        '''
        if not person1:
            return -1

        if self == person1:
            return 0
        return 1
118
119
class Link(object):
    '''
    Class representing an atom link - with href, title and rel attributes
    '''

    def __init__(self):
        self.href = ""
        self.title = ""
        self.rel = ""

    def fromString(self, linkString):
        '''
        Populate href, title and rel from a triple string
        @param linkString: string passed to utilities.getTripleData
        '''
        (self.href, self.title, self.rel) = utilities.getTripleData(linkString, doEscape=False)
        # ensure no funny characters are included on data ingest
        self.title = utilities.escapeSpecialCharacters(self.title)

    def fromETElement(self, linkTag):
        '''
        Populate href, rel and title from the attributes of an element
        @param linkTag: ElementTree element holding the link data
        '''
        # remove any url quoting when reading in from XML - to avoid need for
        # correction on display
        self.href = unescape(linkTag.attrib.get('href') or "")
        self.rel = unescape(linkTag.attrib.get('rel') or "")
        self.title = unescape(linkTag.attrib.get('title') or "")

    def toXML(self):
        '''
        @return: 'link' ElementTree element with escaped href, title and rel
        attributes
        '''
        # ensure the xml element doesn't contain things like '&' - which will
        # cause problems when running xqueries
        link = ET.Element("link")
        link.attrib["href"] = escape(self.href)
        link.attrib["title"] = escape(self.title)
        link.attrib["rel"] = escape(self.rel)
        return link

    def hasValue(self):
        '''
        @return: True if the link has a href or a title; False otherwise
        '''
        # NB, just a rel on its own is meaningless - so ignore
        return bool(self.href or self.title)

    def __str__(self):
        '''
        @return: 'href | title | rel' triple string, or an empty string if
        none of the three values are set
        '''
        if self.href or self.title or self.rel:
            return self.href + " | " + self.title + " | " + self.rel
        return ""

    def isChildAtom(self):
        '''
        Determines whether the link refers to another atom - e.g. a link to
        a data granule
        @return True, if so; False otherwise
        '''
        atomTerms = (VTD.GRANULE_TERM, VTD.DEPLOYMENT_TERM, VTD.ACTIVITY_TERM,
                     VTD.DPT_TERM, VTD.OBS_TERM)
        return any(self.rel.endswith(term) for term in atomTerms)

    def _comparisonKey(self):
        '''
        @return: tuple of the values that define the identity of a link
        '''
        return (self.href, self.title, self.rel)

    def __eq__(self, link1):
        '''
        Value based equality - to allow proper object comparison when checking
        if Link objects are in an array already - i.e. if link in linkArray...
        NB, __cmp__ alone is not sufficient for this, since python 3 ignores it;
        also, anything which is not a Link is simply treated as not equal
        '''
        if self is link1:
            return True
        if not isinstance(link1, Link):
            return False
        return self._comparisonKey() == link1._comparisonKey()

    def __ne__(self, link1):
        # NB, python 2 does not derive this from __eq__
        return not self.__eq__(link1)

    def __hash__(self):
        # keep instances hashable and consistent with __eq__ - NB, this means
        # a Link should not be modified whilst it is in a set or used as a dict key
        return hash(self._comparisonKey())

    def __cmp__(self, link1):
        '''
        Retained for python 2 callers using cmp() or ordering
        @return: 0 if the objects are equal, -1 if link1 is empty, 1 otherwise
        '''
        if not link1:
            return -1

        if self == link1:
            return 0
        return 1
191
192
class Category(object):
    '''
    Class representing an atom category - with term, scheme and label attributes
    '''
    def __init__(self):
        self.term = self.scheme = self.label = ""

    def fromString(self, linkString, escapeSpecialCharacters=True):
        '''
        Create Category from triple string of format, 'label | scheme | term'
        @param linkString: triple string to create category with
        @keyword escapeSpecialCharacters: passed on to utilities.getTripleData
        as its doEscape keyword (default True)
        '''
        label, scheme, term = utilities.getTripleData(linkString, \
            doEscape=escapeSpecialCharacters)

        # the values end up as XML attributes - i.e. already wrapped in double
        # quotes - so swap any embedded double quotes for single apostrophes
        self.label = label.replace("\"", "'")
        self.scheme = scheme.replace("\"", "'")
        self.term = term.replace("\"", "'")

    def fromETElement(self, linkTag):
        '''
        Populate term, label and scheme from the attributes of an element
        - unescaping the values on the way in
        @param linkTag: ElementTree element holding the category data
        '''
        attributes = linkTag.attrib
        self.term = unescape(attributes.get('term') or "")
        self.label = unescape(attributes.get('label') or "")
        self.scheme = unescape(attributes.get('scheme') or "")

    def toXML(self):
        '''
        @return: 'category' ElementTree element with escaped term, scheme and
        label attributes
        '''
        categoryElement = ET.Element("category")
        categoryElement.set("term", escape(self.term))
        categoryElement.set("scheme", escape(self.scheme))
        categoryElement.set("label", escape(self.label))
        return categoryElement

    def hasValue(self):
        '''
        @return: True if any of scheme, label or term is set; False otherwise
        '''
        if not (self.scheme or self.label or self.term):
            return False
        return True
235
236
237class Atom(object):
238
239    # labels for use with the atom categories
240    ATOM_TYPE = "ATOM_TYPE"
241    ATOM_SUBTYPE = "ATOM_SUBTYPE"
242
243    # labels for use with the templates to set/extract specific inputs
244    ONLINE_REF_LABEL = "online_ref"
245    PARAMETER_LABEL = "parameter"
246    ATOM_REF_LABEL = "atom_ref"
247    DELIMITER = "---"
248    REMOVE_LABEL = "remove"
249   
250    # format to use for t1-t2 date range
251    YEAR_FORMAT = '%Y-%m-%d'
252
253    # subtype name, when not defined
254    SUB_TYPE_NOT_DEFINED_NAME = "Not currently defined"
255
256    def __init__(self, atomType = None, vocabTermData = None, ndgObject = None, \
257                 xmlString = None, state = AtomState.WORKING_STATE, **inputs):
258        '''
259        Constructor - initialise the atom variables
260        @keyword atomType: type of atom to set up
261        @keyword vocabTermData: instance of VocabTermData object to use with atom
262        @keywork ndgObject: instance of ndgObject to use with atom
263        @keyword xmlString: XML representation of atom - will be parsed to populate
264        the atom data
265        @keyword state:  AtomState object representing the state of the atom
266        @param inputs: a dict with vals to set directly against the object fields
267        '''
268        logging.info("Initialising atom")
269        if atomType:
270            logging.info(" - of type '%s'" %atomType)
271        self.atomTypeID = atomType
272
273        # some data have further subtypes specified
274        self.subtypeID = None # this should be the termID
275        self.subtype = None # and this should be the fully formed vocab URL
276       
277        self.ndgObject = ndgObject
278
279        self.atomName = None
280        self.files = []
281        self.author = Person()
282        self.contributors = []
283        self.atomAuthors = []
284        self.parameters = []
285        self.spatialData = []
286        self.temporalData = []
287        self.relatedLinks = []
288        self.summary = []
289        self.content = []
290        # NB, this deployments data duplicates other atom data - and is only used for a
291        # convenient way to collect the info (by lookupAssociatedData()) for use in templates
292        self.deployments = []
293        # ditto for the following field
294        self.dataEntities = []
295           
296        self.csmlFile = None
297        self.cdmlFile = None
298        # general variable to use for setting the atom content - NB, if a csmlFile is specified
299        # (either directly or via a cdmlFile specification), this will be the content by default
300        # for this purpose
301        self.contentFile = None     
302        self.title = None
303        self.datasetID = None        # NB, the dataset id ends up in the atomName - <path><datasetID>.atom
304        self.atomID = None
305   
306        # boundary box info - to replace spatial/temporalData?
307        self.minX = None
308        self.minY = None
309        self.maxX = None
310        self.maxY = None
311        self.t1 = None
312        self.t2 = None
313
314        self.ME = ME.MolesEntity(**inputs)
315       
316        # date when the atom was first ingested
317        self.publishedDate = None
318
319        # last update date
320        self.updatedDate = None
321
322        # assume atom in working state by default - this is used to define what collection
323        # in eXist the atom is stored in
324        self.state = state
325       
326        # additional, non standard atom data can be included in the molesExtra element
327        if vocabTermData:
328            self.VTD = vocabTermData
329        else:
330            self.VTD = VTD()
331       
332        if xmlString:
333            self.fromString(xmlString)
334
335        # retain old title, in case it has changed - NB, this will be done by applying
336        # the inputs dict - and might require other atoms to be updated
337        self.oldTitle = self.title
338           
339        # if inputs passed in as dict, add these now
340        if inputs:
341            logging.info("Adding info to atom from input dict")
342            logging.debug(inputs)
343           
344            # avoid the initial case being caught - i.e. when there is no title at all
345            if inputs.has_key('title'):
346                newTitle = inputs.get('title')
347                if not self.title:
348                    self.oldTitle = newTitle
349                   
350            self.__dict__.update(inputs)
351            self.ME.__dict__.update(inputs)
352           
353            # NB, this doesn't trigger the Summary Property, so do this
354            # explicitly, if need be
355            if inputs.has_key('Summary'):
356                self.Summary = inputs.get('Summary')
357            if inputs.has_key('Content'):
358                self.Content = inputs.get('Content')
359            if inputs.has_key('author'):
360                name = inputs.get('author')
361                author = Person()
362                author.fromString(name)
363                self.author = author
364
365        if self.atomTypeID:
366            self.atomTypeName = self.VTD.TERM_DATA[self.atomTypeID].title
367
368        self.deploymentsURL = ""
369        self.dataEntitiesURL = ""
370
371        logging.info("Atom initialised")
372
373
374    def addOnlineReferences(self, links):
375        '''
376        Add online reference data associated with the atom
377        - NB, care needs to be taken here since this data is stored in the atom
378        link elements and these are also used for the various atom associations
379        @param links: a Link or array of Links to add to the relatedLinks attribute
380        '''
381        logging.debug("Adding online references")
382        if not links:
383            return
384       
385        if type(links) is not list:
386            links = [links]
387       
388        # firstly clear out any online refs data from the existing related links
389        newLinks = []
390        for link in self.relatedLinks:
391            if link.isChildAtom():
392                newLinks.append(link)
393       
394        newLinks.extend(links)
395        self.relatedLinks = newLinks
396        logging.debug("Online references added")
397
398
399    def addUniqueRelatedLinks(self, links):
400        '''
401        Add links to relatedLinks array - if they are not already included
402        @param links: a Link or array of Links to add to the relatedLinks attribute
403        '''
404        self.addUniqueLinks(self.relatedLinks, links)
405       
406
407    def removeRelatedLinks(self, linksToDelete):
408        '''
409        Remove any links in the input list from the atom's related links list
410        @param linksToDelete: array of Link objects to remove from atom
411        '''
412        logging.debug("Removing related links from atom")
413        if not linksToDelete:
414            return
415       
416        if type(linksToDelete) is not list:
417            linksToDelete = [linksToDelete]
418       
419        updatedLinks = []
420        for link in self.relatedLinks:
421            if type(link) is not Link:
422                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
423                continue
424            if link in linksToDelete:
425                logging.debug("- found link to remove")
426            else:
427                updatedLinks.append(link)
428
429        self.relatedLinks = updatedLinks
430        logging.debug("Links removed")
431
432    def getPublicationStatePath(self):
433        '''
434        Determine the correct publication state collection for the atom
435        @return collectionPath: collection path for the publication state of the atom
436        '''
437        logging.debug("Getting collection path for atom publication state")
438        collectionPath = dc.ATOM_COLLECTION_PATH + self.state.collectionPath
439        logging.debug("Returning publication state collection, '%s'" %collectionPath)
440        return collectionPath
441       
442
443    def getDefaultEntityCollectionPath(self):
444        '''
445        Determine the correct collection for the entity type of the atom
446        @return entityPath: collection path for the data type of the atom
447        '''
448        logging.debug("Getting collection path for atom entity type")
449        collectionPath = self.getPublicationStatePath()
450       
451        if self.atomTypeID == VTD.DE_TERM:
452            collectionPath += dc.DE_COLLECTION_PATH
453        elif self.atomTypeID == VTD.GRANULE_TERM:
454            collectionPath += dc.GRANULE_COLLECTION_PATH
455        elif self.atomTypeID == VTD.ACTIVITY_TERM and \
456            self.subtypeID == VTD.DEPLOYMENT_TERM:
457            collectionPath += dc.DEPLOYMENTS_COLLECTION_PATH
458        else:
459            collectionPath += dc.DEPLOYMENT_COLLECTION_PATH
460       
461        logging.debug("Returning entity collection, '%s'" %collectionPath)
462        return collectionPath
463       
464
465    def getDefaultCollectionPath(self):
466        '''
467        Determine the correct collection to use for the atom in eXist
468        '''
469        logging.debug("Getting default collection path for atom")
470        collectionPath = self.getDefaultEntityCollectionPath()
471        if not self.ME.providerID:
472            raise AtomError("Error: cannot determine atom collection path because " + \
473                            "the provider ID is not defined")
474           
475        collectionPath += self.ME.providerID + "/"
476        logging.debug("Returning collection, '%s'" %collectionPath)
477        return collectionPath
478
479
480    def __addAtomTypeDataXML(self, root):
481        '''
482        Add the atom type, and subtype data, if available, to atom categories
483        - and lookup and add the appropriate vocab term data
484        '''
485        if self.atomTypeID:
486            logging.info("Adding atom type info to XML output")
487            category = Category()
488            category.label = self.atomTypeID
489            # look up the appropriate vocab term data
490            category.scheme = self.VTD.getTermCurrentVocabURL(self.atomTypeID)
491            category.term = self.ATOM_TYPE
492            root.append(category.toXML())
493
494        if self.subtypeID:
495            logging.info("Adding atom subtype info to XML output")
496            # NB subtypes not all defined, so leave this out for the moment
497            category.label = self.subtypeID
498            # look up the appropriate vocab term data
499            category.scheme = self.VTD.getTermCurrentVocabURL(self.subtypeID)
500            category.term = self.ATOM_SUBTYPE
501            root.append(category.toXML())
502
503
504    def addMolesEntityData(self, abbreviation, provider_id, object_creation_time):
505        '''
506        Add data to include in the moles entity element
507        '''
508        logging.debug('Adding moles entity information')
509        self.ME.abbreviation = abbreviation
510        self.ME.providerID = provider_id
511        self.ME.createdDate = utilities.getISO8601Date(object_creation_time)
512        logging.debug('Moles entity information added')
513
514
515    def addAuthors(self, authors):
516        '''
517        Add author data appropriately to the atom
518        NB, these will overwrite any existing authors of the same type
519        @param authors: list of Person objects with the author data
520        '''
521        logging.debug('Adding authors data to Atom')
522        isFirstAuthor = {}
523        authorArray = None
524        for author in authors:
525            # NB, we're only allowed one atom author
526            if author.type == Person.AUTHOR_TYPE:
527                self.author = author
528                   
529                if isFirstAuthor.has_key(author.type):
530                    raise AtomError("Error: an atom can only have one author specified")
531                isFirstAuthor[author.type] = 1
532                continue
533            elif author.type == Person.CONTRIBUTOR_TYPE:
534                authorArray = self.contributors
535            elif author.type == Person.RESPONSIBLE_PARTY_TYPE:
536                authorArray = self.ME.responsibleParties
537               
538            # check if this is the first addition - if so, clear out the
539            # array in advance
540            if not isFirstAuthor.has_key(author.type):
541                logging.debug("Clearing out author array")
542                # NB, need to be careful to clear the array, not create a ref
543                # to a new array
544                del authorArray[:]
545                isFirstAuthor[author.type] = 1
546
547            if author.hasValue() and author not in authorArray:
548                logging.debug("Adding author (type:'%s', name:'%s', uri:'%s', role:'%s')" \
549                              %(author.type, author.name, author.uri, author.role))
550                authorArray.append(author)
551
552        logging.debug('Finished adding authors data')
553
554
555    def _isNewParameter(self, param):
556        '''
557        Check if a parameter is already specified in the atom, return False if
558        so, otherwise return True
559        '''
560        for p in self.parameters:
561            if p.term == param.term and \
562                p.scheme == param.scheme and \
563                p.label == param.label:
564                return False
565        return True
566
567
568    def addRelatedLinks(self, linkVals):
569        '''
570        Add related links in string format - converting to Link objects
571        NB, only add the link if it is unique
572       
573        @param linkVals: string of format, 'uri | title | vocabServerURL'
574        '''
575        link = self.objectify(linkVals, 'relatedLinks')
576        if link not in self.relatedLinks:
577            self.relatedLinks.append(link)
578
579
580    def addParameters(self, params):
581        '''
582        Add a parameter to list - ensuring it is unique and has been formatted and tidied appropriately
583        @params param: parameter, as string array, to add to atom parameters collection
584        '''
585        # avoid strings being parsed character by character
586        if type(params) is str:
587            params = [params]
588           
589        for param in params:
590            # firstly tidy parameter
591            param = utilities.tidyUpParameters(param)
592            category = Category()
593            # NB, data already tidied up here, so set keyword to avoid this happening again
594            category.fromString(param, escapeSpecialCharacters=False)
595
596            # now check for uniqueness
597            if self._isNewParameter(category):
598                logging.debug("Adding new parameter: %s" %param)
599                self.parameters.append(category)
600   
601   
602    def _linksToXML(self, root):
603        '''
604        Add required links to the input element
605        @param root: element to add links to - NB, should be the root element of the atom
606        '''
607        selfLink = ET.SubElement(root, "link")
608        selfLink.attrib["href"] = self.atomBrowseURL
609        selfLink.attrib["rel"] = "self"
610       
611        for relatedLink in self.relatedLinks:
612            if relatedLink.hasValue():
613                root.append(relatedLink.toXML())
614   
615    def toXML(self):
616        '''
617        Convert the atom into XML representation and return this
618        @return: xml version of atom
619        '''
620        logging.info("Creating formatted XML version of Atom")
621        root = ET.Element("entry")
622        root.attrib["xmlns"] = ndgObject.ATOM_NS
623        root.attrib["xmlns:moles"] = ndgObject.MOLES_NS
624        root.attrib["xmlns:georss"] = ndgObject.GEOSS_NS
625        root.attrib["xmlns:gml"] = ndgObject.GML_NS
626        id = ET.SubElement(root, "id")
627        id.text = self.atomID
628        title = ET.SubElement(root, "title")
629        title.text = escape(self.title)
630        self._linksToXML(root)
631
632        if self.author and self.author.hasValue():
633            root.append(self.author.toXML())
634           
635        for contributor in self.contributors:
636            root.append(contributor.toXML())
637
638        # add parameters data
639        for param in self.parameters:
640            if param.hasValue():
641                root.append(param.toXML())
642
643        # add the type and subtype data
644        self.__addAtomTypeDataXML(root)
645                   
646        summary = ET.SubElement(root, "summary")
647        summary.text = self.Summary
648                   
649        # add link to content, if required - NB, can only have one content element in atom
650        # - and this is mandatory
651        content = ET.SubElement(root, "content")
652        contentFile = self.contentFile or self.csmlFile or self.cdmlFile
653        if contentFile:
654            content.attrib["type"] = "application/xml"
655            content.attrib["src"] = contentFile
656        else:
657            content.attrib["type"] = "xhtml"
658            div = ET.SubElement(content, 'div')
659            div.attrib["xmlns:xhtml"] = ndgObject.XHTML_NS
660            div.text = self.Content
661       
662        # if there's a published date already defined, assume we're doing an update now
663        # NB, update element is mandatory
664        currentDate = datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%SZ")
665        if not self.publishedDate:
666            self.publishedDate = currentDate
667
668        updated = ET.SubElement(root, "updated")
669        if not self.updatedDate:
670            self.updatedDate = currentDate
671        updated.text = self.updatedDate
672
673        published = ET.SubElement(root, "published")
674        published.text = self.publishedDate
675
676        # add the moles entity section, if it is required
677        if self.ME:
678            root.append(self.ME.toXML())
679
680        # add temporal range data, if available
681        temporalRange = ET.SubElement(root, "moles:temporalRange")
682        if self.t1:
683            temporalRange.text = escape(self.t1)
684            if self.t2:
685                temporalRange.text += "/" + escape(self.t2)
686
687        # add spatial range data, if available
688        self._addSpatialData(root)
689
690        tree = ET.ElementTree(root)
691        logging.info("XML version of Atom created")
692        return tree
693
694
695    def __getSummary(self):
696        logging.debug("Getting summary data")
697        summaryString = ""
698        for summary_line in self.summary:
699            summaryString += escape(summary_line) + "\n"
700
701        return summaryString
702
703
704    def __setSummary(self, summary):
705        logging.debug("Adding summary data")
706        self.summary = []
707        for summary_line in summary.split('\n'):
708            self.summary.append(unescape(summary_line))#utilities.escapeSpecialCharacters(summary_line))
709           
710    Summary = property(fset=__setSummary, fget=__getSummary, doc="Atom summary")
711
712
713    def __getContent(self):
714        logging.debug("Getting content data")
715        contentString = ""
716        # NB, there must be content specified in an atom
717        if not self.content:
718            return "Metadata document"
719       
720        for content_line in self.content:
721            contentString += content_line + "\n"
722
723        return contentString
724
725    def __setContent(self, content):
726        logging.debug("Adding content data")
727        self.content = []
728        if not content:
729            return
730       
731        for content_line in content.split('\n'):
732            self.content.append(content_line)
733           
734    Content = property(fset=__setContent, fget=__getContent, doc="Atom content")
735
736           
737    def fromString(self, xmlString):
738        '''
739        Initialise Atom object using an xmlString
740        @param xmlString: representation of atom as an XML string
741        '''
742        logging.info("Ingesting data from XML string")
743        logging.debug("Create elementtree instance with XML string")
744        tree = ET.fromstring(xmlString)
745        title = tree.findtext('{%s}title' %ndgObject.ATOM_NS)
746        if title:
747            logging.debug("Adding title data")
748            self.title = unescape(title)
749
750        summary = tree.findtext('{%s}summary' %ndgObject.ATOM_NS)
751        if summary:
752            self.Summary = summary#.decode('unicode_escape')
753
754        authorElement = tree.find('{%s}author' %ndgObject.ATOM_NS)
755        if authorElement:
756            logging.debug("Adding author data")
757            author = Person()
758            author.fromETElement(authorElement)
759            self.author = author
760
761        contributorElements = tree.findall('{%s}contributor' %ndgObject.ATOM_NS)
762        for contributorElement in contributorElements:
763            logging.debug("Adding contributor data")
764            contributor = Person(personType = Person.CONTRIBUTOR_TYPE)
765            contributor.fromETElement(contributorElement)
766            self.contributors.append(contributor)
767
768        molesElement = tree.find('{%s}entity' %ndgObject.MOLES_NS)
769        if molesElement:
770            self.ME.fromET(molesElement)
771               
772        atomID = tree.findtext('{%s}id' %ndgObject.ATOM_NS)
773        self.__parseAtomID(atomID)
774       
775        self._parseCategoryData(tree.findall('{%s}category' %ndgObject.ATOM_NS))
776
777        self._parseLinksData(tree.findall('{%s}link' %ndgObject.ATOM_NS))
778           
779        contentTag = tree.find('{%s}content' %ndgObject.ATOM_NS)
780        if contentTag != None:
781            logging.debug("Found content tag - checking for CSML/CDML file data")
782            file = contentTag.attrib.get('src')
783            if file:
784                # NB, the path will reveal more reliably whether we're dealing with CSML and CDML files
785                if file.upper().find('CSML') > -1:
786                    logging.debug("Adding CSML file data")
787                    self.csmlFile = file
788                elif file.upper().find('CDML') > -1:
789                    logging.debug("Adding CDML file data")
790                    self.cdmlFile = file
791                self.contentFile = file
792            else:
793                logging.debug("No file data - adding contents of element instead")
794                divEl = contentTag.find('{%s}div'%ndgObject.ATOM_NS)#XHTML_NS)
795                div = divEl.text
796               
797                # NB, this can contain xhtml, so check for children
798                for child in divEl.getchildren():
799                    div += ET.tostring(child)
800                   
801                self.Content = div
802       
803        range = tree.findtext('{%s}temporalRange' %ndgObject.MOLES_NS)
804        if range:
805            logging.debug("Adding temporal range data")
806            timeData = range.split('/')
807            self.t1 = unescape(timeData[0])
808            if len(timeData) > 1:
809                self.t2 = unescape(timeData[1])
810       
811        where = tree.find('{%s}where' %ndgObject.GEOSS_NS)
812        if where:
813            # NB, this parser won't mind if we're dealing with Envelope or EnvelopeWithTimePeriod
814            minBBox = where.findall('.//{%s}lowerCorner' %ndgObject.GML_NS)
815            if minBBox:
816                logging.debug("Adding min spatial range data")
817                minBBox = minBBox[0]
818                spatialData = minBBox.text.split()
819                self.minX = unescape(spatialData[0])
820                if len(spatialData) > 1:
821                    self.minY = unescape(spatialData[1])
822           
823            maxBBox = where.findall('.//{%s}upperCorner' %ndgObject.GML_NS)
824            if maxBBox:
825                maxBBox = maxBBox[0]
826                logging.debug("Adding max spatial range data")
827                spatialData = maxBBox.text.split()
828                self.maxX = unescape(spatialData[0])
829                if len(spatialData) > 1:
830                    self.maxY = unescape(spatialData[1])
831               
832        publishedDate = tree.findtext('{%s}published' %ndgObject.ATOM_NS)
833        if publishedDate:
834            logging.debug("Adding published date")
835            self.publishedDate = publishedDate
836               
837        updatedDate = tree.findtext('{%s}updated' %ndgObject.ATOM_NS)
838        if updatedDate:
839            logging.debug("Adding updated date")
840            self.updatedDate = updatedDate
841           
842        logging.info("Completed data ingest")
843   
844   
845    def _parseCategoryData(self, categories):
846        logging.debug("Adding category/parameters data")
847        for category in categories:
848            cat = Category()
849            cat.fromETElement(category)
850           
851            if cat.term == self.ATOM_TYPE:
852                logging.debug("Found atom type data")
853                self.atomTypeID = cat.label
854                self.atomTypeName = self.VTD.TERM_DATA[cat.label].title
855                continue
856            elif cat.term == self.ATOM_SUBTYPE:
857                logging.debug("Found atom subtype data")
858                self.subtypeID = cat.label
859                self.subtype = cat.scheme
860                continue
861
862            self.parameters.append(cat)
863
864
865    def __parseAtomID(self, atomID):
866        '''
867        Given an atom ID, extract the useful bits of info and set these on
868        the relevant atom attributes
869        @param atomID: an atom ID in the 'tag' format
870        '''
871        logging.debug("Extracting atom info from ID, '%s'" %atomID)
872        self.atomID = atomID
873        self.datasetID = atomID.split("__ATOM__")[-1]
874        self._generateAtomName(self.datasetID)
875        logging.debug("- all info extracted")
876   
877
878    def setDatasetID(self, datasetID):
879        '''
880        Set the dataset ID for the atom - and generate an appropriate atom name using this
881        @param datasetID: ID to set for the atom
882        '''
883        self.datasetID = datasetID
884        self._generateAtomName(datasetID) 
885        self.atomID = self.createAtomID(datasetID)
886
887
888    def createAtomID(self, datasetID):
889        '''
890        Create a unique ID, conforming to atom standards, for atom
891        NB, see http://diveintomark.org/archives/2004/05/28/howto-atom-id
892        @param datasetID: ID of atom's dataset
893        @return: unique ID
894        '''
895        logging.info("Creating unique ID for atom")
896        if not self.atomBrowseURL:
897            self._generateAtomName(datasetID)
898        urlBit = self.atomBrowseURL.split('://')[1]
899        urlBit = urlBit.replace('#', '')
900        urlBits = urlBit.split('/')
901        host = urlBits[0].split(':')[0] # avoid the port colon - as this breaks the ID format
902        dateBit = datetime.datetime.today().strftime("%Y-%m-%d")
903       
904        id = "tag:" + host + "," + dateBit + ":/" + "/".join(urlBits[1:])
905        logging.info("- unique ID created for atom")
906        logging.debug(" - '%s'" %id)
907        return id
908       
909       
910    def _generateAtomName(self, datasetID):
911        '''
912        Generate a consistent name for the atom - with full eXist doc path
913        @param datasetID: ID of atom's dataset
914        '''
915        self.atomName = datasetID + ".atom"
916        if not self.ME.providerID:
917            raise ValueError("Provider ID has not been specified for atom - please add this and retry")
918        self.ndgURI = self.ME.providerID + "__ATOM__" + datasetID
919        self.atomBrowseURL = VTD.BROWSE_ROOT_URL + self.ndgURI
920
921
922    def _parseLinksData(self, links):
923        '''
924        Extract links and atom data from array of link elements in the XML representation of the atom
925        @param links: an array of <link> elements
926        '''
927        # firstly, get all data to start with, so we can properly process it afterwards
928        linkData = {}
929        logging.debug("Getting link data")
930        for linkTag in links:
931            link = Link()
932            link.fromETElement(linkTag)
933
934            if not linkData.has_key(link.rel):
935                linkData[link.rel] = []
936           
937            linkData[link.rel].append(link)
938
939        # there should be one self referencing link - which will provide info on the atom itself
940        if not linkData.has_key('self'):
941            errorMessage = "Atom does not have self referencing link - " + \
942                "cannot ascertain datasetID without this - please fix"
943            logging.error(errorMessage)
944            raise ValueError(errorMessage)
945       
946        # this is the link describing the atom itself
947        self.atomBrowseURL = linkData['self'][0].href
948       
949        self.datasetID = self.atomBrowseURL.split("__ATOM__")[-1]
950        self.atomName = self.datasetID + ".atom"
951        # NB, only split on the stem, since the browse host may not be
952        # the same as that defined in VTD
953        self.ndgURI = self.atomBrowseURL.split(VTD.BROWSE_STEM_URL)[-1]
954       
955        # now remove this value and the associated moles doc link
956        del linkData['self']
957        molesDoc = self.atomBrowseURL.replace('ATOM', 'NDG-B1')
958        if linkData.has_key('related'):
959            relatedLinks = []
960            for link in linkData['related']:
961                if link.href != molesDoc:
962                    relatedLinks.append(link)
963           
964            linkData['related'] = relatedLinks
965               
966        # now add the remaining links to the atom
967        for key in linkData:
968            for link in linkData[key]:
969                logging.debug("Adding link data")
970                self.relatedLinks.append(link)
971       
972
973    def _addSpatialData(self, element):
974        '''
975        Add spatial coverage element to an input element
976        @param element: element to add coverage data to
977        '''
978        logging.info("Adding spatial data to Atom")
979        if not self.minX:
980            logging.info("No spatial data specified")
981            return
982        bbox = ET.SubElement(element, "georss:where")
983        envelope = ET.SubElement(bbox, "gml:Envelope")
984        lc = ET.SubElement(envelope, "gml:lowerCorner")
985        lc.text = escape(str(self.minX) + " " + str(self.minY))
986        uc = ET.SubElement(envelope, "gml:upperCorner")
987        uc.text = escape(str(self.maxX) + " " + str(self.maxY))
988
989       
990    def setAttribute(self, attributeName, attributeValue, escapeSpecials = True):
991        '''
992        Set the value of an atom attribute - and do some basic tidying up of the string content
993        - to escape any XML unfriendly characters
994        @param attributeName: name of the attribute whose value to set
995        @param attributeValue: value to set the attribute to 
996        @keyword escapeSpecials: if true, escape any special characters in the attribute
997        content.  Default = True
998        '''
999        logging.debug("Setting attribute, %s, to %s" %(attributeName, attributeValue))
1000        origValue = attributeValue
1001       
1002        # escape any special characters if a value has been specified
1003        # NB, need to cope with both single values and arrays
1004        isList = True
1005        if attributeValue:
1006            if not isinstance(attributeValue, list):
1007                attributeValue = [attributeValue]
1008                isList = False
1009               
1010            newVals = []
1011            for val in attributeValue:
1012                data = val
1013                if escapeSpecials:
1014                    utilities.escapeSpecialCharacters(val)
1015                newVals.append(self.objectify(data, attributeName))
1016            attributeValue = newVals
1017
1018        # handle the special case of authors; only one author is allowed per atom
1019        # - the others should be treated as contributors
1020        if attributeName == "authors":
1021            setattr(self, "author", attributeValue[0])
1022            if len(attributeValue) > 1:
1023                setattr(self, "contributors", attributeValue[1:])
1024        elif attributeName == "atomAuthors":
1025            self.ME.responsibleParties.extend(attributeValue)
1026        elif attributeName == "files":
1027            self.addUniqueRelatedLinks(attributeValue)
1028        else:
1029            if not isList:
1030                attributeValue = attributeValue[0]
1031            setattr(self, attributeName, attributeValue)
1032
1033
1034    def objectify(self, objectVals, attributeName):
1035        '''
1036        Some inputs are specified as strings but need to be converted into
1037        objects - do this here
1038        @param objectVals: a '|' delimited string of values
1039        @param attributeName: name of attribute the values belong to
1040        '''
1041        obj = None
1042        if type(objectVals) != str:
1043            return objectVals
1044       
1045        if attributeName == "relatedLinks":
1046            obj = Link()
1047        elif attributeName == "atomAuthors":
1048            obj = Person(personType = Person.RESPONSIBLE_PARTY_TYPE)
1049        elif attributeName == "authors":
1050            # NB, ensure there is only one author tag - extra authors are contributors
1051            authorType = Person.AUTHOR_TYPE
1052            if self.author and self.author.hasValue():
1053                authorType= Person.CONTRIBUTOR_TYPE
1054            obj = Person(personType = authorType)
1055        elif attributeName == 'files':
1056            obj = Link()
1057            objectVals = '%s|%s|%s' \
1058                %(self.VTD.getTermCurrentVocabURL(VTD.METADATA_SOURCE_TERM), objectVals, VTD.METADATA_SOURCE_TERM)
1059
1060        if obj:
1061            obj.fromString(objectVals)
1062            # NB, need to set it now, just in case we don't set it before coming back
1063            if attributeName == "authors" and (not self.author or not self.author.hasValue()):
1064                self.author = obj
1065            return obj
1066       
1067        return objectVals
1068
1069
1070    def toPrettyXML(self):
1071        '''
1072        Returns nicely formatted XML as string
1073        '''
1074        atomXML = self.toXML()
1075
1076        # create the string
1077        logging.debug("Converting the elementtree object into a string")
1078        prettyXML = et2text(atomXML.getroot())
1079
1080        # add XML version tag
1081        prettyXML = "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n" + prettyXML
1082        logging.info("Created formatted version of XML object")
1083        return prettyXML
1084
1085
1086    def getLinksOfType(self, termID):
1087        '''
1088        Returns links in the atom related links attribute which match the specified
1089        term ID
1090        @param termID: the termID to look for in the related links - NB, this is
1091        matched to the end of the link.rel value
1092        @return links: array of Link objects with matching term type
1093        '''
1094        logging.debug("Getting atom links of type, '%s'" %termID)
1095        matchingLinks = []
1096        for link in self.relatedLinks:
1097            # firstly, handle special case where we only want the online ref type links
1098            # returned
1099            if termID == self.ONLINE_REF_LABEL:
1100                if not link.isChildAtom():
1101                    logging.debug("- found link with matching term type")
1102                    matchingLinks.append(link)
1103               
1104            elif link and link.rel and link.rel.lower().endswith(termID.lower()):
1105                logging.debug("- found link with matching term type")
1106                matchingLinks.append(link)
1107               
1108        logging.debug("Returning matched links")
1109        return matchingLinks
1110       
1111       
1112    def getLogos(self):
1113        '''
1114        Return related links that are logos
1115        @return: array of Links containing the logos for the atom
1116        '''
1117        logos = []
1118        for link in self.relatedLinks:
1119            if link.rel.lower().endswith(VTD.LOGO_TERM.lower()):
1120                logos.append(link)
1121               
1122        return logos
1123   
1124   
1125    def isGranule(self):
1126        if self.atomTypeID == VTD.GRANULE_TERM:
1127            return True
1128        return False
1129   
1130   
1131    def isDE(self):
1132        if self.atomTypeID == VTD.DE_TERM:
1133            return True
1134        return False
1135   
1136    def isDeployment(self):
1137        if self.subtypeID and self.subtypeID == VTD.DEPLOYMENT_TERM:
1138            return True
1139        return False
1140   
1141    def isDeployable(self):
1142        if (self.atomTypeID == VTD.ACTIVITY_TERM and self.subtypeID != VTD.DEPLOYMENT_TERM) or \
1143            self.atomTypeID == VTD.DPT_TERM or \
1144            self.atomTypeID == VTD.OBS_TERM:
1145            return True
1146        return False
1147   
1148    def isPublished(self):
1149        '''
1150        Check state of atom doc - if published or Published return True,
1151        otherwise return False
1152        '''
1153        return self.state.isPublishedState()
1154       
1155       
1156    def addCSMLData(self, csmlName, csmlContent, aggregateCoverage=False, useCSMLID=False):
1157        '''
1158        Parse CSML data and add extracted info to the atom
1159        @param csmlName: name of the csml file
1160        @param csmlContent: content of the csml file - NB, if this is set to None and the
1161        file, csmlName, is available locally, CsmlParser.Dataset will read in the file
1162        directly
1163        @keyword aggregateCoverage: if set to True, only coverage data that extends the
1164        atom coverage data will be added
1165        @keyword useCSMLID: if True, use the CSML doc ID as the dataset ID - NB,
1166        this should only be True if creating a new atom - e.g. from a granulite
1167        @return csmlDoc: the CsmlParser.Dataset object with the csml data in
1168        '''
1169        logging.info("Creating CSML data model")
1170        self.csmlFile = csmlName
1171        self.contentFile = csmlName
1172        content = csmlContent or csmlName
1173   
1174        csmlDoc = CsmlParser.Dataset(file=content)
1175       
1176        logging.info("Extracting info from CSML file")
1177        logging.debug("Got dataset ID: %s" %csmlDoc.id)
1178        if useCSMLID:
1179            logging.debug(" - using this ID for the atom")
1180            self.setDatasetID(VTD.GRANULE_TERM + '_' + csmlDoc.id)
1181       
1182        title = csmlDoc.name.CONTENT
1183        logging.debug("Got dataset name (title): '%s'" %title)
1184        # NB, if a title is specified (and not as the default value), it automatically is used in
1185        # place of anything in the granulite file
1186        if title and title != "NAME OF DATASET GOES HERE":
1187            logging.info("Title, '%s', extracted from CSML file" %title)
1188            if self.title:
1189                logging.info("- NB, this will override the title specified in the granulite file ('%s')" \
1190                             %self.title)
1191            self.title = title
1192               
1193        bbox1 = csmlDoc.getBoundingBox()
1194        bbox2 = csmlDoc.getCSMLBoundingBox()
1195
1196        time = None
1197        if bbox2:
1198            time = bbox2.getTimeLimits()
1199   
1200        # now check for other parameters to add to granule
1201        # Firstly, extract the bounding envelope
1202        if bbox1:
1203            w, e = utilities.normaliseLongitude(bbox1[0],bbox1[2])
1204            n, s = (bbox1[3], bbox1[1])
1205   
1206            if not aggregateCoverage or (not self.maxY or float(n) > float(self.maxY)):
1207                self.maxY = n
1208               
1209            if not aggregateCoverage or (not self.minY or float(s) < float(self.minY)):
1210                self.minY = s
1211           
1212            if not aggregateCoverage or (not self.minX or float(w) < float(self.minX)):
1213                self.minX = w
1214   
1215            if not aggregateCoverage or (not self.maxX or float(e) > float(self.maxX)):
1216                self.maxX = e
1217           
1218            logging.debug("Got bounding box data from file: (%s, %s) , (%s, %s)" \
1219                          %(w, s, e, n))
1220           
1221            logging.debug("Updated atom bounding box data: (%s, %s) , (%s, %s)" \
1222                          %(self.minX, self.minY, self.maxX, self.maxY))
1223        else:
1224            logging.debug("No valid bounding box data found")
1225   
1226        if time:
1227            t1 = utilities.formatDateYYYYMMDD(time[0])
1228            if not aggregateCoverage or \
1229                (not self.t1 or datetime.datetime.strptime(t1, YEAR_FORMAT) < \
1230                    datetime.datetime.strptime(self.t1, YEAR_FORMAT)):
1231                self.t1 = t1
1232   
1233            t2 = time[1]
1234            if t2 and t2 != 'None':
1235                t2 = utilities.formatDateYYYYMMDD(t2)
1236                if not aggregateCoverage or \
1237                    (not self.t2 or datetime.datetime.strptime(t2, YEAR_FORMAT) > \
1238                        datetime.datetime.strptime(self.t2, YEAR_FORMAT)):
1239                    self.t2 = t2
1240           
1241            logging.debug("Got time range: %s -> %s" %(self.t1, self.t2))
1242        else:
1243            logging.debug("No valid time range data found")
1244   
1245        #create parameter summaries:
1246        #set up list to hold the parameters data
1247        parameters = []
1248        for feature in csmlDoc.featureCollection.featureMembers:
1249            if hasattr(feature.parameter, 'href'):
1250                paramTriple = ""
1251                if hasattr(feature, 'description'):
1252                    paramTriple = feature.description.CONTENT
1253                    paramTriple += " | " + feature.parameter.href
1254                   
1255                    term = ""
1256                    if hasattr(feature, 'name'):
1257                        term = feature.name.CONTENT
1258   
1259                    paramTriple += " | " + term
1260                   
1261                    logging.debug("Got parameter info: %s" %paramTriple)
1262                    parameters.append(paramTriple)
1263       
1264        # update the atom with the extracted parameters
1265        logging.info("Adding CSML parameters to granule atom")
1266        self.addParameters(parameters)
1267        logging.info("Finished adding CSML data")
1268        return csmlDoc
1269
1270
1271    def lookupAssociatedData(self, type, searchClient, lookupIndirectReferences=False):
1272        '''
1273        Check through the atom links and retrieve any associated data of the
1274        specified type
1275        @param type: type of associated data to lookup - currently VTD.DEPLOYMENT_TERM
1276        or VTD.DE_TERM
1277        @param searchClient: Client implementing the AbstractSearchXMLDBClient class
1278        @keyword lookupIndirectReferences: if True, the atom ID is used to search
1279        defined deployments to find those which reference it, otherwise only
1280        deployments data featured in the atom related links are processed
1281        '''
1282        logging.info("Looking up %s info" %type)
1283        self.allActivities = []
1284        self.allObs = []
1285        self.allDpts = []
1286
1287        if type != VTD.DE_TERM and type != VTD.DEPLOYMENT_TERM:
1288            raise ValueError('Unrecognised associated data type: %s' %type)
1289       
1290        # avoid duplicating lookup effort
1291        if (type == VTD.DEPLOYMENT_TERM and self.deployments) or \
1292            (type == VTD.DE_TERM and self.dataEntities):
1293            logging.info("- this info has already been looked up - returning")
1294            return
1295
1296        # firstly, collect all the references to the info required
1297        if lookupIndirectReferences:
1298            logging.info("Looking up indirect references")
1299           
1300            # if we're looking up DE data for deployments data, need to have the
1301            # deployments info looked up first
1302            if type == VTD.DE_TERM and self.isDeployable() and not self.deployments:
1303                self.lookupAssociatedData(VTD.DEPLOYMENT_TERM, searchClient, 
1304                                          lookupIndirectReferences = lookupIndirectReferences)
1305           
1306            logging.info("Looking up references to this atom from other %s" %type)
1307           
1308            # NB, if we're looking up deployments info, we only look up references
1309            # to this atom - if we're looking up DEs, we need to look up references
1310            # to the deployments referenced by this atom
1311            urls = [self.atomBrowseURL]
1312           
1313            if type == VTD.DE_TERM and self.isDeployable():
1314                urls = []
1315                for dep in self.deployments:
1316                    urls.append(dep.browseURL)
1317                   
1318            links = []
1319            for url in urls:
1320                doc = searchClient.getNDGDoc(type, ndgObject.ASSOCIATED_ATOM_DOC_TYPE, url,
1321                                             targetCollection = dc.ATOM_COLLECTION_PATH)
1322                # now need to turn this results set into actual atoms
1323                tree = ET.fromstring(doc)
1324                for atom in tree:
1325                    logging.debug("- found reference in %s" %type)
1326                    links.append(ET.tostring(atom))
1327                   
1328            logging.info("Finished looking up indirect references")
1329        else:
1330            links = self.getLinksOfType(self.VTD.DEPLOYMENT_TERM)
1331
1332        # now retrieve the references and extract the required data
1333        logging.info("Retrieving info from %s references" %type)
1334        if type == VTD.DEPLOYMENT_TERM:
1335            logging.info("Extracting links data to deployment entitites")
1336            self.deployments = []
1337            for link in links:
1338                if lookupIndirectReferences:
1339                    deploymentAtom = link
1340                else:
1341                    localID = link.href.split("__ATOM__")[-1]
1342                    deploymentAtom = searchClient.getNDGDoc('', 
1343                                                            'ATOM', localID, 
1344                                                            targetCollection = dc.ATOM_COLLECTION_PATH)
1345   
1346                deployment = Deployment.Deployment(Atom(xmlString=str(deploymentAtom)))
1347                self.deployments.append(deployment)
1348               
1349                self.addUniqueLinks(self.allActivities, deployment.activities)
1350                self.addUniqueLinks(self.allObs, deployment.obs)
1351                self.addUniqueLinks(self.allDpts, deployment.dpts)
1352        else:
1353            # for DE data, just store the title + link in a Link object
1354            self.dataEntities = []
1355            logging.info("Extracting links data to data entitites")
1356            for data in links:
1357                atom = Atom(xmlString=str(data))
1358                link = Link()
1359                link.title = atom.title
1360                link.href = atom.atomBrowseURL
1361                link.rel = atom.datasetID
1362               
1363                # NB, different deployments may be used by the same DE - so
1364                # avoid duplication
1365                self.addUniqueLinks(self.dataEntities, link)
1366           
1367        logging.info("Finished looking up %s info" %type)
1368
1369
1370    def addUniqueLinks(self, dataArray, links):
1371        '''
1372        Add links to specified array - if they are not already included
1373        @param dataArray: a list, potentially arlready containing links
1374        @param links: a Link or array of Links to add to the dataArray
1375        '''
1376        logging.debug("Adding new links")
1377        if not links:
1378            return
1379       
1380        if type(links) is not list:
1381            links = [links]
1382       
1383        for link in links:
1384            if type(link) is not Link:
1385                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
1386                continue
1387            if link not in dataArray:
1388                logging.debug("- adding unique link")
1389                dataArray.append(link)
1390        logging.debug("Finished adding links")
1391
1392       
1393    def getFullPath(self):
1394        '''
1395        Return full path to atom in eXist, if it exists, or None, otherwise
1396        @return fullPath: string - collection + filename of atom in eXist
1397        '''
1398        # NB, name assigned when atom created in eXist - so if not defined, not
1399        # in eXist
1400        logging.debug("Getting full path to atom")
1401        if self.atomName:
1402            logging.debug("Return full path to atom in eXist")
1403            return self.getDefaultCollectionPath() + self.atomName
1404        logging.debug("Atom doesn't currently exist in eXist - return 'None'")
1405        return None
1406   
1407   
1408    def getSubTypePrettyName(self):
1409        '''
1410        Return the subtype of the atom in a human readable form
1411        @return: sub type of atom as a verbose string
1412        '''
1413        logging.debug("Getting human readable version of atom subtype")
1414        subType = self.SUB_TYPE_NOT_DEFINED_NAME
1415        if self.subtypeID:
1416           subType = self.VTD.tidySubTypeTitle(self.subtypeID)
1417           
1418        logging.debug("- returning subtype: '%s'" %subType)
1419        return subType
Note: See TracBrowser for help on using the repository browser.