source: exist/trunk/python/ndgUtils/models/Atom.py @ 4780

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/models/Atom.py@4780
Revision 4780, 51.4 KB checked in by cbyrom, 11 years ago (diff)

Create AtomState object to represent the different atom publication
states - tying together the associated name and collection paths.
Update Atom model to use this.

RevLine 
[4209]1'''
2 Class representing data in  atom format - allowing moles data to be stored and accessed in a web feed compatible way
3 
4 @author: C Byrom, Tessella Jun 2008
5'''
6try: #python 2.5
7    from xml.etree import cElementTree as ET
8except ImportError:
9    try:
10        # if you've installed it yourself it comes this way
11        import cElementTree as ET
12    except ImportError:
13        # if you've egged it this is the way it comes
14        from ndgUtils.elementtree import cElementTree as ET
15import sys, logging, re, datetime
[4696]16from ndgUtils import ndgObject
[4209]17from ndgUtils.eXistConnector import eXistConnector
18from ndgUtils.ETxmlView import et2text
[4564]19import ndgUtils.lib.utilities as utilities
[4663]20from ndgUtils.models.vocabtermdata import VocabTermData as VTD
[4209]21from ndgUtils.models import MolesEntity as ME
[4419]22import csml.parser as CsmlParser
[4440]23from ndgUtils.models import Deployment as Deployment
[4780]24from ndgUtils.models import AtomState
[4209]25
class AtomError(Exception):
    """
    Error type raised by the Atom model classes.

    NB, the message is written to the error log when the exception is
    constructed.
    """
    def __init__(self, msg):
        # record the problem in the log, then hand the message to Exception
        logging.error(msg)
        super(AtomError, self).__init__(msg)
33
34
class Person(object):
    '''
    Class representing atom author type data - with name, uri and role attributes
    @keyword personType: Type of person to create - specified using the Person.._Type
    values.  Default is AUTHOR_TYPE.
    @keyword namespace: a two value array of format, ['short_namespace_name', 'full_namespace_name']
    - e.g. ['moles', 'http://ndg.nerc.ac.uk/schema/moles2beta']
    '''
    AUTHOR_TYPE = 0
    CONTRIBUTOR_TYPE = 1
    RESPONSIBLE_PARTY_TYPE = 2
    # element names, indexed by the person type values above
    ELEMENT_NAMES = ["author", "contributor", "responsibleParty"]

    def __init__(self, personType = AUTHOR_TYPE, namespace = None):
        self.type = personType
        if namespace:
            self.ns_shortname = namespace[0]
            self.ns_fullname = namespace[1]
        else:
            # no namespace given - use the atom namespace without a prefix
            self.ns_shortname = ""
            self.ns_fullname = ndgObject.ATOM_NS

        self.name = ""
        self.uri = ""
        self.role = ""

        # NB, the atom format specifies slightly different data contents
        self.uriTagName = "email"
        # NB, responsible party data is always stored in the moles section
        # - so this overrides any namespace passed in
        if self.type == self.RESPONSIBLE_PARTY_TYPE:
            self.ns_shortname = 'moles'
            self.ns_fullname = ndgObject.MOLES_NS
            self.uriTagName = "uri"

    def __str__(self):
        '''
        @return: 'name | uri | role' triple string, or an empty string if
        none of the three values are set
        '''
        if self.name or self.uri or self.role:
            return self.name + " | " + self.uri + " | " + self.role
        return ""

    def hasValue(self):
        '''
        @return: True if any of name, uri or role is set; False otherwise
        '''
        return bool(self.name or self.uri or self.role)

    def fromString(self, personString):
        '''
        Populate the person from a triple string
        @param personString: string holding the name, uri and role values,
        in that order, in the format understood by utilities.getTripleData
        '''
        (self.name, self.uri, self.role) = utilities.getTripleData(personString)

    def fromETElement(self, personTag):
        '''
        Populate the person from an elementtree element
        @param personTag: element whose name, role and email/uri children,
        in this person's namespace, hold the data; missing children are
        stored as empty strings
        '''
        self.name = personTag.findtext('{%s}name' %self.ns_fullname) or ""
        self.role = personTag.findtext('{%s}role' %self.ns_fullname) or ""
        self.uri = personTag.findtext('{%s}%s' %(self.ns_fullname, self.uriTagName)) or ""
        logging.debug("Added name: '%s', role: '%s', %s: '%s'" \
                      %(self.name, self.role, self.uriTagName, self.uri))

    def toXML(self):
        '''
        Create an element representing the person; only the values that
        are set are included as child elements
        @return: elementtree element named according to the person type
        '''
        prefix = ""
        if self.ns_shortname:
            prefix = self.ns_shortname + ':'

        author = ET.Element(prefix + self.ELEMENT_NAMES[self.type])

        if self.name:
            name = ET.SubElement(author, prefix + "name")
            name.text = self.name

        if self.uri:
            uri = ET.SubElement(author, prefix + self.uriTagName)
            uri.text = self.uri

        if self.role:
            role = ET.SubElement(author, prefix + "role")
            role.text = self.role

        return author

    def __eq__(self, person1):
        '''
        Two persons are equal if their uri, name, role and type all match.
        NB, defined explicitly, rather than relying on __cmp__ alone, since
        __cmp__ is not consulted by python 3 - which would silently reduce
        'if person in personArray' to an identity check
        '''
        if self is person1:
            return True
        try:
            return self.uri == person1.uri and self.name == person1.name and \
                self.role == person1.role and self.type == person1.type
        except AttributeError:
            # not a person-like object (e.g. None) - let python decide
            return NotImplemented

    def __ne__(self, person1):
        # NB, python 2 does not derive != from ==, so define it explicitly
        isEqual = self.__eq__(person1)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    # NB, defining __eq__ removes the default hash in python 3; keep the
    # identity based hash the class has always had
    __hash__ = object.__hash__

    def __cmp__(self, person1):
        '''
        Override comparison to allow proper object comparison when checking
        if Person objects are in an array already - i.e. if person in personArray...
        @return: 0 if the persons match, -1 if person1 is empty, 1 otherwise
        '''
        if not person1:
            return -1

        if self == person1:
            return 0
        return 1
[4209]125
[4296]126
class Link(object):
    '''
    Class representing an atom link - with href, title and rel attributes
    '''

    def __init__(self):
        self.href = ""
        self.title = ""
        self.rel = ""

    def fromString(self, linkString):
        '''
        Populate the link from a triple string
        @param linkString: string holding the href, title and rel values,
        in that order, in the format understood by utilities.getTripleData
        '''
        (self.href, self.title, self.rel) = utilities.getTripleData(linkString)

    def fromETElement(self, linkTag):
        '''
        Populate the link from the attributes of an elementtree element;
        missing attributes are stored as empty strings
        @param linkTag: element with href, rel and title attributes
        '''
        self.href = linkTag.attrib.get('href') or ""
        self.rel = linkTag.attrib.get('rel') or ""
        self.title = linkTag.attrib.get('title') or ""

    def toXML(self):
        '''
        @return: 'link' element with the href, title and rel attributes set
        '''
        link = ET.Element("link")
        link.attrib["href"] = self.href
        link.attrib["title"] = self.title
        link.attrib["rel"] = self.rel
        return link

    def hasValue(self):
        '''
        @return: True if the link has an href or a title; False otherwise
        '''
        # NB, just a rel on its own is meaningless - so ignore
        return bool(self.href or self.title)

    def __str__(self):
        '''
        @return: 'href | title | rel' triple string, or an empty string if
        none of the three values are set
        '''
        if self.href or self.title or self.rel:
            return self.href + " | " + self.title + " | " + self.rel
        return ""

    def isChildAtom(self):
        '''
        Determines whether the link refers to another atom - e.g. a link to
        a data granule
        @return True, if so; False otherwise
        '''
        # the rel value ends with the vocab term of the atom type referenced
        if self.rel.endswith(VTD.GRANULE_TERM) or \
            self.rel.endswith(VTD.DEPLOYMENT_TERM) or \
            self.rel.endswith(VTD.ACTIVITY_TERM) or \
            self.rel.endswith(VTD.DPT_TERM) or \
            self.rel.endswith(VTD.OBS_TERM):
            return True

        return False

    def __eq__(self, link1):
        '''
        Two links are equal if their href, title and rel all match.
        NB, defined explicitly, rather than relying on __cmp__ alone, since
        __cmp__ is not consulted by python 3 - which would silently reduce
        'if link in linkArray' to an identity check
        '''
        if self is link1:
            return True
        try:
            return self.href == link1.href and self.title == link1.title and \
                self.rel == link1.rel
        except AttributeError:
            # not a link-like object (e.g. None) - let python decide
            return NotImplemented

    def __ne__(self, link1):
        # NB, python 2 does not derive != from ==, so define it explicitly
        isEqual = self.__eq__(link1)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    # NB, defining __eq__ removes the default hash in python 3; keep the
    # identity based hash the class has always had
    __hash__ = object.__hash__

    def __cmp__(self, link1):
        '''
        Override comparison to allow proper object comparison when checking
        if Link objects are in an array already - i.e. if link in linkArray...
        @return: 0 if the links match, -1 if link1 is empty, 1 otherwise
        '''
        if not link1:
            return -1

        if self == link1:
            return 0
        return 1
[4244]192
[4296]193
class Category(object):
    '''
    Model of an atom category element - holding its term, scheme and label
    '''
    def __init__(self):
        self.term = ""
        self.scheme = ""
        self.label = ""

    def fromString(self, linkString, escapeSpecialCharacters=True):
        '''
        Populate the category from a triple string of format, 'label | scheme | term'
        @param linkString: triple string holding the category data
        @keyword escapeSpecialCharacters: passed on as the doEscape keyword when
        the triple string is parsed; True by default
        '''
        triple = utilities.getTripleData(linkString, doEscape=escapeSpecialCharacters)
        (self.label, self.scheme, self.term) = triple

    def fromETElement(self, linkTag):
        '''
        Populate the category from the attributes of an elementtree element
        - any attribute that is missing is stored as an empty string
        @param linkTag: element with term, label and scheme attributes
        '''
        for field in ('term', 'label', 'scheme'):
            setattr(self, field, linkTag.attrib.get(field) or "")

    def toXML(self):
        '''
        @return: 'category' element with term, scheme and label attributes set
        '''
        categoryElement = ET.Element("category")
        categoryElement.set("term", self.term)
        categoryElement.set("scheme", self.scheme)
        categoryElement.set("label", self.label)
        return categoryElement

    def hasValue(self):
        '''
        @return: True if any of scheme, label or term is set; False otherwise
        '''
        return bool(self.scheme or self.label or self.term)
[4209]229
230
231class Atom(object):
232
[4244]233    # labels for use with the atom categories
[4209]234    ATOM_TYPE = "ATOM_TYPE"
235    ATOM_SUBTYPE = "ATOM_SUBTYPE"
236
[4244]237    # labels for use with the templates to set/extract specific inputs
238    ONLINE_REF_LABEL = "online_ref"
[4314]239    PARAMETER_LABEL = "parameter"
[4282]240    ATOM_REF_LABEL = "atom_ref"
[4296]241    DELIMITER = "---"
242    REMOVE_LABEL = "remove"
[4347]243   
244    # format to use for t1-t2 date range
245    YEAR_FORMAT = '%Y-%m-%d'
[4244]246
[4209]247    def __init__(self, atomType = None, vocabTermData = None, ndgObject = None, \
[4780]248                 xmlString = None, state = AtomState.WORKING_STATE, **inputs):
[4209]249        '''
250        Constructor - initialise the atom variables
[4780]251        @keyword atomType: type of atom to set up
252        @keyword vocabTermData: instance of VocabTermData object to use with atom
253        @keywork ndgObject: instance of ndgObject to use with atom
254        @keyword xmlString: XML representation of atom - will be parsed to populate
255        the atom data
256        @keyword state:  AtomState object representing the state of the atom
[4209]257        '''
258        logging.info("Initialising atom")
259        if atomType:
260            logging.info(" - of type '%s'" %atomType)
[4217]261        self.atomTypeID = atomType
[4209]262
263        # some data have further subtypes specified
[4286]264        self.subtypeID = None # this should be the termID
265        self.subtype = None # and this should be the fully formed vocab URL
[4209]266       
267        self.ndgObject = ndgObject
268
269        self.atomName = None
270        self.files = []
[4605]271        self.author = Person()
[4209]272        self.contributors = []
273        self.atomAuthors = []
274        self.parameters = []
275        self.spatialData = []
276        self.temporalData = []
277        self.relatedLinks = []
278        self.summary = []
[4296]279        self.content = []
[4440]280        # NB, this deployments data duplicates other atom data - and is only used for a
[4531]281        # convenient way to collect the info (by lookupAssociatedData()) for use in templates
282        self.deployments = []
283        # ditto for the following field
284        self.dataEntities = []
285           
[4209]286        self.csmlFile = None
287        self.cdmlFile = None
288        # general variable to use for setting the atom content - NB, if a csmlFile is specified
289        # (either directly or via a cdmlFile specification), this will be the content by default
290        # for this purpose
291        self.contentFile = None     
292        self.title = None
293        self.datasetID = None        # NB, the dataset id ends up in the atomName - <path><datasetID>.atom
294        self.atomID = None
295   
296        # boundary box info - to replace spatial/temporalData?
297        self.minX = None
298        self.minY = None
299        self.maxX = None
300        self.maxY = None
301        self.t1 = None
302        self.t2 = None
[4217]303
[4209]304        self.ME = ME.MolesEntity()
305       
306        # date when the atom was first ingested
307        self.publishedDate = None
308
309        # last update date
[4236]310        self.updatedDate = None
[4209]311
312        # assume atom in working state by default - this is used to define what collection
313        # in eXist the atom is stored in
[4219]314        self.state = state
[4209]315       
[4219]316        # additional, non standard atom data can be included in the molesExtra element
317        if vocabTermData:
318            self.VTD = vocabTermData
319        else:
320            self.VTD = VTD()
321       
[4209]322        if xmlString:
323            self.fromString(xmlString)
[4219]324
325        # if inputs passed in as dict, add these now
[4236]326        if inputs:
[4240]327            logging.info("Adding info to atom from input dict")
328            logging.debug(inputs)
[4236]329            self.__dict__.update(inputs)
330           
[4240]331            # NB, this doesn't trigger the Summary Property, so do this
332            # explicitly, if need be
[4282]333            if inputs.has_key('Summary'):
[4240]334                self.Summary = inputs.get('Summary')
[4296]335            if inputs.has_key('Content'):
336                self.Content = inputs.get('Content')
[4601]337            if inputs.has_key('author'):
338                name = inputs.get('author')
339                author = Person()
340                author.fromString(name)
341                self.author = author
[4240]342           
343            # also pass any moles data up to the moles entity object
[4627]344            if inputs.has_key('providerID'):
[4236]345                self.ME.providerID = inputs.get('providerID')
[4296]346               
[4627]347            if inputs.has_key('abbreviation'):
[4296]348                self.ME.abbreviation = inputs.get('abbreviation')
[4219]349
350        if self.atomTypeID:
351            self.atomTypeName = self.VTD.TERM_DATA[self.atomTypeID].title
352
[4209]353        logging.info("Atom initialised")
354
355
[4314]356    def addOnlineReferences(self, links):
[4296]357        '''
[4314]358        Add online reference data associated with the atom
359        - NB, care needs to be taken here since this data is stored in the atom
360        link elements and these are also used for the various atom associations
361        @param links: a Link or array of Links to add to the relatedLinks attribute
362        '''
363        logging.debug("Adding online references")
364        if not links:
365            return
366       
367        if type(links) is not list:
368            links = [links]
369       
370        # firstly clear out any online refs data from the existing related links
371        newLinks = []
372        for link in self.relatedLinks:
373            if link.isChildAtom():
374                newLinks.append(link)
375       
376        newLinks.extend(links)
377        self.relatedLinks = newLinks
378        logging.debug("Online references added")
379
380
381    def addUniqueRelatedLinks(self, links):
382        '''
[4296]383        Add links to relatedLinks array - if they are not already included
384        @param links: a Link or array of Links to add to the relatedLinks attribute
385        '''
[4444]386        self.addUniqueLinks(self.relatedLinks, links)
[4296]387       
[4314]388
389    def removeRelatedLinks(self, linksToDelete):
390        '''
391        Remove any links in the input list from the atom's related links list
392        @param linksToDelete: array of Link objects to remove from atom
393        '''
394        logging.debug("Removing related links from atom")
395        if not linksToDelete:
396            return
[4296]397       
[4314]398        if type(linksToDelete) is not list:
399            linksToDelete = [linksToDelete]
400       
401        updatedLinks = []
402        for link in self.relatedLinks:
403            if type(link) is not Link:
404                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
405                continue
406            if link in linksToDelete:
407                logging.debug("- found link to remove")
408            else:
409                updatedLinks.append(link)
[4296]410
[4314]411        self.relatedLinks = updatedLinks
412        logging.debug("Links removed")
[4780]413
414    def getPublicationStatePath(self):
415        '''
416        Determine the correct publication state collection for the atom
417        @return collectionPath: collection path for the publication state of the atom
418        '''
419        logging.debug("Getting collection path for atom publication state")
420        collectionPath = eXistConnector.BASE_COLLECTION_PATH + self.state.collectionPath
421        logging.debug("Returning publication state collection, '%s'" %collectionPath)
422        return collectionPath
[4314]423       
424
[4780]425    def getDefaultEntityCollectionPath(self):
[4219]426        '''
[4780]427        Determine the correct collection for the entity type of the atom
428        @return entityPath: collection path for the data type of the atom
[4219]429        '''
[4780]430        logging.debug("Getting collection path for atom entity type")
431        collectionPath = self.getPublicationStatePath()
[4219]432       
433        if self.atomTypeID == VTD.DE_TERM:
434            collectionPath += eXistConnector.DE_COLLECTION_PATH
435        elif self.atomTypeID == VTD.GRANULE_TERM:
436            collectionPath += eXistConnector.GRANULE_COLLECTION_PATH
[4296]437        elif self.atomTypeID == VTD.ACTIVITY_TERM and \
438            self.subtypeID == VTD.DEPLOYMENT_TERM:
[4286]439            collectionPath += eXistConnector.DEPLOYMENTS_COLLECTION_PATH
[4219]440        else:
441            collectionPath += eXistConnector.DEPLOYMENT_COLLECTION_PATH
442       
[4780]443        logging.debug("Returning entity collection, '%s'" %collectionPath)
444        return collectionPath
445       
446
447    def getDefaultCollectionPath(self):
448        '''
449        Determine the correct collection to use for the atom in eXist
450        '''
451        logging.debug("Getting default collection path for atom")
452        collectionPath = self.getDefaultEntityCollectionPath()
[4229]453        if not self.ME.providerID:
454            raise AtomError("Error: cannot determine atom collection path because " + \
455                            "the provider ID is not defined")
456           
457        collectionPath += self.ME.providerID + "/"
[4780]458        logging.debug("Returning collection, '%s'" %collectionPath)
[4219]459        return collectionPath
[4217]460
[4440]461
[4209]462    def __addAtomTypeDataXML(self, root):
463        '''
464        Add the atom type, and subtype data, if available, to atom categories
465        - and lookup and add the appropriate vocab term data
466        '''
[4217]467        if self.atomTypeID:
[4209]468            logging.info("Adding atom type info to XML output")
469            category = Category()
[4217]470            category.label = self.atomTypeID
[4209]471            # look up the appropriate vocab term data
[4217]472            category.scheme = self.VTD.getTermCurrentVocabURL(self.atomTypeID)
[4209]473            category.term = self.ATOM_TYPE
474            root.append(category.toXML())
475
[4286]476        if self.subtypeID:
[4209]477            logging.info("Adding atom subtype info to XML output")
478            # NB subtypes not all defined, so leave this out for the moment
[4286]479            category.label = self.subtypeID
[4209]480            # look up the appropriate vocab term data
[4286]481            category.scheme = self.VTD.getTermCurrentVocabURL(self.subtypeID)
[4209]482            category.term = self.ATOM_SUBTYPE
483            root.append(category.toXML())
484
485
486    def addMolesEntityData(self, abbreviation, provider_id, object_creation_time):
487        '''
488        Add data to include in the moles entity element
489        '''
490        logging.debug('Adding moles entity information')
491        self.ME.abbreviation = abbreviation
492        self.ME.providerID = provider_id
[4564]493        self.ME.createdDate = utilities.getISO8601Date(object_creation_time)
[4209]494        logging.debug('Moles entity information added')
495
496
[4240]497    def addAuthors(self, authors):
498        '''
499        Add author data appropriately to the atom
500        NB, these will overwrite any existing authors of the same type
501        @param authors: list of Person objects with the author data
502        '''
503        logging.debug('Adding authors data to Atom')
504        isFirstAuthor = {}
505        authorArray = None
506        for author in authors:
507            # NB, we're only allowed one atom author
508            if author.type == Person.AUTHOR_TYPE:
509                self.author = author
[4592]510                   
[4240]511                if isFirstAuthor.has_key(author.type):
512                    raise AtomError("Error: an atom can only have one author specified")
513                isFirstAuthor[author.type] = 1
514                continue
515            elif author.type == Person.CONTRIBUTOR_TYPE:
516                authorArray = self.contributors
517            elif author.type == Person.RESPONSIBLE_PARTY_TYPE:
518                authorArray = self.ME.responsibleParties
519               
520            # check if this is the first addition - if so, clear out the
521            # array in advance
522            if not isFirstAuthor.has_key(author.type):
523                logging.debug("Clearing out author array")
524                # NB, need to be careful to clear the array, not create a ref
525                # to a new array
526                del authorArray[:]
527                isFirstAuthor[author.type] = 1
528
[4592]529            if author.hasValue() and author not in authorArray:
[4240]530                logging.debug("Adding author (type:'%s', name:'%s', uri:'%s', role:'%s')" \
531                              %(author.type, author.name, author.uri, author.role))
532                authorArray.append(author)
533
534        logging.debug('Finished adding authors data')
535
536
[4209]537    def _isNewParameter(self, param):
538        '''
539        Check if a parameter is already specified in the atom, return False if
540        so, otherwise return True
541        '''
542        for p in self.parameters:
543            if p.term == param.term and \
544                p.scheme == param.scheme and \
545                p.label == param.label:
546                return False
547        return True
548
549
550    def addRelatedLinks(self, linkVals):
551        '''
552        Add related links in string format - converting to Link objects
[4555]553        NB, only add the link if it is unique
554       
[4209]555        @param linkVals: string of format, 'uri | title | vocabServerURL'
556        '''
[4555]557        link = self.objectify(linkVals, 'relatedLinks')
558        if link not in self.relatedLinks:
559            self.relatedLinks.append(link)
[4209]560
561
562    def addParameters(self, params):
563        '''
564        Add a parameter to list - ensuring it is unique and has been formatted and tidied appropriately
565        @params param: parameter, as string array, to add to atom parameters collection
566        '''
567        # avoid strings being parsed character by character
568        if type(params) is str:
569            params = [params]
570           
571        for param in params:
572            # firstly tidy parameter
[4564]573            param = utilities.tidyUpParameters(param)
[4209]574            category = Category()
[4444]575            # NB, data already tidied up here, so set keyword to avoid this happening again
[4512]576            category.fromString(param, escapeSpecialCharacters=True)
[4209]577
578            # now check for uniqueness
579            if self._isNewParameter(category):
580                logging.debug("Adding new parameter: %s" %param)
581                self.parameters.append(category)
582   
583   
584    def _linksToXML(self, root):
585        '''
586        Add required links to the input element
587        @param root: element to add links to - NB, should be the root element of the atom
588        '''
589        selfLink = ET.SubElement(root, "link")
590        selfLink.attrib["href"] = self.atomBrowseURL
591        selfLink.attrib["rel"] = "self"
592       
593        for relatedLink in self.relatedLinks:
[4244]594            if relatedLink.hasValue():
595                root.append(relatedLink.toXML())
[4209]596   
597    def toXML(self):
598        '''
599        Convert the atom into XML representation and return this
600        @return: xml version of atom
601        '''
602        logging.info("Creating formatted XML version of Atom")
603        root = ET.Element("entry")
[4696]604        root.attrib["xmlns"] = ndgObject.ATOM_NS
605        root.attrib["xmlns:moles"] = ndgObject.MOLES_NS
606        root.attrib["xmlns:georss"] = ndgObject.GEOSS_NS
607        root.attrib["xmlns:gml"] = ndgObject.GML_NS
[4209]608        id = ET.SubElement(root, "id")
609        id.text = self.atomID
610        title = ET.SubElement(root, "title")
611        title.text = self.title
612        self._linksToXML(root)
613
[4595]614        if self.author and self.author.hasValue():
[4592]615            root.append(self.author.toXML())
[4209]616           
617        for contributor in self.contributors:
618            root.append(contributor.toXML())
619
620        # add parameters data
621        for param in self.parameters:
[4314]622            if param.hasValue():
623                root.append(param.toXML())
[4209]624
625        # add the type and subtype data
626        self.__addAtomTypeDataXML(root)
627                   
628        summary = ET.SubElement(root, "summary")
[4347]629        summary.text = self.Summary
[4296]630                   
[4209]631        # add link to content, if required - NB, can only have one content element in atom
632        # - and this is mandatory
633        content = ET.SubElement(root, "content")
[4502]634        contentFile = self.contentFile or self.csmlFile or self.cdmlFile
635        if contentFile:
[4209]636            content.attrib["type"] = "application/xml"
[4502]637            content.attrib["src"] = contentFile
[4209]638        else:
[4347]639            content.attrib["type"] = "xhtml"
[4592]640            div = ET.SubElement(content, 'div')
[4696]641            div.attrib["xmlns"] = ndgObject.XHTML_NS
[4592]642            div.text = self.Content
[4209]643       
644        # if there's a published date already defined, assume we're doing an update now
645        # NB, update element is mandatory
646        currentDate = datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%SZ")
647        if not self.publishedDate:
648            self.publishedDate = currentDate
649
650        updated = ET.SubElement(root, "updated")
[4236]651        if not self.updatedDate:
652            self.updatedDate = currentDate
653        updated.text = self.updatedDate
[4209]654
655        published = ET.SubElement(root, "published")
656        published.text = self.publishedDate
657
[4592]658        # add the moles entity section, if it is required
659        if self.ME:
660            root.append(self.ME.toXML())
661
[4209]662        # add temporal range data, if available
663        temporalRange = ET.SubElement(root, "moles:temporalRange")
664        if self.t1:
665            temporalRange.text = self.t1
666            if self.t2:
667                temporalRange.text += "/" + self.t2
668
669        # add spatial range data, if available
670        self._addSpatialData(root)
671
672        tree = ET.ElementTree(root)
673        logging.info("XML version of Atom created")
674        return tree
675
676
677    def __getSummary(self):
678        logging.debug("Getting summary data")
679        summaryString = ""
680        for summary_line in self.summary:
681            summaryString += summary_line + "\n"
682
683        return summaryString
684
685    def __setSummary(self, summary):
686        logging.debug("Adding summary data")
687        self.summary = []
688        for summary_line in summary.split('\n'):
[4564]689            self.summary.append(utilities.escapeSpecialCharacters(summary_line))
[4209]690           
691    Summary = property(fset=__setSummary, fget=__getSummary, doc="Atom summary")
692
[4296]693
694    def __getContent(self):
695        logging.debug("Getting content data")
696        contentString = ""
697        # NB, there must be content specified in an atom
698        if not self.content:
699            return "Metadata document"
700       
701        for content_line in self.content:
702            contentString += content_line + "\n"
703
704        return contentString
705
706    def __setContent(self, content):
707        logging.debug("Adding content data")
708        self.content = []
709        for content_line in content.split('\n'):
[4490]710            self.content.append(content_line)
[4209]711           
[4296]712    Content = property(fset=__setContent, fget=__getContent, doc="Atom content")
713
714           
[4209]715    def fromString(self, xmlString):
716        '''
717        Initialise Atom object using an xmlString
718        @param xmlString: representation of atom as an XML string
719        '''
720        logging.info("Ingesting data from XML string")
721        logging.debug("Create elementtree instance with XML string")
722        tree = ET.fromstring(xmlString)
[4696]723        title = tree.findtext('{%s}title' %ndgObject.ATOM_NS)
[4209]724        if title:
725            logging.debug("Adding title data")
726            self.title = title
727
[4696]728        summary = tree.findtext('{%s}summary' %ndgObject.ATOM_NS)
[4209]729        if summary:
[4490]730            self.Summary = summary#.decode('unicode_escape')
[4209]731
[4696]732        authorElement = tree.find('{%s}author' %ndgObject.ATOM_NS)
[4592]733        if authorElement:
734            logging.debug("Adding author data")
735            author = Person()
736            author.fromETElement(authorElement)
737            self.author = author
[4209]738
[4696]739        contributorElements = tree.findall('{%s}contributor' %ndgObject.ATOM_NS)
[4209]740        for contributorElement in contributorElements:
741            logging.debug("Adding contributor data")
[4240]742            contributor = Person(personType = Person.CONTRIBUTOR_TYPE)
[4209]743            contributor.fromETElement(contributorElement)
744            self.contributors.append(contributor)
745
[4696]746        molesElement = tree.find('{%s}entity' %ndgObject.MOLES_NS)
[4209]747        if molesElement:
748            self.ME.fromET(molesElement)
749               
[4696]750        self.atomID = tree.findtext('{%s}id' %ndgObject.ATOM_NS)
[4209]751
[4696]752        self._parseCategoryData(tree.findall('{%s}category' %ndgObject.ATOM_NS))
[4209]753
[4696]754        self._parseLinksData(tree.findall('{%s}link' %ndgObject.ATOM_NS))
[4209]755           
[4696]756        contentTag = tree.find('{%s}content' %ndgObject.ATOM_NS)
[4209]757        if contentTag != None:
758            logging.debug("Found content tag - checking for CSML/CDML file data")
759            file = contentTag.attrib.get('src')
760            if file:
761                # NB, the path will reveal more reliably whether we're dealing with CSML and CDML files
762                if file.upper().find('CSML') > -1:
763                    logging.debug("Adding CSML file data")
764                    self.csmlFile = file
765                elif file.upper().find('CDML') > -1:
766                    logging.debug("Adding CDML file data")
767                    self.cdmlFile = file
768                self.contentFile = file
[4296]769            else:
770                logging.debug("No file data - adding contents of element instead")
[4592]771                div = contentTag.find('{http://www.w3.org/1999/xhtml}div')
772                self.Content = div.text
[4209]773       
[4696]774        range = tree.findtext('{%s}temporalRange' %ndgObject.MOLES_NS)
[4209]775        if range:
776            logging.debug("Adding temporal range data")
777            timeData = range.split('/')
778            self.t1 = timeData[0]
779            if len(timeData) > 1:
780                self.t2 = timeData[1]
781       
[4696]782        where = tree.find('{%s}where' %ndgObject.GEOSS_NS)
783        if where:
784            # NB, this parser won't mind if we're dealing with Envelope or EnvelopeWithTimePeriod
785            minBBox = where.findall('.//{%s}lowerCorner' %ndgObject.GML_NS)
786            if minBBox:
787                logging.debug("Adding min spatial range data")
788                minBBox = minBBox[0]
789                spatialData = minBBox.text.split()
790                self.minX = spatialData[0]
791                if len(spatialData) > 1:
792                    self.minY = spatialData[1]
793           
794            maxBBox = where.findall('.//{%s}upperCorner' %ndgObject.GML_NS)
795            if maxBBox:
796                maxBBox = maxBBox[0]
797                logging.debug("Adding max spatial range data")
798                spatialData = maxBBox.text.split()
799                self.maxX = spatialData[0]
800                if len(spatialData) > 1:
801                    self.maxY = spatialData[1]
[4209]802               
[4721]803        publishedDate = tree.findtext('{%s}published' %ndgObject.ATOM_NS)
[4209]804        if publishedDate:
805            logging.debug("Adding published date")
806            self.publishedDate = publishedDate
[4236]807               
[4721]808        updatedDate = tree.findtext('{%s}updated' %ndgObject.ATOM_NS)
[4236]809        if updatedDate:
810            logging.debug("Adding updated date")
811            self.updatedDate = updatedDate
[4209]812           
813        logging.info("Completed data ingest")
814   
815   
816    def _parseCategoryData(self, categories):
817        logging.debug("Adding category/parameters data")
818        for category in categories:
819            cat = Category()
820            cat.fromETElement(category)
821           
822            if cat.term == self.ATOM_TYPE:
823                logging.debug("Found atom type data")
[4217]824                self.atomTypeID = cat.label
825                self.atomTypeName = self.VTD.TERM_DATA[cat.label].title
[4209]826                continue
827            elif cat.term == self.ATOM_SUBTYPE:
828                logging.debug("Found atom subtype data")
[4286]829                self.subtypeID = cat.label
830                self.subtype = cat.scheme
[4209]831                continue
832
833            self.parameters.append(cat)
834   
835
836    def setDatasetID(self, datasetID):
837        '''
838        Set the dataset ID for the atom - and generate an appropriate atom name using this
839        @param datasetID: ID to set for the atom
840        '''
841        self.datasetID = datasetID
842        self._generateAtomName(datasetID) 
843        self.atomID = self.createAtomID(datasetID)
844
845
846    def createAtomID(self, datasetID):
847        '''
848        Create a unique ID, conforming to atom standards, for atom
849        NB, see http://diveintomark.org/archives/2004/05/28/howto-atom-id
850        @param datasetID: ID of atom's dataset
851        @return: unique ID
852        '''
853        logging.info("Creating unique ID for atom")
854        if not self.atomBrowseURL:
855            self._generateAtomName(datasetID)
856        urlBit = self.atomBrowseURL.split('://')[1]
857        urlBit = urlBit.replace('#', '')
858        urlBits = urlBit.split('/')
[4592]859        host = urlBits[0].split(':')[0] # avoid the port colon - as this breaks the ID format
[4209]860        dateBit = datetime.datetime.today().strftime("%Y-%m-%d")
861       
[4592]862        id = "tag:" + host + "," + dateBit + ":/" + "/".join(urlBits[1:])
[4209]863        logging.info("- unique ID created for atom")
864        logging.debug(" - '%s'" %id)
865        return id
866       
867       
868    def _generateAtomName(self, datasetID):
869        '''
870        Generate a consistent name for the atom - with full eXist doc path
871        @param datasetID: ID of atom's dataset
872        '''
873        self.atomName = datasetID + ".atom"
[4627]874        if not self.ME.providerID:
875            raise ValueError("Provider ID has not been specified for atom - please add this and retry")
[4236]876        self.ndgURI = self.ME.providerID + "__ATOM__" + datasetID
877        self.atomBrowseURL = VTD.BROWSE_ROOT_URL + self.ndgURI
[4209]878
879
880    def _parseLinksData(self, links):
881        '''
882        Extract links and atom data from array of link elements in the XML representation of the atom
883        @param links: an array of <link> elements
884        '''
885        # firstly, get all data to start with, so we can properly process it afterwards
886        linkData = {}
887        logging.debug("Getting link data")
888        for linkTag in links:
889            link = Link()
890            link.fromETElement(linkTag)
891
892            if not linkData.has_key(link.rel):
893                linkData[link.rel] = []
[4347]894           
895            linkData[link.rel].append(link)
[4209]896
897        # there should be one self referencing link - which will provide info on the atom itself
898        if not linkData.has_key('self'):
899            errorMessage = "Atom does not have self referencing link - " + \
900                "cannot ascertain datasetID without this - please fix"
901            logging.error(errorMessage)
902            raise ValueError(errorMessage)
903       
904        # this is the link describing the atom itself
905        self.atomBrowseURL = linkData['self'][0].href
906       
[4229]907        self.datasetID = self.atomBrowseURL.split("__ATOM__")[-1]
908        self.atomName = self.datasetID + ".atom"
[4236]909        self.ndgURI = self.atomBrowseURL.split(VTD.BROWSE_ROOT_URL)[1]
[4229]910       
[4209]911        # now remove this value and the associated moles doc link
912        del linkData['self']
913        molesDoc = self.atomBrowseURL.replace('ATOM', 'NDG-B1')
914        if linkData.has_key('related'):
915            relatedLinks = []
916            for link in linkData['related']:
917                if link.href != molesDoc:
918                    relatedLinks.append(link)
919           
920            linkData['related'] = relatedLinks
921               
922        # now add the remaining links to the atom
923        for key in linkData:
924            for link in linkData[key]:
925                logging.debug("Adding link data")
926                self.relatedLinks.append(link)
927       
928
929    def _addSpatialData(self, element):
930        '''
931        Add spatial coverage element to an input element
932        @param element: element to add coverage data to
933        '''
934        logging.info("Adding spatial data to Atom")
935        if not self.minX:
936            logging.info("No spatial data specified")
937            return
[4490]938        bbox = ET.SubElement(element, "georss:where")
[4209]939        envelope = ET.SubElement(bbox, "gml:Envelope")
940        lc = ET.SubElement(envelope, "gml:lowerCorner")
[4502]941        lc.text = str(self.minX) + " " + str(self.minY)
[4209]942        uc = ET.SubElement(envelope, "gml:upperCorner")
[4502]943        uc.text = str(self.maxX) + " " + str(self.maxY)
[4209]944
945       
946    def setAttribute(self, attributeName, attributeValue):
947        '''
948        Set the value of an atom attribute - and do some basic tidying up of the string content
949        - to escape any XML unfriendly characters
950        @param attributeName: name of the attribute whose value to set
951        @param attributeValue: value to set the attribute to 
952        '''
953        logging.debug("Setting attribute, %s, to %s" %(attributeName, attributeValue))
954        origValue = attributeValue
955       
956        # escape any special characters if a value has been specified
957        # NB, need to cope with both single values and arrays
958        if attributeValue:
959            if type(attributeValue) is list:
960                newVals = []
961                for val in attributeValue:
[4564]962                    newVals.append(self.objectify(utilities.escapeSpecialCharacters(val), attributeName))
[4209]963                attributeValue = newVals
964                   
965            else:
[4564]966                attributeValue = self.objectify(utilities.escapeSpecialCharacters(attributeValue), attributeName)
[4209]967
968        # handle the special case of authors; only one author is allowed per atom
969        # - the others should be treated as contributors
970        if attributeName == "authors":
971            setattr(self, "author", attributeValue[0])
972            if len(attributeValue) > 1:
973                setattr(self, "contributors", attributeValue[1:])
[4240]974        elif attributeName == "atomAuthors":
[4531]975            if isinstance(attributeValue, list):
976                for val in attributeValue:
977                    self.ME.responsibleParties.append(val)
978            else:
979                self.ME.responsibleParties.append(attributeValue)
[4582]980        elif attributeName == "files":
981            self.addUniqueRelatedLinks(attributeValue)
[4209]982        else:
983            setattr(self, attributeName, attributeValue)
984
985
986    def objectify(self, objectVals, attributeName):
987        '''
988        Some inputs are specified as strings but need to be converted into
989        objects - do this here
990        @param objectVals: a '|' delimited string of values
991        @param attributeName: name of attribute the values belong to
992        '''
993        obj = None
994        if type(objectVals) != str:
995            return objectVals
996       
[4347]997        if attributeName == "relatedLinks":
[4209]998            obj = Link()
[4721]999        elif attributeName == "atomAuthors":
1000            obj = Person(personType = Person.RESPONSIBLE_PARTY_TYPE)
1001        elif attributeName == "authors":
[4555]1002            # NB, ensure there is only one author tag - extra authors are contributors
1003            authorType = Person.AUTHOR_TYPE
[4605]1004            if self.author and self.author.hasValue():
[4555]1005                authorType= Person.CONTRIBUTOR_TYPE
1006            obj = Person(personType = authorType)
[4582]1007        elif attributeName == 'files':
1008            obj = Link()
1009            objectVals = '%s|%s|%s' \
1010                %(self.VTD.getTermCurrentVocabURL(VTD.METADATA_SOURCE_TERM), objectVals, VTD.METADATA_SOURCE_TERM)
[4209]1011
1012        if obj:
1013            obj.fromString(objectVals)
[4555]1014            # NB, need to set it now, just in case we don't set it before coming back
[4605]1015            if attributeName == "authors" and (not self.author or not self.author.hasValue()):
[4555]1016                self.author = obj
[4209]1017            return obj
1018       
1019        return objectVals
1020
1021
1022    def toPrettyXML(self):
1023        '''
1024        Returns nicely formatted XML as string
1025        '''
1026        atomXML = self.toXML()
1027
1028        # create the string
1029        logging.debug("Converting the elementtree object into a string")
1030        prettyXML = et2text(atomXML.getroot())
1031
1032        # add XML version tag
1033        prettyXML = "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n" + prettyXML
1034        logging.info("Created formatted version of XML object")
1035        return prettyXML
[4301]1036
1037
1038    def getLinksOfType(self, termID):
1039        '''
1040        Returns links in the atom related links attribute which match the specified
1041        term ID
1042        @param termID: the termID to look for in the related links - NB, this is
1043        matched to the end of the link.rel value
1044        @return links: array of Link objects with matching term type
1045        '''
1046        logging.debug("Getting atom links of type, '%s'" %termID)
1047        matchingLinks = []
1048        for link in self.relatedLinks:
[4314]1049            # firstly, handle special case where we only want the online ref type links
1050            # returned
1051            if termID == self.ONLINE_REF_LABEL:
1052                if not link.isChildAtom():
1053                    logging.debug("- found link with matching term type")
1054                    matchingLinks.append(link)
1055               
1056            elif link and link.rel and link.rel.lower().endswith(termID.lower()):
[4301]1057                logging.debug("- found link with matching term type")
1058                matchingLinks.append(link)
[4314]1059               
[4301]1060        logging.debug("Returning matched links")
[4314]1061        return matchingLinks
1062       
[4347]1063       
1064    def getLogos(self):
1065        '''
1066        Return related links that are logos
1067        @return: array of Links containing the logos for the atom
1068        '''
1069        logos = []
1070        for link in self.relatedLinks:
1071            if link.rel.lower().endswith(VTD.LOGO_TERM.lower()):
1072                logos.append(link)
1073               
1074        return logos
1075   
1076   
1077    def isGranule(self):
1078        if self.atomTypeID == VTD.GRANULE_TERM:
1079            return True
1080        return False
1081   
1082   
1083    def isDE(self):
1084        if self.atomTypeID == VTD.DE_TERM:
1085            return True
1086        return False
1087   
1088    def isDeployment(self):
1089        if self.subtypeID and self.subtypeID == VTD.DEPLOYMENT_TERM:
1090            return True
1091        return False
[4444]1092   
1093    def isDeployable(self):
1094        if (self.atomTypeID == VTD.ACTIVITY_TERM and self.subtypeID != VTD.DEPLOYMENT_TERM) or \
1095            self.atomTypeID == VTD.DPT_TERM or \
1096            self.atomTypeID == VTD.OBS_TERM:
1097            return True
1098        return False
[4780]1099   
1100    def isPublished(self):
1101        '''
1102        Check state of atom doc - if published or Published return True,
1103        otherwise return False
1104        '''
1105        return self.state.isPublishedState()
[4419]1106       
[4564]1107       
[4502]1108    def addCSMLData(self, csmlName, csmlContent, aggregateCoverage=False, useCSMLID=False):
[4440]1109        '''
1110        Parse CSML data and add extracted info to the atom
[4502]1111        @param csmlName: name of the csml file
1112        @param csmlContent: content of the csml file - NB, if this is set to None and the
1113        file, csmlName, is available locally, CsmlParser.Dataset will read in the file
1114        directly
[4440]1115        @keyword aggregateCoverage: if set to True, only coverage data that extends the
[4564]1116        atom coverage data will be added
[4502]1117        @keyword useCSMLID: if True, use the CSML doc ID as the dataset ID - NB,
1118        this should only be True if creating a new atom - e.g. from a granulite
1119        @return csmlDoc: the CsmlParser.Dataset object with the csml data in
[4440]1120        '''
1121        logging.info("Creating CSML data model")
[4502]1122        self.csmlFile = csmlName
1123        self.contentFile = csmlName
1124        content = csmlContent or csmlName
[4555]1125   
[4502]1126        csmlDoc = CsmlParser.Dataset(file=content)
[4440]1127       
1128        logging.info("Extracting info from CSML file")
1129        logging.debug("Got dataset ID: %s" %csmlDoc.id)
[4502]1130        if useCSMLID:
1131            logging.debug(" - using this ID for the atom")
1132            self.setDatasetID(VTD.GRANULE_TERM + '_' + csmlDoc.id)
[4440]1133       
1134        title = csmlDoc.name.CONTENT
1135        logging.debug("Got dataset name (title): '%s'" %title)
1136        # NB, if a title is specified (and not as the default value), it automatically is used in
1137        # place of anything in the granulite file
1138        if title and title != "NAME OF DATASET GOES HERE":
1139            logging.info("Title, '%s', extracted from CSML file" %title)
1140            if self.title:
1141                logging.info("- NB, this will override the title specified in the granulite file ('%s')" \
1142                             %self.title)
1143            self.title = title
1144               
1145        bbox1 = csmlDoc.getBoundingBox()
1146        bbox2 = csmlDoc.getCSMLBoundingBox()
[4571]1147
1148        time = None
1149        if bbox2:
1150            time = bbox2.getTimeLimits()
[4419]1151   
[4440]1152        # now check for other parameters to add to granule
1153        # Firstly, extract the bounding envelope
1154        if bbox1:
[4564]1155            w, e = utilities.normaliseLongitude(bbox1[0],bbox1[2])
[4440]1156            n, s = (bbox1[3], bbox1[1])
[4419]1157   
[4440]1158            if not aggregateCoverage or (not self.maxY or float(n) > float(self.maxY)):
1159                self.maxY = n
1160               
1161            if not aggregateCoverage or (not self.minY or float(s) < float(self.minY)):
1162                self.minY = s
1163           
1164            if not aggregateCoverage or (not self.minX or float(w) < float(self.minX)):
1165                self.minX = w
[4419]1166   
[4440]1167            if not aggregateCoverage or (not self.maxX or float(e) > float(self.maxX)):
1168                self.maxX = e
[4419]1169           
[4440]1170            logging.debug("Got bounding box data from file: (%s, %s) , (%s, %s)" \
1171                          %(w, s, e, n))
[4419]1172           
[4440]1173            logging.debug("Updated atom bounding box data: (%s, %s) , (%s, %s)" \
1174                          %(self.minX, self.minY, self.maxX, self.maxY))
1175        else:
1176            logging.debug("No valid bounding box data found")
1177   
1178        if time:
[4564]1179            t1 = utilities.formatDateYYYYMMDD(time[0])
[4440]1180            if not aggregateCoverage or \
1181                (not self.t1 or datetime.datetime.strptime(t1, YEAR_FORMAT) < \
1182                    datetime.datetime.strptime(self.t1, YEAR_FORMAT)):
1183                self.t1 = t1
1184   
1185            t2 = time[1]
1186            if t2 and t2 != 'None':
[4564]1187                t2 = utilities.formatDateYYYYMMDD(t2)
[4440]1188                if not aggregateCoverage or \
1189                    (not self.t2 or datetime.datetime.strptime(t2, YEAR_FORMAT) > \
1190                        datetime.datetime.strptime(self.t2, YEAR_FORMAT)):
1191                    self.t2 = t2
1192           
1193            logging.debug("Got time range: %s -> %s" %(self.t1, self.t2))
1194        else:
1195            logging.debug("No valid time range data found")
1196   
1197        #create parameter summaries:
1198        #set up list to hold the parameters data
1199        parameters = []
1200        for feature in csmlDoc.featureCollection.featureMembers:
1201            if hasattr(feature.parameter, 'href'):
1202                paramTriple = ""
1203                if hasattr(feature, 'description'):
1204                    paramTriple = feature.description.CONTENT
1205                    paramTriple += " | " + feature.parameter.href
1206                   
1207                    term = ""
1208                    if hasattr(feature, 'name'):
1209                        term = feature.name.CONTENT
1210   
1211                    paramTriple += " | " + term
1212                   
1213                    logging.debug("Got parameter info: %s" %paramTriple)
1214                    parameters.append(paramTriple)
[4419]1215       
[4440]1216        # update the atom with the extracted parameters
1217        logging.info("Adding CSML parameters to granule atom")
1218        self.addParameters(parameters)
1219        logging.info("Finished adding CSML data")
[4502]1220        return csmlDoc
[4419]1221
1222
[4531]1223    def lookupAssociatedData(self, type, dr, lookupIndirectReferences=False):
[4440]1224        '''
[4531]1225        Check through the atom links and retrieve any associated data of the
1226        specified type
1227        @param type: type of associated data to lookup - currently VTD.DEPLOYMENT_TERM
1228        or VTD.DE_TERM
[4440]1229        @param dr: Instance of DocumentRetrieve object - NB, this requires eXist
1230        config details which are not available to the Atom object
[4444]1231        @keyword lookupIndirectReferences: if True, the atom ID is used to search
1232        defined deployments to find those which reference it, otherwise only
1233        deployments data featured in the atom related links are processed
[4440]1234        '''
[4531]1235        logging.info("Looking up %s info" %type)
[4535]1236       
[4440]1237        self.allActivities = []
1238        self.allObs = []
1239        self.allDpts = []
[4419]1240
[4531]1241        if type != VTD.DE_TERM and type != VTD.DEPLOYMENT_TERM:
1242            raise ValueError('Unrecognised associated data type: %s' %type)
1243       
1244        # avoid duplicating lookup effort
[4535]1245        if (type == VTD.DEPLOYMENT_TERM and self.deployments) or \
1246            (type == VTD.DE_TERM and self.dataEntities):
[4531]1247            logging.info("- this info has already been looked up - returning")
1248            return
1249
1250        # firstly, collect all the references to the info required
[4444]1251        if lookupIndirectReferences:
[4535]1252            logging.info("Looking up indirect references")
[4531]1253           
1254            # if we're looking up DE data for deployments data, need to have the
1255            # deployments info looked up first
1256            if type == VTD.DE_TERM and self.isDeployable() and not self.deployments:
1257                self.lookupAssociatedData(VTD.DEPLOYMENT_TERM, dr, lookupIndirectReferences)
1258           
1259            logging.info("Looking up references to this atom from other %s" %type)
1260           
[4535]1261            # NB, if we're looking up deployments info, we only look up references
1262            # to this atom - if we're looking up DEs, we need to look up references
1263            # to the deployments referenced by this atom
[4531]1264            urls = [self.atomBrowseURL]
1265           
1266            if type == VTD.DE_TERM and self.isDeployable():
1267                urls = []
1268                for dep in self.deployments:
1269                    urls.append(dep.browseURL)
1270                   
[4444]1271            links = []
[4531]1272            for url in urls:
1273                doc = dr.get(type, dr.ATOM_TYPE, url, \
[4555]1274                             targetCollection = eXistConnector.BASE_COLLECTION_PATH)
[4531]1275                # now need to turn this results set into actual atoms
1276                tree = ET.fromstring(doc)
1277                for atom in tree:
[4535]1278                    logging.debug("- found reference in %s" %type)
[4531]1279                    links.append(ET.tostring(atom))
[4535]1280                   
1281            logging.info("Finished looking up indirect references")
[4444]1282        else:
1283            links = self.getLinksOfType(self.VTD.DEPLOYMENT_TERM)
1284
[4535]1285        # now retrieve the references and extract the required data
1286        logging.info("Retrieving info from %s references" %type)
[4531]1287        if type == VTD.DEPLOYMENT_TERM:
[4696]1288            logging.info("Extracting links data to deployment entitites")
[4535]1289            self.deployments = []
[4531]1290            for link in links:
1291                if lookupIndirectReferences:
1292                    deploymentAtom = link
1293                else:
1294                    localID = link.href.split("__ATOM__")[-1]
1295                    deploymentAtom = dr.get(self.ME.providerID, 'ATOM', localID, \
[4555]1296                                            targetCollection = eXistConnector.BASE_COLLECTION_PATH)
[4531]1297   
1298                deployment = Deployment.Deployment(Atom(xmlString=str(deploymentAtom)))
1299                self.deployments.append(deployment)
1300               
1301                self.addUniqueLinks(self.allActivities, deployment.activities)
1302                self.addUniqueLinks(self.allObs, deployment.obs)
1303                self.addUniqueLinks(self.allDpts, deployment.dpts)
1304        else:
1305            # for DE data, just store the title + link in a Link object
[4535]1306            self.dataEntities = []
[4696]1307            logging.info("Extracting links data to data entitites")
[4531]1308            for data in links:
1309                atom = Atom(xmlString=str(data))
1310                link = Link()
1311                link.title = atom.title
1312                link.href = atom.atomBrowseURL
[4696]1313                link.rel = atom.datasetID
[4535]1314               
1315                # NB, different deployments may be used by the same DE - so
1316                # avoid duplication
1317                self.addUniqueLinks(self.dataEntities, link)
[4444]1318           
[4531]1319        logging.info("Finished looking up %s info" %type)
[4444]1320
1321
1322    def addUniqueLinks(self, dataArray, links):
1323        '''
1324        Add links to specified array - if they are not already included
1325        @param dataArray: a list, potentially arlready containing links
1326        @param links: a Link or array of Links to add to the dataArray
1327        '''
1328        logging.debug("Adding new links")
1329        if not links:
1330            return
1331       
1332        if type(links) is not list:
1333            links = [links]
1334       
1335        for link in links:
1336            if type(link) is not Link:
1337                logging.warning("Link is not of 'Link' object type (type='%s') - skipping" %type(link))
1338                continue
1339            if link not in dataArray:
1340                logging.debug("- adding unique link")
1341                dataArray.append(link)
[4780]1342        logging.debug("Finished adding links")
1343
1344       
1345    def getFullPath(self):
1346        '''
1347        Return full path to atom in eXist, if it exists, or None, otherwise
1348        @return fullPath: string - collection + filename of atom in eXist
1349        '''
1350        # NB, name assigned when atom created in eXist - so if not defined, not
1351        # in eXist
1352        logging.debug("Getting full path to atom")
1353        if self.atomName:
1354            logging.debug("Return full path to atom in eXist")
1355            return self.getDefaultCollectionPath() + self.atomName
1356        logging.debug("Atom doesn't currently exist in eXist - return 'None'")
1357        return None
Note: See TracBrowser for help on using the repository browser.