source: ndgCommon/trunk/ndg/common/src/dal/ndgDirectory.py @ 4988

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/dal/ndgDirectory.py@4988
Revision 4988, 4.0 KB checked in by cbyrom, 11 years ago (diff)

Refactor ndgRetrieve making much more structured + turning into an
object to allow simple re-use + improve documentation and logging +
update unit tests.

Line 
'''
 Treats an NDG directory as something that can be counted, listed, and dated

 @author: B Lawrence?
'''
6from ndg.common.src.clients.xmldb.eXist.searchclient import SearchClient
7from xml.etree import ElementTree as ET
8
9
class ndgDirectory:
    ''' Treats an NDG directory as something that can be counted, listed, and dated.

    Instance attributes set here:
        members  - list of dicts with keys 'fileName', 'EntryID', 'created'
        files    - list of the 'fileName' values of members
        entries  - list of the 'EntryID' values of members
        ns       - XML namespace URI selected by docType
        xq       - the list query with the target collection substituted in
    '''
    def __init__(self,targetCollection,existHost,docType='DIF',fast=0,
                 passwordFile=None):
        ''' Initialise a targetCollection directory of a particular docType.
        Valid docTypes are:
            DIF, MDIP, ATOM, MOLES
            (Not all supported ...yet )
            Note that if fast is set true, the members attribute is not populated
            at instantiation.

        @param targetCollection: collection path substituted into the list query
        @param existHost: passed to SearchClient as dbHostName
        @param docType: selects both the namespace and the list query
        @param fast: if true, skip the initial call to getMembers()
        @param passwordFile: passed to SearchClient as configFileName
        @raise KeyError: if docType is not one of the known types
        '''
        self.members = []
        self.entries = []
        # always defined, so it can be read even when fast is set or the
        # directory turns out to be empty
        self.files = []
        self.lastUpdated = None
        self.targetCollection = targetCollection
        self.docType = docType

        #get the right namespace
        self.ns = {'DIF': 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/',
                   'MOLES': 'http://ndg.nerc.ac.uk/moles',
                   'MDIP': 'http://www.oceannet.org/mdip/xml',
                   'ATOM': 'http://www.w3.org/2005/Atom'}[docType]

        #make sure we have a db available
        self.searchClient = SearchClient(dbHostName = existHost,
                                         configFileName = passwordFile)

        #select the right query according to the docType
        # NOTE(review): the 'moles' key is unreachable at present - the
        # namespace lookup above raises KeyError for it first
        xqName = {'DIF': 'DIFList', 'MOLES': 'molesList',
                  'MDIP': 'MDIPList', 'moles': 'molesList',
                  'ATOM': 'atomList'}[docType]
        xq = self.searchClient.resources.xq[xqName]
        # set the query up with the correct target collection
        self.xq = xq.replace('TargetCollection', self.targetCollection)

        if not fast:
            self.getMembers()


    def getMembers(self):
        '''
        Build the membership of the directory by URI (based on the FILENAME)

        Runs the list query and rebuilds members, files and entries from
        scratch, so calling this more than once does not duplicate entries.

        @return: the members list (empty if the query returned nothing)
        '''
        doc = self.searchClient.client.runQuery(self.xq)

        found = []
        if doc:
            # only the first result is parsed; each child of its root element
            # is treated as one member
            for member in ET.fromstring(doc[0]):
                found.append({
                    'fileName': member.findtext('{%s}fileName' % self.ns),
                    'EntryID': member.findtext('{%s}repositoryID' % self.ns),
                    'created': member.findtext('{%s}created' % self.ns)})

        self.members = found
        self.files = [i['fileName'] for i in found]
        self.entries = [i['EntryID'] for i in found]
        return self.members


    def get(self, fileName):
        ''' Return the actual document corresponding to fileName in this
        directory

        @param fileName: document name, either relative to the target
        collection or already prefixed with it
        @return: the entry for the document in the result of getDocs
        '''
        #Use the fact that we know its document name ...
        # NB, atoms have a more structured collection directory - as a result
        # AtomList.xq returns the full path to the listed docs - so avoid
        # concat'ing the collectionName for these
        if fileName.startswith(self.targetCollection):
            name = fileName
        else:
            name = '%s/%s' % (self.targetCollection, fileName)

        results = self.searchClient.getDocs([name])
        return results[name]


    def created(self,fileName=None):
        ''' Return the creation date of the collection (if fileName is None),
        or of the specific member within the collection

        @param fileName: optional member name, either relative to the target
        collection or already prefixed with it
        @return: text content of the <created> element returned by the query
        '''
        q = "'%s'" % self.targetCollection
        if fileName:
            # if we're dealing with atom docs, these include the full collection
            # path already - so strip this off
            if fileName.startswith(self.targetCollection):
                fileBits = fileName.split('/')
                fileName = fileBits[-1]
                # NOTE(review): if targetCollection itself starts with '/',
                # this yields a path with a double leading slash - confirm
                # whether collections are ever given that way
                tc = '/' + '/'.join(fileBits[0:-1])
                q = "'%s', '%s'" % (tc, fileName)
            else:
                q += ",'%s'" % fileName
        xq = "<created>{xmldb:created(%s)}</created>" % q
        # only the first result of the query is used
        r = self.searchClient.client.runQuery(xq)[0]
        e = ET.fromstring(str(r))
        return e.text
108   
Note: See TracBrowser for help on using the repository browser.