Changeset 3147 for exist/trunk


Ignore:
Timestamp:
14/12/07 14:59:41 (12 years ago)
Author:
lawrence
Message:

eggable version including elementtree 1.3

Location:
exist/trunk/python/ndgUtils
Files:
1 added
6 edited

Legend:

Unmodified
Added
Removed
  • exist/trunk/python/ndgUtils/__init__.py

    r3110 r3147  
     1''' The ndgUtils package provides utilities for  
     2 - manipulating metadata held in eXist databases, 
     3 - manipulating xml metadata content,  
     4 - accessing a remote ndgSearch web service 
     5 ''' 
     6 # These are the things we want automatically loaded ... 
     7from ndgRetrieve import ndgRetrieve 
     8from DocumentRetrieve import DocumentRetrieve 
     9import BeautifulSoup 
     10from xmlHandler2 import xmlHandler 
     11from ndgXqueries import ndgXqueries 
     12from ndgObject import ndgObject 
     13from ndgSearch import ndgSearch 
  • exist/trunk/python/ndgUtils/ndgObject.py

    r3127 r3147  
    1 from ndgRetrieve import ndgRetrieve 
     1from ndgUtils.ndgRetrieve import ndgRetrieve 
    22class ndgObject: 
    33    ''' This class instantiates an ndgObject which describes the various ways of obtaining itself, primarily 
  • exist/trunk/python/ndgUtils/ndgRetrieve.py

    r3127 r3147  
    1 import DocumentRetrieve, ndgObject, xmlHandler2 
     1import DocumentRetrieve 
     2from ndgSearch import ndgSearch 
    23import cgi,time 
    34 
    4 def ndgRetrieve(uri,config,logger=0,output='',remote=0): 
     5def ndgRetrieve(uri,config,logger=0,output='',remote=0,discovery=0): 
    56     
    6     ''' Given an ndgObject, uri, retrieve it, with security in place. 
     7    ''' Given an uri (an instance of ndgObject), retrieve it, with security in place. 
    78            If logger is non-zero it should be a python log class 
    8             If output is not '' it should be desired output schema (normally 
    9         to be used for discovery documents in a variety of outputs or 
    10         to indicate that an original document is required. 
     9            If output is not '' it should be desired output schema! 
    1110            If remote is non zero, then attempt to get the document 
    12         from a remote site via a (secured) restful http call''' 
     11        from a remote exist site via a (secured) restful http call 
     12            If discovery is non-zero AND the schema is in  
     13                [DIF,MDIP,or ISO] 
     14        then use the ndgSearch interface to obtain the document rather 
     15        than direct from an eXist database.''' 
    1316     
    1417    def getws(config,uri,remote): 
     
    2023        #    actual exist repository er. 
    2124        #    I'm sure this can be cleaned up further :-) 
    22         #try: 
    23         r=config.get('NDG_B_SERVICE',uri.repository) 
    24         sr=config.get('DEFAULT','repository') 
    25         er=config.get('NDG_EXIST',uri.repository) 
    26         #except: 
    27         #    raise ValueError,'Config file not available or is incomplete' 
    28         if r<> sr: 
    29             if not remote: 
    30                 return 0,'The uri [%s] is not available on [%s]'%(uri,server) 
     25        if remote and discovery: 
     26            raise ValueError, 'Invalid options to ndgRetrieve (remote AND discovery impossible)' 
     27        if discovery: 
     28            if uri.schema in ['ISO','DIF','MDIP']: 
     29                ws=ndgSearch() 
    3130            else: 
    32                 ps=config.get('DEFAULT','proxyServer') 
    33                 ws=DocumentRetrieve.ndgHTTP(r,proxyServer=ps) 
     31                raise ValueError,'Invalid schema (%s) with discovery option in ndgRetrieve'%uri.schema 
    3432        else: 
    35             pwf=config.get('NDG_EXIST','passwordFile') 
    36             ws=DocumentRetrieve.DocumentRetrieve(er,pwfile=pwf) 
     33            try: 
     34                r=config.get('NDG_B_SERVICE',uri.repository) 
     35                sr=config.get('DEFAULT','repository') 
     36            except: 
     37                raise ValueError,"Config file not available or incomplete" 
     38            if r<> sr: 
     39                if not remote: 
     40                    return 0,'The uri [%s] is not available on [%s]'%(uri,server) 
     41                else: 
     42                    try: 
     43                        ps=config.get('DEFAULT','proxyServer') 
     44                    except: 
     45                        raise ValueError,'Config file not available or has no [proxyServer]' 
     46                    ws=DocumentRetrieve.ndgHTTP(r,proxyServer=ps) 
     47            else: 
     48                try: 
     49                    er=config.get('NDG_EXIST',uri.repository) 
     50                except: 
     51                    raise ValueError,'Config file not available or has no Exist repository' 
     52                pwf=config.get('NDG_EXIST','passwordFile') 
     53                ws=DocumentRetrieve.DocumentRetrieve(er,pwfile=pwf) 
    3754        return 1,ws 
    3855     
    39     try: 
    40         server=config.get('DEFAULT','server') 
    41     except: 
    42         raise ValueError('Config file not available or does not include "server" entry!') 
    4356    status,ws=getws(config,uri,remote) 
    4457   
     
    7285        return 0,'Document retrieve[%s] failed [%s]'%(uri,cgi.escape(e)) 
    7386     
    74     try: 
    75         x=xmlHandler2.xmlHandler(r,string=1) 
    76         return 1,x 
    77     except Exception,e: 
    78         logString='Document parse for [%s] failed [%s]'%(uri,e) 
    79         if logger: logger.info(logString) 
    80         return 0,'''<p>Unable to parse documment [%s]<br/>  
    81                     Reason was [%s]</p>'''%(uri,cgi.escape(str(e))) 
     87    # we used to return an xmlHandler instance, but we don't do that any more ... 
     88    return 1,r 
    8289     
  • exist/trunk/python/ndgUtils/ndgSearch.py

    r3127 r3147  
    2323      fromString = staticmethod(expatbuilder.parseString) 
    2424      fromStream = staticmethod(expatbuilder.parse) 
    25   
     25# 
     26# Locator (this mimics the code which lives in the wsdl2py generated DiscoveryService_services, 
     27# but explicitly allows a different port address) ... 
     28# 
     29 
     30default_HostAndPort="http://glue.badc.rl.ac.uk:8080/axis2/services/DiscoveryService" 
    2631 
    2732class ndgSearch: 
    2833    ''' Provides a client to the NDG discovery services methods exposed by the Discovery.wsdl ''' 
    2934     
    30     def __init__(self,logger=None,tracefile=None): 
    31         '''get an instance of the service''' 
    32         #how do we get a different backend provider? 
     35    def __init__(self,logger=None,tracefile=None,HostAndPort=default_HostAndPort): 
     36        '''Get an instance of the NDG Discovery Service.  
     37            Optional Keywords are:  
     38                logger -  a python logging instance, for collecting stats 
     39                tracefile - for collecting debug output 
     40                HostAndPort - the location of the SOAP web service (default is ndg Glue) 
     41        ''' 
     42        #Get a different backend provider via the url argument 
    3343        loc=DiscoveryServiceLocator() 
    34         #self.server=loc.getDiscovery(readerclass=ExpatReaderClass, 
    35         self.server=loc.getDiscoveryServicePortType(readerclass=ExpatReaderClass,tracefile=tracefile) 
     44        self.server=loc.getDiscoveryServicePortType(url=default_HostAndPort, 
     45                                    readerclass=ExpatReaderClass,tracefile=tracefile) 
    3646        self.serverSessionID=None 
    3747        self.logger=logger 
     48        if self.logger:  
     49            logger.info('Discovery web service connection to %s'%default_HostAndPort) 
    3850        self.__reset() 
    3951         
  • exist/trunk/python/ndgUtils/test_ndgRetrieve.py

    r3127 r3147  
    11import ndgObject 
    22from ndgRetrieve import ndgRetrieve 
     3from xmlHandler2 import xmlHandler 
    34import ConfigParser 
    45import unittest 
     
    2122        status,xml=ndgRetrieve(uri,self.c) 
    2223        if not status: raise ValueError(xml) 
    23         self.assertEqual(xml.tree.tag,'dgMetadataRecord') 
     24        x=xmlHandler(xml,string=1) 
     25        self.assertEqual(x.tree.tag,'dgMetadataRecord') 
    2426         
    2527    def testNoB(self): 
     
    3133         
    3234    def testXMLdif(self): 
    33         ''' Make sure we can encode and decode at least one DIF properly ''' 
     35        ''' Make sure we can recall at least one DIF properly ''' 
    3436        doc='neodc.nerc.ac.uk__DIF__NEODC_NEXTMAP' 
    3537        doc='badc.nerc.ac.uk__DIF__dataent_11738019833217179' 
  • exist/trunk/python/ndgUtils/test_xqueries.py

    r3128 r3147  
    1010import ConfigParser 
    1111from xmlrpclib import Fault 
     12from ndgUtils.elementtree import ElementTree as ET 
    1213 
    1314class TestCase(unittest.TestCase): 
    1415     
    1516    def setUp(self):  
     17        ''' Automatically run before test cases ''' 
    1618        f='test.config' 
    1719        self.c=ConfigParser.ConfigParser() 
    1820        self.c.read(f) 
    1921        self.xq=ndgXqueries() 
    20         self.xmldb=ndg_eXist(db='glue.badc.rl.ac.uk') 
     22        self.xmldb1=ndg_eXist(db='glue.badc.rl.ac.uk') 
     23        self.xmldb2=ndg_eXist(db='chinook.badc.rl.ac.uk') 
    2124        self.testDElist=['badc.nerc.ac.uk__NDG-B1__dataent_COAPEC'] 
    2225     
    23     def _getmoles(uri,targetCollection): 
     26    def _logToFile(self,fname,xml): 
     27        ''' Writes some xml to a log file ''' 
     28        f=file(fname,'w') 
     29        f.write(xml) 
     30        f.close() 
     31     
     32    def _getmoles(self,uri,targetCollection,db): 
    2433        ''' Returns a moles document by URI from targetCollection ''' 
    2534        n=ndgObject(uri) 
    2635        xq=self.xq.actual('moles',targetCollection,n.repository,n.localID) 
    27         id,s=self.xmldb.executeQuery(xq) 
    28         self.assertEqual(s['hits'],1,'Unable to get unique %s'%uri) 
    29         return self.retrieve(id,1) 
     36        id,s=db.executeQuery(xq) 
     37        self.assertEqual(s['hits'],1,'Unable to obtain from database the document for unique id %s (hits=%s)'%(uri,s['hits'])) 
     38        return db.retrieve(id,0) 
    3039         
    3140    def test_DIFlist(self): 
     
    3342        xq=self.xq['DIFList'] 
    3443        xq=xq.replace('TargetCollection','/db/discovery/original/DIF') 
    35         r=self.xmldb.executeChunkedQuery(xq,1,1) 
     44        r=self.xmldb1.executeChunkedQuery(xq,1,1) 
    3645        print r 
     46         
     47    def test_molesList(self,dtype=None): 
     48        ''' Test the MOLES listing query on chinook ''' 
     49         
     50        def _getType(r): 
     51            moles='{http://ndg.nerc.ac.uk/moles}' 
     52            tree=ET.fromstring(str(r)) 
     53            rid=tree.findtext('*/%srepositoryIdentifier'%moles) 
     54            lid=tree.findtext('*/%slocalIdentifier'%moles) 
     55            xq=self.xq.actual('molesObjectType',tc,rid,lid) 
     56            r=self.xmldb2.executeChunkedQuery(xq,1,1) 
     57            tree=ET.fromstring(str(r)) 
     58            return int(tree.findtext('%sobjectType'%moles)) 
     59         
     60        xq=self.xq['molesList'] 
     61        tc='/db/ndg_B_metadata/badc.nerc.ac.uk' 
     62        moles='{http://ndg.nerc.ac.uk/moles}' 
     63        xq=xq.replace('TargetCollection',tc) 
     64        #this returns the first one of  
     65        r=self.xmldb2.executeChunkedQuery(xq,1,1) 
     66        #make sure it has got a dgDataEntity record (that's the default) 
     67        ot=_getType(r) 
     68        self.assertEqual(ot,4,'Data Entity should be 4 (is %s)'%ot) 
     69        # now let's convince ourselves that another sort of 
     70        # listing query should work, start by constructing the query 
     71        xq=self.xq['molesList'] 
     72        xq=xq.replace('TargetCollection',tc) 
     73        xq=xq.replace('dgDataEntity','dgActivity') 
     74        # and deliver it 
     75        r=self.xmldb2.executeChunkedQuery(xq,1,1) 
     76        #and make sure it has the right type of entity 
     77        ot=_getType(r) 
     78        self.assertEqual(ot,1,'Activity should be 1 (is %s)'%ot) 
     79         
     80    def test_moles2dif(self): 
     81        ''' Test the creation of a DIF file from rich moles content ''' 
     82        uri=self.testDElist[0] 
     83        uri=uri.replace('NDG-B1','NDG-B0') 
     84        n=ndgObject(uri) 
     85        targetCollection='/db/ndg_B_metadata/%s'%n.repository 
     86         
     87        #check it's there ... 
     88        xml=self._getmoles(uri,targetCollection,self.xmldb2) 
     89         
     90        #ok, now let's try a moles2dif xquery 
     91        xquery=self.xq.actual('moles2dif',targetCollection,n.repository,n.localID) 
     92        id,s=self.xmldb2.executeQuery(xquery) 
     93        print '%s hit in %s ms'%(s['hits'],s['queryTime'])  
     94        self.assertEqual(1,s['hits'],'Should have had one dif document returned!') 
     95        dif_from_moles=self.xmldb2.retrieve(id,0) 
     96        self._logToFile('dif_from_moles.xml',dif_from_moles) 
     97         
    3798 
    38     def test_moles2difRoundTrip(self): 
    39         ''' Tests the moles 2 dif round tripping ''' 
    40         # start by choosing an existing NDG moles data entity 
     99    def atest_dif2molesRoundTrip(self): 
     100        # Actually we now know that the dif2moles code was very lightweight and doesn't 
     101        # do much, so expectations of round tripping have been given up ... 
     102        ''' Tests the dif 2 moles round tripping. Note that an issue is that the  
     103        personnel and organisation material gets handled oddly ''' 
     104         
     105        # start by choosing an original DIF  
    41106        moles1=self.testDElist[0] 
    42107        dif1=moles1.replace('NDG-B1','DIF') 
     
    46111        targetCollection='/db/discovery/original/DIF/%s'%n.repository 
    47112        difName=n.uri+'.xml' 
    48         dif1_original=self.xmldb.getDoc(targetCollection,difName) 
     113        dif1_original=self.xmldb1.getDoc(targetCollection,difName) 
    49114        self.assertEqual(dif1_original[0:4],'<DIF','unable to load raw dif file %s'%difName) 
    50115         
     
    58123         
    59124        # and deliver the xquery ... 
    60         id,s=self.xmldb.executeQuery(xquery) 
     125        id,s=self.xmldb1.executeQuery(xquery) 
    61126        self.assertEqual(1,s['hits'],'Should have had one moles document returned!') 
    62127        moles_from_dif1=self.xmldb.retrieve(id,0) 
    63         self.xmldb.release(id) 
     128        self.xmldb1.release(id) 
    64129         
    65130        # now load it back into exist 
     
    67132        # but first check the dummyCollection doesn't exist, and if it does, get rid of it 
    68133        xq='''for $s in xmldb:get-child-collections('/db') return <collection>/db/{$s}</collection>''' 
    69         id,s=self.xmldb.executeQuery(xq) 
    70         collections=[self.xmldb.retrieveNext(id,i) for i in range(s['hits'])] 
    71         self.xmldb.release(id) 
     134        id,s=self.xmldb1.executeQuery(xq) 
     135        collections=[self.xmldb1.retrieveNext(id,i) for i in range(s['hits'])] 
     136        self.xmldb1.release(id) 
    72137        if '<collection>%s</collection>'%dummyCollection in collections:  
    73             ok=self.xmldb.removeCollection(dummyCollection) 
     138            ok=self.xmldb1.removeCollection(dummyCollection) 
    74139            self.assertEqual(True,ok,'We needed to delete the scratch collection!') 
    75140        #ok, now we can go ahead and create the collection and stuff the file into it 
    76         ok=self.xmldb.createCollection(dummyCollection) 
     141        ok=self.xmldb1.createCollection(dummyCollection) 
    77142        self.assertEqual(True,ok,'Unable to createCollection') 
    78143        moles2=ndgObject(n.uri.replace('DIF','NDG-B0')) 
    79144        dummyFile='%s/%s'%(dummyCollection,moles2.uri) 
    80         ok=self.xmldb.storeXML(moles_from_dif1,dummyFile) 
     145        ok=self.xmldb1.storeXML(moles_from_dif1,dummyFile) 
    81146        self.assertEqual(True,ok,'Unable to upload xml file') 
    82147         
    83148        # now let's see what we get for a dif back from that moles document! 
    84149        xquery=self.xq.actual('moles2dif',dummyCollection,moles2.repository,moles2.localID) 
    85         id,s=self.xmldb.executeQuery(xquery) 
     150        id,s=self.xmldb1.executeQuery(xquery) 
    86151        self.assertEqual(1,s['hits'],'Should have had one dif document returned!') 
    87152        dif1_from_moles=self.xmldb.retrieve(id,0) 
    88153         
    89         #are they the same? 
    90         self.assertEqual(dif1_from_moles,dif1_original) 
    91          
    92          
    93       
     154        # are they the same? well, there is no guarantee that they should be 
     155        # in the same order if the schema used xsd:any rather than xsd:sequence ... 
     156        # so this simple test would fail  
     157        # self.assertEqual(dif1_from_moles,dif1_original) 
     158 
     159        self._logToFile('original_dif.xml',dif1_original) 
     160        self._logToFile('moles_dif.xml',dif1_from_moles) 
     161 
     162 
    94163if __name__=="__main__": 
    95164    unittest.main() 
Note: See TracChangeset for help on using the changeset viewer.