1 | ''' |
---|
2 | Extend eXistInterface class - to add document retrieval functionality |
---|
3 | |
---|
4 | @author: B Lawrence? |
---|
5 | ''' |
---|
6 | from ndg.common.src.clients.xmldb.eXist.eXistInterface import ndg_eXist |
---|
7 | from ndg.common.src.lib.ndgXqueries import ndgXqueries |
---|
8 | from xml.etree import ElementTree as ET |
---|
9 | import urllib2, logging, socket |
---|
10 | |
---|
11 | |
---|
def httpify(url):
    '''
    Ensure a url has an http prefix

    A url that already starts with an 'http://' or 'https://' scheme
    (compared case-insensitively) is returned unchanged; anything else
    gets 'http://' prepended.

    @param url: url or bare host/path string
    @return: the url, guaranteed to start with an http(s) scheme
    '''
    # Test for the full scheme separator rather than just the letters
    # 'http', so that host names such as 'httpbin.org' are still prefixed.
    if url.lower().startswith(('http://', 'https://')):
        return url
    return 'http://' + url
---|
19 | |
---|
20 | |
---|
class DocumentRetrieve(ndg_eXist):
    '''
    This class provides a document retrieval service via the NDG exist interfaces
    '''

    ATOM_TYPE = 'ATOM-TYPE'
    ATOM_BACKUP_TYPE = 'ATOM-BACKUP'
    ATOM = 'ATOM'

    def __init__(self, repository, pwfile='passwords.txt'):
        '''
        Initialise the underlying ndg_eXist connection and the lookup of
        schema name -> XQuery name used by get()

        @param repository: handed to ndg_eXist as its 'db' argument
        @keyword pwfile: handed to ndg_eXist as its 'passwordFile' argument
        '''
        logging.info("Using repository, '%s'", repository)

        ndg_eXist.__init__(self, db=repository, passwordFile=pwfile)
        logging.info("DB connection initialised")
        self.repository = repository
        self.xq = ndgXqueries()
        self.knownQueries = {
            'DIF': 'moles2dif',
            'DC': 'moles2DC',
            'ISO19139': 'moles2iso19139',
            'NDG-B0': 'moles',
            'NDG-B1': 'molesObjectType',
            'MDIP': 'moles2mdip',
            'NDG-A0': 'csml',
            'NumSim': 'numsim',
            self.ATOM: 'atom',
            self.ATOM_BACKUP_TYPE: 'atom',
            self.ATOM_TYPE: 'atomTypeList',
        }

    def _retrieveDoc(self, schema, xqtype, targetCollection, repository, localID):
        '''
        Retrieve doc using specified XQuery type
        @param schema: schema name; only used to relax the single-hit check
            for atom backups and in the error message
        @param xqtype: name of the XQuery to build via ndgXqueries.actual
        @param targetCollection: collection passed to the XQuery
        @param repository: repository passed to the XQuery
        @param localID: local ID passed to the XQuery
        @return: docName, docContents
        @raise ValueError: if the query does not give exactly one hit (and the
            schema is not the atom backup type)
        '''
        logging.debug("Retrieving doc - type, '%s', coll, '%s', rep:'%s', localID:'%s'",
                      xqtype, targetCollection, repository, localID)
        xquery = self.xq.actual(xqtype, targetCollection, repository, localID)

        sessionID, summary = self.executeQuery(xquery)
        try:
            # NB, backups will inevitably return lots of docs - only retrieve the top one
            # for the moment - since this is not really needed atm
            if summary['hits'] != 1 and schema != self.ATOM_BACKUP_TYPE:
                raise ValueError('Unable to obtain single %s document [%s] (hits=%s)'
                                 % (schema, localID, summary['hits']))

            docName = summary['documents'][0][0]
            doc = self.retrieve(sessionID, 0, {})
        finally:
            # release the query session on the failure paths as well, so a
            # bad lookup does not leave the session open
            self.sessionRelease(sessionID)
        return docName, doc

    def get(self, repository, schema, localID, targetCollection='/db/discovery/moles',
            includeDocNameData=False):
        '''
        Retrieve the document identified by repository/schema/localID

        @param repository: repository passed to the XQuery
        @param schema: one of the keys of self.knownQueries
        @param localID: local ID passed to the XQuery
        @keyword targetCollection: collection passed to the XQuery
        @keyword includeDocNameData: if True, a dictionary is returned, instead of the dataset, with the key
        being the name of the document and the entry being the dataset
        @raise TypeError: if the schema is not in self.knownQueries
        @raise KeyError: if an NDG-B1 object type lookup gives a value outside 1-4
        '''
        logging.debug("Get called with rep:'%s', schema:'%s', localID:'%s', collection:'%s'",
                      repository, schema, localID, targetCollection)
        if schema not in self.knownQueries:
            raise TypeError('Unknown Schema "%s" in URI' % schema)

        xqtype = self.knownQueries[schema]

        if schema == 'NDG-B1':
            # this is a general moles object - so need to further establish the type of moles doc
            # it is to get the correct XQuery to use
            _typeDocName, typeXML = self._retrieveDoc(schema, xqtype, targetCollection,
                                                      repository, localID)
            # an empty element counts as type 0, which has no stub query and
            # so fails the lookup below
            otype = int(ET.fromstring(typeXML).text or 0)

            xqtype = {4: 'stubB_dataEntity',
                      3: 'stubB_observationStation',
                      2: 'stubB_DPT',
                      1: 'stubB_activity'}[otype]

        docName, doc = self._retrieveDoc(schema, xqtype, targetCollection,
                                         repository, localID)
        if includeDocNameData:
            return {docName: doc}
        return doc

    def error(self, string, t, r, s, l):
        '''
        Raise a ValueError whose message is 'string' followed by
        ' for r:s:l in t'
        '''
        raise ValueError(string + ' for %s:%s:%s in %s' % (r, s, l, t))
---|
99 | |
---|
100 | |
---|
class genericHTTP(object):
    ''' Provides a generic HTTP request class '''
    def __init__(self, proxyServer=None):
        '''
        Build the urllib2 opener used by get()
        @keyword proxyServer: optional proxy url/host used for http requests;
            an http prefix is added if missing
        '''
        if proxyServer is None:
            # an empty mapping means no proxy is configured on the handler
            proxyHandler = urllib2.ProxyHandler({})
        else:
            proxyHandler = urllib2.ProxyHandler({'http': httpify(proxyServer)})
        self.opener = urllib2.build_opener(proxyHandler)

    def get(self, url):
        '''
        Fetch the contents of a url
        @param url: url to read; an http prefix is added if missing
        @return: the body of the response, as returned by read()
        @raise IOError: if the url cannot be opened; the message starts with
            'Cannot obtain remote file: ' followed by the cause
        '''
        url = httpify(url)
        request = urllib2.Request(url)
        logging.info("Getting data from url: %s", url)
        prefix = 'Cannot obtain remote file: '
        try:
            f = self.opener.open(request)
        except urllib2.URLError as e:
            # HTTPError is a URLError that carries a status code and may also
            # expose 'reason', so test for the code first or it is never reported
            if hasattr(e, 'code'):
                raise IOError(prefix + 'Response code [%s]' % e.code)
            if hasattr(e, 'reason'):
                raise IOError(prefix + 'No access to server [%s]' % e.reason)
            raise IOError(prefix)
        except socket.error:
            raise IOError(prefix + 'Network Socket problem')
        except Exception as e:
            # anything else is still reported to the caller as an IOError
            raise IOError(prefix + '[%s]' % str(e))

        try:
            return f.read()
        finally:
            # always close the response, even if read() fails
            f.close()
---|
133 | |
---|
134 | |
---|
class ndgHTTP(genericHTTP):
    ''' Fetches xml documents from a remote NDG repository over HTTP '''
    def __init__(self, remoteHost, proxyServer=None):
        '''
        @param remoteHost: prefix used for every retrieve url built by get()
        @keyword proxyServer: passed through to genericHTTP
        '''
        self.remoteHost = remoteHost
        genericHTTP.__init__(self, proxyServer)

    def uriget(self, uri):
        ''' Split an NDG uri with ndgObject and fetch the document it names '''
        # NB, keep this import local - importing ndgObject at module level
        # can cause problems resolving imports when this module is itself
        # used from the ndgObject level
        from ndg.common.src.models.ndgObject import ndgObject
        parsed = ndgObject(uri)
        return self.get(parsed.repository, parsed.schema, parsed.localID)

    def get(self, repository, schema, localID, **kw):
        '''
        Return a remote ndg document
        @param repository, schema, localID: joined with '__' to form the
            retrieve url under self.remoteHost
        @keyword kw: accepted but not used
        '''
        #TODO what about security? Probably means we need to get the headers of our responses sorted ...
        urlParts = (self.remoteHost, repository, schema, localID)
        return genericHTTP.get(self, '%s/retrieve/%s__%s__%s' % urlParts)

    def setSecurity(self, location, usercode, password):
        '''
        Use a usercode password to set security credentials at a specific location
        (not implemented - currently does nothing)
        '''
---|
158 | |
---|
159 | |
---|
class ndgVocabPOX(genericHTTP):
    ''' Provides a POX interface to the vocab server '''
    def __init__(self, path="http://vocab.ndg.nerc.ac.uk/axis2/services/vocab/", proxyServer=None):
        '''
        @keyword path: base url of the vocab service; request names are
            appended directly to it
        @keyword proxyServer: passed through to genericHTTP
        '''
        genericHTTP.__init__(self, proxyServer)
        # honour the caller's path rather than always using the default url
        self.path = path
        self.ns = "http://vocab.ndg.nerc.ac.uk/"

    def _entryTerms(self, root, matchType):
        ''' Return the entryTerm text ('' when empty) of each matchType element one level below root's children '''
        matches = root.findall('*/{urn:vocab/types}%s' % matchType)
        return [(m.find('{urn:vocab/types}entryTerm').text or '') for m in matches]

    def getRelated(self, subject):
        '''
        Get a related record
        @param subject: text inserted as-is into the subjectText query
            parameter (no url quoting is applied here)
        @return: [broader, narrower, synonyms] - three lists of entry terms;
            they are also stored on self, together with the url requested
        '''
        url = '%sgetRelatedRecordByCriteria?subjectText=%s&predicate=255&inferences=True&objectList=%slist/P211/current' % (self.path, subject, self.ns)
        self.url = url
        doc = genericHTTP.get(self, url)
        root = ET.fromstring(doc)
        self.broader = self._entryTerms(root, 'broadMatch')
        self.narrower = self._entryTerms(root, 'narrowMatch')
        self.synonyms = self._entryTerms(root, 'exactMatch')
        return [self.broader, self.narrower, self.synonyms]
---|
179 | |
---|