1 | """ |
---|
2 | Demonstration handle dump for CMIP/ESGF files .. |
---|
3 | |
---|
4 | -h: print this message; |
---|
5 | -v: print version; |
---|
6 | -t: run a test; |
---|
7 | -f <file name>: examine file, print path to replacement if this file is obsolete, print path to sibling files (or replacements). |
---|
8 | -id <tracking id>: examine handle record of tracking id. |
---|
9 | """ |
---|
## See https://www.handle.net/proxy_servlet.html for information on the RESTful API.
---|
11 | |
---|
12 | import string, collections, os |
---|
13 | import ncq3 |
---|
14 | import xml |
---|
15 | from xml.dom import minidom |
---|
16 | |
---|
17 | try: |
---|
18 | import urllib |
---|
19 | from urllib import request |
---|
20 | except: |
---|
21 | import requests |
---|
22 | |
---|
23 | from testdata import * |
---|
24 | |
---|
class phandle(object):
    """Obsolete parser for handle metadata; superseded by the pyhandle class.

    Attributes:
        h: the handle dictionary exactly as it was passed in.
        d: maps each record's 'type' entry to that record's 'data' entry.
    """

    def __init__(self, hdlDict, k='values'):
        """Index the records listed under ``hdlDict[k]`` by their 'type'.

        Any error raised while indexing is re-raised after the record list
        has been printed.
        """
        self.h = hdlDict
        self.d = {}
        try:
            self.d.update((rec['type'], rec['data']) for rec in hdlDict[k])
        except:
            # Show the record list that could not be indexed, then propagate.
            print(hdlDict[k])
            raise
---|
37 | |
---|
class ghandle(object):
    """Retrieve one handle record, over HTTP or from canned test data.

    After construction the decoded record is available as ``self.msg``.
    Error handling based on the record's 'responseCode' is still missing.
    """

    # URL template of the handle REST API; '%s' receives the handle id.
    htmpl = 'http://hdl.handle.net/api/handles/%s'
    # Canned records, looked up through its 'hh' mapping for ids that start
    # with 'xxxxx' (dummyHandles presumably comes from the testdata import).
    dh = dummyHandles()

    def __init__(self, hdl, url=None):
        """Load the record for ``hdl``.

        Args:
            hdl: handle identifier. Ids starting with 'xxxxx' are served from
                the canned test records instead of the network.
            url: optional explicit URL to fetch; when omitted it is built
                from ``htmpl`` after mapping the 'hdl:999999' prefix to
                '10876.test'.
        """
        if hdl[:5] == 'xxxxx':
            self.msg = self.dh.hh[hdl]
        else:
            if url is None:
                # str.replace works on Python 2 and 3; string.replace() was
                # removed in Python 3.
                thisid = hdl.replace('hdl:999999', '10876.test')
                url = self.htmpl % thisid
            self.fetch(url)

    def fetch(self, url):
        """Download ``url``, decode the JSON body and store it in ``self.msg``.

        Uses ``urllib.request`` when the module-level import provided it and
        the ``requests`` package otherwise.  The body is parsed as JSON on
        both paths: the response is external data and must never be passed
        to eval().

        Raises:
            AssertionError: if the decoded body is not a dict or lacks the
                'responseCode' or 'handle' key.
        """
        import json  # local import keeps this block self-contained

        try:
            urlopen = request.urlopen
        except NameError:
            # urllib.request was not importable, so the module imported
            # 'requests' instead.
            urlopen = None

        if urlopen is None:
            msg = requests.get(url).json()
        else:
            fh = urlopen(url)
            try:
                raw = fh.read()
            finally:
                fh.close()
            if isinstance(raw, bytes):
                raw = raw.decode('utf-8')
            msg = json.loads(raw)

        assert isinstance(msg, dict), 'Response of wrong type'
        for k in ['responseCode', 'handle']:
            assert k in msg, 'Required key %s not found: %s' % (k, str(msg.keys()))

        self.msg = msg
---|
66 | |
---|
class GHandles(object):
    """Local cache of retrieved handles, so each id is requested only once."""

    def __init__(self):
        """Start with an empty cache (``hh`` maps handle id -> ghandle)."""
        self.hh = dict()

    def __call__(self, hdl):
        """Return the cached ghandle for ``hdl``, creating it on first use."""
        if hdl in self.hh:
            return self.hh[hdl]
        fetched = ghandle(hdl)
        self.hh[hdl] = fetched
        return fetched
---|
75 | |
---|
76 | |
---|
class pyhandle(object):
    """Lazy, attribute-style view of one handle record.

    Construction only stores the id; ``get()`` retrieves the record through
    the module-level ``ghandles`` cache and copies every record value onto
    the instance, keyed by the record's 'type'.
    """

    # Record types whose value is another handle id; these are wrapped in a
    # (not yet fetched) pyhandle instead of being stored as plain values.
    _LINK_TYPES = ('replaces', 'replacedBy', 'isReplacedBy', 'parent', 'replaced_by')

    def __init__(self, id):
        """Remember the handle id; nothing is fetched until ``get()``."""
        self.REC_id = id
        self.REC_got = False

    def get(self):
        """Fetch the record once and expose its fields as attributes.

        Besides one attribute per record type this sets ``obsolete`` (True
        when a 'replaced_by' record exists) and ``filename`` (last path
        component of 'URL', or '__unnamed__' when there is no URL).
        Subsequent calls are no-ops.
        """
        if self.REC_got:
            return

        import ast  # local import: only needed to decode the 'children' list

        g = ghandles(self.REC_id)
        for r in g.msg['values']:
            rtype = r['type']
            value = r['data']['value']
            if str(rtype) in self._LINK_TYPES:
                self.__dict__[rtype] = pyhandle(value)
            elif rtype == 'children':
                # The value is the textual form of a list of ids.  It comes
                # from an external service, so it is parsed as a literal
                # rather than executed with eval().
                self.__dict__['children'] = [pyhandle(c) for c in ast.literal_eval(value)]
            else:
                self.__dict__[rtype] = value

        self.obsolete = 'replaced_by' in self.__dict__
        if 'URL' in self.__dict__:
            # str.split works on Python 2 and 3; string.split() was removed
            # in Python 3.
            self.__dict__['filename'] = self.__dict__['URL'].split('/')[-1]
        else:
            self.__dict__['filename'] = '__unnamed__'

        # Mark the record as loaded so the guard at the top takes effect.
        self.REC_got = True

    def addLatest(self):
        """Retrieve handle records for replacements until a current file is found.

        Sets ``replacements`` (the chain of successors, oldest first) and
        ``latest`` (its last element).  For a record that is not obsolete
        the chain is empty and ``latest`` is the record itself.
        """
        self.get()
        if not self.obsolete:
            self.replacements = []
            self.latest = self
            return

        self.replaced_by.get()
        self.replacements = [self.replaced_by]
        # Ids already visited, to stop on a circular replacement chain.
        seen = set([self.REC_id, self.replaced_by.REC_id])
        while self.replacements[-1].obsolete:
            successor = self.replacements[-1].replaced_by
            if successor.REC_id in seen:
                break
            seen.add(successor.REC_id)
            # get() returns None, so fetch first and append the object itself.
            successor.get()
            self.replacements.append(successor)
        self.latest = self.replacements[-1]

    def addSiblings(self):
        """Collect the other children of this record's parent in ``siblings``.

        ``siblings`` is always set; it stays empty when the record is not at
        aggregation level 'file' or has no parent.
        """
        self.get()
        self.siblings = []
        if getattr(self, 'aggregation_level', None) != 'file':
            print('No known siblings .....')
            return

        if 'parent' not in self.__dict__:
            print('No parent')
            return

        self.parent.get()
        self.siblings = [c for c in getattr(self.parent, 'children', [])
                         if c.REC_id != self.REC_id]
---|
129 | |
---|
class main(object):
    """Command-line driver: parses the arguments and runs the requested action.

    All print calls pass a single pre-formatted string so that the output is
    the same under Python 2 and Python 3.
    """

    # Flags that take no value.
    knownargs0 = ['-h', '-v', '-t']
    # Options that are followed by one value.
    knownargs1 = ['-f', '-id']

    def __init__(self, args):
        """Parse ``args`` (argv style, program name first) and act on them.

        '-v' prints the version, '-h' prints version and module docstring,
        '-t' runs the test; each of these returns immediately.  Otherwise
        '-f <file>' and '-id <tracking id>' are each dumped if present.
        """
        self.htmpl = 'http://hdl.handle.net/api/handles/%s'
        self.version = '0.01.01'
        self.args = args
        self.parseArgs()
        if self.d.get('-v', False):
            print(self.version)
            return

        if self.d.get('-h', False):
            print(self.version)
            print(__doc__)
            return

        if self.d.get('-t', False):
            self.runTest()
            return

        if '-f' in self.d:
            self.dumpF(self.d['-f'])

        if '-id' in self.d:
            self.dumpF('', id=self.d['-id'])

    @staticmethod
    def _localId(tracking_id):
        """Map the 'hdl:999999' prefix of a tracking id to '10876.test'."""
        # str.replace works on Python 2 and 3; string.replace() was removed
        # in Python 3.
        return tracking_id.replace('hdl:999999', '10876.test')

    def _resolveId(self, fn, id):
        """Return the handle id for ``id`` or, if it is None, for file ``fn``."""
        if id is None:
            assert os.path.isfile(fn), 'File %s not found' % fn
            return self._localId(ncHead(fn).ga['tracking_id'])
        return self._localId(id)

    def dumpF(self, fn, id=None):
        """Report replacement and sibling information for a file or handle id.

        Args:
            fn: path of a NetCDF file whose 'tracking_id' global attribute
                identifies the handle; only used when ``id`` is None.
            id: tracking id to examine directly.

        The examined record is kept in ``self.p``.
        """
        thisid = self._resolveId(fn, id)

        self.p = pyhandle(thisid)
        self.p.get()
        if self.p.obsolete:
            print('******* OBSOLETE FILE ******')
            self.p.addLatest()
            for this in self.p.replacements:
                print('REPLACED BY:  %s %s' % (this.URL, this.REC_id))
        else:
            print('****** File is current')

        if 'parent' not in self.p.__dict__:
            print('No parent')
        else:
            self.p.parent.get()
            print('Parent: %s' % self.p.parent.REC_id)
            self.p.addSiblings()
            print('------- SIBLINGS -------')
            # addSiblings can return early without creating the attribute.
            for c in getattr(self.p, 'siblings', []):
                c.get()
                if c.obsolete:
                    c.addLatest()
                    print('>>> %s' % c.latest.URL)
                else:
                    print(c.URL)

    def dumpF__obsolete(self, fn, id=None):
        """Older variant of dumpF built on ghandle/phandle instead of pyhandle.

        Takes the same arguments as dumpF; stores the parsed record in
        ``self.p`` and the parsed parent record in ``self.pp``.
        """
        import ast  # local import: only needed to decode the 'children' list

        thisid = self._resolveId(fn, id)

        g = ghandle(thisid)
        self.p = phandle(g.msg)
        gp = ghandle(self.p.d['parent']['value'])
        self.pp = phandle(gp.msg)
        isReplaced = 'replaced_by' in self.p.d
        if isReplaced:
            print('******* OBSOLETE FILE ******')
            pr = self.p
            while isReplaced:
                h2 = pr.d['replaced_by']['value']
                pr = phandle(ghandle(h2).msg)
                isReplaced = 'replaced_by' in pr.d
                print('REPLACED BY:  %s %s' % (pr.d['URL']['value'], h2))
        else:
            print('File is current')

        # The children value is the textual form of a list of ids.  It comes
        # from an external service, so it is parsed as a literal rather than
        # executed with eval().
        cl = ast.literal_eval(self.pp.d['children']['value'])
        print('------- SIBLINGS -------')
        for c in cl:
            if c == thisid:
                continue
            pc = phandle(ghandle(c).msg)
            if 'replaced_by' in pc.d:
                # Follow the chain to the current version of the sibling.
                while 'replaced_by' in pc.d:
                    pc = phandle(ghandle(pc.d['replaced_by']['value']).msg)
                print('>>> %s' % pc.d['URL']['value'])
            elif 'URL' in pc.d:
                print(pc.d['URL']['value'])
            else:
                print('Sibling url not found %s' % (list(pc.d.keys()),))

    def runTest(self):
        """Check the handle record of the test file ``fn1`` and print it.

        Asserts that the record has the expected keys plus a tracking id,
        then fetches and prints the parent record.  The records are kept in
        ``self.p`` and ``self.pp``.
        """
        f = ncHead(fn1)
        hdl = self._localId(f.ga['tracking_id'])
        g = ghandle(hdl)
        self.p = phandle(g.msg)
        ## 'replaced_by' is additionally present if the file is obsolete
        expected = ['creation_date', 'aggregation_level', 'HS_ADMIN', '10320/loc',
                    'checksum', 'URL', 'parent']
        for k in expected:
            assert k in self.p.d, 'Expected handle content key %s not found:: %s' % (k, str(self.p.d.keys()))
        assert 'tracking_id' in self.p.d or 'trackingID' in self.p.d, 'No tracking id found: %s' % str(self.p.d.keys())
        for k in expected:
            print('%s: %s' % (k, self.p.d[k]))

        print('PARSING PARENT ..... ')
        print(self.htmpl % self.p.d['parent']['value'])
        g = ghandle(self.p.d['parent']['value'])
        self.pp = phandle(g.msg)
        for k in self.pp.d.keys():
            print('%s: %s' % (k, self.pp.d[k]))
        # NOTE: the parent record is printed but not validated.  The keys the
        # original author listed for it were: creation_date,
        # aggregation_level, HS_ADMIN, 10320/loc, checksum, URL, children,
        # tracking_id ('isReplacedBy' too if obsolete).

    def parseArgs(self):
        """Fill ``self.d`` from ``self.args[1:]``.

        Flags in knownargs0 map to True; options in knownargs1 map to the
        argument that follows them.  Unrecognised arguments are reported,
        as is an option given without its value (it is then left out of
        ``self.d``).
        """
        self.d = {}
        kn = self.knownargs0 + self.knownargs1
        xx = []
        pending = iter(self.args[1:])
        for a in pending:
            if a not in kn:
                xx.append(a)
            elif a in self.knownargs1:
                try:
                    self.d[a] = next(pending)
                except StopIteration:
                    print('ARGUMENT %s REQUIRES A VALUE' % a)
            else:
                self.d[a] = True
        if xx:
            print('ARGUMENTS NOT RECOGNISED: %s' % str(xx))
---|
276 | |
---|
class ncHead(object):
    """Global attributes of a NetCDF file, exposed as the dict ``ga``."""

    def __init__(self, fn):
        """Open ``fn`` with ncq3 and map each attribute name to its value."""
        dataset = ncq3.open(fn)
        dataset.getDigest()
        self.ga = dict((att.name, att.value) for att in dataset.alla)
---|
285 | |
---|
# Module-level handle cache; pyhandle.get() retrieves records through it.
ghandles = GHandles()

# Command-line entry point: hand the raw argument list to main.
if __name__ == '__main__':
    import sys
    m = main(sys.argv)
---|
290 | |
---|
291 | |
---|