source: cows_wps/trunk/cows_wps/utils/common.py @ 7575

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/cows_wps/trunk/cows_wps/utils/common.py@7575
Revision 7575, 11.3 KB checked in by astephen, 9 years ago (diff)

Added xpath extraction to extractFromXML

Line 
1"""
2common.py
3=========
4
5Provides a number of utilities for the WPS.
6
7"""
8# Import standard library modules
9import sys
10import os
11import time
12import socket
13import random
14import md5
15import string
16import datetime
17import urllib2
18import Cookie
19import xml.etree.ElementTree as ET
20
21#from cows_wps.utils import case_sensitive_ordered_config_parser as cp
22
23import logging
24log = logging.getLogger(__name__)
25
def exitNicely(doc_string="", msg=None):
    """
    Write a message to standard output and then exit cleanly.

    doc_string -- text printed first.
    msg        -- optional text printed on the line after ``doc_string``;
                  ``None`` (the default) is treated as an empty message.

    Always raises SystemExit through ``sys.exit()``.
    """
    # With the default msg=None the concatenation below used to raise a
    # TypeError before the process could exit.
    if msg is None:
        msg = ""

    # Parenthesised single-argument form behaves the same as the print
    # statement on Python 2 and is also valid on Python 3.
    print(doc_string + "\n" + msg)
    sys.exit()
30
31
def generateLocalFilePath(proc_base_dir, job_id, file_name):
    """
    Returns the file system path of a job output file, built as
    <proc_base_dir>/<job_id>/outputs/<file_name>.
    """
    path_parts = (proc_base_dir, job_id, "outputs", file_name)
    return os.path.join(*path_parts)
38
def parseQueryString(qs):
    """
    Parses a query string into a dictionary of name -> value strings.

    Everything up to and including the last "?" in ``qs`` is discarded, the
    remainder is split on "&" and each non-empty item is split on its first
    "=". An item without "=" maps to an empty string. Later duplicates of a
    name overwrite earlier ones. No URL-decoding is performed.
    """
    arg_dict = {}
    args = qs.split("?")[-1].strip()

    for item in args.split("&"):
        if item == "":
            continue
        # partition() splits on the first "=" only, so values that contain
        # "=" and items that have no "=" no longer raise ValueError.
        name, _sep, value = item.partition("=")
        arg_dict[name] = value

    return arg_dict
50
51
def parseSimpleInputs(inputs_file):
    """
    Parses all inputs from a simple text file and returns them as a
    dictionary of name -> converted value.

    Each non-blank line has the form::

        <name>,<item|array>,<type>=<value>

    For "item" the value is converted by calling <type> on the value string;
    for "array" the value is split on "," and each piece is converted. Any
    other kind leaves the value as the raw string.
    """
    inputs = open(inputs_file)
    try:
        lines = inputs.readlines()
    finally:
        # Close the handle even when reading fails.
        inputs.close()

    arg_dict = {}
    for line in lines:
        line = line.strip()
        if line == "":
            # Blank lines (e.g. a trailing newline) carry no input.
            continue

        # Split on the first "=" only so that values may contain "=".
        items, value = line.split("=", 1)
        (name, item_or_array, item_type) = items.split(",")

        if item_or_array in ("array", "item"):
            # SECURITY NOTE(review): the type name comes straight from the
            # file and is evaluated; this must only be used on trusted
            # input files. The value itself is no longer spliced into the
            # evaluated expression, so quotes or backslashes in a value
            # cannot break or alter it.
            converter = eval(item_type)
            if item_or_array == "array":
                value = [converter(v) for v in value.split(",")]
            else:
                value = converter(value)

        arg_dict[name] = value

    return arg_dict
71
72
def makeHTML(title, arg_dict=None, message=""):
    """
    Returns a simple HTML document that lists the arguments sent.

    title    -- used for both the <TITLE> and the <H2> heading.
    arg_dict -- optional mapping; when non-empty its items are listed in
                sorted key order. ``None`` is treated as empty.
    message  -- optional text shown in a "Message:" paragraph when non-empty.

    NOTE(review): title, message, keys and values are inserted into the
    markup without HTML escaping; confirm callers never pass untrusted text.
    """
    parts = [
        "<HTML>\n"
        "<HEAD><TITLE>%s</TITLE></HEAD>\n"
        "<BODY>\n"
        "<H2>%s</H2>\n"
        "\n"
        "<P>Welcome to this test service.</P>\n" % (title, title)
    ]

    if message:
        parts.append("<P>Message: " + message + "</P>\n")

    if arg_dict:
        parts.append("<P>You sent the arguments:</P>\n<UL>\n")
        for key in sorted(arg_dict):
            parts.append("  <LI>%s = %s</LI>\n" % (key, arg_dict[key]))
        parts.append("</UL>\n")

    parts.append("\n</BODY>\n</HTML>\n")
    return "".join(parts)
100
101
def listIfNot(item, rttype = "string"):
    """
    Returns ``item`` as a sequence, wrapping it in a one-element list when it
    is not already a list or tuple (or a subclass of either).

    rttype -- "string" converts every element with str(), "float" converts
              every element with float(); any other value returns the
              sequence with its elements unconverted.
    """
    # isinstance() also recognises list/tuple subclasses, which the former
    # exact type() comparison wrapped as if they were scalars.
    if not isinstance(item, (list, tuple)):
        item = [item]

    if rttype == "string":
        return [str(i) for i in item]
    if rttype == "float":
        return [float(i) for i in item]

    return item
113
114
# Lookup from image MIME type to file extension
imageFormats = {
    "image/png": "png",
    "image/jpeg": "jpg",
    "application/pdf": "pdf",
    "application/postscript": "ps",
}

# Inverse lookup: file extension to MIME type
reverseImageFormats = dict((ext, mime) for (mime, ext) in imageFormats.items())
124
125
def fixTrueFalse(value):
    """
    Returns the boolean True or False when the string ``value`` spells
    "true" or "false" in any letter case; any other string is returned
    unchanged.
    """
    lowered = value.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False
    return value
131
132
def getUniqueId(*args):
    """
    Generates an identifier that is intended to be unique.

    The identifier is the MD5 hex digest (32 hexadecimal characters) of the
    current time in milliseconds, a random number, this host's network
    address and the string form of ``args``.
    Any arguments only create more randomness.
    """
    # Local import: hashlib replaces the deprecated ``md5`` module without
    # touching the module-level import block.
    import hashlib

    t = int(time.time() * 1000)
    r = int(random.random() * 100000000000000000)
    try:
        a = socket.gethostbyname(socket.gethostname())
    except Exception:
        # if we can't get a network address, just imagine one
        a = random.random() * 100000000000000000

    data = str(t) + ' ' + str(r) + ' ' + str(a) + ' ' + str(args)
    if not isinstance(data, bytes):
        # Python 3 text must be encoded before hashing; on Python 2 ``str``
        # is already a byte string and is hashed as-is.
        data = data.encode("utf-8")

    return hashlib.md5(data).hexdigest()
148
149
def underscoreLowerToTitleCamel(s):
    """
    Returns a title camel case string version of an
    underscore-delimited lower case string.

    Underscores are removed and the first character of every
    underscore-separated piece is upper-cased; all other characters are
    kept as they are.
    """
    pieces = s.split("_")
    # Empty pieces (leading, trailing or doubled underscores) add nothing.
    return "".join(piece[:1].upper() + piece[1:] for piece in pieces)
167
168
def titleCamelToUnderscoreLower(s, spacer="_"):
    """
    Returns a lower case underscore-delimited version of
    title camel case string.

    Every ASCII upper case letter and every digit 0-9 is lower-cased and,
    unless it is the first character of ``s``, prefixed with ``spacer``.
    All other characters are copied unchanged.
    """
    # Inputs listed here bypass the conversion and return the mapped value.
    exceptional_mappings = {"1": "1"}
    if s in exceptional_mappings:
        return exceptional_mappings[s]

    # string.ascii_uppercase is locale-independent and exists on both
    # Python 2 and 3, unlike string.uppercase.
    breakers = string.ascii_uppercase + "0123456789"

    converted = []
    for (position, char) in enumerate(s):
        if char in breakers:
            char = char.lower()
            if position > 0:
                char = spacer + char
        converted.append(char)

    return "".join(converted)
194
def readLinesFromFile(path):
    """
    Returns the contents of the text file at ``path`` as a list of lines,
    each keeping its line ending, as produced by ``readlines()``.
    """
    fin = open(path, 'r')
    try:
        return fin.readlines()
    finally:
        # Close the handle even when reading fails.
        fin.close()
200   
201
def stringToDateTime(timeString, format="%Y-%m-%d %H:%M:%S", includeMicroseconds=False):
    """
    Converts a time string to a datetime.datetime instance.

    timeString          -- the text to parse.
    format              -- strptime format for the part before any ".".
    includeMicroseconds -- when True, text after the first "." is read as an
                           integer number of microseconds and applied to the
                           result. A string without a fractional part is
                           accepted and gives microsecond 0.

    Raises ValueError when the text does not match ``format`` or the
    fractional part is not a valid microsecond value.
    """
    if not includeMicroseconds:
        return datetime.datetime.strptime(timeString, format)

    # str(datetime) omits the fraction entirely when microsecond is 0, so a
    # missing "." must not be treated as an error.
    whole, _dot, fraction = str(timeString).partition(".")
    d = datetime.datetime.strptime(whole, format)

    if fraction:
        # NOTE(review): the fraction is taken as a literal microsecond count
        # (".5" means 5 microseconds, not half a second). This matches
        # six-digit input such as str(datetime) produces; confirm no caller
        # passes shorter decimal fractions.
        d = d.replace(microsecond=int(fraction))

    return d
212
213
def convertToBooleanIfString(value):
    """
    Returns ``value`` untouched when it is already a boolean; otherwise
    passes it to stringToBool() and returns the boolean that produces.
    """
    if isinstance(value, bool):
        return value

    return stringToBool(value)
225
def stringToBool(boolAsString):
    """
    Converts a string to a boolean by comparing it, ignoring case, with
    'true' and 'false' and returning the matching boolean value.

    If the string matches neither 'true' nor 'false' an exception is raised.
    """
    known_values = {'true': True, 'false': False}
    lowered = boolAsString.lower()

    if lowered in known_values:
        return known_values[lowered]

    raise Exception("Can't convert the string %s to a boolean" % (boolAsString,))
240   
241
def buildProcessCallable(processModuleString):
    """
    Imports a class named by a "<module path>#<class name>" string and
    returns a new instance of it, created with no constructor arguments.
    """
    module_path, class_name = processModuleString.split('#')

    # A non-empty fromlist makes __import__ return the named (sub)module
    # itself rather than the top-level package.
    loaded_module = __import__(module_path, fromlist=[''])

    return getattr(loaded_module, class_name)()
250
251
def downloadFromURL(url, cookies = None):
    """
    Downloads response from URL. Use cookies if ``cookies`` is defined.

    url     -- the address to fetch.
    cookies -- optional mapping of cookie name -> value; all entries are
               sent together in a single ``Cookie`` request header.

    Returns the response body as read from the opened URL.
    """
    req = urllib2.Request(url = url)

    if cookies:
        jar = Cookie.SimpleCookie()
        for (k, v) in cookies.items():
            jar[k] = v

        # A request "Cookie" header carries only name=value pairs separated
        # by "; ". SimpleCookie.output() would prepend "Set-Cookie:" and add
        # response attributes, and calling add_header() once per cookie
        # keeps only the last one, so the header is built once by hand.
        pairs = ["%s=%s" % (morsel.key, morsel.coded_value)
                 for (_name, morsel) in sorted(jar.items())]
        req.add_header('Cookie', "; ".join(pairs))

    getter = urllib2.urlopen(req)
    try:
        resp = getter.read()
    finally:
        # Release the connection even when reading fails.
        getter.close()
    return resp
271
272
def forceToElementTree(item):
    """
    Returns an element tree object from item, which can be type:
      * element tree node (returned unchanged)
      * xml as string (returns the parsed root Element)
      * xml file path (returns the ElementTree produced by ET.parse)

    Note that the file case returns an ElementTree wrapper rather than an
    Element, so callers may receive either type.

    Raises Exception when item is neither parseable XML text nor a
    parseable XML file.
    """
    if ET.iselement(item):
        return item

    item = str(item)

    # First try to treat the text as XML content...
    try:
        return ET.fromstring(item)
    except Exception:
        # "except Exception" keeps the best-effort fallback without also
        # swallowing KeyboardInterrupt/SystemExit as a bare except would.
        pass

    # ...then as the path of an XML file.
    try:
        return ET.parse(item)
    except Exception:
        raise Exception("Cannot force item to element tree node.")
293 
294
def mapXMLNamespace(xml_path, format = "{%s}%s"):
    """
    Returns an xml path item appropriate for element tree searching from
    an input item of "<namespace_id>:<element>" or just "<element>".

    If path includes "/" separators then map all of it.

    xml_path -- a single path item or a "/"-separated path of items.
    format   -- template applied as ``format % (namespace_uri, element)`` to
                every item whose prefix is a known namespace id. Items with
                no prefix, or an unknown one, are kept unchanged.
    """
    namespace_map = {"gml": "http://www.opengis.net/gml",
                     "wcs": "http://www.opengis.net/wcs",
                     "ows": "http://www.opengeospatial.net/ows",
                     "wps": "http://www.opengeospatial.net/wps",
                    }

    mapped_parts = []

    for part in xml_path.split("/"):
        prefix, colon, _rest = part.partition(":")

        if colon and prefix in namespace_map:
            # The element name is whatever follows the last ":".
            mapped_item = format % (namespace_map[prefix], part.split(":")[-1])
        else:
            # This branch used to reference an undefined name and raised
            # NameError for every item without a known prefix.
            mapped_item = part

        mapped_parts.append(mapped_item)

    return "/".join(mapped_parts)
327   
328
329
def extractFromXML(xml, xml_path):
    """
    Extracts a list of items from an XML string/doc. The list is found in
    the xml_path lookup which is represented like a file path. E.g.

        Extracts values at xml_path location within XML response
        E.g. <person><job>nurse mother</job></person> and "person/job" will return ["nurse", "mother"]

    The first component of xml_path is dropped (it is taken to name the
    document root); each remaining component is namespace-mapped with
    mapXMLNamespace() and looked up with find() on the node reached so far.
    The text of the final node is split on whitespace.

    Raises Exception when any component cannot be found or the final node
    has no text.
    """
    failure = "Could not extract element from XML using xml path: '%s'" % xml_path

    this_node = forceToElementTree(xml)

    for item in xml_path.split("/")[1:]:
        # Trace output only, so logged at debug rather than warning level.
        log.debug("item: %s", this_node)

        this_node = this_node.find(mapXMLNamespace(item))
        if this_node is None:
            # Fail with the descriptive error instead of letting the next
            # lookup raise a bare AttributeError.
            raise Exception(failure)

    # getattr() covers the ElementTree wrapper returned for file input,
    # which has no "text" attribute.
    text = getattr(this_node, "text", None)
    if text is None:
        raise Exception(failure)

    return text.split()
367
368
def extractFromXML2(xml, xpath):
    """
    Extracts a list of whitespace-separated items from the text of every
    element matching ``xpath`` in an XML string, file or element tree node.

    If the first component of ``xpath`` names the document root it is
    removed and the rest is searched relative to the root with findall(),
    after namespace mapping with mapXMLNamespace(). If nothing remains, the
    root's own text is used.

    Raises Exception when the root itself is requested but has no text.
    """
    orig_xpath = xpath
    tree = forceToElementTree(xml)

    # forceToElementTree() returns an ElementTree for files but an Element
    # for strings and nodes; normalise to the root Element.
    if hasattr(tree, "getroot"):
        root = tree.getroot()
    else:
        root = tree

    xpath_parts = xpath.split("/")

    if root.tag == mapXMLNamespace(xpath_parts[0]):
        xpath = "/".join(xpath_parts[1:])

    if xpath == "":
        if root.text is None:
            raise Exception("Could not extract element from XML using xml path: '%s'" % orig_xpath)
        return root.text.split()

    response = []
    for result in root.findall(mapXMLNamespace(xpath)):
        # Elements without text contribute no items.
        if result.text:
            response.extend(result.text.split())

    return response
396
397#extractFromXML("/tmp/xml.xml", "wps:ExecuteResponse/wps:ProcessOutputs/wps:Output/wps:ComplexValue/wps:WPSResponseDetails/wps:ProcessSpecificContent/wps:StashCodes")
398#e = extractFromXML2("/tmp/xml.xml", "wps:ExecuteResponse/wps:ProcessOutputs/wps:Output/wps:ComplexValue/wps:WPSResponseDetails/wps:ProcessSpecificContent/wps:StashCodes")
399#print "RESP:", e
400
def mapDownloadURLToFilePath(url):
    """
    Returns the local path of the output file that a download URL refers to.

    The last two "/"-separated components of ``url`` are taken as the job id
    and the file name. The job's request is fetched from requestManager and
    the path is <process_dir>/outputs/<file_name>.
    """
    from cows_wps.model.managers import requestManager

    job_id, file_name = url.split("/")[-2:]
    request = requestManager.getRequest(job_id, sgeUpdate=False)

    return os.path.join(request.job.process_dir, "outputs", file_name)
410
411
def isAdminLoggedIn(request):
    """
    Returns boolean: True when the request's "wps_ui_admin" cookie is set
    and equals the REMOTE_ADDR entry of the request environ, else False.

    NOTE(review): the check compares a client-supplied cookie with the
    client's own address -- confirm this is an adequate admin test.
    """
    admin_cookie = request.cookies.get("wps_ui_admin", None)

    # REMOTE_ADDR is only looked up when a cookie value is present.
    if not admin_cookie:
        return False

    if admin_cookie == request.environ["REMOTE_ADDR"]:
        return True

    return False
421
422
if __name__ == "__main__":

    # Show a few generated ids.
    for arg in "abcde":
        print(getUniqueId(arg))

    # Convert camel case names to underscore form and back again.
    print("Testing back and forth mappings...")
    to_underscore = titleCamelToUnderscoreLower
    to_camel = underscoreLowerToTitleCamel
    for s in ("SamplingDict1", "WGenRuns", "MeaningPeriod", "HotPotato23Highway"):
        n = to_underscore(s)
        new_s = to_camel(n)
        print("s: %s n: %s new_s: %s same: %s" % (s, n, new_s, s == new_s))

    print("Testing use of space from Camel Case version.")
    print(to_underscore("SamplingDictNumber2", spacer=" "))
Note: See TracBrowser for help on using the repository browser.