Ignore:
Timestamp:
06/10/10 12:47:23 (9 years ago)
Author:
astephen
Message:

Added xpath extraction to extractFromXML

File:
1 edited

Legend:

Unmodified
Added
Removed
  • cows_wps/trunk/cows_wps/utils/common.py

    r7573 r7575  
    271271 
    272272 
    273 def extractFromXML(xml, xml_path): 
    274     """ 
    275     Extracts a list of items from an XML string/doc. The list is found in 
    276     the xml_path lookup which is represented like a file path. E.g. 
    277  
    278         Extracts values at xml_path location within XML response 
    279         E.g. <person><job>nurse mother</job></person> and "person/job" will return ["nurse", "mother"] 
    280  
    281     """ 
    282 #    fout = open("/tmp/xml.xml", "w"); fout.write(xml) ; fout.close() 
    283     node = ET.fromstring(xml) 
    284  
     273def forceToElementTree(item): 
     274    """ 
     275    Returns an element tree node from item, which can be type: 
     276      * element tree node 
     277      * xml as string 
     278      * xml file 
     279    """ 
     280    if ET.iselement(item): return item 
     281 
     282    item = str(item) 
     283 
     284    try: 
     285        xml = ET.fromstring(item) 
     286    except: 
     287        try: 
     288            xml = ET.parse(item) 
     289        except: 
     290            raise Exception("Cannot force item to element tree node.") 
     291 
     292    return xml 
     293  
     294 
     295def mapXMLNamespace(xml_path, format = "{%s}%s"): 
     296    """ 
     297    Returns an xml path item appropriate for element tree searching from 
     298    an input item of "<namespace_id>:<element>" or just "<element>" 
     299 
     300    If path includes "/" separators then map all of it. 
     301    """ 
    285302    namespace_map = {"gml": "http://www.opengis.net/gml", 
    286303                     "wcs": "http://www.opengis.net/wcs", 
    287304                     "ows": "http://www.opengeospatial.net/ows", 
    288                      "wps": "http://www.opengeospatial.net/wps",  
     305                     "wps": "http://www.opengeospatial.net/wps", 
    289306                    } 
     307 
     308    parts = xml_path.split("/") 
     309    mapped_parts = [] 
     310 
     311#    format = "%s:%s" # "{%s}%s" 
     312#    format = "{%s}%s" 
     313 
     314    for part in parts: 
     315 
     316        for (id, ns) in namespace_map.items(): 
     317            if part.find("%s:" % id) == 0: 
     318                mapped_item = format % (ns, part.split(":")[-1]) 
     319                break 
     320        else: 
     321            mapped_item = item 
     322 
     323        mapped_parts.append(mapped_item) 
     324 
     325    mapped_path = "/".join(mapped_parts) 
     326    return mapped_path 
     327     
     328 
     329 
     330def extractFromXML(xml, xml_path): 
     331    """ 
     332    Extracts a list of items from an XML string/doc. The list is found in 
     333    the xml_path lookup which is represented like a file path. E.g. 
     334 
     335        Extracts values at xml_path location within XML response 
     336        E.g. <person><job>nurse mother</job></person> and "person/job" will return ["nurse", "mother"] 
     337 
     338    """ 
     339#    fout = open("/tmp/xml.xml", "w"); fout.write(xml) ; fout.close() 
     340    node = forceToElementTree(xml) 
     341#ET.fromstring(xml) 
    290342 
    291343    path_parts = xml_path.split("/")[1:] 
     
    295347        log.warn("item: %s" % str(this_node)) 
    296348 
    297         for (id, ns) in namespace_map.items(): 
     349        """for (id, ns) in namespace_map.items(): 
    298350            if item.find("%s:" % id) == 0: 
    299351                search_item = "{%s}%s" % (ns, item.split(":")[-1]) 
     
    301353        else: 
    302354            search_item = item 
    303  
    304         print this_node, search_item 
     355""" 
     356 
     357        search_item = mapXMLNamespace(item)  
     358 
    305359        this_node = this_node.find(search_item) 
    306360        
     
    312366        raise Exception("Could not extract element from XML using xml path: '%s'" % xml_path) 
    313367 
    314   
     368 
     369def extractFromXML2(xml, xpath): 
     370 
     371    orig_xpath = xpath 
     372    xml = forceToElementTree(xml) 
     373 
     374    xpath_parts = xpath.split("/") 
     375 
     376    if xml.getroot().tag == mapXMLNamespace(xpath_parts[0]): 
     377        xpath = "/".join(xpath_parts[1:]) 
     378 
     379    if xpath  == "": 
     380        try: 
     381            extracted_list = xml.text.split() 
     382            return extracted_list 
     383        except: 
     384            raise Exception("Could not extract element from XML using xml path: '%s'" % orig_xpath) 
     385 
     386    else: 
     387        mapped_xpath = mapXMLNamespace(xpath) 
     388        results = xml.findall(mapped_xpath) 
     389        lists = [r.text.split() for r in results]  
     390 
     391        response = [] 
     392        for l in lists: 
     393            response.extend(l) 
     394 
     395        return response 
     396 
     397#extractFromXML("/tmp/xml.xml", "wps:ExecuteResponse/wps:ProcessOutputs/wps:Output/wps:ComplexValue/wps:WPSResponseDetails/wps:ProcessSpecificContent/wps:StashCodes") 
     398#e = extractFromXML2("/tmp/xml.xml", "wps:ExecuteResponse/wps:ProcessOutputs/wps:Output/wps:ComplexValue/wps:WPSResponseDetails/wps:ProcessSpecificContent/wps:StashCodes")  
     399#print "RESP:", e 
     400 
    315401def mapDownloadURLToFilePath(url): 
    316402    from cows_wps.model.managers import requestManager 
Note: See TracChangeset for help on using the changeset viewer.