Changeset 5096


Ignore:
Timestamp:
11/03/09 17:40:46 (10 years ago)
Author:
cbyrom
Message:

Adjust correction of unicode/string contents and handling of empty proxy
settings.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • ndgCommon/trunk/ndg/common/src/lib/utilities.py

    r5010 r5096  
    142142    '''     
    143143    logging.debug("Reading info from uri, '%s'" %uri) 
     144    if not proxy: 
     145        proxy = {} 
    144146    f = urllib.urlopen(uri, proxies = proxy) 
    145147    pageData = f.read() 
     
    296298 
    297299 
     300 
     301 
    298302def encodeIntoHTMLNumericalCodes(inputString): 
    299303    ''' 
     
    304308    ''' 
    305309    # NB, the latin coding accepts unicode up to 255 
    306     correctedString = getString(inputString)#inputString.decode('string_escape')#('latin-1') 
     310    #correctedString = getString(inputString)#inputString.decode('string_escape')#('latin-1') 
     311     
     312    if isinstance(inputString, unicode): 
     313        correctedString = inputString.encode('unicode_escape') 
     314    else: 
     315        correctedString = unicode(inputString, errors = 'replace') 
    307316    #unicode(inputString).encode('unicode_escape') 
    308317    # the XMLCHARREFREPLACE does the required character replacement 
    309     return correctedString.encode('utf-8', 'xmlcharrefreplace') 
    310     #return correctedString.encode('ascii', 'xmlcharrefreplace') 
     318    #return correctedString.encode('utf-8', 'xmlcharrefreplace') 
     319    return correctedString.encode('ascii', 'xmlcharrefreplace') 
    311320    #return correctedString.encode('utf-8') 
    312321 
     
    340349        logging.debug("- invalid") 
    341350    return isValid 
    342      
     351 
    343352 
    344353def escapeSpecialCharacters(inputString): 
     
    350359    correctedString = cgi.escape(inputString) 
    351360     
     361    correctedString = encodeIntoHTMLNumericalCodes(correctedString) 
    352362    if inputString != correctedString: 
    353363        logging.info("Note: input data made XML friendly (\nold:'%s' \nnew:'%s')" %(inputString, correctedString)) 
Note: See TracChangeset for help on using the changeset viewer.