Changeset 1097
- Timestamp:
- 06/06/06 13:24:29 (15 years ago)
- Location:
- TI07-MOLES/trunk/StubB/XSLT/browse/portal/cgi
- Files:
-
- 1 added
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
TI07-MOLES/trunk/StubB/XSLT/browse/portal/cgi/DIF.py
r1049 r1097 8 8 from ServiceBinding import Service 9 9 from renderEntity import renderEntity 10 from ETxmlView import loadET 10 11 11 12 class DIF: … … 16 17 '''Initialise a python dif instance based on an xml document ''' 17 18 self.metadataType='DIF' 18 #self.xml=xml.decode('utf-8','replace')19 #self.xml=unicode(xml,'latin-1')20 #self.xml=xml.encode('latin-1','replace')21 encodings=['ascii',]#['latin-1','ascii','iso-8859-1']22 19 try: 23 for option in encodings: 24 try: 25 s=xml.encode(option,'replace') 26 27 self.elem=ET.fromstring(s) 28 self.xml=s 29 except UnicodeError: 30 pass 31 else: 32 break 20 self.elem=loadET(xml) 21 self.xml=xml 33 22 except: 34 23 # for some reason we can't parse the document ... 35 #print '<p>%s</p>'%xml[11385:11395] 36 #raise bnl 37 #import codecs 38 #f=codecs.open('unparseable.xml','a','utf-8') 39 #f.write(xml+'\n') 40 print '<p>'+s[11360:11420]+'</p>' 41 raise 24 raise 42 25 self.elem=None 43 26 return -
TI07-MOLES/trunk/StubB/XSLT/browse/portal/cgi/ETxmlView.py
r996 r1097 8 8 import ElementTree as ET 9 9 import re 10 from sub_orphan import * 10 11 11 12 def et2text(elem,indent='',html=0,space=' '): … … 64 65 result+='%s/%s%s'%(lt,span(elem.tag,'xmlElemTag'),gt) 65 66 return div(result,'xmlElem') 67 68 def loadET(inputString): 69 ''' This method returns an elementtree object after some cleaning 70 of the string, essentially a hack to make sure that xml doesn't contain any 71 naughty & characters alone (typically from URL copies), or unescaped orphan 72 < or > signs ... and that the unicode has been processed to something that 73 might work''' 74 75 inputString=re.sub(r'&(?!\w+;)', '&amp;', inputString) 76 77 # first just try and do it so we don't waste time if we don't need to ... 78 try: 79 elem=ET.fromstring(inputString) 80 return elem 81 except: 82 pass 83 # and carry on 66 84 67 def xmlCleanup(string): 68 '''This is a hack to make sure that xml doesn't contain any naughty & characters alone 69 (typically from URL copies)''' 70 #match ? but not (?! if it's followed by alphanumeric \w characters + and a semicolon ; 71 #don't want to use urlencode cos we don't know where it came from ... 72 return re.sub(r'&(?!\w+;)', '&amp;', string) 85 #ok, let's deal with orphan > and < signs then ... 86 subtool=subAI() 87 s=subtool.sub(inputString) 88 89 #now let's sort out an encoding 90 encodings=['utf-8','latin-1','iso-8859-1','ascii',] 91 elem=None 92 for option in encodings: 93 try: 94 s=s.encode(option,'replace') 95 try: 96 elem=ET.fromstring(s) 97 except: 98 s=re.sub('\n','<br/>LINE: ',s) 99 print '<p>%s</p>'%s 100 raise 101 except UnicodeError: 102 pass 103 else: 104 break 105 return elem 73 106 74 107 def xml2text(xmlString): 75 tree= ET.fromstring(xmlCleanup(xmlString))108 tree=loadET(xmlString) 76 109 return et2text(tree) 77 110 78 111 def xml2HTML(xmlString,**kw): 79 tree= ET.fromstring(xmlCleanup(xmlString))112 tree=loadET(xmlString) 80 113 return et2HTML(tree,**kw) -
TI07-MOLES/trunk/StubB/XSLT/browse/portal/cgi/browseCGI.py
r1050 r1097 230 230 results=ws.GetResults(offset=state.offset,number=state.stride) 231 231 difs=[] 232 for result in results: 233 difs.append(xmlCleanup(result)) 232 for result in results: difs.append(result) 234 233 html=renderDiscoverySet(difs,state,selector=self.selector, 235 234 summary=1,spatial=1,temporal=1,services=1) -
TI07-MOLES/trunk/StubB/XSLT/browse/portal/cgi/renderDiscoverySet.py
r1023 r1097 132 132 for result in results: 133 133 #g.write(xmlCleanup(result)+'\n') 134 difs.append( xmlCleanup(result))134 difs.append(result) 135 135 html=renderDiscoverySet(difs,state,summary=1,spatial=1,temporal=1) 136 136 f=file('output.html','wb')
Note: See TracChangeset
for help on using the changeset viewer.