source: CMIP6dreqbuild/trunk/src/framework/docChecks.py @ 760

Subversion URL: http://proj.badc.rl.ac.uk/svn/exarch/CMIP6dreqbuild/trunk/src/framework/docChecks.py@760
Revision 760, 1.7 KB checked in by mjuckes, 4 years ago (diff)

release cand

Line 
"""Write a canonical (C14N) copy of an XML document and report MD5 checksums.

Usage: python docChecks.py [input.xml [output.xml]]

With no arguments the script reads 'out/annotated_20150731.xml' and writes
'out/annotated_20150731_chk.xml'.  With one argument the output defaults to
'dreq.xml'; a second argument overrides the output path.

Outputs:
  * the canonical form of the input document, written to the output path;
  * MD5 checksums of the canonical document (whole, re-joined, and without
    its first line), reported through ``prnt``;
  * 'sectionChecksums.csv' (tab separated) with one MD5 checksum per section
    element found under every chapter except the first.
"""
import lxml, string, re, sys
import lxml.etree as et
from hashlib import md5
from io import BytesIO

## Select the logging helper matching the running interpreter.
## NOTE(review): the python 3 branch used to abort on an assert because
## write_c14n was being called with a text buffer; the canonical document is
## now handled as bytes throughout, which behaves the same on python 2 and 3.
python2 = sys.version_info[0] < 3
if python2:
  from dreqPy.utilP2 import util
  from StringIO import StringIO
  prnt = util.mlog().prnt
else:
  import dreqPy
  import dreqPy.utilP3
  ##from dreqPy.utilP3 import mlog3
  from io import StringIO
  prnt = dreqPy.utilP3.mlog3().prnt

## Command line: optional input path, optional output path.
if len(sys.argv) > 1:
  ifile = sys.argv[1]
  ofile = 'dreq.xml'
else:
  ifile = 'out/annotated_20150731.xml'
  ofile = 'out/annotated_20150731_chk.xml'

if len(sys.argv) > 2:
  ofile = sys.argv[2]

## Collect the local (namespace-free) tag name of every section element;
## the first child of the root is skipped.
doc = et.parse( ifile )
root = doc.getroot()
chapters = list( root )
snames = []
for c in chapters[1:]:
  for s in c:
    tn = s.tag
    if callable( tn ):
      ## comments and processing instructions carry a function as their tag
      continue
    ## drop a leading '{namespace-uri}' if present
    snames.append( tn.split( '}', 1 )[-1] )

## write_c14n always produces UTF-8 encoded bytes, so collect it in a byte
## buffer and keep working on bytes: md5 needs bytes on python 3.
output = BytesIO()
doc.write_c14n(output)
sdoc = output.getvalue()

## Checksums of the full document, of the document split and re-joined on
## newlines (expected to equal the first), and of everything after line 1.
ll = sdoc.split( b'\n' )
s2 = b'\n'.join( ll )
md5f = md5( sdoc ).hexdigest()
md5f2 = md5( s2 ).hexdigest()
md5p = md5( b'\n'.join( ll[1:] ) ).hexdigest()
prnt ('%s, %s' % (md5f, md5p ) )
prnt (md5f2 )

## binary mode: the bytes written must be exactly the bytes that were hashed
with open( ofile, 'wb' ) as oo:
  oo.write( sdoc )

## use re to extract sections of canonical document
## NOTE(review): the pattern is greedy and '<name' also matches longer tag
## names starting with 'name'; left as is because tightening it would change
## the checksums produced for existing documents.
with open( 'sectionChecksums.csv', 'w' ) as oo:
  oo.write( 'Section\tMD5 Checksum\n' )
  for s in snames:
    name = re.escape( s )
    s1 = re.compile( ('<%s(.*)</%s>' % (name,name)).encode( 'utf-8' ), re.DOTALL )
    m = s1.search( sdoc )
    if m is None:
      ## e.g. the element is serialised with a namespace prefix
      raise RuntimeError( 'Section %s not found in canonical document' % s )
    md5x = md5( m.group(0) ).hexdigest()
    prnt ( 'Checksum of section %s: %s' % (s,md5x) )
    oo.write( '%s\t%s\n' % (s,md5x) )
Note: See TracBrowser for help on using the repository browser.