source: CMIP6dreqbuild/trunk/src/framework/docChecks.py @ 989

Subversion URL: http://proj.badc.rl.ac.uk/svn/exarch/CMIP6dreqbuild/trunk/src/framework/docChecks.py@989
Revision 989, 1.7 KB checked in by mjuckes, 3 years ago (diff)

new ingest stream

Line 
"""Canonicalise an XML document and record MD5 checksums of it.

Usage: docChecks.py [input.xml [output.xml]]

The input document is parsed with lxml and serialised in canonical (C14N)
form.  The script then:

  * logs the MD5 of the whole canonical document and of the document with
    its first line removed;
  * writes the canonical document to the output file;
  * writes 'sectionChecksums.csv', holding one MD5 per section, where a
    section is any grandchild element of the root found in any chapter
    (child of the root) other than the first.

The canonical document is handled as bytes throughout, so the same code runs
under Python 2 and Python 3.
"""
import lxml
import string
import re
import sys
import lxml.etree as et
from hashlib import md5
from io import BytesIO

python2 = True
if sys.version_info[0] == 3:
  python2 = False
  import dreqPy
  import dreqPy.utilP3
  from io import StringIO
  prnt = dreqPy.utilP3.mlog3().prnt
else:
  from dreqPy.utilP2 import util
  from StringIO import StringIO
  prnt = util.mlog().prnt

## Command line: optional input file, optional output file.
if len(sys.argv) > 1:
  ifile = sys.argv[1]
  ofile = 'dreq.xml'
else:
  ifile = 'test.xml'
  ofile = 'out/annotated_20150731_chk.xml'

if len(sys.argv) > 2:
  ofile = sys.argv[2]

## Collect the local (namespace-stripped) tag name of every section found in
## any chapter after the first one.
doc = et.parse( ifile )
root = doc.getroot()
chapters = list( root )
snames = []
for c in chapters[1:]:
  for s in c:
    tn = s.tag
    ## lxml reports namespaced tags as '{namespace-uri}local-name'.
    ix = tn.find( '}' )
    if ix != -1:
      tn = tn[ix+1:]
    snames.append(tn)

## write_c14n emits bytes, so a bytes buffer is needed; a text StringIO
## fails under Python 3.
output = BytesIO()
doc.write_c14n( output )
sdoc = output.getvalue()
ll = sdoc.split( b'\n' )
## s2 is a split/join round trip of sdoc; its checksum is logged alongside
## that of sdoc itself.
s2 = b'\n'.join( ll )
md5f = md5( sdoc ).hexdigest()
md5f2 = md5( s2 ).hexdigest()
## Checksum of the document with its first line dropped.
md5p = md5( b'\n'.join( ll[1:] ) ).hexdigest()
prnt ('%s, %s' % (md5f, md5p ) )
prnt (md5f2 )
## Binary mode so the bytes on disk are exactly the bytes that were hashed.
with open( ofile, 'wb' ) as oo:
  oo.write( sdoc )

## use re to extract sections of canonical document
with open( 'sectionChecksums.csv', 'w' ) as oo:
  oo.write( 'Section\tMD5 Checksum\n' )
  for s in snames:
    ## Escape the name so that characters such as '.' are matched literally.
    ## The match is deliberately still greedy, as in the original expression,
    ## so that the extracted span (and hence the checksum) is unchanged for
    ## ordinary names.
    esc = re.escape( s )
    s1 = re.compile( ( '<%s(.*)</%s>' % (esc,esc) ).encode( 'utf-8' ), re.DOTALL )
    m = s1.search( sdoc )
    if m is None:
      raise ValueError( 'Section %s not found in canonical document' % s )
    md5x = md5( m.group(0) ).hexdigest()
    prnt ( 'Checksum of section %s: %s' % (s,md5x) )
    oo.write( '%s\t%s\n' % (s,md5x) )
Note: See TracBrowser for help on using the repository browser.