source: CMIP6dreqbuild/trunk/src/framework/cmchecl.py @ 979

Subversion URL: http://proj.badc.rl.ac.uk/svn/exarch/CMIP6dreqbuild/trunk/src/framework/cmchecl.py@1149
Revision 979, 3.3 KB checked in by mjuckes, 3 years ago (diff)

01.00.07

Line 
1import string
2
3## The specification is not explicit -- but the following assumes no brackets in the comment.
4#dim1: [dim2: [dim3: ...]] method [where type1 [over type2]] [within|over days|years] [(comment)]
5##
6## except that comment can occur after every method ...
7
8
9## parse left to right:
10## starts <d1>:<space>
11## optional <dx>:<space>
12## or <word> == method
13## where|within|over|(
14import re
15
16## s1 appears to work, splitting the string into dictionaries.  Add "+$" to the end to see whether the string as a whole is a complete match.
17##s1 = re.compile( '((?P<a>(([a-zA-Z][a-zA-Z0-9-_]*:\s+)+)([a-zA-Z]*\s*))(?P<b>where\s[a-zA-Z][a-zA-Z0-9-]*\s*){0,1}(?P<c>((over|within)\s(days|years)\s*){0,3})(?P<d>\(.*?\)){0,1})' )
18
19
## Sample inputs for the self-test at the bottom of this file.
## Each entry is (cell_methods string, nn, isv), unpacked in that order and passed
## to check_cm.test(): nn is the expected number of parsed sections and isv selects
## the diagnostic wording (False for the string that is expected to be rejected).
a1 = ('dim: mid_range d2: point', 2, True)
a2 = ('d1: d2: mean d3: minimum (comment: test)', 2, True)
a3 = ('d1: d2: maximum d3: point', 2, True)
## This case used to be assigned to "a4" as well and was silently overwritten by the
## next line, so it was never exercised.  It is kept under its own name; add it to
## the list in the __main__ section to run it.
a4a = ('d1: d2: maximum d3: mean where x over days', 2, True)
a4 = ('d1: d2: maximum d3: mean where x over days within years (comment ...)', 2, True)
a5 = ('d1: d2:    maximum where stuff d3: mean', 2, True)
a6 = ('d1: d2:    maximum where stuff d3: point  bad stuff here', 2, False)
27
28## to check the "xxx" in "where xxx" need to know about the data .... can't really check up front, unless the check is for
29## conventional areas + supplied additional terms ....
30
class check_cm(object):
  """Check whether a string is a valid CF cell_methods string.

  The first check is with a regular expression, check_cm.tstr, which will parse a
  valid cell_methods string into sections. It does not check the validity of
  methods or dimensions, but checks for words and generic syntax.
  The second stage validates the methods specified against the CF list
  (check_cm.methods).

  Attributes set by the constructor:
    methods -- set of accepted method names.
    tstr    -- regular expression source for a single section.
    s1      -- compiled pattern requiring the whole string to be one or more sections.
    s2      -- compiled pattern for one section, used to split a string into sections.
    res     -- result of the most recent call to test(): a list with one groupdict
               per section when the string was accepted, otherwise None.
  """

  def __init__(self):
    self.methods = {'point', 'sum', 'mean', 'maximum', 'minimum', 'mid_range', 'standard_deviation', 'variance', 'mode', 'median'}
    ## Raw string: the pattern contains \s, \( and \) which are not valid string
    ## escapes.  The pattern text itself is unchanged.
    ## Named groups: a = dimensions + method, e = method, b = "where" clause,
    ## bb = "over" inside the where clause, c = over|within interval clauses,
    ## d = bracketed comment.
    self.tstr = r'(((?P<a>(([a-zA-Z][a-zA-Z0-9-_]*:\s+)+)(?P<e>[a-zA-Z_]*\s*))(?P<b>where\s[a-zA-Z][a-zA-Z0-9_-]*\s*(?P<bb>over\s[a-zA-Z][a-zA-Z0-9_-]*\s*){0,1}){0,1}(?P<c>((over|within)\s(days|years|hours)\s*){0,3})(?P<d>\(.*?\)){0,1})\s*)'
    self.s1 = re.compile( "^" + self.tstr + "+$" )
    self.s2 = re.compile( self.tstr )
    ## Defined up front so that reading "res" before any successful test() is safe.
    self.res = None

  def test(self,a0,isv=True,nn=None,acceptEmpty=True):
    """Validate a cell_methods string, printing a diagnostic on failure.

    Arguments:
      a0          -- the string to check; leading/trailing white space is ignored.
      isv         -- selects the wording of failure messages: True prints the
                     "ERROR.nnn" form, False prints "Bad string detected ...".
      nn          -- if not None, the number of sections the string must parse into.
      acceptEmpty -- if True an empty (or all white space) string is accepted.

    Returns True if the string is accepted, False otherwise.  self.res is set to the
    list of parsed sections on success with a non-empty string, and to None in every
    other case (so a failure never leaves the result of an earlier call behind).
    """
    self.res = None
    a = a0.strip()
    if a == '':
      if acceptEmpty:
        return True
      ## NOTE(review): "ERROR.004" is also used for the bad-method message below;
      ## the text is left unchanged in case anything parses this output.
      print('ERROR.004: empty cell methods string')
      return False

    ## Stage 1: the whole string must be a sequence of well-formed sections.
    if not self.s1.match(a):
      if isv:
        print('ERROR.002: match failed: %s' % a)
      else:
        print('Bad string detected: %s' % a)
      return False

    ## Split into sections, one dictionary of named groups per section.
    z = [i.groupdict() for i in self.s2.finditer(a)]
    if nn is not None and len(z) != nn:
      ## Two spaces after the colon reproduce the output of the original
      ## comma-separated print statement.
      print('ERROR.003: Failed to parse into sections:  %s %s %s' % (z, len(z), nn))
      return False

    ## Stage 2: every section must name a known method.
    for zz in z:
      if zz['e'].strip() not in self.methods:
        if isv:
          print('ERROR.004: bad method %s [%s]' % (zz['e'],zz))
        else:
          print('Bad string detected (method wrong): %s' % a)
        return False

    self.res = z
    return True
74
## Self-test: run the sample strings defined above, then check every non-blank
## line of the file "qq" in the current directory and report the rejected ones.
if __name__ == '__main__':
  cc = check_cm()
  for a, nn, isv in [a1,a2,a3,a4,a5,a6]:
    cc.test(a,isv=isv,nn=nn)

  ## "with" closes the file even if a check raises; iterating the handle avoids
  ## loading the whole file into memory.
  with open('qq') as fh:
    for a in fh:
      l = a.strip()
      if l:
        if not cc.test(l):
          print('ERROR: %s' % l)
Note: See TracBrowser for help on using the repository browser.