source: nappy/trunk/nappy/utils/compare_na.py @ 3466

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/nappy/trunk/nappy/utils/compare_na.py@3466
Revision 3466, 6.2 KB checked in by astephen, 12 years ago (diff)

New comparison utilities for use in the test suite.

Line 
1#!/usr/bin/env python
2
3"""
4compare_na.py
5=============
6
7Tool to compare contents of NASA Ames files or directories full of files.
8Allows you to compare headers and data blocks in NASA Ames.
9
10Usage:
11======
12
13    compare_na.py [-h | --header-only]  [-b | --body-only]
14                  [-n | --number-strict]
15                  [-1 <delimiter_1> | --delimiter-1=<delimiter_1>]
16                  [-2 <delimiter_2> | --delimiter-2=<delimiter_2>]
17                  <item1> <item2>
18                                                                   
19
20Where:
21======
22
23    <item1> and <item2>         can either be a text file or directory.
24    -h | --header-only          selects compare only header(s)
25    -b | --body-only            selects compare only body(s)
26    -n | --number-strict        compares exact formatting of numbers in data block
27                                (default is to compare them by value).
28    <delimiter_1>               delimiter to use for file 1.
29    <delimiter_2>               delimiter to use for file 2.
30
31"""
32
33# Import standard library modules
34import os
35import sys
36import re
37import getopt
38
39# Import local modules
40from compare import *
41
# Patterns for files that compNAFiles() refuses to compare. Each pattern is
# applied with re.match() to the bare file name (no directory part), so it is
# anchored at the start of the name: e.g. r"\..*" matches hidden dot-files.
# Raw strings keep the regex backslashes intact without relying on Python
# passing unknown string escapes such as "\." through unchanged.
file_exclusion_patterns = (r".*CSV.*", r".*svn.*", r"\..*", r".*\.pyc$", r".*~$")
file_exclusions = [re.compile(pattn) for pattn in file_exclusion_patterns]
44
45
def compareNA(i1, i2, **kwargs):
    """
    Compares two items, dispatching on what the first item (i1) is:

      * a file:      compNAFiles(i1, i2, **kwargs) is called, which prints
                     any differences at the command line.
      * a directory: compDirs(i1, i2) is called. Note that **kwargs are
                     NOT forwarded in this case.
      * otherwise:   the problem is reported through exitNicely().

    Only i1 is inspected here; i2 is passed on unchecked.
    Nothing is returned.
    """
    if os.path.isfile(i1):
        # Argument unpacking replaces the long-deprecated apply() built-in.
        compNAFiles(i1, i2, **kwargs)
    elif os.path.isdir(i1):
        # compDirs is not defined in this file - presumably it is provided by
        # the "from compare import *" at the top of the module (TODO confirm).
        compDirs(i1, i2)
    else:
        exitNicely("Cannot recognise/find item '" + i1 + "'.")
60
61
def compareSections(l1, l2, number_clever=True, delimiter_1=None, delimiter_2=None):
    """
    Compares sections of NASA Ames files (i.e. headers and bodies).

    l1 and l2 are lists of lines. Lines are compared pair-wise, up to the
    length of the shorter list. Each line is split into items using
    delimiter_1 (for l1) and delimiter_2 (for l2); None splits on white-space.

    If number_clever is true then items that both parse as floats are
    compared by value (so "5.000" equals "5"), otherwise the items are
    compared as strings. If number_clever is false all items are compared as
    strings.

    Every differing line pair is printed with its 1-based line number.

    Returns True only if the two sections have the same number of lines and
    every line pair matched, otherwise False. Two empty sections are the same.
    """
    # Sections of different length can never be identical, even if all the
    # lines they have in common match.
    all_same = len(l1) == len(l2)

    # zip() stops at the shorter section so only lines present in both are compared.
    for i, (line1, line2) in enumerate(zip(l1, l2)):
        items1 = line1.split(delimiter_1)
        items2 = line2.split(delimiter_2)

        if len(items1) != len(items2):
            same = False
        elif not number_clever:
            same = items1 == items2
        else:
            same = True
            for item1, item2 in zip(items1, items2):
                try:
                    a, b = float(item1), float(item2)
                except ValueError:
                    # At least one item is not a number: compare as text.
                    a, b = item1, item2
                if a != b:
                    same = False
                    break

        if not same:
            # Remember the mismatch: the result must reflect every line, not
            # just the last one compared.
            all_same = False
            print("Line %s:" % (i + 1))
            print(">>> %s" % line1)
            print("<<< %s" % line2)

    return all_same
99
100
def compNAFiles(f1, f2, header=True, body=True, number_clever=True, delimiter_1=None,
                delimiter_2=None):
    """
    Compares contents of two NASA Ames files f1 and f2.

    header=False or body=False will not compare these sections of the files.
    number_clever=True will compare 5.00000 and 5 making them equal.
    delimiter_1 and delimiter_2 are the item delimiters used to split the
    lines of f1 and f2 respectively. The default of None splits on
    white-space. Lines are compared item by item, so two lines that differ
    only in their delimiters are considered identical.

    The first item on the first line of each file is read as the number of
    header lines; the remaining lines form the body.

    If the name of f1 matches one of the module-level file_exclusions the
    file is ignored. Results are printed; nothing is returned.
    """
    name = os.path.split(f1)[-1]
    # Ignore anything that is in exclusion list
    for excl in file_exclusions:
        if excl.match(name):
            print("IGNORING EXCLUDED file: %s" % f1)
            return

    # Check they exist
    for f in (f1, f2):
        if not os.path.isfile(f):
            exitNicely("CANNOT compare files as item does not exist:" + f)

    # Read both files, closing each handle as soon as its lines are in memory.
    with open(f1) as na_file_1:
        l1 = na_file_1.readlines()
    with open(f2) as na_file_2:
        l2 = na_file_2.readlines()

    # Note delimiter set as None will do split on white-space (which we want!)
    head_len1 = int(l1[0].split(delimiter_1)[0])
    head_len2 = int(l2[0].split(delimiter_2)[0])

    header1 = l1[:head_len1]
    header2 = l2[:head_len2]
    body1 = l1[head_len1:]
    body2 = l2[head_len2:]

    if header:
        print("Comparing headers:")
        print(">>> %s header:" % f1)
        print("<<< %s header:" % f2)
        same = compareSections(header1, header2, number_clever, delimiter_1, delimiter_2)
        # Only claim the headers are identical when their lengths agree too,
        # otherwise the two messages would contradict each other.
        if same and len(header1) == len(header2):
            print("HEADERS ARE IDENTICAL.")
        if len(header1) != len(header2):
            print("Header lengths differ:\n>>> %s: %s\n<<< %s: %s" % (f1, len(header1), f2, len(header2)))

    if body:
        print("Comparing bodies:")
        print(">>> %s body:" % f1)
        print("<<< %s body:" % f2)
        same = compareSections(body1, body2, number_clever, delimiter_1, delimiter_2)
        if same and len(body1) == len(body2):
            print("BODIES ARE IDENTICAL.")
        if len(body1) != len(body2):
            print("Body lengths differ:\n>>> %s: %s\n<<< %s: %s" % (f1, len(body1), f2, len(body2)))
155       
156
157
def parseArgs(args):
    """
    Parses command line arguments (excluding the program name).

    Returns a tuple of (files, arg_dict) where files is the list of the two
    positional items to compare and arg_dict holds the keyword options:

      "header"         False if -b | --body-only was given, else True.
      "body"           False if -h | --header-only was given, else True.
      "number_clever"  False if -n | --number-strict was given, else True.
      "delimiter_1"    value of -1 | --delimiter-1, else None.
      "delimiter_2"    value of -2 | --delimiter-2, else None.

    Invalid options, a wrong number of positional items, or selecting both
    header-only and body-only are reported through exitNicely().
    """
    a = {
        "header": True,
        "body": True,
        "number_clever": True,
        "delimiter_1": None,
        "delimiter_2": None,
    }

    try:
        (arg_list, files) = getopt.getopt(args, "hbn1:2:", ["header-only", "body-only",
                             "number-strict", "delimiter-1=", "delimiter-2="])
    except getopt.GetoptError as err:
        # getopt raises for unknown options and for options missing their
        # value, so these must be caught here to be reported cleanly.
        # NOTE(review): like the other calls below, this assumes exitNicely()
        # terminates the program - confirm against its definition.
        exitNicely("Unrecognised argument provided: " + str(err))

    for arg, value in arg_list:
        if arg in ("--header-only", "-h"):
            a["body"] = False
        elif arg in ("--body-only", "-b"):
            a["header"] = False
        elif arg in ("--number-strict", "-n"):
            a["number_clever"] = False
        elif arg in ("--delimiter-1", "-1"):
            a["delimiter_1"] = value
        elif arg in ("--delimiter-2", "-2"):
            a["delimiter_2"] = value
        else:
            exitNicely("Unrecognised argument provided: " + arg)

    if len(files) != 2:
        exitNicely("Must provide exactly two file or directory names as command line arguments.")

    if not a["header"] and not a["body"]:
        exitNicely("Invalid selection: header-only and body-only cannot be selected together.")

    return (files, a)
194
195
def main(args):
    """
    Main controller: parses the command line arguments (excluding the
    program name) with parseArgs() and passes the resulting items and
    options to compareNA().
    """
    files, arg_dict = parseArgs(args)
    # Argument unpacking replaces the long-deprecated apply() built-in.
    compareNA(*files, **arg_dict)
200   
201 
if __name__ == "__main__":
    # Executed as a script: pass everything after the program name to main().
    main(sys.argv[1:])
Note: See TracBrowser for help on using the repository browser.