source: nappy/trunk/nappy/utils/compare_na.py @ 3468

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/nappy/trunk/nappy/utils/compare_na.py@3468
Revision 3468, 6.4 KB checked in by astephen, 12 years ago (diff)

Fixed a few bugs and improved return check whether really same or not.

Line 
1#!/usr/bin/env python
2
3"""
4compare_na.py
5=============
6
7Tool to compare contents of NASA Ames files or directories full of files.
8Allows you to compare headers and data blocks in NASA Ames.
9
10Usage:
11======
12
13    compare_na.py [-h | --header-only]  [-b | --body-only]
14                  [-n | --number-strict]
15                  [-1 <delimiter_1> | --delimiter-1=<delimiter_1>]
16                  [-2 <delimiter_2> | --delimiter-2=<delimiter_2>]
17                  <item1> <item2>
18                                                                   
19
20Where:
21======
22
23    <item1> and <item2>         can either be a text file or directory.
24    -h | --header-only          selects compare only header(s)
25    -b | --body-only            selects compare only body(s)
26    -n | --number-strict        compares exact formatting of numbers in data block
27                                (default is to compare them by value).
28    <delimiter_1>               delimiter to use for file 1.
29    <delimiter_2>               delimiter to use for file 2.
30
31"""
32
33# Import standard library modules
34import os
35import sys
36import re
37import getopt
38
39# Import local modules
40from compare import *
41
# Regular expressions (applied with re.match, i.e. anchored at the start of
# the file's base name) describing files that compNAFiles() skips.
# Raw strings are used so that "\." reaches the regex engine unchanged without
# relying on Python passing unknown string escapes through.
file_exclusion_patterns = (r".*CSV.*", r".*svn.*", r"\..*", r".*\.pyc$", r".*~$")
file_exclusions = [re.compile(pattn) for pattn in file_exclusion_patterns]
44
45
def exitNicely(msg):
    """
    Prints the module usage text followed by `msg`, then ends the program.

    Every call in this module reports a fatal problem (bad arguments,
    missing item), so the process exits with status 1 rather than 0 to let
    calling shells and scripts detect the failure.

    Raises SystemExit.
    """
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(__doc__)
    print(msg)
    sys.exit(1)
51
52
def compareNA(i1, i2, **kwargs):
    """
    Compares two items, whether files or directories.

    If `i1` is a file, `i1` and `i2` are compared with compNAFiles() and
    **kwargs are forwarded to it as keyword arguments.
    If `i1` is a directory, the pair is handed to compDirs(); **kwargs are
    not forwarded in that case.
    Anything else ends the program through exitNicely().

    Differences are reported at the command line. The value returned by the
    underlying comparison function is passed back to the caller.
    """
    if os.path.isfile(i1):
        return compNAFiles(i1, i2, **kwargs)

    if os.path.isdir(i1):
        # NOTE(review): compDirs is not defined in this file; presumably it
        # is supplied by `from compare import *` -- confirm.
        return compDirs(i1, i2)

    exitNicely("Cannot recognise/find item '" + i1 + "'.")
67
68
def _itemsMatchByValue(items1, items2):
    "Returns True if each pair of tokens is equal as text or as floats."
    for text1, text2 in zip(items1, items2):
        if text1 == text2:
            # Identical text is always a match (this also covers "nan",
            # which never compares equal to itself as a float).
            continue
        try:
            if float(text1) != float(text2):
                return False
        except ValueError:
            # At least one token is not a number and the text differs.
            return False
    return True


def compareSections(l1, l2, number_clever=True, delimiter_1=None, delimiter_2=None):
    """
    Compares sections of NASA Ames files (i.e. headers and bodies).

    `l1` and `l2` are lists of text lines. Lines are compared pairwise over
    the common length of the two lists only; any surplus lines in the longer
    list are NOT examined here, so callers must compare the lengths
    themselves.

    Each line is split with `delimiter_1` (for `l1`) or `delimiter_2`
    (for `l2`); None splits on white-space. Lines with a different number
    of items always differ. Otherwise, when `number_clever` is true, items
    are equal if their text matches or both parse to the same float
    (so 5.00000 equals 5); when false the item text must match exactly.

    Each differing pair is printed with its 1-based line number.
    Returns True if no compared pair differs, otherwise False.
    """
    all_same = True

    for line_no, (line1, line2) in enumerate(zip(l1, l2), 1):
        items1 = line1.split(delimiter_1)
        items2 = line2.split(delimiter_2)

        if len(items1) != len(items2):
            same = False
        elif number_clever:
            same = _itemsMatchByValue(items1, items2)
        else:
            same = items1 == items2

        if not same:
            all_same = False
            print("Line %s:" % line_no)
            print(">>> %s" % line1)
            print("<<< %s" % line2)

    return all_same
109
110
def _readNASections(path, delimiter):
    "Reads `path` and returns its lines as a (header, body) pair of lists."
    with open(path) as na_file:
        lines = na_file.readlines()

    # The first item on the first line is used as the header line count.
    try:
        head_len = int(lines[0].split(delimiter)[0])
    except (IndexError, ValueError):
        # Empty file, blank first line or non-integer first item.
        exitNicely("CANNOT compare files as header length cannot be read from first line of: " + path)

    return lines[:head_len], lines[head_len:]


def compNAFiles(f1, f2, header=True, body=True, number_clever=True, delimiter_1=None,
                delimiter_2=None):
    """
    Compares contents of two NASA Ames files f1 and f2.
    header=False or body=False will not compare these sections of the files.
    number_clever=True will compare 5.00000 and 5 making them equal.
    delimiter_1 and delimiter_2 are the item delimiters used to split the
    lines of f1 and f2 respectively; None splits on white-space.

    Differences are printed as they are found.

    Returns True only if every compared section matches line for line and
    has the same number of lines, otherwise False. Returns None, without
    comparing anything, if the base name of f1 matches an exclusion pattern.
    Ends the program through exitNicely() if either file is missing or its
    header length cannot be read.
    """
    name = os.path.split(f1)[-1]
    # Ignore anything that is in exclusion list
    for excl in file_exclusions:
        if excl.match(name):
            print("IGNORING EXCLUDED file: %s" % f1)
            return

    # Check they exist
    for f in (f1, f2):
        if not os.path.isfile(f):
            exitNicely("CANNOT compare files as item does not exist:" + f)

    # Note delimiter set as None will do split on white-space (which we want!)
    header1, body1 = _readNASections(f1, delimiter_1)
    header2, body2 = _readNASections(f2, delimiter_2)

    # Accumulates over both sections so a header difference is not lost when
    # the bodies turn out to be identical.
    same = True

    if header:
        print("Comparing headers:")
        print(">>> %s header:" % f1)
        print("<<< %s header:" % f2)
        headers_same = compareSections(header1, header2, number_clever, delimiter_1, delimiter_2)
        if len(header1) != len(header2):
            # compareSections() only looks at the common length.
            headers_same = False
            print("Header lengths differ:\n>>> %s: %s\n<<< %s: %s" % (f1, len(header1), f2, len(header2)))
        elif headers_same:
            print("HEADERS ARE IDENTICAL.")
        same = same and headers_same

    if body:
        print("Comparing bodies:")
        print(">>> %s body:" % f1)
        print("<<< %s body:" % f2)
        bodies_same = compareSections(body1, body2, number_clever, delimiter_1, delimiter_2)
        if len(body1) != len(body2):
            bodies_same = False
            print("Body lengths differ:\n>>> %s: %s\n<<< %s: %s" % (f1, len(body1), f2, len(body2)))
        elif bodies_same:
            print("BODIES ARE IDENTICAL.")
        same = same and bodies_same

    return same
167
168
def parseArgs(args):
    """
    Parses command line arguments.

    `args` is the argument list without the program name.

    Returns a tuple (files, options) where `files` is the list of the two
    item names and `options` is a dictionary with the keys "header", "body",
    "number_clever", "delimiter_1" and "delimiter_2", suitable for passing
    as keyword arguments to compareNA().

    Ends the program through exitNicely() if the options are invalid, if
    the number of items is not exactly two, or if header-only and body-only
    are both selected.
    """
    a = {
        "header": True,
        "body": True,
        "number_clever": True,
        "delimiter_1": None,
        "delimiter_2": None,
    }

    try:
        (arg_list, files) = getopt.getopt(args, "hbn1:2:", ["header-only", "body-only",
                             "number-strict", "delimiter-1=", "delimiter-2="])
    except getopt.GetoptError as err:
        # Unknown option or missing option value: show usage instead of a
        # traceback.
        exitNicely("Invalid command line arguments: %s" % err)

    # getopt only ever returns the options declared above, so no fall-through
    # branch is needed here.
    for arg, value in arg_list:
        if arg in ("--header-only", "-h"):
            a["body"] = False
        elif arg in ("--body-only", "-b"):
            a["header"] = False
        elif arg in ("--number-strict", "-n"):
            a["number_clever"] = False
        elif arg in ("--delimiter-1", "-1"):
            a["delimiter_1"] = value
        elif arg in ("--delimiter-2", "-2"):
            a["delimiter_2"] = value

    if len(files) != 2:
        exitNicely("Must provide exactly two file names as command line arguments.")

    if not a["header"] and not a["body"]:
        exitNicely("Invalid selection: header-only and body-only cannot be selected together.")

    return (files, a)
205
206
def main(args):
    """
    Main controller.

    Parses `args` (the command line without the program name) with
    parseArgs() and runs compareNA() on the two items using the parsed
    options. Returns whatever compareNA() returns.
    """
    files, arg_dict = parseArgs(args)
    # Argument unpacking replaces apply(), which does not exist in Python 3.
    return compareNA(*files, **arg_dict)
211   
212 
if __name__ == "__main__":
    # Run as a script: pass everything after the program name to main().
    main(sys.argv[1:])
Note: See TracBrowser for help on using the repository browser.