1 | #!/usr/bin/env python |
---|
2 | |
---|
3 | """ |
---|
4 | compare_na.py |
---|
5 | ============= |
---|
6 | |
---|
7 | Tool to compare contents of NASA Ames files or directories full of files. |
---|
8 | Allows you to compare headers and data blocks in NASA Ames. |
---|
9 | |
---|
10 | Usage: |
---|
11 | ====== |
---|
12 | |
---|
13 | compare_na.py [-h | --header-only] [-b | --body-only] |
---|
14 | [-n | --number-strict] |
---|
15 | [-1 <delimiter_1> | --delimiter-1=<delimiter_1>] |
---|
16 | [-2 <delimiter_2> | --delimiter-2=<delimiter_2>] |
---|
17 | <item1> <item2> |
---|
18 | |
---|
19 | |
---|
20 | Where: |
---|
21 | ====== |
---|
22 | |
---|
23 | <item1> and <item2> can either be a text file or directory. |
---|
24 | -h | --header-only selects compare only header(s) |
---|
25 | -b | --body-only selects compare only body(s) |
---|
26 | -n | --number-strict compares exact formatting of numbers in data block |
---|
27 | (default is to compare them by value). |
---|
28 | <delimiter_1> delimiter to use for file 1. |
---|
29 | <delimiter_2> delimiter to use for file 2. |
---|
30 | |
---|
31 | """ |
---|
32 | |
---|
33 | # Import standard library modules |
---|
34 | import os |
---|
35 | import sys |
---|
36 | import re |
---|
37 | import getopt |
---|
38 | |
---|
39 | # Import local modules |
---|
40 | from compare import * |
---|
41 | |
---|
# Regular expressions for file names that must be skipped by the comparison.
# compNAFiles() applies them with ``match()``, i.e. anchored at the start of
# the base name of the first file.
# Raw strings are used so that regex escapes such as ``\.`` are not parsed as
# (invalid) string escape sequences.
file_exclusion_patterns = (r".*CSV.*", r".*svn.*", r"\..*", r".*\.pyc$", r".*~$")
file_exclusions = [re.compile(pattn) for pattn in file_exclusion_patterns]
---|
44 | |
---|
45 | |
---|
def exitNicely(msg):
    """
    Tidy exit: prints the module usage text followed by `msg`, then stops.

    Every call visible in this module reports an error (an item that cannot
    be found or invalid command line arguments), so the process terminates
    with exit status 1 rather than the "success" status 0.

    Raises SystemExit.
    """
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(__doc__)
    print(msg)
    sys.exit(1)
---|
51 | |
---|
52 | |
---|
def compareNA(i1, i2, **kwargs):
    """
    Compares two items, whether they are files or directories.
    The kind of comparison is selected by what `i1` is on disk.

    Differences are reported at the command line by the comparison
    routines that are called.

    **kwargs are forwarded to compNAFiles() when `i1` is a file
    (header, body, number_clever, delimiter_1, delimiter_2).

    Returns whatever the selected comparison routine returns. If `i1` is
    neither an existing file nor a directory, exitNicely() is called, which
    raises SystemExit.
    """
    if os.path.isfile(i1):
        return compNAFiles(i1, i2, **kwargs)

    if os.path.isdir(i1):
        # NOTE(review): compDirs is not defined in this file - presumably it
        # comes from the star-import of `compare`. As before, the keyword
        # options are not passed on to it.
        return compDirs(i1, i2)

    exitNicely("Cannot recognise/find item '" + i1 + "'.")
---|
67 | |
---|
68 | |
---|
def compareSections(l1, l2, number_clever=True, delimiter_1=None, delimiter_2=None):
    """
    Compares sections of NASA Ames files (i.e. headers and bodies).

    l1, l2        -- the lines of the section from file 1 and file 2.
    number_clever -- if true, items that both parse as numbers are compared
                     by value (so 5.00000 equals 5); otherwise the split
                     items must be textually identical.
    delimiter_1,
    delimiter_2   -- delimiter used to split each line of l1 / l2 into
                     items; None splits on white-space.

    Only the lines present in both sections are compared: extra lines in
    the longer section are ignored here. Each differing pair of lines is
    printed with its 1-based line number within the section.

    Returns True if no compared line differs, otherwise False.
    """
    all_same = True

    # zip() stops at the end of the shorter section.
    for line_no, (line1, line2) in enumerate(zip(l1, l2), 1):
        items1 = line1.split(delimiter_1)
        items2 = line2.split(delimiter_2)

        if len(items1) != len(items2):
            same = False
        elif not number_clever:
            same = (items1 == items2)
        else:
            same = True
            for item1, item2 in zip(items1, items2):
                if item1 == item2:
                    # Identical text always matches. This also covers "NaN",
                    # which never compares equal to itself as a float.
                    continue
                try:
                    same = (float(item1) == float(item2))
                except ValueError:
                    # At least one item is not a number and the text differs.
                    same = False
                if not same:
                    break

        if not same:
            all_same = False
            print("Line %s:" % line_no)
            print(">>> %s" % line1)
            print("<<< %s" % line2)

    return all_same
---|
109 | |
---|
110 | |
---|
def compNAFiles(f1, f2, header=True, body=True, number_clever=True, delimiter_1=None,
                delimiter_2=None):
    """
    Compares contents of two NASA Ames files f1 and f2.

    header=False or body=False will not compare these sections of the files.
    number_clever=True will compare 5.00000 and 5 making them equal; it is
    applied to every section that is compared.
    delimiter_1 and delimiter_2 are the delimiters used to split the lines of
    f1 and f2 respectively into items. None (the default) splits on
    white-space.

    The number of header lines of each file is read from the first item on
    its first line; the remaining lines form the body.

    Returns:
      None  -- if the name of f1 matches a pattern in the exclusion list
               (nothing is compared);
      True  -- if every compared section has the same number of lines and
               no differing line;
      False -- otherwise.

    If either file does not exist exitNicely() is called, which raises
    SystemExit.
    """
    name = os.path.split(f1)[-1]
    # Ignore anything that is in exclusion list
    for excl in file_exclusions:
        if excl.match(name):
            print("IGNORING EXCLUDED file: %s" % f1)
            return None

    # Check they exist
    for f in (f1, f2):
        if not os.path.isfile(f):
            exitNicely("CANNOT compare files as item does not exist:" + f)

    # Read both files, making sure the file handles are closed again.
    with open(f1) as input1:
        l1 = input1.readlines()
    with open(f2) as input2:
        l2 = input2.readlines()

    # Note delimiter set as None will do split on white-space (which we want!)
    head_len1 = int(l1[0].split(delimiter_1)[0])
    head_len2 = int(l2[0].split(delimiter_2)[0])

    header1 = l1[:head_len1]
    header2 = l2[:head_len2]
    body1 = l1[head_len1:]
    body2 = l2[head_len2:]

    same = True

    if header:
        print("Comparing headers:")
        print(">>> %s header:" % f1)
        print("<<< %s header:" % f2)
        headers_same = compareSections(header1, header2, number_clever, delimiter_1, delimiter_2)
        # compareSections() only looks at the lines both sections have, so a
        # difference in length has to be counted as a difference here.
        if len(header1) != len(header2):
            headers_same = False
            print("Header lengths differ:\n>>> %s: %s\n<<< %s: %s" % (f1, len(header1), f2, len(header2)))
        if headers_same:
            print("HEADERS ARE IDENTICAL.")
        same = same and headers_same

    if body:
        print("Comparing bodies:")
        print(">>> %s body:" % f1)
        print("<<< %s body:" % f2)
        bodies_same = compareSections(body1, body2, number_clever, delimiter_1, delimiter_2)
        if len(body1) != len(body2):
            bodies_same = False
            print("Body lengths differ:\n>>> %s: %s\n<<< %s: %s" % (f1, len(body1), f2, len(body2)))
        if bodies_same:
            print("BODIES ARE IDENTICAL.")
        # Combine with the header result instead of overwriting it.
        same = same and bodies_same

    return same
---|
167 | |
---|
168 | |
---|
def parseArgs(args):
    """
    Parses command line arguments.

    args -- list of argument strings (without the program name).

    Returns a tuple (files, arg_dict) where `files` is the list of the two
    items to compare and `arg_dict` holds the keyword options "header",
    "body", "number_clever", "delimiter_1" and "delimiter_2".

    Calls exitNicely() (which raises SystemExit) if an option is not
    recognised, if the number of items is not exactly two, or if both
    header-only and body-only are selected.
    """
    arg_dict = {
        "header": True,
        "body": True,
        "number_clever": True,
        "delimiter_1": None,
        "delimiter_2": None,
    }

    try:
        arg_list, files = getopt.getopt(
            args, "hbn1:2:",
            ["header-only", "body-only", "number-strict", "delimiter-1=", "delimiter-2="])
    except getopt.GetoptError as err:
        # getopt raises on unknown options or missing option values, so
        # report that with the usage text rather than a traceback.
        exitNicely("Invalid command line arguments: %s" % err)

    for arg, value in arg_list:
        if arg in ("--header-only", "-h"):
            arg_dict["body"] = False
        elif arg in ("--body-only", "-b"):
            arg_dict["header"] = False
        elif arg in ("--number-strict", "-n"):
            arg_dict["number_clever"] = False
        elif arg in ("--delimiter-1", "-1"):
            arg_dict["delimiter_1"] = value
        elif arg in ("--delimiter-2", "-2"):
            arg_dict["delimiter_2"] = value
        else:
            exitNicely("Unrecognised argument provided: " + arg)

    if len(files) != 2:
        exitNicely("Must provide exactly two items (files or directories) as command line arguments.")

    # Both sections switched off means both "-only" options were given.
    if not arg_dict["header"] and not arg_dict["body"]:
        exitNicely("Invalid selection: header-only and body-only cannot be selected together.")

    return (files, arg_dict)
---|
205 | |
---|
206 | |
---|
def main(args):
    """
    Main controller: parses `args` (the command line arguments without the
    program name) and runs the comparison on the two items given.

    Returns whatever compareNA() returns.
    """
    files, arg_dict = parseArgs(args)
    # Extended call syntax replaces the deprecated apply() builtin.
    return compareNA(*files, **arg_dict)
---|
211 | |
---|
212 | |
---|
if __name__ == "__main__":
    # Run as a script: hand everything after the program name to main().
    main(sys.argv[1:])
---|