source: nappy/trunk/nappy/nc_interface/nc_to_na.py @ 4699

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/nappy/trunk/nappy/nc_interface/nc_to_na.py@4699
Revision 4699, 16.6 KB checked in by astephen, 12 years ago (diff)

Various nappy updates to improve comments and combining them with inserted comments.

Line 
1#   Copyright (C) 2004 CCLRC & NERC( Natural Environment Research Council ).
2#   This software may be distributed under the terms of the
3#   Q Public License, version 1.0 or later. http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
4
5"""
6nc_to_na.py
7=============
8
9Holds the class NCToNA (sub-classing CDMSToNA) that converts a NetCDF file to
10one or more NASA Ames files.
11
12"""
13
14# Imports from python standard library
15import sys
16
17# Import from nappy package
18import nappy
19from nappy.na_error import na_error
20import nappy.utils
21import nappy.utils.common_utils
22import nappy.nc_interface.cdms_to_na
23import nappy.nc_interface.na_content_collector
24
# Import external packages (if available)
# CDMS is not available on Windows, so refuse to load there.  The check uses
# a prefix match on purpose: a substring search for "win" would also match
# "darwin" and wrongly reject macOS.  "cygwin" is kept in the rejected set
# because the original substring test rejected it as well.
if sys.platform.startswith(("win", "cygwin")):
    raise na_error.NAPlatformError("Windows does not support CDMS. CDMS is required to convert to CDMS objects and NetCDF.")

# Prefer the newer "cdms2" package and fall back to the legacy "cdms" one.
# "except Exception" (rather than a bare "except") keeps the best-effort
# fallback for any import-time failure while letting KeyboardInterrupt and
# SystemExit propagate.
try:
    import cdms2 as cdms
except Exception:
    try:
        import cdms
    except Exception:
        raise Exception("Could not import third-party software. Nappy requires the CDMS and Numeric packages to be installed to convert to CDMS and NetCDF.")

cdms.setAutoBounds("off")
38
# Module-level settings, read once from the nappy configuration at import time
DEBUG = nappy.utils.getDebug()
default_delimiter = nappy.utils.getDefault("default_delimiter")
default_float_format = nappy.utils.getDefault("default_float_format")
comment_override_rule = nappy.utils.getDefault("comment_override_rule")
add_column_headers = bool(nappy.utils.getDefault("add_column_headers"))

# Header partition strings from the config; "hp" is a short alias
config_dict = nappy.utils.getConfigDict()
hp = header_partitions = config_dict["header_partitions"]

# NASA Ames header items that callers are allowed to overwrite
permitted_overwrite_metadata = (
    "DATE",
    "RDATE",
    "ANAME",
    "MNAME",
    "ONAME",
    "ORG",
    "SNAME",
    "VNAME",
    "SCOM",
    "NCOM",
)

# Override values for these items are split on whitespace into lists
items_as_lists = [
    "DATE",
    "RDATE",
    "ANAME",
    "VNAME",
]
54
55
class NCToNA(nappy.nc_interface.cdms_to_na.CDMSToNA):
    """
    Converts a NetCDF file to one or more NASA Ames files.
    """

    def __init__(self, nc_file, var_ids=None, na_items_to_override=None,
            only_return_file_names=False, exclude_vars=None,
            requested_ffi=None,
            ):
        """
        Reads the NetCDF file and sets up instance variables via the parent
        class constructor.

        Arguments:
          nc_file                - path of the NetCDF file to read.
          var_ids                - a variable id, or list of ids, to extract
                                   (None means all variables).
          na_items_to_override   - dictionary of NASA Ames header items to
                                   override (None means no overrides).
          only_return_file_names - passed through to the parent class.
          exclude_vars           - list of variable ids to skip
                                   (None means skip nothing).
          requested_ffi          - passed through to the parent class.

        Typical usage is:
        >>>    import nappy.nc_interface.nc_to_na as nc_to_na
        >>>    c = nc_to_na.NCToNA("old_file.nc")
        >>>    c.convert()
        >>>    c.writeNAFiles("new_file.na", delimiter=",")

        OR:
        >>>    c = nc_to_na.NCToNA("old_file.nc")
        >>>    file_names = c.constructNAFileNames()
        """
        self.nc_file = nc_file

        # Build fresh containers per call rather than sharing mutable defaults
        if na_items_to_override is None:
            na_items_to_override = {}
        if exclude_vars is None:
            exclude_vars = []

        # Now need to read CDMS file so parent class methods are compatible
        (cdms_variables, global_attributes) = self._readCDMSFile(var_ids, exclude_vars)
        nappy.nc_interface.cdms_to_na.CDMSToNA.__init__(self, cdms_variables, global_attributes=global_attributes,
                                                        na_items_to_override=na_items_to_override,
                                                        only_return_file_names=only_return_file_names,
                                                        requested_ffi=requested_ffi)

    def _readCDMSFile(self, var_ids=None, exclude_vars=None):
        """
        Reads the file and returns all the CDMS variables in a list as well
        as the global attributes: (cdms_variable_list, global_atts_list)
        If var_ids is defined then only get those. Any variable id listed in
        exclude_vars is skipped.
        """
        if exclude_vars is None:
            exclude_vars = []

        # NOTE(review): the file handle is not closed here (as in the original);
        # confirm whether the returned CDMS objects remain usable after a close
        # before adding one.
        fin = cdms.open(self.nc_file)

        # Make sure var_ids is a list
        if isinstance(var_ids, str):
            var_ids = [var_ids]

        cdms_variables = []
        for var_id in fin.listvariables():
            if var_ids is not None and var_id not in var_ids:
                continue
            # Test the individual id (not the whole var_ids list) for exclusion
            if var_id in exclude_vars:
                continue
            cdms_variables.append(fin(var_id))

        global_attributes = list(fin.attributes.items())
        return (cdms_variables, global_attributes)

    def constructNAFileNames(self, na_file=None):
        """
        Works out what the file names of the output NA files will be and
        returns a list of them.

        If na_file is not given the name is derived from self.nc_file by
        swapping a trailing ".nc" for ".na". When more than one output file is
        needed, "_1", "_2", ... is inserted before the file extension.
        """
        import os.path

        self.convert()

        # create file name if not given
        if na_file is None:
            base_name = self.nc_file
            if base_name[-3:] == ".nc":
                base_name = base_name[:-3]
            na_file = base_name + ".na"

        # splitext copes with names that have no extension and with dots in
        # directory names, both of which a plain split(".") gets wrong.
        (stem, extension) = os.path.splitext(na_file)
        n_files = len(self.na_dict_list)

        file_names = []
        for file_counter in range(1, n_files + 1):
            if n_files == 1:
                suffix = ""
            else:
                suffix = "_%s" % file_counter
            file_names.append(stem + suffix + extension)

        return file_names

    def _recordOutputMessage(self, msg):
        """
        Appends msg to self.output_message, printing it first if DEBUG is on.
        """
        if DEBUG: print(msg)
        self.output_message.append(msg)

    def _applyMetadataOverrides(self, this_na_dict, overriders):
        """
        Overwrites items of this_na_dict in place with the values in the
        overriders dictionary, for keys listed in permitted_overwrite_metadata.

        Comments ("SCOM"/"NCOM") are replaced by, or combined with, the
        existing comments according to comment_override_rule, and the matching
        line-count item ("NSCOML"/"NNCOML") is updated.
        Raises Exception if comment_override_rule is not recognised.
        """
        for (key, value) in overriders.items():
            if key not in permitted_overwrite_metadata:
                continue

            if key in items_as_lists:
                new_item = value.split()
            else:
                new_item = value

            # Do specific overwrite for comments
            if key in ("SCOM", "NCOM"):

                # Use rule defined in config file in terms of where to put new comments
                if comment_override_rule == "replace":
                    comments_list = new_item[:]
                elif comment_override_rule in ("insert", "extend"):
                    new_comments = new_item[:]
                    existing_comments = this_na_dict.get(key, [])
                    comments_list = self._cleanWrapComments(existing_comments, new_comments, key, comment_override_rule)
                else:
                    raise Exception("Did not recognise comment_override_rule: " + str(comment_override_rule))

                this_na_dict[key] = comments_list
                this_na_dict["N%sL" % key] = len(comments_list)

            elif key not in this_na_dict or new_item != this_na_dict[key]:
                this_na_dict[key] = new_item
                self._recordOutputMessage("Metadata overwritten in output file: '%s' is now '%s'" % (key, this_na_dict[key]))

    def writeNAFiles(self, na_file=None, delimiter=default_delimiter, annotation=False,
                     float_format=default_float_format, size_limit=None, no_header=False):
        """
        Writes the self.na_dict_list content to one or more NASA Ames files.
        Output file names are based on the self.nc_file name unless specified
        in the na_file argument in which case that provides the main name
        that is appended to if multiple output file names are required.

        If size_limit is given and a file has FFI 1001 with more than
        size_limit values per variable, that file is split into several
        output files.

        Sets self.output_files_written to the list of all files written and
        returns self.output_message (the list of messages logged so far).

        TODO: no_header is NOT implemented.
        """
        self.convert() # just in case not already called

        # Gets a list of NA file_names that will be produced.
        file_names = self.constructNAFileNames(na_file)

        # Get any NASA Ames dictionary values that should be overwritten with local values
        local_attributes = nappy.utils.getLocalAttributesConfigDict()

        # Define final override dict by using defaults then locally provided changes.
        # Work on a copy so the dictionary handed back by the config helper is
        # not modified by this instance's overrides.
        overriders = dict(local_attributes["na_attributes"])
        overriders.update(self.na_items_to_override)

        # Every file written, including those split because size_limit was exceeded
        all_files_written = []

        # Now loop through writing the outputs
        for (file_name, na_dict_and_var_ids) in zip(file_names, self.na_dict_list):
            self._recordOutputMessage("\nWriting output NASA Ames file: %s" % file_name)

            # Set up current na dict
            (this_na_dict, vars_to_write) = na_dict_and_var_ids

            # Override content of NASA Ames if they are permitted
            self._applyMetadataOverrides(this_na_dict, overriders)

            # For certain FFIs create final Normal comments as a list of column headers before data section
            if add_column_headers:
                self._updateWithColumnHeaders(this_na_dict, delimiter)

            file_list = []
            # Cope with size limits if specified and FFI is 1001: chunks of a
            # too-long array are written to separate files.
            if size_limit is not None and (this_na_dict["FFI"] == 1001 and len(this_na_dict["V"][0]) > size_limit):
                files_written = self._writeNAFileSubsetsWithinSizeLimit(this_na_dict, file_name, delimiter=delimiter,
                                                                        float_format=float_format, size_limit=size_limit,
                                                                        annotation=annotation)
                file_list.extend(files_written)

            # If not having to split file into multiple outputs (normal condition)
            else:
                x = nappy.openNAFile(file_name, 'w', this_na_dict)
                x.write(delimiter=delimiter, float_format=float_format, annotation=annotation)
                x.close()
                file_list.append(file_name)

            # Report on what has been written
            self._recordOutputMessage("\nWrote the following variables:" + "\n  " + ("\n  ".join(vars_to_write[0])))

            # Build up one summary message; each part is appended so that the
            # auxiliary report is not lost when singleton variables also exist.
            msg = ""
            aux_vars = vars_to_write[1]
            if len(aux_vars) > 0:
                msg = msg + "\nWrote the following auxiliary variables:" + "\n  " + ("\n  ".join(aux_vars))

            singleton_vars = vars_to_write[2]
            if len(singleton_vars) > 0:
                msg = msg + "\nWrote the following Singleton variables:" + "\n  " + ("\n  ".join(singleton_vars))

            if len(file_list) > 0:
                msg = msg + ("\n\nNASA Ames file(s) written successfully: \n%s" % "\n".join(file_list))

            all_files_written.extend(file_list)
            self._recordOutputMessage(msg)

        full_file_count = len(all_files_written)
        if full_file_count == 1:
            plural = ""
        else:
            plural = "s"
        self._recordOutputMessage("\n%s file%s written." % (full_file_count, plural))

        self.output_files_written = all_files_written

        return self.output_message

    def _writeNAFileSubsetsWithinSizeLimit(self, this_na_dict, file_name, delimiter,
                      float_format, size_limit, annotation):
        """
        Writes this_na_dict (expected by the caller to be FFI 1001) as a
        series of files, each holding at most size_limit values per variable.
        Each output is named "<file_name without extension>-NNN.na" where NNN
        is the 3-digit volume number.
        Returns list of file names of outputs written.
        Raises ValueError if size_limit is less than 1.
        """
        import os.path

        # A non-positive limit would divide by zero or never advance the loop
        if size_limit < 1:
            raise ValueError("size_limit must be a positive integer: " + str(size_limit))

        file_names = []
        var_list = this_na_dict["V"]
        array_length = len(var_list[0])

        # Work out the number of volumes (files) that need to be written.
        (nvol, remainder) = divmod(array_length, size_limit)
        if remainder > 0:
            nvol = nvol + 1

        file_stem = os.path.splitext(file_name)[0]
        start = 0
        ivol = 0

        # Loop through until full array length has been written to a set of files.
        while start < array_length:
            ivol = ivol + 1
            end = min(start + size_limit, array_length)

            # Slice every variable to the current block
            current_block = [v[start:end] for v in var_list]

            # Adjust X accordingly in the na dictionary, because independent variable has been reduced in size
            na_dict_copy = nappy.utils.common_utils.modifyNADictCopy(this_na_dict, current_block,
                                                                      start, end, ivol, nvol)
            # Append the volume number to the file name for writing this block to
            file_name_plus_number = "%s-%.3d.na" % (file_stem, ivol)

            # Write data to output file
            x = nappy.openNAFile(file_name_plus_number, 'w', na_dict_copy)
            x.write(delimiter=delimiter, float_format=float_format, annotation=annotation)
            x.close()

            msg = "\nOutput files split on size limit: %s\nFilename used: %s" % (size_limit, file_name_plus_number)
            if DEBUG: print(msg)
            self.output_message.append(msg)

            file_names.append(file_name_plus_number)
            start = end

        return file_names

    def _updateWithColumnHeaders(self, na_dict, delimiter):
        """
        Updates the NCOM and NNCOML parts of the na_dict so that
        the last normal comments line is in fact a set of headers
        for the data section. E.g.:

           UTs     Spd  Direc
           30446.9 305  2592
           20447.9 204  2596

        The header names are joined with commas.
        NOTE(review): the 'delimiter' argument is accepted but not currently
        used to join the names - confirm whether it should be.

        This option is only compatible with a limited range of FFIs and
        only works if there are no Auxiliary variables defined; otherwise a
        warning is printed and na_dict is left unchanged.
        """
        ffi = na_dict["FFI"]
        compatible_ffis = (1001, 1010, 2110)

        if ffi not in compatible_ffis or na_dict["NAUXV"] > 0:
            print("WARNING: Column Headers are not written for FFIs other than: " + str(compatible_ffis))
            return

        # FFIs 1001 and 2110 get the first XNAME entry as the leading column
        # name; 1010 starts from an empty list.
        if ffi in (1001, 2110):
            col_names = [na_dict["XNAME"][0]]
        elif ffi in (1010,):
            col_names = []

        col_names.extend(na_dict["VNAME"])
        col_names_line = ",".join(col_names)
        na_dict["NCOM"].append(col_names_line)
        na_dict["NNCOML"] = len(na_dict["NCOM"])
        return

    def _cleanWrapComments(self, existing_comments, new_comments, key, comment_override_rule):
        """
        Combines new_comments with existing_comments where comments are
        either Special or Normal. 'key' defines this being defined as either
        "SCOM" or "NCOM". 'comment_override_rule' is either "insert" (new_comments first)
        or "extend" (existing_comments first).

        If the existing comments are wrapped in the configured start/end
        header lines, those lines are taken off before combining and put back
        around the combined result. An empty existing_comments list is allowed.
        Returns a new list of combined_comments.
        """
        c1 = key[0].lower()
        start_line = hp[c1 + "c_start"]
        end_line = hp[c1 + "c_end"]

        # Work on a copy; the emptiness checks below avoid indexing an empty list
        existing_comments = list(existing_comments)

        # Strip start header if used
        start_used = None
        if existing_comments and existing_comments[0] == start_line:
            existing_comments = existing_comments[1:]
            start_used = start_line

        # Now check last line
        end_used = None
        if existing_comments and existing_comments[-1] == end_line:
            existing_comments = existing_comments[:-1]
            end_used = end_line

        # Check for alternative last line in NCOM
        if end_used is None and key == "NCOM":
            end_line2 = hp["data_next"]
            if existing_comments and existing_comments[-1] == end_line2:
                existing_comments = existing_comments[:-1]
                end_used = end_line2

        # Now put back together
        if comment_override_rule == "insert":
            combined_comments = new_comments + existing_comments
        else:
            combined_comments = existing_comments + new_comments

        if start_used:
            combined_comments.insert(0, start_used)
        if end_used:
            combined_comments.append(end_used)

        return combined_comments
408
Note: See TracBrowser for help on using the repository browser.