source: nappy/trunk/nappy/nc_interface/nc_to_na.py @ 5368

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/nappy/trunk/nappy/nc_interface/nc_to_na.py@5368
Revision 5368, 16.3 KB checked in by astephen, 11 years ago (diff)

Replace print with logging

Line 
1#   Copyright (C) 2004 CCLRC & NERC( Natural Environment Research Council ).
2#   This software may be distributed under the terms of the
3#   Q Public License, version 1.0 or later. http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
4
5"""
6nc_to_na.py
7=============
8
9Holds the class NCToNA (sub-classing CDMSToNA) that converts a NetCDF file to
10one or more NASA Ames files.
11
12"""
13
# Imports from python standard library
import logging
import os
import sys

# Import from nappy package
import nappy
from nappy.na_error import na_error
import nappy.utils
import nappy.utils.common_utils
import nappy.nc_interface.cdms_to_na
import nappy.nc_interface.na_content_collector
25
# Import external packages (if available)
# Only the platform name *prefix* is tested: a plain substring search for
# "win" also matches "darwin" (macOS) and would wrongly reject that platform.
# "cygwin" is still rejected, exactly as the original substring test did.
if sys.platform.startswith("win") or sys.platform == "cygwin":
    raise na_error.NAPlatformError("Windows does not support CDMS. CDMS is required to convert to CDMS objects and NetCDF.")

# Prefer cdms2 and fall back to the older cdms package. Only import failures
# trigger the fallback so that unrelated errors are not silently hidden.
try:
    import cdms2 as cdms
except ImportError:
    try:
        import cdms
    except ImportError:
        raise ImportError("Could not import third-party software. Nappy requires the CDMS and Numeric packages to be installed to convert to CDMS and NetCDF.")

cdms.setAutoBounds("off")
39
# Settings read from the nappy configuration when this module is imported
DEBUG = nappy.utils.getDebug()
(default_delimiter, default_float_format, comment_override_rule) = map(
    nappy.utils.getDefault,
    ("default_delimiter", "default_float_format", "comment_override_rule"))
# NOTE(review): bool() of any non-empty string (including "False") is True --
# confirm that getDefault() returns a real boolean for this option.
add_column_headers = bool(nappy.utils.getDefault("add_column_headers"))

# Header partition strings; "hp" is a short alias used further down
config_dict = nappy.utils.getConfigDict()
hp = header_partitions = config_dict["header_partitions"]

# NASA Ames header items that may be overridden, and the subset of those
# whose override value is split on whitespace into a list
permitted_overwrite_metadata = (
    "DATE", "RDATE", "ANAME", "MNAME", "ONAME",
    "ORG", "SNAME", "VNAME", "SCOM", "NCOM",
)
items_as_lists = ["DATE", "RDATE", "ANAME", "VNAME"]

# Module-level logger
logging.basicConfig()
log = logging.getLogger(__name__)
58
class NCToNA(nappy.nc_interface.cdms_to_na.CDMSToNA):
    """
    Converts a NetCDF file to one or more NASA Ames files.

    The NetCDF file is read with CDMS when the object is created. The methods
    below use convert(), self.na_dict_list, self.na_items_to_override and
    self.output_message, none of which are defined in this module; they are
    expected to be provided by the parent class (CDMSToNA).
    """

    def __init__(self, nc_file, var_ids=None, na_items_to_override={}, 
            only_return_file_names=False, exclude_vars=[],
            requested_ffi=None,
            ):
        """
        Sets up instance variables.
        Typical usage is:
        >>>    import nappy.nc_interface.nc_to_na as nc_to_na
        >>>    c = nc_to_na.NCToNA("old_file.nc")
        >>>    c.convert()
        >>>    c.writeNAFiles("new_file.na", delimiter=",")

        OR:
        >>>    c = nc_to_na.NCToNA("old_file.nc")
        >>>    file_names = c.constructNAFileNames()

        Arguments:
          nc_file   - path of the NetCDF file to read.
          var_ids   - a variable id or list of ids to convert (None: all).
          na_items_to_override, only_return_file_names, requested_ffi
                    - passed unchanged to the parent class constructor.
          exclude_vars - list of variable ids that must not be converted.

        The mutable default values are kept so that the signature is
        unchanged; they are never modified by this class.
        """
        self.nc_file = nc_file

        # Now need to read CDMS file so parent class methods are compatible
        (cdms_variables, global_attributes) = self._readCDMSFile(var_ids, exclude_vars)
        nappy.nc_interface.cdms_to_na.CDMSToNA.__init__(self, cdms_variables, global_attributes=global_attributes, 
                                                        na_items_to_override=na_items_to_override, 
                                                        only_return_file_names=only_return_file_names,
                                                        requested_ffi=requested_ffi)

    def _readCDMSFile(self, var_ids=None, exclude_vars=[]):
        """
        Reads the file and returns all the CDMS variables in a list as well
        as the global attributes: (cdms_variable_list, global_atts_list)
        If var_ids is defined then only get those. Any variable whose id is
        in exclude_vars is left out.
        """
        # Make sure var_ids is a list
        if isinstance(var_ids, str):
            var_ids = [var_ids]

        excluded = exclude_vars or ()

        fin = cdms.open(self.nc_file)
        try:
            cdms_variables = []
            for var_id in fin.listvariables():
                if var_ids is not None and var_id not in var_ids:
                    continue
                # Test the individual id (not the whole var_ids list) so that
                # exclusions really take effect
                if var_id in excluded:
                    continue
                cdms_variables.append(fin(var_id))

            # Copy the attributes so they remain usable after the file is closed
            global_attributes = list(fin.attributes.items())
        finally:
            # Variables have been extracted by calling the file object, so
            # the file handle is released here rather than leaked
            fin.close()

        return (cdms_variables, global_attributes)

    def constructNAFileNames(self, na_file=None):
        """
        Works out what the file names of the output NA files will be and
        returns a list of them.

        If na_file is not given the name is derived from self.nc_file by
        replacing a trailing ".nc" with ".na". When more than one output file
        is needed "_<n>" (counting from 1) is inserted before the extension.
        """
        self.convert()

        # create file name if not given
        if na_file is None:
            base_name = self.nc_file
            if base_name[-3:] == ".nc":
                base_name = base_name[:-3]
            na_file = base_name + ".na"

        n_outputs = len(self.na_dict_list)
        if n_outputs == 1:
            return [na_file]

        # splitext only looks at the final path component, so names without
        # an extension or with dots in a directory name are handled correctly
        (root, extension) = os.path.splitext(na_file)
        return ["%s_%s%s" % (root, count, extension) for count in range(1, n_outputs + 1)]

    def _recordMessage(self, msg):
        """
        Logs 'msg' at debug level (only when DEBUG is set) and appends it to
        self.output_message.
        """
        if DEBUG:
            log.debug(msg)
        self.output_message.append(msg)

    def _applyMetadataOverrides(self, this_na_dict, overriders):
        """
        Updates this_na_dict in place with those items of 'overriders' whose
        key is in permitted_overwrite_metadata. Comments (SCOM/NCOM) are
        replaced or merged according to comment_override_rule.
        """
        for (key, value) in overriders.items():
            if key not in permitted_overwrite_metadata:
                continue

            # Items held as lists are given as whitespace separated strings;
            # values that cannot be split (e.g. already a list) are used as is
            if key in items_as_lists and hasattr(value, "split"):
                new_item = value.split()
            else:
                new_item = value

            # Do specific overwrite for comments
            if key in ("SCOM", "NCOM"):

                # Use rule defined in config file in terms of where to put new comments
                if comment_override_rule == "replace":
                    comments_list = new_item[:]
                elif comment_override_rule in ("insert", "extend"):
                    existing_comments = this_na_dict.get(key, [])
                    comments_list = self._cleanWrapComments(existing_comments, new_item[:], key, comment_override_rule)
                else:
                    raise Exception("Did not recognise comment_override_rule: " + str(comment_override_rule))

                this_na_dict[key] = comments_list
                # Keep the line count item (NSCOML or NNCOML) in step
                this_na_dict["N%sL" % key] = len(comments_list)

            elif key not in this_na_dict or new_item != this_na_dict[key]:
                this_na_dict[key] = new_item
                self._recordMessage("Metadata overwritten in output file: '%s' is now '%s'" % (key, this_na_dict[key]))

    def writeNAFiles(self, na_file=None, delimiter=default_delimiter, annotation=False,
                     float_format=default_float_format, size_limit=None, no_header=False):
        """
        Writes the self.na_dict_list content to one or more NASA Ames files.
        Output file names are based on the self.nc_file name unless specified
        in the na_file argument in which case that provides the main name
        that is appended to if multiple output file names are required.

        If size_limit is given, and an FFI 1001 array is longer than it, that
        output is split over several files.

        All files written are recorded in self.output_files_written.
        Returns self.output_message (the list of messages collected).

        TODO: no_header is NOT implemented (the argument is ignored).
        """
        self.convert() # just in case not already called

        # Gets a list of NA file_names that will be produced.
        file_names = self.constructNAFileNames(na_file)

        # Get any NASA Ames dictionary values that should be overwritten with local values.
        # Work on a copy so the configuration dictionary itself is never modified.
        local_attributes = nappy.utils.getLocalAttributesConfigDict()
        overriders = dict(local_attributes["na_attributes"])
        for (okey, ovalue) in self.na_items_to_override.items():
            overriders[okey] = ovalue

        # Every file written, including those produced by size_limit splits
        all_files_written = []

        # Now loop through writing the outputs
        for (file_name, na_dict_and_var_ids) in zip(file_names, self.na_dict_list):
            self._recordMessage("\nWriting output NASA Ames file: %s" % file_name)

            # Set up current na dict
            (this_na_dict, vars_to_write) = na_dict_and_var_ids

            # Override content of NASA Ames if they are permitted
            self._applyMetadataOverrides(this_na_dict, overriders)

            # For certain FFIs create final Normal comments as a list of column headers before data section
            if add_column_headers:
                self._updateWithColumnHeaders(this_na_dict, delimiter)

            # Cope with size limits if specified and FFI is 1001:
            # chunks of a too long array are written to separate files.
            if size_limit is not None and (this_na_dict["FFI"] == 1001 and len(this_na_dict["V"][0]) > size_limit):
                file_list = list(self._writeNAFileSubsetsWithinSizeLimit(this_na_dict, file_name, delimiter=delimiter,
                                                                         float_format=float_format, size_limit=size_limit,
                                                                         annotation=annotation))

            # If not having to split file into multiple outputs (normal condition)
            else:
                log.info("Output NA file name: %s" % file_name)
                na_file_obj = nappy.openNAFile(file_name, 'w', this_na_dict)
                try:
                    na_file_obj.write(delimiter=delimiter, float_format=float_format, annotation=annotation)
                finally:
                    na_file_obj.close()
                file_list = [file_name]

            all_files_written.extend(file_list)

            # Report on what has been written
            self._recordMessage("\nWrote the following variables:" + "\n  " + ("\n  ".join(vars_to_write[0])))

            # Build one message from all the parts that apply (each part is
            # appended so that an earlier part is never overwritten)
            msg_parts = []
            aux_var_ids = vars_to_write[1]
            if len(aux_var_ids) > 0:
                msg_parts.append("\nWrote the following auxiliary variables:" + "\n  " + ("\n  ".join(aux_var_ids)))

            singleton_var_ids = vars_to_write[2]
            if len(singleton_var_ids) > 0:
                msg_parts.append("\nWrote the following Singleton variables:" + "\n  " + ("\n  ".join(singleton_var_ids)))

            if len(file_list) > 0:
                msg_parts.append("\n\nNASA Ames file(s) written successfully: \n%s" % "\n".join(file_list))

            self._recordMessage("".join(msg_parts))

        full_file_count = len(all_files_written)
        if full_file_count == 1:
            plural = ""
        else:
            plural = "s"
        self._recordMessage("\n%s file%s written." % (full_file_count, plural))

        self.output_files_written = all_files_written
        return self.output_message

    def _writeNAFileSubsetsWithinSizeLimit(self, this_na_dict, file_name, delimiter, 
                      float_format, size_limit, annotation):
        """
        If size_limit is specified and FFI is 1001 we can chunk the output into
        different files in a NASA Ames compliant way.

        Each chunk holds at most size_limit values per variable and is written
        to "<file_name without extension>-NNN.na" where NNN counts from 001.

        Returns list of file names of outputs written.
        Raises ValueError if size_limit is not a positive number.
        """
        # A zero or negative limit would otherwise never terminate (or fail
        # with a division by zero)
        if size_limit is None or size_limit < 1:
            raise ValueError("size_limit must be a positive integer, got: %r" % (size_limit,))

        var_list = this_na_dict["V"]
        array_length = len(var_list[0])

        # Number of volumes (files) that need to be written: round up
        (nvol, remainder) = divmod(array_length, size_limit)
        if remainder > 0:
            nvol = nvol + 1

        base_name = os.path.splitext(file_name)[0]
        file_names = []
        start = 0
        ivol = 0

        # Loop through until full array length has been written to a set of files.
        while start < array_length:
            ivol = ivol + 1
            end = min(start + size_limit, array_length)

            # Slice every variable to the current block
            current_block = [v[start:end] for v in var_list]

            # Adjust X accordingly in the na dictionary, because independent variable has been reduced in size
            na_dict_copy = nappy.utils.common_utils.modifyNADictCopy(this_na_dict, current_block, 
                                                                      start, end, ivol, nvol)

            # Append the volume number to the file name for writing this block to
            file_name_plus_number = "%s-%.3d.na" % (base_name, ivol)

            # Write data to output file
            na_file_obj = nappy.openNAFile(file_name_plus_number, 'w', na_dict_copy)
            try:
                na_file_obj.write(delimiter=delimiter, float_format=float_format, annotation=annotation)
            finally:
                na_file_obj.close()

            self._recordMessage("\nOutput files split on size limit: %s\nFilename used: %s" % (size_limit, file_name_plus_number))

            file_names.append(file_name_plus_number)
            start = end

        return file_names

    def _updateWithColumnHeaders(self, na_dict, delimiter):
        """
        Updates the NCOM and NNCOML parts of the na_dict so that
        the last normal comments line is in fact a set of headers
        for the data section. E.g.:

           UTs     Spd  Direc
           30446.9 305  2592
           20447.9 204  2596

        The 'delimiter' argument is used to separate the column names
        (a comma is used if delimiter is None).

        This option is only compatible with a limited range of FFIs and
        only works if there are no Auxiliary variables defined. In any
        other case a warning is logged and na_dict is left unchanged.
        """
        ffi = na_dict["FFI"]
        compatible_ffis = (1001, 1010, 2110)

        if ffi not in compatible_ffis:
            log.warning("Column Headers are not written for FFIs other than: %s" % str(compatible_ffis))
            return

        if na_dict.get("NAUXV", 0) > 0:
            log.warning("Column Headers are not written when Auxiliary variables are defined.")
            return

        # FFI 1010 gets no independent variable name in the header line
        if ffi in (1001, 2110):
            col_names = [na_dict["XNAME"][0]]
        else:
            col_names = []

        col_names.extend(na_dict["VNAME"])

        if delimiter is None:
            separator = ","
        else:
            separator = delimiter

        na_dict["NCOM"].append(separator.join(col_names))
        na_dict["NNCOML"] = len(na_dict["NCOM"])
        return

    def _cleanWrapComments(self, existing_comments, new_comments, key, comment_override_rule):
        """
        Combines new_comments with existing_comments where comments are
        either Special or Normal. 'key' defines this being defined as either
        "SCOM" or "NCOM". 'comment_override_rule' is either "insert" (new_comments first)
        or "extend" (existing_comments first).

        A start marker line and an end marker line (as defined in the header
        partitions configuration) found around existing_comments are kept as
        the first and last lines of the result.

        Returns a new list of combined_comments (or the non-empty input,
        unchanged, when the other one is empty).
        """
        if not existing_comments:
            return new_comments
        if not new_comments:
            return existing_comments

        # Marker lines are looked up as "sc_start"/"sc_end" or "nc_start"/"nc_end"
        prefix = key[0].lower()
        start_line = hp[prefix + "c_start"]
        end_line = hp[prefix + "c_end"]
        start_used = False
        end_used = False

        # Strip start header if used
        if existing_comments[0] == start_line:
            existing_comments = existing_comments[1:]
            start_used = start_line

        # Now check last line; nothing may be left if the start marker was
        # the only existing line
        if existing_comments:
            last_line = existing_comments[-1]
            if last_line == end_line:
                existing_comments = existing_comments[:-1]
                end_used = end_line
            # Check for alternative last line in NCOM
            elif key == "NCOM" and last_line == hp["data_next"]:
                existing_comments = existing_comments[:-1]
                end_used = last_line

        # Now put back together
        if comment_override_rule == "insert":
            combined_comments = new_comments + existing_comments
        else:
            combined_comments = existing_comments + new_comments

        if start_used:
            combined_comments.insert(0, start_used)
        if end_used:
            combined_comments.append(end_used)

        return combined_comments
Note: See TracBrowser for help on using the repository browser.