source: nappy/trunk/nappy/nappy_api.py @ 3616

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/nappy/trunk/nappy/nappy_api.py@3616
Revision 3616, 17.5 KB checked in by astephen, 12 years ago (diff)

Massively changed first proper attempt at API.

Line 
1"""
2nappy_api.py
3============
4
5Top-level API module that allows user to access most of the useful stuff in
6nappy. API examples:
7
8 1. Working with NASA Ames file objects
9 2. Converting between formats (NASA Ames, NetCDF and CSV)
10 3. Comparing NASA Ames files (and/or CSV files)
11 4. General NASA Ames utilities
12
13 1. Working with NASA Ames file objects
14
15# Start python interactive shell
16$ python
17
18# Import the nappy package
19import nappy
20
21# Let's open a NASA Ames file and examine its contents
22f = nappy.openNAFile("data_files/2010.na")
23
24# Get number of header lines
25n_lines = f.getNumHeaderLines()
26
27# Get Organisation from header
28org = f.getOrg()
29# Get the Normal Comments (NCOM) lines.
30norm_comms = f.getNormalComments()
31
32# Get the Special Comments (SCOM) lines.
33spec_comms = f.getSpecialComments()
34
35# Get a list of metadata for all main (non-auxiliary or independent) variables
36var_list = f.getVariables()
37
38# Get Auxiliary variable metadata for auxiliary variable number 2
39(variable, units, miss, scale) = f.getAuxVariable(2)
40
41# Get scale factor for primary variable number 3
42scale_factor = f.getScaleFactor(3)
43
44# Get missing value for primary variable number 1
45missing = f.getMissingValue(1)
46
47# Let's get the contents dictionary of the whole file
48na_dict = f.getNADict()
49
50# Let's write the na_dict object to a new NASA Ames file
51fout = nappy.openNAFile("test_outputs/mytest.na", mode="w", na_dict=na_dict)
52fout.write()
53fout.close()
54
55 2. Converting between formats (NASA Ames, NetCDF and CSV)
56
57# Let's convert a NASA Ames file into a NetCDF file, and add some of our own global attributes
58glob_atts = {"Project": "Really important scientific project involving worms",
59             "Errata": "I meant worm holes!"}
60na_file = "data_files/1020.na"
61nc_file = "test_outputs/try_1020.nc"
62nappy.convertNAToNC(na_file, nc_file, global_attributes=glob_atts)
63
64# Let's convert a NASA Ames file to a CSV and add an annotation column to explain the header
65nappy.convertNAToCSV(na_file, annotation=True)
66
67# Let's read a NetCDF and write one (or more) output NASA Ames files,
68# but only including the variables "temp" and "ozone". Also let's write
69# the output using tabs as the delimiters and a float format of "%6.3f".
70nappy.convertNCToNA("data_files/test1.nc", "test_outputs/test1nc.na",
71              var_ids=("temp", "ozone"), delimiter="\t", float_format="%6.3f")
72 
73# Let's convert a NetCDF file to one (or more) CSV files and don't write the header at all
74nappy.convertNCToCSV("data_files/test1.nc", "test_outputs/test1nc_no_header.csv",
75                     no_header=True)
76
77# Let's take some in-memory CDMS objects and write them to one, or more, NASA Ames file(s).
78# We need to give it a list of cdms variables and a global attributes dictionary.
79# We also want to instruct nappy to overwrite the content of its
80# MNAME (Mission Name) header line with our specific mission name.
81# Also, tell nappy to write the output to NASA Ames file format index (FFI) 2310
82# because we know it is compatible.
83nappy.convertCDMSObjectsToNA([cdms_var_1, cdms_var_2], {"Institute": "British Atmospheric Data Centre"},
84              na_file="test_outputs/cdms_to_na.na",
85              na_items_to_override={"MNAME": "Atlantic Divergence Mission 2009"},
86              requested_ffi=2310)
87
88# Let's take a list of cdms variables and a global attributes dictionary and write
89# them to a CSV file.
90nappy.convertCDMSObjectsToCSV(cdms_vars, global_atts_dict, csv_file)
91
92# Let's take a NASA Ames dictionary object, and write it to a NetCDF file
93nappy.writeNADictToNC(na_dict, nc_file, mode="w")
94
95# Let's try and write a second na_dict object to the same NetCDF file using mode="a".
96nappy.writeNADictToNC(na_dict_2, nc_file, mode="a")
97
98# Now let's read in a NASA Ames file and convert the contents in-memory into
99# CDMS objects so that we can manipulate them with NetCDF-compatible tools
100(cdms_vars_primary, cdms_vars_aux, global_attributes) = nappy.readCDMSObjectsFromNA(na_file)
101
102# Actually, I only want to get a single variable from that file, so I'll try
103temp_var = nappy.getCDMSVariableFromNA(na_file, "temperature")
104
105 3. Comparing NASA Ames files (and/or CSV files)
106
107# I'd like to compare a NASA Ames and CSV file to check they are the same.
108# It will allow for different formatting of numbers as long as the values
109# are the same. Compare both header and body by setting as True (default).
110result = nappy.compareNAFiles(na_file, csv_file, header=True, body=True,
111            number_clever=True, delimiter_1="    ", delimiter_2=",")
112
113 4. General NASA Ames utilities
114
115"""
116
117# Import standard library modules
118
119# Import third-party software
# CDMS is optional: if it cannot be imported a warning is printed and the
# name ``cdms`` is bound to False so that the rest of the module still loads.
# Only ImportError is caught so that unrelated failures are not hidden.
try:
    import cdms
except ImportError:
    print("WARNING: You cannot use NAPpy's NetCDF conversion tools as your system does not have CDMS installed, or it is not in your sys.path.")
    cdms = False
125
126# Import local modules
127import nappy.utils
128
129# Bring some utils into the API
130compareNAFiles = nappy.utils.compareNAFiles
131readFFI = nappy.utils.readFFI
132chooseFFI = nappy.utils.chooseFFI
133getNAFileClass = nappy.utils.getNAFileClass
134__version__ = nappy.utils.getVersion()
135
136
def openNAFile(filename, mode="r", na_dict=None):
    """
    Function wrapper around the NASA Ames File classes. Any NASA Ames
    file can be opened through this function and the appropriate read or
    write NASA Ames File class instance is returned.

    filename - path of the NASA Ames file to read or write.
    mode - "r" to read an existing file or "w" to write a new one.
    na_dict - NASA Ames dictionary holding the content to write. Required when
              mode is "w" and not used when mode is "r". If it does not hold
              an integer "FFI" item then the FFI is selected with chooseFFI()
              and stored back into na_dict["FFI"].

    Raises ValueError if mode is "w" and na_dict is None, and Exception if the
    mode is not recognised.
    """
    if mode == "r":
        ffi = readFFI(filename)
        return getNAFileClass(ffi)(filename, mode)

    if mode == "w":
        # Without this guard the default na_dict=None fails with an obscure
        # AttributeError as soon as the dictionary is inspected.
        if na_dict is None:
            raise ValueError("A na_dict must be provided when opening a file with mode 'w'.")

        if "FFI" in na_dict and isinstance(na_dict["FFI"], int):
            ffi = na_dict["FFI"]
        else:
            ffi = chooseFFI(na_dict)
            na_dict["FFI"] = ffi
            print("\nFormat identified as: %s" % (ffi,))
        return getNAFileClass(ffi)(filename, mode=mode, na_dict=na_dict)

    raise Exception("File mode not recognised '" + mode + "'.")
157
158
def convertNAToNC(na_file, nc_file=None, mode="w", variables=None, aux_variables=None,
                 global_attributes=None,
                 time_units=None, time_warning=True,
                 rename_variables=None):
    """
    Takes a NASA Ames file and converts to a NetCDF file. Options are:

    na_file - the input NASA Ames file.
    nc_file - name for the output NetCDF file (default is to replace ".na" from NASA Ames
              file with ".nc").
    mode - is the file mode, either "w" for write or "a" for append
    variables - is a list of variable names that you wish to be converted. If not set then
              nappy will attempt to convert all variables.
    aux_variables - is a list of auxiliary variables names that you wish to be converted.
              If not set then nappy will use any compatible variables it finds as
              auxiliary variables.
    global_attributes - is a dictionary of global attributes to add to the output file
              (default is {"Conventions": "CF-1.0"}).
    rename_variables - is a dictionary of {old_name: new_name} variable ID pairs that nappy
              should use to rename variables before it writes them to file
              (default is an empty dictionary).
    time_units - is a valid time units string such as "hours since 2003-04-30 10:00:00" to
              use for time units if there is a valid time axis.
    time_warning - suppresses the time units warning for invalid time units if set to False.

    Returns True once the NetCDF file has been written.
    """
    # Build the defaults per call: a dict literal in the signature is a single
    # object shared by every call, so any mutation downstream would leak
    # into later conversions.
    if global_attributes is None:
        global_attributes = {"Conventions": "CF-1.0"}
    if rename_variables is None:
        rename_variables = {}

    # NOTE(review): other functions in this module reach their convertors via
    # nappy.nc_interface rather than nappy.nc_convertor, and only nappy.utils
    # is imported above - confirm this module path and that it gets imported.
    convertor = nappy.nc_convertor.na_to_nc.NAToNC(
        na_file=na_file, variables=variables, aux_variables=aux_variables,
        global_attributes=global_attributes, time_units=time_units,
        time_warning=time_warning, rename_variables=rename_variables)
    convertor.convert()

    if nc_file is None:
        nc_file = nappy.utils.getFileNameWithNewExtension(na_file, "nc")
    convertor.writeNCFile(nc_file, mode)
    # TODO: decide whether this should return the NetCDF file path instead.
    return True
193 
194
def convertNAToCSV(na_file, csv_file=None, annotation=False, no_header=False):
    """
    Reads in a NASA Ames file and writes it out a new CSV file which is identical to the
    input file except that commas are used as the delimiter. Arguments are:

    na_file - NASA Ames file path
    csv_file - CSV file path (default is to replace ".na" from NASA Ames file with ".csv").
    annotation - if set to True write the output file with an additional left-hand column
                 describing the contents of each header line.
    no_header - intended to restrict the output to the data blocks only.
                NOTE(review): this value is accepted but is not currently passed on
                to the file writer - confirm whether write() supports it.

    Returns True once the CSV file has been written.
    """
    fin = openNAFile(na_file)
    try:
        fin.readData()
        na_dict = fin.getNADict()
    finally:
        # Release the input file even if reading fails part-way through.
        fin.close()

    if csv_file is None:
        # Derive the output name from the *input* NASA Ames file name.
        csv_file = nappy.utils.getFileNameWithNewExtension(na_file, "csv")

    fout = openNAFile(csv_file, "w", na_dict=na_dict)
    try:
        fout.write(delimiter=",", annotation=annotation)
    finally:
        fout.close()
    return True
217
218
def convertNCToNA(nc_file, na_file=None, var_ids=None, na_items_to_override=None,
            only_return_file_names=False, exclude_vars=None,
            requested_ffi=None, delimiter="    ", float_format="%g",
            size_limit=None, annotation=False, no_header=False):
    """
    Takes a NetCDF file and converts the contents to one or more NASA Ames files.
    Arguments are:

    nc_file - is the name of input file (NetCDF).
    na_file - is the name of output file (default is to replace ".nc" from NetCDF
              file with ".na"). If multiple files produced then this name will be used
              as the base name.
    var_ids - is a list of variables (as ids) to include in the output file(s).
    na_items_to_override - is a dictionary of {key: value} pairs to overwrite in output
              files (default is an empty dictionary). Typically the keys are in:
              ("DATE", "RDATE", "ANAME", "MNAME","ONAME", "ORG", "SNAME", "VNAME".)
    only_return_file_names - if set to True then only return a list of file names that
              would be written (i.e. don't convert actual file).
    exclude_vars - is a list of variables (as ids) to exclude in the output file(s)
              (default is an empty list).
    requested_ffi - is the NASA Ames File Format Index (FFI) you wish to write to. Note
              that there are only limited options available depending on the data
              structures found.
    delimiter - the delimiter you wish to use between data items in the output file such
              as "   ", "\\t" or ",".
    float_format - a python formatting string such as "%s", "%g" or "%5.2f" used for
              formatting floats when written to file.
    size_limit - if format FFI is 1001 then chop files up into size_limit rows of data.
    annotation - if set to True write the output file with an additional left-hand column
              describing the contents of each header line.
    no_header - if set to True then only the data blocks are written to file.

    Returns the result of constructNAFileNames() when only_return_file_names is set,
    otherwise True once the file(s) have been written.
    """
    # Build the defaults per call so that no container is shared between calls.
    if na_items_to_override is None:
        na_items_to_override = {}
    if exclude_vars is None:
        exclude_vars = []

    if na_file is None:
        na_file = nappy.utils.getFileNameWithNewExtension(nc_file, "na")

    # Only the selection/override options go to the convertor; the formatting
    # options are used when the files are written below.
    # NOTE(review): other functions in this module reach their convertors via
    # nappy.nc_interface rather than nappy.nc_convertor - confirm this path.
    convertor = nappy.nc_convertor.nc_to_na.NCToNA(
        nc_file=nc_file, var_ids=var_ids,
        na_items_to_override=na_items_to_override,
        exclude_vars=exclude_vars, requested_ffi=requested_ffi)
    convertor.convert()

    # If user only wants files then only give them that
    if only_return_file_names:
        return convertor.constructNAFileNames(na_file)

    convertor.writeNAFiles(na_file, delimiter=delimiter, float_format=float_format,
                           size_limit=size_limit, annotation=annotation, no_header=no_header)
    print(convertor.output_message)
    # TODO: decide whether this should return the list of NASA Ames files produced.
    return True
270
271   
def convertNCToCSV(nc_file, csv_file=None, **arg_dict):
    """
    Reads in a NetCDF file and writes the data out to a CSV file following the
    NASA Ames standard.

    nc_file - is the name of input file (NetCDF).
    csv_file - is the name of the output file (default is to replace the extension
               of nc_file with ".csv").
    arg_dict - any further keyword arguments are passed straight through to
               convertNCToNA(). Any "na_file" or "delimiter" given here is overridden.

    Returns whatever convertNCToNA() returns.
    """
    if csv_file is None:
        csv_file = nappy.utils.getFileNameWithNewExtension(nc_file, "csv")

    # These must be set whether or not csv_file was defaulted, otherwise an
    # explicitly supplied csv_file is ignored and no commas are written.
    arg_dict["na_file"] = csv_file
    arg_dict["delimiter"] = ","

    return convertNCToNA(nc_file, **arg_dict)
283   
284
def convertCDMSObjectsToNA(cdms_vars, global_atts_dict, na_file,
              na_items_to_override=None, requested_ffi=None, delimiter="    ",
              float_format="%g", size_limit=None, annotation=False, no_header=False):
    """
    Takes a list of cdms variables and a global attributes dictionary and
    writes them to one or more NASA Ames files. Arguments are:

    cdms_vars - is a list of CDMS variables
    global_atts_dict - is a dictionary of {key: value} pairs for header
    na_file - is the name of the output file passed to the convertor's writeNAFiles().
    na_items_to_override - is a dictionary of {key: value} pairs to overwrite in
                output files (default is an empty dictionary). Typically the keys are in:
                ("DATE", "RDATE", "ANAME", "MNAME","ONAME", "ORG", "SNAME", "VNAME".)
    requested_ffi - is the NASA Ames File Format Index (FFI) you wish to write to.
                Note that there are only limited options available depending on the data
                structures found.
    delimiter - the delimiter you wish to use between data items in the output file
                such as "   ", "\\t" or ",".
    float_format - a python formatting string such as "%s", "%g" or "%5.2f" used for
                formatting floats when written to file.
    size_limit - if format FFI is 1001 then chop files up into size_limit rows of data.
                NOTE(review): this value is accepted but is not currently passed on
                to writeNAFiles() - confirm whether it should be.
    annotation - if set to True write the output file with an additional left-hand
                column describing the contents of each header line.
    no_header - if set to True then only the data blocks are written to file.

    Returns True once the file(s) have been written.
    """
    # Build the default per call so that no dictionary is shared between calls.
    if na_items_to_override is None:
        na_items_to_override = {}

    convertor = nappy.nc_interface.cdms_to_na.CDMSToNA(cdms_vars, global_atts=global_atts_dict,
                       na_items_to_override=na_items_to_override, requested_ffi=requested_ffi)
    convertor.convert()
    convertor.writeNAFiles(na_file, delimiter=delimiter, float_format=float_format,
                           annotation=annotation, no_header=no_header)
    # TODO: decide whether this should return the files written.
    return True
316
317
def convertCDMSObjectsToCSV(cdms_vars, global_atts_dict, csv_file, **arg_dict):
    """
    CSV flavour of convertCDMSObjectsToNA(): writes the list of cdms variables
    and the global attributes dictionary to one or more CSV files.

    Extra keyword arguments are handed on to convertCDMSObjectsToNA(), except
    that the delimiter is always forced to a comma.
    """
    csv_options = dict(arg_dict, delimiter=",")
    return convertCDMSObjectsToNA(cdms_vars, global_atts_dict, csv_file, **csv_options)
325
326
def writeNADictToNC(na_dict, nc_file, mode="w"):
    """
    Writes an NASA Ames dictionary object called na_dict to a NetCDF file called nc_file.
    Can set mode="a" or mode="w" to either append to existing nc_file or write new one.
    Note that mode="a" might not always work.

    Returns True once the file has been written. Raises Exception if CDMS could
    not be imported when this module was loaded.
    """
    # The module binds cdms to False when the import fails; fail with a clear
    # message rather than an AttributeError on cdms.open further down.
    if not cdms:
        raise Exception("Cannot write NetCDF file '%s' because CDMS is not available." % nc_file)

    # Note, this needs to pretend that the na_dict exists, do this by instantiating NACore and cheating...
    na_file_obj = nappy.na_file.na_core.NACore()
    na_file_obj.setNADict(na_dict)

    # Fake up some required methods: the data is already in na_dict, so
    # readData() is replaced with a no-op.
    def fakeCaller():
        pass
    na_file_obj.readData = fakeCaller

    # NOTE(review): the other functions in this module use
    # nappy.nc_interface.na_to_cdms.NADictToCdmsObjects for this conversion -
    # confirm that nappy.na_to_cdms.NAToCDMS is still the right class here.
    convertor = nappy.na_to_cdms.NAToCDMS(na_file_obj)
    (cdms_primary_vars, cdms_aux_vars, global_attributes) = convertor.convert()

    # Now write them out
    fout = cdms.open(nc_file, mode=mode)
    try:
        for var in (cdms_primary_vars + cdms_aux_vars):
            fout.write(var)

        # Write global attributes
        for (att, value) in global_attributes.items():
            setattr(fout, att, value)
    finally:
        # Close the output file even if a write fails.
        fout.close()

    print("NetCDF file '%s' written successfully." % nc_file)
    return True
354
355
def readCDMSObjectsFromNA(na_file):
    """
    Opens the NASA Ames file na_file and converts its contents to CDMS objects.

    The result is a 3-tuple of:
      * the primary NASA Ames variables, as a list of CDMS variables
      * the auxiliary NASA Ames variables, as a list of CDMS variables
      * the global attributes, as a dictionary
    """
    source = openNAFile(na_file)
    converter = nappy.nc_interface.na_to_cdms.NADictToCdmsObjects(source)

    # Unpack and re-pack so the caller always receives a plain 3-tuple.
    (primary, auxiliary, attributes) = converter.convert()
    return (primary, auxiliary, attributes)
372
373
def getCDMSVariableFromNA(na_file, var):
    """
    Builds CDMS objects from the NASA Ames file na_file, restricted to the single
    variable var, and returns the first primary variable produced.

    var is described by the API as either an integer index into the list of
    variables or the name of the variable; it is passed to the convertor as the
    one-item list variables=[var]. The returned object is described as a CDMS
    TransientVariable.
    """
    source = openNAFile(na_file)
    wanted = [var]
    converter = nappy.nc_interface.na_to_cdms.NADictToCdmsObjects(source, variables=wanted)
    (primary, auxiliary, attributes) = converter.convert()

    # The requested variable is expected to come back as a primary variable.
    return primary[0]
385
386
387"""
388
389MOVE TO UTILS AND FIX STRING TWOERS
390def getFileNameWithNewExtension(input_file, format):
391    ""
392    Takes an input_file name and applies new extension to it by:
393    (i) replacing initial extension if there is one, OR
394    (ii) just appending new extension.
395    ""
396    base_name = input_file
397    last_four = base_name[-4:]
398    found = last_four.find(".")
399    if found > -1:
400        idx = len(base_name) + found
401        base_name = base_name[:idx]
402    return base_name + "." + format
403
404TODO:
405Need to test all arguments independently and in combinations.
406 * mode = "a"???
407compareNAFiles - do it
408  Check comparing 2001.na and 2001.csv and see why it falls over.
409Error classes
410Command-line utils, are they in-line with all changes?
411Make sure annotation is callable at every level of the system.
412
413 * add no_header = True on write and for all write wrappers.
414* Need uniform float_format and delimiter throughout main code so consistent.
415"""
Note: See TracBrowser for help on using the repository browser.