source: TI02-CSML/trunk/csml/csmllibs/csmlfiles.py @ 2222

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/csml/csmllibs/csmlfiles.py@2222
Revision 2222, 9.9 KB checked in by domlowe, 13 years ago (diff)

fixed bug with duplicate files appearing in onetoseveral mapping

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1#version 0.1
2#Dominic Lowe, BADC 8th December 2005
3#import csmllibs
4import sys
5import csml.csmllibs.csmldirectory
6import csml.csmllibs.csmldataiface
7
8
class FileMapMaker(object):
    """Build FeatureFileMap objects for the data files below a directory.

    Sits on top of the lower level csmldirectory.DirectoryTree and
    csmlfeaturefilemap classes.  The relationships currently offered are
    onetomany, onetoseveral, onetoone, oneonly and mixedft, but others
    could be created.

    NOTE(review): csml.csmllibs.csmlfeaturefilemap is used throughout this
    class but is not among the imports visible at the top of this file --
    presumably it is loaded elsewhere in the csml package; confirm.
    """

    def __init__(self, topdirectory, ftorftdictionary):
        """Store the directory to scan and the feature type information.

        topdirectory -- top level directory handed to DirectoryTree.
        ftorftdictionary -- either a single feature type string (stored as
            self.ft and read by every mapping except mixedft), or a
            dictionary giving the feature type for each file entry returned
            by DirectoryTree.getCSMLSupportedFilesInSubDir (stored as
            self.ftdictionary and read by mixedft only).

        Any other type prints a message and ends the process through
        sys.exit with a non-zero status.
        """
        self.topdir = topdirectory
        # isinstance rather than an exact type() match, so that subclasses
        # (for example an ordered dictionary) are accepted as well.
        if isinstance(ftorftdictionary, str):
            self.ft = ftorftdictionary
        elif isinstance(ftorftdictionary, dict):
            self.ftdictionary = ftorftdictionary
        else:
            # wrong type
            print('FileMapMaker takes a  featuretype string or dictionary')
            # a bare sys.exit() would report success to the calling process
            sys.exit(1)

    def _readTree(self):
        """Return a DirectoryTree that has already read self.topdir."""
        tree = csml.csmllibs.csmldirectory.DirectoryTree()
        tree.setTopDirectory(self.topdir)
        tree.readDirectory()
        return tree

    def _newRepresentativeFile(self, filename):
        """Return a representativeFile for filename with feature type self.ft."""
        repfile = csml.csmllibs.csmlfeaturefilemap.representativeFile(filename)
        repfile.setFeatureType(self.ft)
        return repfile

    def _newRelatedFile(self, filename):
        """Return a relatedFile for filename with feature type self.ft."""
        # note related files could have individual featuretypes.
        relfile = csml.csmllibs.csmlfeaturefilemap.relatedFile(filename)
        relfile.setFeatureType(self.ft)
        return relfile

    @staticmethod
    def _otherFilesInSubDir(tree, subdir):
        """Return the files of subdir other than the first one.

        Returns an empty list (after printing a notice) when the tree
        reports no further files or the lookup fails.
        """
        try:
            otherfiles = tree.getAllCSMLFilesExceptFirst(subdir)
            otherfiles[0]  # probe: raises when there is nothing to index
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit are no longer swallowed here.
            print('no other csml files in this directory')
            return []
        return otherfiles

    @staticmethod
    def _fileSignature(filename):
        """Return the list used to decide whether two files are alike.

        The list is the axis sizes, followed by the variable names,
        followed by the axis names, as reported by the data interface
        selected for filename.
        """
        DI = csml.csmllibs.csmldataiface.DataInterface()
        DI = DI.getUnknownInterfaceType(filename)
        DI.openFile(filename)
        try:
            variables = DI.getListofVariables()
            # ignore the autogenerated axis_n dimensions used by cdms,
            # they aren't a reliable measure
            dims = [dim for dim in DI.getListOfAxes() if dim[:5] != 'axis_']
            shapes = []
            for dim in dims:
                DI.setAxis(dim)
                shapes.append(DI.getSizeOfAxis(dim))
        finally:
            # close the file even when reading its contents fails
            DI.closeFile()
        return shapes + variables + dims

    @staticmethod
    def _groupBySignature(firstname, othernames, signatureOf):
        """Partition file names into groups of files with equal signatures.

        firstname -- name heading the first group.
        othernames -- remaining names, examined in order.
        signatureOf -- callable returning the signature of a name.

        Returns a list of (representative name, [related names]) tuples in
        order of first appearance.  A name equal to an existing
        representative is dropped, which keeps a representative from also
        being listed as a file in its own right.  Each file is examined
        once and the loop is iterative, so neither the number of groups
        nor the number of files is limited by the recursion depth.
        """
        if not othernames:
            # nothing to compare with: do not examine the first file at all
            return [(firstname, [])]
        groups = [(firstname, signatureOf(firstname), [])]
        for name in othernames:
            if any(name == repname for repname, _, _ in groups):
                continue
            signature = signatureOf(name)
            for _, groupsignature, related in groups:
                if signature == groupsignature:
                    related.append(name)
                    break
            else:
                # unlike every group so far: becomes a new representative
                groups.append((name, signature, []))
        return [(repname, related) for repname, _, related in groups]

    def onetomany(self):
        """onetomany means one file per directory/subdirectory is the
        representative file; every other file there is related to it.

        Returns the populated FeatureFileMap.
        """
        tree = self._readTree()
        ffmap = csml.csmllibs.csmlfeaturefilemap.FeatureFileMap()
        for subdir in tree.getSubDirectories():
            firstfile = tree.getFirstInSubDir(subdir)
            if firstfile is None:
                continue
            repfile = self._newRepresentativeFile(firstfile)
            # with no other files the representative is added on its own
            for otherfile in self._otherFilesInSubDir(tree, subdir):
                repfile.addRelatedFile(self._newRelatedFile(otherfile))
            ffmap.addRepresentativeFile(repfile)
        return ffmap

    def onetoseveral(self):
        """like onetomany, but a directory may contain several
        representative files, e.g. 5 files containing one (or more)
        feature(s) and then 5 files containing different feature(s).

        Files are alike when their axis sizes, variables and axis names
        agree (see _fileSignature).  Every file has to be opened, so this
        might be slow on large datasets.

        Returns the populated FeatureFileMap.
        """
        tree = self._readTree()
        ffmap = csml.csmllibs.csmlfeaturefilemap.FeatureFileMap()
        for subdir in tree.getSubDirectories():
            firstfile = tree.getFirstInSubDir(subdir)
            if firstfile is None:
                continue
            otherfiles = self._otherFilesInSubDir(tree, subdir)
            groups = self._groupBySignature(firstfile, otherfiles,
                                            self._fileSignature)
            for repname, relatednames in groups:
                repfile = self._newRepresentativeFile(repname)
                for relatedname in relatednames:
                    repfile.addRelatedFile(self._newRelatedFile(relatedname))
                ffmap.addRepresentativeFile(repfile)
        return ffmap

    def onetoone(self):
        """onetoone means each feature is self contained within any
        individual file, so every file becomes a representative file.

        Returns the populated FeatureFileMap.
        """
        tree = self._readTree()
        ffmap = csml.csmllibs.csmlfeaturefilemap.FeatureFileMap()
        for subdir in tree.getSubDirectories():
            for f in tree.getCSMLSupportedFilesInSubDir(subdir):
                ffmap.addRepresentativeFile(self._newRepresentativeFile(f))
        return ffmap

    def oneonly(self):
        """oneonly means one file represents a feature spanning multiple
        directories.  Assumes no file in the toplevel directory, and then
        lots of subdirectories at the next level containing files.

        Returns the populated FeatureFileMap; it is empty when the tree
        reports no supported files.
        """
        tree = self._readTree()
        # Result unused.  NOTE(review): call kept from the original in case
        # DirectoryTree.getSubDirectories has side effects -- drop it if not.
        tree.getSubDirectories()
        allfiles = tree.getAllCSMLSupportedFiles()
        ffmap = csml.csmllibs.csmlfeaturefilemap.FeatureFileMap()
        if not allfiles:
            # nothing found: return an empty map like the other mappings do
            return ffmap
        # create representative file from first file
        repfile = self._newRepresentativeFile(allfiles[0])
        # create related files from all other files
        for f in allfiles[1:]:
            repfile.addRelatedFile(self._newRelatedFile(f))
        ffmap.addRepresentativeFile(repfile)
        return ffmap

    def mixedft(self):
        """allows the use of a dictionary object to store feature file map
        info, so that each file can have its own feature type.
        Not well tested.

        Raises KeyError when a file found in the tree has no entry in
        self.ftdictionary.  Returns the populated FeatureFileMap.
        """
        tree = self._readTree()
        ffmap = csml.csmllibs.csmlfeaturefilemap.FeatureFileMap()
        for subdir in tree.getSubDirectories():
            for f in tree.getCSMLSupportedFilesInSubDir(subdir):
                repfile = csml.csmllibs.csmlfeaturefilemap.representativeFile(f)
                # look up feature type in dictionary
                repfile.setFeatureType(self.ftdictionary[f])
                ffmap.addRepresentativeFile(repfile)
        return ffmap
Note: See TracBrowser for help on using the repository browser.