1 | #!/usr/bin/env python |
---|
2 | import cElementTree |
---|
3 | import elementtree.ElementTree as etree |
---|
4 | import molesReadWrite as MRW |
---|
5 | import sys |
---|
6 | import db_funcs |
---|
7 | import os |
---|
8 | |
---|
# Connect to the database; db_connect() lives in the separate db_funcs module.
# The resulting module-level connection is shared by id_exists(), do_insert()
# and do_update() in this file. Note that it is opened at import time.
connection = db_funcs.db_connect()
---|
11 | |
---|
def id_exists(Mid):
    """Report whether a row with the given id is already in ``spatiotemp``.

    Mid -- record identifier to look up.

    Returns True when the query yields at least one row, otherwise False.
    """
    # Bind the id as a query parameter instead of splicing it into the SQL
    # text, so an id containing a quote can neither break nor alter the query.
    # NOTE(review): assumes the driver behind db_funcs.db_connect() accepts the
    # DB-API "format"/"pyformat" placeholder (%s) -- confirm.
    sql = "select id from spatiotemp where id = %s;"
    cursor = connection.cursor()
    try:
        cursor.execute(sql, (Mid,))
        return len(cursor.fetchall()) > 0
    finally:
        # Release the cursor even if the query raises.
        cursor.close()
---|
20 | |
---|
21 | |
---|
def do_insert(Mid,west,south,east,north,startdate,enddate):
    """Insert a new row into ``spatiotemp`` and commit it.

    Mid                      -- record identifier stored in the ``id`` column.
    west, south, east, north -- bounding-box limits; they are written into an
                                sbox value as ``((west d, south d), (east d, north d))``.
    startdate, enddate       -- values stored in ``startdate`` / ``enddate``.

    Any database error raised by the driver propagates to the caller.
    """
    # All values are bound as parameters rather than concatenated into the
    # SQL text, so quotes in the id or in the date strings cannot break or
    # alter the statement.  The sbox is passed as its text form and cast.
    # NOTE(review): assumes the driver behind db_funcs.db_connect() accepts the
    # DB-API "format"/"pyformat" placeholder (%s) -- confirm.
    sql = ("INSERT INTO spatiotemp (id, coordinates, startdate, enddate) "
           "VALUES (%s, CAST(%s AS sbox), %s, %s);")
    box = "((%sd , %sd), (%sd , %sd))" % (west, south, east, north)
    params = (Mid, box, startdate, enddate)
    # Debug trace of what is about to be executed.
    print("%s %r" % (sql, params))
    cursor = connection.cursor()
    try:
        cursor.execute(sql, params)
        connection.commit()
    finally:
        # Release the cursor even if the statement raises.
        cursor.close()
---|
28 | |
---|
def do_update(Mid,west,south,east,north,startdate,enddate):
    """Update the ``spatiotemp`` row whose id is *Mid* and commit the change.

    Mid                      -- identifier of the row to update.
    west, south, east, north -- bounding-box limits; they are written into an
                                sbox value as ``((west d, south d), (east d, north d))``.
    startdate, enddate       -- new values for ``startdate`` / ``enddate``.

    Any database error raised by the driver propagates to the caller.
    """
    # All values are bound as parameters rather than concatenated into the
    # SQL text, so quotes in the id or in the date strings cannot break or
    # alter the statement.  The sbox is passed as its text form and cast.
    # NOTE(review): assumes the driver behind db_funcs.db_connect() accepts the
    # DB-API "format"/"pyformat" placeholder (%s) -- confirm.
    sql = ("UPDATE spatiotemp SET coordinates = CAST(%s AS sbox), "
           "startdate = %s, enddate = %s WHERE id = %s;")
    box = "((%sd , %sd), (%sd , %sd))" % (west, south, east, north)
    params = (box, startdate, enddate, Mid)
    # Debug trace of what is about to be executed.
    print("%s %r" % (sql, params))
    cursor = connection.cursor()
    try:
        cursor.execute(sql, params)
        connection.commit()
    finally:
        # Release the cursor even if the statement raises.
        cursor.close()
---|
35 | |
---|
def _parse_limit(raw, positive_suffix, negative_suffix):
    """Normalise one bounding-box limit to a signed decimal string.

    raw             -- limit text as read from the record, e.g. "10", "10E",
                       "10W" or "-10W"; surrounding whitespace is ignored.
    positive_suffix -- hemisphere letter that leaves the sign alone ('E'/'N').
    negative_suffix -- hemisphere letter that makes the value negative ('W'/'S').

    Returns the cleaned string, or None when *raw* is missing or is not a
    number once the hemisphere letter has been removed.
    """
    try:
        value = raw.strip()
    except AttributeError:
        # Not a string at all (for instance None for a missing element).
        return None
    if value.endswith(positive_suffix):
        value = value[:-len(positive_suffix)].strip()
    elif value.endswith(negative_suffix):
        value = value[:-len(negative_suffix)].strip()
        # A value that is already negative keeps its single minus sign.
        if not value.startswith('-'):
            value = "-" + value
    try:
        float(value)
    except (TypeError, ValueError):
        return None
    return value


def _parse_bbox(bbox, full_filename):
    """Parse the four limits of *bbox*, reporting progress on stdout.

    Returns a (west, south, east, north) tuple of strings, or None after
    printing an error naming the first limit that could not be parsed.
    """
    parsed = {}
    # Same order as the limits have always been checked and reported in.
    for label, raw, positive_suffix, negative_suffix in (
            ("West", bbox.LimitWest, 'E', 'W'),
            ("East", bbox.LimitEast, 'E', 'W'),
            ("North", bbox.LimitNorth, 'N', 'S'),
            ("South", bbox.LimitSouth, 'N', 'S')):
        value = _parse_limit(raw, positive_suffix, negative_suffix)
        if value is None:
            print("Error: Will not process File %s. Contains incorrect %s bounding box limit." % (full_filename, label))
            return None
        print("%s = %s" % (label, value))
        parsed[label] = value
    return parsed["West"], parsed["South"], parsed["East"], parsed["North"]


def main(indir):
    """Load the coverage of every MOLES record found in *indir* into the database.

    indir -- full path of the directory holding the MOLES XML records.  An
             empty string terminates the program with a usage message.

    Each file whose name ends in ".xml" is parsed, its bounding box and date
    range are extracted, and the row keyed by the file name is updated via
    do_update() when id_exists() reports it, or created via do_insert()
    otherwise.  Files with an unparsable bounding-box limit are reported and
    skipped.  A summary with the number of ingested files is printed at the end.
    """
    if indir == "":
        sys.exit("Usage: argument 1 = full path of directory where MOLES records reside")
    print("INFO: moles records are in %s" % indir)

    # This is a fix to the ElementTree namespace problem that namespaces are
    # usually represented as ns0, ns1, ns2 etc.
    etree._namespace_map.update({'http://ndg.nerc.ac.uk/moles': 'moles', 'http://ndg.nerc.ac.uk/csml' : 'csml', 'http://www.w3.org/1999/xlink':'xlink'})
    numfilesproc = 0

    for filename in os.listdir(indir):
        # Only real ".xml" files; a substring test would also pick up names
        # such as "record.xml.bak".
        if not filename.endswith('.xml'):
            continue
        full_filename = os.path.join(indir, filename)
        dgMeta = MRW.dgMetadata()
        dgMeta.fromXML(cElementTree.ElementTree(file=full_filename).getroot())
        data_coverage = dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage
        bbox = data_coverage.dgSpatialCoverage.BoundingBox
        dates = data_coverage.dgTemporalCoverage.DateRange
        coverage = [dates.DateRangeEnd, dates.DateRangeStart, bbox.LimitNorth, bbox.LimitSouth, bbox.LimitEast, bbox.LimitWest]
        # The file name serves as the record id (rather than the
        # repositoryIdentifier/localIdentifier pair held in the metadata).
        Mid = filename
        print("%s %s" % (coverage, Mid))

        limits = _parse_bbox(bbox, full_filename)
        if limits is None:
            # The offending limit has already been reported.
            continue
        west, south, east, north = limits

        if id_exists(Mid):
            print("doc %s exists, updating\n" % Mid)
            do_update(Mid, west, south, east, north, dates.DateRangeStart, dates.DateRangeEnd)
        else:
            print("doc %s does not exist, inserting new record\n" % Mid)
            do_insert(Mid, west, south, east, north, dates.DateRangeStart, dates.DateRangeEnd)

        numfilesproc += 1

    print('SpaceTimeIngestFromMOLES.py ran to end. files processed= %s' % (numfilesproc))
---|
131 | |
---|
if __name__=='__main__':
    # Fall back to "" when no directory is supplied so that main() prints its
    # usage message instead of the script dying with an IndexError here.
    if len(sys.argv) > 1:
        indir = sys.argv[1]
    else:
        indir = ""
    main(indir)
---|