source: TI01-discovery/trunk/ingestAutomation/OAIBatch/SpaceTimeIngestFromMOLES.py @ 1794

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI01-discovery/trunk/ingestAutomation/OAIBatch/SpaceTimeIngestFromMOLES.py@1794
Revision 1794, 2.5 KB checked in by selatham, 13 years ago (diff)

Ingests spatio-temporal information into Postgres.

Line 
#!/usr/bin/env python
"""Ingest spatial coverage from MOLES XML records into the spatio table.

Run as a script with one argument: the directory holding the MOLES records.
"""
import os
import sys

import cElementTree

import db_funcs
import molesReadWrite as MRW

# Module-level database connection used by the query functions in this
# file; the connection details live in the separate db_funcs module.
connection = db_funcs.db_connect()
10
def id_exists(Mid):
    """Return True if the spatio table already has a row with id ``Mid``.

    Mid -- record identifier string to look up.

    The id is passed as a bound parameter rather than concatenated into
    the statement, so identifiers containing quote characters can neither
    break nor alter the query.
    """
    # NOTE(review): uses the DB-API "%s" placeholder style accepted by the
    # usual PostgreSQL drivers -- confirm db_funcs returns such a connection.
    cursor = connection.cursor()
    try:
        cursor.execute("select id from spatio where id = %s;", (Mid,))
        return len(cursor.fetchall()) > 0
    finally:
        cursor.close()
19
20
def do_insert(Mid, west, south, east, north):
    """Insert a new spatio row holding the bounding box of record ``Mid``.

    Mid -- record identifier string stored in the id column.
    west, south, east, north -- box limits in degrees, as numbers or
        numeric strings.

    Raises ValueError/TypeError if a limit is not numeric.  No commit is
    issued here.
    """
    # float() rejects anything that is not a plain number, which makes the
    # values safe to embed inside the sbox literal.
    corners = tuple(repr(float(limit)) for limit in (west, south, east, north))
    # "%%s" survives the formatting below as the "%s" placeholder that the
    # driver fills with the (properly quoted) id.
    # NOTE(review): assumes a DB-API driver with "%s" placeholders -- confirm
    # against db_funcs.
    sql = ("INSERT INTO spatio (id, coordinates) "
           "VALUES ( %%s, sbox'((%sd , %sd), (%sd , %sd))' );" % corners)
    cursor = connection.cursor()
    try:
        cursor.execute(sql, (Mid,))
    finally:
        cursor.close()
25
def do_update(Mid, west, south, east, north):
    """Replace the bounding box stored in spatio for record ``Mid``.

    Mid -- record identifier string matched against the id column.
    west, south, east, north -- box limits in degrees, as numbers or
        numeric strings.

    Raises ValueError/TypeError if a limit is not numeric.  No commit is
    issued here.
    """
    # float() rejects anything that is not a plain number, which makes the
    # values safe to embed inside the sbox literal.
    corners = tuple(repr(float(limit)) for limit in (west, south, east, north))
    # "%%s" survives the formatting below as the "%s" placeholder that the
    # driver fills with the (properly quoted) id.
    # NOTE(review): assumes a DB-API driver with "%s" placeholders -- confirm
    # against db_funcs.
    sql = ("UPDATE spatio SET coordinates = "
           "sbox'((%sd , %sd), (%sd , %sd))' WHERE id=%%s;" % corners)
    cursor = connection.cursor()
    try:
        cursor.execute(sql, (Mid,))
    finally:
        cursor.close()
30
def main(args=None):
    """Load the bounding box of every MOLES record in a directory.

    args -- optional list of command-line arguments without the program
        name; defaults to sys.argv[1:].  The first argument is the full
        path of the directory where the MOLES records reside.

    Every file whose name contains '.xml' is parsed; its bounding box is
    written to the spatio table with do_update when the record id is
    already present and with do_insert otherwise.  Exits with a usage
    message when the directory argument is missing or empty.
    """
    if args is None:
        args = sys.argv[1:]
    # A missing argument must produce the usage text rather than an
    # IndexError traceback.
    if not args or args[0] == "":
        sys.exit("Usage: argument 1 = full path of directory where MOLES records reside")
    indir = args[0]
    print("INFO: moles records are in %s" % indir)
    numfilesproc = 0

    for filename in os.listdir(indir):
        if filename.find('.xml') == -1:
            continue

        full_filename = os.path.join(indir, filename)
        dgMeta = MRW.dgMetadata()
        dgMeta.fromXML(cElementTree.ElementTree(file=full_filename).getroot())

        record = dgMeta.dgMetadataRecord
        data_coverage = record.dgDataEntity.dgDataSummary.dgDataCoverage
        bbox = data_coverage.dgSpatialCoverage.BoundingBox
        dates = data_coverage.dgTemporalCoverage.DateRange
        # The date range is only reported here; just the box is stored.
        coverage = [dates.DateRangeEnd, dates.DateRangeStart,
                    bbox.LimitNorth, bbox.LimitSouth,
                    bbox.LimitEast, bbox.LimitWest]
        Mid = (record.dgMetadataID.repositoryIdentifier + "__"
               + record.dgMetadataID.localIdentifier)
        print("%s %s" % (coverage, Mid))

        if id_exists(Mid):
            print("doc %s exists, updating\n" % Mid)
            do_update(Mid, bbox.LimitWest, bbox.LimitSouth,
                      bbox.LimitEast, bbox.LimitNorth)
        else:
            print("doc %s does not exist, inserting new record\n" % Mid)
            do_insert(Mid, bbox.LimitWest, bbox.LimitSouth,
                      bbox.LimitEast, bbox.LimitNorth)

        numfilesproc += 1

    # NOTE(review): nothing in this file commits the transaction -- confirm
    # that the connection from db_funcs runs in autocommit mode.
    print('SpaceTimeIngestFromMOLES.py ran to end. files processed= %s' % (numfilesproc))
70
# Run the ingest only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Note: See TracBrowser for help on using the repository browser.