Changeset 2329 for TI01-discovery


Ignore:
Timestamp:
26/03/07 21:30:14 (13 years ago)
Author:
selatham
Message:

Using keywordAdder correctly. Sorted out the spatiotemporal null problem.

Location:
TI01-discovery/trunk/ingestAutomation/OAIBatch
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/SpaceTimeIngestFromMOLES.py

    r2263 r2329  
    3232def do_insert(Mid,west,south,east,north,startdate,enddate): 
    3333        sql = "INSERT INTO spatiotemp (id, coordinates, startdate, enddate) VALUES ( '"+Mid+ "', sbox'(("+west+"d , "+south+"d), ("+east+"d , "+north+"d))', '"+startdate+"', '"+enddate+"');" 
    34         sql.replace("nulld","null") 
     34        if str(startdate)=='nostartdate': 
     35            sql=sql.replace("startdate,"," ") 
     36            sql=sql.replace("'"+startdate+"',"," ") 
     37        if str(enddate)=='noenddate': 
     38            sql=sql.replace("enddate,"," ") 
     39            sql=sql.replace("'"+enddate+"',"," ") 
     40        if str(west)=='null' or str(south)=='null' or str(east)=='null' or str(north)=='null': 
     41            sql=sql.replace("coordinates,"," ") 
     42            sql=sql.replace("sbox'(("+west+"d , "+south+"d), ("+east+"d , "+north+"d))',"," ") 
    3543        print sql 
    3644        cursor = connection.cursor() 
     
    4351def do_update(Mid,west,south,east,north,startdate,enddate): 
    4452        sql = "UPDATE spatiotemp SET coordinates = sbox'(("+west+"d , "+south+"d), ("+east+"d , "+north+"d))', startdate='"+startdate+"', enddate= '"+enddate+"', update_time= now() WHERE id='"+Mid+"';" 
    45         sql.replace("nulld","null") 
     53        if str(startdate)=='nostartdate': 
     54            sql=sql.replace("startdate='"+startdate+"',"," ") 
     55        if str(enddate)=='noenddate': 
     56            sql=sql.replace("enddate= '"+enddate+"',"," ") 
     57        if str(west)=='null' or str(south)=='null' or str(east)=='null' or str(north)=='null': 
     58            sql=sql.replace("coordinates = sbox'(("+west+"d , "+south+"d), ("+east+"d , "+north+"d))',", " ") 
    4659        print sql 
    4760        cursor = connection.cursor() 
     
    7588            north = 'null' 
    7689            south = 'null' 
    77             startdate="null" 
    78             enddate="null" 
     90            startdate='nostartdate' 
     91            enddate='noenddate' 
    7992            dgMeta=MRW.dgMetadata() 
    8093            try: 
     
    91104            try: 
    92105                dates=dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgTemporalCoverage.DateRange 
    93                 print dates 
     106                print "startdate = %s" %dates.DateRangeStart 
     107                print "enddate = %s" %dates.DateRangeEnd 
    94108            except: 
    95109                print "INFO: XML moles document %s does not contain temporal info." %full_filename 
     
    106120                startdate=dates.DateRangeStart 
    107121                enddate= dates.DateRangeEnd 
     122                if startdate==None: 
     123                    startdate="nostartdate" 
     124                if enddate==None: 
     125                    enddate="noenddate" 
    108126 
    109127            if no_bbox: 
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/bgs_config.properties

    r2300 r2329  
    88# 
    99#Define groups - portal groups for limiting searches by 'group of datacentres'. 
    10 groups NERC_DDC 
     10groups NERC_DDC http://vocab.ndg.nerc.ac.uk/term/N010/0 NDGO0001 
    1111# 
    1212#Define which format is harvested from the data centre (one only) 
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/oai_ingest.py

    r2324 r2329  
    22""" Script oai_ingest.py takes parameter <datacentre>. 
    33The /usr/local/WSClients/OAIBatch directory contains:- 
    4  - this python script, 
     4 - this python script, plus some other modules for parts of the process. 
    55 - a DataProvider specific config file, 
    66 - the d2b.jar moles creator class which creates moles discovery records, 
     
    99 ./data 
    1010 - /DATACENTRE/ 
    11                 - discovery/:         Re-named documents ready to ingest in the discovery service. 
     11                - discovery/:         Re-named documents. 
     12        - discovery_corrected/: Documents with schema namespaces corrected, ready to ingest in the discovery service. 
    1213                - oai/difYYYYMMDD/    Documents as harvested from OAI 
    1314 Where  /DATACENTRE  varies for the different data providers 
     
    2930import SpaceTimeIngestFromMOLES 
    3031import keywordAdder 
    31 import SchemaNameSpace from SchemaNameSpace 
     32from SchemaNameSpace import SchemaNameSpace 
    3233from DIF import DIF 
    3334from MDIP import MDIP 
     
    197198                print 'File %s is not xml format. Not processed'  %(full_filename) 
    198199 
    199 # ingest the datacentres records into eXist db (backups of exist happen nightly). 
    200 commandline = "$EXIST_HOME/bin/client.sh -c /db/discovery/original/"+datacentre_format+ "/" +datacentre_namespace+ " -u admin -P xxxxxx -p "+outdir 
    201 print "Executing : actual command to ingest into exist db" 
    202 status = os.system(commandline) 
    203 if status !=0: 
    204     sys.exit("Failed at ingesting into exist db. Datacentre =  %s. Status = %s" %(datacentre,status)) 
    205  
    206200#replace any namespace declarations with a standard one which we know works in NDG 
    207201indir="/usr/local/WSClients/OAIBatch/data/" + datacentre +"/discovery" 
     
    213207                    corrected_filename = outdir + "/" + filename 
    214208                    try: 
    215                        SchemaNameSpace.main(in_filename, corrected_filename,datacentre_format) 
     209                       SchemaNameSpace(in_filename, corrected_filename,datacentre_format) 
    216210                    except: 
    217211                       print "SchemaNameSpace failed on file %s"%in_filename 
    218212 
    219 #are there any old records hanging around.If so, remove. 
     213# ingest the datacentres records into eXist db (backups of exist happen nightly). 
     214commandline = "$EXIST_HOME/bin/client.sh -c /db/discovery/original/"+datacentre_format+ "/" +datacentre_namespace+ " -u admin -P xxxxxx -p "+outdir 
     215print "Executing : actual command to ingest into exist db" 
     216status = os.system(commandline) 
     217if status !=0: 
     218    sys.exit("Failed at ingesting into exist db. Datacentre =  %s. Status = %s" %(datacentre,status)) 
     219 
     220#are there any old moles records hanging around? If so, remove. 
    220221try: 
    221222    os.stat("./DIF2MOLES") 
     
    268269        sys.exit("Failed at moving MOLES to FINAL directory") 
    269270else: 
    270     keywordAdder.main('./DIF2MOLES', './FINALMOLES', ['MDIP', 'http://vocab.ndg.nerc.ac.uk/term/N010/0', 'NDGO0001']) 
     271    keywordAdder.main('./DIF2MOLES', './FINALMOLES', datacentre_groups) 
    271272 
    272273# ingest the created discovery minimum molesrecords into eXist db. 
Note: See TracChangeset for help on using the changeset viewer.