Changeset 2088 for TI01-discovery/trunk


Ignore:
Timestamp:
31/01/07 19:35:50 (13 years ago)
Author:
selatham
Message:

implementing keywordAdder.

Location:
TI01-discovery/trunk/ingestAutomation/OAIBatch
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/SpaceTimeIngestFromMOLES.py

    r2002 r2088  
    109109                # for bbox in bbox_list: 
    110110                #parse the coordinates somewhat - only use the first bounding box. 
    111                 print bbox_list 
     111                #print bbox_list 
    112112                bbox=bbox_list[0] 
    113                 print bbox 
     113                #print bbox 
    114114                #west 
    115115                west = bbox.LimitWest.strip() 
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/keywordAdder.py

    r2066 r2088  
    2929 
    3030def main(indir, outdir, keywords): 
    31     if len(sys.argv) < 2 or indir == "" or outdir == "" or keywords == "": 
    32         print "USAGE: keywordAdder(indir, outdir, keywords) " 
     31    if len(sys.argv) < 2 or indir == "" or outdir == "" or keywords == []: 
     32        print "USAGE: keywordAdder indir, outdir, keywords " 
    3333        print " where indir= full path of directory where MOLES records reside," 
    3434        print "       outdir= full path of where you want the updated records to go." 
    35         print "       keywords = nested list of [[keyword, namespace, key],...] which need to be added" 
    36         sys.exit 
     35        print "       keywords = list triples:- keyword, namespace, key. Must be multiple of three." 
     36        sys.exit() 
     37 
     38    if (len(keywords))%3 != 0: 
     39        print "Keywords must be in triples. keyword namespace key." 
     40        sys.exit() 
    3741 
    3842    print "INFO: moles records are in %s" %indir 
     
    4246    # initialise variables 
    4347    numfilesproc = 0 
     48    keywordList=[] 
     49 
     50    #split the keywords into list of triples 
     51    count=0 
     52    while count < len(keywords)/3: 
     53        #print "count = %s. keywordList = %s" %(count, keywordList) 
     54        keywordList.append([]) 
     55        keywordList[count].append(keywords[(count*3)]) 
     56        keywordList[count].append(keywords[(count*3)+1]) 
     57        keywordList[count].append(keywords[(count*3)+2]) 
     58        count=count+1 
     59    #print "Final keywordList = %s" %keywordList 
    4460 
    4561    # moles skeleton for creating new objects 
     
    6076                continue 
    6177 
    62             strValidTerm= str(keywords[1][0]) 
    63             strParentListID=str(keywords[1][1]) 
    64             strTermID=str(keywords[1][2]) 
    65             print strValidTerm, strParentListID, strTermID 
    66             dgVTID=M.dgValidTermID(ParentListID=strParentListID, TermID=strTermID) 
    67             dgSK=M.dgStructuredKeyword(dgValidTerm=strValidTerm, dgValidTermID=dgVTID) 
    68             dgMeta.dgMetadataRecord.addChildElem('dgStructuredKeyword', dgSK) 
     78            for keyword in keywordList: 
     79                strValidTerm= str(keyword[0]) 
     80                strParentListID=str(keyword[1]) 
     81                strTermID=str(keyword[2]) 
     82                print strValidTerm, strParentListID, strTermID 
     83                dgVTID=M.dgValidTermID(ParentListID=strParentListID, TermID=strTermID) 
     84                dgSK=M.dgStructuredKeyword(dgValidTerm=strValidTerm, dgValidTermID=dgVTID) 
     85                dgMeta.dgMetadataRecord.addChildElem('dgStructuredKeyword', dgSK) 
    6986 
    7087            # now write out updated document 
    71             print dir(dgMeta.dgMetadataRecord) 
     88            #print dir(dgMeta.dgMetadataRecord) 
    7289            molestree=dgMeta.toXML() 
    7390            moles=PrettyPrint(molestree) 
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/oai_ingest.py

    r2067 r2088  
    2828import string 
    2929import SpaceTimeIngestFromMOLES 
     30import keywordAdder 
    3031#import oaiClean 
    3132 
     
    185186    sys.exit("Failed at ingesting into exist db. Datacentre =  %s. Status = %s" %(datacentre,status)) 
    186187 
    187 #are there any old records hanging around.If so, copy away 
     188#are there any old records hanging around.If so, remove. 
    188189try: 
    189190    os.stat("./DIF2MOLES") 
     
    196197    if status !=0: 
    197198        sys.exit("Failed at clearing out DIF2MOLES area.") 
    198     #commandline = "rmdir ./DIF2MOLES" 
    199     #print "Executing : " + commandline 
    200     #status = os.system(commandline) 
    201     i#f status !=0: 
    202      #   sys.exit("Failed at removing DIF2MOLES directory.") 
    203199 
    204200# Then run the minimum moles creator for each discovery record 
     
    237233    sys.exit() 
    238234 
     235#Add keywords if necessary 
     236if datacentre_groups == "": 
     237    commandline = "ls -1 ./DIF2MOLES/ | xargs -i mv ./DIF2MOLES/{\} ./FINALMOLES/" 
     238    print "Executing : " + commandline 
     239    status = os.system(commandline) 
     240    if status !=0: 
     241        sys.exit("Failed at moving MOLES to FINAL directory") 
     242else: 
     243    keywordAdder.main('./DIF2MOLES', './FINALMOLES', ['MDIP', 'http://vocab.ndg.nerc.ac.uk/term/N010/0', 'NDGO0001']) 
     244 
    239245# ingest the created discovery minimum molesrecords into eXist db. 
    240 commandline = "$EXIST_HOME/bin/client.sh -c /db/discovery/moles -u admin -P xxxxxx -p ./DIF2MOLES" 
     246commandline = "$EXIST_HOME/bin/client.sh -c /db/discovery/moles -u admin -P xxxxxx -p ./FINALMOLES" 
    241247print "Executing : actual command to ingest into exist db" 
    242248status = os.system(commandline) 
     
    245251 
    246252#Extract the spatiotemporal info from created moles and put in Postgres db 
    247 SpaceTimeIngestFromMOLES.main("./DIF2MOLES") 
     253SpaceTimeIngestFromMOLES.main("./FINALMOLES") 
    248254 
    249255#Make copies of discovery and oai/originals and DIF2MOLES areas to backup area for tape backups 
     
    274280    sys.exit("Failed at copying to backup directory %s" %this_backupdir) 
    275281 
    276 this_backupdir = backupdir + datacentre + "_" + date_string + "_DIF2MOLES" 
     282this_backupdir = backupdir + datacentre + "_" + date_string + "_FINALMOLES" 
    277283commandline = "mkdir " + this_backupdir 
    278284print "Executing : " + commandline 
     
    281287    sys.exit("Failed at creating backup directory %s" %this_backupdir) 
    282288 
    283 commandline = "ls -1 ./DIF2MOLES | xargs -i cp ./DIF2MOLES/{\} " + this_backupdir 
     289commandline = "ls -1 ./FINALMOLES | xargs -i cp ./FINALMOLES/{\} " + this_backupdir 
    284290print "Executing : " + commandline 
    285291status = os.system(commandline) 
     
    294300    sys.exit("Failed at clearing out original harvest records area %s" %harvest_home) 
    295301 
    296 commandline = "ls -1 ./DIF2MOLES | xargs -i rm ./DIF2MOLES/{\}" 
    297 print "Executing : " + commandline 
    298 status = os.system(commandline) 
    299 if status !=0: 
    300     sys.exit("Failed at clearing out DIF2MOLES area %s" %harvest_home) 
     302commandline = "ls -1 ./FINALMOLES | xargs -i rm ./FINALMOLES/{\}" 
     303print "Executing : " + commandline 
     304status = os.system(commandline) 
     305if status !=0: 
     306    sys.exit("Failed at clearing out FINALMOLES area %s" %harvest_home) 
    301307 
    302308#remove the DIF2MOLES directory 
Note: See TracChangeset for help on using the changeset viewer.