Changeset 1869 for TI01-discovery/trunk


Ignore:
Timestamp:
14/12/06 19:03:02 (13 years ago)
Author:
selatham
Message:

further work on ingest

Location:
TI01-discovery/trunk/ingestAutomation/OAIBatch
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/keywordAdder.py

    r1837 r1869  
    66import sys 
    77import os 
     8 
     9def PrettyPrint(elem,indent='',html=0,space='   '): 
     10        '''Lightweight pretty printing of elementTree elements''' 
     11        def estrip(elem): 
     12                ''' Just want to get rid of unwanted whitespace ''' 
     13                if elem is None: 
     14                        return '' 
     15                else: 
     16                        return elem.strip() 
     17        strAttrib='' 
     18        for att in elem.attrib: 
     19                strAttrib+=' %s="%s"'%(att,elem.attrib[att]) 
     20        result='%s<%s%s>%s'%(indent,elem.tag,strAttrib,estrip(elem.text)) 
     21        children=len(elem) 
     22        if children: 
     23                for item in elem: 
     24                        result+='\n'+PrettyPrint(item,indent=indent+space) 
     25                result+='\n%s%s</%s>'%(indent,estrip(item.tail),elem.tag) 
     26        else: 
     27                result+='</%s>'%(elem.tag) 
     28        return result 
     29 
     30def addNewElement(parentobject, childname, child): 
     31    if hasattr(parentobject, childname): 
     32        currentattribute=getattr(parentobject,childname) 
     33        if getattr(parentobject,childname) is list: 
     34            currentattribute.append(child) 
     35 
     36        else: 
     37            newlist=[currentattribute] 
     38            newlist.append(child) 
     39            setattr(parentobject,childname, newlist) 
     40    else: 
     41        setattr(parentobject,childname, child) 
    842 
    943def main(indir, outdir, keywords): 
     
    1852    print "INFO: moles records output to %s" %outdir 
    1953    print "INFO: keywords to add are %s" %keywords 
     54 
     55    # initialise variables 
     56    numfilesproc = 0 
     57 
     58    # moles skeleton for creating new objects 
     59    M=MRW.MolesDoc() 
    2060 
    2161    # This is a fix for the ElementTree namespace problem: namespaces are usually represented as ns0, ns1, ns2 etc.
     
    3373                    continue 
    3474 
    35                 numfilesproc = 0 
    36                 x = 0 
    37                 y = 0 
    38                 dgValidTerm= keywords[0][0] 
    39                 parentListID=keywords[0][1] 
    40                 TermID=keywords[0][2] 
    41                 TID = dgMeta.dgMetadataRecord.dgStructuredKeyword.dgValidTermID(parentListID, TermID) 
    42                 dgMeta.dgMetadataRecord.dgStructuredKeyword(dgValidTerm, TID) 
     75                strValidTerm= str(keywords[0]) 
     76                strParentListID=str(keywords[1]) 
     77                strTermID=str(keywords[2]) 
     78                print strValidTerm, strParentListID, strTermID 
     79                dgVTID=M.dgValidTermID(ParentListID=strParentListID, TermID=strTermID) 
     80                dgSK=M.dgStructuredKeyword(dgValidTerm=strValidTerm, dgValidTermID=dgVTID) 
     81                addNewElement(dgMeta.dgMetadataRecord, 'dgStructuredKeyword', dgSK) 
     82 
     83                # now write out updated document 
    4384                molestree=dgMeta.toXML() 
    44                 #moles=csml.parser_extra.PrettyPrint(molestree) 
     85                moles=PrettyPrint(molestree) 
    4586                f=open(outdir+"/"+filename,'w') 
    4687                f.write(moles) 
     
    5697        indir=sys.argv[1] 
    5798        outdir=sys.argv[2] 
    58         keywords=sys.argv[3] 
     99        keywords=sys.argv[3:] 
    59100        main(indir,outdir, keywords) 
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/oai_ingest.py

    r1797 r1869  
    140140    sys.exit("Failed at copying config file stage") 
    141141 
    142 #Change os directory to that with the oaiClean.py in it. (need this?) 
     142#Change os directory to that with the other code in it. (need this?) 
    143143os.chdir('/usr/local/WSClients/OAIBatch') 
    144144 
     
    173173 
    174174# Then run the minimum moles creator  which will run over all records in the supplied collection 
    175 # Passes back records to supplied outdir with original name 
    176 #runthething(indir,outdir, 
     175# creates a directory ./DIF2MOLES to pass back records with original filename 
     176commandline = "java -jar d2b.jar repositoryID " +datacentre_namespace+" repositoryLocalID "+datacentre+" format "+datacentre_format+" repository xmldb:exist://glue.badc.rl.ac.uk:8080/exist/xmlrpc userpw secret targetCollection /db/discovery/original/"+format+"/"+datacentre_namespace 
     177print commandline 
     178status= os.system(commandline) 
     179if status!=0: 
     180    print "ERROR: couldn't create the minimum moles records" 
     181    sys.exit 
    177182 
    178183#Extract the spatiotemporal info from created moles and put in Postgres db 
    179 outdir = "./testmoles" 
     184outdir = "./DIF2MOLES" 
    180185try: 
    181186    SpaceTimeIngestFromMOLES.main(outdir) 
    182187except: 
    183     print "SpaceTimeIngestFromMOLES failed. Carrying on to do backups" 
     188    print "ERROR: SpaceTimeIngestFromMOLES failed. Carrying on to do backups" 
    184189 
    185190#Make copies of discovery and oai/originals areas to backup area for tape backups 
Note: See TracChangeset for help on using the changeset viewer.