Thursday, May 27, 2010

Replace words in File (Python)

#batch renamer for contig and genBank Submission
#written by ananta acharya, PBGG, UGA
#May 28, 2010
#This program reads a .csv or .txt file in which each line holds an old name and its replacement, separated by a comma, for example
#oldname, newname
#oldname2, newname2
#Then it replaces the oldnames in provided ace file with newnames
#To start, you should give the paths of the ace file and of the replacement file
#It reports the number of lines that were replaced
import os.path
import os
def OKtoRead(inputfile):
    """Check that ``inputfile`` names an existing file.

    Prints the reason to stdout when the check fails.

    inputfile -- path to test.

    Returns True when the path exists and is a file, False otherwise.
    Read permission itself is not tested here.
    """
    # print(...) with one argument behaves the same on Python 2 and 3.
    if not os.path.exists(inputfile):
        print("File does not Exist")
        return False

    if not os.path.isfile(inputfile):
        # The path exists but is something else, e.g. a directory.
        print("Is not a file")
        return False

    return True
def OKtoWrite(outputfile):
    """Check whether a new file can be created at ``outputfile``.

    Prints the reason to stdout when the check fails.

    outputfile -- path of the file that is about to be created.

    Returns True only when the containing directory exists and nothing
    (neither a file nor a directory) already exists at ``outputfile``.
    """
    # dirname() is "" for a bare file name, which means "current directory";
    # os.path.exists("") is False, so substitute os.curdir in that case.
    parentDir = os.path.dirname(outputfile) or os.curdir

    if not outputfile or not os.path.exists(parentDir):
        print("Path Does Not Exist")
        return False

    if os.path.isdir(outputfile):
        print("Is not a file but folder")
        return False

    if os.path.exists(outputfile):
        # Existing files are never overwritten or appended to.
        print("File already exists, Can not Overwite or Merge")
        return False

    return True
    
    
def makeDict(txtFile):
    """Build the old-name -> new-name mapping from a comma-separated file.

    Each non-blank line is expected to look like ``oldname, newname``.
    Whitespace around either name is dropped, and any fields after the
    second one are ignored.  If an old name occurs more than once, the
    last line wins.

    txtFile -- path of the .txt/.csv file holding the name pairs.

    Returns a dict mapping old names to new names.  The dict is empty when
    OKtoRead rejects ``txtFile`` (OKtoRead prints the reason).
    """
    renameDict = {}
    if not OKtoRead(txtFile):
        return renameDict

    # "with" closes the file even if a line cannot be processed.
    with open(txtFile) as handle:
        for lineNumber, line in enumerate(handle, 1):
            fields = [field.strip() for field in line.split(",")]
            if fields == [""]:
                continue  # blank line
            # A line needs two fields, and an empty old name would match
            # every position of every line during replacement.
            if len(fields) < 2 or not fields[0]:
                print("Skipping malformed line %s: %s" % (lineNumber, line.strip()))
                continue
            renameDict[fields[0]] = fields[1]

    return renameDict
def replaceNames(renameDict, aceFile):
    """Write a copy of ``aceFile`` with every old name replaced.

    The copy is written next to the input as ``renamed<original name>``.
    An earlier output of the same name is overwritten, so running the tool
    twice does not append a second copy of the data.

    renameDict -- dict mapping old names to new names; empty keys are ignored.
    aceFile    -- path of the file to process.

    All old names found in a line are replaced in one pass, so a new name is
    never itself renamed again.  When one old name is a prefix of another,
    the longer one is preferred.  Matching is plain substring matching.

    Prints one message per changed line, then the number of changed lines
    and the output path.  Does nothing when OKtoRead rejects ``aceFile``
    (OKtoRead prints the reason).
    """
    import re  # needed only here; the file-level imports stay untouched

    if not OKtoRead(aceFile):
        return

    # os.path.join keeps a bare file name relative instead of producing
    # "/renamed..." in the filesystem root.
    folder, baseName = os.path.split(aceFile)
    aceFileNew = os.path.join(folder, "renamed" + baseName)

    # Longest names first so that e.g. "contig10" wins over "contig1".
    oldNames = sorted((name for name in renameDict if name), key=len, reverse=True)
    pattern = None
    if oldNames:
        pattern = re.compile("|".join(re.escape(name) for name in oldNames))

    def newNameFor(match):
        return renameDict[match.group(0)]

    renameCount = 0
    with open(aceFile, "r") as source:
        with open(aceFileNew, "w") as target:
            for lineCount, line in enumerate(source, 1):
                if pattern is not None:
                    line, eachCount = pattern.subn(newNameFor, line)
                    if eachCount:
                        renameCount += 1
                        print("renaming in line %s, %s items found to replace" % (lineCount, eachCount))
                target.write(line)

    print("number of replaced line=%s" % renameCount)
    print("File written in %s" % aceFileNew)
        
def _promptForReadableFile(prompt):
    """Ask for a path with ``prompt`` until OKtoRead accepts the answer."""
    try:
        readLine = raw_input  # Python 2
    except NameError:
        readLine = input  # Python 3: raw_input was renamed to input
    answer = readLine(prompt)
    while not OKtoRead(answer):
        answer = readLine(prompt)
    return answer


def main():
    """Ask for the .ace file and the replacement list, then do the renaming.

    Both paths are requested again until OKtoRead accepts them.  The
    replacement list is turned into a dict by makeDict and applied to the
    .ace file by replaceNames.
    """
    aceFile = _promptForReadableFile("File location for .ace: (give full location, or relative starting ../: ")
    renameFile = _promptForReadableFile("File location for replacement , formatted as .txt or .csv , names separated with comma in each line: (give full location, or relative starting ../: ")

    dictt = makeDict(renameFile)
    replaceNames(dictt, aceFile)
# Start the interactive prompts only when run as a script, so that importing
# this module (e.g. to reuse makeDict) has no side effects.
if __name__ == "__main__":
    main()