view filterFasta @ 0:146ffed44f3f draft default tip

planemo upload
author rdvelazquez
date Wed, 20 Mar 2019 22:17:40 -0400
parents
children
line wrap: on
line source

#!/usr/bin/env python3

import sys

def filterFasta(originalFastaPath, minNumNonGapChars, newFastaPath):
    """Write the sequences that have enough non-gap characters to a new file.

    The input is read line by line. A line starting with ">" becomes the
    current header. Every other non-blank line is treated as one complete
    sequence: if it contains at least ``minNumNonGapChars`` characters other
    than "-", the current header and that line are copied to the output
    exactly as they were read.

    NOTE: each sequence line is judged on its own, so a record whose sequence
    is wrapped over several lines is not summed up across those lines.

    Args:
        originalFastaPath: path of the FASTA file to read.
        minNumNonGapChars: minimum number of non-gap characters a sequence
            line needs in order to be kept.
        newFastaPath: path the filtered FASTA is written to. It may be the
            same as ``originalFastaPath``.

    Raises:
        ValueError: if a sequence that passes the filter appears before any
            ">" header line.
    """
    keptLines = []
    header = None

    with open(originalFastaPath) as fastaIn:
        for line in fastaIn:
            if line.startswith(">"):
                header = line
                continue

            # The line terminator is not part of the sequence, so it must not
            # be counted towards the non-gap total.
            sequence = line.rstrip("\r\n")
            if not sequence:
                # Blank lines (e.g. at the end of the file) are not records.
                continue

            numNonGapChars = len(sequence) - sequence.count("-")
            if numNonGapChars >= minNumNonGapChars:
                if header is None:
                    raise ValueError(
                        "sequence line found before any '>' header in "
                        + originalFastaPath
                    )
                keptLines.append(header)
                keptLines.append(line)

    # The output is opened only after the input has been read completely, so
    # filtering a file in place (same input and output path) keeps working.
    with open(newFastaPath, "w") as fastaOut:
        fastaOut.writelines(keptLines)


def main():
    """Run the filter using the command-line arguments.

    sys.argv[0] is the script name, sys.argv[1] the FASTA path to read,
    sys.argv[2] the minimum number of non-gap characters a sequence needs,
    and sys.argv[3] the path to save the filtered FASTA at.
    """
    if len(sys.argv) < 4:
        sys.exit(
            "usage: " + sys.argv[0]
            + " <input.fasta> <minNumNonGapChars> <output.fasta>"
        )

    originalFastaPath = sys.argv[1]
    minNumNonGapChars = int(sys.argv[2])
    newFastaPath = sys.argv[3]
    filterFasta(originalFastaPath, minNumNonGapChars, newFastaPath)


if __name__ == "__main__":
    main()