0
|
1 #!/usr/bin/env python3
|
|
2
|
|
3 import sys
|
|
4
|
|
5 # first argument is script name
|
|
6 # second should be fasta path
|
|
7 # third should be minimum number of non-gap characters required to be in seq
|
|
8 # fourth argument should be the path to save the fasta at
|
|
def _count_non_gap_chars(sequence_line):
    """Return how many characters of *sequence_line* are not the gap symbol '-'.

    The line terminator is removed first so that it is never counted as a
    residue.
    """
    residues = sequence_line.rstrip("\r\n")
    return len(residues) - residues.count("-")


def _read_fasta_records(lines):
    """Yield ``(header_line, sequence_lines)`` for each record in *lines*.

    A record starts at a line beginning with ">" and runs until the next such
    line, so sequences wrapped over several lines stay with their header.
    Lines are yielded exactly as read (terminators included); lines that
    contain only whitespace are skipped.

    Raises:
        ValueError: if sequence data appears before the first header line.
    """
    header = None
    sequence_lines = []
    for line in lines:
        if line.startswith(">"):
            if header is not None:
                yield header, sequence_lines
            header = line
            sequence_lines = []
        elif not line.strip():
            continue
        elif header is None:
            raise ValueError(
                "malformed FASTA: sequence data found before the first '>' header"
            )
        else:
            sequence_lines.append(line)
    if header is not None:
        yield header, sequence_lines


def filter_fasta_records(lines, min_non_gap_chars):
    """Keep the FASTA records that have enough non-gap characters.

    Args:
        lines: iterable of text lines (for example an open file).
        min_non_gap_chars: minimum number of non-'-' characters, summed over
            all sequence lines of a record, required to keep that record.

    Returns:
        A list of the original lines (header followed by its sequence lines)
        of every record that passes, in input order.  Headers that have no
        sequence lines are dropped.

    Raises:
        ValueError: if sequence data appears before the first header line.
    """
    kept_lines = []
    for header, sequence_lines in _read_fasta_records(lines):
        if not sequence_lines:
            continue
        non_gap_total = sum(_count_non_gap_chars(s) for s in sequence_lines)
        if non_gap_total >= min_non_gap_chars:
            kept_lines.append(header)
            kept_lines.extend(sequence_lines)
    return kept_lines


if __name__ == "__main__":
    if len(sys.argv) < 4:
        sys.exit(
            "usage: " + sys.argv[0]
            + " <fastaPath> <minNumNonGapChars> <newFastaPath>"
        )

    originalFastaPath = sys.argv[1]
    minNumNonGapChars = int(sys.argv[2])
    newFastaPath = sys.argv[3]

    # Read and filter everything before the output is opened, so that using
    # the same path for input and output does not truncate unread input.
    with open(originalFastaPath) as fp:
        filteredFastaString = "".join(
            filter_fasta_records(fp, minNumNonGapChars)
        )

    # The context manager guarantees the output is flushed and closed.
    with open(newFastaPath, "w") as f:
        f.write(filteredFastaString)