annotate filterFasta @ 0:146ffed44f3f draft default tip

planemo upload
author rdvelazquez
date Wed, 20 Mar 2019 22:17:40 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
146ffed44f3f planemo upload
rdvelazquez
parents:
diff changeset
1 #!/usr/bin/env python3
146ffed44f3f planemo upload
rdvelazquez
parents:
diff changeset
2
146ffed44f3f planemo upload
rdvelazquez
parents:
diff changeset
3 import sys
146ffed44f3f planemo upload
rdvelazquez
parents:
diff changeset
4
146ffed44f3f planemo upload
rdvelazquez
parents:
diff changeset
5 # first argument is script name
146ffed44f3f planemo upload
rdvelazquez
parents:
diff changeset
6 # second should be fasta path
146ffed44f3f planemo upload
rdvelazquez
parents:
diff changeset
7 # third should be minimum number of non-gap characters required to be in seq
146ffed44f3f planemo upload
rdvelazquez
parents:
diff changeset
8 # fourth argument should be the path to save the fasta at
146ffed44f3f planemo upload
rdvelazquez
parents:
diff changeset
def count_non_gap_chars(sequence_line):
    """Return how many characters of *sequence_line* are not the gap symbol "-".

    The line terminator is removed before counting so that it is not
    mistaken for a residue.
    """
    residues = sequence_line.rstrip("\r\n")
    return len(residues) - residues.count("-")


def filter_fasta(lines, min_non_gap_chars):
    """Return the lines of the FASTA records that have enough non-gap characters.

    Args:
        lines: iterable of text lines (an open file works), with or without
            line terminators.
        min_non_gap_chars: minimum number of non-gap characters a record's
            sequence must contain for the record to be kept.

    Returns:
        A list holding, for every kept record, its header line followed by
        its sequence line(s), exactly as they were read.

    Raises:
        ValueError: if sequence data appears before the first ">" header.

    A record is a ">" header plus every following non-blank line up to the
    next header, so sequences wrapped over several lines are counted as a
    whole.  Blank lines are ignored, and a header without any sequence line
    is never emitted.
    """
    kept_lines = []
    header = None
    sequence_lines = []
    non_gap_total = 0

    def flush_record():
        # Emit the record collected so far if it passes the threshold.
        if sequence_lines and non_gap_total >= min_non_gap_chars:
            kept_lines.append(header)
            kept_lines.extend(sequence_lines)

    for line in lines:
        if line.startswith(">"):
            flush_record()
            header = line
            sequence_lines = []
            non_gap_total = 0
        elif line.strip():
            if header is None:
                raise ValueError(
                    "sequence data found before the first '>' header line"
                )
            sequence_lines.append(line)
            non_gap_total += count_non_gap_chars(line)

    # The last record has no following header to trigger its flush.
    flush_record()
    return kept_lines


def main(argv):
    """Filter the FASTA file named on the command line.

    argv[1] is the input FASTA path, argv[2] the minimum number of non-gap
    characters required in a sequence, argv[3] the path to write the
    filtered FASTA to.
    """
    if len(argv) < 4:
        sys.exit(
            "usage: {} <input.fasta> <min_non_gap_chars> <output.fasta>".format(
                argv[0] if argv else "filterFasta"
            )
        )

    original_fasta_path = argv[1]
    min_num_non_gap_chars = int(argv[2])
    new_fasta_path = argv[3]

    # Read and filter completely before opening the output, so the input is
    # never truncated mid-read even if both paths name the same file.
    with open(original_fasta_path) as source:
        kept_lines = filter_fasta(source, min_num_non_gap_chars)

    with open(new_fasta_path, "w") as destination:
        destination.writelines(kept_lines)


if __name__ == "__main__":
    main(sys.argv)