diff filterFasta @ 0:146ffed44f3f draft default tip

planemo upload
author rdvelazquez
date Wed, 20 Mar 2019 22:17:40 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filterFasta	Wed Mar 20 22:17:40 2019 -0400
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+"""Filter a FASTA file by the number of non-gap characters per record.
+
+Usage: filterFasta <input fasta> <min non-gap chars> <output fasta>
+
+A record (a ">" header plus every sequence line up to the next header) is
+copied unchanged to the output when its sequence holds at least the requested
+number of characters other than the gap symbol "-" and whitespace.
+"""
+
+import sys
+
+
+def count_non_gap_chars(sequence_lines):
+    """Return how many characters in the lines are neither "-" nor whitespace."""
+    return sum(
+        len("".join(line.split()).replace("-", "")) for line in sequence_lines
+    )
+
+
+def iter_records(lines):
+    """Yield (header_line, sequence_lines) for each FASTA record in *lines*.
+
+    Blank lines are skipped.  Raises ValueError when sequence data appears
+    before the first header, because it cannot be attributed to a record.
+    """
+    header = None
+    sequence_lines = []
+    for line in lines:
+        if line.startswith(">"):
+            if header is not None:
+                yield header, sequence_lines
+            header, sequence_lines = line, []
+        elif not line.strip():
+            continue
+        elif header is None:
+            raise ValueError("sequence data found before the first '>' header")
+        else:
+            sequence_lines.append(line)
+    if header is not None:
+        yield header, sequence_lines
+
+
+def filter_fasta_lines(lines, min_num_non_gap_chars):
+    """Yield the original lines of every record that passes the threshold.
+
+    Wrapped (multi-line) sequences are measured as a whole, and line endings
+    are not counted.  Header-only records are dropped.
+    """
+    for header, sequence_lines in iter_records(lines):
+        if not sequence_lines:
+            continue
+        if count_non_gap_chars(sequence_lines) >= min_num_non_gap_chars:
+            yield header
+            yield from sequence_lines
+
+
+def main(argv):
+    """Run the filter using command-line style arguments.
+
+    argv[1] is the input FASTA path, argv[2] the minimum number of non-gap
+    characters a sequence needs to be kept, argv[3] the output FASTA path.
+    """
+    if len(argv) < 4:
+        sys.exit(
+            "usage: filterFasta <input fasta> <min non-gap chars> <output fasta>"
+        )
+    original_fasta_path = argv[1]
+    min_num_non_gap_chars = int(argv[2])
+    new_fasta_path = argv[3]
+
+    # Finish reading before opening the output so that filtering a file in
+    # place (same input and output path) does not truncate the input first.
+    with open(original_fasta_path) as source:
+        kept_lines = list(filter_fasta_lines(source, min_num_non_gap_chars))
+
+    # The context manager guarantees the output is flushed and closed.
+    with open(new_fasta_path, "w") as destination:
+        destination.writelines(kept_lines)
+
+
+if __name__ == "__main__":
+    main(sys.argv)