# HG changeset patch
# User rdvelazquez
# Date 1553134660 14400
# Node ID 146ffed44f3f61b2c4c041ee5f15d393e469f0d5
planemo upload
diff -r 000000000000 -r 146ffed44f3f addFileNameToFastaIDs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/addFileNameToFastaIDs Wed Mar 20 22:17:40 2019 -0400
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+
+import sys
+
+# Usage: addFileNameToFastaIDs <fasta_in> <text_to_prepend> <fasta_out>
+# sys.argv[0] is the script name
+# sys.argv[1] is the path of the fasta file to read
+# sys.argv[2] is the text to prepend to the seq ids
+# sys.argv[3] is the path to save the new fasta at
+originalFastaPath = sys.argv[1]
+textToPrepend = sys.argv[2]
+newFastaPath = sys.argv[3]
+
+# Rewrite header lines (">id ...") as ">" + textToPrepend + "_" + "id ...".
+# Every other line is copied through unchanged, so a record whose sequence
+# is wrapped over several lines keeps exactly one header, and lines that
+# appear before the first header are copied as-is instead of failing.
+with open(originalFastaPath) as fastaIn:
+    newFastaLines = [
+        ">" + textToPrepend + "_" + line[1:] if line.startswith(">") else line
+        for line in fastaIn
+    ]
+
+# The input is read completely before the output is opened, so the same
+# path may be given for both; the with-statement guarantees the output is
+# flushed and closed.
+with open(newFastaPath, "w") as fastaOut:
+    fastaOut.writelines(newFastaLines)
diff -r 000000000000 -r 146ffed44f3f addFileNameToFastaIDs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/addFileNameToFastaIDs.xml Wed Mar 20 22:17:40 2019 -0400
@@ -0,0 +1,18 @@
+
+
+ add the fasta filename to all sequence ids in the
+ file
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 146ffed44f3f filterFasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filterFasta Wed Mar 20 22:17:40 2019 -0400
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+
+import sys
+
+# sys.argv[0] is the script name
+# sys.argv[1] is the path of the fasta file to read
+# sys.argv[2] is the minimum number of non-gap characters required in a seq
+# sys.argv[3] is the path to save the filtered fasta at
+originalFastaPath = sys.argv[1]
+minNumNonGapChars = int(sys.argv[2])
+newFastaPath = sys.argv[3]
+
+# Group the file into records: a ">" header line followed by its sequence
+# lines (a sequence may be wrapped over several lines).
+records = []
+with open(originalFastaPath) as fastaIn:
+    for line in fastaIn:
+        if line.startswith(">"):
+            records.append([line])
+        elif records:  # lines before the first header belong to no record
+            records[-1].append(line)
+
+with open(newFastaPath, "w") as fastaOut:
+    for header, *seqLines in records:
+        # Count residues only: strip all whitespace (including line endings,
+        # which must not count as non-gap characters) and "-" gaps.
+        residues = "".join("".join(seqLines).split()).replace("-", "")
+        # Records with no sequence lines are dropped regardless of threshold.
+        if seqLines and len(residues) >= minNumNonGapChars:
+            fastaOut.write(header + "".join(seqLines))
diff -r 000000000000 -r 146ffed44f3f filterFasta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filterFasta.xml Wed Mar 20 22:17:40 2019 -0400
@@ -0,0 +1,19 @@
+
+
+ Remove sequences from a fasta file if they contain fewer than n
+ non-gap characters
+
+
+
+
+
+
+
+
+
+
+
+
+