changeset 0:146ffed44f3f draft default tip

planemo upload
author rdvelazquez
date Wed, 20 Mar 2019 22:17:40 -0400
parents
children
files addFileNameToFastaIDs addFileNameToFastaIDs.xml filterFasta filterFasta.xml
diffstat 4 files changed, 95 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/addFileNameToFastaIDs	Wed Mar 20 22:17:40 2019 -0400
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+"""Prepend a fixed text to every sequence ID of a FASTA file.
+
+Usage: addFileNameToFastaIDs <input fasta> <text to prepend> <output fasta>
+
+Header lines ">id" become ">text_id"; all other lines are copied unchanged.
+"""
+
+import sys
+
+
+def main(argv):
+    """Copy argv[1] to argv[3], prefixing each header ID with argv[2] + "_"."""
+    if len(argv) < 4:
+        sys.exit(__doc__)  # prints the usage text and exits with status 1
+    originalFastaPath, textToPrepend, newFastaPath = argv[1:4]
+    # Both files are closed (and the output flushed) when the block exits.
+    with open(originalFastaPath) as src, open(newFastaPath, "w") as dst:
+        for line in src:
+            if line.startswith(">"):
+                line = ">" + textToPrepend + "_" + line[1:]
+            # Non-header lines pass through as is, so wrapped (multi-line)
+            # sequences keep one header instead of repeating it per line.
+            dst.write(line)
+
+
+if __name__ == "__main__":
+    main(sys.argv)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/addFileNameToFastaIDs.xml	Wed Mar 20 22:17:40 2019 -0400
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<tool id="addFileNameToFastaIDs" version="1.0.0" name="addFileNameToFastaIDs">
+  <description>add the fasta filename to all sequence ids in the
+  file</description>
+  <stdio>
+    <exit_code range="1:"/> <!-- exit codes of 1 and above are matched here -->
+  </stdio>
+  <version_command/>
+  <command><![CDATA[
+                python3 '$__tool_directory__/addFileNameToFastaIDs' '$input_fasta' '$input_fasta.display_name' '$modified_fasta'
+    ]]></command> <!-- script args: input path, text to prepend, output path -->
+  <inputs>
+    <param name="input_fasta" label="Input FASTA" type="data" format="fasta"/>
+  </inputs>
+  <outputs>
+    <data format="fasta" name="modified_fasta"/>
+  </outputs>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filterFasta	Wed Mar 20 22:17:40 2019 -0400
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+"""Drop FASTA records having fewer than N non-gap (non "-") characters."""
+
+import itertools
+import sys
+
+
+def keep(record, minNumNonGapChars):
+    """Return True if record (header + sequence lines) has enough residues."""
+    # split()/join removes line endings so they are not counted as residues.
+    residues = "".join("".join(record[1:]).split())
+    return len(residues.replace("-", "")) >= minNumNonGapChars
+
+
+def main(argv):
+    """argv: script, input fasta path, minimum residue count, output path."""
+    minNumNonGapChars = int(argv[2])
+    with open(argv[1]) as src, open(argv[3], "w") as dst:
+        record = []  # current header line plus its sequence line(s)
+        for line in itertools.chain(src, [">"]):  # sentinel flushes last
+            if line.startswith(">"):
+                if record and keep(record, minNumNonGapChars):
+                    dst.writelines(record)
+                record = [line]
+            elif record:
+                record.append(line)
+
+
+if __name__ == "__main__":
+    main(sys.argv)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filterFasta.xml	Wed Mar 20 22:17:40 2019 -0400
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<tool id="filterFastaOnNumNonGapChars" version="1.0.0" name="filterFastaOnNumNonGapChars">
+  <description>Remove sequences from a fasta file if less than n
+  non-gap characters</description>
+  <stdio>
+    <exit_code range="1:"/> <!-- exit codes of 1 and above are matched here -->
+  </stdio>
+  <version_command/>
+  <command><![CDATA[
+                python3 '$__tool_directory__/filterFasta' '$input_fasta' '$minimum_number_of_characters' '$filtered_fasta'
+    ]]></command> <!-- script args: input path, minimum count, output path -->
+  <inputs>
+    <param name="input_fasta" label="Input FASTA" type="data" format="fasta"/>
+    <param label="min_num_chars" name="minimum_number_of_characters" type="integer" value="30"/>
+  </inputs>
+  <outputs>
+    <data format="fasta" name="filtered_fasta"/>
+  </outputs>
+</tool>