Mercurial > repos > rdvelazquez > rdv_fasta_tools
changeset 0:146ffed44f3f draft default tip
planemo upload
author | rdvelazquez |
---|---|
date | Wed, 20 Mar 2019 22:17:40 -0400 |
parents | |
children | |
files | addFileNameToFastaIDs addFileNameToFastaIDs.xml filterFasta filterFasta.xml |
diffstat | 4 files changed, 95 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python3
"""Prepend a text label to every sequence ID in a FASTA file.

Usage:
    addFileNameToFastaIDs <input_fasta> <text_to_prepend> <output_fasta>

Every line starting with ">" is rewritten as ">" + text + "_" + the rest of
the original line.  All other lines are copied through unchanged, so
sequences wrapped over several lines keep a single header.
"""

import sys


def prefix_fasta_ids(lines, text_to_prepend):
    """Yield the given FASTA lines with the sequence IDs prefixed.

    Args:
        lines: iterable of text lines (line endings included), e.g. an open
            file object.
        text_to_prepend: text inserted right after the ">" of each header
            line, followed by an underscore.

    Yields:
        Each input line in order; header lines are rewritten, every other
        line is passed through untouched.
    """
    for line in lines:
        if line.startswith(">"):
            yield ">" + text_to_prepend + "_" + line[1:]
        else:
            # Sequence (or blank) line: emit exactly once.  Repeating the
            # header in front of every sequence line would break records
            # whose sequence is wrapped over several lines.
            yield line


def main(argv):
    """Run the command-line tool.

    Args:
        argv: full argument vector: script name, input FASTA path, text to
            prepend to the sequence IDs, output FASTA path.

    Returns:
        Process exit status: 0 on success, 2 when the argument count is wrong.
    """
    if len(argv) != 4:
        sys.stderr.write(
            "usage: addFileNameToFastaIDs "
            "<input_fasta> <text_to_prepend> <output_fasta>\n"
        )
        return 2

    original_fasta_path, text_to_prepend, new_fasta_path = argv[1:4]

    # Finish reading before the output is opened so that the input and the
    # output may be the same path.
    with open(original_fasta_path) as source:
        renamed_lines = list(prefix_fasta_ids(source, text_to_prepend))

    # The context manager guarantees the output is flushed and closed.
    with open(new_fasta_path, "w") as target:
        target.writelines(renamed_lines)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
<?xml version="1.0"?>
<!--
    Galaxy tool wrapper for the addFileNameToFastaIDs script.
    Passes the input dataset, a sanitised copy of its display name and the
    output dataset path to the script, in that order.
-->
<tool id="addFileNameToFastaIDs" version="1.0.0" name="addFileNameToFastaIDs">
    <description>add the fasta filename to all sequence ids in the file</description>
    <stdio>
        <!-- Any non-zero exit status of the script is reported as a failure. -->
        <exit_code range="1:"/>
    </stdio>
    <version_command/>
    <command><![CDATA[
        ## The dataset name is chosen by the user, so it must not reach the
        ## shell verbatim: anything other than word characters, '-' and '.'
        ## is replaced by '_'. This also keeps whitespace out of the IDs.
        #import re
        #set $safe_name = re.sub('[^\w\-.]', '_', str($input_fasta.display_name))
        '$__tool_directory__/addFileNameToFastaIDs' '$input_fasta' '$safe_name' '$modified_fasta'
    ]]></command>
    <inputs>
        <param name="input_fasta" label="Input FASTA" type="data" format="fasta"/>
    </inputs>
    <outputs>
        <data format="fasta" name="modified_fasta"/>
    </outputs>
    <help><![CDATA[
Prepends the name of the input dataset, followed by an underscore, to every
sequence ID of the FASTA file.

Characters of the dataset name other than letters, digits, ``_``, ``-`` and
``.`` are replaced by ``_`` before the name is used.
    ]]></help>
</tool>
#!/usr/bin/env python3
"""Drop FASTA records that have too few non-gap characters.

Usage:
    filterFasta <input_fasta> <min_non_gap_chars> <output_fasta>

A record is a ">" header line plus every line up to the next header.  The gap
character is "-".  A record is kept when the number of characters in its
sequence lines that are neither "-" nor whitespace is at least the minimum.
"""

import sys


def count_non_gap_chars(sequence_lines):
    """Return how many characters are neither "-" nor whitespace.

    Whitespace is excluded so that line endings do not inflate the count.

    Args:
        sequence_lines: iterable of the sequence lines of one record.
    """
    return sum(
        1
        for line in sequence_lines
        for char in line
        if char != "-" and not char.isspace()
    )


def _iter_records(lines):
    """Group FASTA lines into (header_line, sequence_lines) pairs.

    Blank lines before the first header are ignored.

    Raises:
        ValueError: if non-blank content appears before the first header.
    """
    header = None
    body = []
    for line in lines:
        if line.startswith(">"):
            if header is not None:
                yield header, body
            header, body = line, []
        elif header is not None:
            body.append(line)
        elif line.strip():
            raise ValueError(
                "sequence data found before the first '>' header line"
            )
    if header is not None:
        yield header, body


def filter_fasta(lines, min_num_non_gap_chars):
    """Yield the lines of every record that passes the length filter.

    Args:
        lines: iterable of text lines (line endings included), e.g. an open
            file object.
        min_num_non_gap_chars: minimum number of non-gap characters a record
            must contain, counted over all of its sequence lines.

    Yields:
        The header line and the unmodified sequence lines of each kept record.
    """
    for header, body in _iter_records(lines):
        # Judge the record as a whole so that a sequence wrapped over several
        # lines is neither split up nor given a repeated header.
        if count_non_gap_chars(body) >= min_num_non_gap_chars:
            yield header
            yield from body


def main(argv):
    """Run the command-line tool.

    Args:
        argv: full argument vector: script name, input FASTA path, minimum
            number of non-gap characters, output FASTA path.

    Returns:
        Process exit status: 0 on success, 2 when the argument count is wrong.

    Raises:
        ValueError: if the minimum is not an integer or the input holds
            sequence data before its first header.
    """
    if len(argv) != 4:
        sys.stderr.write(
            "usage: filterFasta "
            "<input_fasta> <min_non_gap_chars> <output_fasta>\n"
        )
        return 2

    original_fasta_path = argv[1]
    min_num_non_gap_chars = int(argv[2])
    new_fasta_path = argv[3]

    # Finish reading before the output is opened so that the input and the
    # output may be the same path.
    with open(original_fasta_path) as source:
        kept_lines = list(filter_fasta(source, min_num_non_gap_chars))

    # The context manager guarantees the output is flushed and closed.
    with open(new_fasta_path, "w") as target:
        target.writelines(kept_lines)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
<?xml version="1.0"?>
<!--
    Galaxy tool wrapper for the filterFasta script.
    Passes the input dataset, the minimum number of non-gap characters and
    the output dataset path to the script, in that order.
-->
<tool id="filterFastaOnNumNonGapChars" version="1.0.0" name="filterFastaOnNumNonGapChars">
    <description>Remove sequences from a fasta file if less than n non-gap characters</description>
    <stdio>
        <!-- Any non-zero exit status of the script is reported as a failure. -->
        <exit_code range="1:"/>
    </stdio>
    <version_command/>
    <command><![CDATA[
        '$__tool_directory__/filterFasta' '$input_fasta' '$minimum_number_of_characters' '$filtered_fasta'
    ]]></command>
    <inputs>
        <param name="input_fasta" label="Input FASTA" type="data" format="fasta"/>
        <!-- A negative minimum is meaningless, so the form rejects it. -->
        <param name="minimum_number_of_characters" label="Minimum number of non-gap characters" type="integer" value="30" min="0"/>
    </inputs>
    <outputs>
        <data format="fasta" name="filtered_fasta"/>
    </outputs>
    <help><![CDATA[
Removes sequences that contain fewer than the given number of non-gap
characters from a FASTA file. The gap character is ``-``.
    ]]></help>
</tool>