# HG changeset patch
# User rdvelazquez
# Date 1553134660 14400
# Node ID 146ffed44f3f61b2c4c041ee5f15d393e469f0d5
planemo upload

diff -r 000000000000 -r 146ffed44f3f addFileNameToFastaIDs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/addFileNameToFastaIDs Wed Mar 20 22:17:40 2019 -0400
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+"""Prefix every sequence ID in a FASTA file with a fixed label.
+
+Usage: addFileNameToFastaIDs <input.fasta> <label> <output.fasta>
+
+Each header line ">ID ..." is rewritten as ">label_ID ..."; every other line
+is copied through unchanged.
+"""
+
+import sys
+
+USAGE = "usage: addFileNameToFastaIDs <input.fasta> <label> <output.fasta>"
+
+
+def add_prefix_to_ids(lines, prefix):
+    """Yield *lines* with ``prefix + "_"`` inserted after each leading ">".
+
+    Only header lines are rewritten.  Sequence lines pass through untouched,
+    so a sequence wrapped over several lines keeps one header instead of
+    getting a copy of it in front of every sequence line.
+    """
+    for line in lines:
+        if line.startswith(">"):
+            yield ">" + prefix + "_" + line[1:]
+        else:
+            yield line
+
+
+def main(argv):
+    """Run the tool on *argv* (script name, input, label, output)."""
+    if len(argv) < 4:
+        sys.exit(USAGE)
+    original_fasta_path, text_to_prepend, new_fasta_path = argv[1:4]
+
+    # Read everything before opening the output so that passing the same
+    # path for input and output cannot truncate the data being read.
+    with open(original_fasta_path) as source:
+        new_lines = list(add_prefix_to_ids(source, text_to_prepend))
+
+    # The context manager closes (and so flushes) the output file.
+    with open(new_fasta_path, "w") as target:
+        target.writelines(new_lines)
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff -r 000000000000 -r 146ffed44f3f addFileNameToFastaIDs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/addFileNameToFastaIDs.xml Wed Mar 20 22:17:40 2019 -0400
@@ -0,0 +1,18 @@
+
+
+ add the fasta filename to all sequence ids in the
+ file
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 146ffed44f3f filterFasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filterFasta Wed Mar 20 22:17:40 2019 -0400
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+"""Drop FASTA records that have too few non-gap characters.
+
+Usage: filterFasta <input.fasta> <min_non_gap_chars> <output.fasta>
+
+A record is a ">" header line plus every line up to the next header.  It is
+kept when its sequence holds at least <min_non_gap_chars> characters other
+than "-"; line breaks and other whitespace are never counted.
+"""
+
+import sys
+
+USAGE = "usage: filterFasta <input.fasta> <min_non_gap_chars> <output.fasta>"
+
+
+def read_records(lines):
+    """Yield ``(header, sequence_lines)`` for each record in *lines*.
+
+    Lines that appear before the first header belong to no record and are
+    skipped.
+    """
+    header = None
+    sequence_lines = []
+    for line in lines:
+        if line.startswith(">"):
+            if header is not None:
+                yield header, sequence_lines
+            header = line
+            sequence_lines = []
+        elif header is not None:
+            sequence_lines.append(line)
+    if header is not None:
+        yield header, sequence_lines
+
+
+def count_non_gap_chars(sequence_lines):
+    """Count the characters that are neither gaps ("-") nor whitespace."""
+    residues = "".join("".join(line.split()) for line in sequence_lines)
+    return len(residues) - residues.count("-")
+
+
+def main(argv):
+    """Run the tool on *argv* (script name, input, threshold, output)."""
+    if len(argv) < 4:
+        sys.exit(USAGE)
+    original_fasta_path = argv[1]
+    min_non_gap_chars = int(argv[2])
+    new_fasta_path = argv[3]
+
+    # Judge each record as a whole so that a sequence wrapped over several
+    # lines is counted once instead of line by line.
+    kept_lines = []
+    with open(original_fasta_path) as source:
+        for header, sequence_lines in read_records(source):
+            if count_non_gap_chars(sequence_lines) >= min_non_gap_chars:
+                kept_lines.append(header)
+                kept_lines.extend(sequence_lines)
+
+    # The context manager closes (and so flushes) the output file.
+    with open(new_fasta_path, "w") as target:
+        target.writelines(kept_lines)
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff -r 000000000000 -r 146ffed44f3f filterFasta.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filterFasta.xml Wed Mar 20 22:17:40 2019 -0400
@@ -0,0 +1,19 @@
+
+
+ Remove sequences from a fasta file if less than n
+ non-gap characters
+
+
+
+
+
+
+
+
+
+
+
+
+