Mercurial > repos > drosofff > msp_blastparser_and_hits
changeset 4:22641bb68b91 draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author | drosofff |
---|---|
date | Mon, 29 Jun 2015 06:06:11 -0400 |
parents | fa936e163bbd |
children | a941981a298c |
files | BlastParser_and_hits.py BlastParser_and_hits.xml |
diffstat | 2 files changed, 32 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/BlastParser_and_hits.py Fri Jun 19 13:17:32 2015 -0400
+++ b/BlastParser_and_hits.py Mon Jun 29 06:06:11 2015 -0400
@@ -14,6 +14,9 @@
     the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis")
     the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences")
     the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs")
+    the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)")
+    the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number")
+    the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number")
     args = the_parser.parse_args()
     if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ):
         the_parser.error('argument(s) missing, call the -h option of the script')
@@ -122,12 +125,14 @@
         leftCoordinate = 1
     return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity)
 
-def outputParsing (F, Fasta, results, Xblastdict, fastadict, mode="verbose"):
+def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"):
     F= open(F, "w")
     Fasta=open(Fasta, "w")
     if mode == "verbose":
         print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n"
         for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
+            if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore:
+                continue
             print >> F, "#\n# %s" % subject
             print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"])
             print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"])
@@ -149,6 +154,8 @@
     else:
         print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tMaximum Bit Score\tMean Bit Score"
         for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
+            if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore:
+                continue
             line = []
             line.append(subject)
             line.append(results[subject]["subjectLength"])
@@ -173,5 +180,7 @@
     results = defaultdict(dict)
     for subject in Xblastdict:
         results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)
-    outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, args.mode)
+    outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict,
+                   filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore,
+                   filter_meanScore=args.filter_meanScore, mode=args.mode)
 if __name__=="__main__": __main__()
```
```diff
--- a/BlastParser_and_hits.xml Fri Jun 19 13:17:32 2015 -0400
+++ b/BlastParser_and_hits.xml Mon Jun 29 06:06:11 2015 -0400
@@ -1,4 +1,4 @@
-<tool id="BlastParser_and_hits" name="Parse blast output and compile hits" version="2.1.0">
+<tool id="BlastParser_and_hits" name="Parse blast output and compile hits" version="2.2.0">
     <description>for virus discovery</description>
     <requirements></requirements>
     <command interpreter="python">
@@ -9,6 +9,13 @@
         --fastaOutput $fastaOutput
         --flanking $flanking
         --mode $mode
+        ## Additional parameters.
+        #if $additional_filters.use_filters == "yes":
+            --filter_relativeCov $additional_filters.filter_relativeCov
+            --filter_maxScore $additional_filters.filter_maxScore
+            --filter_meanScore $additional_filters.filter_meanScore
+        #end if
+
     </command>
     <inputs>
         <param name="sequences" type="data" format="fasta" label="fasta sequences that have been blasted" />
@@ -18,6 +25,19 @@
             <option value="verbose" default="true">verbose</option>
             <option value="short">do not report oases contigs</option>
         </param>
+        <conditional name="additional_filters">
+            <param name="use_filters" type="select" label="Use Additional Filters?">
+                <option value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no">
+            </when>
+            <when value="yes">
+                <param name="filter_relativeCov" type="float" value="0" max="1" label="Minimum Relative Subject Coverage" help=""/>
+                <param name="filter_maxScore" type="float" value="0" label="Minimum maximum BitScore" help=""/>
+                <param name="filter_meanScore" type="float" value="0" label="Minimum mean BitScore" help=""/>
+            </when>
+        </conditional>
     </inputs>
     <outputs>
         <data name="tabularOutput" format="tabular" label="blast analysis, by subjects"/>
```