Mercurial > repos > drosofff > msp_blastparser_and_hits
comparison BlastParser_and_hits.py @ 4:22641bb68b91 draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author | drosofff |
---|---|
date | Mon, 29 Jun 2015 06:06:11 -0400 |
parents | e0985bad7b92 |
children | 3f7cfa1cf90c |
comparison
equal
deleted
inserted
replaced
3:fa936e163bbd | 4:22641bb68b91 |
---|---|
12 the_parser.add_argument('--sequences', action="store", type=str, help="Path to the fasta file with blasted sequences") | 12 the_parser.add_argument('--sequences', action="store", type=str, help="Path to the fasta file with blasted sequences") |
13 the_parser.add_argument('--fastaOutput', action="store", type=str, help="fasta output file of blast hits") | 13 the_parser.add_argument('--fastaOutput', action="store", type=str, help="fasta output file of blast hits") |
14 the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis") | 14 the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis") |
15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") | 15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") |
16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") | 16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") |
17 the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)") | |
18 the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number") | |
19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number") | |
17 args = the_parser.parse_args() | 20 args = the_parser.parse_args() |
18 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): | 21 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): |
19 the_parser.error('argument(s) missing, call the -h option of the script') | 22 the_parser.error('argument(s) missing, call the -h option of the script') |
20 if not args.flanking: | 23 if not args.flanking: |
21 args.flanking = 0 | 24 args.flanking = 0 |
120 leftCoordinate -= FlankingValue | 123 leftCoordinate -= FlankingValue |
121 else: | 124 else: |
122 leftCoordinate = 1 | 125 leftCoordinate = 1 |
123 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) | 126 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) |
124 | 127 |
125 def outputParsing (F, Fasta, results, Xblastdict, fastadict, mode="verbose"): | 128 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"): |
126 F= open(F, "w") | 129 F= open(F, "w") |
127 Fasta=open(Fasta, "w") | 130 Fasta=open(Fasta, "w") |
128 if mode == "verbose": | 131 if mode == "verbose": |
129 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" | 132 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" |
130 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): | 133 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
134 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: | |
135 continue | |
131 print >> F, "#\n# %s" % subject | 136 print >> F, "#\n# %s" % subject |
132 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) | 137 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) |
133 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) | 138 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) |
134 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) | 139 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) |
135 print >> F, "# Maximum Bit Score: %s" % (results[subject]["maxBitScores"]) | 140 print >> F, "# Maximum Bit Score: %s" % (results[subject]["maxBitScores"]) |
147 info = "\t".join(info) | 152 info = "\t".join(info) |
148 print >> F, info | 153 print >> F, info |
149 else: | 154 else: |
150 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tMaximum Bit Score\tMean Bit Score" | 155 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tMaximum Bit Score\tMean Bit Score" |
151 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): | 156 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
157 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: | |
158 continue | |
152 line = [] | 159 line = [] |
153 line.append(subject) | 160 line.append(subject) |
154 line.append(results[subject]["subjectLength"]) | 161 line.append(results[subject]["subjectLength"]) |
155 line.append(results[subject]["TotalCoverage"]) | 162 line.append(results[subject]["TotalCoverage"]) |
156 line.append(results[subject]["RelativeSubjectCoverage"]) | 163 line.append(results[subject]["RelativeSubjectCoverage"]) |
171 fastadict = getfasta (args.sequences) | 178 fastadict = getfasta (args.sequences) |
172 Xblastdict = getblast (args.blast) | 179 Xblastdict = getblast (args.blast) |
173 results = defaultdict(dict) | 180 results = defaultdict(dict) |
174 for subject in Xblastdict: | 181 for subject in Xblastdict: |
175 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) | 182 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) |
176 outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, args.mode) | 183 outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, |
184 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, | |
185 filter_meanScore=args.filter_meanScore, mode=args.mode) | |
177 if __name__=="__main__": __main__() | 186 if __name__=="__main__": __main__() |