Mercurial > repos > drosofff > msp_blastparser_and_hits
comparison BlastParser_and_hits.py @ 9:86f424753b2d draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
| author | drosofff |
|---|---|
| date | Fri, 15 Jan 2016 12:29:10 -0500 |
| parents | efb051ac0da9 |
| children | cf78a10bcfcf |
comparison
equal
deleted
inserted
replaced
| 8:efb051ac0da9 | 9:86f424753b2d |
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 # blastn blastx parser revised debugged: 3-4-2015. Commit issue. | 2 # blastn tblastn blastx parser revised 14-1-2016. |
| 3 # drosofff@gmail.com | 3 # drosofff@gmail.com |
| 4 | 4 |
| 5 import sys | 5 import sys |
| 6 import argparse | 6 import argparse |
| 7 from collections import defaultdict | 7 from collections import defaultdict |
| 15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") | 15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") |
| 16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") | 16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") |
| 17 the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)") | 17 the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)") |
| 18 the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out best BitScores below the specified float number") | 18 the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out best BitScores below the specified float number") |
| 19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out mean BitScores below the specified float number") | 19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out mean BitScores below the specified float number") |
| 20 the_parser.add_argument('--filter_term_in', action="store", type=str, default="", help="select the specified term in the subject list") | |
| 21 the_parser.add_argument('--filter_term_out', action="store", type=str, default="", help="exclude the specified term from the subject list") | |
| 20 the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned") | 22 the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned") |
| 21 the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned") | 23 the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned") |
| 22 args = the_parser.parse_args() | 24 args = the_parser.parse_args() |
| 23 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): | 25 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): |
| 24 the_parser.error('argument(s) missing, call the -h option of the script') | 26 the_parser.error('argument(s) missing, call the -h option of the script') |
| 125 leftCoordinate -= FlankingValue | 127 leftCoordinate -= FlankingValue |
| 126 else: | 128 else: |
| 127 leftCoordinate = 1 | 129 leftCoordinate = 1 |
| 128 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) | 130 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) |
| 129 | 131 |
| 130 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"): | 132 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"): |
| 133 def filter_results (results, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out=""): | |
| 134 print "###", filter_term_in | |
| 135 for subject in results.keys(): | |
| 136 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov: | |
| 137 del results[subject] | |
| 138 continue | |
| 139 if results[subject]["maxBitScores"]<filter_maxScore: | |
| 140 del results[subject] | |
| 141 continue | |
| 142 if results[subject]["meanBitScores"]<filter_meanScore: | |
| 143 del results[subject] | |
| 144 continue | |
| 145 if filter_term_in in subject: | |
| 146 pass | |
| 147 else: | |
| 148 del results[subject] | |
| 149 continue | |
| 150 if filter_term_out and filter_term_out in subject: | |
| 151 del results[subject] | |
| 152 continue | |
| 153 return results | |
| 154 | |
| 131 F= open(F, "w") | 155 F= open(F, "w") |
| 132 Fasta=open(Fasta, "w") | 156 Fasta=open(Fasta, "w") |
| 133 blasted_transcripts = [] | 157 blasted_transcripts = [] |
| 158 filter_results (results, filter_relativeCov, filter_maxScore, filter_meanScore, filter_term_in, filter_term_out) | |
| 134 for subject in results: | 159 for subject in results: |
| 135 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: | |
| 136 continue | |
| 137 for transcript in Xblastdict[subject]: | 160 for transcript in Xblastdict[subject]: |
| 138 blasted_transcripts.append(transcript) | 161 blasted_transcripts.append(transcript) |
| 139 blasted_transcripts = list( set( blasted_transcripts)) | 162 blasted_transcripts = list( set( blasted_transcripts)) |
| 140 if mode == "verbose": | 163 if mode == "verbose": |
| 141 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" | 164 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" |
| 142 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): | 165 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
| 143 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: | |
| 144 continue | |
| 145 print >> F, "#\n# %s" % subject | 166 print >> F, "#\n# %s" % subject |
| 146 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) | 167 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) |
| 147 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) | 168 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) |
| 148 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) | 169 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) |
| 149 print >> F, "# Best Bit Score: %s" % (results[subject]["maxBitScores"]) | 170 print >> F, "# Best Bit Score: %s" % (results[subject]["maxBitScores"]) |
| 161 info = "\t".join(info) | 182 info = "\t".join(info) |
| 162 print >> F, info | 183 print >> F, info |
| 163 else: | 184 else: |
| 164 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score" | 185 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score" |
| 165 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): | 186 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
| 166 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: | |
| 167 continue | |
| 168 line = [] | 187 line = [] |
| 169 line.append(subject) | 188 line.append(subject) |
| 170 line.append(results[subject]["subjectLength"]) | 189 line.append(results[subject]["subjectLength"]) |
| 171 line.append(results[subject]["TotalCoverage"]) | 190 line.append(results[subject]["TotalCoverage"]) |
| 172 line.append(results[subject]["RelativeSubjectCoverage"]) | 191 line.append(results[subject]["RelativeSubjectCoverage"]) |
| 201 results = defaultdict(dict) | 220 results = defaultdict(dict) |
| 202 for subject in Xblastdict: | 221 for subject in Xblastdict: |
| 203 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) | 222 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) |
| 204 blasted_transcripts = outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, | 223 blasted_transcripts = outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, |
| 205 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, | 224 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, |
| 206 filter_meanScore=args.filter_meanScore, mode=args.mode) | 225 filter_meanScore=args.filter_meanScore, filter_term_in=args.filter_term_in, |
| 226 filter_term_out=args.filter_term_out, mode=args.mode) | |
| 207 dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences) | 227 dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences) |
| 208 | 228 |
| 209 if __name__=="__main__": __main__() | 229 if __name__=="__main__": __main__() |
