comparison BlastParser_and_hits.py @ 4:22641bb68b91 draft

planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author drosofff
date Mon, 29 Jun 2015 06:06:11 -0400
parents e0985bad7b92
children 3f7cfa1cf90c
comparison
equal deleted inserted replaced
3:fa936e163bbd 4:22641bb68b91
12 the_parser.add_argument('--sequences', action="store", type=str, help="Path to the fasta file with blasted sequences") 12 the_parser.add_argument('--sequences', action="store", type=str, help="Path to the fasta file with blasted sequences")
13 the_parser.add_argument('--fastaOutput', action="store", type=str, help="fasta output file of blast hits") 13 the_parser.add_argument('--fastaOutput', action="store", type=str, help="fasta output file of blast hits")
14 the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis") 14 the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis")
15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") 15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences")
16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") 16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs")
17 the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)")
18 the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number")
19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number")
17 args = the_parser.parse_args() 20 args = the_parser.parse_args()
18 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): 21 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ):
19 the_parser.error('argument(s) missing, call the -h option of the script') 22 the_parser.error('argument(s) missing, call the -h option of the script')
20 if not args.flanking: 23 if not args.flanking:
21 args.flanking = 0 24 args.flanking = 0
120 leftCoordinate -= FlankingValue 123 leftCoordinate -= FlankingValue
121 else: 124 else:
122 leftCoordinate = 1 125 leftCoordinate = 1
123 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) 126 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity)
124 127
125 def outputParsing (F, Fasta, results, Xblastdict, fastadict, mode="verbose"): 128 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"):
126 F= open(F, "w") 129 F= open(F, "w")
127 Fasta=open(Fasta, "w") 130 Fasta=open(Fasta, "w")
128 if mode == "verbose": 131 if mode == "verbose":
129 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" 132 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n"
130 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): 133 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
134 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore:
135 continue
131 print >> F, "#\n# %s" % subject 136 print >> F, "#\n# %s" % subject
132 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) 137 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"])
133 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) 138 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"])
134 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) 139 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"])
135 print >> F, "# Maximum Bit Score: %s" % (results[subject]["maxBitScores"]) 140 print >> F, "# Maximum Bit Score: %s" % (results[subject]["maxBitScores"])
147 info = "\t".join(info) 152 info = "\t".join(info)
148 print >> F, info 153 print >> F, info
149 else: 154 else:
150 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tMaximum Bit Score\tMean Bit Score" 155 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tMaximum Bit Score\tMean Bit Score"
151 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): 156 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
157 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore:
158 continue
152 line = [] 159 line = []
153 line.append(subject) 160 line.append(subject)
154 line.append(results[subject]["subjectLength"]) 161 line.append(results[subject]["subjectLength"])
155 line.append(results[subject]["TotalCoverage"]) 162 line.append(results[subject]["TotalCoverage"])
156 line.append(results[subject]["RelativeSubjectCoverage"]) 163 line.append(results[subject]["RelativeSubjectCoverage"])
171 fastadict = getfasta (args.sequences) 178 fastadict = getfasta (args.sequences)
172 Xblastdict = getblast (args.blast) 179 Xblastdict = getblast (args.blast)
173 results = defaultdict(dict) 180 results = defaultdict(dict)
174 for subject in Xblastdict: 181 for subject in Xblastdict:
175 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) 182 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)
176 outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, args.mode) 183 outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict,
184 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore,
185 filter_meanScore=args.filter_meanScore, mode=args.mode)
177 if __name__=="__main__": __main__() 186 if __name__=="__main__": __main__()