Mercurial > repos > drosofff > msp_blastparser_and_hits
comparison BlastParser_and_hits.py @ 7:72ef366ef55e draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 319fbe5bf4fe32ab9329a411a0d0bd70b83efd9a
| author | drosofff |
|---|---|
| date | Wed, 16 Sep 2015 06:11:35 -0400 |
| parents | 3f7cfa1cf90c |
| children | efb051ac0da9 |
comparison
equal
deleted
inserted
replaced
| 6:3f7cfa1cf90c | 7:72ef366ef55e |
|---|---|
| 128 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) | 128 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) |
| 129 | 129 |
| 130 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"): | 130 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"): |
| 131 F= open(F, "w") | 131 F= open(F, "w") |
| 132 Fasta=open(Fasta, "w") | 132 Fasta=open(Fasta, "w") |
| 133 blasted_transcripts = [] | |
| 134 for subject in results: | |
| 135 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: | |
| 136 continue | |
| 137 for transcript in Xblastdict[subject]: | |
| 138 blasted_transcripts.append(transcript) | |
| 139 blasted_transcripts = list( set( blasted_transcripts)) | |
| 133 if mode == "verbose": | 140 if mode == "verbose": |
| 134 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" | 141 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" |
| 135 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): | 142 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
| 136 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: | 143 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: |
| 137 continue | 144 continue |
| 170 for header in results[subject]["HitDic"]: | 177 for header in results[subject]["HitDic"]: |
| 171 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) ) | 178 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) ) |
| 172 print >> Fasta, "" # final carriage return for the sequence | 179 print >> Fasta, "" # final carriage return for the sequence |
| 173 F.close() | 180 F.close() |
| 174 Fasta.close() | 181 Fasta.close() |
| 182 return blasted_transcripts | |
| 175 | 183 |
| 176 def sort_sequences (fastadict, blastdict, matched_sequences, unmatched_sequences): | 184 def dispatch_sequences (fastadict, blasted_transcripts, matched_sequences, unmatched_sequences): |
| 177 '''to output the sequences that matched and did not matched in the blast''' | 185 '''to output the sequences that matched and did not matched in the blast''' |
| 178 blasted_transcripts = [] | |
| 179 for subject in blastdict: | |
| 180 for transcript in blastdict[subject]: | |
| 181 blasted_transcripts.append(transcript) | |
| 182 blasted_transcripts = list( set( blasted_transcripts)) | |
| 183 F_matched = open (matched_sequences, "w") | 186 F_matched = open (matched_sequences, "w") |
| 184 F_unmatched = open (unmatched_sequences, "w") | 187 F_unmatched = open (unmatched_sequences, "w") |
| 185 for transcript in fastadict: | 188 for transcript in fastadict: |
| 186 if transcript in blasted_transcripts: | 189 if transcript in blasted_transcripts: # le list of blasted_transcripts is generated by the outputParsing function |
| 187 print >> F_matched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) | 190 print >> F_matched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) |
| 188 else: | 191 else: |
| 189 print >> F_unmatched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) | 192 print >> F_unmatched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) |
| 190 F_matched.close() | 193 F_matched.close() |
| 191 F_unmatched.close() | 194 F_unmatched.close() |
| 193 | 196 |
| 194 def __main__ (): | 197 def __main__ (): |
| 195 args = Parser() | 198 args = Parser() |
| 196 fastadict = getfasta (args.sequences) | 199 fastadict = getfasta (args.sequences) |
| 197 Xblastdict = getblast (args.blast) | 200 Xblastdict = getblast (args.blast) |
| 198 sort_sequences (fastadict, Xblastdict, args.al_sequences, args.un_sequences) | |
| 199 results = defaultdict(dict) | 201 results = defaultdict(dict) |
| 200 for subject in Xblastdict: | 202 for subject in Xblastdict: |
| 201 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) | 203 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) |
| 202 outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, | 204 blasted_transcripts = outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, |
| 203 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, | 205 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, |
| 204 filter_meanScore=args.filter_meanScore, mode=args.mode) | 206 filter_meanScore=args.filter_meanScore, mode=args.mode) |
| 207 dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences) | |
| 208 | |
| 205 if __name__=="__main__": __main__() | 209 if __name__=="__main__": __main__() |
