Mercurial > repos > drosofff > msp_blastparser_and_hits
comparison BlastParser_and_hits.py @ 16:75dbe4f77699 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit b6de14061c479f0418cd89e26d6f5ac26e565a07
| author | drosofff | 
|---|---|
| date | Wed, 09 Nov 2016 11:32:52 -0500 | 
| parents | 87d250071bdc | 
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 15:3bfdbfef5881 | 16:75dbe4f77699 | 
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python | 
| 2 # blastn tblastn blastx parser revised 14-1-2016. | 2 # blastn tblastn blastx parser revised 14-1-2016. | 
| 3 # drosofff@gmail.com | 3 # drosofff@gmail.com | 
| 4 | 4 | 
| 5 import sys | |
| 6 import argparse | 5 import argparse | 
| 7 from collections import defaultdict | 6 from collections import defaultdict | 
| 7 | |
| 8 | 8 | 
| 9 def Parser(): | 9 def Parser(): | 
| 10 the_parser = argparse.ArgumentParser() | 10 the_parser = argparse.ArgumentParser() | 
| 11 the_parser.add_argument('--blast', action="store", type=str, help="Path to the blast output (tabular format, 12 column)") | 11 the_parser.add_argument('--blast', action="store", type=str, help="Path to the blast output (tabular format, 12 column)") | 
| 12 the_parser.add_argument('--sequences', action="store", type=str, help="Path to the fasta file with blasted sequences") | 12 the_parser.add_argument('--sequences', action="store", type=str, help="Path to the fasta file with blasted sequences") | 
| 13 the_parser.add_argument('--fastaOutput', action="store", type=str, help="fasta output file of blast hits") | 13 the_parser.add_argument('--fastaOutput', action="store", type=str, help="fasta output file of blast hits") | 
| 14 the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis") | 14 the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis") | 
| 15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") | 15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") | 
| 16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") | 16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") | 
| 17 the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)") | 17 the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)") | 
| 18 the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out best BitScores below the specified float number") | 18 the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out best BitScores below the specified float number") | 
| 19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out mean BitScores below the specified float number") | 19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out mean BitScores below the specified float number") | 
| 20 the_parser.add_argument('--filter_term_in', action="store", type=str, default="", help="select the specified term in the subject list") | 20 the_parser.add_argument('--filter_term_in', action="store", type=str, default="", help="select the specified term in the subject list") | 
| 21 the_parser.add_argument('--filter_term_out', action="store", type=str, default="", help="exclude the specified term from the subject list") | 21 the_parser.add_argument('--filter_term_out', action="store", type=str, default="", help="exclude the specified term from the subject list") | 
| 22 the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned") | 22 the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned") | 
| 23 the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned") | 23 the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned") | 
| 24 the_parser.add_argument('--dataset_name', action="store", type=str, default="", help="the name of the dataset that has been parsed, to be reported in the output") | 24 the_parser.add_argument('--dataset_name', action="store", type=str, default="", help="the name of the dataset that has been parsed, to be reported in the output") | 
| 25 args = the_parser.parse_args() | 25 args = the_parser.parse_args() | 
| 26 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): | 26 if not all((args.sequences, args.blast, args.fastaOutput, args.tabularOutput)): | 
| 27 the_parser.error('argument(s) missing, call the -h option of the script') | 27 the_parser.error('argument(s) missing, call the -h option of the script') | 
| 28 if not args.flanking: | 28 if not args.flanking: | 
| 29 args.flanking = 0 | 29 args.flanking = 0 | 
| 30 return args | 30 return args | 
| 31 | 31 | 
| 32 | |
| 32 def median(lst): | 33 def median(lst): | 
| 33 lst = sorted(lst) | 34 lst = sorted(lst) | 
| 34 if len(lst) < 1: | 35 if len(lst) < 1: | 
| 35 return None | 36 return None | 
| 36 if len(lst) %2 == 1: | 37 if len(lst) % 2 == 1: | 
| 37 return lst[((len(lst)+1)/2)-1] | 38 return lst[((len(lst)+1)/2)-1] | 
| 38 if len(lst) %2 == 0: | 39 if len(lst) % 2 == 0: | 
| 39 return float(sum(lst[(len(lst)/2)-1:(len(lst)/2)+1]))/2.0 | 40 return float(sum(lst[(len(lst)/2)-1:(len(lst)/2)+1]))/2.0 | 
| 41 | |
| 40 | 42 | 
| 41 def mean(lst): | 43 def mean(lst): | 
| 42 if len(lst) < 1: | 44 if len(lst) < 1: | 
| 43 return 0 | 45 return 0 | 
| 44 return sum(lst) / float(len(lst)) | 46 return sum(lst) / float(len(lst)) | 
| 45 | 47 | 
| 46 def getfasta (fastafile): | 48 | 
| 49 def getfasta(fastafile): | |
| 47 fastadic = {} | 50 fastadic = {} | 
| 48 for line in open (fastafile): | 51 for line in open(fastafile): | 
| 49 if line[0] == ">": | 52 if line[0] == ">": | 
| 50 header = line[1:-1] | 53 header = line[1:-1] | 
| 51 fastadic[header] = "" | 54 fastadic[header] = "" | 
| 52 else: | 55 else: | 
| 53 fastadic[header] += line | 56 fastadic[header] += line | 
| 54 for header in fastadic: | 57 for header in fastadic: | 
| 55 fastadic[header] = "".join(fastadic[header].split("\n")) | 58 fastadic[header] = "".join(fastadic[header].split("\n")) | 
| 56 return fastadic | 59 return fastadic | 
| 57 | 60 | 
| 61 | |
| 58 def insert_newlines(string, every=60): | 62 def insert_newlines(string, every=60): | 
| 59 lines = [] | 63 lines = [] | 
| 60 for i in xrange(0, len(string), every): | 64 for i in xrange(0, len(string), every): | 
| 61 lines.append(string[i:i+every]) | 65 lines.append(string[i:i+every]) | 
| 62 return '\n'.join(lines) | 66 return '\n'.join(lines) | 
| 63 | 67 | 
| 64 def getblast (blastfile): | 68 | 
| 69 def getblast(blastfile): | |
| 65 '''blastinfo [0] Percentage of identical matches | 70 '''blastinfo [0] Percentage of identical matches | 
| 66 blastinfo [1] Alignment length | 71 blastinfo [1] Alignment length | 
| 67 blastinfo [2] Number of mismatches | 72 blastinfo [2] Number of mismatches | 
| 68 blastinfo [3] Number of gap openings | 73 blastinfo [3] Number of gap openings | 
| 69 blastinfo [4] Start of alignment in query | 74 blastinfo [4] Start of alignment in query | 
| 71 blastinfo [6] Start of alignment in subject (database hit) | 76 blastinfo [6] Start of alignment in subject (database hit) | 
| 72 blastinfo [7] End of alignment in subject (database hit) | 77 blastinfo [7] End of alignment in subject (database hit) | 
| 73 blastinfo [8] Expectation value (E-value) | 78 blastinfo [8] Expectation value (E-value) | 
| 74 blastinfo [9] Bit score | 79 blastinfo [9] Bit score | 
| 75 blastinfo [10] Subject length (NEED TO BE SPECIFIED WHEN RUNNING BLAST) ''' | 80 blastinfo [10] Subject length (NEED TO BE SPECIFIED WHEN RUNNING BLAST) ''' | 
| 76 blastdic = defaultdict (dict) | 81 blastdic = defaultdict(dict) | 
| 77 for line in open (blastfile): | 82 for line in open(blastfile): | 
| 78 fields = line[:-1].split("\t") | 83 fields = line[:-1].split("\t") | 
| 79 transcript = fields[0] | 84 transcript = fields[0] | 
| 80 subject = fields[1] | 85 subject = fields[1] | 
| 81 blastinfo = [float(fields[2]) ] # blastinfo[0] | 86 blastinfo = [float(fields[2])] # blastinfo[0] | 
| 82 blastinfo = blastinfo + [int(i) for i in fields[3:10] ] # blastinfo[1:8] insets 1 to 7 | 87 blastinfo = blastinfo + [int(i) for i in fields[3:10]] # blastinfo[1:8] insets 1 to 7 | 
| 83 blastinfo.append(fields[10]) # blastinfo[8] E-value remains as a string type | 88 blastinfo.append(fields[10]) # blastinfo[8] E-value remains as a string type | 
| 84 blastinfo.append(float(fields[11])) # blastinfo[9] Bit score | 89 blastinfo.append(float(fields[11])) # blastinfo[9] Bit score | 
| 85 blastinfo.append(int(fields[12])) # blastinfo[10] Subject length MUST BE RETRIEVED THROUGH A 13 COLUMN BLAST OUTPUT | 90 blastinfo.append(int(fields[12])) # blastinfo[10] Subject length MUST BE RETRIEVED THROUGH A 13 COLUMN BLAST OUTPUT | 
| 86 try: | 91 try: | 
| 87 blastdic[subject][transcript].append(blastinfo) | 92 blastdic[subject][transcript].append(blastinfo) | 
| 88 except: | 93 except Exception: | 
| 89 blastdic[subject][transcript] = [ blastinfo ] | 94 blastdic[subject][transcript] = [blastinfo] | 
| 90 return blastdic | 95 return blastdic | 
| 91 | 96 | 
| 92 def getseq (fastadict, transcript, up, down, orientation="direct"): | 97 | 
| 93 def reverse (seq): | 98 def getseq(fastadict, transcript, up, down, orientation="direct"): | 
| 94 revdict = {"A":"T","T":"A","G":"C","C":"G","N":"N"} | 99 def reverse(seq): | 
| 100 revdict = {"A": "T", "T": "A", "G": "C", "C": "G", "N": "N"} | |
| 95 revseq = [revdict[i] for i in seq[::-1]] | 101 revseq = [revdict[i] for i in seq[::-1]] | 
| 96 return "".join(revseq) | 102 return "".join(revseq) | 
| 97 pickseq = fastadict[transcript][up-1:down] | 103 pickseq = fastadict[transcript][up-1:down] | 
| 98 if orientation == "direct": | 104 if orientation == "direct": | 
| 99 return pickseq | 105 return pickseq | 
| 100 else: | 106 else: | 
| 101 return reverse(pickseq) | 107 return reverse(pickseq) | 
| 102 | 108 | 
| 103 def subjectCoverage (fastadict, blastdict, subject, QueriesFlankingNucleotides=0): | 109 | 
| 110 def subjectCoverage(fastadict, blastdict, subject, QueriesFlankingNucleotides=0): | |
| 104 SubjectCoverageList = [] | 111 SubjectCoverageList = [] | 
| 105 HitDic = {} | 112 HitDic = {} | 
| 106 bitScores = [] | 113 bitScores = [] | 
| 107 for transcript in blastdict[subject]: | 114 for transcript in blastdict[subject]: | 
| 108 prefix = "%s--%s_" % (subject, transcript) | 115 prefix = "%s--%s_" % (subject, transcript) | 
| 109 hitNumber = 0 | 116 hitNumber = 0 | 
| 110 for hit in blastdict[subject][transcript]: | 117 for hit in blastdict[subject][transcript]: | 
| 111 hitNumber += 1 | 118 hitNumber += 1 | 
| 112 suffix = "hit%s_IdMatch=%s,AligLength=%s,E-val=%s" % (hitNumber, hit[0], hit[1], hit[8]) | 119 suffix = "hit%s_IdMatch=%s,AligLength=%s,E-val=%s" % (hitNumber, hit[0], hit[1], hit[8]) | 
| 113 HitDic[prefix+suffix] = GetHitSequence (fastadict, transcript, hit[4], hit[5], QueriesFlankingNucleotides) #query coverage by a hit is in hit[4:6] | 120 HitDic[prefix+suffix] = GetHitSequence(fastadict, transcript, hit[4], hit[5], QueriesFlankingNucleotides) # query coverage by a hit is in hit[4:6] | 
| 114 SubjectCoverageList += range (min([hit[6], hit[7]]), max([hit[6], hit[7]]) + 1) # subject coverage by a hit is in hit[6:8] | 121 SubjectCoverageList += range(min([hit[6], hit[7]]), max([hit[6], hit[7]]) + 1) # subject coverage by a hit is in hit[6:8] | 
| 115 bitScores.append(hit[9]) | 122 bitScores.append(hit[9]) | 
| 116 subjectLength = hit [10] # always the same value for a given subject. Stupid but simple | 123 subjectLength = hit[10] # always the same value for a given subject. Stupid but simple | 
| 117 TotalSubjectCoverage = len ( set (SubjectCoverageList) ) | 124 TotalSubjectCoverage = len(set(SubjectCoverageList)) | 
| 118 RelativeSubjectCoverage = TotalSubjectCoverage/float(subjectLength) | 125 RelativeSubjectCoverage = TotalSubjectCoverage/float(subjectLength) | 
| 119 return HitDic, subjectLength, TotalSubjectCoverage, RelativeSubjectCoverage, max(bitScores), mean(bitScores) | 126 return HitDic, subjectLength, TotalSubjectCoverage, RelativeSubjectCoverage, max(bitScores), mean(bitScores) | 
| 120 | 127 | 
| 121 def GetHitSequence (fastadict, FastaHeader, leftCoordinate, rightCoordinate, FlankingValue): | 128 | 
| 129 def GetHitSequence(fastadict, FastaHeader, leftCoordinate, rightCoordinate, FlankingValue): | |
| 122 if rightCoordinate > leftCoordinate: | 130 if rightCoordinate > leftCoordinate: | 
| 123 polarity = "direct" | 131 polarity = "direct" | 
| 124 else: | 132 else: | 
| 125 polarity = "reverse" | 133 polarity = "reverse" | 
| 126 leftCoordinate, rightCoordinate = rightCoordinate, leftCoordinate | 134 leftCoordinate, rightCoordinate = rightCoordinate, leftCoordinate | 
| 127 if leftCoordinate - FlankingValue > 0: | 135 if leftCoordinate - FlankingValue > 0: | 
| 128 leftCoordinate -= FlankingValue | 136 leftCoordinate -= FlankingValue | 
| 129 else: | 137 else: | 
| 130 leftCoordinate = 1 | 138 leftCoordinate = 1 | 
| 131 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) | 139 return getseq(fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) | 
| 132 | 140 | 
| 133 def outputParsing (dataset_name, F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"): | 141 | 
| 134 def filter_results (results, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out=""): | 142 def outputParsing(dataset_name, F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"): | 
| 143 def filter_results(results, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out=""): | |
| 135 for subject in results.keys(): | 144 for subject in results.keys(): | 
| 136 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov: | 145 if results[subject]["RelativeSubjectCoverage"] < filter_relativeCov: | 
| 137 del results[subject] | 146 del results[subject] | 
| 138 continue | 147 continue | 
| 139 if results[subject]["maxBitScores"]<filter_maxScore: | 148 if results[subject]["maxBitScores"] < filter_maxScore: | 
| 140 del results[subject] | 149 del results[subject] | 
| 141 continue | 150 continue | 
| 142 if results[subject]["meanBitScores"]<filter_meanScore: | 151 if results[subject]["meanBitScores"] < filter_meanScore: | 
| 143 del results[subject] | 152 del results[subject] | 
| 144 continue | 153 continue | 
| 145 if filter_term_in in subject: | 154 if filter_term_in in subject: | 
| 146 pass | 155 pass | 
| 147 else: | 156 else: | 
| 149 continue | 158 continue | 
| 150 if filter_term_out and filter_term_out in subject: | 159 if filter_term_out and filter_term_out in subject: | 
| 151 del results[subject] | 160 del results[subject] | 
| 152 continue | 161 continue | 
| 153 return results | 162 return results | 
| 154 | 163 | 
| 155 F= open(F, "w") | 164 F = open(F, "w") | 
| 156 Fasta=open(Fasta, "w") | 165 Fasta = open(Fasta, "w") | 
| 157 blasted_transcripts = [] | 166 blasted_transcripts = [] | 
| 158 filter_results (results, filter_relativeCov, filter_maxScore, filter_meanScore, filter_term_in, filter_term_out) | 167 filter_results(results, filter_relativeCov, filter_maxScore, filter_meanScore, filter_term_in, filter_term_out) | 
| 159 for subject in results: | 168 for subject in results: | 
| 160 for transcript in Xblastdict[subject]: | 169 for transcript in Xblastdict[subject]: | 
| 161 blasted_transcripts.append(transcript) | 170 blasted_transcripts.append(transcript) | 
| 162 blasted_transcripts = list( set( blasted_transcripts)) | 171 blasted_transcripts = list(set(blasted_transcripts)) | 
| 163 if mode == "verbose": | 172 if mode == "verbose": | 
| 164 print >>F, "--- %s ---" % (dataset_name) | 173 print >>F, "--- %s ---" % (dataset_name) | 
| 165 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore" | 174 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore" | 
| 166 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): | 175 for subject in sorted(results, key=lambda x: results[x]["meanBitScores"], reverse=True): | 
| 167 print >> F, " \n# %s" % subject | 176 print >> F, " \n# %s" % subject | 
| 168 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) | 177 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) | 
| 169 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) | 178 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) | 
| 170 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) | 179 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) | 
| 171 print >> F, "# Best Bit Score: %s" % (results[subject]["maxBitScores"]) | 180 print >> F, "# Best Bit Score: %s" % (results[subject]["maxBitScores"]) | 
| 172 print >> F, "# Mean Bit Score: %s" % (results[subject]["meanBitScores"]) | 181 print >> F, "# Mean Bit Score: %s" % (results[subject]["meanBitScores"]) | 
| 173 for header in results[subject]["HitDic"]: | 182 for header in results[subject]["HitDic"]: | 
| 174 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) ) | 183 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header])) | 
| 175 print >> Fasta, "" # final carriage return for the sequence | 184 print >> Fasta, "" # final carriage return for the sequence | 
| 176 for transcript in Xblastdict[subject]: | 185 for transcript in Xblastdict[subject]: | 
| 177 transcriptSize = float(len(fastadict[transcript])) | 186 transcriptSize = float(len(fastadict[transcript])) | 
| 178 for hit in Xblastdict[subject][transcript]: | 187 for hit in Xblastdict[subject][transcript]: | 
| 179 percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov = hit[0], hit[1], hit[6], hit[7], "%.1f" % (abs(hit[5]-hit[4])/transcriptSize*100) | 188 percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov = hit[0], hit[1], hit[6], hit[7], "%.1f" % (abs(hit[5]-hit[4])/transcriptSize*100) | 
| 180 Eval, BitScore = hit[8], hit[9] | 189 Eval, BitScore = hit[8], hit[9] | 
| 183 info = "\t".join(info) | 192 info = "\t".join(info) | 
| 184 print >> F, info | 193 print >> F, info | 
| 185 else: | 194 else: | 
| 186 print >>F, "--- %s ---" % (dataset_name) | 195 print >>F, "--- %s ---" % (dataset_name) | 
| 187 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score" | 196 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score" | 
| 188 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): | 197 for subject in sorted(results, key=lambda x: results[x]["meanBitScores"], reverse=True): | 
| 189 line = [] | 198 line = [] | 
| 190 line.append(subject) | 199 line.append(subject) | 
| 191 line.append(results[subject]["subjectLength"]) | 200 line.append(results[subject]["subjectLength"]) | 
| 192 line.append(results[subject]["TotalCoverage"]) | 201 line.append(results[subject]["TotalCoverage"]) | 
| 193 line.append(results[subject]["RelativeSubjectCoverage"]) | 202 line.append(results[subject]["RelativeSubjectCoverage"]) | 
| 194 line.append(results[subject]["maxBitScores"]) | 203 line.append(results[subject]["maxBitScores"]) | 
| 195 line.append(results[subject]["meanBitScores"]) | 204 line.append(results[subject]["meanBitScores"]) | 
| 196 line = [str(i) for i in line] | 205 line = [str(i) for i in line] | 
| 197 print >> F, "\t".join(line) | 206 print >> F, "\t".join(line) | 
| 198 for header in results[subject]["HitDic"]: | 207 for header in results[subject]["HitDic"]: | 
| 199 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) ) | 208 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header])) | 
| 200 print >> Fasta, "" # final carriage return for the sequence | 209 print >> Fasta, "" # final carriage return for the sequence | 
| 201 F.close() | 210 F.close() | 
| 202 Fasta.close() | 211 Fasta.close() | 
| 203 return blasted_transcripts | 212 return blasted_transcripts | 
| 204 | 213 | 
| 205 def dispatch_sequences (fastadict, blasted_transcripts, matched_sequences, unmatched_sequences): | 214 | 
| 215 def dispatch_sequences(fastadict, blasted_transcripts, matched_sequences, unmatched_sequences): | |
| 206 '''to output the sequences that matched and did not matched in the blast''' | 216 '''to output the sequences that matched and did not matched in the blast''' | 
| 207 F_matched = open (matched_sequences, "w") | 217 F_matched = open(matched_sequences, "w") | 
| 208 F_unmatched = open (unmatched_sequences, "w") | 218 F_unmatched = open(unmatched_sequences, "w") | 
| 209 for transcript in fastadict: | 219 for transcript in fastadict: | 
| 210 if transcript in blasted_transcripts: # le list of blasted_transcripts is generated by the outputParsing function | 220 if transcript in blasted_transcripts: # list of blasted_transcripts is generated by the outputParsing function | 
| 211 print >> F_matched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) | 221 print >> F_matched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript])) | 
| 212 else: | 222 else: | 
| 213 print >> F_unmatched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) | 223 print >> F_unmatched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript])) | 
| 214 F_matched.close() | 224 F_matched.close() | 
| 215 F_unmatched.close() | 225 F_unmatched.close() | 
| 216 return | 226 return | 
| 217 | 227 | 
| 218 def __main__ (): | 228 | 
| 229 def __main__(): | |
| 219 args = Parser() | 230 args = Parser() | 
| 220 fastadict = getfasta (args.sequences) | 231 fastadict = getfasta(args.sequences) | 
| 221 Xblastdict = getblast (args.blast) | 232 Xblastdict = getblast(args.blast) | 
| 222 results = defaultdict(dict) | 233 results = defaultdict(dict) | 
| 223 for subject in Xblastdict: | 234 for subject in Xblastdict: | 
| 224 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) | 235 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) | 
| 225 blasted_transcripts = outputParsing (args.dataset_name, args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, | 236 blasted_transcripts = outputParsing(args.dataset_name, args.tabularOutput, | 
| 226 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, | 237 args.fastaOutput, results, Xblastdict, fastadict, | 
| 227 filter_meanScore=args.filter_meanScore, filter_term_in=args.filter_term_in, | 238 filter_relativeCov=args.filter_relativeCov, | 
| 228 filter_term_out=args.filter_term_out, mode=args.mode) | 239 filter_maxScore=args.filter_maxScore, | 
| 229 dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences) | 240 filter_meanScore=args.filter_meanScore, | 
| 230 | 241 filter_term_in=args.filter_term_in, | 
| 231 if __name__=="__main__": __main__() | 242 filter_term_out=args.filter_term_out, | 
| 243 mode=args.mode) | |
| 244 dispatch_sequences(fastadict, blasted_transcripts, args.al_sequences, args.un_sequences) | |
| 245 | |
| 246 if __name__ == "__main__": | |
| 247 __main__() | 
