Mercurial > repos > drosofff > msp_blastparser_and_hits
comparison BlastParser_and_hits.py @ 6:3f7cfa1cf90c draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit e511de70e387d5033ab91f37c8ddb5665fa61a87
author | drosofff |
---|---|
date | Mon, 14 Sep 2015 11:39:51 -0400 |
parents | 22641bb68b91 |
children | 72ef366ef55e |
comparison
legend: equal · deleted · inserted · replaced
5:a941981a298c | 6:3f7cfa1cf90c |
---|---|
15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") | 15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") |
16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") | 16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") |
17 the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)") | 17 the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)") |
18 the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number") | 18 the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number") |
19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number") | 19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number") |
| | 20 the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned") |
| | 21 the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned") |
20 args = the_parser.parse_args() | 22 args = the_parser.parse_args() |
21 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): | 23 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): |
22 the_parser.error('argument(s) missing, call the -h option of the script') | 24 the_parser.error('argument(s) missing, call the -h option of the script') |
23 if not args.flanking: | 25 if not args.flanking: |
24 args.flanking = 0 | 26 args.flanking = 0 |
169 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) ) | 171 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) ) |
170 print >> Fasta, "" # final carriage return for the sequence | 172 print >> Fasta, "" # final carriage return for the sequence |
171 F.close() | 173 F.close() |
172 Fasta.close() | 174 Fasta.close() |
173 | 175 |
174 | 176 def sort_sequences (fastadict, blastdict, matched_sequences, unmatched_sequences): |
| | 177 '''to output the sequences that matched and did not matched in the blast''' |
| | 178 blasted_transcripts = [] |
| | 179 for subject in blastdict: |
| | 180 for transcript in blastdict[subject]: |
| | 181 blasted_transcripts.append(transcript) |
| | 182 blasted_transcripts = list( set( blasted_transcripts)) |
| | 183 F_matched = open (matched_sequences, "w") |
| | 184 F_unmatched = open (unmatched_sequences, "w") |
| | 185 for transcript in fastadict: |
| | 186 if transcript in blasted_transcripts: |
| | 187 print >> F_matched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) |
| | 188 else: |
| | 189 print >> F_unmatched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) |
| | 190 F_matched.close() |
| | 191 F_unmatched.close() |
| | 192 return |
175 | 193 |
176 def __main__ (): | 194 def __main__ (): |
177 args = Parser() | 195 args = Parser() |
178 fastadict = getfasta (args.sequences) | 196 fastadict = getfasta (args.sequences) |
179 Xblastdict = getblast (args.blast) | 197 Xblastdict = getblast (args.blast) |
| | 198 sort_sequences (fastadict, Xblastdict, args.al_sequences, args.un_sequences) |
180 results = defaultdict(dict) | 199 results = defaultdict(dict) |
181 for subject in Xblastdict: | 200 for subject in Xblastdict: |
182 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) | 201 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) |
183 outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, | 202 outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, |
184 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, | 203 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, |