Mercurial > repos > dfornika > blast_report
comparison blast_report.py @ 9:2b4f30c6b50a draft default tip
planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/blast_report commit 174f746f44dfdeb18301429116ccc0213c1e091e-dirty
| author | dfornika |
|---|---|
| date | Mon, 02 Mar 2020 23:41:54 +0000 |
| parents | 18b097eb1a51 |
| children | |
comparison legend: equal, deleted, inserted, replaced
| 8:71dd0b1d5511 | 9:2b4f30c6b50a |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 from __future__ import print_function | |
| 3 | |
| 4 '''Report on BLAST results. | 2 '''Report on BLAST results. |
| 5 | 3 |
| 6 python bccdc_blast_report.py input_tab cheetah_tmpl output_html output_tab [-f [filter_pident]:[filterkw1,...,filterkwN]] [-b bin1_label=bin1_path[,...binN_label=binN_path]] | 4 python blast_report.py input_tab cheetah_tmpl output_html output_tab [-f [filter_pident]:[filterkw1,...,filterkwN]] [-b bin1_label=bin1_path[,...binN_label=binN_path]] |
| 7 ''' | 5 ''' |
| 8 | 6 import argparse |
| 9 import optparse | |
| 10 import re | 7 import re |
| 11 import sys | 8 import sys |
| 9 | |
| 10 from Cheetah.Template import Template | |
| 11 | |
| 12 | 12 |
| 13 def stop_err( msg ): | 13 def stop_err( msg ): |
| 14 sys.stderr.write("%s\n" % msg) | 14 sys.stderr.write("%s\n" % msg) |
| 15 sys.exit(1) | 15 sys.exit(1) |
| 16 | |
| 16 | 17 |
| 17 class BLASTBin: | 18 class BLASTBin: |
| 18 def __init__(self, label, file): | 19 def __init__(self, label, file): |
| 19 self.label = label | 20 self.label = label |
| 20 self.dict = {} | 21 self.dict = {} |
| 24 self.dict[line.rstrip().split('.')[0]] = '' | 25 self.dict[line.rstrip().split('.')[0]] = '' |
| 25 file_in.close() | 26 file_in.close() |
| 26 | 27 |
| 27 def __str__(self): | 28 def __str__(self): |
| 28 return "label: %s dict: %s" % (self.label, str(self.dict)) | 29 return "label: %s dict: %s" % (self.label, str(self.dict)) |
| 30 | |
| 29 | 31 |
| 30 class BLASTQuery: | 32 class BLASTQuery: |
| 31 def __init__(self, query_id): | 33 def __init__(self, query_id): |
| 32 self.query_id = query_id | 34 self.query_id = query_id |
| 33 self.matches = [] | 35 self.matches = [] |
| 44 str([bin.label for bin in bins]), | 46 str([bin.label for bin in bins]), |
| 45 str(self.pident_filtered), | 47 str(self.pident_filtered), |
| 46 str(self.kw_filtered), | 48 str(self.kw_filtered), |
| 47 str(self.kw_filtered_breakdown)) | 49 str(self.kw_filtered_breakdown)) |
| 48 | 50 |
| 51 | |
| 49 class BLASTMatch: | 52 class BLASTMatch: |
| 50 def __init__(self, subject_acc, subject_descr, score, p_cov, p_ident, subject_bins): | 53 def __init__(self, subject_acc, subject_descr, score, p_cov, p_ident, subject_bins): |
| 51 self.subject_acc = subject_acc | 54 self.subject_acc = subject_acc |
| 52 self.subject_descr = subject_descr | 55 self.subject_descr = subject_descr |
| 53 self.score = score | 56 self.score = score |
| 61 self.subject_descr, | 64 self.subject_descr, |
| 62 str(self.score), | 65 str(self.score), |
| 63 str(round(self.p_cov,2)), | 66 str(round(self.p_cov,2)), |
| 64 str(round(self.p_ident, 2))) | 67 str(round(self.p_ident, 2))) |
| 65 | 68 |
| 69 | |
| 70 | |
| 66 #PARSE OPTIONS AND ARGUMENTS | 71 #PARSE OPTIONS AND ARGUMENTS |
| 67 parser = optparse.OptionParser(description='Report on BLAST results.', | 72 parser = argparse.ArgumentParser() |
| 68 usage='python bccdc_blast_report_generator.py input_tabut cheetah_tmpl output_html [output_id output_dir] [options]') | 73 |
| 69 | 74 parser.add_argument('-f', '--filter', |
| 70 parser.add_option('-f', '--filter', | |
| 71 type='string', | 75 type='string', |
| 72 dest='filter', | 76 dest='filter', |
| 73 ) | 77 ) |
| 74 parser.add_option('-b', '--bins', | 78 parser.add_argument('-b', '--bins', |
| 75 type='string', | 79 type='string', |
| 76 dest='bins' | 80 dest='bins' |
| 77 ) | 81 ) |
| 78 parser.add_option('-r', '--redundant', | 82 parser.add_argument('-r', '--redundant', |
| 79 dest='hsp', | 83 dest='redundant', |
| 80 default=False, | 84 default=False, |
| 81 action='store_true' | 85 action='store_true' |
| 82 ) | 86 ) |
| 83 options, args = parser.parse_args() | 87 args = parser.parse_args() |
| 84 | 88 |
| 85 try: | 89 try: |
| 86 input_tab, cheetah_tmpl, output_html, output_tab = args | 90 input_tab, cheetah_tmpl, output_html, output_tab = args |
| 87 except: | 91 except: |
| 88 stop_err('you must supply the arguments input_tab, cheetah_tmpl and output_html.') | 92 stop_err('you must supply the arguments input_tab, cheetah_tmpl and output_html.') |
| 89 #print('input_tab: %s cheetah_tmpl: %s output_html: %s output_tab: %s' % (input_tab, cheetah_tmpl, output_html, output_tab)) | 93 # print('input_tab: %s cheetah_tmpl: %s output_html: %s output_tab: %s' % (input_tab, cheetah_tmpl, output_html, output_tab)) |
| 94 | |
| 90 | 95 |
| 91 #BINS | 96 #BINS |
| 92 bins=[] | 97 bins=[] |
| 93 if options.bins != None: | 98 if args.bins != None: |
| 94 bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in options.bins.split(',')]) | 99 bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in args.bins.split(',')]) |
| 95 print('database bins: %s' % str([bin.label for bin in bins])) | 100 print('database bins: %s' % str([bin.label for bin in bins])) |
| 96 | 101 |
| 97 #FILTERS | 102 #FILTERS |
| 98 filter_pident = 0 | 103 filter_pident = 0 |
| 99 filter_kws = [] | 104 filter_kws = [] |
| 100 if options.filter != None: | 105 if args.filter != None: |
| 101 pident_kws = options.filter.split(':') | 106 pident_kws = args.filter.split(':') |
| 102 filter_pident = float(pident_kws[0]) | 107 filter_pident = float(pident_kws[0]) |
| 103 filter_kws = pident_kws[-1].split(',') | 108 filter_kws = pident_kws[-1].split(',') |
| 104 print('filter_pident: %s filter_kws: %s' % (str(filter_pident), str(filter_kws))) | 109 print('filter_pident: %s filter_kws: %s' % (str(filter_pident), str(filter_kws))) |
| 105 | 110 |
| 106 if options.hsp: | 111 if args.redundant: |
| 107 print('Throwing out redundant hits...') | 112 print('Throwing out redundant hits...') |
| 108 | 113 |
| 109 #RESULTS! | 114 #RESULTS! |
| 110 PIDENT_COL = 2 | 115 PIDENT_COL = 2 |
| 111 DESCR_COL = 25 | 116 DESCR_COL = 25 |
| 113 SCORE_COL = 11 | 118 SCORE_COL = 11 |
| 114 PCOV_COL = 24 | 119 PCOV_COL = 24 |
| 115 queries = [] | 120 queries = [] |
| 116 current_query = '' | 121 current_query = '' |
| 117 output_tab = open(output_tab, 'w') | 122 output_tab = open(output_tab, 'w') |
| 123 | |
| 118 with open(input_tab) as input_tab: | 124 with open(input_tab) as input_tab: |
| 119 for line in input_tab: | 125 for line in input_tab: |
| 120 cols = line.split('\t') | 126 cols = line.split('\t') |
| 121 if cols[0] != current_query: | 127 if cols[0] != current_query: |
| 122 current_query = cols[0] | 128 current_query = cols[0] |
| 123 queries.append(BLASTQuery(current_query)) | 129 queries.append(BLASTQuery(current_query)) |
| 124 | 130 |
| 125 try: | 131 try: |
| 126 accs = cols[SUBJ_ID_COL].split('|')[1::2][1::2] | 132 accs = cols[SUBJ_ID_COL].split('|')[1::2][1::2] |
| 127 except IndexError as e: | 133 except IndexError as e: |
| 128 stop_err("Problem with splitting:" + cols[SUBJ_ID_COL]) | 134 stop_err("Problem with splitting:" + cols[SUBJ_ID_COL]) |
| 129 | 135 |
| 130 #hsp option: keep best (first) hit only for each query and accession id. | 136 #hsp option: keep best (first) hit only for each query and accession id. |
| 131 if options.hsp: | 137 if args.redundant: |
| 132 if accs[0] in queries[-1].match_accessions: | 138 if accs[0] in queries[-1].match_accessions: |
| 133 continue #don't save the result and skip to the next | 139 continue #don't save the result and skip to the next |
| 134 else: | 140 else: |
| 135 queries[-1].match_accessions[accs[0]] = '' | 141 queries[-1].match_accessions[accs[0]] = '' |
| 136 | 142 |
| 154 queries[-1].kw_filtered_breakdown[kw] = 1 | 160 queries[-1].kw_filtered_breakdown[kw] = 1 |
| 155 if filter_by_kw: #if we are not filtering, for loop will not be entered and this will never be True | 161 if filter_by_kw: #if we are not filtering, for loop will not be entered and this will never be True |
| 156 queries[-1].kw_filtered += 1 | 162 queries[-1].kw_filtered += 1 |
| 157 continue | 163 continue |
| 158 descr = descrs.split(';')[0] | 164 descr = descrs.split(';')[0] |
| 159 | 165 |
| 160 #ATTEMPT BIN | 166 #ATTEMPT BIN |
| 161 subj_bins = [] | 167 subj_bins = [] |
| 162 for bin in bins: #if we are not binning, bins = [] so for loop not entered | 168 for bin in bins: #if we are not binning, bins = [] so for loop not entered |
| 163 for acc in accs: | 169 for acc in accs: |
| 164 if acc.split('.')[0] in bin.dict: | 170 if acc.split('.')[0] in bin.dict: |
| 172 | 178 |
| 173 score = int(float(cols[SCORE_COL])) | 179 score = int(float(cols[SCORE_COL])) |
| 174 p_cov = float(cols[PCOV_COL]) | 180 p_cov = float(cols[PCOV_COL]) |
| 175 | 181 |
| 176 #SAVE RESULT | 182 #SAVE RESULT |
| 177 queries[-1].matches.append(BLASTMatch(acc, descr, score, p_cov, p_ident, subj_bins)) | 183 queries[-1].matches.append( |
| 184 BLASTMatch(acc, descr, score, p_cov, p_ident, subj_bins) | |
| 185 ) | |
| 178 output_tab.write(line) | 186 output_tab.write(line) |
| 179 input_tab.close() | 187 input_tab.close() |
| 180 output_tab.close() | 188 output_tab.close() |
| 181 | 189 |
| 182 ''' | 190 ''' |
| 188 print(' bin: %s' % bin) | 196 print(' bin: %s' % bin) |
| 189 for x in query.bins[bin]: | 197 for x in query.bins[bin]: |
| 190 print(' %s' % str(query.matches[x])) | 198 print(' %s' % str(query.matches[x])) |
| 191 ''' | 199 ''' |
| 192 | 200 |
| 193 from Cheetah.Template import Template | |
| 194 namespace = {'queries': queries} | 201 namespace = {'queries': queries} |
| 195 html = Template(file=cheetah_tmpl, searchList=[namespace]) | 202 html = Template(file=cheetah_tmpl, searchList=[namespace]) |
| 196 out_html = open(output_html, 'w') | 203 out_html = open(output_html, 'w') |
| 197 out_html.write(str(html)) | 204 out_html.write(str(html)) |
| 198 out_html.close() | 205 out_html.close() |
| 206 | |
| 207 | |
| 208 if __name__ == '__main__': | |
| 209 main() |
