Mercurial > repos > dfornika > blast_report_basic
comparison blast_report.py @ 7:445a1923bb97 draft
Uploaded
| author | dfornika |
|---|---|
| date | Tue, 03 Mar 2020 05:34:11 +0000 |
| parents | c7ce2cd96546 |
| children | c4e67d856c19 |
Comparison legend:
- equal
- deleted
- inserted
- replaced
| 6:b8a3578b6445 | 7:445a1923bb97 |
|---|---|
| 69 str(self.score), | 69 str(self.score), |
| 70 str(round(self.p_cov,2)), | 70 str(round(self.p_cov,2)), |
| 71 str(round(self.p_ident, 2))) | 71 str(round(self.p_ident, 2))) |
| 72 | 72 |
| 73 | 73 |
| 74 | |
| 75 #PARSE OPTIONS AND ARGUMENTS | 74 #PARSE OPTIONS AND ARGUMENTS |
| 76 parser = argparse.ArgumentParser() | 75 parser = argparse.ArgumentParser() |
| 77 | 76 |
| 78 parser.add_argument('-f', '--filter', | 77 parser.add_argument('-f', '--filter-keywords', |
| 79 dest='filter', | 78 dest='filter_keywords', |
| 79 ) | |
| 80 parser.add_argument('-i', '--min-identity', | |
| 81 dest='min_identity', | |
| 80 ) | 82 ) |
| 81 parser.add_argument('-b', '--bins', | 83 parser.add_argument('-b', '--bins', |
| 82 dest='bins' | 84 dest='bins' |
| 83 ) | 85 ) |
| 84 parser.add_argument('-r', '--discard-redundant', | 86 parser.add_argument('-r', '--discard-redundant', |
| 85 dest='discard_redundant', | 87 dest='discard_redundant', |
| 86 default=False, | 88 default=False, |
| 87 action='store_true' | 89 action='store_true' |
| 88 ) | 90 ) |
| 91 parser.add_argument('input_tab') | |
| 92 parser.add_argument('cheetah_tmpl') | |
| 93 parser.add_argument('output_html') | |
| 94 parser.add_argument('output_tab') | |
| 89 args = parser.parse_args() | 95 args = parser.parse_args() |
| 90 | 96 |
| 91 try: | 97 try: |
| 92 input_tab, cheetah_tmpl, output_html, output_tab = args | 98 input_tab, cheetah_tmpl, output_html, output_tab = args |
| 93 except: | 99 except: |
| 94 stop_err('you must supply the arguments input_tab, cheetah_tmpl and output_html.') | 100 stop_err('you must supply the arguments input_tab, cheetah_tmpl and output_html.') |
| 95 # print('input_tab: %s cheetah_tmpl: %s output_html: %s output_tab: %s' % (input_tab, cheetah_tmpl, output_html, output_tab)) | 101 print('input_tab: %s cheetah_tmpl: %s output_html: %s output_tab: %s' % (args.input_tab, args.cheetah_tmpl, args.output_html, args.output_tab)) |
| 96 | 102 |
| 97 | 103 |
| 98 #BINS | 104 #BINS |
| 99 bins=[] | 105 bins=[] |
| 100 if args.bins != None: | 106 if args.bins != None: |
| 101 bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in args.bins.split(',')]) | 107 bins = list([BLASTBin(label_file.split('=')[0],label_file.split('=')[-1]) for label_file in args.bins.split(',')]) |
| 102 print('database bins: %s' % str([bin.label for bin in bins])) | 108 print('database bins: %s' % str([bin.label for bin in bins])) |
| 103 | 109 |
| 104 #FILTERS | 110 #FILTERS |
| 105 filter_pident = 0 | 111 filter_pident = 0 |
| 106 filter_kws = [] | 112 filter_kws = [] |
| 107 if args.filter != None: | 113 if args.keyword_filter: |
| 108 pident_kws = args.filter.split(':') | 114 filter_kws = args.keyword_filter.split(',') |
| 109 filter_pident = float(pident_kws[0]) | 115 print('minimum percent identity: %s filter_kws: %s' % (str(args.min_identity), str(filter_kws))) |
| 110 filter_kws = pident_kws[-1].split(',') | |
| 111 print('filter_pident: %s filter_kws: %s' % (str(filter_pident), str(filter_kws))) | |
| 112 | 116 |
| 113 if args.discard_redundant: | 117 if args.discard_redundant: |
| 114 print('Throwing out redundant hits...') | 118 print('Throwing out redundant hits...') |
| 115 | 119 |
| 116 #RESULTS! | 120 |
| 117 PIDENT_COL = 2 | 121 PIDENT_COL = 2 |
| 118 DESCR_COL = 25 | 122 DESCR_COL = 25 |
| 119 SUBJ_ID_COL = 12 | 123 SUBJ_ID_COL = 12 |
| 120 SCORE_COL = 11 | 124 SCORE_COL = 11 |
| 121 PCOV_COL = 24 | 125 PCOV_COL = 24 |
| 122 queries = [] | 126 queries = [] |
| 123 current_query = '' | 127 current_query = '' |
| 124 output_tab = open(output_tab, 'w') | 128 output_tab = open(args.output_tab, 'w') |
| 125 | 129 |
| 126 with open(input_tab) as input_tab: | 130 with open(args.input_tab) as input_tab: |
| 127 for line in input_tab: | 131 for line in input_tab: |
| 128 cols = line.split('\t') | 132 cols = line.split('\t') |
| 129 if cols[0] != current_query: | 133 if cols[0] != current_query: |
| 130 current_query = cols[0] | 134 current_query = cols[0] |
| 131 queries.append(BLASTQuery(current_query)) | 135 queries.append(BLASTQuery(current_query)) |
| 151 | 155 |
| 152 descrs = cols[DESCR_COL] | 156 descrs = cols[DESCR_COL] |
| 153 #FILTER BY KEY WORDS | 157 #FILTER BY KEY WORDS |
| 154 filter_by_kw = False | 158 filter_by_kw = False |
| 155 for kw in filter_kws: | 159 for kw in filter_kws: |
| 156 kw = kw.strip() #Fix by Damion D Nov 2013 | 160 kw = kw.strip() |
| 157 if kw != '' and re.search(kw, descrs, re.IGNORECASE): | 161 if kw != '' and re.search(kw, descrs, re.IGNORECASE): |
| 158 filter_by_kw = True | 162 filter_by_kw = True |
| 159 try: | 163 try: |
| 160 queries[-1].kw_filtered_breakdown[kw] += 1 | 164 queries[-1].kw_filtered_breakdown[kw] += 1 |
| 161 except: | 165 except: |
| 199 for x in query.bins[bin]: | 203 for x in query.bins[bin]: |
| 200 print(' %s' % str(query.matches[x])) | 204 print(' %s' % str(query.matches[x])) |
| 201 ''' | 205 ''' |
| 202 | 206 |
| 203 namespace = {'queries': queries} | 207 namespace = {'queries': queries} |
| 204 html = Template(file=cheetah_tmpl, searchList=[namespace]) | 208 html = Template(file=args.cheetah_tmpl, searchList=[namespace]) |
| 205 out_html = open(output_html, 'w') | 209 out_html = open(args.output_html, 'w') |
| 206 out_html.write(str(html)) | 210 out_html.write(str(html)) |
| 207 out_html.close() | 211 out_html.close() |
| 208 | 212 |
| 209 | 213 |
| 210 if __name__ == '__main__': | 214 if __name__ == '__main__': |
