2
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import argparse
|
|
4 import csv
|
|
5 import json
|
|
6
|
|
7
|
|
# Columns shared by the resistance-variant and other-variant tables.
_VARIANT_FIELDNAMES = [
    'chrom',
    'genome_pos',
    'locus_tag',
    'feature_id',
    'gene',
    'type',
    'ref',
    'alt',
    'freq',
    'nucleotide_change',
    'protein_change',
    'change',
]


def _write_tsv(path, fieldnames, make_rows):
    """Write a header plus the dicts yielded by ``make_rows()`` to ``path``.

    Args:
        path: Destination file path, or ``None`` to skip this table entirely
            (the corresponding command-line option was not supplied).
        fieldnames: Column names, in output order.
        make_rows: Zero-argument callable returning an iterable of row dicts.
            It is only called after the header has been written, so a table
            that is skipped never touches the report.
    """
    if path is None:
        return
    # newline='' is what the csv module requires for files it writes to;
    # without it the dialect's '\r\n' terminator becomes '\r\r\n' on Windows.
    with open(path, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, dialect='excel-tab', quoting=csv.QUOTE_MINIMAL)
        writer.writeheader()
        writer.writerows(make_rows())


def main(args):
    """Split a JSON report into up to six tab-separated tables.

    The report is loaded from ``args.input``. Each of ``args.qc``,
    ``args.gene_coverage``, ``args.missing_positions``,
    ``args.resistance_variants``, ``args.other_variants`` and
    ``args.analysis_metadata`` is an output path; a table whose path is
    ``None`` is skipped.

    Keys read from the report: ``qc`` (including ``gene_coverage`` and
    ``missing_positions``), ``dr_variants``, ``other_variants``,
    ``timestamp``, ``tbprofiler_version``, ``db_version`` and ``pipeline``.

    Args:
        args: Namespace-like object carrying the attributes listed above.

    Raises:
        KeyError: If a requested table needs a key the report does not have.
    """
    with open(args.input, 'r') as f:
        report = json.load(f)

    qc_fieldnames = [
        'pct_reads_mapped',
        'num_reads_mapped',
        'median_coverage',
    ]

    def qc_rows():
        # Single summary row taken from the top level of report['qc'].
        yield {k: report['qc'][k] for k in qc_fieldnames}

    _write_tsv(args.qc, qc_fieldnames, qc_rows)

    gene_coverage_fieldnames = [
        'locus_tag',
        'gene',
        'fraction',
        'cutoff',
    ]
    # Rows are handed to DictWriter unchanged.
    _write_tsv(args.gene_coverage, gene_coverage_fieldnames,
               lambda: report['qc']['gene_coverage'])

    missing_positions_fieldnames = [
        'locus_tag',
        'gene',
        'position',
        'variants',
        'drugs',
    ]
    # Rows are handed to DictWriter unchanged.
    _write_tsv(args.missing_positions, missing_positions_fieldnames,
               lambda: report['qc']['missing_positions'])

    resistance_variants_fieldnames = _VARIANT_FIELDNAMES + ['drugs']

    def resistance_variant_rows():
        for row in report['dr_variants']:
            # Flatten the list of drug records into "drug:confers, ..." text.
            drugs = ', '.join(drug['drug'] + ':' + drug['confers'] for drug in row['drugs'])
            output = {k: row[k] for k in resistance_variants_fieldnames}
            output['drugs'] = drugs
            yield output

    _write_tsv(args.resistance_variants, resistance_variants_fieldnames,
               resistance_variant_rows)

    other_variants_fieldnames = _VARIANT_FIELDNAMES + ['gene_associated_drugs']

    def other_variant_rows():
        for row in report['other_variants']:
            gene_associated_drugs = ', '.join(row['gene_associated_drugs'])
            output = {k: row[k] for k in other_variants_fieldnames}
            output['gene_associated_drugs'] = gene_associated_drugs
            yield output

    _write_tsv(args.other_variants, other_variants_fieldnames,
               other_variant_rows)

    analysis_metadata_fieldnames = [
        'timestamp',
        'tbprofiler_version',
        'mapping_program',
        'variant_calling_program',
        'db_name',
        'db_commit',
        'db_date',
    ]

    def analysis_metadata_rows():
        output = {}
        output['timestamp'] = report['timestamp']
        output['tbprofiler_version'] = report['tbprofiler_version']
        output['db_name'] = report['db_version']['name']
        output['db_commit'] = report['db_version']['commit']
        output['db_date'] = report['db_version']['Date']
        # Program columns stay empty when the pipeline list has no matching
        # entry (DictWriter fills missing keys with '').
        for pipeline_entry in report['pipeline']:
            if pipeline_entry['Analysis'] == "Mapping":
                output['mapping_program'] = pipeline_entry['Program']
            elif pipeline_entry['Analysis'] == "Variant calling":
                output['variant_calling_program'] = pipeline_entry['Program']
        yield output

    _write_tsv(args.analysis_metadata, analysis_metadata_fieldnames,
               analysis_metadata_rows)
|
|
126
|
|
if __name__ == '__main__':
    # Command-line entry point: a positional path to the JSON report,
    # followed by one option per output table path.
    cli = argparse.ArgumentParser()
    cli.add_argument('input')
    for output_flag in (
        '--qc',
        '--gene-coverage',
        '--missing-positions',
        '--resistance-variants',
        '--other-variants',
        '--analysis-metadata',
    ):
        cli.add_argument(output_flag)
    main(cli.parse_args())
|