annotate amino2consensus.py @ 20:b1e1f49ebe14 draft

planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
author dfornika
date Wed, 25 Sep 2019 18:36:16 -0400
parents 8e6d3de9c7fa
children d099ef46fd42
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
15
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
1 #!/usr/bin/env python
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
2
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
3 from __future__ import print_function
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
4
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
5 import csv
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
6 import argparse
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
7
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
# Single-letter residue codes, plus '*'. Each entry is used as a column name
# when reading per-position counts from the input CSV.
AMINO_ACIDS = list('ACDEFGHIKLMNPQRSTVWY*')
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
9
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
def determine_amino(amino_counts, threshold):
    """Pick the consensus symbol for one position from its residue counts.

    Args:
        amino_counts: Mapping of residue symbol -> integer count.
        threshold: Minimum fraction (exclusive) of the total count that the
            most frequent residue must exceed to be called. Numeric strings
            are accepted and converted with ``float``.

    Returns:
        "#" when the counts sum to zero (including an empty mapping),
        the most frequent residue when its fraction of the total is strictly
        greater than ``threshold``, and "@" otherwise. Ties for the highest
        count resolve to the first such residue in the mapping's iteration
        order.
    """
    total_count = sum(amino_counts.values())
    # Guard first: with nothing counted there is no maximum to look up, and
    # an empty mapping must not raise.
    if total_count == 0:
        return "#"
    # argparse hands over a str when the option has no type=; comparing a
    # float against a str is an error on Python 3 and meaningless on Python 2.
    threshold = float(threshold)
    amino_with_max_counts = max(amino_counts, key=amino_counts.get)
    if amino_counts[amino_with_max_counts] / float(total_count) > threshold:
        return amino_with_max_counts
    return "@"
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
21
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
def determine_first_region(amino_file):
    """Return the 'region' field of the first data row in a CSV file.

    Args:
        amino_file: Path to a CSV file with a header row that includes a
            'region' column.

    Returns:
        The value of the 'region' column in the first row after the header.
    """
    with open(amino_file) as handle:
        # Only the first record is needed; the rest of the file is not read.
        first_row = next(csv.DictReader(handle))
    return first_row['region']
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
28
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
def _print_region_record(region, residues):
    """Print one region as a header line ('>' + region) and a sequence line."""
    print(">" + region)
    print(''.join(residues))


def main(args):
    """Print one consensus record per run of consecutive same-region rows.

    Reads the CSV file at ``args.amino`` in a single pass. Each row must have
    a 'region' column and one integer column per entry of ``AMINO_ACIDS``.
    For every row the consensus symbol is obtained from ``determine_amino``
    using ``args.threshold``; symbols are accumulated until the region value
    changes, at which point the finished record is printed.

    Args:
        args: Object with attributes ``amino`` (input CSV path) and
            ``threshold`` (calling threshold passed to ``determine_amino``).

    An input with no data rows produces no output.
    """
    current_region = None  # None until the first data row has been seen
    seq = []
    with open(args.amino) as f:
        for row in csv.DictReader(f):
            region = row['region']
            if current_region is not None and region != current_region:
                # Region boundary: flush the finished record, start a new one.
                _print_region_record(current_region, seq)
                seq = []
            current_region = region
            amino_counts = {}
            for amino_acid in AMINO_ACIDS:
                amino_counts[amino_acid] = int(row[amino_acid])
            seq.append(determine_amino(amino_counts, args.threshold))
    # Flush the last region; skipped when the file had no data rows at all.
    if current_region is not None:
        _print_region_record(current_region, seq)
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
53
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
54
61667ff2c8b5 planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and run main().
    parser = argparse.ArgumentParser()
    parser.add_argument("amino", help="MiCall amino.csv output file")
    # type=float is required: without it a user-supplied value arrives as a
    # str and the numeric comparison against the residue fraction breaks.
    parser.add_argument("--threshold", type=float, default=0.15, help="Threshold for calling")
    args = parser.parse_args()
    main(args)