Mercurial > repos > dfornika > micall_lite
annotate amino2consensus.py @ 18:43a987c03ec5 draft
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
| author | dfornika | 
|---|---|
| date | Wed, 25 Sep 2019 18:28:54 -0400 | 
| parents | 61667ff2c8b5 | 
| children | 8e6d3de9c7fa | 
| rev | line source | 
|---|---|
| 
15
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python | 
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
2 | 
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
3 from __future__ import print_function | 
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
4 | 
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
5 import csv | 
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
6 import argparse | 
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
7 | 
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
# Symbols whose counts are tallied at each position; main() reads one CSV
# column per entry. NOTE(review): '*' presumably denotes a stop codon - confirm.
AMINO_ACIDS = list('ACDEFGHIKLMNPQRSTVWY*')
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
9 | 
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
def determine_amino(amino_counts, threshold):
    """Call the consensus symbol for one position from its amino acid counts.

    Args:
        amino_counts: Mapping of amino acid symbol to its count at this
            position.
        threshold: Fraction of the total count that the most frequent
            symbol must strictly exceed to be called. Numeric strings
            (for example "0.15") are accepted and converted with float().

    Returns:
        "#" when the counts sum to zero (including an empty mapping),
        the most frequent symbol when its share of the total is strictly
        greater than ``threshold``, and "@" otherwise.
    """
    total_count = sum(amino_counts.values())
    # Check coverage before looking for a maximum: an empty mapping has no
    # maximum, and a zero total would divide by zero below.
    if total_count == 0:
        return "#"

    # A threshold taken straight from the command line may still be a str;
    # comparing a float with a str raises TypeError on Python 3.
    threshold = float(threshold)

    # max() returns the first of several tied keys, which is the same key a
    # stable descending sort would place first.
    amino_with_max_counts = max(amino_counts, key=amino_counts.get)

    # float() keeps the ratio fractional even under integer-division rules.
    fraction = float(amino_counts[amino_with_max_counts]) / total_count
    if fraction > threshold:
        return amino_with_max_counts
    return "@"
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
21 | 
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
def determine_first_region(amino_file):
    """Return the 'region' value of the first data row of a CSV file.

    Args:
        amino_file: Path to a CSV file whose header row includes a
            'region' column.

    Returns:
        The 'region' field of the first row after the header.

    Raises:
        StopIteration: If the file contains no data rows.
        KeyError: If the header has no 'region' column.
    """
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation recommends; otherwise newlines embedded in quoted
    # fields are translated before the parser sees them.
    with open(amino_file, newline='') as f:
        first_row = next(csv.DictReader(f))
    return first_row['region']
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
28 | 
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
def main(args):
    """Print a FASTA-style consensus record for each run of rows per region.

    Reads the CSV at ``args.amino`` row by row. Consecutive rows sharing
    the same 'region' value contribute one symbol each (chosen by
    determine_amino) to that region's sequence. Whenever the region
    changes, and once more after the last row, a ">" + region header line
    and the joined sequence are printed to standard output.

    Args:
        args: Object with an ``amino`` attribute (path to the CSV file) and
            a ``threshold`` attribute (number or numeric string) that is
            forwarded to determine_amino.
    """
    current_region = determine_first_region(args.amino)
    # Convert once up front: a threshold given on the command line may
    # arrive as a str, which cannot be compared with a float fraction.
    threshold = float(args.threshold)
    seq = []
    with open(args.amino, newline='') as f:
        for row in csv.DictReader(f):
            if row['region'] != current_region:
                # Region changed: emit the finished record and start a new one.
                print(">" + current_region)
                print(''.join(seq))
                current_region = row['region']
                seq = []
            # The per-row work is the same whether or not the region changed.
            amino_counts = {aa: int(row[aa]) for aa in AMINO_ACIDS}
            seq.append(determine_amino(amino_counts, threshold))
        # Emit the record for the last region in the file.
        print(">" + current_region)
        print(''.join(seq))
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
53 | 
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
54 | 
| 
 
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
 
dfornika 
parents:  
diff
changeset
 | 
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and hand them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument("amino", help="MiCall amino.csv output file")
    # type=float is needed here: without it a value supplied on the command
    # line is stored as a str and cannot be compared with a float fraction.
    parser.add_argument("--threshold", type=float, default=0.15, help="Threshold for calling")
    args = parser.parse_args()
    main(args)
