Mercurial > repos > dfornika > micall_lite
annotate amino2consensus.py @ 20:b1e1f49ebe14 draft
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
author | dfornika |
---|---|
date | Wed, 25 Sep 2019 18:36:16 -0400 |
parents | 8e6d3de9c7fa |
children | d099ef46fd42 |
rev | line source |
---|---|
15
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
1 #!/usr/bin/env python |
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
2 |
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
3 from __future__ import print_function |
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
4 |
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
5 import csv |
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
6 import argparse |
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
7 |
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
# Column names read from every row of the amino CSV: the 20 standard
# one-letter amino acid codes plus '*' (presumably the stop codon -- confirm
# against the MiCall amino.csv format).
AMINO_ACIDS = ['A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y','*']
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
9 |
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
def determine_amino(amino_counts, threshold):
    """Call a single consensus character from per-amino-acid counts.

    Args:
        amino_counts: mapping of amino acid code -> count at one position.
        threshold: fraction of the total count that the most frequent amino
            acid must strictly exceed to be called.

    Returns:
        "#" when the counts sum to zero (including an empty mapping), the
        most frequent amino acid when its share of the total is greater than
        ``threshold``, and "@" otherwise.
    """
    total_count = sum(amino_counts.values())
    # Check for "no coverage" first: with an empty mapping there is no
    # maximum to look up, and with a zero total the ratio is undefined.
    if total_count == 0:
        return "#"
    # max() returns the first maximal key in iteration order, which is the
    # same tie-break a stable descending sort followed by [0] would give.
    top_amino = max(amino_counts, key=amino_counts.get)
    # float() keeps true division under Python 2 as well.
    if amino_counts[top_amino] / float(total_count) > threshold:
        return top_amino
    return "@"
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
21 |
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
def determine_first_region(amino_file):
    """Return the 'region' value of the first data row in a CSV file.

    Args:
        amino_file: path to a CSV file with a header row that includes a
            'region' column.

    Returns:
        The 'region' field of the first row after the header.

    Raises:
        ValueError: if the file has no data rows.
        KeyError: if the header has no 'region' column.
    """
    with open(amino_file) as f:
        # A default of None avoids leaking a bare StopIteration when the
        # file is empty or contains only the header line.
        first_row = next(csv.DictReader(f), None)
    if first_row is None:
        raise ValueError("no data rows found in %s" % amino_file)
    return first_row['region']
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
28 |
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
def _print_fasta_record(region, seq):
    """Print one FASTA-style record: a '>' header line, then the sequence."""
    print(">" + region)
    print(''.join(seq))


def main(args):
    """Print a consensus sequence for each region in the amino CSV.

    Rows are read in file order; every run of consecutive rows that share
    the same 'region' value becomes one record, with one character per row
    chosen by ``determine_amino``.

    Args:
        args: object with ``amino`` (path to the CSV file) and ``threshold``
            (calling threshold; a number or a numeric string).

    A file with no data rows produces no output.
    """
    # argparse hands over a string when no type= is configured for the
    # option; normalise once so the comparison downstream is numeric.
    threshold = float(args.threshold)
    current_region = None
    seq = None  # stays None until the first data row has been seen
    with open(args.amino) as f:
        for row in csv.DictReader(f):
            region = row['region']
            if seq is None or region != current_region:
                # Region boundary: flush the finished record, start a new one.
                if seq is not None:
                    _print_fasta_record(current_region, seq)
                current_region = region
                seq = []
            amino_counts = {aa: int(row[aa]) for aa in AMINO_ACIDS}
            seq.append(determine_amino(amino_counts, threshold))
    # Flush the final region, which has no following boundary to trigger it.
    if seq is not None:
        _print_fasta_record(current_region, seq)
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
53 |
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
54 |
61667ff2c8b5
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/tools/micall-lite commit 822e7e1c2de31a72c2a13bcc15b9df06b699561f-dirty
dfornika
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and run main().
    parser = argparse.ArgumentParser()
    parser.add_argument("amino", help="MiCall amino.csv output file")
    # type=float is required: without it a user-supplied value arrives as a
    # string and cannot be compared with the computed frequency ratio.
    parser.add_argument("--threshold", type=float, default=0.15, help="Threshold for calling")
    args = parser.parse_args()
    main(args)