Mercurial > repos > dfornika > micall_lite
annotate amino2consensus.py @ 29:4ff24c044fed draft default tip
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
author | dfornika |
---|---|
date | Thu, 27 Feb 2020 22:31:32 +0000 |
parents | |
children |
rev | line source |
---|---|
29
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
1 #!/usr/bin/env python |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
2 |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
3 from __future__ import print_function |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
4 |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
5 import argparse |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
6 import csv |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
7 |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
8 |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
# One-letter symbols whose counts are read from each CSV row (20 residues plus '*').
AMINO_ACIDS = list('ACDEFGHIKLMNPQRSTVWY*')
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
10 |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
11 |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
def determine_amino(amino_counts, threshold):
    """Call the consensus symbol for a single position.

    Args:
        amino_counts: mapping of symbol -> observed count at this position.
        threshold: fraction of the total count that the most frequent
            symbol must strictly exceed in order to be called.

    Returns:
        The most frequent symbol when its share of the total count is
        greater than ``threshold``; ``"#"`` when the counts sum to zero
        (including an empty mapping); ``"@"`` otherwise.
    """
    total_count = sum(amino_counts.values())
    # Check coverage first so an empty mapping never reaches max().
    if total_count == 0:
        return "#"
    # max() returns the first symbol among ties, which is the same winner the
    # stable descending sort produced.
    amino_with_max_counts = max(amino_counts, key=amino_counts.get)
    # float() keeps true division under Python 2 as well.
    if amino_counts[amino_with_max_counts] / float(total_count) > threshold:
        return amino_with_max_counts
    return "@"
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
23 |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
24 |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
def determine_first_region(amino_file):
    """Return the ``region`` value of the first data row of a CSV file.

    Args:
        amino_file: path to a CSV file whose header row includes a
            ``region`` column.

    Returns:
        The ``region`` field of the first row after the header.

    Raises:
        ValueError: if the file contains no data rows.
    """
    with open(amino_file) as f:
        reader = csv.DictReader(f)
        # Supplying a default keeps a bare StopIteration from leaking out of
        # this function when there is nothing after the header.
        row = next(reader, None)
    if row is None:
        raise ValueError("no data rows found in " + str(amino_file))
    return row['region']
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
31 |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
32 |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
def main(args):
    """Print one FASTA-style record per run of rows sharing a region.

    Reads the CSV at ``args.amino`` in a single pass. Consecutive rows with
    the same ``region`` value form one record: a ``>region`` header line
    followed by one line holding the per-row calls from ``determine_amino``.
    A file with no data rows produces no output.

    Args:
        args: object with ``amino`` (path to the CSV file) and ``threshold``
            (passed through to ``determine_amino``) attributes.
    """
    # Local import keeps this block self-contained.
    from itertools import groupby

    with open(args.amino) as f:
        reader = csv.DictReader(f)
        # groupby batches *consecutive* rows with equal region, which matches
        # the "flush when the region changes" behaviour of a manual loop.
        for region, rows in groupby(reader, key=lambda row: row['region']):
            seq = []
            for row in rows:
                amino_counts = {
                    amino_acid: int(row[amino_acid])
                    for amino_acid in AMINO_ACIDS
                }
                seq.append(determine_amino(amino_counts, args.threshold))
            print(">" + region)
            print(''.join(seq))
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
57 |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
58 |
4ff24c044fed
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
dfornika
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: build the parser, then hand the parsed
    # arguments straight to main().
    cli = argparse.ArgumentParser()
    cli.add_argument("amino", help="MiCall amino.csv output file")
    cli.add_argument("--threshold", default=0.15, type=float, help="Threshold for calling")
    main(cli.parse_args())