changeset 29:4ff24c044fed draft default tip

"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
author dfornika
date Thu, 27 Feb 2020 22:31:32 +0000
parents c0cdaa42b541
children
files amino2consensus.py macros.xml resistance.py tool_data_table_conf.xml.test
diffstat 4 files changed, 140 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/amino2consensus.py	Thu Feb 27 22:31:32 2020 +0000
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import argparse
+import csv
+
+
+AMINO_ACIDS = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y', '*']
+
+
+def determine_amino(amino_counts, threshold):
+    amino = ""
+    total_count = sum(amino_counts.values())
+    amino_with_max_counts = sorted(amino_counts, key=amino_counts.get, reverse=True)[0]
+    if total_count == 0:
+        amino = "#"
+    elif (amino_counts[amino_with_max_counts] / float(total_count)) > threshold:
+        amino = amino_with_max_counts
+    else:
+        amino = "@"
+    return amino
+
+
+def determine_first_region(amino_file):
+    with open(amino_file) as f:
+        reader = csv.DictReader(f)
+        row = next(reader)
+        region = row['region']
+    return region
+
+
+def main(args):
+    current_region = determine_first_region(args.amino)
+    seq = []
+    with open(args.amino) as f:
+        reader = csv.DictReader(f)
+        for row in reader:
+            if row['region'] == current_region:
+                amino_counts = {}
+                for amino_acid in AMINO_ACIDS:
+                    amino_counts[amino_acid] = int(row[amino_acid])
+                amino = determine_amino(amino_counts, args.threshold)
+                seq.append(amino)
+            else:
+                print(">" + current_region)
+                print(''.join(seq))
+                current_region = row['region']
+                seq = []
+                amino_counts = {}
+                for amino_acid in AMINO_ACIDS:
+                    amino_counts[amino_acid] = int(row[amino_acid])
+                amino = determine_amino(amino_counts, args.threshold)
+                seq.append(amino)
+        print(">" + current_region)
+        print(''.join(seq))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("amino", help="MiCall amino.csv output file")
+    parser.add_argument("--threshold", default=0.15, type=float, help="Threshold for calling")
+    args = parser.parse_args()
+    main(args)
--- a/macros.xml	Thu Feb 27 18:37:19 2020 +0000
+++ b/macros.xml	Thu Feb 27 22:31:32 2020 +0000
@@ -5,8 +5,6 @@
     </token>
     <xml name="citations">
         <citations>
-            <citation type="bibtex">
-            </citation>
         </citations>
     </xml>
 </macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/resistance.py	Thu Feb 27 22:31:32 2020 +0000
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+
+import argparse
+from pprint import pprint
+
+
+import yaml
+
+
+HCV_RULES_VERSION = "1.8"
+
+
+def load_rules_config(rules_config, genotype, backup_genotype=None):
+    rules = {
+        'drug_class': {},
+        'drugs': {}
+    }
+    rules['alg_name'] = 'HCV_RULES'
+    rules['alg_version'] = HCV_RULES_VERSION
+    rules['level_def'] = {
+        '-1': 'Resistance Interpretation Not Available',
+        '0': 'Sequence does not meet quality-control standards',
+        '1': 'Likely Susceptible',
+        '2': 'Not Indicated',
+        '3': 'Mutations Detected; Effect Unknown',
+        '4': 'Resistance Possible',
+        '5': 'Resistance Likely'
+    }
+    rules['global_range'] = [('-INF', '3', '1'), ('4', '7', '4'), ('8', 'INF', '5')]
+    for drug in rules_config:
+        drug_code = drug['code']
+        drug_rules = []
+        region = None
+        for genotype_config in drug['genotypes']:
+            region = genotype_config['region']
+            rule_text = genotype_config['rules']
+            if genotype_config['genotype'] == genotype:
+                rules['gene_def'][genotype_config['reference']] = [region]
+                break
+            elif genotype_config['genotype'] == backup_genotype:
+                rules['gene_def'].setdefault(genotype_config['reference'], [region])
+                break
+            else:
+                rule_text = 'SCORE FROM ( TRUE => "Not available" )'
+        drug_rules.append((rule_text, [('scorerange', 'useglobalrange')]))
+        try:
+            rules['drug_class'][region].append(drug_code)
+        except KeyError:
+            rules['drug_class'][region] = [drug_code]
+        rules['drugs'][drug_code] = (drug['name'], drug_rules)
+    return rules
+
+
+def main(args):
+    with open(args.rules) as f:
+        rules = load_rules_config(yaml.safe_load(f), None)
+    pprint(rules)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("consensus", help="Consensus fasta")
+    parser.add_argument("--rules", help="Rules file (yaml)")
+    args = parser.parse_args()
+    main(args)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Thu Feb 27 22:31:32 2020 +0000
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of MiCall-Lite projects files in the required format -->
+    <table name="micall_lite_projects_files" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/micall_lite_projects_files.loc" />
+    </table>
+</tables>