Mercurial > repos > dfornika > micall_lite
changeset 29:4ff24c044fed draft default tip
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/micall-lite commit 9d563c366233a5de79429ac1fa8f994f5d8f785d-dirty"
author | dfornika |
---|---|
date | Thu, 27 Feb 2020 22:31:32 +0000 |
parents | c0cdaa42b541 |
children | |
files | amino2consensus.py macros.xml resistance.py tool_data_table_conf.xml.test |
diffstat | 4 files changed, 140 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/amino2consensus.py Thu Feb 27 22:31:32 2020 +0000 @@ -0,0 +1,64 @@ +#!/usr/bin/env python + +from __future__ import print_function + +import argparse +import csv + + +AMINO_ACIDS = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y', '*'] + + +def determine_amino(amino_counts, threshold): + amino = "" + total_count = sum(amino_counts.values()) + amino_with_max_counts = sorted(amino_counts, key=amino_counts.get, reverse=True)[0] + if total_count == 0: + amino = "#" + elif (amino_counts[amino_with_max_counts] / float(total_count)) > threshold: + amino = amino_with_max_counts + else: + amino = "@" + return amino + + +def determine_first_region(amino_file): + with open(amino_file) as f: + reader = csv.DictReader(f) + row = next(reader) + region = row['region'] + return region + + +def main(args): + current_region = determine_first_region(args.amino) + seq = [] + with open(args.amino) as f: + reader = csv.DictReader(f) + for row in reader: + if row['region'] == current_region: + amino_counts = {} + for amino_acid in AMINO_ACIDS: + amino_counts[amino_acid] = int(row[amino_acid]) + amino = determine_amino(amino_counts, args.threshold) + seq.append(amino) + else: + print(">" + current_region) + print(''.join(seq)) + current_region = row['region'] + seq = [] + amino_counts = {} + for amino_acid in AMINO_ACIDS: + amino_counts[amino_acid] = int(row[amino_acid]) + amino = determine_amino(amino_counts, args.threshold) + seq.append(amino) + print(">" + current_region) + print(''.join(seq)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("amino", help="MiCall amino.csv output file") + parser.add_argument("--threshold", default=0.15, type=float, help="Threshold for calling") + args = parser.parse_args() + main(args)
--- a/macros.xml Thu Feb 27 18:37:19 2020 +0000 +++ b/macros.xml Thu Feb 27 22:31:32 2020 +0000 @@ -5,8 +5,6 @@ </token> <xml name="citations"> <citations> - <citation type="bibtex"> - </citation> </citations> </xml> </macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resistance.py Thu Feb 27 22:31:32 2020 +0000 @@ -0,0 +1,68 @@ +#!/usr/bin/env python + +from __future__ import print_function + + +import argparse +from pprint import pprint + + +import yaml + + +HCV_RULES_VERSION = "1.8" + + +def load_rules_config(rules_config, genotype, backup_genotype=None): + rules = { + 'drug_class': {}, + 'drugs': {} + } + rules['alg_name'] = 'HCV_RULES' + rules['alg_version'] = HCV_RULES_VERSION + rules['level_def'] = { + '-1': 'Resistance Interpretation Not Available', + '0': 'Sequence does not meet quality-control standards', + '1': 'Likely Susceptible', + '2': 'Not Indicated', + '3': 'Mutations Detected; Effect Unknown', + '4': 'Resistance Possible', + '5': 'Resistance Likely' + } + rules['global_range'] = [('-INF', '3', '1'), ('4', '7', '4'), ('8', 'INF', '5')] + for drug in rules_config: + drug_code = drug['code'] + drug_rules = [] + region = None + for genotype_config in drug['genotypes']: + region = genotype_config['region'] + rule_text = genotype_config['rules'] + if genotype_config['genotype'] == genotype: + rules['gene_def'][genotype_config['reference']] = [region] + break + elif genotype_config['genotype'] == backup_genotype: + rules['gene_def'].setdefault(genotype_config['reference'], [region]) + break + else: + rule_text = 'SCORE FROM ( TRUE => "Not available" )' + drug_rules.append((rule_text, [('scorerange', 'useglobalrange')])) + try: + rules['drug_class'][region].append(drug_code) + except KeyError: + rules['drug_class'][region] = [drug_code] + rules['drugs'][drug_code] = (drug['name'], drug_rules) + return rules + + +def main(args): + with open(args.rules) as f: + rules = load_rules_config(yaml.safe_load(f), None) + pprint(rules) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("consensus", help="Consensus fasta") + parser.add_argument("--rules", help="Rules file (yaml)") + args = parser.parse_args() + main(args)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Thu Feb 27 22:31:32 2020 +0000 @@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of MiCall-Lite projects files in the required format --> + <table name="micall_lite_projects_files" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/test-data/micall_lite_projects_files.loc" /> + </table> +</tables>