Mercurial > repos > jowong > prince_galaxy
changeset 0:88748989a22a draft default tip
planemo upload
author | jowong |
---|---|
date | Tue, 13 Nov 2018 11:41:37 -0500 |
parents | |
children | |
files | Galaxy-Workflow-prince_workflow.ga data_path.py data_path.xml filler.py filler.xml prince.xml prince_postprocess.py prince_postprocess.xml |
diffstat | 8 files changed, 214 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Galaxy-Workflow-prince_workflow.ga Tue Nov 13 11:41:37 2018 -0500 @@ -0,0 +1,1 @@ +{"uuid": "9f360164-f520-437c-b0e5-3cf7c2b42424", "tags": [], "format-version": "0.1", "name": "prince workflow", "steps": {"0": {"tool_id": null, "tool_version": null, "outputs": [], "workflow_outputs": [], "input_connections": {}, "tool_state": "{\"collection_type\": \"list:paired\"}", "id": 0, "uuid": "f2412c19-6997-4c16-9286-39939c4edac6", "errors": null, "name": "Input dataset collection", "label": null, "inputs": [], "position": {"top": 368.5, "left": 200}, "annotation": "", "content_id": null, "type": "data_collection_input"}, "1": {"tool_id": "data_path", "tool_version": "1.0.0", "outputs": [{"type": "txt", "name": "output"}], "workflow_outputs": [], "input_connections": {"paths": {"output_name": "output", "id": 0}}, "tool_state": "{\"paths\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__rerun_remap_job_id__\": null, \"__page__\": null}", "id": 1, "uuid": "d67bbfc0-5495-4e71-a0d1-f8856ac4b57e", "errors": null, "name": "Data Path", "post_job_actions": {"HideDatasetActionoutput": {"output_name": "output", "action_type": "HideDatasetAction", "action_arguments": {}}}, "label": null, "inputs": [{"name": "paths", "description": "runtime parameter for tool Data Path"}], "position": {"top": 433.5, "left": 492.5}, "annotation": "", "content_id": "data_path", "type": "tool"}, "2": {"tool_id": "filler", "tool_version": "1.0.0", "outputs": [{"type": "data", "name": "output"}], "workflow_outputs": [], "input_connections": {"data_input|input1": {"output_name": "output", "id": 0}}, "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"data_input\": \"{\\\"data_selector\\\": \\\"paired\\\", \\\"input1\\\": {\\\"values\\\": [{\\\"src\\\": \\\"hdca\\\", \\\"id\\\": 22}]}, \\\"__current_case__\\\": 0}\"}", "id": 2, "uuid": "fcba5578-0995-4ad2-930f-1978b45d1ff9", "errors": null, "name": "Filler", "post_job_actions": 
{"HideDatasetActionoutput": {"output_name": "output", "action_type": "HideDatasetAction", "action_arguments": {}}}, "label": null, "inputs": [], "position": {"top": 224.5, "left": 725}, "annotation": "", "content_id": "filler", "type": "tool"}, "3": {"tool_id": "toolshed.g2.bx.psu.edu/repos/jowong/prince_galaxy/prince/0.1.1", "tool_version": "0.1.1", "outputs": [{"type": "txt", "name": "output1"}], "workflow_outputs": [], "input_connections": {"input1": {"output_name": "output", "id": 1}}, "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input1\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}", "id": 3, "tool_shed_repository": {"owner": "jowong", "changeset_revision": "dde3036465de", "name": "prince_galaxy", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "82f54092-9803-487e-9bc9-8d77a621ecfe", "errors": null, "name": "VNTR copy number approximation", "post_job_actions": {"HideDatasetActionoutput1": {"output_name": "output1", "action_type": "HideDatasetAction", "action_arguments": {}}}, "label": null, "inputs": [{"name": "input1", "description": "runtime parameter for tool VNTR copy number approximation"}], "position": {"top": 370.5, "left": 725}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/jowong/prince_galaxy/prince/0.1.1", "type": "tool"}, "4": {"tool_id": "prince_postprocess", "tool_version": "1.0.0", "outputs": [{"type": "txt", "name": "output"}], "workflow_outputs": [{"output_name": "output", "uuid": "b978568d-5780-46fc-9b94-7dd52decc0ed", "label": null}], "input_connections": {"paths": {"output_name": "output", "id": 2}, "prince_output": {"output_name": "output1", "id": 3}}, "tool_state": "{\"paths\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__rerun_remap_job_id__\": null, \"prince_output\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__page__\": null}", "id": 4, "uuid": "ad79612c-1076-4fa0-a919-e5f119caa7b8", "errors": null, "name": "Prince Postprocess", "post_job_actions": {}, "label": null, "inputs": 
[{"name": "paths", "description": "runtime parameter for tool Prince Postprocess"}, {"name": "prince_output", "description": "runtime parameter for tool Prince Postprocess"}], "position": {"top": 374.5, "left": 1039.5}, "annotation": "", "content_id": "prince_postprocess", "type": "tool"}}, "annotation": "", "a_galaxy_workflow": "true"} \ No newline at end of file
#!/usr/bin/env python
# Recovered from the changeset diff of b/data_path.py
# (new file, Tue Nov 13 11:41:37 2018 -0500).
"""Write the Galaxy file paths of datasets to ``paths.txt``.

One line is written per ``-i`` path.  When ``-j`` paths are supplied as
well, each line holds the ``-i`` path and the ``-j`` path at the same
position, separated by a tab.
"""

import sys
import argparse as ap

# Fixed output name; data_path.xml collects it via from_work_dir="paths.txt".
OUTPUT_FILE = 'paths.txt'


def build_parser():
    """Return the argument parser for the ``data_path`` command line."""
    parser = ap.ArgumentParser(prog='data_path', conflict_handler='resolve',
                               description="Output the galaxy file path of datasets in a text file")

    # Named ``group`` rather than ``input`` so the builtin is not shadowed.
    group = parser.add_argument_group('Input', '')
    group.add_argument('-i', '--input', nargs='+', required=True, help="Paths to data1")
    # ``-j`` must be present but may carry zero values (single-end data).
    group.add_argument('-j', '--input2', nargs='*', required=True, help="Paths to data2")
    return parser


def main(argv=None):
    """Parse *argv* and write the path table to ``paths.txt``.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when no arguments were given (usage is printed).

    Raises:
        SystemExit: raised by argparse on invalid arguments, including a
            ``-j`` list whose length differs from the ``-i`` list.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = build_parser()

    # sys.argv always contains the script name, so the original check
    # ``len(sys.argv) == 0`` could never trigger; test the real arguments.
    if not argv:
        parser.print_usage()
        return 1

    args = parser.parse_args(argv)

    # Pairing is positional, so a length mismatch would either crash midway
    # (leaving a truncated file) or silently drop paths.
    if args.input2 and len(args.input2) != len(args.input):
        parser.error("-j/--input2 must list as many paths as -i/--input "
                     "(got %d and %d)" % (len(args.input2), len(args.input)))

    # The context manager guarantees the file is flushed and closed.
    with open(OUTPUT_FILE, 'w') as output:
        if not args.input2:
            for path in args.input:
                output.write("%s\n" % path)
        else:
            for first, second in zip(args.input, args.input2):
                output.write("%s\t%s\n" % (first, second))
    return 0


if __name__ == '__main__':
    sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_path.xml Tue Nov 13 11:41:37 2018 -0500 @@ -0,0 +1,17 @@ +<tool id="data_path" name="Data Path" version="1.0.0"> + <description>creates a txt file of the paths of items in a data collection</description> + <command interpreter="python"><![CDATA[ + data_path.py -i #for $path in $paths# $path.forward #end for# -j #for $path in $paths# $path.reverse #end for# + ]]></command> + <inputs> + <param name="paths" type="data_collection" format="data" label="Collection of files" help="" optional="False" multiple="True"/> + </inputs> + <outputs> + <data name="output" label="Paths of ${on_string}" format="txt" from_work_dir="paths.txt"/> + </outputs> + <help> +This tool outputs the paths of the files in a data collection + </help> + <citations> + </citations> +</tool>
#!/usr/bin/env python
# Recovered from the changeset diff of b/filler.py
# (new file, Tue Nov 13 11:41:37 2018 -0500).
"""Write a single input path, followed by a newline, to an output file."""

import sys
import argparse as ap


def build_parser():
    """Return the argument parser for the ``filler`` command line."""
    parser = ap.ArgumentParser(prog='filler', conflict_handler='resolve',
                               description="produce filler collection to deal with galaxy handling")

    # Named ``group`` rather than ``input`` so the builtin is not shadowed.
    group = parser.add_argument_group('Input', '')
    group.add_argument('-i', '--input', nargs=1, required=True, help="Paths to (forward) reads")
    group.add_argument('-o', '--output', nargs=1, required=True, help="output")
    return parser


def main(argv=None):
    """Parse *argv* and write the ``-i`` path to the ``-o`` file.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when no arguments were given (usage is printed).

    Raises:
        SystemExit: raised by argparse on invalid arguments.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = build_parser()

    # sys.argv always contains the script name, so the original check
    # ``len(sys.argv) == 0`` could never trigger; test the real arguments.
    if not argv:
        parser.print_usage()
        return 1

    args = parser.parse_args(argv)

    # nargs=1 yields one-element lists, hence the [0] indexing.
    # The context manager guarantees the file is flushed and closed.
    with open(args.output[0], 'w') as output:
        output.write(args.input[0])
        output.write("\n")
    return 0


if __name__ == '__main__':
    sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filler.xml Tue Nov 13 11:41:37 2018 -0500 @@ -0,0 +1,36 @@ +<tool id="filler" name="Filler" version="1.0.1"> + <description>Filler Function to deal with paired collection</description> + <command interpreter="python"><![CDATA[ + filler.py + #if str( $data_input.data_selector ) == "paired" + -i $data_input.input1.forward + #end if + #if str( $data_input.data_selector ) == "single" + -i "$data_input.input2" + #end if + + -o $output + ]]></command> + <inputs> + <conditional name="data_input"> + <param name="data_selector" type="select" label="Single or Paired-end Data" help="Select between paired and single end data to add name to dataset"> + <option value="paired">Paired</option> + <option value="single">Single</option> + </param> + <when value="paired"> + <param name="input1" format="data" type="data_collection" collection_type="paired" label="Select a paired collection" help="a paired data"/> + </when> + <when value="single"> + <param name="input2" format="data" type="data" label="input" help="Specify dataset with single reads"/> + </when> + </conditional> + </inputs> + <outputs> + <data name="output" type="data_collection"/> + </outputs> + <help> +This tool writes the file path of the selected forward-read (or single-read) dataset to a one-line text file, which serves as a filler dataset when handling paired collections + </help> + <citations> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/prince.xml Tue Nov 13 11:41:37 2018 -0500 @@ -0,0 +1,48 @@ +<tool id="prince" name="VNTR copy number approximation" version="1.1.0"> + <description>Calculates CNV of genomic data based on template</description> + <requirements> + <requirement type="package" version="1.2">prince</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + prince -tf "$input1" -to "$output1" + ]]></command> + <inputs> + <param type="data" name="input1" format="txt" /> + </inputs> + <outputs> + <data name="output1" format="txt" /> + </outputs> + <tests> + <test> + <param name="input1" value="prince_input.txt"/> + <output name="output1" file="prince_output.txt"/> + </test> + </tests> + <help><![CDATA[ + usage: prince [-h] [-bo BOOST_OUTPUT] [-to TARGET_OUTPUT] [-tmp TEMPLATES] [-tf TARGET_FILE] [-bf BOOSTING_FILE] [-k K] [-cn COPYNUMBER] + + Prince Options. + + optional arguments: + -h, --help Show this help message and exit + -bo, --boost_output Output file for training data / training data used to predict copy numbers for queries + -to, --target_output Output file for query copy number predictions + -tmp, --templates VNTR templates. Default is for M.TB + -tf, --target_file Target genome names in a text file + -bf, --boosting_file Training genome file names in a text file + -k, --k Kmer size used during read recruitment + -cn, --copynumber Copy number used for training genome + + ]]></help> + <citations> + <citation type="bibtex"> +@misc{githubPythonPRINCE, + author = {Booth, Julian}, + year = {2018}, + title = {PythonPRINCE}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/WGS-TB/PythonPRINCE}, +}</citation> + </citations> +</tool>
#!/usr/bin/env python
# Recovered from the changeset diff of b/prince_postprocess.py
# (new file, Tue Nov 13 11:41:37 2018 -0500).
"""Replace dataset file names in PRINCE output with sample names.

The result is written to ``prince_postprocess_output.txt`` in the current
working directory.
"""

import sys
import argparse as ap
import re

# Fixed output name; prince_postprocess.xml collects it via from_work_dir.
OUTPUT_FILE = 'prince_postprocess_output.txt'

# Trailing read suffix stripped from sample names: an optional "_1"/"_2"
# mate marker, ".fastq", and any number of ".gz".  Dots are escaped and the
# pattern is anchored so only a genuine file-name suffix is removed.
READ_SUFFIX = re.compile(r'(_[12])?\.fastq(\.gz)*$')


def build_parser():
    """Return the argument parser for the ``prince_postprocess`` command line."""
    parser = ap.ArgumentParser(prog='prince_postprocess', conflict_handler='resolve',
                               description="Postprocess galaxy PRINCE output")

    # Named ``group`` rather than ``input`` so the builtin is not shadowed.
    group = parser.add_argument_group('Input', '')
    group.add_argument('-i', '--input', nargs=1, required=True, help="PRINCE OUTPUT")
    group.add_argument('-s', '--sample', nargs='+', required=True, help="Sample names")
    group.add_argument('-f', '--file', nargs='+', required=True,
                       help="File of forward reads in galaxy convention")
    return parser


def load_sample_names(filler_paths, sample_names):
    """Map dataset file names to sample names.

    Each file in *filler_paths* holds dataset paths, one per line.  Every
    path's final component becomes a key whose value is the entry of
    *sample_names* at the same position as the file.

    Args:
        filler_paths: Paths of the text files listing dataset paths.
        sample_names: Sample names, positionally aligned with *filler_paths*.

    Returns:
        dict mapping dataset file name to sample name.
    """
    sample_by_dataset = {}
    for filler_path, sample_name in zip(filler_paths, sample_names):
        with open(filler_path) as handle:
            for line in handle:
                # Strip the line terminator: keys that keep "\n" can never
                # match the first field of a PRINCE row.
                dataset_path = line.strip()
                if dataset_path:
                    sample_by_dataset[dataset_path.rsplit('/', 1)[-1]] = sample_name
    return sample_by_dataset


def rewrite_prince_output(prince_lines, sample_by_dataset):
    """Yield PRINCE output lines with dataset names replaced by sample names.

    Odd-numbered lines (1-based) are passed through unchanged; presumably
    these are header lines -- confirm against actual PRINCE output.  On
    even-numbered lines the first comma-separated field is looked up in
    *sample_by_dataset* and replaced by the sample name with its read
    suffix removed.

    Raises:
        KeyError: if a row's first field has no known sample name.
    """
    for number, line in enumerate(prince_lines, 1):
        if number % 2:
            yield line
            continue
        dataset_name = line.rstrip().split(',')[0]
        try:
            sample_name = sample_by_dataset[dataset_name]
        except KeyError:
            raise KeyError("no sample name known for PRINCE entry %r" % dataset_name)
        # The first field is a literal prefix of the line, so splice the
        # replacement in directly instead of using the file name as a regex
        # pattern (its dots and other metacharacters are not literal there).
        yield READ_SUFFIX.sub('', sample_name) + line[len(dataset_name):]


def main(argv=None):
    """Parse *argv* and write the post-processed PRINCE output.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when no arguments were given (usage is printed).

    Raises:
        SystemExit: raised by argparse on invalid arguments, including
            ``-s`` and ``-f`` lists of different lengths.
        KeyError: if a PRINCE row names a dataset missing from the ``-f`` files.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = build_parser()

    # sys.argv always contains the script name, so the original check
    # ``len(sys.argv) == 0`` could never trigger; test the real arguments.
    if not argv:
        parser.print_usage()
        return 1

    args = parser.parse_args(argv)

    # Sample names are matched to files by position.
    if len(args.sample) != len(args.file):
        parser.error("-s/--sample and -f/--file must have the same number of "
                     "values (got %d and %d)" % (len(args.sample), len(args.file)))

    # The original read an undefined name ``sample`` here (NameError); the
    # sample names come from the ``-s`` argument.
    sample_by_dataset = load_sample_names(args.file, args.sample)

    with open(args.input[0]) as prince_output:
        with open(OUTPUT_FILE, 'w') as output:
            output.writelines(rewrite_prince_output(prince_output, sample_by_dataset))
    return 0


if __name__ == '__main__':
    sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/prince_postprocess.xml Tue Nov 13 11:41:37 2018 -0500 @@ -0,0 +1,18 @@ +<tool id="prince_postprocess" name="Prince Postprocess" version="1.0.0"> + <description>Postprocess PRINCE galaxy output</description> + <command interpreter="python"><![CDATA[ + prince_postprocess.py -i $prince_output -s #for $path in $paths# $path.element_identifier #end for# -f #for $path in $paths# $path #end for# + ]]></command> + <inputs> + <param name="paths" type="data" format="data" label="Collection of files" help="" optional="False" multiple="True"/> + <param name="prince_output" format="txt" type="data" label="PRINCE output" /> + </inputs> + <outputs> + <data name="output" format="txt" from_work_dir="prince_postprocess_output.txt"/> + </outputs> + <help> +This tool processes the PRINCE galaxy output such that it is in line with the command line + </help> + <citations> + </citations> +</tool>