changeset 0:88748989a22a draft default tip

planemo upload
author jowong
date Tue, 13 Nov 2018 11:41:37 -0500
parents
children
files Galaxy-Workflow-prince_workflow.ga data_path.py data_path.xml filler.py filler.xml prince.xml prince_postprocess.py prince_postprocess.xml
diffstat 8 files changed, 214 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Galaxy-Workflow-prince_workflow.ga	Tue Nov 13 11:41:37 2018 -0500
@@ -0,0 +1,1 @@
+{"uuid": "9f360164-f520-437c-b0e5-3cf7c2b42424", "tags": [], "format-version": "0.1", "name": "prince workflow", "steps": {"0": {"tool_id": null, "tool_version": null, "outputs": [], "workflow_outputs": [], "input_connections": {}, "tool_state": "{\"collection_type\": \"list:paired\"}", "id": 0, "uuid": "f2412c19-6997-4c16-9286-39939c4edac6", "errors": null, "name": "Input dataset collection", "label": null, "inputs": [], "position": {"top": 368.5, "left": 200}, "annotation": "", "content_id": null, "type": "data_collection_input"}, "1": {"tool_id": "data_path", "tool_version": "1.0.0", "outputs": [{"type": "txt", "name": "output"}], "workflow_outputs": [], "input_connections": {"paths": {"output_name": "output", "id": 0}}, "tool_state": "{\"paths\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__rerun_remap_job_id__\": null, \"__page__\": null}", "id": 1, "uuid": "d67bbfc0-5495-4e71-a0d1-f8856ac4b57e", "errors": null, "name": "Data Path", "post_job_actions": {"HideDatasetActionoutput": {"output_name": "output", "action_type": "HideDatasetAction", "action_arguments": {}}}, "label": null, "inputs": [{"name": "paths", "description": "runtime parameter for tool Data Path"}], "position": {"top": 433.5, "left": 492.5}, "annotation": "", "content_id": "data_path", "type": "tool"}, "2": {"tool_id": "filler", "tool_version": "1.0.0", "outputs": [{"type": "data", "name": "output"}], "workflow_outputs": [], "input_connections": {"data_input|input1": {"output_name": "output", "id": 0}}, "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"data_input\": \"{\\\"data_selector\\\": \\\"paired\\\", \\\"input1\\\": {\\\"values\\\": [{\\\"src\\\": \\\"hdca\\\", \\\"id\\\": 22}]}, \\\"__current_case__\\\": 0}\"}", "id": 2, "uuid": "fcba5578-0995-4ad2-930f-1978b45d1ff9", "errors": null, "name": "Filler", "post_job_actions": {"HideDatasetActionoutput": {"output_name": "output", "action_type": "HideDatasetAction", "action_arguments": {}}}, "label": null, 
"inputs": [], "position": {"top": 224.5, "left": 725}, "annotation": "", "content_id": "filler", "type": "tool"}, "3": {"tool_id": "toolshed.g2.bx.psu.edu/repos/jowong/prince_galaxy/prince/0.1.1", "tool_version": "0.1.1", "outputs": [{"type": "txt", "name": "output1"}], "workflow_outputs": [], "input_connections": {"input1": {"output_name": "output", "id": 1}}, "tool_state": "{\"__page__\": null, \"__rerun_remap_job_id__\": null, \"input1\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}", "id": 3, "tool_shed_repository": {"owner": "jowong", "changeset_revision": "dde3036465de", "name": "prince_galaxy", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "82f54092-9803-487e-9bc9-8d77a621ecfe", "errors": null, "name": "VNTR copy number approximation", "post_job_actions": {"HideDatasetActionoutput1": {"output_name": "output1", "action_type": "HideDatasetAction", "action_arguments": {}}}, "label": null, "inputs": [{"name": "input1", "description": "runtime parameter for tool VNTR copy number approximation"}], "position": {"top": 370.5, "left": 725}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/jowong/prince_galaxy/prince/0.1.1", "type": "tool"}, "4": {"tool_id": "prince_postprocess", "tool_version": "1.0.0", "outputs": [{"type": "txt", "name": "output"}], "workflow_outputs": [{"output_name": "output", "uuid": "b978568d-5780-46fc-9b94-7dd52decc0ed", "label": null}], "input_connections": {"paths": {"output_name": "output", "id": 2}, "prince_output": {"output_name": "output1", "id": 3}}, "tool_state": "{\"paths\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__rerun_remap_job_id__\": null, \"prince_output\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"__page__\": null}", "id": 4, "uuid": "ad79612c-1076-4fa0-a919-e5f119caa7b8", "errors": null, "name": "Prince Postprocess", "post_job_actions": {}, "label": null, "inputs": [{"name": "paths", "description": "runtime parameter for tool Prince Postprocess"}, {"name": "prince_output", "description": 
"runtime parameter for tool Prince Postprocess"}], "position": {"top": 374.5, "left": 1039.5}, "annotation": "", "content_id": "prince_postprocess", "type": "tool"}}, "annotation": "", "a_galaxy_workflow": "true"}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_path.py	Tue Nov 13 11:41:37 2018 -0500
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+"""Write Galaxy dataset paths to paths.txt: one path, or a tab-separated pair, per line."""
+import sys
+import argparse as ap
+
+parser = ap.ArgumentParser(prog='data_path', conflict_handler='resolve',
+                           description="Output the galaxy file path of datasets in a text file")
+
+inputs = parser.add_argument_group('Input', '')
+inputs.add_argument('-i', '--input', nargs='+', required=True, help="Paths to data1")
+inputs.add_argument('-j', '--input2', nargs='*', required=True, help="Paths to data2")
+
+# sys.argv always contains the script name, so an argument-less call has length 1.
+if len(sys.argv) == 1:
+    parser.print_usage()
+    sys.exit(1)
+
+args = parser.parse_args()
+if args.input2 and len(args.input2) < len(args.input):
+    parser.error("-j/--input2 needs one path for every -i/--input path")
+with open('paths.txt', 'w') as output:  # closed (and flushed) even if a write fails
+    for index, path in enumerate(args.input):
+        row = (path, args.input2[index]) if args.input2 else (path,)
+        output.write("\t".join(row) + "\n")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_path.xml	Tue Nov 13 11:41:37 2018 -0500
@@ -0,0 +1,17 @@
+<tool id="data_path" name="Data Path" version="1.0.0">
+    <description>creates a txt file of the paths of items in a data collection</description>
+    <command interpreter="python"><![CDATA[
+        data_path.py -i #for $path in $paths# '$path.forward' #end for# -j #for $path in $paths# '$path.reverse' #end for#
+    ]]></command>
+    <inputs>
+        <!-- The command reads .forward/.reverse from every element, so only a list of pairs is a valid input. -->
+        <param name="paths" type="data_collection" collection_type="list:paired" format="data" label="Collection of files" help="" optional="False" multiple="True"/>
+    </inputs>
+    <outputs>
+        <data name="output" label="Paths of ${on_string}" format="txt" from_work_dir="paths.txt"/>
+    </outputs>
+    <help>
+This tool outputs the paths of the files in a data collection: one line per pair, holding the forward and the reverse read path separated by a tab.
+    </help>
+    <citations></citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filler.py	Tue Nov 13 11:41:37 2018 -0500
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+"""Write the single path given with -i, followed by a newline, to the file given with -o."""
+import sys
+import argparse as ap
+
+parser = ap.ArgumentParser(prog='filler', conflict_handler='resolve',
+                           description="produce filler collection to deal with galaxy handling")
+
+inputs = parser.add_argument_group('Input', '')
+inputs.add_argument('-i', '--input', nargs=1, required=True, help="Paths to (forward) reads")
+inputs.add_argument('-o', '--output', nargs=1, required=True, help="output")
+
+# sys.argv always contains the script name, so an argument-less call has length 1.
+if len(sys.argv) == 1:
+    parser.print_usage()
+    sys.exit(1)
+
+args = parser.parse_args()
+with open(args.output[0], 'w') as output:  # closed (and flushed) even if the write fails
+    output.write(args.input[0] + "\n")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filler.xml	Tue Nov 13 11:41:37 2018 -0500
@@ -0,0 +1,36 @@
+<tool id="filler" name="Filler" version="1.0.1">
+    <description>Filler Function to deal with paired collection</description>
+    <command interpreter="python"><![CDATA[
+      filler.py
+        #if str( $data_input.data_selector ) == "paired"
+          -i "$data_input.input1.forward"
+        #end if
+        #if str( $data_input.data_selector ) == "single"
+          -i "$data_input.input2"
+        #end if
+
+        -o "$output"
+    ]]></command>
+    <inputs>
+      <conditional name="data_input">
+        <param name="data_selector" type="select" label="Single or Paired-end Data" help="Select between paired and single end data to add name to dataset">
+          <option value="paired">Paired</option>
+          <option value="single">Single</option>
+        </param>
+        <when value="paired">
+          <param name="input1" format="data" type="data_collection" collection_type="paired" label="Select a paired collection" help="a paired data"/>
+        </when>
+        <when value="single">
+          <param name="input2" format="data" type="data" label="input" help="Specify dataset with single reads"/>
+        </when>
+      </conditional>
+    </inputs>
+    <outputs>
+        <!-- filler.py writes one line of text; "type" is not an attribute of <data>, so the format is declared instead. -->
+        <data name="output" format="txt"/>
+    </outputs>
+    <help>
+This tool writes the file path of the selected dataset (the forward read of a paired collection, or the single-end dataset) to a one-line text file.
+    </help>
+    <citations></citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prince.xml	Tue Nov 13 11:41:37 2018 -0500
@@ -0,0 +1,48 @@
+<tool id="prince" name="VNTR copy number approximation" version="1.1.0">
+    <description>Calculates CNV of genomic data based on template</description>
+    <requirements>
+        <requirement type="package" version="1.2">prince</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        prince -tf "$input1" -to "$output1"
+    ]]></command>
+    <inputs>
+        <param type="data" name="input1" format="txt" label="Target file" help="Text file listing the target genome names (passed to prince as -tf)"/>
+    </inputs>
+    <outputs>
+        <data name="output1" format="txt" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input1" value="prince_input.txt"/>
+            <output name="output1" file="prince_output.txt"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+        usage: prince [-h] [-bo BOOST_OUTPUT] [-to TARGET_OUTPUT] [-tmp TEMPLATES] [-tf TARGET_FILE] [-bf BOOSTING_FILE] [-k K] [-cn COPYNUMBER]
+
+        Prince Options.
+
+        optional arguments:
+          -h, --help            Show this help message and exit
+          -bo, --boost_output   Output file for training data / training data used to predict copy numbers for queries
+          -to, --target_output  Output file for query copy number predictions
+          -tmp, --templates     VNTR templates. Default is for M.TB
+          -tf, --target_file    Target genome names in a text file
+          -bf, --boosting_file  Training genome file names in a text file
+          -k, --k               Kmer size used during read recruitment
+          -cn, --copynumber     Copy number used for training genome
+
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+@misc{githubPythonPRINCE,
+  author = {Booth, Julian},
+  year = {2018},
+  title = {PythonPRINCE},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/WGS-TB/PythonPRINCE},
+}</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prince_postprocess.py	Tue Nov 13 11:41:37 2018 -0500
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+"""Replace Galaxy dataset file names in PRINCE output with their sample names.
+
+Every path listed in the n-th -f file is mapped to the n-th -s name.  On every
+second line of the PRINCE output (lines 2, 4, ...) the first comma-separated
+field is looked up in that map and its text replaced by the sample name minus
+a _1.fastq/_2.fastq/.fastq(.gz) suffix; other lines are copied unchanged.
+"""
+import sys
+import argparse as ap
+import re
+
+parser = ap.ArgumentParser(prog='prince_postprocess', conflict_handler='resolve',
+                           description="Postprocess galaxy PRINCE output")
+
+inputs = parser.add_argument_group('Input', '')
+inputs.add_argument('-i', '--input', nargs=1, required=True, help="PRINCE OUTPUT")
+inputs.add_argument('-s', '--sample', nargs='+', required=True, help="Sample names")
+inputs.add_argument('-f', '--file', nargs='+', required=True, help="File of forward reads in galaxy convention")
+
+# sys.argv always contains the script name, so an argument-less call has length 1.
+if len(sys.argv) == 1:
+    parser.print_usage()
+    sys.exit(1)
+
+args = parser.parse_args()
+if len(args.sample) < len(args.file):
+    parser.error("-s/--sample must provide one name for every -f/--file entry")
+
+# Dots are escaped so that only a literal ".fastq" / ".gz" suffix is removed.
+READ_SUFFIX = re.compile(r'(_1\.fastq(\.gz)*|_2\.fastq(\.gz)*|\.fastq(\.gz)*)')
+
+# Maps a dataset file name (path with the directory part removed) to its sample name.
+sample_dict = {}
+for sample_name, path in zip(args.sample, args.file):
+    with open(path) as f:
+        for line in f:
+            # Drop the line ending first; otherwise the key never matches the PRINCE field.
+            sample_dict[re.sub(".*/", "", line.rstrip("\r\n"))] = sample_name
+
+with open(args.input[0]) as prince_output:
+    with open('prince_postprocess_output.txt', 'w') as output:
+        for line_number, line in enumerate(prince_output, 1):
+            if line_number % 2 == 0:
+                dataset_name = line.rstrip().split(',')[0]
+                sample = READ_SUFFIX.sub('', sample_dict[dataset_name])
+                # Literal replacement: a file name must not be interpreted as a regex.
+                output.write(line.replace(dataset_name, sample))
+            else:
+                output.write(line)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prince_postprocess.xml	Tue Nov 13 11:41:37 2018 -0500
@@ -0,0 +1,18 @@
+<tool id="prince_postprocess" name="Prince Postprocess" version="1.0.0">
+    <description>Postprocess PRINCE galaxy output</description>
+    <!-- Arguments are quoted: element identifiers are user-chosen names and may contain spaces or shell metacharacters. -->
+    <command interpreter="python"><![CDATA[
+        prince_postprocess.py -i '$prince_output' -s #for $path in $paths# '$path.element_identifier' #end for# -f #for $path in $paths# '$path' #end for#
+    ]]></command>
+    <inputs>
+        <param name="paths" type="data" format="data" label="Collection of files" help="" optional="False" multiple="True"/>
+        <param name="prince_output" format="txt" type="data" label="PRINCE output"  />
+    </inputs>
+    <outputs>
+        <data name="output" format="txt" from_work_dir="prince_postprocess_output.txt"/>
+    </outputs>
+    <help>
+This tool processes the PRINCE galaxy output such that it is in line with the command line
+    </help>
+    <citations></citations>
+</tool>