comparison savage.xml @ 1:0793badf352c draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/savage commit b'6e7f074d80e0f35516e223b76e73f019ad45dd74\n'-dirty
author rdvelazquez
date Wed, 20 Mar 2019 11:05:31 -0400
parents 0e2450894b69
children
comparison
equal deleted inserted replaced
0:0e2450894b69 1:0793badf352c
1 <?xml version="1.0"?> 1 <?xml version="1.0"?>
2 <tool id="savage" version="@VERSION@+galaxy0" name="Assemble"> 2 <tool id="savage" version="@VERSION@+galaxy0" name="Assemble">
3 <description>viral haplotypes with SAVAGE</description> 3 <description>viral haplotypes with SAVAGE</description>
4 <macros> 4 <macros>
5 <token name="@SINGLE@">-s '$input_type.input'</token> 5 <token name="@SINGLE@">-s '$input_type.input'</token>
6 <token name="@PAIRED@">-p1 '$input_type.forward' -p2 '$input_type.reverse'</token> 6 <token name="@PAIRED@">-p1 '$input_type.forward' -p2
7 <token name="@BOTH@">@SINGLE@ @PAIRED@</token> 7 '$input_type.reverse'</token>
8 <token name="@VERSION@">0.4.0</token> 8 <token name="@BOTH@">@SINGLE@ @PAIRED@</token>
9 <xml name="single_input"> 9 <token name="@VERSION@">0.4.0</token>
10 <param name="input" argument="-s" type="data" format="fastq" label="Single-end reads" /> 10 <xml name="single_input">
11 </xml> 11 <param name="input" argument="-s" type="data" format="fastq" label="Single-end reads"/>
12 <xml name="paired_input"> 12 </xml>
13 <param name="forward" argument="-p1" type="data" format="fastq" label="Forward reads" /> 13 <xml name="paired_input">
14 <param name="reverse" argument="-p2" type="data" format="fastq" label="Reverse reads" /> 14 <param name="forward" argument="-p1" type="data" format="fastq" label="Forward reads"/>
15 </xml> 15 <param name="reverse" argument="-p2" type="data" format="fastq" label="Reverse reads"/>
16 </macros> 16 </xml>
17 <requirements> 17 </macros>
18 <requirement type="package" version="@VERSION@">savage</requirement> 18 <requirements>
19 </requirements> 19 <requirement type="package" version="@VERSION@">
20 <version_command><![CDATA[savage --help | grep '^Version: ' | awk '{print $2}']]></version_command> 20 savage</requirement>
21 <command detect_errors="exit_code"> 21 </requirements>
22 <![CDATA[ 22 <version_command><![CDATA[savage --help | grep '^Version: ' | awk '{print $2}']]></version_command>
23 <command detect_errors="exit_code"><![CDATA[
23 savage 24 savage
24 -t \${GALAXY_SLOTS:-1} $revcomp 25 -t \${GALAXY_SLOTS:-1} $revcomp
25 #if $input_type.singles == 'single': 26 #if $input_type.singles == 'single':
26 @SINGLE@ 27 @SINGLE@
27 #elif $input_type.singles == 'paired': 28 #elif $input_type.singles == 'paired':
66 --max_tip_len $advanced.max_tip_len 67 --max_tip_len $advanced.max_tip_len
67 #end if 68 #end if
68 $advanced.no_stage_a $advanced.no_stage_b $advanced.no_stage_c $advanced.no_overlaps $advanced.no_preprocessing 69 $advanced.no_stage_a $advanced.no_stage_b $advanced.no_stage_c $advanced.no_overlaps $advanced.no_preprocessing
69 $advanced.no_assembly $advanced.count_strains $advanced.ignore_subreads $advanced.keep_branches $advanced.diploid 70 $advanced.no_assembly $advanced.count_strains $advanced.ignore_subreads $advanced.keep_branches $advanced.diploid
70 $advanced.no_filtering 71 $advanced.no_filtering
71 ]]> 72 ]]></command>
72 </command> 73 <inputs>
73 <inputs> 74 <conditional name="input_type" label="Input data formats">
74 <conditional name="input_type" label="Input data formats"> 75 <param name="singles" type="select" label="Type of reads">
75 <param name="singles" type="select" label="Type of reads"> 76 <option value="paired">Paired reads</option>
76 <option value="paired">Paired reads</option> 77 <option value="single">Single-end reads</option>
77 <option value="single">Single-end reads</option> 78 <option value="both">Both single-end and paired</option>
78 <option value="both">Both single-end and paired</option> 79 </param>
79 </param> 80 <when value="single">
80 <when value="single"> 81 <expand macro="single_input"/>
81 <expand macro="single_input" /> 82 </when>
82 </when> 83 <when value="paired">
83 <when value="paired"> 84 <expand macro="paired_input"/>
84 <expand macro="paired_input" /> 85 </when>
85 </when> 86 <when value="both">
86 <when value="both"> 87 <expand macro="single_input"/>
87 <expand macro="single_input" /> 88 <expand macro="paired_input"/>
88 <expand macro="paired_input" /> 89 </when>
89 </when> 90 </conditional>
90 </conditional> 91 <param argument="--ref" type="data" format="fasta" optional="true" label="Optional reference genome in fasta format"/>
91 <param argument="--ref" type="data" format="fasta" optional="true" label="Optional reference genome in fasta format" /> 92 <param name="min_overlap_len" argument="-m" type="integer" value="" optional="true" label="Minimum overlap length between reads" help="By default this parameter is set to 60% of the average length of the sequencing reads used as input for SAVAGE. You can manually change this threshold using this parameter. Increasing the minimal overlap length speeds up the algorithm and leads to a lower mismatch rate in the final contigs. It is recommended to set the minimal overlap length to be larger than the (expected) largest repetitive element in the target genomes. However, it also results in a lower fraction of the target genomes being reconstructed."/>
92 <param name="min_overlap_len" argument="-m" type="integer" value="" optional="true" label="Minimum overlap length between reads" help="By default this parameter is set to 60% of the average length of the sequencing reads used as input for SAVAGE. You can manually change this threshold using this parameter. Increasing the minimal overlap length speeds up the algorithm and leads to a lower mismatch rate in the final contigs. It is recommended to set the minimal overlap length to be larger than the (expected) largest repetitive element in the target genomes. However, it also results in a lower fraction of the target genomes being reconstructed." /> 93 <param argument="--split" type="integer" value="1" label="Split the data set into patches" help="In case of (ultra-)deep sequencing data, exceeding a coverage of 1000x, we advise to split the data into patches of coverage between 500x and 1000x and run SAVAGE Stage a on each patch individually. After specifying the number of patches, SAVAGE takes care of the splitting and recombining. Choose the number of patches such that 500 &lt; read_coverage/patch_num &lt; 1000."/>
93 <param argument="--split" type="integer" value="1" label="Split the data set into patches" help="In case of (ultra-)deep sequencing data, exceeding a coverage of 1000x, we advise to split the data into patches of coverage between 500x and 1000x and run SAVAGE Stage a on each patch individually. After specifying the number of patches, SAVAGE takes care of the splitting and recombining. Choose the number of patches such that 500 &lt; read_coverage/patch_num &lt; 1000." /> 94 <param argument="--revcomp" type="boolean" truevalue="--revcomp" falsevalue="" label="Input reads are in forward-reverse orientation"/>
94 <param argument="--revcomp" type="boolean" truevalue="--revcomp" falsevalue="" label="Input reads are in forward-reverse orientation" /> 95 <section name="advanced" title="Advanced options">
95 <section name="advanced" title="Advanced options"> 96 <param argument="--no_stage_a" type="boolean" truevalue="--no_stage_a" falsevalue="" label="Skip Stage a" help="(initial contig formation)"/>
96 <param argument="--no_stage_a" type="boolean" truevalue="--no_stage_a" falsevalue="" label="Skip Stage a" help="(initial contig formation)" /> 97 <param argument="--no_stage_b" type="boolean" truevalue="--no_stage_b" falsevalue="" label="Skip Stage b" help="(extending initial contigs)"/>
97 <param argument="--no_stage_b" type="boolean" truevalue="--no_stage_b" falsevalue="" label="Skip Stage b" help="(extending initial contigs)" /> 98 <param argument="--no_stage_c" type="boolean" truevalue="--no_stage_c" falsevalue="" label="Skip Stage c" help="(merging maximized contigs into master strains)"/>
98 <param argument="--no_stage_c" type="boolean" truevalue="--no_stage_c" falsevalue="" label="Skip Stage c" help="(merging maximized contigs into master strains)" /> 99 <param argument="--no_overlaps" type="boolean" truevalue="--no_overlaps" falsevalue="" label="Skip overlap computations" help="(use existing overlaps file instead)"/>
99 <param argument="--no_overlaps" type="boolean" truevalue="--no_overlaps" falsevalue="" label="Skip overlap computations" help="(use existing overlaps file instead)" /> 100 <param argument="--no_preprocessing" type="boolean" truevalue="--no_preprocessing" falsevalue="" label="Skip preprocessing procedure" help="(i.e. creating data patches)"/>
100 <param argument="--no_preprocessing" type="boolean" truevalue="--no_preprocessing" falsevalue="" label="Skip preprocessing procedure" help="(i.e. creating data patches)" /> 101 <param argument="--no_assembly" type="boolean" truevalue="--no_assembly" falsevalue="" label="Skip all assembly steps" help="Only use this option when using --count_strains separate from assembly (e.g. on a denovo assembly)"/>
101 <param argument="--no_assembly" type="boolean" truevalue="--no_assembly" falsevalue="" label="Skip all assembly steps" help="Only use this option when using --count_strains separate from assembly (e.g. on a denovo assembly)" /> 102 <param argument="--count_strains" type="boolean" truevalue="--count_strains" falsevalue="" label="Compute a lower bound on the number of strains in this sample"/>
102 <param argument="--count_strains" type="boolean" truevalue="--count_strains" falsevalue="" label="Compute a lower bound on the number of strains in this sample" /> 103 <param argument="--ignore_subreads" type="boolean" truevalue="--ignore_subreads" falsevalue="" label="Ignore subread info from previous stage" help="When using this flag, you choose not to use subread information from previous stages in the current stage(s). This will speed up the algorithm, but at the cost of less accurate abundance estimates."/>
103 <param argument="--ignore_subreads" type="boolean" truevalue="--ignore_subreads" falsevalue="" label="Ignore subread info from previous stage" help="When using this flag, you choose not to use subread information from previous stages in the current stage(s). This will speed up the algorithm, but at the cost of less accurate abundance estimates." /> 104 <param argument="--merge_contigs" type="float" value="0" optional="true" label="Specify maximal distance between contigs for merging into master strains" help="By default this is set to 0, meaning that in stage c, the final assembly step, overlaps are allowed with a mismatch rate of 0% (i.e. exact overlaps). By increasing this threshold, e.g. to 0.01, virus strains which differ by less than 1% will be merged into one or more master strains, possibly leading to longer contigs and a less fragmented assembly (higher N50)."/>
104 <param argument="--merge_contigs" type="float" value="0" optional="true" label="Specify maximal distance between contigs for merging into master strains" help="By default this is set to 0, meaning that in stage c, the final assembly step, overlaps are allowed with a mismatch rate of 0% (i.e. exact overlaps). By increasing this threshold, e.g. to 0.01, virus strains which differ by less than 1% will be merged into one or more master strains, possibly leading to longer contigs and a less fragmented assembly (higher N50)." /> 105 <param argument="--min_clique_size" type="integer" value="" optional="true" label="Minimum clique size used during error correction"/>
105 <param argument="--min_clique_size" type="integer" value="" optional="true" label="Minimum clique size used during error correction" /> 106 <param argument="--overlap_len_stage_c" type="integer" value="100" optional="true" label="Minimum overlap length used in stage c" help="For Stage c of the algorithm, the final assembly step, it is possible to specify a different minimum overlap length using this option. By default this parameter is set to 100, but depending on the data it can pay off to decrease this parameter further."/>
106 <param argument="--overlap_len_stage_c" type="integer" value="100" optional="true" label="Minimum overlap length used in stage c" help="For Stage c of the algorithm, the final assembly step, it is possible to specify a different minimum overlap length using this option. By default this parameter is set to 100, but depending on the data it can pay off to decrease this parameter further." /> 107 <param argument="--contig_len_stage_c" type="integer" value="100" optional="true" label="Minimum contig length required for stage c input contigs" help="By default, only contigs of at least 100 bp in length are considered for stage c assembly. The user can adjust this threshold by setting this parameter. From the final stage c output, it is usually a good idea to consider only contigs of sufficient length, e.g. 500 bp."/>
107 <param argument="--contig_len_stage_c" type="integer" value="100" optional="true" label="Minimum contig length required for stage c input contigs" help="By default, only contigs of at least 100 bp in length are considered for stage c assembly. The user can adjust this threshold by setting this parameter. From the final stage c output, it is usually a good idea to consider only contigs of sufficient length, e.g. 500 bp." /> 108 <param argument="--keep_branches" type="boolean" truevalue="--keep_branches" falsevalue="" label="Disable merging along branches"/>
108 <param argument="--keep_branches" type="boolean" truevalue="--keep_branches" falsevalue="" label="Disable merging along branches" /> 109 <param argument="--sfo_mm" type="integer" value="50" optional="true" label="SFO maximal mismatch rate" help="This parameter is only relevant when running SAVAGE in de novo mode, i.e. without a reference FASTA. It specifies the error rate allowed when computing approximate suffix-prefix overlaps. By default it is equal to 50, meaning that up to 2% mismatches are allowed in the overlaps. This accounts for 1% sequencing errors. Increasing this parameter will slow down the algorithm, while decreasing will lead to a possibly incomplete overlap graph."/>
109 <param argument="--sfo_mm" type="integer" value="50" optional="true" label="SFO maximal mismatch rate" help="This parameter is only relevant when running SAVAGE in de novo mode, i.e. without a reference FASTA. It specifies the error rate allowed when computing approximate suffix-prefix overlaps. By default it is equal to 50, meaning that up to 2% mismatches are allowed in the overlaps. This accounts for 1% sequencing errors. Increasing this parameter will slow down the algorithm, while decreasing will lead to a possibly incomplete overlap graph." /> 110 <param argument="--diploid" type="boolean" truevalue="--diploid" falsevalue="" label="Diploid genome assembly"/>
110 <param argument="--diploid" type="boolean" truevalue="--diploid" falsevalue="" label="Diploid genome assembly" /> 111 <param argument="--diploid_contig_len" type="integer" value="" optional="true" label="Minimum contig length required for diploid step contigs"/>
111 <param argument="--diploid_contig_len" type="integer" value="" optional="true" label="Minimum contig length required for diploid step contigs" /> 112 <param argument="--diploid_overlap_len" type="integer" value="" optional="true" label="Minimum overlap length used in diploid assembly step"/>
112 <param argument="--diploid_overlap_len" type="integer" value="" optional="true" label="Minimum overlap length used in diploid assembly step" /> 113 <param argument="--average_read_len" type="integer" value="" optional="true" label="Average length of the input reads" help="Will be computed from the input if not specified"/>
113 <param argument="--average_read_len" type="integer" value="" optional="true" label="Average length of the input reads" help="Will be computed from the input if not specified" /> 114 <param argument="--no_filtering" type="boolean" truevalue="--no_filtering" falsevalue="" label="Disable kallisto-based filtering of contigs" help="By default, SAVAGE contigs are filtered after stages b and c based on Kallisto frequency estimates: all zero-abundance contigs are removed from the assembly. To disable this filtering procedure, enable this flag."/>
114 <param argument="--no_filtering" type="boolean" truevalue="--no_filtering" falsevalue="" label="Disable kallisto-based filtering of contigs" help="By default, SAVAGE contigs are filtered after stages b and c based on Kallisto frequency estimates: all zero-abundance contigs are removed from the assembly. To disable this filtering procedure, enable this flag." /> 115 <param argument="--max_tip_len" type="integer" value="100" optional="true" label="Maximum extension length for a sequence to be called a tip" help="Maximum extension length for a sequence to be called a tip in the overlap graph. By default this parameter is set to the average length of the input sequences. If you want to disable tip removal, set this parameter to 0. In general this will lead to a more fragmented assembly and is therefore not recommended."/>
115 <param argument="--max_tip_len" type="integer" value="100" optional="true" label="Maximum extension length for a sequence to be called a tip" help="Maximum extension length for a sequence to be called a tip in the overlap graph. By default this parameter is set to the average length of the input sequences. If you want to disable tip removal, set this parameter to 0. In general this will lead to a more fragmented assembly and is therefore not recommended." /> 116 </section>
116 </section> 117 </inputs>
117 </inputs> 118 <outputs>
118 <outputs> 119 <data name="contigs" format="fasta" from_work_dir="contigs_stage_a.fasta" label="${tool.name} on ${on_string}: Contigs"/>
119 <data name="contigs" format="fasta" from_work_dir="contigs_stage_a.fasta" label="${tool.name} on ${on_string}: Contigs" /> 120 <data name="haplotypes" format="fasta" from_work_dir="contigs_stage_c.fasta" label="${tool.name} on ${on_string}: Haplotypes"/>
120 <data name="log" format="txt" from_work_dir="stage_c/pipeline.log" label="${tool.name} on ${on_string}: Log" /> 121 <data name="log" format="txt" from_work_dir="stage_c/pipeline.log" label="${tool.name} on ${on_string}: Log"/>
121 </outputs> 122 </outputs>
122 <tests> 123 <tests>
123 <test> 124 <test>
124 <param name="singles" value="both" /> 125 <param name="singles" value="both"/>
125 <param name="input" ftype="fastq" value="savage-in1.fq" /> 126 <param name="input" ftype="fastq" value="savage-in1.fq"/>
126 <param name="forward" ftype="fastq" value="savage-in1-forward.fq" /> 127 <param name="forward" ftype="fastq" value="savage-in1-forward.fq"/>
127 <param name="reverse" ftype="fastq" value="savage-in1-reverse.fq" /> 128 <param name="reverse" ftype="fastq" value="savage-in1-reverse.fq"/>
128 <param name="split" value="1" /> 129 <param name="split" value="1"/>
129 <param name="merge_contigs" value="0" /> 130 <param name="merge_contigs" value="0"/>
130 <param name="contig_len_stage_c" value="100" /> 131 <param name="contig_len_stage_c" value="100"/>
131 <param name="overlap_len_stage_c" value="100" /> 132 <param name="overlap_len_stage_c" value="100"/>
132 <output name="contigs" ftype="fasta" file="savage-out1.fa" /> 133 <output name="contigs" ftype="fasta" file="savage-out1.fa"/>
133 <output name="log" ftype="txt" file="savage-out1.txt" lines_diff="4" /> 134 <output name="log" ftype="txt" file="savage-out1.txt" lines_diff="4"/>
134 </test> 135 </test>
135 </tests> 136 </tests>
136 <help> 137 <help><![CDATA[
137 <![CDATA[
138 SAVAGE is a computational tool for reconstructing individual haplotypes of intra-host virus strains (a viral quasispecies) without the need for a high quality reference genome. SAVAGE makes use of either FM-index based data structures or ad-hoc consensus reference sequence for constructing overlap graphs from patient sample data. In this overlap graph, nodes represent reads and/or contigs, while edges reflect that two reads/contigs, based on sound statistical considerations, represent identical haplotypic sequence. Following an iterative scheme, a new overlap assembly algorithm that is based on the enumeration of statistically well-calibrated groups of reads/contigs then efficiently reconstructs the individual haplotypes from this overlap graph. 138 SAVAGE is a computational tool for reconstructing individual haplotypes of intra-host virus strains (a viral quasispecies) without the need for a high quality reference genome. SAVAGE makes use of either FM-index based data structures or ad-hoc consensus reference sequence for constructing overlap graphs from patient sample data. In this overlap graph, nodes represent reads and/or contigs, while edges reflect that two reads/contigs, based on sound statistical considerations, represent identical haplotypic sequence. Following an iterative scheme, a new overlap assembly algorithm that is based on the enumeration of statistically well-calibrated groups of reads/contigs then efficiently reconstructs the individual haplotypes from this overlap graph.
139 ]]> 139 ]]></help>
140 </help> 140 <citations>
141 <citations> 141 <citation type="doi">doi:10.1101/gr.215038.116</citation>
142 <citation type="doi">doi:10.1101/gr.215038.116</citation> 142 </citations>
143 </citations>
144 </tool> 143 </tool>