Mercurial > repos > iuc > valet
comparison valet.xml @ 0:4872c51a696a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/valet commit 30ecbd2ebd336d7002ca11abd69d600a24986156
| author | iuc |
|---|---|
| date | Thu, 16 Nov 2017 08:54:24 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4872c51a696a |
|---|---|
| 1 <tool id="valet" name="VALET" version="@WRAPPER_VERSION@.0"> | |
| 2 <description>to detect mis-assemblies in metagenomic assemblies</description> | |
| 3 <!-- Shared definitions: the wrapper version token, plus the insert_size and orientation parameter groups that the paired-read input branches pull in via expand. NOTE(review): the @INPUT_@ token is not referenced anywhere in the visible file; confirm whether it is still needed. --> <macros> | |
| 4 <token name="@WRAPPER_VERSION@">1.0</token> | |
| 5 <token name="@INPUT_@"> | |
| 6 1.0 | |
| 7 </token> | |
| 8 <xml name="insert_size"> | |
| 9 <param argument="--minins" type="integer" min="0" value="0" label="Min insert sizes for mate pairs" /> | |
| 10 <param argument="--maxins" type="integer" min="0" value="500" label="Max insert sizes for mate pairs" /> | |
| 11 </xml> | |
| 12 <xml name="orientation"> | |
| 13 <param argument="--orientation" type="select" label="Orientation of the mates" > | |
| 14 <option value="fr">fr: mate 1 appears upstream of the reverse complement of mate 2 or mate 2 appears upstream of the reverse | |
| 15 complement of mate 1</option> | |
| 16 <option value="rf">rf: reverse-complemented mate 1 is upstream and forward-oriented mate 2 is downstream</option> | |
| 17 <option value="ff">ff: both upstream mate 1 and downstream mate 2 are forward-oriented</option> | |
| 18 </param> | |
| 19 </xml> | |
| 20 </macros> | |
| 21 <!-- The valet package is pinned to the @WRAPPER_VERSION@ token; version_command only echoes that same token and does not query the installed program. --> <requirements> | |
| 22 <requirement type="package" version="@WRAPPER_VERSION@">valet</requirement> | |
| 23 </requirements> | |
| 24 <version_command>echo @WRAPPER_VERSION@</version_command> | |
| 25 <!-- Builds the valet.py call. Assembly files and names, and the mate files and insert sizes of every repeat entry, are each collected into a list and passed as one comma-joined option value. The joined dataset paths and the user-typed assembly names are single-quoted so that spaces or shell metacharacters cannot split or alter the command line. After valet.py finishes, the per-assembly result files are moved up into output/ with a NAME_ prefix so the output collections can discover them by file name. --> <command detect_errors="exit_code"> | |
| 26 <![CDATA[ | |
| 27 valet.py | |
| 28 #set assembly_fasta = [] | |
| 29 #set assembly_names = [] | |
| 30 #for $repeat in $assembly | |
| 31 $assembly_fasta.append(str($repeat.assembly_fasta)) | |
| 32 $assembly_names.append(str($repeat.assembly_names)) | |
| 33 #end for | |
| 34 --assembly-fasta '${','.join($assembly_fasta)}' | |
| 35 --assembly-names '${','.join($assembly_names)}' | |
| 36 #if $input_reads.type == 'single' | |
| 37 $input_reads.single_input_reads.type | |
| 38 --reads '$input_reads.single_input_reads.reads' | |
| 39 #else if $input_reads.type == 'paired' | |
| 40 #set mate_1 = [] | |
| 41 #set mate_2 = [] | |
| 42 #set minins = [] | |
| 43 #set maxins = [] | |
| 44 #for $repeat in $input_reads.paired_input_reads.paired_reads | |
| 45 $mate_1.append(str($repeat.mate_1)) | |
| 46 $mate_2.append(str($repeat.mate_2)) | |
| 47 $minins.append(str($repeat.minins)) | |
| 48 $maxins.append(str($repeat.maxins)) | |
| 49 #end for | |
| 50 --1 '${','.join($mate_1)}' | |
| 51 --2 '${','.join($mate_2)}' | |
| 52 --minins ${','.join($minins)} | |
| 53 --maxins ${','.join($maxins)} | |
| 54 $input_reads.paired_input_reads.type | |
| 55 --orientation '$input_reads.orientation' | |
| 56 #else if $input_reads.type == 'paired_collection' | |
| 57 #set mate_1 = [] | |
| 58 #set mate_2 = [] | |
| 59 #set minins = [] | |
| 60 #set maxins = [] | |
| 61 #for $repeat in $input_reads.paired_coll_input_reads.paired_collection_reads | |
| 62 $mate_1.append(str($repeat.input.forward)) | |
| 63 $mate_2.append(str($repeat.input.reverse)) | |
| 64 $minins.append(str($repeat.minins)) | |
| 65 $maxins.append(str($repeat.maxins)) | |
| 66 #end for | |
| 67 --1 '${','.join($mate_1)}' | |
| 68 --2 '${','.join($mate_2)}' | |
| 69 --minins ${','.join($minins)} | |
| 70 --maxins ${','.join($maxins)} | |
| 71 $input_reads.paired_coll_input_reads.type | |
| 72 --orientation '$input_reads.orientation' | |
| 73 #end if | |
| 74 --output-dir output | |
| 75 --window-size '$window_size' | |
| 76 --threads \${GALAXY_SLOTS:-4} | |
| 77 --max-alignments '$max_alignments' | |
| 78 --min-coverage '$min_coverage' | |
| 79 --coverage-multiplier '$coverage_multiplier' | |
| 80 --min-suspicious '$min_suspicious' | |
| 81 --suspicious-flank-size '$suspicious_flank_size' | |
| 82 --min-contig-length '$min_contig_length' | |
| 83 --ignore-ends '$ignore_ends' | |
| 84 --breakpoint-bin '$breakpoint_bin' | |
| 85 #if $orf_file | |
| 86 --orf-file '$orf_file' | |
| 87 #end if | |
| 88 #if $coverage_file | |
| 89 --coverage-file '$coverage_file' | |
| 90 #end if | |
| 91 --kmer '$kmer' | |
| 92 --skip-reapr | |
| 93 #for $repeat in $assembly | |
| 94 && mv 'output/${repeat.assembly_names}/summary.bed' 'output/${repeat.assembly_names}_summary.bed' | |
| 95 && mv 'output/${repeat.assembly_names}/summary.tsv' 'output/${repeat.assembly_names}_summary.tsv' | |
| 96 && mv 'output/${repeat.assembly_names}/suspicious.bed' 'output/${repeat.assembly_names}_suspicious.bed' | |
| 97 #end for | |
| 98 ]]></command> | |
| 99 <inputs> | |
| 100 <!-- One entry per candidate assembly. At least one entry is required because the command line passes the comma-joined files and names as option values. The name is joined with commas and also used as a directory and file-name prefix under output/, so commas and slashes are rejected. --> <repeat name="assembly" title="Candidate assemblies" min="1"> | |
| 101 <param name="assembly_fasta" argument="--assembly-fasta" type="data" format="fasta" label="Candidate assembly file" /> | |
| 102 <param name="assembly_names" argument="--assembly-names" type="text" value="" label="Name of the assembly"> | |
| 103 <validator type="empty_field" message="A name is required"/> <validator type="regex" message="The name must not contain a comma or a slash">^[^,/]+$</validator> | |
| 104 </param> | |
| 105 </repeat> | |
| 106 <!-- Read input selection. The outer select picks single, paired or paired-collection reads; each branch has an inner conditional whose select value is the format flag emitted verbatim on the command line. The paired branches hold a repeat of libraries, each with its own insert-size bounds, and share one orientation setting per branch. --> <conditional name="input_reads"> | |
| 107 <param name="type" type="select" label="Type of input reads used for the assembly"> | |
| 108 <option value="single">Single</option> | |
| 109 <option value="paired">Paired</option> | |
| 110 <option value="paired_collection">Paired-collection</option> | |
| 111 </param> | |
| 112 <when value="single"> | |
| 113 <conditional name="single_input_reads"> | |
| 114 <param name="type" type="select" label="Input format"> | |
| 115 <option value="--fasta">Fasta</option> | |
| 116 <option value="--fastq">FastQ</option> | |
| 117 </param> | |
| 118 <when value="--fasta"> | |
| 119 <param argument="--reads" type="data" format="fasta" label="Assembly input reads" /> | |
| 120 </when> | |
| 121 <when value="--fastq"> | |
| 122 <param argument="--reads" type="data" format="fastq" label="Assembly input reads" /> | |
| 123 </when> | |
| 124 </conditional> | |
| 125 </when> | |
| 126 <when value="paired"> | |
| 127 <conditional name="paired_input_reads"> | |
| 128 <param name="type" type="select" label="Input format"> | |
| 129 <option value="--fasta">Fasta</option> | |
| 130 <option value="--fastq">FastQ</option> | |
| 131 </param> | |
| 132 <when value="--fasta"> | |
| 133 <repeat name="paired_reads" title="Mate pair reads"> | |
| 134 <param name="mate_1" argument="--1" type="data" format="fasta" label="Assembly input first mate reads" /> | |
| 135 <param name="mate_2" argument="--2" type="data" format="fasta" label="Assembly input second mate reads" /> | |
| 136 <expand macro="insert_size"/> | |
| 137 </repeat> | |
| 138 </when> | |
| 139 <when value="--fastq"> | |
| 140 <repeat name="paired_reads" title="Mate pair reads"> | |
| 141 <param name="mate_1" argument="--1" type="data" format="fastq" label="Assembly input first mate reads" /> | |
| 142 <param name="mate_2" argument="--2" type="data" format="fastq" label="Assembly input second mate reads" /> | |
| 143 <expand macro="insert_size"/> | |
| 144 </repeat> | |
| 145 </when> | |
| 146 </conditional> | |
| 147 <expand macro="orientation"/> | |
| 148 </when> | |
| 149 <when value="paired_collection"> | |
| 150 <conditional name="paired_coll_input_reads"> | |
| 151 <param name="type" type="select" label="Input format"> | |
| 152 <option value="--fasta">Fasta</option> | |
| 153 <option value="--fastq">FastQ</option> | |
| 154 </param> | |
| 155 <when value="--fasta"> | |
| 156 <repeat name="paired_collection_reads" title="Mate paired read collections"> | |
| 157 <param name="input" format="fasta" type="data_collection" collection_type="paired" label="Assembly input reads" /> | |
| 158 <expand macro="insert_size"/> | |
| 159 </repeat> | |
| 160 </when> | |
| 161 <when value="--fastq"> | |
| 162 <repeat name="paired_collection_reads" title="Mate paired read collections"> | |
| 163 <param name="input" format="fastq" type="data_collection" collection_type="paired" label="Assembly input reads" /> | |
| 164 <expand macro="insert_size"/> | |
| 165 </repeat> | |
| 166 </when> | |
| 167 </conditional> | |
| 168 <expand macro="orientation"/> | |
| 169 </when> | |
| 170 </conditional> | |
| 171 <!-- Tuning options handed to valet.py under the option named in each argument attribute. coverage_file and orf_file are optional and are only added to the command line when a dataset is selected. --> <param name="window_size" argument="--window-size" type="integer" min="0" value="501" label="Sliding window size when determining misassemblies" /> | |
| 172 <param name="max_alignments" argument="--max-alignments" type="integer" min="0" value="10000" label="Bowtie2 parameter to set the max number of alignments" /> | |
| 173 <param name="min_coverage" argument="--min-coverage" type="integer" min="0" value="0" label="Minimum average coverage to run misassembly detection" /> | |
| 174 <param name="coverage_multiplier" argument="--coverage-multiplier" type="float" min="0" value="0" label="When binning by coverage, the new high = high + high * multiplier" /> | |
| 175 <param name="min_suspicious" argument="--min-suspicious" type="integer" min="0" value="2" label="Minimum number of overlapping flagged misassemblies to mark region as suspicious" /> | |
| 176 <param name="suspicious_flank_size" argument="--suspicious-flank-size" type="integer" min="0" value="2000" label="Mark region as suspicious if multiple signatures occur within this window size" /> | |
| 177 <param name="min_contig_length" argument="--min-contig-length" type="integer" min="0" value="1000" label="Ignore contigs smaller than this length" /> | |
| 178 <param name="ignore_ends" argument="--ignore-ends" type="integer" min="0" value="0" label="Ignore flagged regions within b bps from the ends of the contigs" /> | |
| 179 <param name="breakpoint_bin" argument="--breakpoint-bin" type="integer" min="0" value="50" label="Bin size used to find breakpoints" /> | |
| 180 <param name="kmer" argument="--kmer" type="integer" min="0" value="15" label="Kmer length used for abundance estimation" /> | |
| 181 <param name="coverage_file" argument="--coverage-file" type="data" format="tabular,txt" optional="true" label="Assembly created per-contig coverage file" /> | |
| 182 <param name="orf_file" argument="--orf-file" type="data" format="gff,gtf" optional="true" label="File containing ORFs" /> | |
| 183 </inputs> | |
| 184 <!-- Three list collections are discovered from the files that the command renames into output/ as NAME_summary.bed, NAME_suspicious.bed and NAME_summary.tsv; the NAME part becomes the element identifier. The angle brackets of the named group are written as entities because a literal less-than sign is not allowed inside an XML attribute value, and the dot before the extension is escaped so it only matches a real dot. --> <outputs> | |
| 185 <collection name="flagged" type="list" label="${tool.name} on ${on_string}: Flagged regions"> | |
| 186 <discover_datasets pattern="(?P&lt;designation&gt;.+)_summary\.bed" format="bed" directory="output"/> | |
| 187 </collection> | |
| 188 <collection name="suspicious" type="list" label="${tool.name} on ${on_string}: Suspicious regions"> | |
| 189 <discover_datasets pattern="(?P&lt;designation&gt;.+)_suspicious\.bed" format="bed" directory="output"/> | |
| 190 </collection> | |
| 191 <collection name="summary" type="list" label="${tool.name} on ${on_string}: Summary"> | |
| 192 <discover_datasets pattern="(?P&lt;designation&gt;.+)_summary\.tsv" format="tabular" directory="output"/> | |
| 193 </collection> | |
| 194 <data name="comparison_plot" format="pdf" from_work_dir="output/comparison_plots.pdf" label="${tool.name} on ${on_string}: Comparison plot" /> | |
| 195 </outputs> | |
| 196 <tests> | |
| 197 <!-- Four named assemblies run against one paired FASTQ library with a coverage file supplied. Expects one element per assembly name in each of the three output collections, and compares the comparison plot using sim_size. --> <test> | |
| 198 <repeat name="assembly"> | |
| 199 <param name="assembly_fasta" value="c_rudii_reference.fna"/> | |
| 200 <param name="assembly_names" value="reference"/> | |
| 201 </repeat> | |
| 202 <repeat name="assembly"> | |
| 203 <param name="assembly_fasta" value="c_rudii_dup.fna"/> | |
| 204 <param name="assembly_names" value="duplication"/> | |
| 205 </repeat> | |
| 206 <repeat name="assembly"> | |
| 207 <param name="assembly_fasta" value="c_rudii_relocation.fna"/> | |
| 208 <param name="assembly_names" value="relocation"/> | |
| 209 </repeat> | |
| 210 <repeat name="assembly"> | |
| 211 <param name="assembly_fasta" value="c_rudii_reloc_dup.fna"/> | |
| 212 <param name="assembly_names" value="reloc-dup"/> | |
| 213 </repeat> | |
| 214 <conditional name="input_reads"> | |
| 215 <param name="type" value="paired"/> | |
| 216 <conditional name="paired_input_reads"> | |
| 217 <param name="type" value="--fastq"/> | |
| 218 <repeat name="paired_reads"> | |
| 219 <param name="mate_1" value="lib1.1.fastq" /> | |
| 220 <param name="mate_2" value="lib1.2.fastq" /> | |
| 221 <param name="minins" value="0"/> | |
| 222 <param name="maxins" value="500" /> | |
| 223 </repeat> | |
| 224 </conditional> | |
| 225 <param name="orientation" value="fr" /> | |
| 226 </conditional> | |
| 227 <param name="window_size" value="501"/> | |
| 228 <param name="max_alignments" value="10000"/> | |
| 229 <param name="min_coverage" value="0" /> | |
| 230 <param name="coverage_multiplier" value="0"/> | |
| 231 <param name="min_suspicious" value="2" /> | |
| 232 <param name="suspicious_flank_size" value="2000" /> | |
| 233 <param name="min_contig_length" value="1000"/> | |
| 234 <param name="ignore_ends" value="0"/> | |
| 235 <param name="breakpoint_bin" value="50" /> | |
| 236 <param name="kmer" value="15" /> | |
| 237 <param name="coverage_file" value="carsonella_asm.cvg" /> | |
| 238 <output_collection name="flagged" type="list"> | |
| 239 <element name="reference" ftype="bed" file="flagged_reference.bed"/> | |
| 240 <element name="duplication" ftype="bed" file="flagged_duplication.bed"/> | |
| 241 <element name="relocation" ftype="bed" file="flagged_relocation.bed"/> | |
| 242 <element name="reloc-dup" ftype="bed" file="flagged_reloc-dup.bed"/> | |
| 243 </output_collection> | |
| 244 <output_collection name="suspicious" type="list"> | |
| 245 <element name="reference" ftype="bed" file="suspicious_reference.bed"/> | |
| 246 <element name="duplication" ftype="bed" file="suspicious_duplication.bed"/> | |
| 247 <element name="relocation" ftype="bed" file="suspicious_relocation.bed"/> | |
| 248 <element name="reloc-dup" ftype="bed" file="suspicious_reloc-dup.bed"/> | |
| 249 </output_collection> | |
| 250 <output_collection name="summary" type="list"> | |
| 251 <element name="reference" ftype="tabular" file="summary_reference.tabular"/> | |
| 252 <element name="duplication" ftype="tabular" file="summary_duplication.tabular"/> | |
| 253 <element name="relocation" ftype="tabular" file="summary_relocation.tabular"/> | |
| 254 <element name="reloc-dup" ftype="tabular" file="summary_reloc-dup.tabular"/> | |
| 255 </output_collection> | |
| 256 <output name="comparison_plot" file="test1_comparison_plot.pdf" compare="sim_size"/> | |
| 257 </test> | |
| 258 <!-- A single assembly with the reads supplied as one paired FASTQ collection and no coverage file. Expects a single "duplication" element in each output collection, and compares the comparison plot using sim_size. --> <test> | |
| 259 <repeat name="assembly"> | |
| 260 <param name="assembly_fasta" value="c_rudii_dup.fna"/> | |
| 261 <param name="assembly_names" value="duplication"/> | |
| 262 </repeat> | |
| 263 <conditional name="input_reads"> | |
| 264 <param name="type" value="paired_collection"/> | |
| 265 <conditional name="paired_coll_input_reads"> | |
| 266 <param name="type" value="--fastq"/> | |
| 267 <repeat name="paired_collection_reads"> | |
| 268 <param name="input"> | |
| 269 <collection type="paired"> | |
| 270 <element name="forward" value="lib1.1.fastq" ftype="fastq" /> | |
| 271 <element name="reverse" value="lib1.2.fastq" ftype="fastq" /> | |
| 272 </collection> | |
| 273 </param> | |
| 274 <param name="minins" value="0"/> | |
| 275 <param name="maxins" value="500" /> | |
| 276 </repeat> | |
| 277 </conditional> | |
| 278 <param name="orientation" value="fr" /> | |
| 279 </conditional> | |
| 280 <param name="window_size" value="501"/> | |
| 281 <param name="max_alignments" value="10000"/> | |
| 282 <param name="min_coverage" value="0" /> | |
| 283 <param name="coverage_multiplier" value="0"/> | |
| 284 <param name="min_suspicious" value="2" /> | |
| 285 <param name="suspicious_flank_size" value="2000" /> | |
| 286 <param name="min_contig_length" value="1000"/> | |
| 287 <param name="ignore_ends" value="0"/> | |
| 288 <param name="breakpoint_bin" value="50" /> | |
| 289 <param name="kmer" value="15" /> | |
| 290 <output_collection name="flagged" type="list"> | |
| 291 <element name="duplication" ftype="bed" file="flagged_duplication.bed"/> | |
| 292 </output_collection> | |
| 293 <output_collection name="suspicious" type="list"> | |
| 294 <element name="duplication" ftype="bed" file="suspicious_duplication.bed"/> | |
| 295 </output_collection> | |
| 296 <output_collection name="summary" type="list"> | |
| 297 <element name="duplication" ftype="tabular" file="summary_duplication.tabular"/> | |
| 298 </output_collection> | |
| 299 <output name="comparison_plot" file="test2_comparison_plot.pdf" compare="sim_size"/> | |
| 300 </test> | |
| 301 </tests> | |
| 302 <help><![CDATA[ | |
| 303 **What it does** | |
| 304 | |
| 305 VALET is a de novo pipeline for detecting all types of mis-assemblies in metagenomic data sets. | |
| 306 | |
| 307 It primarily adapts the approaches developed in the context of isolate genomes. To avoid false positives and false | |
| 308 negatives because of uneven depth of coverage, VALET bins contigs by coverage before applying these methods. | |
| 309 | |
| 310 Possible break points in the assembly are found by examining regions, where a large number of parts of the reads are | |
| 311 unable to align. To identify break points, VALET uses the first and last third of each unaligned read, called sister | |
| 312 reads. The sister reads are aligned independently to the reference genome, and then regions where the sister reads | |
| 313 align to nonadjacent segments of the genome are flagged as mis-assemblies. | |
| 314 | |
| 315 For more details about the tool, please check: https://github.com/marbl/VALET | |
| 316 ]]></help> | |
| 317 <citations/> | |
| 318 </tool> |
