Mercurial > repos > iuc > rasusa
comparison rasusa.xml @ 0:6a2965f39e3b draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rasusa commit 3a1b13f3f0845f60b4a023fd547a9d2ad0170072
| author | iuc |
|---|---|
| date | Wed, 10 Jul 2024 17:01:03 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:6a2965f39e3b |
|---|---|
| 1 <tool id="rasusa" name="rasusa" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05"> | |
| 2 <description>Randomly subsample reads to a specified coverage</description> | |
| 3 <macros> | |
| 4 <token name="@TOOL_VERSION@">2.0.0</token> | |
| 5 <token name="@VERSION_SUFFIX@">0</token> | |
| 6 <token name="@FORMATS@">fastqsanger,fastqsanger.gz,fasta,fasta.gz</token> | |
| 7 <xml name="size_units"> | |
| 8 <option value="b">bases</option> | |
| 9 <option value="k">Kilo bases</option> | |
| 10 <option value="m">Mega bases</option> | |
| 11 <option value="g">Giga bases</option> | |
| 12 <option value="t">Tera bases</option> | |
| 13 </xml> | |
| 14 <xml name="params_fastq"> | |
| 15 <conditional name="subsample"> | |
| 16 <param name="type" type="select" label="Subsample reads based on"> | |
| 17 <option value="coverage">Coverage</option> | |
| 18 <option value="num_bases">Number of bases</option> | |
| 19 <option value="num_reads">Number of reads</option> | |
| 20 <option value="frac_reads" selected="true">Fraction of reads</option> | |
| 21 </param> | |
| 22 <when value="coverage"> | |
| 23 <param name="genome_size_unit" type="select" label="Specify genome size in"> | |
| 24 <expand macro="size_units" /> | |
| 25 </param> | |
| 26 <param name="genome_size" type="float" min="0" value="" label="Genome size to calculate coverage with respect to"/> | |
| 27 <param argument="--coverage" type="float" min="0" value="" label="The desired coverage to subsample the reads to"/> | |
| 28 </when> | |
| 29 <when value="num_bases"> | |
| 30 <param name="num_bases_unit" type="select" label="Specify number of bases in"> | |
| 31 <expand macro="size_units" /> | |
| 32 </param> | |
| 33 <param name="bases" type="float" min="0" value="" label="Explicitly set the number of bases required"/> | |
| 34 </when> | |
| 35 <when value="num_reads"> | |
| 36 <param argument="--num" type="integer" value="" min="1" label="Subsample to a specific number of reads"/> | |
| 37 </when> | |
| 38 <when value="frac_reads"> | |
| 39 <param argument="--frac" type="float" value="0.1" min="0" max="1" label="Subsample to a fraction of the reads" help="For example, 0.5 samples half of the reads"/> | |
| 40 </when> | |
| 41 </conditional> | |
| 42 </xml> | |
| 43 <token name="@FASTQ_SUBSAMPLE_OPTIONS@"><![CDATA[ | |
| 44 #if str( $subsample.type ) == "coverage": | |
| 45 --genome-size '$subsample.genome_size$subsample.genome_size_unit' | |
| 46 --coverage $subsample.coverage | |
| 47 #elif str( $subsample.type ) == "num_bases": | |
| 48 --bases '$subsample.bases$subsample.num_bases_unit' | |
| 49 #elif str( $subsample.type ) == "num_reads": | |
| 50 --num $subsample.num | |
| 51 #elif str( $subsample.type ) == "frac_reads": | |
| 52 --frac $subsample.frac | |
| 53 #end if | |
| 54 #if $r1_ext.endswith(".gz") or $r2_ext.endswith(".gz") | |
| 55 --output-type g | |
| 56 #end if ]]> | |
| 57 </token> | |
| 58 </macros> | |
| 59 <xrefs> | |
| 60 <xref type='bio.tools'>rasusa</xref> | |
| 61 </xrefs> | |
| 62 <requirements> | |
| 63 <requirement type="package" version="@TOOL_VERSION@">rasusa</requirement> | |
| 64 <requirement type="package" version="1.20">samtools</requirement> | |
| 65 </requirements> | |
| 66 | |
| 67 <command detect_errors="exit_code"><![CDATA[ | |
| 68 ## Pick the subcommand: "aln" for an indexed BAM, "reads" for FASTA/FASTQ input. | |
| 69 #if str( $input.input_selector ) == "aligned": | |
| 70 ln -s '$input.bam' 'input.bam' && | |
| 71 ln -s '$input.bam.metadata.bam_index' 'input.bam.bai' && | |
| 72 rasusa aln | |
| 73 --coverage $input.coverage | |
| 74 --step-size $input.step_size | |
| 75 #else: | |
| 76 rasusa reads | |
| 77 #end if | |
| 78 ## Compare as a string so that a seed of 0 is still passed on. | |
| 79 #if str( $seed ) != "": | |
| 80 -s $seed | |
| 81 #end if | |
| 82 ## The FASTQ_SUBSAMPLE_OPTIONS macro reads both r1_ext and r2_ext, so every reads branch must set both. | |
| 83 #if str( $input.input_selector ) == "paired": | |
| 84 #set r1_ext = $input.reads1.extension | |
| 85 #set r2_ext = $input.reads2.extension | |
| 86 -o 'paired_out1.$r1_ext' | |
| 87 -o 'paired_out2.$r2_ext' | |
| 88 @FASTQ_SUBSAMPLE_OPTIONS@ | |
| 89 '${input.reads1}' | |
| 90 '${input.reads2}' && | |
| 91 mv 'paired_out1.$r1_ext' '$paired_output1' && | |
| 92 mv 'paired_out2.$r2_ext' '$paired_output2' | |
| 93 #elif str( $input.input_selector ) == "paired_collection": | |
| 94 #set r1_ext = $input.collection.forward.extension | |
| 95 #set r2_ext = $input.collection.reverse.extension | |
| 96 -o 'paired_out1.$r1_ext' | |
| 97 -o 'paired_out2.$r2_ext' | |
| 98 @FASTQ_SUBSAMPLE_OPTIONS@ | |
| 99 '${input.collection.forward}' | |
| 100 '${input.collection.reverse}' && | |
| 101 mv 'paired_out1.$r1_ext' '${collection_output.forward}' && | |
| 102 mv 'paired_out2.$r2_ext' '${collection_output.reverse}' | |
| 103 | |
| 104 #elif str( $input.input_selector ) == "single": | |
| 105 #set r1_ext = $input.reads.extension | |
| 106 #set r2_ext = $r1_ext | |
| 107 -o 'single_out.$r1_ext' | |
| 108 @FASTQ_SUBSAMPLE_OPTIONS@ | |
| 109 '${input.reads}' && | |
| 110 mv 'single_out.$r1_ext' '$single_output' | |
| 111 #elif str( $input.input_selector ) == "aligned": | |
| 112 'input.bam' | samtools sort --no-PG -@ 1 -T "\${TMPDIR:-.}" -O bam -o '$bam_output' - | |
| 113 #end if | |
| 114 ]]></command> | |
| 115 <inputs> | |
| 116 <conditional name="input"> | |
| 117 <param name="input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data"> | |
| 118 <option value="paired">Paired-end FASTQ</option> | |
| 119 <option value="single">Single-end FASTQ</option> | |
| 120 <option value="paired_collection">Paired FASTQ Collection</option> | |
| 121 <option value="aligned">BAM file of aligned reads</option> | |
| 122 </param> | |
| 123 <when value="paired"> | |
| 124 <param name="reads1" type="data" format="@FORMATS@" label="Select first set of reads" help="Specify dataset with forward reads"/> | |
| 125 <param name="reads2" type="data" format="@FORMATS@" label="Select second set of reads" help="Specify dataset with reverse reads"/> | |
| 126 <expand macro="params_fastq" /> | |
| 127 </when> | |
| 128 <when value="single"> | |
| 129 <param name="reads" type="data" format="@FORMATS@" label="Select fasta/fastq dataset" help="Specify dataset with single reads"/> | |
| 130 <expand macro="params_fastq" /> | |
| 131 </when> | |
| 132 <when value="paired_collection"> | |
| 133 <param name="collection" format="@FORMATS@" type="data_collection" collection_type="paired" label="Select a paired collection"/> | |
| 134 <expand macro="params_fastq" /> | |
| 135 </when> | |
| 136 <when value="aligned"> <!-- BAM only: the command links the dataset's bam_index metadata; coverage is required because the command always passes it --> | |
| 137 <param name="bam" format="bam" type="data" label="Select BAM file(s) with alignments"/> | |
| 138 <param argument="--coverage" type="integer" min="0" value="" label="The desired depth of coverage to subsample the alignment to"/> | |
| 139 <param type="integer" argument="--step-size" value="100" label="When a region has less than the desired coverage, the step size to move along the chromosome to find more reads." | |
| 140 help="The lowest of the step and the minimum end coordinate of the reads in the region will be used. This parameter can have a significant impact on the runtime of the subsampling process."/> | |
| 141 </when> | |
| 142 </conditional> | |
| 143 <param type="integer" argument="--seed" optional="true" label="Random seed to use"/> | |
| 144 </inputs> | |
| 145 <outputs> <!-- every output is filtered on input_selector, so only the outputs of the chosen input mode are created --> | |
| 146 <data name="paired_output1" label="${tool.name} on ${on_string}: paired-end R1" format_source="reads1"> | |
| 147 <filter>input['input_selector'] == "paired"</filter> | |
| 148 </data> | |
| 149 <data name="paired_output2" label="${tool.name} on ${on_string}: paired-end R2" format_source="reads2"> | |
| 150 <filter>input['input_selector'] == "paired"</filter> | |
| 151 </data> | |
| 152 <data name="single_output" label="${tool.name} on ${on_string}: single-end" format_source="reads"> | |
| 153 <filter>input['input_selector'] == 'single'</filter> | |
| 154 </data> | |
| 155 <collection name="collection_output" type="paired" label="${tool.name} on ${on_string}: paired-collection"> | |
| 156 <filter>input['input_selector'] == "paired_collection"</filter> | |
| 157 <data name="forward" label="${tool.name} on ${input.collection.forward.name}: paired-end R1" format_source="collection['forward']"/> | |
| 158 <data name="reverse" label="${tool.name} on ${input.collection.reverse.name}: paired-end R2" format_source="collection['reverse']"/> | |
| 159 </collection> | |
| 160 <data name="bam_output" label="${tool.name} on ${on_string}: BAM" format="bam"> | |
| 161 <filter>input['input_selector'] == 'aligned'</filter> | |
| 162 </data> | |
| 163 </outputs> | |
| 164 <tests> | |
| 165 <test expect_num_outputs="1"> | |
| 166 <!-- test 1: single-end fastq by coverage in bases --> | |
| 167 <conditional name="input"> | |
| 168 <param name="input_selector" value="single"/> | |
| 169 <param name="reads" value="r1.fastq.gz"/> | |
| 170 </conditional> | |
| 171 <conditional name="subsample"> | |
| 172 <param name="type" value="coverage"/> | |
| 173 <param name="genome_size_unit" value="b"/> | |
| 174 <param name="genome_size" value="1000"/> | |
| 175 <param name="coverage" value="1"/> | |
| 176 </conditional> | |
| 177 <param name="seed" value="1"/> | |
| 178 <output name="single_output" value="single_by_coverage_b.fastq.gz" ftype="fastqsanger.gz"/> | |
| 179 </test> | |
| 180 <test expect_num_outputs="2"> | |
| 181 <!-- test 2: paired-end fastq by coverage in kb --> | |
| 182 <conditional name="input"> | |
| 183 <param name="input_selector" value="paired"/> | |
| 184 <param name="reads1" value="r1.fastq.gz"/> | |
| 185 <param name="reads2" value="r2.fastq.gz"/> | |
| 186 </conditional> | |
| 187 <conditional name="subsample"> | |
| 188 <param name="type" value="coverage"/> | |
| 189 <param name="genome_size_unit" value="k"/> | |
| 190 <param name="genome_size" value="1"/> | |
| 191 <param name="coverage" value="1"/> | |
| 192 </conditional> | |
| 193 <param name="seed" value="1"/> | |
| 194 <output name="paired_output1" value="paired1_by_coverage_k.fastq.gz" ftype="fastqsanger.gz"/> | |
| 195 <output name="paired_output2" value="paired2_by_coverage_k.fastq.gz" ftype="fastqsanger.gz"/> | |
| 196 </test> | |
| 197 <test expect_num_outputs="3"> | |
| 198 <!-- test 3: paired-collection fastq by coverage in mb --> | |
| 199 <conditional name="input"> | |
| 200 <param name="input_selector" value="paired_collection"/> | |
| 201 <param name="collection"> | |
| 202 <collection type="paired"> | |
| 203 <element name="forward" value="r1.fastq.gz"/> | |
| 204 <element name="reverse" value="r2.fastq.gz"/> | |
| 205 </collection> | |
| 206 </param> | |
| 207 </conditional> | |
| 208 <conditional name="subsample"> | |
| 209 <param name="type" value="coverage"/> | |
| 210 <param name="genome_size_unit" value="m"/> | |
| 211 <param name="genome_size" value="0.001"/> | |
| 212 <param name="coverage" value="1"/> | |
| 213 </conditional> | |
| 214 <param name="seed" value="1"/> | |
| 215 <output_collection name="collection_output" type="paired"> | |
| 216 <element name="forward" file="paired1_by_coverage_m.fastq.gz" ftype="fastqsanger.gz"/> | |
| 217 <element name="reverse" file="paired2_by_coverage_m.fastq.gz" ftype="fastqsanger.gz"/> | |
| 218 </output_collection> | |
| 219 </test> | |
| 220 <test expect_num_outputs="1"> | |
| 221 <!-- test 4: single-end fasta by coverage in gb --> | |
| 222 <conditional name="input"> | |
| 223 <param name="input_selector" value="single"/> | |
| 224 <param name="reads" value="r1.fasta.gz"/> | |
| 225 </conditional> | |
| 226 <conditional name="subsample"> | |
| 227 <param name="type" value="coverage"/> | |
| 228 <param name="genome_size_unit" value="g"/> | |
| 229 <param name="genome_size" value="0.001"/> | |
| 230 <param name="coverage" value="0.001"/> | |
| 231 </conditional> | |
| 232 <param name="seed" value="1"/> | |
| 233 <output name="single_output" value="single_end_by_coverage_g.fasta" ftype="fasta.gz"/> | |
| 234 </test> | |
| 235 <test expect_num_outputs="2"> | |
| 236 <!-- test 5: paired-end fastq by number of bases --> | |
| 237 <conditional name="input"> | |
| 238 <param name="input_selector" value="paired"/> | |
| 239 <param name="reads1" value="r1.fastq"/> | |
| 240 <param name="reads2" value="r2.fastq"/> | |
| 241 </conditional> | |
| 242 <conditional name="subsample"> | |
| 243 <param name="type" value="num_bases"/> | |
| 244 <param name="num_bases_unit" value="k"/> | |
| 245 <param name="bases" value="2"/> | |
| 246 </conditional> | |
| 247 <param name="seed" value="1"/> | |
| 248 <output name="paired_output1" value="paired1_by_num_bases_k.fastq" ftype="fastqsanger"/> | |
| 249 <output name="paired_output2" value="paired2_by_num_bases_k.fastq" ftype="fastqsanger"/> | |
| 250 </test> | |
| 251 <test expect_num_outputs="2"> | |
| 252 <!-- test 6: paired-end fasta by number of reads --> | |
| 253 <conditional name="input"> | |
| 254 <param name="input_selector" value="paired"/> | |
| 255 <param name="reads1" value="r1.fasta.gz"/> | |
| 256 <param name="reads2" value="r2.fasta.gz"/> | |
| 257 </conditional> | |
| 258 <conditional name="subsample"> | |
| 259 <param name="type" value="num_reads"/> | |
| 260 <param name="num" value="5"/> | |
| 261 </conditional> | |
| 262 <param name="seed" value="1"/> | |
| 263 <output name="paired_output1" value="paired1_by_num_reads.fasta.gz" ftype="fasta.gz"/> | |
| 264 <output name="paired_output2" value="paired2_by_num_reads.fasta.gz" ftype="fasta.gz"/> | |
| 265 </test> | |
| 266 <test expect_num_outputs="3"> | |
| 267 <!-- test 7: paired-collection fasta by fraction reads --> | |
| 268 <conditional name="input"> | |
| 269 <param name="input_selector" value="paired_collection"/> | |
| 270 <param name="collection"> | |
| 271 <collection type="paired"> | |
| 272 <element name="forward" value="r1.fasta"/> | |
| 273 <element name="reverse" value="r2.fasta"/> | |
| 274 </collection> | |
| 275 </param> | |
| 276 </conditional> | |
| 277 <conditional name="subsample"> | |
| 278 <param name="type" value="frac_reads"/> | |
| 279 <param name="frac" value="0.6"/> | |
| 280 </conditional> | |
| 281 <param name="seed" value="1"/> | |
| 282 <output_collection name="collection_output" type="paired"> | |
| 283 <element name="forward" file="paired1_by_frac_reads.fasta" ftype="fasta"/> | |
| 284 <element name="reverse" file="paired2_by_frac_reads.fasta" ftype="fasta"/> | |
| 285 </output_collection> | |
| 286 </test> | |
| 287 <test expect_num_outputs="1"> | |
| 288 <!-- test 8: bam input --> | |
| 289 <conditional name="input"> | |
| 290 <param name="input_selector" value="aligned"/> | |
| 291 <param name="bam" value="input.bam" /> | |
| 292 </conditional> | |
| 293 <param name="coverage" value="1"/> | |
| 294 <param name="seed" value="1"/> | |
| 295 <output name="bam_output" value="output.bam" ftype="bam"/> | |
| 296 </test> | |
| 297 </tests> | |
| 298 <help><![CDATA[ | |
| 299 | |
| 300 Randomly subsample reads to a specified coverage. Rasusa provides a random subsample of a read file (FASTA or FASTQ), with several ways of | |
| 301 specifying the size of the subset: | |
| 302 | |
| 303 * a genome size and the desired coverage | |
| 304 * a target number of bases (nucleotides), a number of reads, or a fraction of the reads to be sampled | |
| 305 ]]></help> | |
| 306 <citations> | |
| 307 <citation type="doi">10.21105/joss.03941</citation> | |
| 308 </citations> | |
| 309 </tool> |
