comparison rasusa.xml @ 0:6a2965f39e3b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rasusa commit 3a1b13f3f0845f60b4a023fd547a9d2ad0170072
author iuc
date Wed, 10 Jul 2024 17:01:03 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:6a2965f39e3b
1 <tool id="rasusa" name="rasusa" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
2 <description>Randomly subsample reads to a specified coverage</description>
3 <macros>
4 <token name="@TOOL_VERSION@">2.0.0</token> <!-- rasusa package version; reused in the tool version string and in the package requirement -->
5 <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision, appended to the tool version after "+galaxy" -->
6 <token name="@FORMATS@">fastqsanger,fastqsanger.gz,fasta,fasta.gz</token> <!-- datatypes accepted by every read input of this tool -->
7 <xml name="size_units"> <!-- unit choices shared by the genome-size and number-of-bases selects -->
8 <option value="b">bases</option>
9 <option value="k">Kilo bases</option>
10 <option value="m">Mega bases</option>
11 <option value="g">Giga bases</option>
12 <option value="t">Tera bases</option>
13 </xml> <!-- the selected value is appended directly to the numeric size on the command line -->
14 <xml name="params_fastq"> <!-- subsampling controls shared by the single, paired and paired-collection read inputs -->
15 <conditional name="subsample">
16 <param name="type" type="select" label="Subsample reads based on">
17 <option value="coverage">Coverage</option>
18 <option value="num_bases">Number of bases</option>
19 <option value="num_reads">Number of reads</option>
20 <option value="frac_reads" selected="true">Fraction of reads</option>
21 </param>
22 <when value="coverage">
23 <param name="genome_size_unit" type="select" label="Specify genome size in">
24 <expand macro="size_units" />
25 </param>
26 <param name="genome_size" type="float" min="0" value="" label="Genome size to calculate coverage with respect to"/>
27 <param argument="--coverage" type="float" min="0" value="" label="The desired coverage to subsample the reads to"/>
28 </when>
29 <when value="num_bases">
30 <param name="num_bases_unit" type="select" label="Specify number of bases in">
31 <expand macro="size_units" />
32 </param>
33 <param name="bases" type="float" min="0" value="" label="Explicitly set the number of bases required"/>
34 </when>
35 <when value="num_reads">
36 <param argument="--num" type="integer" value="" min="1" label="Number of reads to subsample to"/>
37 </when>
38 <when value="frac_reads">
39 <param argument="--frac" type="float" value="0.1" min="0" max="1" label="Fraction of reads to keep" help="For example, 0.5 keeps half of the reads"/>
40 </when>
41 </conditional>
42 </xml>
43 <token name="@FASTQ_SUBSAMPLE_OPTIONS@"><![CDATA[
44 #if str($subsample.type) == 'coverage'
45 --genome-size '$subsample.genome_size$subsample.genome_size_unit'
46 --coverage $subsample.coverage
47 #elif str($subsample.type) == 'num_bases'
48 --bases '$subsample.bases$subsample.num_bases_unit'
49 #elif str($subsample.type) == 'num_reads'
50 --num $subsample.num
51 #elif str($subsample.type) == 'frac_reads'
52 --frac $subsample.frac
53 #end if
54 #if $r1_ext.endswith('.gz') or $r2_ext.endswith('.gz')
55 --output-type g
56 #end if ]]>
57 </token>
58 </macros>
59 <xrefs>
60 <xref type="bio.tools">rasusa</xref>
61 </xrefs>
62 <requirements>
63 <requirement type="package" version="@TOOL_VERSION@">rasusa</requirement> <!-- version follows the tool version token -->
64 <requirement type="package" version="1.20">samtools</requirement> <!-- used by the command to sort the subsampled alignment output -->
65 </requirements>
66
67 <command detect_errors="exit_code"><![CDATA[
68 #if str( $input.input_selector ) == "aligned":
69 ln -s '$input.bam' 'input.bam' &&
70 ln -s '$input.bam.metadata.bam_index' 'input.bam.bai' &&
71 rasusa aln
72 --coverage $input.coverage
73 --step-size $input.step_size
74 #else:
75 rasusa reads
76 #end if
77 ## Compare as a string: a plain truth test would silently drop a seed of 0.
78 #if str( $seed ) != "":
79 -s $seed
80 #end if
81 ## Every read branch below sets both r1_ext and r2_ext because the shared subsample options read both.
82 #if str( $input.input_selector ) == "paired":
83 #set r1_ext = $input.reads1.extension
84 #set r2_ext = $input.reads2.extension
85 -o 'paired_out1.$r1_ext'
86 -o 'paired_out2.$r2_ext'
87 @FASTQ_SUBSAMPLE_OPTIONS@
88 '${input.reads1}'
89 '${input.reads2}' &&
90 mv 'paired_out1.$r1_ext' '$paired_output1' &&
91 mv 'paired_out2.$r2_ext' '$paired_output2'
92
93 #elif str( $input.input_selector ) == "paired_collection":
94 #set r1_ext = $input.collection.forward.extension
95 #set r2_ext = $input.collection.reverse.extension
96 -o 'paired_out1.$r1_ext'
97 -o 'paired_out2.$r2_ext'
98 @FASTQ_SUBSAMPLE_OPTIONS@
99 '${input.collection.forward}'
100 '${input.collection.reverse}' &&
101 mv 'paired_out1.$r1_ext' '${collection_output.forward}' &&
102 mv 'paired_out2.$r2_ext' '${collection_output.reverse}'
103
104 #elif str( $input.input_selector ) == "single":
105 #set r1_ext = $input.reads.extension
106 #set r2_ext = $r1_ext
107 -o 'single_out.$r1_ext'
108 @FASTQ_SUBSAMPLE_OPTIONS@
109 '${input.reads}' &&
110 mv 'single_out.$r1_ext' '$single_output'
111 #elif str( $input.input_selector ) == "aligned":
112 'input.bam' | samtools sort --no-PG -@ 1 -T "\${TMPDIR:-.}" -O bam -o '$bam_output' -
113 #end if
114 ]]></command>
115 <inputs>
116 <conditional name="input"> <!-- selects the input layout; the command and the output filters branch on input_selector -->
117 <param name="input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
118 <option value="paired">Paired-end FASTQ</option>
119 <option value="single">Single-end FASTQ</option>
120 <option value="paired_collection">Paired FASTQ Collection</option>
121 <option value="aligned">BAM file of aligned reads</option>
122 </param>
123 <when value="paired">
124 <param name="reads1" type="data" format="@FORMATS@" label="Select first set of reads" help="Specify dataset with forward reads"/>
125 <param name="reads2" type="data" format="@FORMATS@" label="Select second set of reads" help="Specify dataset with reverse reads"/>
126 <expand macro="params_fastq" />
127 </when>
128 <when value="single">
129 <param name="reads" type="data" format="@FORMATS@" label="Select fasta/fastq dataset" help="Specify dataset with single reads"/>
130 <expand macro="params_fastq" />
131 </when>
132 <when value="paired_collection">
133 <param name="collection" format="@FORMATS@" type="data_collection" collection_type="paired" label="Select a paired collection"/>
134 <expand macro="params_fastq" />
135 </when>
136 <when value="aligned">
137 <param name="bam" format="bam" type="data" label="Select BAM file(s) with alignments"/> <!-- BAM only: the command links the dataset's bam_index metadata. NOTE(review): SAM datasets are not expected to provide it - confirm -->
138 <param argument="--coverage" type="integer" min="0" value="" label="The desired depth of coverage to subsample the alignment to"/> <!-- required: the command always emits the coverage option for alignments -->
139 <param type="integer" argument="--step-size" value="100" label="When a region has less than the desired coverage, the step size to move along the chromosome to find more reads."
140 help="The lowest of the step and the minimum end coordinate of the reads in the region will be used. This parameter can have a significant impact on the runtime of the subsampling process."/>
141 </when>
142 </conditional>
143 <param type="integer" argument="--seed" optional="true" label="Random seed to use"/>
144 </inputs>
145 <outputs> <!-- each output is filtered on input_selector, so only the outputs of the chosen input layout are created -->
146 <data name="paired_output1" label="${tool.name} on ${on_string}: paired-end R1" format_source="reads1">
147 <filter>input['input_selector'] == "paired"</filter>
148 </data>
149 <data name="paired_output2" label="${tool.name} on ${on_string}: paired-end R2" format_source="reads2">
150 <filter>input['input_selector'] == "paired"</filter>
151 </data>
152 <data name="single_output" label="${tool.name} on ${on_string}: single-end" format_source="reads">
153 <filter>input['input_selector'] == 'single'</filter>
154 </data>
155 <collection name="collection_output" type="paired" label="${tool.name} on ${on_string}: paired-collection">
156 <filter>input['input_selector'] == "paired_collection"</filter>
157 <data name="forward" label="${tool.name} on ${input.collection.forward.name}: paired-end R1" format_source="collection['forward']"/>
158 <data name="reverse" label="${tool.name} on ${input.collection.reverse.name}: paired-end R2" format_source="collection['reverse']"/>
159 </collection>
160 <data name="bam_output" label="${tool.name} on ${on_string}: BAM" format="bam">
161 <filter>input['input_selector'] == 'aligned'</filter>
162 </data>
163 </outputs>
164 <tests>
165 <test expect_num_outputs="1">
166 <!-- test 1: single-end gzipped FASTQ, subsampled to 1x coverage of a 1000 base genome, fixed seed -->
167 <conditional name="input">
168 <param name="input_selector" value="single"/>
169 <param name="reads" value="r1.fastq.gz"/>
170 </conditional>
171 <conditional name="subsample">
172 <param name="type" value="coverage"/>
173 <param name="genome_size_unit" value="b"/>
174 <param name="genome_size" value="1000"/>
175 <param name="coverage" value="1"/>
176 </conditional>
177 <param name="seed" value="1"/>
178 <output name="single_output" value="single_by_coverage_b.fastq.gz" ftype="fastqsanger.gz"/>
179 </test>
180 <test expect_num_outputs="2">
181 <!-- test 2: paired-end gzipped FASTQ, subsampled to 1x coverage of a 1 kilobase genome, fixed seed -->
182 <conditional name="input">
183 <param name="input_selector" value="paired"/>
184 <param name="reads1" value="r1.fastq.gz"/>
185 <param name="reads2" value="r2.fastq.gz"/>
186 </conditional>
187 <conditional name="subsample">
188 <param name="type" value="coverage"/>
189 <param name="genome_size_unit" value="k"/>
190 <param name="genome_size" value="1"/>
191 <param name="coverage" value="1"/>
192 </conditional>
193 <param name="seed" value="1"/>
194 <output name="paired_output1" value="paired1_by_coverage_k.fastq.gz" ftype="fastqsanger.gz"/>
195 <output name="paired_output2" value="paired2_by_coverage_k.fastq.gz" ftype="fastqsanger.gz"/>
196 </test>
197 <test expect_num_outputs="3">
198 <!-- test 3: paired collection of gzipped FASTQ, subsampled to 1x coverage of a 0.001 megabase genome, fixed seed -->
199 <conditional name="input">
200 <param name="input_selector" value="paired_collection"/>
201 <param name="collection">
202 <collection type="paired">
203 <element name="forward" value="r1.fastq.gz"/>
204 <element name="reverse" value="r2.fastq.gz"/>
205 </collection>
206 </param>
207 </conditional>
208 <conditional name="subsample">
209 <param name="type" value="coverage"/>
210 <param name="genome_size_unit" value="m"/>
211 <param name="genome_size" value="0.001"/>
212 <param name="coverage" value="1"/>
213 </conditional>
214 <param name="seed" value="1"/>
215 <output_collection name="collection_output" type="paired">
216 <element name="forward" file="paired1_by_coverage_m.fastq.gz" ftype="fastqsanger.gz"/>
217 <element name="reverse" file="paired2_by_coverage_m.fastq.gz" ftype="fastqsanger.gz"/>
218 </output_collection>
219 </test>
220 <test expect_num_outputs="1">
221 <!-- test 4: single-end gzipped FASTA, subsampled to 0.001x coverage of a 0.001 gigabase genome, fixed seed. NOTE(review): the expected file name has no .gz suffix although ftype is fasta.gz; confirm against the test data -->
222 <conditional name="input">
223 <param name="input_selector" value="single"/>
224 <param name="reads" value="r1.fasta.gz"/>
225 </conditional>
226 <conditional name="subsample">
227 <param name="type" value="coverage"/>
228 <param name="genome_size_unit" value="g"/>
229 <param name="genome_size" value="0.001"/>
230 <param name="coverage" value="0.001"/>
231 </conditional>
232 <param name="seed" value="1"/>
233 <output name="single_output" value="single_end_by_coverage_g.fasta" ftype="fasta.gz"/>
234 </test>
235 <test expect_num_outputs="2">
236 <!-- test 5: paired-end uncompressed FASTQ, subsampled to 2 kilobases in total, fixed seed -->
237 <conditional name="input">
238 <param name="input_selector" value="paired"/>
239 <param name="reads1" value="r1.fastq"/>
240 <param name="reads2" value="r2.fastq"/>
241 </conditional>
242 <conditional name="subsample">
243 <param name="type" value="num_bases"/>
244 <param name="num_bases_unit" value="k"/>
245 <param name="bases" value="2"/>
246 </conditional>
247 <param name="seed" value="1"/>
248 <output name="paired_output1" value="paired1_by_num_bases_k.fastq" ftype="fastqsanger"/>
249 <output name="paired_output2" value="paired2_by_num_bases_k.fastq" ftype="fastqsanger"/>
250 </test>
251 <test expect_num_outputs="2">
252 <!-- test 6: paired-end gzipped FASTA, subsampled to 5 reads, fixed seed -->
253 <conditional name="input">
254 <param name="input_selector" value="paired"/>
255 <param name="reads1" value="r1.fasta.gz"/>
256 <param name="reads2" value="r2.fasta.gz"/>
257 </conditional>
258 <conditional name="subsample">
259 <param name="type" value="num_reads"/>
260 <param name="num" value="5"/>
261 </conditional>
262 <param name="seed" value="1"/>
263 <output name="paired_output1" value="paired1_by_num_reads.fasta.gz" ftype="fasta.gz"/>
264 <output name="paired_output2" value="paired2_by_num_reads.fasta.gz" ftype="fasta.gz"/>
265 </test>
266 <test expect_num_outputs="3">
267 <!-- test 7: paired collection of uncompressed FASTA, keeping a 0.6 fraction of the reads, fixed seed -->
268 <conditional name="input">
269 <param name="input_selector" value="paired_collection"/>
270 <param name="collection">
271 <collection type="paired">
272 <element name="forward" value="r1.fasta"/>
273 <element name="reverse" value="r2.fasta"/>
274 </collection>
275 </param>
276 </conditional>
277 <conditional name="subsample">
278 <param name="type" value="frac_reads"/>
279 <param name="frac" value="0.6"/>
280 </conditional>
281 <param name="seed" value="1"/>
282 <output_collection name="collection_output" type="paired">
283 <element name="forward" file="paired1_by_frac_reads.fasta" ftype="fasta"/>
284 <element name="reverse" file="paired2_by_frac_reads.fasta" ftype="fasta"/>
285 </output_collection>
286 </test>
287 <test expect_num_outputs="1">
288 <!-- test 8: BAM input subsampled to a depth of 1 with the default step size, fixed seed; the coverage param is given by its bare name, outside the input conditional that declares it -->
289 <conditional name="input">
290 <param name="input_selector" value="aligned"/>
291 <param name="bam" value="input.bam" />
292 </conditional>
293 <param name="coverage" value="1"/>
294 <param name="seed" value="1"/>
295 <output name="bam_output" value="output.bam" ftype="bam"/>
296 </test>
297 </tests>
298 <help><![CDATA[
299
300 Randomly subsample reads to a specified coverage. Rasusa provides a random subsample of a read file (FASTA or FASTQ) or of a BAM file of aligned reads.
301 For read files, the size of the subset can be specified in one of the following ways:
302
303 * a genome size and the desired coverage
304 * a target number of bases (nucleotides), a target number of reads, or a fraction of reads to be sampled
305 ]]></help>
306 <citations>
307 <citation type="doi">10.21105/joss.03941</citation> <!-- NOTE(review): presumably the rasusa publication; confirm the DOI resolves to it -->
308 </citations>
309 </tool>