comparison hybpiper.xml @ 0:91a16438e849 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hybpiper commit b439a8bebdd20955135572a15672a12a166d7ff8
author iuc
date Sat, 23 Sep 2023 16:49:12 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:91a16438e849
1 <tool id="hybpiper" name="HybPiper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>Analyse targeted sequence capture data</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="xrefs"/>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9
10 ## sample name checking: check_sample_name(sample_name) emits shell code that prints an error to stderr and exits 1 when the identifier contains anything other than letters, digits, underscores or hyphens; it emits nothing for a valid identifier. The identifier is user-supplied, so it is echoed with every disallowed character replaced by "_" and cannot close the single-quoted printf argument.
11 #import re
12 #def check_sample_name($sample_name):
13 #if re.search(r'[^A-Za-z0-9_\-]', $sample_name):
14 printf '%s\n'
15 'ERROR: special characters detected in sample identifier.'
16 'Identifiers may only contain letters, numbers, underscores and hyphens.'
17 'Check the identifier for the following sample (special characters are shown as "_"):'
18 '${re.sub("[^A-Za-z0-9_-]", "_", str($sample_name))}'
19 1>&2
20 &&
21 exit 1
22 &&
23 #end if
24 #end def
25
26 ## set up files: link the target file to a fixed name that every branch below passes to --targetfile_dna
27 ln -s '${targetfile_dna}' ./target_file.fasta
28 &&
29
30 ###############################
31 ## hybpiper check_targetfile ##
32 ###############################
33 ## branch taken when the hybpiper_job select is "check_and_fix_targetfile": check, rename the control file, then fix
34 #if str( $job_conditional.hybpiper_job ) == "check_and_fix_targetfile":
35 hybpiper check_targetfile
36 --targetfile_dna target_file.fasta
37 &&
38 ## give the control file (presumably written by check_targetfile - confirm) the fixed name read by fix_targetfile and by the output collection
39 mv fix_targetfile*.ctl hybpiper.ctl
40 &&
41
42 hybpiper fix_targetfile
43 --targetfile_dna target_file.fasta
44 --allow_gene_removal
45 hybpiper.ctl
46 &&
47
48 #######################
49 ## hybpiper assemble ##
50 #######################
51
52 #elif str( $job_conditional.hybpiper_job ) == "assemble":
53 #set sample_prefix = str($job_conditional.paired_input.element_identifier)
54 ## the collection identifier doubles as the sample name and as --prefix, so it is validated before use
55 $check_sample_name($sample_prefix)
56
57 hybpiper assemble
58 --readfiles
59 '${job_conditional.paired_input.forward}'
60 '${job_conditional.paired_input.reverse}'
61 --targetfile_dna target_file.fasta
62 --diamond
63 --cpu \${GALAXY_SLOTS:-1}
64 --prefix '${sample_prefix}'
65 &&
66 ## archive the contents of the sample directory into the hybpiper_archive output
67 tar -cvf '${hybpiper_archive}' --directory='${sample_prefix}' .
68 &&
69
70 #######################################
71 ## hybpiper stats/retrieve_sequences ##
72 #######################################
73
74 #elif str( $job_conditional.hybpiper_job ) == "stats":
75
76 ## check logic of requested items: fail early if neither stats nor sequences were selected, or if a heatmap was requested without any stats
77 #unless $job_conditional.stats_type_select or $job_conditional.sequence_type_select:
78 printf '%s\n'
79 'ERROR: No outputs selected.'
80 1>&2
81 &&
82 exit 1
83 &&
84 #end unless
85 #if $job_conditional.heatmap and not $job_conditional.stats_type_select:
86 printf '%s\n'
87 'ERROR: heatmap requested, but no stats selected.'
88 1>&2
89 &&
90 exit 1
91 &&
92 #end if
93 ## unpack every archive into a directory named after its element identifier and append that name to namelist.txt
94 #for $sample in $job_conditional.hybpiper_results
95 #set sample_prefix = str($sample.element_identifier)
96
97 $check_sample_name($sample_prefix)
98
99 mkdir -p '${sample_prefix}'
100 &&
101
102 tar -xf '${sample}' -C '${sample_prefix}'
103 &&
104
105 echo '${sample_prefix}' >> namelist.txt
106 &&
107 #end for
108
109 ## Produce a stats file and a sequence-lengths file for each requested stats type, over all samples in namelist.txt
110 #for $stats_output in $job_conditional.stats_type_select:
111 hybpiper stats
112 --targetfile_dna target_file.fasta
113 --stats_filename 'stats.${stats_output}'
114 --seq_lengths_filename 'seq_lengths.${stats_output}'
115 '${stats_output}'
116 namelist.txt
117 &&
118
119 ## Produce heatmaps if selected: one SVG per stats type, read from that type's seq_lengths .tsv file
120 #if $job_conditional.heatmap:
121 hybpiper recovery_heatmap
122 --heatmap_filename 'heatmap.${stats_output}'
123 --heatmap_filetype svg
124 'seq_lengths.${stats_output}.tsv'
125 &&
126 #end if
127 #end for
128
129 ## Produce sequences for each requested type, each into its own fasta.<type> directory (the directories named by the discover_datasets outputs)
130 #for $sequence_output in $job_conditional.sequence_type_select:
131 mkdir 'fasta.${sequence_output}'
132 &&
133 hybpiper retrieve_sequences
134 --targetfile_dna target_file.fasta
135 --sample_names namelist.txt
136 --fasta_dir 'fasta.${sequence_output}'
137 '${sequence_output}'
138 &&
139 #end for
140 #end if
141 ## every branch above ends on a trailing '&&'; 'wait' supplies the final command of the chain
142 wait
143
144 ]]></command>
145
146 <inputs>
147 <param argument="--targetfile_dna" type="data" format="fasta" label="Target file" help="Target file in FASTA format"/>
148 <!-- the run type chosen here selects which of the inputs below apply -->
149 <conditional name="job_conditional">
150 <param name="hybpiper_job" type="select" label="Type of hybpiper run">
151 <option value="check_and_fix_targetfile">Check and fix targetfile</option>
152 <option value="assemble" selected="true">Assemble target loci</option>
153 <option value="stats">Extract sequences and/or stats from Hybpiper runs</option>
154 </param>
155 <!-- checking/fixing takes no inputs beyond the target file -->
156 <when value="check_and_fix_targetfile"/>
157 <!-- assembly takes one paired collection; its identifier is used as the sample name -->
158 <when value="assemble">
159 <param name="paired_input" type="data_collection" collection_type="paired" format="fastqsanger" label="Input reads" help="Your reads must be in a paired collection. See below for more information."/>
160 </when>
161 <!-- stats, heatmaps and sequences are all derived from a list of assemble archives -->
162 <when value="stats">
163 <param name="hybpiper_results" type="data_collection" collection_type="list" format="tar" multiple="true" label="Results from Hybpiper assemble runs"/>
164 <param name="stats_type_select" type="select" display="checkboxes" multiple="true" optional="true" label="Choose statistics to report">
165 <option value="gene" selected="true">Gene</option>
166 <option value="supercontig">Supercontig</option>
167 </param>
168 <param name="heatmap" type="boolean" checked="false" label="Produce a heatmap for each of the selected statistics"/>
169 <param name="sequence_type_select" type="select" display="checkboxes" multiple="true" optional="true" label="Choose sequences to extract">
170 <option value="dna" selected="true">DNA</option>
171 <option value="aa">Amino acid</option>
172 <option value="intron">Intron</option>
173 <option value="supercontig">Supercontig</option>
174 </param>
175 </when>
176 </conditional>
177 </inputs>
178
179 <outputs>
180 <!-- check_targetfile output: the fixed FASTA plus a two-element list holding the .ctl file and the report -->
181 <data name="fixed_targetfile" format="fasta" from_work_dir="target_file_fixed.fasta" label="${targetfile_dna.element_identifier} (fixed)">
182 <filter>job_conditional['hybpiper_job'] == 'check_and_fix_targetfile'</filter>
183 </data>
184 <collection name="output_targetfile" type="list" label="Hybpiper logs for ${targetfile_dna.element_identifier}">
185 <data name="targetfile_ctl_file" format="txt" from_work_dir="hybpiper.ctl" label="Hybpiper .ctl file for ${on_string}"/>
186 <data name="targetfile_report" format="tabular" from_work_dir="fix_targetfile_report.tsv" label="Hybpiper targetfile report"/>
187 <filter>job_conditional['hybpiper_job'] == 'check_and_fix_targetfile'</filter>
188 </collection>
189
190 <!-- assemble output: the tar archive the command writes from the sample directory -->
191 <data name="hybpiper_archive" format="tar">
192 <filter>job_conditional['hybpiper_job'] == 'assemble'</filter>
193 </data>
194
195 <!-- stats/statistics output: stats and sequence-length tables per stats type. The filter checks that a stats type was selected before testing membership, so an empty selection (which Galaxy may pass as None) filters the collection out instead of raising inside the expression. -->
196 <collection name="hybpiper_stats" type="list" label="Hybpiper statistics">
197 <data name="stats_gene" label="Hybpiper statistics (gene)" format="tabular" from_work_dir="stats.gene.tsv">
198 <actions>
199 <action name="column_names" type="metadata" default="Name,NumReads,ReadsMapped,PctOnTarget,GenesMapped,GenesWithContigs,GenesWithSeqs,GenesAt25pct,GenesAt50pct,GenesAt75pct,GenesAt150pct,ParalogWarningsLong,ParalogWarningsDepth,GenesWithoutStitchedContigs,GenesWithStitchedContigs,GenesWithStitchedContigsSkipped,GenesWithChimeraWarning,TotalBasesRecovered" />
200 </actions>
201 </data>
202 <data name="stats_supercontig" label="Hybpiper statistics (supercontig)" format="tabular" from_work_dir="stats.supercontig.tsv">
203 <actions>
204 <action name="column_names" type="metadata" default="Name,NumReads,ReadsMapped,PctOnTarget,GenesMapped,GenesWithContigs,GenesWithSeqs,GenesAt25pct,GenesAt50pct,GenesAt75pct,GenesAt150pct,ParalogWarningsLong,ParalogWarningsDepth,GenesWithoutStitchedContigs,GenesWithStitchedContigs,GenesWithStitchedContigsSkipped,GenesWithChimeraWarning,TotalBasesRecovered" />
205 </actions>
206 </data>
207 <data name="seqlengths_gene" label="Assembled sequence lengths (gene)" format="tabular" from_work_dir="seq_lengths.gene.tsv"/>
208 <data name="seqlengths_supercontig" label="Assembled sequence lengths (supercontig)" format="tabular" from_work_dir="seq_lengths.supercontig.tsv">
209 </data>
210 <filter>job_conditional['hybpiper_job'] == 'stats' and job_conditional['stats_type_select'] and ('gene' in job_conditional['stats_type_select'] or 'supercontig' in job_conditional['stats_type_select'])</filter>
211 </collection>
212
213 <!-- stats/heatmap output: collects the heatmap.<type>.svg files; created only for stats runs with the heatmap checkbox ticked. The filter is plain Python, so the boolean is tested directly ('true' is not a Python name). -->
214 <collection name="hybpiper_heatmaps" type="list" label="Hybpiper heatmaps">
215 <discover_datasets pattern="heatmap\.(?P&lt;designation&gt;.+)\.svg" format="svg" recurse="false" />
216 <filter>job_conditional['hybpiper_job'] == 'stats' and job_conditional['heatmap']</filter>
217 </collection>
218
219 <!-- stats/sequences output: one list per sequence type, discovered in that type's fasta.<type> directory. Each filter checks that a sequence type was selected before testing membership, so an empty selection (which Galaxy may pass as None) filters the collection out instead of raising inside the expression. -->
220 <collection name="dna_sequences" type="list" label="DNA sequences">
221 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.FNA" format="fasta" directory="fasta.dna" recurse="false" />
222 <filter>job_conditional['hybpiper_job'] == 'stats' and job_conditional['sequence_type_select'] and 'dna' in job_conditional['sequence_type_select']</filter>
223 </collection>
224 <collection name="aa_sequences" type="list" label="Amino acid sequences">
225 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.FAA" format="fasta" directory="fasta.aa" recurse="false" />
226 <filter>job_conditional['hybpiper_job'] == 'stats' and job_conditional['sequence_type_select'] and 'aa' in job_conditional['sequence_type_select']</filter>
227 </collection>
228 <collection name="intron_sequences" type="list" label="Intron sequences">
229 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fasta" format="fasta" directory="fasta.intron" recurse="false" />
230 <filter>job_conditional['hybpiper_job'] == 'stats' and job_conditional['sequence_type_select'] and 'intron' in job_conditional['sequence_type_select']</filter>
231 </collection>
232 <collection name="supercontig_sequences" type="list" label="Supercontig sequences">
233 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fasta" format="fasta" directory="fasta.supercontig" recurse="false" />
234 <filter>job_conditional['hybpiper_job'] == 'stats' and job_conditional['sequence_type_select'] and 'supercontig' in job_conditional['sequence_type_select']</filter>
235 </collection>
236
237 <!-- placeholder output, kept only for a stats run where neither statistics nor sequences are selected -->
238 <data name="dummy_output" format="txt" from_work_dir="namelist.txt" label="Stats or sequences from Hybpiper runs">
239 <filter>job_conditional['hybpiper_job'] == 'stats' and not job_conditional['stats_type_select'] and not job_conditional['sequence_type_select']</filter>
240 </data>
241
242 </outputs>
243 <tests>
244
245 <!-- test1: check and fix targetfile; compares the fixed FASTA and both elements of the log collection with reference files -->
246 <test expect_num_outputs="4">
247 <param name="targetfile_dna" value="test_targets.fasta.gz"/>
248 <conditional name="job_conditional">
249 <param name="hybpiper_job" value="check_and_fix_targetfile"/>
250 </conditional>
251 <output name="fixed_targetfile" file="test1_out.fasta"/>
252 <output_collection name="output_targetfile" type="list" count="2">
253 <element name="targetfile_ctl_file" file="test1_out.ctl"/>
254 <element name="targetfile_report" file="test1_out.tsv"/>
255 </output_collection>
256 </test>
257
258 <!-- test2: assemble with paired collection; registered as an expected failure, with the archive size assertion below commented out -->
259 <!-- Not possible to test stats unless element_identifier can be set. -->
260 <test expect_failure="true">
261 <param name="targetfile_dna" value="test_targets.fasta.gz"/>
262 <conditional name="job_conditional">
263 <param name="hybpiper_job" value="assemble"/>
264 <param name="paired_input">
265 <collection type="paired">
266 <element name="forward" ftype="fastqsanger.gz" value="NZ874_R1_test.fastq.gz" />
267 <element name="reverse" ftype="fastqsanger.gz" value="NZ874_R2_test.fastq.gz" />
268 </collection>
269 </param>
270 </conditional>
271 <!-- <output name="hybpiper_archive">
272 <assert_contents>
273 <has_size value="2386944" delta="200000" />
274 </assert_contents>
275 </output> -->
276 </test>
277
278 <!-- test3: all stats output; one sample archive with both stats types, heatmaps and all four sequence types selected, checked by element counts only -->
279 <test expect_num_outputs="10">
280 <param name="targetfile_dna" value="test_targets.fasta.gz"/>
281 <conditional name="job_conditional">
282 <param name="hybpiper_job" value="stats"/>
283 <param name="hybpiper_results" >
284 <collection type="list">
285 <element name="NZ874" value="NZ874.tar.gz" />
286 </collection>
287 </param>
288 <param name="stats_type_select" value="gene,supercontig"/>
289 <param name="heatmap" value="true"/>
290 <param name="sequence_type_select" value="dna,aa,intron,supercontig"/>
291 </conditional>
292 <output_collection name="hybpiper_stats" type="list" count="4" />
293 <output_collection name="hybpiper_heatmaps" type="list" count="2">
294 </output_collection>
295 <output_collection name="dna_sequences" type="list" count="13">
296 </output_collection>
297 <output_collection name="aa_sequences" type="list" count="13">
298 </output_collection>
299 <output_collection name="intron_sequences" type="list" count="13">
300 </output_collection>
301 <output_collection name="supercontig_sequences" type="list" count="13">
302 </output_collection>
303 </test>
304
305 <!-- test4: no output selected; expected to fail because the command exits 1 when neither statistics nor sequences are chosen -->
306 <test expect_failure="true">
307 <param name="targetfile_dna" value="test_targets.fasta.gz"/>
308 <conditional name="job_conditional">
309 <param name="hybpiper_job" value="stats"/>
310 <param name="hybpiper_results" >
311 <collection type="list">
312 <element name="NZ874" value="NZ874.tar.gz" />
313 </collection>
314 </param>
315 <param name="stats_type_select" value=""/>
316 <param name="heatmap" value="true"/>
317 <param name="sequence_type_select" value=""/>
318 </conditional>
319 </test>
320
321 </tests>
322 <help><![CDATA[
323
324 Using HybPiper on Galaxy
325 ------------------------
326
327 Input
328 ~~~~~
329
330 On Galaxy, **you have to use paired collections as input** for
331 HybPiper assemblies. HybPiper relies on the directory hierarchy it creates for each
332 sample during assembly. The hierarchy is based on the name of the
333 sample, which you provide to Galaxy as the identifier in the collection.
334
335 Using paired collections
336 ~~~~~~~~~~~~~~~~~~~~~~~~
337
338 If you have your sequencing reads in individual datasets, you can easily organise them into a paired
339 collection. See the Galaxy training material on `using dataset
340 collections <https://gxy.io/GTN:T00146>`__
341 for a step-by-step guide.
342
343 **Note**: because HybPiper uses sample
344 identifiers to create directories, you **can't use special characters**
345 in your sample identifiers. The only allowed characters are letters,
346 numbers, underscores and hyphens.
347
348 You can't use single-end or unpaired reads as input to HybPiper on Galaxy.
349
350 Running HybPiper
351 ~~~~~~~~~~~~~~~~
352
353 The following HybPiper analyses are available on Galaxy:
354
355 1. Check your target file and fix issues (optional)
356 2. Assemble target loci per-sample
357 3. Extract sequences and summary statistics
358
359 Use the *Type of hybpiper run* drop-down to select an analysis.
360
361 .. class:: infomark
362
363 What it does
364 ------------
365
366 HybPiper was designed for processing targeted sequence capture data. In
367 targeted sequence capture, DNA sequencing libraries are enriched for
368 gene regions of interest. This is used for sequencing many loci
369 simultaneously based on bait sequences.
370
371 HybPiper is a suite of scripts that wrap and connect other tools to
372 extract target sequences from the sequencing reads. The HybPiper
373 pipeline starts with high-throughput sequencing reads (for example from
374 Illumina MiSeq), and assigns them to target genes using DIAMOND. The
375 reads are distributed to separate directories, where they are assembled
376 separately using SPAdes. The main output is a collection of FASTA files
377 of the (in frame) CDS portion of the sample for each target region. You
378 can also generate separate collections of files with the translated
379 protein sequences, the intronic regions flanking each exon, and putative
380 paralog sequences.
381
382 For more information, please see `the HybPiper
383 wiki <https://github.com/mossmatters/HybPiper/wiki>`__.
384
385
386 ]]></help>
387 <expand macro="citations"/>
388 </tool>