comparison smudgeplot.xml @ 0:4c91967b3e6d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc commit cbe90253166d9908b68beb36b9488478178d225b
author iuc
date Thu, 30 Jun 2022 22:44:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4c91967b3e6d
1 <tool id="smudgeplot" name="Smudgeplot" version="@TOOL_VERSION@+galaxy+@VERSION_SUFFIX@" profile="21.05">
2 <description>inference of ploidy and heterozygosity structure using whole genome sequencing</description>
3
4 <macros>
5 <token name="@TOOL_VERSION@">0.2.5</token>
6 <token name="@VERSION_SUFFIX@">1</token>
7 </macros>
8
9 <xrefs>
10 <xref type="bio.tools">smudgeplots</xref>
11 </xrefs>
12
13 <requirements>
14 <requirement type="package" version="@TOOL_VERSION@">smudgeplot</requirement>
15 <requirement type="package" version="2.3.0">kmer-jellyfish</requirement>
16 </requirements>
17
<command detect_errors="exit_code"><![CDATA[

## Pipeline overview
##   reads -> jellyfish count -> jellyfish histo -> cutoffs (manual or
##   estimated) -> jellyfish dump -> smudgeplot.py hetkmers -> smudgeplot.py plot
##   dump  -> smudgeplot.py hetkmers -> smudgeplot.py plot
##
## Every step is chained with '&&' so the first failing command aborts the job
## (errors are detected through the exit code).

#if $file.input.input_select == 'reads'

    ## ~~~~~~~~~~~~~~~ Generate kmer-dump with presets ~~~~~~~~~~~~~~~~~~~~~

    ## Jellyfish kmer count
    ## ---------------------------------------------------------------------
    ## The datatype of the first dataset decides how ALL datasets are read;
    ## the reads parameter help asks users not to mix datatypes.
    ## The thread count follows the slots allocated by the scheduler and
    ## falls back to 4 when GALAXY_SLOTS is not set.

    #if $file.input.reads[0].is_of_type("fastqsanger.gz") or $file.input.reads[0].is_of_type("fasta.gz")
        ## Decompress every dataset into ONE stream and count it once. The pipe
        ## must come after the loop: emitting it per dataset would start a new
        ## jellyfish process for each file and produce a malformed pipeline.
        gunzip -c
        #for $f in $file.input.reads
            #if $f
                '$f'
            #end if
        #end for
        | jellyfish count -m 21 -t "\${GALAXY_SLOTS:-4}" -s 1M -o 1_counts.jf -C /dev/stdin
    #else
        jellyfish count -m 21 -t "\${GALAXY_SLOTS:-4}" -s 1M -o 1_counts.jf -C
        #for $f in $file.input.reads
            #if $f
                '$f'
            #end if
        #end for
    #end if

    && jellyfish histo 1_counts.jf > 1_kmer_k21.hist

    ## Calculate lower and upper kmer count cutoffs
    ## ---------------------------------------------------------------------
    ## An unset optional integer renders as an empty string. Comparing the
    ## parameter wrapper against None is always true, so the string form is
    ## tested instead; otherwise the estimation branch would never run.

    #if str($file.input.lower_cutoff) != ''
        && L=$file.input.lower_cutoff
    #else
        && L=\$(smudgeplot.py cutoff 1_kmer_k21.hist L)
    #end if

    #if str($file.input.upper_cutoff) != ''
        && U=$file.input.upper_cutoff
    #else
        && U=\$(smudgeplot.py cutoff 1_kmer_k21.hist U)
    #end if

    ## ---------------------------------------------------------------------
    ## Dump and extract coverage

    && echo "Dump with cutoffs L=\$L, U=\$U"
    && jellyfish dump -c -L \$L -U \$U 1_counts.jf > 2_dump.jf
    && smudgeplot.py hetkmers -o 2_kmer_pairs 2_dump.jf

#else

    ## ~~~~~~~~~~~~~~~~~~~ Use provided kmer dump ~~~~~~~~~~~~~~~~~~~~~~~~~~

    smudgeplot.py hetkmers -o 2_kmer_pairs '$file.input.dump'

#end if

## ---------------------------------------------------------------------
## Plot
## Output files are written with the 'my_genome' prefix and collected by
## the outputs section through from_work_dir.

&& smudgeplot.py plot 2_kmer_pairs_coverages.tsv -o my_genome

]]></command>
83
<inputs>
    <!-- Input data: either raw reads (kmers are counted by the tool) or a
         ready-made kmer dump. Referenced from the command as $file.input.* -->
    <section name="file" title="File inputs" expanded="true">
        <conditional name="input">
            <param name="input_select" type="select" label="Select input type" help="For more control, create your own Kmer dump using Jellyfish. See Smudgeplot on GitHub for more details: https://github.com/KamilSJaron/smudgeplot">
                <option value="reads" selected="true">Sequencing reads</option>
                <option value="dump">Kmer dump file</option>
            </param>

            <!-- Reads branch: one or more read datasets plus optional manual cutoffs -->
            <when value="reads">
                <param name="reads" type="data" multiple="true" format="fastqsanger,fastqsanger.gz,fasta.gz,fasta" label="Sequencing reads" help="Sequencing reads corresponding to your genome. Don't worry about read pairing as it is not used in Kmer-counting. If selecting multiple datasets, please do not mix datatypes!"/>

                <!-- Both cutoffs are optional; when left empty the command estimates them -->
                <param name="lower_cutoff" type="integer" optional="true" label="Lower kmer cutoff" help="Optionally set a manual lower limit for filtering kmers with smudgeplot hetkmers. If no value is set, a cutoff will be estimated with smudgeplot cutoff. Use the GenomeScope tool to visualize your kmer histogram when choosing cutoff values."/>

                <param name="upper_cutoff" type="integer" optional="true" label="Upper kmer cutoff" help="Optionally set a manual upper limit for filtering kmers with smudgeplot hetkmers. If no value is set, a cutoff will be estimated with smudgeplot cutoff. Use the GenomeScope tool to visualize your kmer histogram when choosing cutoff values."/>
            </when>

            <!-- Dump branch: the user supplies the kmer dump directly -->
            <when value="dump">
                <param name="dump" type="data" format="txt" label="Kmer dump" help="Upload your own Kmer dump file created with the Jellyfish or KMC tool. This enables control over kmer-counting parameters."/>
            </when>
        </conditional>
    </section>

    <!-- Switches for the optional outputs; each one is read by an output filter -->
    <param name="table_output" type="boolean" label="Output summary table"/>
    <param name="verbose_output" type="boolean" label="Output verbose summary"/>
    <param name="warnings_output" type="boolean" label="Output genome warnings"/>
</inputs>
143
<outputs>
    <!-- Always produced: the two plots written by the plot step with the my_genome prefix -->
    <data name="smudgeplot" format="png" from_work_dir="my_genome_smudgeplot.png" label="${tool.name} on ${on_string}: Smudgeplot"/>
    <data name="smudgeplot_log" format="png" from_work_dir="my_genome_smudgeplot_log10.png" label="${tool.name} on ${on_string}: Smudgeplot (log10)"/>

    <!-- Optional: each dataset is only created when its boolean input is enabled -->
    <data name="genome_summary" format="tabular" from_work_dir="my_genome_summary_table.tsv" label="${tool.name} on ${on_string}: Genome summary table">
        <filter>table_output</filter>
    </data>
    <data name="genome_summary_verbose" format="txt" from_work_dir="my_genome_verbose_summary.txt" label="${tool.name} on ${on_string}: Genome verbose summary">
        <filter>verbose_output</filter>
    </data>
    <data name="genome_warnings" format="txt" from_work_dir="my_genome_warnings.txt" label="${tool.name} on ${on_string}: Genome warnings">
        <filter>warnings_output</filter>
    </data>
</outputs>
177
<tests>
    <!-- All plot comparisons use sim_size: only the file size of the PNG is compared. -->

    <!-- 1. Single uncompressed FASTA with manual cutoffs -->
    <test expect_num_outputs="2">
        <param name="input_select" value="reads"/>
        <param name="reads" value="test_reads.fasta" ftype="fasta"/>
        <param name="lower_cutoff" value="2"/>
        <param name="upper_cutoff" value="25"/>
        <output name="smudgeplot" ftype="png" file="my_genome_smudgeplot.png" compare="sim_size"/>
    </test>

    <!-- 2. Same run with gzipped input (exercises the gunzip branch) -->
    <test expect_num_outputs="2">
        <param name="input_select" value="reads"/>
        <param name="reads" value="test_reads.fasta.gz" ftype="fasta.gz"/>
        <param name="lower_cutoff" value="2"/>
        <param name="upper_cutoff" value="25"/>
        <output name="smudgeplot" ftype="png" file="my_genome_smudgeplot.png" compare="sim_size"/>
    </test>

    <!-- 3. Several uncompressed read files counted together -->
    <test expect_num_outputs="2">
        <param name="input_select" value="reads"/>
        <param name="lower_cutoff" value="2"/>
        <param name="upper_cutoff" value="80"/>
        <param name="reads" value="test_reads.fasta,test_reads_2.fasta,test_reads_3.fasta" ftype="fasta"/>
        <output name="smudgeplot" ftype="png" file="my_genome_smudgeplot.png" compare="sim_size"/>
    </test>

    <!-- 4. All optional outputs enabled: two plots plus three text outputs -->
    <test expect_num_outputs="5">
        <param name="input_select" value="reads"/>
        <param name="reads" value="test_reads.fasta" ftype="fasta"/>
        <param name="lower_cutoff" value="2"/>
        <param name="upper_cutoff" value="25"/>
        <param name="table_output" value="true"/>
        <param name="verbose_output" value="true"/>
        <param name="warnings_output" value="true"/>
        <output name="smudgeplot" ftype="png" file="my_genome_smudgeplot.png" compare="sim_size"/>
        <output name="genome_summary" ftype="tabular" file="my_genome_summary_table.tsv"/>
        <output name="genome_summary_verbose" ftype="txt" file="my_genome_verbose_summary.txt"/>
        <output name="genome_warnings" ftype="txt" file="my_genome_warnings.txt"/>
    </test>

    <!-- 5. Pre-computed kmer dump as input (kmer counting is skipped) -->
    <test expect_num_outputs="2">
        <param name="input_select" value="dump"/>
        <param name="dump" value="dump.jf" ftype="txt"/>
        <output name="smudgeplot" ftype="png" file="my_genome_smudgeplot.png" compare="sim_size"/>
    </test>
</tests>
228
<help><![CDATA[

.. class:: infomark

**What it does**

This tool extracts heterozygous kmer pairs from kmer count databases and performs gymnastics with them. We are able to disentangle genome structure by comparing the sum of kmer pair coverages (CovA + CovB) to their relative coverage (CovB / (CovA + CovB)). Such an approach also allows us to analyze obscure genomes with duplications, various ploidy levels, etc.

Smudgeplots are computed from raw or, even better, from trimmed reads and show the haplotype structure using heterozygous kmer pairs. For example:

.. image:: $PATH_TO_IMAGES/smudge.png
    :height: 520
    :alt: Example smudgeplot graph

Every haplotype structure has a unique smudge on the graph and the heat of the smudge indicates how frequently the haplotype structure is represented in the genome compared to the other structures. The image above is an ideal case, where the sequencing coverage is sufficient to beautifully separate all the smudges, providing very strong and clear evidence of triploidy.

Please see `Smudgeplot on GitHub <https://github.com/KamilSJaron/smudgeplot>`_
for further documentation and tutorials.

**Inputs**

You have two choices when running Smudgeplot in Galaxy:

1. Input reads file(s) for default kmer-counting with Jellyfish

   Provide at least one file of sequencing reads that gives coverage of your genome of interest.
   The tool accepts compressed (.gz) inputs. If choosing this option, you can
   (optionally) specify manual cutoff values for the kmer dump step. The Smudgeplot
   docs suggest that you can use GenomeScope on a kmer histogram in order to choose
   reasonable lower and upper cutoff values.

2. Input your own kmer dump file for more control of kmer counting parameters

   This file would be created by running ``jellyfish count`` and then ``jellyfish dump`` - the process is well described
   `on GitHub <https://github.com/KamilSJaron/smudgeplot>`_.

**Outputs**

- ``my_genome_smudgeplot.png`` smudgeplot image
- ``my_genome_smudgeplot_log10.png`` smudgeplot with log scale
- ``my_genome_summary_table.tsv`` summarized genome statistics (optional)
- ``my_genome_verbose_summary.txt`` detailed genome statistics (optional)
- ``my_genome_warnings.txt`` warnings emitted from the Smudgeplot tool (optional)

**Default operation**

If choosing reads as the input, a default kmer counting procedure will be used
to create a kmer dump. This default process is summarized as follows:

- ``jellyfish count -C -m 21 -s 1M -o counts.jf <reads>``
- ``jellyfish histo counts.jf > counts.hist``
- ``smudgeplot.py cutoff counts.hist <L|U>`` to get kmer cutoff values (L & U), unless they were set manually
- ``jellyfish dump -c -L <L> -U <U> counts.jf > dump.jf``

The kmer dump file is then used to create a smudgeplot:

- ``smudgeplot.py hetkmers -o kmer_pairs dump.jf``
- ``smudgeplot.py plot kmer_pairs_coverages.tsv -o my_genome``

]]></help>
289 <citations>
290 <citation type="doi">10.1038/s41467-020-14998-3</citation>
291 </citations>
292 </tool>