comparison sequali.xml @ 0:51c7fadc13e4 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sequali commit 035d59f19f3a55032be215a3a226c83ad167059c
author iuc
date Thu, 10 Apr 2025 16:42:20 +0000
parents
children 845bd6c1c0c1
comparison
equal deleted inserted replaced
-1:000000000000 0:51c7fadc13e4
1 <tool id="sequali" name="sequali" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
2 <description>Fast sequencing data quality metrics for short and long reads</description>
3 <macros> <!-- Version tokens: expanded into the tool's version attribute and into the package requirement below. -->
4 <token name="@TOOL_VERSION@">0.12.0</token> <!-- sequali package version pinned by the requirement -->
5 <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision, appended after "+galaxy" in the tool version -->
6 </macros>
7 <xrefs>
8 <xref type="bio.tools">sequali</xref> <!-- bio.tools cross-reference identifier -->
9 </xrefs>
10 <requirements>
11 <requirement type="package" version="@TOOL_VERSION@">sequali</requirement>
12 </requirements>
13 <version_command><![CDATA[
14 sequali --version
15 ]]>
16 </version_command>
17 <command detect_errors="exit_code"><![CDATA[
18 #import re
19 ## Symlink every input under its sanitized element identifier (always shell-quoted) and pass those link names to sequali.
20 #if $input_reads.is_collection:
21 #set $input_1 = re.sub('[^\w\-_\.]', '_', str($input_reads[0].element_identifier))
22 #set $input_2 = re.sub('[^\w\-_\.]', '_', str($input_reads[1].element_identifier))
23 ln -s '${input_reads[0]}' '${input_1}' &&
24 ln -s '${input_reads[1]}' '${input_2}' &&
25 #else if $input_type_selector == 'single':
26 #set $input_1 = re.sub('[^\w\-_\.]', '_', str($input_reads.element_identifier))
27 #set $input_2 = ''
28 ln -s '${input_reads}' '${input_1}' &&
29 #else if $input_type_selector == 'paired':
30 #set $input_1 = re.sub('[^\w\-_\.]', '_', str($input_reads.element_identifier))
31 #set $input_2 = re.sub('[^\w\-_\.]', '_', str($input_reads_rev.element_identifier))
32 ln -s '${input_reads}' '${input_1}' &&
33 ln -s '${input_reads_rev}' '${input_2}' &&
34 #end if
35 ## Each optional flag below is emitted only when the user supplied a value; otherwise the flag is omitted entirely.
36 mkdir -p '${html_report.files_path}' &&
37 sequali
38 #if $adapter:
39 --adapter-file '${adapter}'
40 #end if
41 #if str($overrepresentation_threshold) != '':
42 --overrepresentation-threshold-fraction '${overrepresentation_threshold}'
43 #end if
44 #if str($overrep_min_threshold) != '':
45 --overrepresentation-min-threshold '${overrep_min_threshold}'
46 #end if
47 #if str($overrep_max_threshold) != '':
48 --overrepresentation-max-threshold '${overrep_max_threshold}'
49 #end if
50 #if str($overrep_max_unique_fragments) != '':
51 --overrepresentation-max-unique-fragments '${overrep_max_unique_fragments}'
52 #end if
53 #if str($overrep_fragment_length) != '':
54 --overrepresentation-fragment-length '${overrep_fragment_length}'
55 #end if
56 #if str($overrep_sample_every) != '':
57 --overrepresentation-sample-every '${overrep_sample_every}'
58 #end if
59 #if str($dup_max_stored_fingerprints) != '':
60 --duplication-max-stored-fingerprints '${dup_max_stored_fingerprints}'
61 #end if
62 #if str($fp_front_length) != '':
63 --fingerprint-front-length '${fp_front_length}'
64 #end if
65 #if str($fp_back_length) != '':
66 --fingerprint-back-length '${fp_back_length}'
67 #end if
68 #if str($fp_front_offset) != '':
69 --fingerprint-front-offset '${fp_front_offset}'
70 #end if
71 #if str($fp_back_offset) != '':
72 --fingerprint-back-offset '${fp_back_offset}'
73 #end if
74 --html '$html_report'
75 --json '$json_report'
76 --threads \${GALAXY_SLOTS:-2}
77 '${input_1}'
78 #if $input_2:
79 '${input_2}'
80 #end if
81 ]]></command>
82 <inputs>
83 <conditional name="input_type"> <!-- How reads are supplied; every case defines input_reads, and the paired case adds input_reads_rev. -->
84 <param name="input_type_selector" type="select" label="Single file or paired-end reads" help="Select between paired-end reads and single file (fastq, ubam) to process">
85 <option value="single">Single file</option>
86 <option value="paired">Paired-end</option>
87 <option value="paired_collection">Paired Collection</option>
88 </param>
89 <when value="single">
90 <param name="input_reads" type="data" format="fastq,fastq.gz,unsorted.bam" label="Select fastq dataset" help="Specify dataset with single reads (fastq or unaligned bam)."/>
91 </when>
92 <when value="paired">
93 <param name="input_reads" type="data" format="fastq,fastq.gz" label="Forward reads" help="Specify FASTQ(.gz) dataset with forward reads."/>
94 <param name="input_reads_rev" type="data" format="fastq,fastq.gz" label="Reverse reads" help="Specify FASTQ(.gz) dataset with reverse reads."/>
95 </when>
96 <when value="paired_collection"> <!-- Restricted to paired collections because the command reads exactly elements [0] and [1]. -->
97 <param name="input_reads" type="data_collection" collection_type="paired" format="fastq,fastq.gz" label="Collection with paired-end reads" help="Specify collection with paired-end reads."/>
98 </when>
99 </conditional>
100 <param argument="--adapter-file" name="adapter" format="tabular" type="data" optional="true" label="Adapters to search for"
101 help="TSV file with header: Name, Sequencing Technology, Probe sequence, sequence position. Default: https://github.com/rhpvorderman/sequali/tree/develop/src/sequali/adapters/adapter_list.tsv"/>
102 <section name="overrepresentation-param" title="Overrepresentation parameters" expanded="false"> <!-- All values optional; an empty value means the flag is not passed. -->
103 <param argument="--overrepresentation-threshold-fraction" name="overrepresentation_threshold" type="float" optional="true" min="0" max="1"
104 label="Fraction to be determined as overrepresented" help="The threshold is calculated as fraction times the number of sampled sequences. Default: 0.001 (1 in 1,000)."/>
105 <param argument="--overrepresentation-min-threshold" name="overrep_min_threshold" type="integer" optional="true" min="0"
106 label="Minimum occurrences to be considered overrepresented"
107 help="The minimum amount of occurrences for a sequence to be considered overrepresented, regardless of the bound set by the threshold fraction. Useful for smaller files. Default: 100."/>
108 <param argument="--overrepresentation-max-threshold" name="overrep_max_threshold" type="integer" optional="true" min="0"
109 label="Amount of occurrences to be considered overrepresented"
110 help="The amount of occurrences for a sequence to be considered overrepresented, regardless of the bound set by the threshold fraction. Useful for very large files. Default: unlimited."/>
111 <param argument="--overrepresentation-max-unique-fragments" name="overrep_max_unique_fragments" type="integer" optional="true" min="0"
112 label="Maximum amount of unique fragments to store"
113 help="Larger amounts increase the sensitivity of finding overrepresented sequences at the cost of increasing memory usage. Default: 5,000,000."/>
114 <param argument="--overrepresentation-fragment-length" name="overrep_fragment_length" type="integer" optional="true" min="3" max="31"
115 label="Length of fragments to sample"
116 help="The maximum is 31. Default: 21."/>
117 <param argument="--overrepresentation-sample-every" name="overrep_sample_every" type="integer" optional="true" min="1"
118 label="Sample one every N sequences"
119 help="More sequences sampled leads to better precision, lower speed, and also more bias towards the beginning of the file as the fragment store gets filled up with more sequences from the beginning. Default is N=8, so one in 8 sequences is analysed."/>
120 </section>
121 <section name="duplication-param" title="Duplication parameters" expanded="false"> <!-- All values optional; an empty value means the flag is not passed. -->
122 <param argument="--duplication-max-stored-fingerprints" name="dup_max_stored_fingerprints" type="integer" optional="true" min="0"
123 label="Maximum number of fingerprints stored"
124 help="Determines how many fingerprints are maximally stored to estimate the duplication rate. More fingerprints leads to a more accurate estimate, but also more memory usage. Default: 1,000,000."/>
125 <param argument="--fingerprint-front-length" name="fp_front_length" type="integer" optional="true" min="0"
126 label="Number of bases from the front of the sequence"
127 help="Number of bases to be taken for the deduplication fingerprint from the front of the sequence. Default: 8."/>
128 <param argument="--fingerprint-back-length" name="fp_back_length" type="integer" optional="true" min="0"
129 label="Number of bases from the back of the sequence"
130 help="Number of bases to be taken for the deduplication fingerprint from the back of the sequence. Default: 8."/>
131 <param argument="--fingerprint-front-offset" name="fp_front_offset" type="integer" optional="true" min="0"
132 label="Front offset of the deduplication fingerprint"
133 help="Useful for avoiding adapter sequences. Default: 64 for single end, 0 for paired sequences."/>
134 <param argument="--fingerprint-back-offset" name="fp_back_offset" type="integer" optional="true" min="0"
135 label="Back offset of the deduplication fingerprint"
136 help="Useful for avoiding adapter sequences. Default: 64 for single end, 0 for paired sequences."/>
137 </section>
138 </inputs>
139 <outputs> <!-- Both datasets are the paths handed to sequali through the --html and --json options of the command. -->
140 <data format="html" name="html_report" label="${tool.name} on ${on_string}: HTML report" />
141 <data format="json" name="json_report" label="${tool.name} on ${on_string}: JSON report" />
142 </outputs>
143 <tests> <!-- NOTE(review): no test selects the paired_collection case or supplies an unsorted.bam input. -->
144 <test expect_num_outputs="2"> <!-- Single file (input_fwd.fastq): each report must contain its marker text. -->
145 <conditional name="input_type">
146 <param name="input_type_selector" value="single" />
147 <param name="input_reads" value="input_fwd.fastq" />
148 </conditional>
149 <output name="html_report" ftype="html">
150 <assert_contents>
151 <has_text text="Sequali report"/>
152 </assert_contents>
153 </output>
154 <output name="json_report" ftype="json">
155 <assert_contents>
156 <has_text text="sequali_version"/>
157 </assert_contents>
158 </output>
159 </test>
160 <test expect_num_outputs="2"> <!-- Two separate datasets as a pair: reports must also mention the second read file. -->
161 <conditional name="input_type">
162 <param name="input_type_selector" value="paired" />
163 <param name="input_reads" value="input_fwd.fastq" />
164 <param name="input_reads_rev" value="input_rev.fastq" />
165 </conditional>
166 <output name="html_report" ftype="html">
167 <assert_contents>
168 <has_text text="Sequali report"/>
169 <has_text text="Filename read 2"/>
170 </assert_contents>
171 </output>
172 <output name="json_report" ftype="json">
173 <assert_contents>
174 <has_text text="sequali_version"/>
175 <has_text text="filename_read2"/>
176 </assert_contents>
177 </output>
178 </test>
179 <test expect_num_outputs="2"> <!-- Single file again, this time with input_nanopore.fastq. -->
180 <conditional name="input_type">
181 <param name="input_type_selector" value="single" />
182 <param name="input_reads" value="input_nanopore.fastq" />
183 </conditional>
184 <output name="html_report" ftype="html">
185 <assert_contents>
186 <has_text text="Sequali report"/>
187 </assert_contents>
188 </output>
189 <output name="json_report" ftype="json">
190 <assert_contents>
191 <has_text text="sequali_version"/>
192 </assert_contents>
193 </output>
194 </test>
195 <test expect_failure="true"> <!-- Negative test: the job run on input_fail.fastq is expected to fail. -->
196 <conditional name="input_type">
197 <param name="input_type_selector" value="single" />
198 <param name="input_reads" value="input_fail.fastq" />
199 </conditional>
200 </test>
201 </tests>
202 <help><![CDATA[
203 .. class:: infomark
204
205 **Purpose**
206
207 Sequali_ is a tool for fast sequencing data quality metrics for short and long reads.
208
209 Features:
210
211 - Informative graphs that allow for judging the quality of a sequence at a quick glance.
212 - Overrepresentation analysis using 21 bp sequence fragments. Overrepresented sequences are checked against the NCBI univec database.
213 - Estimate duplication rate using a fingerprint subsampling technique which is also used in filesystem duplication estimation.
214 - Checks for 6 illumina adapter sequences and 17 nanopore adapter sequences for single read data.
215 - Determines adapters by overlap analysis for paired read data.
216 - Insert size metrics for paired read data.
217 - Per tile quality plots for illumina reads.
218 - Channel and other plots for nanopore reads.
219
220 -----
221
222 **Supported formats**
223
224 - FASTQ. Only the Sanger variation with a phred offset of 33 and the error rate calculation of 10 ^ (-phred/10) is supported. All sequencers use this format today.
225 - Paired end sequencing data is supported.
226 - For sequences called by illumina base callers an additional plot with the per tile quality will be provided.
227 - For sequences called by guppy additional plots for nanopore specific data will be provided.
228 - (unaligned) BAM with single reads. Read-pair information is currently ignored.
229 - For BAM data as delivered by dorado additional nanopore plots will be provided.
230
231 -----
232
233 **Outputs**
234
235 Sequali produces an informative HTML report with dynamic plots for each quality metric, together with a machine-readable JSON report.
236
237 .. _Sequali: https://sequali.readthedocs.io/en/latest/
238 ]]></help>
239 <citations> <!-- Reference to cite when this tool is used, given as a DOI. -->
240 <citation type="doi">10.1093/bioadv/vbaf010</citation>
241 </citations>
242 </tool>