comparison qualifilter.xml @ 0:b694fab47ac7 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/qualifilter commit c1d08b00ccb4837dd592970d2000f5fffe695e9f
author iuc
date Mon, 15 Dec 2025 14:04:25 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b694fab47ac7
1 <tool id="qualifilter" name="QualiFilter" version="@TOOL_VERSION@@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>Report QC metrics and sample pass/fail based on user-defined thresholds</description>
3
<!-- The @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens used by this wrapper are
     presumably defined in the imported macros.xml (not visible here; confirm). -->
<macros>
    <import>macros.xml</import>
</macros>

<!-- Runtime dependency resolved by Galaxy: the qualifilter package, pinned to 1.0.0. -->
<requirements>
    <requirement version="1.0.0" type="package">qualifilter</requirement>
</requirements>

<!-- Reports the wrapper's @TOOL_VERSION@ token; the installed binary itself is not queried. -->
<version_command>echo @TOOL_VERSION@</version_command>
13
<command detect_errors="exit_code"><![CDATA[
## Run qualifilter in the job working directory. stdout and stderr are both
## redirected into qualifilter.log, which is collected as the log output.
qualifilter
    --input '$input_file'
    ## attributes is an optional multi-select. With nothing selected it would be
    ## rendered as the literal string None, so the flag is only passed when at
    ## least one metric was chosen; omitting it lets the tool include all metrics.
    #if $attributes
        --attributes '$attributes'
    #end if
    ## The three threshold fields are serialised into one JSON object. Single
    ## quotes keep the JSON readable and yield the same shell argument as the
    ## previous backslash-escaped double-quoted form.
    --thresholds '{"Total_reads": ${total_reads}, "Coverage_gte_10x_pct": ${coverage_gte_10x_pct}, "Contam_pct": ${contam_max}}'
    --round '${round}'
    ## Expands to --derive_reads when the checkbox is ticked, otherwise to nothing.
    $derive_reads
    #if $config
        --config '$config'
    #end if
    --outdir .
    > qualifilter.log 2>&1
]]></command>
27
<inputs>
    <!-- Tabular per-sample summary that the QC matrix is built from. -->
    <param name="input_file" type="data" format="tabular" label="Input summary file" />

    <!-- Optional column selection; passed to the tool as a comma-separated list. -->
    <param argument="--attributes" type="select" multiple="true" optional="true"
           label="QC metrics to include"
           help="Select which metrics to include in the output. Leave empty to include all.">
        <option value="Sample">Sample</option>
        <option value="Total_reads">Total reads</option>
        <option value="Mapped_reads">Mapped reads</option>
        <option value="Mapping_pct">Mapping %</option>
        <option value="Median_depth">Median depth</option>
        <option value="Coverage_gte_10x_pct">Coverage ≥10x %</option>
        <option value="GC_pct">GC %</option>
        <option value="Kraken_top1_pct">Kraken top1 %</option>
        <option value="Kraken_unclassified_pct">Kraken unclassified %</option>
        <option value="Contam_pct">Contamination %</option>
        <option value="QC_status">QC status</option>
        <option value="Total_reads_pass">Total reads pass</option>
        <option value="Coverage_gte_10x_pct_pass">Coverage at ≥10x pass</option>
        <option value="Contam_pct_pass">Contamination pass</option>
        <option value="MTB_reads">MTB reads</option>
        <option value="Unclassified_reads">Unclassified reads</option>
    </param>

    <!-- The next three values are combined into the JSON object given to thresholds. -->
    <param name="total_reads" type="float" value="1000000" min="0"
           label="Minimum total reads"
           help="Minimum number of sequencing reads required for a sample to pass QC (commonly ≥1M for microbial WGS)." />
    <param name="coverage_gte_10x_pct" type="float" value="90" min="0" max="100"
           label="Minimum coverage pct at ≥10x depth"
           help="Percentage of the genome covered at ≥10x depth. Values ≥90% are generally considered good quality." />
    <param name="contam_max" type="float" value="5" min="0" max="100"
           label="Maximum contamination %"
           help="Maximum proportion of reads not belonging to the target organism (typically ≤5%)." />
    <param name="round" type="integer" value="2" min="0"
           label="Rounding precision"
           help="Number of decimal places used to round numeric values in the output." />

    <!-- The help text promises YAML or JSON, so both datatypes are accepted here
         (previously only yaml datasets could be selected). -->
    <param name="config" type="data" format="yaml,json" optional="true"
           label="Optional config file"
           help="Provide a YAML or JSON config file to override default allowed columns and rename map. Only advanced users need this." />

    <!-- Off by default; when ticked the literal flag is appended to the command line. -->
    <param argument="--derive_reads" type="boolean" checked="false"
           truevalue="--derive_reads" falsevalue=""
           label="Derive MTB/unclassified reads"
           help="When enabled, the derived MTB_reads and Unclassified_reads metrics are calculated." />
</inputs>
65
<!-- All three outputs are picked up from fixed file names in the job working directory.
     Labels include the tool name and the input dataset reference so that results of
     different runs can be told apart in the history. -->
<outputs>
    <data name="qc_matrix_tsv" format="tsv" from_work_dir="QC_matrix.tsv"
          label="${tool.name} on ${on_string}: QC Matrix (TSV)" />
    <data name="qc_matrix_csv" format="csv" from_work_dir="QC_matrix.csv"
          label="${tool.name} on ${on_string}: QC Matrix (CSV)" />
    <!-- qualifilter.log is written by the shell redirection at the end of the command. -->
    <data name="log" format="txt" from_work_dir="qualifilter.log"
          label="${tool.name} on ${on_string}: Log" />
</outputs>
71
<tests>
    <!-- Single regression test: every selectable attribute requested, thresholds and
         rounding at their default values, derived read columns switched on. -->
    <test expect_num_outputs="3">
        <param name="input_file" ftype="tabular" value="qc_matrix.tabular" />
        <param name="attributes" value="Sample,Total_reads,Mapped_reads,Mapping_pct,Median_depth,Coverage_gte_10x_pct,GC_pct,Kraken_top1_pct,Kraken_unclassified_pct,Contam_pct,QC_status,Total_reads_pass,Coverage_gte_10x_pct_pass,Contam_pct_pass,MTB_reads,Unclassified_reads" />
        <param name="total_reads" value="1000000" />
        <param name="coverage_gte_10x_pct" value="90" />
        <param name="contam_max" value="5" />
        <param name="round" value="2" />
        <param name="derive_reads" value="true" />

        <!-- Each output is compared against the expected file of the same name. -->
        <output name="qc_matrix_tsv" file="QC_matrix.tsv" />
        <output name="qc_matrix_csv" file="QC_matrix.csv" />
        <output name="log" file="qualifilter.log" />
    </test>
</tests>
87
<!-- Help is reStructuredText: every list is preceded by a blank line and the YAML
     sample is indented under "::", otherwise they render as plain paragraph text. -->
<help><![CDATA[
**What it does**

This tool extracts sequencing quality control (QC) metrics from a MultiQC tabular summary (.tabular) file and generates a consolidated QC matrix containing only the metrics of interest.
It summarizes key metrics including Total reads, Mapped reads, Coverage percentage, and Contamination.
Each sample is automatically evaluated against user-defined QC thresholds (passed to the tool as a JSON string) to assign a QC Pass/Fail status.

**Input**

- A MultiQC-generated .tabular file containing per-sample QC metrics
- User-defined thresholds for Total reads, Coverage >=10x percentage, Maximum contamination percentage
- You can specify which QC metrics to include in the output using the --attributes option (passed to the tool as a comma-separated list). If left empty, all available metrics will be included automatically
- Optionally, a YAML or JSON config file can be provided to customize allowed columns and rename mappings

**Available metrics / attributes**

- Sample - unique identifier for each sample
- Total_reads - total number of sequencing reads
- Mapped_reads - reads mapped to the reference genome
- Mapping_pct - percentage of reads aligned to the reference genome
- Median_depth - median sequencing coverage across the genome
- Coverage_gte_10x_pct - percentage of the genome covered at >=10x depth
- GC_pct - GC content percentage of reads
- Kraken_top1_pct - percentage of reads assigned to the top taxonomic hit by Kraken
- Kraken_unclassified_pct - percentage of reads unclassified by Kraken
- Contam_pct - estimated contamination percentage
- QC_status - Pass/Fail status of the sample based on thresholds
- Total_reads_pass, Coverage_gte_10x_pct_pass, Contam_pct_pass - individual pass/fail result for each of the three thresholds
- MTB_reads (optional, derived if --derive_reads is selected) - reads assigned to the target organism
- Unclassified_reads (optional, derived if --derive_reads is selected) - reads that could not be classified

**Output**

- A summarized QC matrix in TSV format
- A summarized QC matrix in CSV format
- A log file (qualifilter.log) capturing the console output of the run
- Both matrices include Pass/Fail status for each sample based on the threshold evaluation

**Threshold behavior**

- The three threshold fields are passed to the tool as a JSON-formatted string. Example: ``{"Total_reads": 1000000, "Coverage_gte_10x_pct": 90, "Contam_pct": 5}``

**Optional configuration file**

An optional YAML or JSON configuration file can be supplied for advanced use cases where the default behavior needs to be customized. This file allows users to:

- Define custom allowed columns
- Rename columns in the output matrix

Example YAML::

    allowed_columns:
      - Sample
      - Total_reads
      - Coverage_gte_10x_pct
      - Contam_pct

    rename_map:
      qualimap_bamqc-total_reads: Total_reads
      qualimap_bamqc-mapped_reads: Mapped_reads
      qualimap_bamqc-percentage_aligned: Mapping_pct
      qualimap_bamqc-median_coverage: Median_depth

**Additional Notes**

- Read count fields (Total_reads, Mapped_reads) are automatically scaled if MultiQC reports them in millions (e.g., Qualimap output). No action is required
- If no QC metric attributes are selected, the tool includes all available columns
- Derived read metrics (MTB_reads, Unclassified_reads) are calculated only when the relevant option is enabled
- Default thresholds: Total reads >= 1000000, Coverage >=10x percentage >= 90, Contamination percentage <= 5
- Rounding precision for numeric metrics can be adjusted (default is 2 decimal places)
- The tool generates a log file documenting the processing steps and any issues encountered

]]></help>
152
<!-- Single BibTeX entry pointing at the upstream GitHub repository. -->
<citations>
    <citation type="bibtex">
        @misc{bntozini2025,
            author = {Buhle Ntozini},
            year = {2025},
            title = {QualiFilter: QC matrix extractor and decision tool},
            publisher = {GitHub},
            journal = {GitHub repository},
            url = {https://github.com/buhlentozini/QualiFilter}
        }
    </citation>
</citations>
165 </tool>