changeset 0:b694fab47ac7 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/qualifilter commit c1d08b00ccb4837dd592970d2000f5fffe695e9f
author iuc
date Mon, 15 Dec 2025 14:04:25 +0000
parents
children
files macros.xml qualifilter.xml test-data/QC_matrix.csv test-data/QC_matrix.tsv test-data/qc_matrix.tabular test-data/qualifilter.log
diffstat 6 files changed, 208 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Dec 15 14:04:25 2025 +0000
@@ -0,0 +1,5 @@
+<macros>
+  <token name="@TOOL_VERSION@">1.0.0</token> <!-- substituted into the tool version attribute and the version_command of qualifilter.xml -->
+  <token name="@VERSION_SUFFIX@">0</token> <!-- appended after @TOOL_VERSION@ in the tool version attribute -->
+  <token name="@PROFILE@">25.0</token> <!-- value of the tool profile attribute -->
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qualifilter.xml	Mon Dec 15 14:04:25 2025 +0000
@@ -0,0 +1,165 @@
+<tool id="qualifilter" name="QualiFilter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <!-- version reads: upstream release, then +galaxy, then wrapper revision (1.0.0+galaxy0), not the fused 1.0.00 -->
+  <description>Report QC metrics and sample pass/fail based on user-defined thresholds</description>
+
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+
+  <requirements> <!-- package version follows the macros.xml token so a version bump is made in one place only -->
+    <requirement type="package" version="@TOOL_VERSION@">qualifilter</requirement>
+  </requirements>
+
+  <version_command>echo @TOOL_VERSION@</version_command> <!-- NOTE(review): echoes the wrapper token; it does not query the installed program -->
+
+  <command detect_errors="exit_code"><![CDATA[
+  ## An unset optional multi-select is rendered as the string None, so --attributes is only passed when something is selected.
+  qualifilter --input '$input_file'
+  #if $attributes
+      --attributes '$attributes'
+  #end if
+      --thresholds "{\"Total_reads\": ${total_reads}, \"Coverage_gte_10x_pct\": ${coverage_gte_10x_pct}, \"Contam_pct\": ${contam_max}}"
+      --round '${round}' $derive_reads
+  #if $config
+      --config '$config'
+  #end if
+      --outdir . > qualifilter.log 2>&1
+  ]]></command>
+
+  <inputs>
+    <param name="input_file" type="data" format="tabular" label="Input summary file" help="MultiQC-generated tabular file containing per-sample QC metrics." />
+
+    <param argument="--attributes" type="select" multiple="true" optional="true"
+           label="QC metrics to include"
+           help="Select which metrics to include in the output. Leave empty to include all.">
+        <option value="Sample">Sample</option>
+        <option value="Total_reads">Total reads</option>
+        <option value="Mapped_reads">Mapped reads</option>
+        <option value="Mapping_pct">Mapping %</option>
+        <option value="Median_depth">Median depth</option>
+        <option value="Coverage_gte_10x_pct">Coverage ≥10x %</option>
+        <option value="GC_pct">GC %</option>
+        <option value="Kraken_top1_pct">Kraken top1 %</option>
+        <option value="Kraken_unclassified_pct">Kraken unclassified %</option>
+        <option value="Contam_pct">Contamination %</option>
+        <option value="QC_status">QC status</option>
+        <option value="Total_reads_pass">Total reads pass</option>
+        <option value="Coverage_gte_10x_pct_pass">Coverage at ≥10x pass</option>
+        <option value="Contam_pct_pass">Contamination pass</option>
+        <option value="MTB_reads">MTB reads</option>
+        <option value="Unclassified_reads">Unclassified reads</option>
+    </param>
+
+    <param name="total_reads" type="float" value="1000000" min="0" label="Minimum total reads" help="Minimum number of sequencing reads required for a sample to pass QC (commonly ≥1M for microbial WGS)." />
+    <param name="coverage_gte_10x_pct" type="float" value="90" min="0" max="100" label="Minimum coverage pct at ≥10x depth" help="Percentage of the genome covered at ≥10x depth. Values ≥90% are generally considered good quality." />
+    <param name="contam_max" type="float" value="5" min="0" max="100" label="Maximum contamination %" help="Maximum proportion of reads not belonging to the target organism (typically ≤5%)." />
+    <param name="round" type="integer" value="2" min="0" label="Rounding precision" help="Number of decimal places used to round numeric values in the output." />
+    <!-- The config help promises YAML or JSON, so both datatypes are accepted. -->
+    <param name="config" type="data" format="yaml,json" optional="true"
+       label="Optional config file"
+       help="Provide a YAML or JSON config file to override default allowed columns and rename map. Only advanced users need this." />
+
+    <param argument="--derive_reads" type="boolean" checked="false"
+           truevalue="--derive_reads" falsevalue=""
+           label="Derive MTB/unclassified reads" help="Calculate the MTB_reads and Unclassified_reads columns." />
+  </inputs>
+
+  <outputs> <!-- each dataset is collected from the job working directory; labels carry the input name so repeated runs stay distinguishable -->
+    <data name="qc_matrix_tsv" format="tsv" from_work_dir="QC_matrix.tsv" label="${tool.name} on ${on_string}: QC matrix (TSV)" />
+    <data name="qc_matrix_csv" format="csv" from_work_dir="QC_matrix.csv" label="${tool.name} on ${on_string}: QC matrix (CSV)" />
+    <data name="log" format="txt" from_work_dir="qualifilter.log" label="${tool.name} on ${on_string}: log" />
+  </outputs>
+
+  <tests>
+    <!-- Full run: all 16 selectable attributes, thresholds equal to the form defaults, derived read counts switched on. -->
+    <test expect_num_outputs="3">
+      <param name="input_file" ftype="tabular" value="qc_matrix.tabular" />
+      <param name="attributes" value="Sample,Total_reads,Mapped_reads,Mapping_pct,Median_depth,Coverage_gte_10x_pct,GC_pct,Kraken_top1_pct,Kraken_unclassified_pct,Contam_pct,QC_status,Total_reads_pass,Coverage_gte_10x_pct_pass,Contam_pct_pass,MTB_reads,Unclassified_reads" />
+      <param name="total_reads" value="1000000" />
+      <param name="coverage_gte_10x_pct" value="90" />
+      <param name="contam_max" value="5" />
+      <param name="round" value="2" />
+      <param name="derive_reads" value="true" />
+      <output file="QC_matrix.tsv" name="qc_matrix_tsv" />
+      <output file="QC_matrix.csv" name="qc_matrix_csv" />
+      <output file="qualifilter.log" name="log" />
+    </test>
+  </tests>
+
+  <help><![CDATA[
+  **What it does**
+
+  This tool extracts sequencing quality control (QC) metrics from a MultiQC tabular summary (.tabular) file and generates a consolidated QC matrix containing only the metrics of interest. It summarizes key metrics including Total reads, Mapped reads, Coverage percentage, and Contamination. Each sample is automatically evaluated against the QC thresholds set in the tool form to assign a QC Pass/Fail status.
+
+  **Input**
+
+  - A MultiQC-generated .tabular file containing per-sample QC metrics
+  - User-defined thresholds for Total reads, Coverage >=10x percentage, Maximum contamination percentage
+  - The QC metrics to include in the output, chosen with the "QC metrics to include" selector (passed to QualiFilter as a comma-separated --attributes list). If left empty, all available metrics will be included automatically
+  - Optionally, a YAML or JSON config file can be provided to customize allowed columns and rename mappings
+
+  **Available metrics / attributes**
+
+  - Sample - unique identifier for each sample
+  - Total_reads - total number of sequencing reads
+  - Mapped_reads - reads mapped to the reference genome
+  - Mapping_pct - percentage of reads mapped to the reference genome
+  - Median_depth - median sequencing coverage across the genome
+  - Coverage_gte_10x_pct - percentage of the genome covered at >=10x depth
+  - GC_pct - GC content percentage of reads
+  - Kraken_top1_pct - percentage of reads assigned to the top taxonomic hit by Kraken
+  - Kraken_unclassified_pct - percentage of reads unclassified by Kraken
+  - Contam_pct - estimated contamination percentage
+  - QC_status - Pass/Fail status of the sample based on thresholds
+  - Total_reads_pass, Coverage_gte_10x_pct_pass, Contam_pct_pass - True/False result of each individual threshold check
+  - MTB_reads (optional, derived if --derive_reads is selected) - reads assigned to the target organism
+  - Unclassified_reads (optional, derived if --derive_reads is selected) - reads that could not be classified
+
+  **Output**
+
+  - A summarized QC matrix, written in both TSV and CSV format
+  - Both outputs include Pass/Fail status for each sample based on the threshold evaluation
+
+  **Threshold behavior** - The three threshold fields are passed to QualiFilter as one JSON-formatted string. Example: {"Total_reads": 1000000, "Coverage_gte_10x_pct": 90, "Contam_pct": 5}
+
+  **Optional configuration file**
+
+  - An optional YAML or JSON configuration file can be supplied for advanced use cases where the default behavior needs to be customized. This file allows users to:
+      - Define custom allowed columns
+      - Rename columns in the output matrix
+
+  Example YAML::
+
+      allowed_columns:
+        - Sample
+        - Total_reads
+        - Coverage_gte_10x_pct
+        - Contam_pct
+      rename_map:
+        qualimap_bamqc-total_reads: Total_reads
+        qualimap_bamqc-mapped_reads: Mapped_reads
+        qualimap_bamqc-percentage_aligned: Mapping_pct
+        qualimap_bamqc-median_coverage: Median_depth
+
+  **Additional Notes**
+
+  - Read count fields (Total_reads, Mapped_reads) are automatically scaled if MultiQC reports them in millions (e.g., Qualimap output). No action is required
+  - If no QC metric attributes are selected, the tool includes all available columns
+  - Derived read metrics (MTB_reads, Unclassified_reads) are calculated only when the relevant option is enabled
+  - Default thresholds: Total reads >= 1000000, Coverage >=10x percentage >= 90, Contamination percentage <= 5
+  - Rounding precision for numeric metrics can be adjusted (default is 2 decimal places)
+  - The tool generates a log file documenting the processing steps and any issues encountered
+  ]]></help>
+
+  <citations>
+    <citation type="bibtex">
+      @misc{bntozini2025,
+        author    = {Buhle Ntozini},
+        title     = {QualiFilter: QC matrix extractor and decision tool},
+        year      = {2025},
+        publisher = {GitHub},
+        journal   = {GitHub repository},
+        url       = {https://github.com/buhlentozini/QualiFilter}
+      }
+    </citation>
+  </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/QC_matrix.csv	Mon Dec 15 14:04:25 2025 +0000
@@ -0,0 +1,11 @@
+Sample,Total_reads,Mapped_reads,Mapping_pct,Median_depth,Coverage_gte_10x_pct,GC_pct,Kraken_top1_pct,Kraken_unclassified_pct,Contam_pct,QC_status,Total_reads_pass,Coverage_gte_10x_pct_pass,Contam_pct_pass,MTB_reads,Unclassified_reads
+ERR2510682,2243645.0,2147374.0,95.71,35,96.56,63.95,89.74,3.35,10.26,Fail,True,True,False,2013353.67,75058.8
+ERR4797736,2930841.0,2917004.0,99.53,91,97.28,65.98,96.48,0.51,3.52,Pass,True,True,True,2827782.52,15081.95
+ERR4810698,10001091.0,9936216.0,99.35,344,98.82,66.07,98.49,0.29,1.51,Pass,True,True,True,9850413.37,29200.36
+ERR4812967,3172047.0,3142927.0,99.08,106,98.54,65.01,98.39,0.34,1.61,Pass,True,True,True,3121015.07,10924.71
+ERR4829557,3043795.0,3004399.0,98.71,101,97.21,65.46,97.6,0.73,2.4,Pass,True,True,True,2970882.6,22299.31
+ERR4831263,4383612.0,4331884.0,98.82,147,98.4,64.93,98.54,0.33,1.46,Pass,True,True,True,4319583.07,14558.43
+ERR4831716,3010970.0,2724126.0,90.47,41,96.99,63.61,84.97,4.71,15.03,Fail,True,True,False,2558310.92,141700.63
+ERR8665675,1612980.0,1601774.0,99.31,34,98.06,64.5,97.38,0.47,2.62,Pass,True,True,True,1570695.09,7601.19
+ERR8665676,1586447.0,1575329.0,99.3,34,98.01,64.5,97.36,0.48,2.64,Pass,True,True,True,1544565.97,7564.2
+ERR8665915,4656415.0,4634080.0,99.52,145,99.09,65.08,98.08,0.28,1.92,Pass,True,True,True,4567086.51,13184.55
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/QC_matrix.tsv	Mon Dec 15 14:04:25 2025 +0000
@@ -0,0 +1,11 @@
+Sample	Total_reads	Mapped_reads	Mapping_pct	Median_depth	Coverage_gte_10x_pct	GC_pct	Kraken_top1_pct	Kraken_unclassified_pct	Contam_pct	QC_status	Total_reads_pass	Coverage_gte_10x_pct_pass	Contam_pct_pass	MTB_reads	Unclassified_reads
+ERR2510682	2243645.0	2147374.0	95.71	35	96.56	63.95	89.74	3.35	10.26	Fail	True	True	False	2013353.67	75058.8
+ERR4797736	2930841.0	2917004.0	99.53	91	97.28	65.98	96.48	0.51	3.52	Pass	True	True	True	2827782.52	15081.95
+ERR4810698	10001091.0	9936216.0	99.35	344	98.82	66.07	98.49	0.29	1.51	Pass	True	True	True	9850413.37	29200.36
+ERR4812967	3172047.0	3142927.0	99.08	106	98.54	65.01	98.39	0.34	1.61	Pass	True	True	True	3121015.07	10924.71
+ERR4829557	3043795.0	3004399.0	98.71	101	97.21	65.46	97.6	0.73	2.4	Pass	True	True	True	2970882.6	22299.31
+ERR4831263	4383612.0	4331884.0	98.82	147	98.4	64.93	98.54	0.33	1.46	Pass	True	True	True	4319583.07	14558.43
+ERR4831716	3010970.0	2724126.0	90.47	41	96.99	63.61	84.97	4.71	15.03	Fail	True	True	False	2558310.92	141700.63
+ERR8665675	1612980.0	1601774.0	99.31	34	98.06	64.5	97.38	0.47	2.62	Pass	True	True	True	1570695.09	7601.19
+ERR8665676	1586447.0	1575329.0	99.3	34	98.01	64.5	97.36	0.48	2.64	Pass	True	True	True	1544565.97	7564.2
+ERR8665915	4656415.0	4634080.0	99.52	145	99.09	65.08	98.08	0.28	1.92	Pass	True	True	True	4567086.51	13184.55
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/qc_matrix.tabular	Mon Dec 15 14:04:25 2025 +0000
@@ -0,0 +1,11 @@
+Sample	qualimap_bamqc-avg_gc	qualimap_bamqc-1_x_pc	qualimap_bamqc-5_x_pc	qualimap_bamqc-10_x_pc	qualimap_bamqc-30_x_pc	qualimap_bamqc-50_x_pc	qualimap_bamqc-median_coverage	qualimap_bamqc-mean_coverage	qualimap_bamqc-general_error_rate	qualimap_bamqc-percentage_aligned	qualimap_bamqc-mapped_reads	qualimap_bamqc-mapped_paired_reads	qualimap_bamqc-total_reads	qualimap_bamqc-ns	qualimap_bamqc-duplication_rate	kraken-pct_top_one	kraken-pct_top_n	kraken-pct_unclassified	fastp-pct_duplication	fastp-after_filtering_q30_rate	fastp-after_filtering_q30_bases	fastp-filtering_result_passed_filter_reads	fastp-after_filtering_gc_content	fastp-pct_surviving	fastp-pct_adapter
+ERR2510682	63.94676508255957	99.076737967672	98.4598094267479	96.56377875078317	62.54595909085551	23.169116760345386	35	39.3881	0.25	95.7091696770211	2.147374	2.147194	2.243645	1119	54.48	89.73583930285798	90.33837973561776	3.3453954656229232	2.28059	83.647	186.831594	2.7081999999999997	63.607499999999995	78.28013779524146	7.83090426537435
+ERR4797736	65.98117378087333	97.57900430054684	97.34473194346091	97.28146593972343	96.8967923161387	96.15471450734121	91	89.2787	0.2	99.52788295236759	2.917004	2.916858	2.930841	3124	56.7	96.4836551921708	96.55145884180858	0.514594673035039	3.14759	94.84270000000001	410.387866	3.1974679999999998	65.5292	98.89263878819969	36.10825195034628
+ERR4810698	66.06827162614897	99.10307802368882	98.89272026135139	98.81755362989547	98.62616886832058	98.48000649207577	344	337.5519	0.22999999999999998	99.3513207709039	9.936216	9.935599999999999	10.001090999999999	16602	72.63	98.49338811178657	98.57286100054672	0.2919717772039071	5.84335	92.707	1540.550198	11.088058	65.6448	98.32734427639062	0.3442680548296227
+ERR4812967	65.0135561568253	99.0756499102806	98.80631037018432	98.53640413353003	97.53446195108639	96.27263272713425	106	106.434	0.19	99.08198081554276	3.142927	3.142156	3.172047	1551	41.67	98.3911987578464	98.46440088649423	0.34440566400806866	9.78699	94.0182	529.581781	3.7676499999999997	64.569	99.40619858391054	5.927513273859968
+ERR4829557	65.45924101535405	98.07271034189483	97.66403145211233	97.20967681975331	95.42319992238524	92.96559562528391	101	96.3718	0.22999999999999998	98.70569470020156	3.004399	3.0040299999999998	3.043795	3204	47.1	97.60455607440466	97.72487319640423	0.732615476261983	2.36709	92.5543	423.810995	3.2297979999999997	65.0168	97.3195964509572	0.47186383805488447
+ERR4831263	64.93489615021015	98.80363556243047	98.6006448553473	98.3959767264524	97.5493774044935	96.4940977420089	147	144.7993	0.48	98.81996855561121	4.331884	4.33096	4.383611999999999	963	51.41	98.53935680275767	98.69062775732024	0.3321104526515484	1.1507	86.8973	598.72349	4.667122	64.5177	94.7702870528531	0.30434538937447153
+ERR4831716	63.607144753187995	99.31613326164243	98.26933137966584	96.99410544908208	74.73623675403465	31.720885170956485	41	42.373	0.7799999999999999	90.47336904718412	2.724126	2.7060679999999997	3.01097	154	90.42	84.96633720504228	87.99613075239598	4.706145639797586	8.30781	81.05059999999999	211.60257299999998	3.521098	62.4888	93.46957584947172	3.1281838100653396
+ERR8665675	64.49681937041134	99.10230731636992	98.73660669354773	98.05935897098786	66.35159849231515	13.931033482246077	34	35.867	0.32	99.30526106957309	1.601774	1.601478	1.6129799999999999	1019	32.45	97.37846008433556	97.44301503277686	0.47125112362150823	0.330143	98.3073	159.513983	1.642012	64.0095	96.83787146960127	8.996656110118364
+ERR8665676	64.49749411885034	99.11389059401587	98.72520475880034	98.01429526069401	64.5736447111797	12.862017095195048	34	35.2675	0.32	99.29918869019892	1.575329	1.575054	1.586447	958	32.23	97.36007349499923	97.43262752050951	0.47680128368623975	0.33205399999999996	98.3579	156.973151	1.615348	64.0119	96.8550185873606	9.043050725506657
+ERR8665915	65.07899991839966	99.3171759833092	99.23715842931662	99.09215211405018	98.38822431753867	97.5766695107278	145	146.6877	0.31	99.52033914502896	4.63408	4.633552	4.656415	5053	54.69	98.08160380037087	98.11179842055897	0.28314799475999974	3.2687099999999996	92.937	662.816731	5.10687	64.65950000000001	99.487917374808	22.862192382230347
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/qualifilter.log	Mon Dec 15 14:04:25 2025 +0000
@@ -0,0 +1,5 @@
+Total_reads appears to be in millions — scaling by 1e6
+Mapped_reads appears to be in millions — scaling by 1e6
+QC matrix saved successfully!
+TSV: ./QC_matrix.tsv
+CSV: ./QC_matrix.csv