Mercurial > repos > iuc > qualifilter
changeset 0:b694fab47ac7 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/qualifilter commit c1d08b00ccb4837dd592970d2000f5fffe695e9f
| author | iuc |
|---|---|
| date | Mon, 15 Dec 2025 14:04:25 +0000 |
| parents | |
| children | |
| files | macros.xml qualifilter.xml test-data/QC_matrix.csv test-data/QC_matrix.tsv test-data/qc_matrix.tabular test-data/qualifilter.log |
| diffstat | 6 files changed, 208 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Dec 15 14:04:25 2025 +0000
@@ -0,0 +1,8 @@
+<macros>
+    <!-- Galaxy profile the wrapper targets. -->
+    <token name="@PROFILE@">25.0</token>
+    <!-- Version of the wrapped QualiFilter release. -->
+    <token name="@TOOL_VERSION@">1.0.0</token>
+    <!-- Wrapper revision; combined with the tool version in the tool's version attribute. -->
+    <token name="@VERSION_SUFFIX@">0</token>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/qualifilter.xml Mon Dec 15 14:04:25 2025 +0000
@@ -0,0 +1,183 @@
+<tool id="qualifilter" name="QualiFilter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Report QC metrics and sample pass/fail based on user-defined thresholds</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <requirements>
+        <!-- Pinned through the shared token so the package and tool versions cannot drift apart. -->
+        <requirement type="package" version="@TOOL_VERSION@">qualifilter</requirement>
+    </requirements>
+
+    <version_command>echo @TOOL_VERSION@</version_command>
+
+    <command detect_errors="exit_code"><![CDATA[
+        ## The attribute list is optional: with nothing selected the flag is left
+        ## out entirely, otherwise the literal string 'None' would be passed.
+        ## Threshold values come from validated float params, so they can be
+        ## interpolated directly into the JSON object below.
+        qualifilter
+            --input '$input_file'
+            #if $attributes
+                --attributes '$attributes'
+            #end if
+            --thresholds "{\"Total_reads\": ${total_reads}, \"Coverage_gte_10x_pct\": ${coverage_gte_10x_pct}, \"Contam_pct\": ${contam_max}}"
+            --round '${round}'
+            $derive_reads
+            #if $config
+                --config '$config'
+            #end if
+            --outdir .
+            > qualifilter.log 2>&1
+    ]]></command>
+
+    <inputs>
+        <param name="input_file" type="data" format="tabular" label="Input summary file" />
+
+        <!-- Multi-select; leaving it empty means "report every available metric". -->
+        <param argument="--attributes" type="select" multiple="true" optional="true"
+               label="QC metrics to include"
+               help="Select which metrics to include in the output. Leave empty to include all.">
+            <option value="Sample">Sample</option>
+            <option value="Total_reads">Total reads</option>
+            <option value="Mapped_reads">Mapped reads</option>
+            <option value="Mapping_pct">Mapping %</option>
+            <option value="Median_depth">Median depth</option>
+            <option value="Coverage_gte_10x_pct">Coverage ≥10x %</option>
+            <option value="GC_pct">GC %</option>
+            <option value="Kraken_top1_pct">Kraken top1 %</option>
+            <option value="Kraken_unclassified_pct">Kraken unclassified %</option>
+            <option value="Contam_pct">Contamination %</option>
+            <option value="QC_status">QC status</option>
+            <option value="Total_reads_pass">Total reads pass</option>
+            <option value="Coverage_gte_10x_pct_pass">Coverage at ≥10x pass</option>
+            <option value="Contam_pct_pass">Contamination pass</option>
+            <option value="MTB_reads">MTB reads</option>
+            <option value="Unclassified_reads">Unclassified reads</option>
+        </param>
+
+        <!-- QC thresholds; these three values are assembled into the JSON thresholds argument. -->
+        <param name="total_reads" type="float" value="1000000" min="0" label="Minimum total reads" help="Minimum number of sequencing reads required for a sample to pass QC (commonly ≥1M for microbial WGS)." />
+        <param name="coverage_gte_10x_pct" type="float" value="90" min="0" max="100" label="Minimum coverage pct at ≥10x depth" help="Percentage of the genome covered at ≥10x depth. Values ≥90% are generally considered good quality." />
+        <param name="contam_max" type="float" value="5" min="0" max="100" label="Maximum contamination %" help="Maximum proportion of reads not belonging to the target organism (typically ≤5%)." />
+        <param name="round" type="integer" value="2" min="0" label="Rounding precision" help="Number of decimal places used to round numeric values in the output." />
+
+        <!-- The help text promises YAML or JSON, so both datatypes are accepted. -->
+        <param name="config" type="data" format="yaml,json" optional="true"
+               label="Optional config file"
+               help="Provide a YAML or JSON config file to override default allowed columns and rename map. Only advanced users need this." />
+
+        <param argument="--derive_reads" type="boolean"
+               truevalue="--derive_reads" falsevalue=""
+               label="Derive MTB/unclassified reads" />
+    </inputs>
+
+    <outputs>
+        <data name="qc_matrix_tsv" format="tsv" label="QC Matrix (TSV)" from_work_dir="QC_matrix.tsv" />
+        <data name="qc_matrix_csv" format="csv" label="QC Matrix (CSV)" from_work_dir="QC_matrix.csv" />
+        <data name="log" format="txt" label="QualiFilter Log" from_work_dir="qualifilter.log" />
+    </outputs>
+
+    <tests>
+        <test expect_num_outputs="3">
+            <param name="input_file" value="qc_matrix.tabular" ftype="tabular"/>
+            <param name="attributes" value="Sample,Total_reads,Mapped_reads,Mapping_pct,Median_depth,Coverage_gte_10x_pct,GC_pct,Kraken_top1_pct,Kraken_unclassified_pct,Contam_pct,QC_status,Total_reads_pass,Coverage_gte_10x_pct_pass,Contam_pct_pass,MTB_reads,Unclassified_reads" />
+            <param name="total_reads" value="1000000" />
+            <param name="coverage_gte_10x_pct" value="90" />
+            <param name="contam_max" value="5" />
+            <param name="round" value="2" />
+            <param name="derive_reads" value="true" />
+
+            <output name="qc_matrix_tsv" file="QC_matrix.tsv" />
+            <output name="qc_matrix_csv" file="QC_matrix.csv" />
+            <output name="log" file="qualifilter.log" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+This tool extracts sequencing quality control (QC) metrics from a MultiQC tabular summary (.tabular) file and generates a consolidated QC matrix containing only the metrics of interest.
+It summarizes key metrics including Total reads, Mapped reads, Coverage percentage, and Contamination.
+Each sample is automatically evaluated against user-defined QC thresholds (passed to QualiFilter as a JSON string) to assign a QC Pass/Fail status.
+
+**Input**
+
+- A MultiQC-generated .tabular file containing per-sample QC metrics
+- User-defined thresholds for Total reads, Coverage >=10x percentage, Maximum contamination percentage
+- You can specify which QC metrics to include in the output using the --attributes option (comma-separated list). If left empty, all available metrics will be included automatically
+- Optionally, a YAML or JSON config file can be provided to customize allowed columns and rename mappings
+
+**Available metrics / attributes**
+
+- Sample - unique identifier for each sample
+- Total_reads - total number of sequencing reads
+- Mapped_reads - reads mapped to the reference genome
+- Mapping_pct - percentage of reads mapped to the reference genome
+- Median_depth - median sequencing coverage across the genome
+- Coverage_gte_10x_pct - percentage of the genome covered at >=10x depth
+- GC_pct - GC content percentage of reads
+- Kraken_top1_pct - percentage of reads assigned to the top taxonomic hit by Kraken
+- Kraken_unclassified_pct - percentage of reads unclassified by Kraken
+- Contam_pct - estimated contamination percentage
+- QC_status - Pass/Fail status of the sample based on thresholds
+- Total_reads_pass, Coverage_gte_10x_pct_pass, Contam_pct_pass - True/False flag for each individual threshold
+- MTB_reads (optional, derived if --derive_reads is selected) - reads assigned to the target organism
+- Unclassified_reads (optional, derived if --derive_reads is selected) - reads that could not be classified
+
+**Output**
+
+- A summarized QC matrix in TSV format
+- A summarized QC matrix in CSV format
+- Both outputs include Pass/Fail status for each sample based on the threshold evaluation
+
+**Threshold behavior**
+
+- Thresholds are provided as a JSON-formatted string. Example: ``{"Total_reads": 1000000, "Coverage_gte_10x_pct": 90, "Contam_pct": 5}``
+
+**Optional configuration file**
+
+- An optional YAML or JSON configuration file can be supplied for advanced use cases where the default behavior needs to be customized. This file allows users to:
+
+  - Define custom allowed columns
+  - Rename columns in the output matrix
+
+Example YAML::
+
+    allowed_columns:
+      - Sample
+      - Total_reads
+      - Coverage_gte_10x_pct
+      - Contam_pct
+
+    rename_map:
+      qualimap_bamqc-total_reads: Total_reads
+      qualimap_bamqc-mapped_reads: Mapped_reads
+      qualimap_bamqc-percentage_aligned: Mapping_pct
+      qualimap_bamqc-median_coverage: Median_depth
+
+**Additional Notes**
+
+- Read count fields (Total_reads, Mapped_reads) are automatically scaled if MultiQC reports them in millions (e.g., Qualimap output). No action is required
+- If no QC metric attributes are selected, the tool includes all available columns
+- Derived read metrics (MTB_reads, Unclassified_reads) are calculated only when the relevant option is enabled
+- Default thresholds: Total reads >= 1000000, Coverage >=10x percentage >= 90, Contamination percentage <= 5
+- Rounding precision for numeric metrics can be adjusted (default is 2 decimal places)
+- The tool generates a log file documenting the processing steps and any issues encountered
+
+    ]]></help>
+
+    <citations>
+        <citation type="bibtex">
+            @misc{bntozini2025,
+                author = {Buhle Ntozini},
+                year = {2025},
+                title = {QualiFilter: QC matrix extractor and decision tool},
+                publisher = {GitHub},
+                journal = {GitHub repository},
+                url = {https://github.com/buhlentozini/QualiFilter}
+            }
+        </citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/QC_matrix.csv Mon Dec 15 14:04:25 2025 +0000 @@ -0,0 +1,11 @@ +Sample,Total_reads,Mapped_reads,Mapping_pct,Median_depth,Coverage_gte_10x_pct,GC_pct,Kraken_top1_pct,Kraken_unclassified_pct,Contam_pct,QC_status,Total_reads_pass,Coverage_gte_10x_pct_pass,Contam_pct_pass,MTB_reads,Unclassified_reads +ERR2510682,2243645.0,2147374.0,95.71,35,96.56,63.95,89.74,3.35,10.26,Fail,True,True,False,2013353.67,75058.8 +ERR4797736,2930841.0,2917004.0,99.53,91,97.28,65.98,96.48,0.51,3.52,Pass,True,True,True,2827782.52,15081.95 +ERR4810698,10001091.0,9936216.0,99.35,344,98.82,66.07,98.49,0.29,1.51,Pass,True,True,True,9850413.37,29200.36 +ERR4812967,3172047.0,3142927.0,99.08,106,98.54,65.01,98.39,0.34,1.61,Pass,True,True,True,3121015.07,10924.71 +ERR4829557,3043795.0,3004399.0,98.71,101,97.21,65.46,97.6,0.73,2.4,Pass,True,True,True,2970882.6,22299.31 +ERR4831263,4383612.0,4331884.0,98.82,147,98.4,64.93,98.54,0.33,1.46,Pass,True,True,True,4319583.07,14558.43 +ERR4831716,3010970.0,2724126.0,90.47,41,96.99,63.61,84.97,4.71,15.03,Fail,True,True,False,2558310.92,141700.63 +ERR8665675,1612980.0,1601774.0,99.31,34,98.06,64.5,97.38,0.47,2.62,Pass,True,True,True,1570695.09,7601.19 +ERR8665676,1586447.0,1575329.0,99.3,34,98.01,64.5,97.36,0.48,2.64,Pass,True,True,True,1544565.97,7564.2 +ERR8665915,4656415.0,4634080.0,99.52,145,99.09,65.08,98.08,0.28,1.92,Pass,True,True,True,4567086.51,13184.55
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/QC_matrix.tsv Mon Dec 15 14:04:25 2025 +0000 @@ -0,0 +1,11 @@ +Sample Total_reads Mapped_reads Mapping_pct Median_depth Coverage_gte_10x_pct GC_pct Kraken_top1_pct Kraken_unclassified_pct Contam_pct QC_status Total_reads_pass Coverage_gte_10x_pct_pass Contam_pct_pass MTB_reads Unclassified_reads +ERR2510682 2243645.0 2147374.0 95.71 35 96.56 63.95 89.74 3.35 10.26 Fail True True False 2013353.67 75058.8 +ERR4797736 2930841.0 2917004.0 99.53 91 97.28 65.98 96.48 0.51 3.52 Pass True True True 2827782.52 15081.95 +ERR4810698 10001091.0 9936216.0 99.35 344 98.82 66.07 98.49 0.29 1.51 Pass True True True 9850413.37 29200.36 +ERR4812967 3172047.0 3142927.0 99.08 106 98.54 65.01 98.39 0.34 1.61 Pass True True True 3121015.07 10924.71 +ERR4829557 3043795.0 3004399.0 98.71 101 97.21 65.46 97.6 0.73 2.4 Pass True True True 2970882.6 22299.31 +ERR4831263 4383612.0 4331884.0 98.82 147 98.4 64.93 98.54 0.33 1.46 Pass True True True 4319583.07 14558.43 +ERR4831716 3010970.0 2724126.0 90.47 41 96.99 63.61 84.97 4.71 15.03 Fail True True False 2558310.92 141700.63 +ERR8665675 1612980.0 1601774.0 99.31 34 98.06 64.5 97.38 0.47 2.62 Pass True True True 1570695.09 7601.19 +ERR8665676 1586447.0 1575329.0 99.3 34 98.01 64.5 97.36 0.48 2.64 Pass True True True 1544565.97 7564.2 +ERR8665915 4656415.0 4634080.0 99.52 145 99.09 65.08 98.08 0.28 1.92 Pass True True True 4567086.51 13184.55
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/qc_matrix.tabular Mon Dec 15 14:04:25 2025 +0000 @@ -0,0 +1,11 @@ +Sample qualimap_bamqc-avg_gc qualimap_bamqc-1_x_pc qualimap_bamqc-5_x_pc qualimap_bamqc-10_x_pc qualimap_bamqc-30_x_pc qualimap_bamqc-50_x_pc qualimap_bamqc-median_coverage qualimap_bamqc-mean_coverage qualimap_bamqc-general_error_rate qualimap_bamqc-percentage_aligned qualimap_bamqc-mapped_reads qualimap_bamqc-mapped_paired_reads qualimap_bamqc-total_reads qualimap_bamqc-ns qualimap_bamqc-duplication_rate kraken-pct_top_one kraken-pct_top_n kraken-pct_unclassified fastp-pct_duplication fastp-after_filtering_q30_rate fastp-after_filtering_q30_bases fastp-filtering_result_passed_filter_reads fastp-after_filtering_gc_content fastp-pct_surviving fastp-pct_adapter +ERR2510682 63.94676508255957 99.076737967672 98.4598094267479 96.56377875078317 62.54595909085551 23.169116760345386 35 39.3881 0.25 95.7091696770211 2.147374 2.147194 2.243645 1119 54.48 89.73583930285798 90.33837973561776 3.3453954656229232 2.28059 83.647 186.831594 2.7081999999999997 63.607499999999995 78.28013779524146 7.83090426537435 +ERR4797736 65.98117378087333 97.57900430054684 97.34473194346091 97.28146593972343 96.8967923161387 96.15471450734121 91 89.2787 0.2 99.52788295236759 2.917004 2.916858 2.930841 3124 56.7 96.4836551921708 96.55145884180858 0.514594673035039 3.14759 94.84270000000001 410.387866 3.1974679999999998 65.5292 98.89263878819969 36.10825195034628 +ERR4810698 66.06827162614897 99.10307802368882 98.89272026135139 98.81755362989547 98.62616886832058 98.48000649207577 344 337.5519 0.22999999999999998 99.3513207709039 9.936216 9.935599999999999 10.001090999999999 16602 72.63 98.49338811178657 98.57286100054672 0.2919717772039071 5.84335 92.707 1540.550198 11.088058 65.6448 98.32734427639062 0.3442680548296227 +ERR4812967 65.0135561568253 99.0756499102806 98.80631037018432 98.53640413353003 97.53446195108639 96.27263272713425 106 106.434 0.19 99.08198081554276 
3.142927 3.142156 3.172047 1551 41.67 98.3911987578464 98.46440088649423 0.34440566400806866 9.78699 94.0182 529.581781 3.7676499999999997 64.569 99.40619858391054 5.927513273859968 +ERR4829557 65.45924101535405 98.07271034189483 97.66403145211233 97.20967681975331 95.42319992238524 92.96559562528391 101 96.3718 0.22999999999999998 98.70569470020156 3.004399 3.0040299999999998 3.043795 3204 47.1 97.60455607440466 97.72487319640423 0.732615476261983 2.36709 92.5543 423.810995 3.2297979999999997 65.0168 97.3195964509572 0.47186383805488447 +ERR4831263 64.93489615021015 98.80363556243047 98.6006448553473 98.3959767264524 97.5493774044935 96.4940977420089 147 144.7993 0.48 98.81996855561121 4.331884 4.33096 4.383611999999999 963 51.41 98.53935680275767 98.69062775732024 0.3321104526515484 1.1507 86.8973 598.72349 4.667122 64.5177 94.7702870528531 0.30434538937447153 +ERR4831716 63.607144753187995 99.31613326164243 98.26933137966584 96.99410544908208 74.73623675403465 31.720885170956485 41 42.373 0.7799999999999999 90.47336904718412 2.724126 2.7060679999999997 3.01097 154 90.42 84.96633720504228 87.99613075239598 4.706145639797586 8.30781 81.05059999999999 211.60257299999998 3.521098 62.4888 93.46957584947172 3.1281838100653396 +ERR8665675 64.49681937041134 99.10230731636992 98.73660669354773 98.05935897098786 66.35159849231515 13.931033482246077 34 35.867 0.32 99.30526106957309 1.601774 1.601478 1.6129799999999999 1019 32.45 97.37846008433556 97.44301503277686 0.47125112362150823 0.330143 98.3073 159.513983 1.642012 64.0095 96.83787146960127 8.996656110118364 +ERR8665676 64.49749411885034 99.11389059401587 98.72520475880034 98.01429526069401 64.5736447111797 12.862017095195048 34 35.2675 0.32 99.29918869019892 1.575329 1.575054 1.586447 958 32.23 97.36007349499923 97.43262752050951 0.47680128368623975 0.33205399999999996 98.3579 156.973151 1.615348 64.0119 96.8550185873606 9.043050725506657 +ERR8665915 65.07899991839966 99.3171759833092 99.23715842931662 
99.09215211405018 98.38822431753867 97.5766695107278 145 146.6877 0.31 99.52033914502896 4.63408 4.633552 4.656415 5053 54.69 98.08160380037087 98.11179842055897 0.28314799475999974 3.2687099999999996 92.937 662.816731 5.10687 64.65950000000001 99.487917374808 22.862192382230347
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/qualifilter.log Mon Dec 15 14:04:25 2025 +0000 @@ -0,0 +1,5 @@ +Total_reads appears to be in millions — scaling by 1e6 +Mapped_reads appears to be in millions — scaling by 1e6 +QC matrix saved successfully! +TSV: ./QC_matrix.tsv +CSV: ./QC_matrix.csv
