Mercurial > repos > iuc > qualifilter
comparison qualifilter.xml @ 0:b694fab47ac7 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/qualifilter commit c1d08b00ccb4837dd592970d2000f5fffe695e9f
| author | iuc |
|---|---|
| date | Mon, 15 Dec 2025 14:04:25 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b694fab47ac7 |
|---|---|
| 1 <tool id="qualifilter" name="QualiFilter" version="@TOOL_VERSION@@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>Report QC metrics and sample pass/fail based on user-defined thresholds</description> | |
| 3 | |
| 4 <macros> | |
| 5 <import>macros.xml</import> | |
| 6 </macros> | |
| 7 <!-- The package pin reuses the token that drives the tool version so the two cannot drift apart. NOTE(review): assumes @TOOL_VERSION@ in macros.xml is 1.0.0, the release that was hard-coded here before; confirm. --> | |
| 8 <requirements> | |
| 9 <requirement type="package" version="@TOOL_VERSION@">qualifilter</requirement> | |
| 10 </requirements> | |
| 11 <!-- Echoes the wrapper's version token; it does not query the installed executable. --> | |
| 12 <version_command>echo @TOOL_VERSION@</version_command> | |
| 13 <!-- Runs qualifilter on the input table. The attributes flag is emitted only when at least one metric is selected: an empty optional multi-select otherwise renders as the literal string None and would be passed on as a metric name. The three threshold fields are serialised into one JSON argument. Stdout and stderr are redirected into qualifilter.log, which is collected as an output. --> | |
| 14 <command detect_errors="exit_code"><![CDATA[ | |
| 15 qualifilter --input '$input_file' | |
| 16 #if $attributes | |
| 17 --attributes '$attributes' | |
| 18 #end if | |
| 19 --thresholds "{\"Total_reads\": ${total_reads}, \"Coverage_gte_10x_pct\": ${coverage_gte_10x_pct}, \"Contam_pct\": ${contam_max}}" | |
| 20 --round '${round}' | |
| 21 $derive_reads | |
| 22 #if $config | |
| 23 --config '$config' | |
| 24 #end if | |
| 25 --outdir . > qualifilter.log 2>&1 | |
| 26 ]]></command> | |
| 27 | |
| 28 <inputs> | |
| 29 <param name="input_file" type="data" format="tabular" label="Input summary file" /> | |
| 30 <!-- Optional metric filter; the option values are the attribute names described in the help section. --> | |
| 31 <param argument="--attributes" type="select" multiple="true" optional="true" | |
| 32 label="QC metrics to include" | |
| 33 help="Select which metrics to include in the output. Leave empty to include all."> | |
| 34 <option value="Sample">Sample</option> | |
| 35 <option value="Total_reads">Total reads</option> | |
| 36 <option value="Mapped_reads">Mapped reads</option> | |
| 37 <option value="Mapping_pct">Mapping %</option> | |
| 38 <option value="Median_depth">Median depth</option> | |
| 39 <option value="Coverage_gte_10x_pct">Coverage ≥10x %</option> | |
| 40 <option value="GC_pct">GC %</option> | |
| 41 <option value="Kraken_top1_pct">Kraken top1 %</option> | |
| 42 <option value="Kraken_unclassified_pct">Kraken unclassified %</option> | |
| 43 <option value="Contam_pct">Contamination %</option> | |
| 44 <option value="QC_status">QC status</option> | |
| 45 <option value="Total_reads_pass">Total reads pass</option> | |
| 46 <option value="Coverage_gte_10x_pct_pass">Coverage at ≥10x pass</option> | |
| 47 <option value="Contam_pct_pass">Contamination pass</option> | |
| 48 <option value="MTB_reads">MTB reads</option> | |
| 49 <option value="Unclassified_reads">Unclassified reads</option> | |
| 50 </param> | |
| 51 <!-- QC thresholds; the command section serialises these three values into the thresholds JSON argument. --> | |
| 52 <param name="total_reads" type="float" value="1000000" min="0" label="Minimum total reads" help="Minimum number of sequencing reads required for a sample to pass QC (commonly ≥1M for microbial WGS)." /> | |
| 53 <param name="coverage_gte_10x_pct" type="float" value="90" min="0" max="100" label="Minimum coverage pct at ≥10x depth" help="Percentage of the genome covered at ≥10x depth. Values ≥90% are generally considered good quality." /> | |
| 54 <param name="contam_max" type="float" value="5" min="0" max="100" label="Maximum contamination %" help="Maximum proportion of reads not belonging to the target organism (typically ≤5%)." /> | |
| 55 <param name="round" type="integer" value="2" min="0" label="Rounding precision" help="Number of decimal places used to round numeric values in the output." /> | |
| 56 <!-- Accepts both YAML and JSON datasets, matching what the parameter help promises. --> | |
| 57 <param name="config" type="data" format="yaml,json" optional="true" | |
| 58 label="Optional config file" | |
| 59 help="Provide a YAML or JSON config file to override default allowed columns and rename map. Only advanced users need this." /> | |
| 60 | |
| 61 <param argument="--derive_reads" type="boolean" | |
| 62 truevalue="--derive_reads" falsevalue="" | |
| 63 label="Derive MTB/unclassified reads" help="Adds the derived MTB_reads and Unclassified_reads metrics to the output." /> | |
| 64 </inputs> | |
| 65 <!-- Each dataset is collected from the job working directory by file name; the log is the file the command redirects stdout and stderr into. --> | |
| 66 <outputs> | |
| 67 <data from_work_dir="QC_matrix.tsv" name="qc_matrix_tsv" format="tsv" label="QC Matrix (TSV)"/> | |
| 68 <data from_work_dir="QC_matrix.csv" name="qc_matrix_csv" format="csv" label="QC Matrix (CSV)"/> | |
| 69 <data from_work_dir="qualifilter.log" name="log" format="txt" label="QualiFilter Log"/> | |
| 70 </outputs> | |
| 71 <!-- Single end-to-end test: every metric selected, thresholds and rounding at their form defaults, derive_reads enabled. --> | |
| 72 <tests> | |
| 73 <test expect_num_outputs="3"> | |
| 74 <param ftype="tabular" value="qc_matrix.tabular" name="input_file"/> | |
| 75 <param value="Sample,Total_reads,Mapped_reads,Mapping_pct,Median_depth,Coverage_gte_10x_pct,GC_pct,Kraken_top1_pct,Kraken_unclassified_pct,Contam_pct,QC_status,Total_reads_pass,Coverage_gte_10x_pct_pass,Contam_pct_pass,MTB_reads,Unclassified_reads" name="attributes"/> | |
| 76 <param value="1000000" name="total_reads"/> | |
| 77 <param value="90" name="coverage_gte_10x_pct"/> | |
| 78 <param value="5" name="contam_max"/> | |
| 79 <param value="2" name="round"/> | |
| 80 <param value="true" name="derive_reads"/> | |
| 81 <!-- All three outputs are compared against the expected files in test-data. --> | |
| 82 <output file="QC_matrix.tsv" name="qc_matrix_tsv"/> | |
| 83 <output file="QC_matrix.csv" name="qc_matrix_csv"/> | |
| 84 <output file="qualifilter.log" name="log"/> | |
| 85 </test> | |
| 86 </tests> | |
| 87 | |
| 88 <help><![CDATA[ | |
| 89 **What it does** | |
| 90 | |
| 91 This tool extracts sequencing quality control (QC) metrics from a MultiQC tabular summary (.tabular) file and generates a consolidated QC matrix containing only the metrics of interest. | |
| 92 It summarizes key metrics including Total reads, Mapped reads, Coverage percentage, and Contamination. | |
| 93 Each sample is automatically evaluated against the user-defined QC thresholds set in the tool form (passed to QualiFilter as a JSON string) to assign a QC Pass/Fail status. | |
| 94 | |
| 95 **Input** | |
| 96 - A MultiQC-generated .tabular file containing per-sample QC metrics | |
| 97 - User-defined thresholds for Total reads, Coverage >=10x percentage, Maximum contamination percentage | |
| 98 - You can specify which QC metrics to include in the output with the "QC metrics to include" selector (passed to QualiFilter as a comma-separated list via the --attributes option). If left empty, all available metrics will be included automatically | |
| 99 - Optionally, a YAML or JSON config file can be provided to customize allowed columns and rename mappings | |
| 100 | |
| 101 **Available metrics / attributes** | |
| 102 - Sample - unique identifier for each sample | |
| 103 - Total_reads - total number of sequencing reads | |
| 104 - Mapped_reads - reads mapped to the reference genome | |
| 105 - Median_depth - median sequencing coverage across the genome | |
| 106 - Coverage_gte_10x_pct - percentage of the genome covered at >=10x depth | |
| 107 - GC_pct - GC content percentage of reads | |
| 108 - Kraken_top1_pct - percentage of reads assigned to the top taxonomic hit by Kraken | |
| 109 - Kraken_unclassified_pct - percentage of reads unclassified by Kraken | |
| 110 - Contam_pct - estimated contamination percentage | |
| 111 - QC_status - Pass/Fail status of the sample based on thresholds | |
| 112 - MTB_reads (optional, derived if --derive_reads is selected) - reads assigned to the target organism | |
| 113 - Unclassified_reads (optional, derived if --derive_reads is selected) - reads that could not be classified | |
| 114 | |
| 115 **Output** | |
| 116 - A summarized QC matrix in TSV format | |
| 117 - A summarized QC matrix in CSV format | |
| 118 - Both outputs include Pass/Fail status for each sample based on the threshold evaluation | |
| 119 | |
| 120 **Threshold behavior** | |
| 121 - The three threshold fields of the tool form are combined into a JSON-formatted string and passed to QualiFilter via the --thresholds option. Example: {"Total_reads": 1000000, "Coverage_gte_10x_pct": 90, "Contam_pct": 5} | |
| 122 | |
| 123 **Optional configuration file** | |
| 124 | |
| 125 - An optional YAML or JSON configuration file can be supplied for advanced use cases where the default behavior needs to be customized. This file allows users to: | |
| 126 - Define custom allowed columns | |
| 127 - Rename columns in the output matrix | |
| 128 | |
| 129 Example YAML:: | |
| 130 | |
| 131 allowed_columns: | |
| 132 - Sample | |
| 133 - Total_reads | |
| 134 - Coverage_gte_10x_pct | |
| 135 - Contam_pct | |
| 136 | |
| 137 rename_map: | |
| 138 qualimap_bamqc-total_reads: Total_reads | |
| 139 qualimap_bamqc-mapped_reads: Mapped_reads | |
| 140 qualimap_bamqc-percentage_aligned: Mapping_pct | |
| 141 qualimap_bamqc-median_coverage: Median_depth | |
| 142 | |
| 143 **Additional Notes** | |
| 144 - Read count fields (Total_reads, Mapped_reads) are automatically scaled if MultiQC reports them in millions (e.g., Qualimap output). No action is required | |
| 145 - If no QC metric attributes are selected, the tool includes all available columns | |
| 146 - Derived read metrics (MTB_reads, Unclassified_reads) are calculated only when the relevant option is enabled | |
| 147 - Default thresholds: Total reads >= 1000000, Coverage >=10x percentage >= 90, Contamination percentage <= 5 | |
| 148 - Rounding precision for numeric metrics can be adjusted (default is 2 decimal places) | |
| 149 - The tool generates a log file documenting the processing steps and any issues encountered | |
| 150 | |
| 151 ]]></help> | |
| 152 <!-- BibTeX entry citing the upstream QualiFilter GitHub repository. --> | |
| 153 <citations> | |
| 154 <citation type="bibtex"> | |
| 155 @misc{bntozini2025, | |
| 156 author = {Buhle Ntozini}, | |
| 157 year = {2025}, | |
| 158 title = {QualiFilter: QC matrix extractor and decision tool}, | |
| 159 publisher = {GitHub}, | |
| 160 journal = {GitHub repository}, | |
| 161 url = {https://github.com/buhlentozini/QualiFilter} | |
| 162 } | |
| 163 </citation> | |
| 164 </citations> | |
| 165 </tool> |
