diff qualifilter.xml @ 0:b694fab47ac7 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/qualifilter commit c1d08b00ccb4837dd592970d2000f5fffe695e9f
author iuc
date Mon, 15 Dec 2025 14:04:25 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qualifilter.xml	Mon Dec 15 14:04:25 2025 +0000
@@ -0,0 +1,165 @@
+<tool id="qualifilter" name="QualiFilter" version="@TOOL_VERSION@@VERSION_SUFFIX@" profile="@PROFILE@">
+  <description>Report QC metrics and sample pass/fail based on user-defined thresholds</description>
+
+  <!-- The @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens are not defined in this file;
+       macros.xml is the only import, so they are presumably defined there. -->
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+
+  <!-- The package is pinned through @TOOL_VERSION@ so the wrapper version, the installed
+       package and the reported version share one source of truth.
+       NOTE(review): assumes macros.xml sets @TOOL_VERSION@ to the packaged release
+       (1.0.0 when this wrapper was written); confirm before merging. -->
+  <requirements>
+    <requirement type="package" version="@TOOL_VERSION@">qualifilter</requirement>
+  </requirements>
+
+  <!-- NOTE(review): this echoes the wrapper token instead of asking the installed binary;
+       switch to the program's own version flag if it provides one. -->
+  <version_command>echo @TOOL_VERSION@</version_command>
+
+  <command detect_errors="exit_code"><![CDATA[
+  ## Runs qualifilter in the job working directory. The three numeric threshold fields are
+  ## assembled into the JSON object given to the thresholds option, and everything the tool
+  ## prints (stdout and stderr) is captured in qualifilter.log, collected as the "log" output.
+  qualifilter
+      --input '$input_file'
+  ## "attributes" is an optional multi-select: pass it only when something is selected,
+  ## otherwise the literal text 'None' would be sent as the attribute list.
+  #if $attributes
+      --attributes '$attributes'
+  #end if
+      --thresholds "{\"Total_reads\": ${total_reads}, \"Coverage_gte_10x_pct\": ${coverage_gte_10x_pct}, \"Contam_pct\": ${contam_max}}"
+      --round '${round}'
+      $derive_reads
+  ## The config dataset is optional as well, so it is guarded the same way.
+  #if $config
+      --config '$config'
+  #end if
+      --outdir .
+      > qualifilter.log 2>&1
+  ]]></command>
+
+  <inputs>
+    <!-- Tabular per-sample summary handed to the tool as its input file. -->
+    <param name="input_file" type="data" format="tabular" label="Input summary file" />
+
+    <!-- Optional multi-select; the chosen values reach the command line as one comma-separated string. -->
+    <param argument="--attributes" type="select" multiple="true" optional="true"
+           label="QC metrics to include"
+           help="Select which metrics to include in the output. Leave empty to include all.">
+        <option value="Sample">Sample</option>
+        <option value="Total_reads">Total reads</option>
+        <option value="Mapped_reads">Mapped reads</option>
+        <option value="Mapping_pct">Mapping %</option>
+        <option value="Median_depth">Median depth</option>
+        <option value="Coverage_gte_10x_pct">Coverage ≥10x %</option>
+        <option value="GC_pct">GC %</option>
+        <option value="Kraken_top1_pct">Kraken top1 %</option>
+        <option value="Kraken_unclassified_pct">Kraken unclassified %</option>
+        <option value="Contam_pct">Contamination %</option>
+        <option value="QC_status">QC status</option>
+        <option value="Total_reads_pass">Total reads pass</option>
+        <option value="Coverage_gte_10x_pct_pass">Coverage at ≥10x pass</option>
+        <option value="Contam_pct_pass">Contamination pass</option>
+        <option value="MTB_reads">MTB reads</option>
+        <option value="Unclassified_reads">Unclassified reads</option>
+    </param>
+
+    <!-- Threshold fields: the command block combines these three values into the JSON thresholds argument. -->
+    <param name="total_reads" type="float" value="1000000" min="0"
+           label="Minimum total reads"
+           help="Minimum number of sequencing reads required for a sample to pass QC (commonly ≥1M for microbial WGS)." />
+    <param name="coverage_gte_10x_pct" type="float" value="90" min="0" max="100"
+           label="Minimum coverage pct at ≥10x depth"
+           help="Percentage of the genome covered at ≥10x depth. Values ≥90% are generally considered good quality." />
+    <param name="contam_max" type="float" value="5" min="0" max="100"
+           label="Maximum contamination %"
+           help="Maximum proportion of reads not belonging to the target organism (typically ≤5%)." />
+    <param name="round" type="integer" value="2" min="0"
+           label="Rounding precision"
+           help="Number of decimal places used to round numeric values in the output." />
+
+    <!-- The help text promises YAML or JSON, so both datatypes are accepted here. -->
+    <param name="config" type="data" format="yaml,json" optional="true"
+           label="Optional config file"
+           help="Provide a YAML or JSON config file to override default allowed columns and rename map. Only advanced users need this." />
+
+    <!-- Boolean flag: contributes the flag text to the command line when checked, nothing otherwise. -->
+    <param argument="--derive_reads" type="boolean"
+           truevalue="--derive_reads" falsevalue=""
+           label="Derive MTB/unclassified reads"
+           help="When enabled, the derived read metrics MTB_reads and Unclassified_reads are calculated." />
+  </inputs>
+
+  <outputs>
+    <!-- Each dataset is collected from a fixed file name in the job working directory:
+         the command sets the output directory to "." and redirects all tool output to qualifilter.log. -->
+    <data name="qc_matrix_tsv"
+          format="tsv"
+          from_work_dir="QC_matrix.tsv"
+          label="QC Matrix (TSV)"/>
+    <data name="qc_matrix_csv"
+          format="csv"
+          from_work_dir="QC_matrix.csv"
+          label="QC Matrix (CSV)"/>
+    <data name="log"
+          format="txt"
+          from_work_dir="qualifilter.log"
+          label="QualiFilter Log"/>
+  </outputs>
+
+  <tests>
+    <!-- Single end-to-end run: every attribute selected, thresholds and rounding at their form defaults,
+         read derivation switched on; all three outputs are compared against stored files. -->
+    <test expect_num_outputs="3">
+      <param name="input_file" ftype="tabular" value="qc_matrix.tabular"/>
+      <param name="attributes" value="Sample,Total_reads,Mapped_reads,Mapping_pct,Median_depth,Coverage_gte_10x_pct,GC_pct,Kraken_top1_pct,Kraken_unclassified_pct,Contam_pct,QC_status,Total_reads_pass,Coverage_gte_10x_pct_pass,Contam_pct_pass,MTB_reads,Unclassified_reads"/>
+      <param name="total_reads" value="1000000"/>
+      <param name="coverage_gte_10x_pct" value="90"/>
+      <param name="contam_max" value="5"/>
+      <param name="round" value="2"/>
+      <param name="derive_reads" value="true"/>
+
+      <output name="qc_matrix_tsv" file="QC_matrix.tsv"/>
+      <output name="qc_matrix_csv" file="QC_matrix.csv"/>
+      <output name="log" file="qualifilter.log"/>
+    </test>
+  </tests>
+
+  <help><![CDATA[
+  **What it does**
+  
+  This tool extracts sequencing quality control (QC) metrics from a MultiQC tabular summary (.tabular) file and generates a consolidated QC matrix containing only the metrics of interest. 
+  It summarizes key metrics including Total reads, Mapped reads, Coverage percentage, and Contamination. 
+  Each sample is automatically evaluated against the user-defined QC thresholds set in the tool form (passed to QualiFilter as a JSON string) to assign a QC Pass/Fail status.
+
+  **Input**
+
+  - A MultiQC-generated .tabular file containing per-sample QC metrics
+  - User-defined thresholds for Total reads, Coverage >=10x percentage, Maximum contamination percentage
+  - You can specify which QC metrics to include in the output using the "QC metrics to include" selector (passed to the tool as the comma-separated --attributes list). If left empty, all available metrics will be included automatically
+  - Optionally, a YAML or JSON config file can be provided to customize allowed columns and rename mappings
+
+  **Available metrics / attributes**
+
+  - Sample - unique identifier for each sample
+  - Total_reads - total number of sequencing reads
+  - Mapped_reads - reads mapped to the reference genome
+  - Mapping_pct - percentage of reads mapped to the reference genome
+  - Median_depth - median sequencing coverage across the genome
+  - Coverage_gte_10x_pct - percentage of the genome covered at >=10x depth
+  - GC_pct - GC content percentage of reads
+  - Kraken_top1_pct - percentage of reads assigned to the top taxonomic hit by Kraken
+  - Kraken_unclassified_pct - percentage of reads unclassified by Kraken
+  - Contam_pct - estimated contamination percentage
+  - QC_status - Pass/Fail status of the sample based on thresholds
+  - MTB_reads (optional, derived if --derive_reads is selected) - reads assigned to the target organism
+  - Unclassified_reads (optional, derived if --derive_reads is selected) - reads that could not be classified
+
+  **Output**
+
+  - A summarized QC matrix in TSV format
+  - A summarized QC matrix in CSV format
+  - A log file documenting the processing steps and any issues encountered
+  - Both QC matrices include Pass/Fail status for each sample based on the threshold evaluation
+
+  **Threshold behavior**
+
+  - The three threshold fields are passed to QualiFilter as a JSON-formatted string. Example: ``{"Total_reads": 1000000, "Coverage_gte_10x_pct": 90, "Contam_pct": 5}``
+
+  **Optional configuration file**
+
+  - An optional YAML or JSON configuration file can be supplied for advanced use cases where the default behavior needs to be customized. This file allows users to:
+
+    - Define custom allowed columns
+    - Rename columns in the output matrix
+
+  Example YAML::
+
+      allowed_columns:
+        - Sample
+        - Total_reads
+        - Coverage_gte_10x_pct
+        - Contam_pct
+
+      rename_map:
+        qualimap_bamqc-total_reads: Total_reads
+        qualimap_bamqc-mapped_reads: Mapped_reads
+        qualimap_bamqc-percentage_aligned: Mapping_pct
+        qualimap_bamqc-median_coverage: Median_depth
+
+  **Additional Notes**
+
+  - Read count fields (Total_reads, Mapped_reads) are automatically scaled if MultiQC reports them in millions (e.g., Qualimap output). No action is required.
+  - If no QC metric attributes are selected, the tool includes all available columns
+  - Derived read metrics (MTB_reads, Unclassified_reads) are calculated only when the relevant option is enabled
+  - Default thresholds: Total reads >= 1000000, Coverage >=10x percentage >= 90, Contamination percentage <= 5
+  - Rounding precision for numeric metrics can be adjusted (default is 2 decimal places)
+  - The tool generates a log file documenting the processing steps and any issues encountered
+
+  ]]></help>
+
+  <!-- Cites the tool's GitHub repository through an inline BibTeX @misc entry. -->
+  <citations>
+    <citation type="bibtex">
+  @misc{bntozini2025,
+    author = {Buhle Ntozini},
+    year = {2025},
+    title = {QualiFilter: QC matrix extractor and decision tool},
+    publisher = {GitHub},
+    journal = {GitHub repository},
+    url = {https://github.com/buhlentozini/QualiFilter}
+  }
+    </citation>
+  </citations>
+</tool>