comparison hap.py.xml @ 2:9c358e648d74 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/happy commit 4abfaf634dcfed1dfce1bcf199c12acc33982ba4
author iuc
date Fri, 24 Oct 2025 13:26:46 +0000
parents 37c4cd0fdfc5
children
comparison
equal deleted inserted replaced
1:52e1e6c4ce31 2:9c358e648d74
1 <tool id="som.py" name="som.py" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09"> 1 <tool id="som.py" name="som.py and hap.py" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>A tool to perform comparisons only based on chromosome, position, and allele identity for comparison of somatic callsets.</description> 2 <description>Comparison of somatic callsets based only on chromosome, position, and allele identity</description>
3
4 <macros> 3 <macros>
5 <token name="@TOOL_VERSION@">0.3.14</token> 4 <token name="@TOOL_VERSION@">0.3.15</token>
5 <token name="@PROFILE@">24.1</token>
6 <token name="@VERSION_SUFFIX@">0</token> 6 <token name="@VERSION_SUFFIX@">0</token>
7 </macros> 7 </macros>
8 8 <xrefs>
9 <xref type="bio.tools">hap.py</xref>
10 </xrefs>
9 <requirements> 11 <requirements>
10 <requirement type="package" version="@TOOL_VERSION@">hap.py</requirement> 12 <requirement type="package" version="@TOOL_VERSION@">hap.py</requirement>
11 </requirements> 13 </requirements>
12 14 <version_command>som.py -h</version_command>
13 <command detect_errors="exit_code"><![CDATA[ 15 <command detect_errors="exit_code"><![CDATA[
14
15 #if $reference_genome.source == 'history': 16 #if $reference_genome.source == 'history':
16 #set $ref_genome = 'reference.fasta' 17 #set $ref_genome = 'reference.fasta'
17 ln -s -f '${reference_genome.history_item}' $ref_genome; 18 ln -s -f '${reference_genome.history_item}' $ref_genome;
18 samtools faidx $ref_genome; 19 samtools faidx $ref_genome;
19 #else: 20 #else:
20 #set $ref_genome = $reference_genome.index.fields.path 21 #set $ref_genome = $reference_genome.index.fields.path
21 #end if 22 #end if
22 export HGREF='$ref_genome' && 23 export HGREF='$ref_genome' &&
23 som.py 24 #if $program_select == 'som.py':
24 '$truth' 25 som.py
25 '$query' 26 #else:
26 -r '$ref_genome' 27 hap.py
27 -o 'happy' | sed 's/\s\+/\t/g' | tail -n+2> results.tsv 28 #end if
29 '$truth'
30 '$query'
31 -r '$ref_genome'
32 -o output
33 | sed 's/\s\+/\t/g' | tail -n+2 > results.tsv
28 ]]> 34 ]]>
29 </command> 35 </command>
30
31 <inputs> 36 <inputs>
37 <param name="program_select" type="select" label="Comparison method" help="Select the comparison method to use. som.py compares calls only by chromosome, position, and allele identity (no haplotype resolution); hap.py performs a haplotype-aware, genotype-level comparison.">
38 <option value="som.py">som.py: Allele-based comparison (chromosome, position, allele)</option>
39 <option value="hap.py">hap.py: Genotype-level, haplotype-aware comparison</option>
40 </param>
32 <conditional name="reference_genome"> 41 <conditional name="reference_genome">
33 <param name="source" type="select" label="Source for the reference genome" help="Built-in references were created using default options."> 42 <param name="source" type="select" label="Source for the reference genome" help="Built-in references were created using default options.">
34 <option value="indexed" selected="true">Use a built-in genome</option> 43 <option value="indexed" selected="true">Use a built-in genome</option>
35 <option value="history">Use a genome from history</option> 44 <option value="history">Use a genome from history</option>
36 </param> 45 </param>
37 <when value="indexed"> 46 <when value="indexed">
38 <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team."> 47 <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team.">
39 <options from_data_table="fasta_indexes"> 48 <options from_data_table="fasta_indexes">
40 <filter type="sort_by" column="2" /> 49 <filter type="sort_by" column="2"/>
41 <validator type="no_options" message="No genomes are available for the selected input dataset" /> 50 <validator type="no_options" message="No genomes are available for the selected input dataset"/>
42 </options> 51 </options>
43 </param> 52 </param>
44 </when> 53 </when>
45 <when value="history"> 54 <when value="history">
46 <param name="history_item" type="data" format="fasta" label="Reference genome" help="A reference genome in FASTA format" /> 55 <param name="history_item" type="data" format="fasta" label="Reference genome" help="A reference genome in FASTA format"/>
47 </when> 56 </when>
48 </conditional> 57 </conditional>
49 <param name="truth" type="data" format="vcf" label="ground truth variant calls"/> 58 <param name="truth" type="data" format="vcf" label="ground truth variant calls"/>
50 <param name="query" type="data" format="vcf" label="query variant calls"/> 59 <param name="query" type="data" format="vcf" label="query variant calls"/>
51 </inputs> 60 </inputs>
52
53 <outputs> 61 <outputs>
54 <data name="results" label="${tool.name} on ${on_string}" format="tabular" from_work_dir="results.tsv"/> 62 <data name="results" label="${tool.name} on ${on_string}" format="tabular" from_work_dir="results.tsv"/>
55 <data name="stats" label="${tool.name} on ${on_string}: stats" format="csv" from_work_dir="happy.stats.csv" /> 63 <data name="sompy_metrics" label="${tool.name} on ${on_string}: som.py metrics" format="json" from_work_dir="output.metrics.json">
56 <data name="metrics" label="${tool.name} on ${on_string}: metrics" format="json" from_work_dir="happy.metrics.json"/> 64 <filter>'som.py' in program_select </filter>
65 </data>
66 <data name="happy_metrics" format="json" from_work_dir="output.metrics.json.gz" label="${tool.name} on ${on_string}: hap.py metrics"> <!-- collected only for hap.py runs -->
67 <filter>program_select == 'hap.py'</filter>
68 </data>
69 <data name="stats" format="csv" from_work_dir="output.stats.csv" label="${tool.name} on ${on_string}: som.py comparison stats"> <!-- collected only for som.py runs -->
70 <filter>program_select == 'som.py'</filter>
71 </data>
72 <data name="summary" format="csv" from_work_dir="output.summary.csv" label="${tool.name} on ${on_string}: hap.py summary"> <!-- collected only for hap.py runs -->
73 <filter>program_select == 'hap.py'</filter>
74 </data>
57 </outputs> 75 </outputs>
58
59 <tests> 76 <tests>
77 <!-- Testing som.py -->
60 <test expect_num_outputs="3"> 78 <test expect_num_outputs="3">
61 <param name="source" value="history"/> 79 <param name="program_select" value="som.py"/>
62 <param name="history_item" value="chr21.fa"/> 80 <conditional name="reference_genome">
81 <param name="source" value="history"/>
82 <param name="history_item" value="chr21.fa"/>
83 </conditional>
63 <param name="truth" value="small_NA12878-Freebayes-chr21.vcf"/> 84 <param name="truth" value="small_NA12878-Freebayes-chr21.vcf"/>
64 <param name="query" value="small_NA12878-GATK3-chr21.vcf"/> 85 <param name="query" value="small_NA12878-GATK3-chr21.vcf"/>
65 86 <output name="results">
66 <output name="results" file="results.tsv"/> 87 <assert_contents>
67 <output name="stats" file="happy.stats.csv" compare="sim_size" delta_frac="0.3" /> 88 <has_text text="records"/>
68 <output name="metrics" file="happy.metrics.json" compare="sim_size" delta_frac="0.3" /> 89 <has_n_lines n="4"/>
90 </assert_contents>
91 </output>
92 <output name="stats">
93 <assert_contents>
94 <has_line_matching expression="^0,indels,1,1,0,1.*"/>
95 <has_text text="som.py-"/>
96 </assert_contents>
97 </output>
98 <output name="sompy_metrics">
99 <assert_contents>
100 <has_json_property_with_text property="name" text="som.py.comparison"/>
101 <has_text text="som.py.comparison"/>
102 </assert_contents>
103 </output>
104 </test>
105 <!-- Testing hap.py -->
106 <test expect_num_outputs="3">
107 <param name="program_select" value="hap.py"/>
108 <conditional name="reference_genome">
109 <param name="source" value="history"/>
110 <param name="history_item" value="reference.fasta"/>
111 </conditional>
112 <param name="truth" value="hap_py_truth.vcf.gz"/>
113 <param name="query" value="hap_py_query.vcf.gz"/>
114 <output name="results">
115 <assert_contents>
116 <has_n_lines n="6"/>
117 </assert_contents>
118 </output>
119 <output name="summary">
120 <assert_contents>
121 <has_n_lines n="5"/>
122 <has_text text="SNP,PASS,5,4,1,6,2,0,0,0,0.8,0.666667,0.0,0.727273,0.25,0.5,0.0,0.0"/>
123 </assert_contents>
124 </output>
125 <output name="happy_metrics" decompress="true">
126 <assert_contents>
127 <has_json_property_with_text property="name" text="hap.py.comparison"/>
128 <has_text text="hap.py.comparison"/>
129 </assert_contents>
130 </output>
69 </test> 131 </test>
70 </tests> 132 </tests>
133 <help format="markdown"><![CDATA[
71 134
72 <help><![CDATA[ 135 ## What this tool does
73 som.py is a tool to perform comparisons only based on chromosome, position, and allele identity. This comparison will not resolve haplotypes and only verify that the same alleles were observed at the same positions (e.g. for comparison of somatic callsets). 136 **hap.py** compares variant callsets (VCFs) against a reference “truth” set to evaluate variant calling accuracy. It performs haplotype-aware comparisons, matching variants by local sequence context rather than simple record alignment. This enables accurate benchmarking even when complex or multi-nucleotide variants are represented differently.
137 ***
138
139 ## Inputs
140 - **Truth VCF:** High-confidence reference variants.
141 - **Query VCF:** Variants to evaluate.
142 - **Reference FASTA:** Genome reference used for both callsets.
143 ---
144
145 ## Outputs
146 - **Summary CSV / JSON:** Precision, recall, and F1 metrics.
147 - **ROC and stratified metrics:** Optional, for detailed performance breakdowns.
148 - **Annotated VCFs:** Optional lists of matched and unmatched variants.
149 ---
150
151 ## Notes
152 - Use **hap.py** for haplotype-level benchmarking.
153 - Use **som.py** for simpler allele-based comparisons (chromosome, position, allele).
74 ]]></help> 154 ]]></help>
75 155 <citations>
156 <citation type="bibtex">@misc{illumina_happy,
157 title = {hap.py},
158 author = {{Illumina}},
159 year = {2021},
160 howpublished = {\url{https://github.com/Illumina/hap.py}},
161 note = {Accessed: 2025-10-19}
162 }</citation>
163 </citations>
76 </tool> 164 </tool>