comparison lineagespot_wrapper.xml @ 0:5e8505f27681 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lineagespot commit 0bc6ed15054577af1089d55ef9aa1071d122eb6b
author iuc
date Tue, 08 Aug 2023 15:11:47 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5e8505f27681
1 <tool id="lineagespot" name="lineagespot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09">
2 <description>identifies SARS-CoV-2 lineages contributing to metagenomic samples from per-sample variant files</description>
3 <macros> <!-- Version tokens: both are spliced into the tool's version attribute; the TOOL_VERSION token also pins the bioconductor-lineagespot requirement. -->
4 <token name="@TOOL_VERSION@">1.4.0</token>
5 <token name="@VERSION_SUFFIX@">0</token>
6 </macros>
7 <xrefs> <!-- External registry identifiers under which the wrapped software is listed. -->
8 <xref type="bio.tools">lineagespot</xref>
9 <xref type="bioconductor">lineagespot</xref>
10 </xrefs>
11 <requirements> <!-- Packages needed by the command section of this tool. -->
12 <requirement type="package" version="4.3.1">r-base</requirement> <!-- provides Rscript, which runs lineagespot_verbose.R -->
13 <requirement type="package" version="1.20.3">r-getopt</requirement> <!-- NOTE(review): presumably used by lineagespot_verbose.R for option parsing; that script is not shown here, please confirm -->
14 <requirement type="package" version="@TOOL_VERSION@">bioconductor-lineagespot</requirement>
15 <requirement type="package" version="3.11">python</requirement> <!-- runs convert_lineage_defs.py when cached constellations data is selected -->
16 </requirements>
17 <command detect_errors="exit_code"><![CDATA[
18 ## Prepare lineage definitions: refs/ is the folder handed to lineagespot via --in_ref below; for cached constellations data the conversion script writes into it
19 mkdir refs &&
20 #if str($voc_source.choice) == 'cached':
21 python '${__tool_directory__}/convert_lineage_defs.py' -i '${voc_source.constellations.fields.path}/definitions' -o refs &&
22 #end if
23
24 ## Symlink the VCFs of all samples into a vcfs folder
25 ## and all custom lineage definitions into refs/ (the script is generated by the configfiles section of this tool)
26 sh arrange_custom_inputs.sh &&
27
28 #if str($ann_data.choice) == 'custom':
29 ## lineagespot insists on a .gff3 suffix for the annotation file
30 ln -s '$ann_data.in_gff3' custom.gff3 &&
31 #end if
32 ## Run the analysis on the prepared vcfs/ and refs/ folders with the selected annotation file and AF threshold
33 Rscript '${__tool_directory__}/lineagespot_verbose.R'
34 --in_vcf vcfs/
35 --in_ref refs/
36 #if str($ann_data.choice) == 'standard':
37 --in_gff3 '${__tool_directory__}/NC_045512.2_annot.gff3'
38 #else:
39 --in_gff3 custom.gff3
40 #end if
41 --in_threshold $in_threshold
42 ]]></command>
43 <configfiles> <!-- arrange_custom_inputs.sh is a shell script templated per job: it links every input VCF into vcfs/ and, when custom lineage definitions are selected, every collection element into refs/. Single quotes and slashes in element identifiers are replaced with underscores so the names stay safe inside single-quoted shell strings and as file names. The lineage count is set after the loop, not inside it, so that it is defined even when the collection is empty. -->
44 <configfile filename="arrange_custom_inputs.sh"><![CDATA[mkdir vcfs &&
45 #for $vcf in $in_vcf:
46 #set $sample_name = $vcf.element_identifier.replace("'", '_').replace('/', '_')
47 ln -s '$vcf' 'vcfs/${sample_name}.vcf' &&
48 #end for
49 #set $num_samples = len($in_vcf)
50 echo "Gathered $num_samples samples for lineagespot run."
51 #if str($voc_source.choice) == 'custom':
52 #for $lineage_def in $voc_source.collection:
53 #set $lineage_name = $lineage_def.element_identifier.replace("'", '_').replace('/', '_')
54 ln -s '$lineage_def' 'refs/${lineage_name}.txt' &&
55 #end for
56 #set $num_lineages = len(list($voc_source.collection))
57 echo "Gathered $num_lineages custom lineage definitions for the analysis."
58 #end if]]></configfile>
59 </configfiles>
60 <inputs> <!-- Tool form: per-sample VCFs, the source of lineage definitions, the source of the genome annotation, and the allele-frequency threshold. -->
61 <param type="data" name="in_vcf" format="vcf" multiple="true" label="Per-sample variant calling data" />
62 <conditional name="voc_source"> <!-- choice decides how the command section fills refs/: from a history collection (custom) or from cached constellations data (cached) -->
63 <param name="choice" type="select"
64 label="Source of lineage definitions"
65 help="Lineagespot detects lineage evidence based on lineage mutation patterns defined in tool-specific definition files. The Galaxy tool also has experimental support for reading lineage definitions from cached constellations data (see tool help below).">
66 <option value="custom">Collection of lineagespot-specific definitions in history</option>
67 <option value="cached">Lineage definitions from cached constellations data</option>
68 </param>
69 <when value="custom">
70 <param name="collection" type="data_collection" collection_type="list" format="tabular" label="Collection of lineage definitions" />
71 </when>
72 <when value="cached">
73 <param name="constellations" label="Cached constellations release" type="select">
74 <options from_data_table="pangolin_constellations"> <!-- options are sorted on data table column 3 (the date column); the path column of the selected row is read by the command section -->
75 <column name="value" index="0" />
76 <column name="description" index="1" />
77 <column name="date" index="3" />
78 <column name="path" index="4" />
79 <filter type="sort_by" column="3" />
80 <validator type="no_options" message="No cached constellations release available" />
81 </options>
82 </param>
83 </when>
84 </conditional>
85 <conditional name="ann_data"> <!-- choice decides whether the command section passes the bundled NC_045512.2 annotation or a user-supplied GFF3 dataset -->
86 <param name="choice" type="select" label="Source of genome feature annotations" help="Select built-in genome file to use built-in annotations for the SARS-CoV-2 reference sequence NC_045512.2.">
87 <option value="standard">Use built-in genome file</option>
88 <option value="custom">Provide custom genome file</option>
89 </param>
90 <when value="standard" />
91 <when value="custom">
92 <param name="in_gff3" type="data" format="gff3" label="Genes GFF" help="GFF3 input listing gene positions on the reference sequence" />
93 </when>
94 </conditional>
95 <param type="float" name="in_threshold" value="0.8" min="0" max="1" label="AF threshold for identifying variants per sample" help="Variants in the input VCFs with an allele frequency less than the threshold will be ignored as noise in the lineage analysis."/> <!-- an allele frequency is a fraction, so the threshold is restricted to the range 0 to 1 -->
96 </inputs>
97 <outputs> <!-- Three tabular datasets, each collected from a file of the given name in the job working directory. NOTE(review): these files are presumably written by lineagespot_verbose.R, which is not shown here. -->
98 <data name="lineage_hits" format="tabular" from_work_dir="lineage_hits.txt" label="${tool.name} on ${on_string}: Lineage Hits"/>
99 <data name="lineage_report" format="tabular" from_work_dir="lineage_report.txt" label="${tool.name} on ${on_string}: Lineage Report"/>
100 <data name="variants_table" format="tabular" from_work_dir="variants_table.txt" label="${tool.name} on ${on_string}: Variants Table"/>
101 </outputs>
102 <tests>
103 <!-- test with custom genome annotation and lineage definitions: three sample VCFs are analysed against a four-element list collection of definitions; each output is checked by line count only -->
104 <test expect_num_outputs="3">
105 <param name="in_vcf" value="SampleA_freebayes_ann.vcf,SampleB_freebayes_ann.vcf,SampleC_freebayes_ann.vcf"/>
106 <conditional name="ann_data">
107 <param name="choice" value="custom"/>
108 <param name="in_gff3" value="NC_045512.2_annot.gff3"/>
109 </conditional>
110 <conditional name="voc_source">
111 <param name="choice" value="custom"/>
112 <param name="collection">
113 <collection type="list">
114 <element name="AY.1" ftype="tabular" value="AY.1.txt"/>
115 <element name="B.1.617.2" ftype="tabular" value="B.1.617.2.txt"/>
116 <element name="B.1.351" ftype="tabular" value="B.1.351.txt"/>
117 <element name="P.1" ftype="tabular" value="P.1.txt"/>
118 </collection>
119 </param>
120 </conditional>
121 <output name="lineage_hits">
122 <assert_contents>
123 <has_n_lines n="208"/>
124 </assert_contents>
125 </output>
126 <output name="lineage_report">
127 <assert_contents>
128 <has_n_lines n="13"/>
129 </assert_contents>
130 </output>
131 <output name="variants_table">
132 <assert_contents>
133 <has_n_lines n="1829"/>
134 </assert_contents>
135 </output>
136 </test>
137 <!-- test with built-in genome annotation and cached constellations: relies on a pangolin_constellations data table entry with value v0.1.12 being available in the test environment; each output is checked by line count only -->
138 <test expect_num_outputs="3">
139 <param name="in_vcf" value="SampleA_freebayes_ann.vcf,SampleB_freebayes_ann.vcf,SampleC_freebayes_ann.vcf"/>
140 <conditional name="ann_data">
141 <param name="choice" value="standard"/>
142 </conditional>
143 <conditional name="voc_source">
144 <param name="choice" value="cached"/>
145 <param name="constellations" value="v0.1.12"/>
146 </conditional>
147 <output name="lineage_hits">
148 <assert_contents>
149 <has_n_lines n="123"/>
150 </assert_contents>
151 </output>
152 <output name="lineage_report">
153 <assert_contents>
154 <has_n_lines n="10"/>
155 </assert_contents>
156 </output>
157 <output name="variants_table">
158 <assert_contents>
159 <has_n_lines n="1829"/>
160 </assert_contents>
161 </output>
162 </test>
163 </tests>
164 <help><![CDATA[
165 **lineagespot** - Detection of SARS-CoV-2 lineages in wastewater samples using next-generation sequencing
166
167 The Galaxy tool wraps the functionality of the `lineagespot Bioconductor package <https://doi.org/doi:10.18129/B9.bioc.lineagespot>`__.
168
169 **Inputs**
170
171 *VCF datasets with per-sample variant calls*
172
173 The tool accepts any number of input datasets in VCF format, for which it will generate reports of SARS-CoV-2 lineage evidence. These datasets need to be annotated using the VCF annotation standard field 'ANN' as added, for example, by SnpEff.
174
175 *Lineage definitions*
176
177 The tool requires definitions of mutation profiles for lineages of interest (it cannot find evidence for lineages it does not know about). These can be provided as a collection of simple two-column tabular datasets, like this definition for lineage B.1.617.2::
178
179 gene amino acid
180 ORF1b P314L
181 ORF1b G662S
182 ORF1b P1000L
183 S T19R
184 S G142D
185 S E156G
186 S del157/158
187 S L452R
188 S T478K
189 S D614G
190 S P681R
191 S D950N
192 ORF3a S26L
193 M I82T
194 ORF7a V82A
195 ORF7a T120I
196 ORF8 D119I
197 ORF8 del120/121
198 N D63G
199 N R203M
200 N D377Y
201
202 where the gene names should match those used by the upstream tool producing the VCF ANN field.
203
204 Alternatively, lineage definitions can be extracted from `constellations <https://github.com/cov-lineages/constellations>`__ data cached on the Galaxy server.
205
206 .. class:: warningmark
207
208 Please note that extraction of lineage definitions from constellations data is still **experimental**.
209 The conversion process may drop some lineage defining mutations and shouldn't be trusted blindly.
210 For full and up-to-date details see the `comments in the conversion script <https://github.com/search?q=repo%3Agalaxyproject%2Ftools-iuc+path%3A%2F%5Etools%5C%2Flineagespot%5C%2F%2F+convert_lineage_defs.py&type=code>`__.
211
212 *Genome feature annotations*
213
214 To learn about the position of the genes mentioned in VCF ANN fields and the lineage definitions on the genome, the tool requires an additional genome feature annotation input in gff3 format.
215 You should normally just use the offered built-in genome annotation file.
216 A custom annotation file would only be required if you've mapped your sequencing data to a different reference genome than NC_045512.2 that isn't positionally identical to it.
217 ]]></help>
218 <citations>
219 <citation type="doi">10.1038/s41598-022-06625-6</citation>
220 </citations>
221 </tool>