Mercurial > repos > iuc > lineagespot
comparison lineagespot_wrapper.xml @ 0:5e8505f27681 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lineagespot commit 0bc6ed15054577af1089d55ef9aa1071d122eb6b
| author | iuc |
|---|---|
| date | Tue, 08 Aug 2023 15:11:47 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:5e8505f27681 |
|---|---|
| 1 <tool id="lineagespot" name="lineagespot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09"> | |
| 2 <description>identifies SARS-CoV-2 lineages contributing to metagenomic samples from per-sample variant files</description> | |
| 3 <macros> | |
| 4 <token name="@TOOL_VERSION@">1.4.0</token> | |
| 5 <token name="@VERSION_SUFFIX@">0</token> | |
| 6 </macros> | |
| 7 <xrefs> | |
| 8 <xref type="bio.tools">lineagespot</xref> | |
| 9 <xref type="bioconductor">lineagespot</xref> | |
| 10 </xrefs> | |
| 11 <requirements> | |
| 12 <requirement type="package" version="4.3.1">r-base</requirement> | |
| 13 <requirement type="package" version="1.20.3">r-getopt</requirement> | |
| 14 <requirement type="package" version="@TOOL_VERSION@">bioconductor-lineagespot</requirement> | |
| 15 <requirement type="package" version="3.11">python</requirement> | |
| 16 </requirements> | |
| 17 <command detect_errors="exit_code"><![CDATA[ | |
| 18 ## Prepare lineage definitions: create refs/ and, for cached constellations data, run convert_lineage_defs.py with refs as its output | |
| 19 mkdir refs && | |
| 20 #if str($voc_source.choice) == 'cached': | |
| 21 python '${__tool_directory__}/convert_lineage_defs.py' -i '${voc_source.constellations.fields.path}/definitions' -o refs && | |
| 22 #end if | |
| 23 | |
| 24 ## Run the arrange_custom_inputs.sh configfile, which symlinks the VCFs of all samples into a vcfs folder | |
| 25 ## and, when custom definitions are selected, all custom lineage definitions into refs/ | |
| 26 sh arrange_custom_inputs.sh && | |
| 27 | |
| 28 #if str($ann_data.choice) == 'custom': | |
| 29 ## lineagespot insists on a .gff3 suffix for the annotation file, so link the dataset under such a name | |
| 30 ln -s '$ann_data.in_gff3' custom.gff3 && | |
| 31 #end if | |
| 32 | |
| 33 Rscript '${__tool_directory__}/lineagespot_verbose.R' | |
| 34 --in_vcf vcfs/ | |
| 35 --in_ref refs/ | |
| 36 #if str($ann_data.choice) == 'standard': | |
| 37 --in_gff3 '${__tool_directory__}/NC_045512.2_annot.gff3' | |
| 38 #else: | |
| 39 --in_gff3 custom.gff3 | |
| 40 #end if | |
| 41 --in_threshold $in_threshold | |
| 42 ]]></command> | |
| 43 <configfiles> | |
| 44 <configfile filename="arrange_custom_inputs.sh"><![CDATA[mkdir vcfs && | |
| 45 ## All commands form a single && chain (closed by the final true) so that any failing step gives the script a non-zero exit status | |
| 46 #for $vcf in $in_vcf: | |
| 47 #set $sample_name = $vcf.element_identifier.replace("'", '_').replace('/', '_') | |
| 48 ln -s '$vcf' 'vcfs/${sample_name}.vcf' && | |
| 49 #end for | |
| 50 echo "Gathered ${len($in_vcf)} samples for lineagespot run." && | |
| 51 #if str($voc_source.choice) == 'custom': | |
| 52 #for $lineage_def in $voc_source.collection: | |
| 53 #set $lineage_name = $lineage_def.element_identifier.replace("'", '_').replace('/', '_') | |
| 54 ln -s '$lineage_def' 'refs/${lineage_name}.txt' && | |
| 55 #end for | |
| 56 echo "Gathered ${len(list($voc_source.collection))} custom lineage definitions for the analysis." && | |
| 57 #end if | |
| 58 true]]></configfile> | |
| 59 </configfiles> | |
| 60 <inputs> | |
| 61 <param type="data" name="in_vcf" format="vcf" multiple="true" label="Per-sample variant calling data" /> | |
| 62 <conditional name="voc_source"> | |
| 63 <param name="choice" type="select" | |
| 64 label="Source of lineage definitions" | |
| 65 help="Lineagespot detects lineage evidence based on lineage mutation patterns defined in tool-specific definition files. The Galaxy tool also has experimental support for reading lineage definitions from cached constellations data (see tool help below)."> | |
| 66 <option value="custom">Collection of lineagespot-specific definitions in history</option> | |
| 67 <option value="cached">Lineage definitions from cached constellations data</option> | |
| 68 </param> | |
| 69 <when value="custom"> | |
| 70 <param name="collection" type="data_collection" collection_type="list" format="tabular" label="Collection of lineage definitions" /> | |
| 71 </when> | |
| 72 <when value="cached"> | |
| 73 <param name="constellations" label="Cached constellations release" type="select"> | |
| 74 <options from_data_table="pangolin_constellations"> | |
| 75 <column name="value" index="0" /> | |
| 76 <column name="description" index="1" /> | |
| 77 <column name="date" index="3" /> | |
| 78 <column name="path" index="4" /> | |
| 79 <filter type="sort_by" column="3" /> | |
| 80 <validator type="no_options" message="No cached constellations release available" /> | |
| 81 </options> | |
| 82 </param> | |
| 83 </when> | |
| 84 </conditional> | |
| 85 <conditional name="ann_data"> | |
| 86 <param name="choice" type="select" label="Source of genome feature annotations" help="Select built-in genome file to use built-in annotations for the SARS-CoV-2 reference sequence NC_045512.2."> | |
| 87 <option value="standard">Use built-in genome file</option> | |
| 88 <option value="custom">Provide custom genome file</option> | |
| 89 </param> | |
| 90 <when value="standard" /> | |
| 91 <when value="custom"> | |
| 92 <param name="in_gff3" type="data" format="gff3" label="Genes GFF" help="GFF3 input listing gene positions on the reference sequence" /> | |
| 93 </when> | |
| 94 </conditional> | |
| 95 <param type="float" name="in_threshold" value="0.8" min="0" max="1" label="AF threshold for identifying variants per sample" help="Variants in the input VCFs with an allele frequency less than the threshold will be ignored as noise in the lineage analysis. Allele frequencies are fractions, so the threshold must be between 0 and 1."/> | |
| 96 </inputs> | |
| 97 <outputs> | |
| 98 <data name="lineage_hits" format="tabular" from_work_dir="lineage_hits.txt" label="${tool.name} on ${on_string}: Lineage Hits"/> | |
| 99 <data name="lineage_report" format="tabular" from_work_dir="lineage_report.txt" label="${tool.name} on ${on_string}: Lineage Report"/> | |
| 100 <data name="variants_table" format="tabular" from_work_dir="variants_table.txt" label="${tool.name} on ${on_string}: Variants Table"/> | |
| 101 </outputs> | |
| 102 <tests> | |
| 103 <!-- test with custom genome annotation and lineage definitions --> | |
| 104 <test expect_num_outputs="3"> | |
| 105 <param name="in_vcf" value="SampleA_freebayes_ann.vcf,SampleB_freebayes_ann.vcf,SampleC_freebayes_ann.vcf"/> | |
| 106 <conditional name="ann_data"> | |
| 107 <param name="choice" value="custom"/> | |
| 108 <param name="in_gff3" value="NC_045512.2_annot.gff3"/> | |
| 109 </conditional> | |
| 110 <conditional name="voc_source"> | |
| 111 <param name="choice" value="custom"/> | |
| 112 <param name="collection"> | |
| 113 <collection type="list"> | |
| 114 <element name="AY.1" ftype="tabular" value="AY.1.txt"/> | |
| 115 <element name="B.1.617.2" ftype="tabular" value="B.1.617.2.txt"/> | |
| 116 <element name="B.1.351" ftype="tabular" value="B.1.351.txt"/> | |
| 117 <element name="P.1" ftype="tabular" value="P.1.txt"/> | |
| 118 </collection> | |
| 119 </param> | |
| 120 </conditional> | |
| 121 <output name="lineage_hits"> | |
| 122 <assert_contents> | |
| 123 <has_n_lines n="208"/> | |
| 124 </assert_contents> | |
| 125 </output> | |
| 126 <output name="lineage_report"> | |
| 127 <assert_contents> | |
| 128 <has_n_lines n="13"/> | |
| 129 </assert_contents> | |
| 130 </output> | |
| 131 <output name="variants_table"> | |
| 132 <assert_contents> | |
| 133 <has_n_lines n="1829"/> | |
| 134 </assert_contents> | |
| 135 </output> | |
| 136 </test> | |
| 137 <!-- test with built-in genome annotation and cached constellations --> | |
| 138 <test expect_num_outputs="3"> | |
| 139 <param name="in_vcf" value="SampleA_freebayes_ann.vcf,SampleB_freebayes_ann.vcf,SampleC_freebayes_ann.vcf"/> | |
| 140 <conditional name="ann_data"> | |
| 141 <param name="choice" value="standard"/> | |
| 142 </conditional> | |
| 143 <conditional name="voc_source"> | |
| 144 <param name="choice" value="cached"/> | |
| 145 <param name="constellations" value="v0.1.12"/> | |
| 146 </conditional> | |
| 147 <output name="lineage_hits"> | |
| 148 <assert_contents> | |
| 149 <has_n_lines n="123"/> | |
| 150 </assert_contents> | |
| 151 </output> | |
| 152 <output name="lineage_report"> | |
| 153 <assert_contents> | |
| 154 <has_n_lines n="10"/> | |
| 155 </assert_contents> | |
| 156 </output> | |
| 157 <output name="variants_table"> | |
| 158 <assert_contents> | |
| 159 <has_n_lines n="1829"/> | |
| 160 </assert_contents> | |
| 161 </output> | |
| 162 </test> | |
| 163 </tests> | |
| 164 <help><![CDATA[ | |
| 165 **lineagespot** - Detection of SARS-CoV-2 lineages in wastewater samples using next-generation sequencing | |
| 166 | |
| 167 The Galaxy tool wraps the functionality of the `lineagespot Bioconductor package <https://doi.org/doi:10.18129/B9.bioc.lineagespot>`__. | |
| 168 | |
| 169 **Inputs** | |
| 170 | |
| 171 *VCF datasets with per-sample variant calls* | |
| 172 | |
| 173 The tool accepts any number of input datasets in VCF format, for which it will generate reports of SARS-CoV-2 lineage evidence. These datasets need to be annotated using the VCF annotation standard field 'ANN' as added, for example, by SnpEff. | |
| 174 | |
| 175 *Lineage definitions* | |
| 176 | |
| 177 The tool requires definitions of mutation profiles for lineages of interest (it cannot find evidence for lineages it does not know about). These can be provided as a collection of simple two-column tabular datasets, like this definition for lineage B.1.617.2:: | |
| 178 | |
| 179 gene amino acid | |
| 180 ORF1b P314L | |
| 181 ORF1b G662S | |
| 182 ORF1b P1000L | |
| 183 S T19R | |
| 184 S G142D | |
| 185 S E156G | |
| 186 S del157/158 | |
| 187 S L452R | |
| 188 S T478K | |
| 189 S D614G | |
| 190 S P681R | |
| 191 S D950N | |
| 192 ORF3a S26L | |
| 193 M I82T | |
| 194 ORF7a V82A | |
| 195 ORF7a T120I | |
| 196 ORF8 D119I | |
| 197 ORF8 del120/121 | |
| 198 N D63G | |
| 199 N R203M | |
| 200 N D377Y | |
| 201 | |
| 202 where the gene names should match those used by the upstream tool producing the VCF ANN field. | |
| 203 | |
| 204 Alternatively, lineage definitions can be extracted from `constellations <https://github.com/cov-lineages/constellations>`__ data cached on the Galaxy server. | |
| 205 | |
| 206 .. class:: warningmark | |
| 207 | |
| 208 Please note that extraction of lineage definitions from constellations data is still **experimental**. | |
| 209 The conversion process may drop some lineage defining mutations and shouldn't be trusted blindly. | |
| 210 For full and up to date details see the `comments in the conversion script <https://github.com/search?q=repo%3Agalaxyproject%2Ftools-iuc+path%3A%2F%5Etools%5C%2Flineagespot%5C%2F%2F+convert_lineage_defs.py&type=code>`__. | |
| 211 | |
| 212 *Genome feature annotations* | |
| 213 | |
| 214 To learn about the position of the genes mentioned in VCF ANN fields and the lineage definitions on the genome, the tool requires an additional genome feature annotation input in gff3 format. | |
| 215 You should normally just use the offered built-in genome annotation file. | |
| 216 A custom annotation file would only be required if you've mapped your sequencing data to a different reference genome than NC_045512.2 that isn't positionally identical to it. | |
| 217 ]]></help> | |
| 218 <citations> | |
| 219 <citation type="doi">10.1038/s41598-022-06625-6</citation> | |
| 220 </citations> | |
| 221 </tool> |
