comparison squirrel-phylo.xml @ 0:14936593e454 draft

planemo upload for repository https://github.com/aineniamh/squirrel commit ed19e40212d1e6651efb3a032d1170f4fd03b989
author iuc
date Thu, 16 Jan 2025 07:07:17 +0000
parents
children 153c1ee28c48
comparison
equal deleted inserted replaced
-1:000000000000 0:14936593e454
1 <tool id="squirrel_phylo" name="Squirrel Phylo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
2 <description>Phylogenetic and APOBEC3 analysis of MPXV (Mpox virus)</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="version_command"/>
8
<command detect_errors="exit_code"><![CDATA[
    ## Names of the files expected in the working directory after squirrel
    ## has run on the staged input.fasta; they are moved onto the Galaxy
    ## output datasets at the end of this script.
    #set $alignment_output = 'input.aln.fasta'
    #set $tree_output = 'input.tree'

    ## The reconstruction tables and tree figures are only handled in APOBEC3 mode.
    #if $apobec3
        #set $aa_recon_output = 'input.tree.amino_acid.reconstruction.csv'
        #set $branch_snps_output = 'input.tree.branch_snps.reconstruction.csv'
        #set $svg_output = 'input.tree.svg'
        #set $png_output = 'input.tree.png'
    #end if

    ## Stage the input under a fixed name so the output names above are predictable.
    ln -s '${sequences}' input.fasta &&

    squirrel
    #if $apobec3
        --run-apobec3-phylo
        ## Both figure sizes are optional integers: emit the flag only when a
        ## value is present, otherwise the flag would be left without an argument.
        #if str($other_settings.fig_height) != ''
            --fig-height $other_settings.fig_height
        #end if
        #if str($other_settings.fig_width) != ''
            --fig-width $other_settings.fig_width
        #end if
    #else
        --run-phylo
    #end if

    --clade '$clade'

    ## Dataset paths and free-text values are single-quoted before reaching the shell.
    #if $other_settings.mask_file
        --additional-mask '$other_settings.mask_file'
    #end if

    ## Without a user-supplied background file, fall back to --include-background.
    #if $other_settings.bg_file
        --background-file '$other_settings.bg_file'
    #else
        --include-background
    #end if

    #if $other_settings.out_group
        --outgroups '$other_settings.out_group'
    #end if

    ## Boolean parameters expand to their flag (truevalue) or to an empty string.
    $other_settings.no_mask
    $other_settings.no_iter_mask

    --threads \${GALAXY_SLOTS:-1}
    input.fasta &&

    mv '${alignment_output}' '$alignment' &&
    mv '${tree_output}' '$tree'

    #if $apobec3
        && mv '${aa_recon_output}' '$aa_recon' &&
        mv '${branch_snps_output}' '$branch_snps' &&
        mv '${svg_output}' '$svg' &&
        mv '${png_output}' '$png'
    #end if
]]></command>
63
<inputs>
    <!-- FASTA dataset that the command links to input.fasta and hands to squirrel -->
    <param name="sequences" type="data" format="fasta"
           label="Sequences in fasta format"
           help="You can upload a FASTA sequence to the history and use it as reference" />

    <!-- Switches the command between the plain and the APOBEC3 phylogenetics run -->
    <param name="apobec3" type="boolean" checked="false"
           label="Run additional APOBEC3-mutation reconstruction pipeline" />

    <!-- The selected value is passed to squirrel through its clade option -->
    <param name="clade" type="select" label="Select MPXV Clade">
        <option value="cladei">Clade I</option>
        <option value="cladeia">Clade Ia</option>
        <option value="cladeib">Clade Ib</option>
        <option value="cladeii">Clade II</option>
        <option value="cladeiia">Clade IIa</option>
        <option value="cladeiib">Clade IIb</option>
    </param>

    <section name="other_settings" title="Additional Settings" expanded="false">
        <!-- Masking switches: each boolean expands to its flag when enabled, otherwise to nothing -->
        <param name="no_mask" type="boolean" truevalue="--no-mask" falsevalue=""
               label="SKIP masking repeat regions?"
               help="Set to True to Skip masking of repetitive regions. Default: masks repeat regions." />
        <param name="no_iter_mask" type="boolean" truevalue="--no-itr-mask" falsevalue=""
               label="SKIP masking of end ITR?"
               help="Set to True to skip masking of end ITR. Default: masks ITR" />

        <!-- Optional datasets; the command adds the matching option only when one is selected -->
        <param name="mask_file" type="data" format="csv" optional="true"
               label="Mask additional sites"
               help="Run squirrel in alignment with QC to generate the SNP mask file." />
        <param name="bg_file" type="data" format="fasta" optional="true"
               label="Background file - leave empty for automatic background sequences."
               help="Include a default background set of sequences for the phylogenetics pipeline. The set will be determined by previous 'clade' setting"/>

        <param name="out_group" type="text"
               label="Specify outgroup(s)"
               help="Specify which MPXV outgroup(s) in the alignment to use in the phylogeny. These will get pruned out from the final tree."/>

        <!-- Figure dimensions; the command only reads them when the APOBEC3 pipeline is enabled -->
        <param name="fig_height" type="integer" min="0" value="25" optional="true"
               label="Overwrite tree figure default height" />
        <param name="fig_width" type="integer" min="0" value="40" optional="true"
               label="Overwrite tree figure default width" />
    </section>
</inputs>
129
<outputs>
    <!-- Standard outputs, created by every run -->
    <!-- NOTE(review): the tool tests assert a "#NEXUS" line in the tree file, so the content
         looks like NEXUS rather than Newick; confirm whether this format should be "nexus". -->
    <data name="tree" format="newick" label="${tool.name} - phylogenetic tree" />
    <data name="alignment" format="fasta" label="${tool.name} - aligned sequences" />

    <!-- APOBEC3 outputs, only created when the apobec3 option is enabled -->
    <data name="svg" format="svg" label="${tool.name} - phylotree svg image">
        <filter>apobec3</filter>
    </data>
    <data name="png" format="png" label="${tool.name} - phylotree png image">
        <filter>apobec3</filter>
    </data>
    <!-- The two reconstruction tables are CSV files (the command moves *.reconstruction.csv
         onto them), so they are declared as csv and not as images. -->
    <data name="aa_recon" format="csv" label="${tool.name} - aa mutations ancestral reconstruction">
        <filter>apobec3</filter>
    </data>
    <data name="branch_snps" format="csv" label="${tool.name} - apobec3 nt mutations">
        <filter>apobec3</filter>
    </data>
</outputs>
148
<tests>
    <!-- Plain phylogenetics run: only the alignment and the tree are expected -->
    <test expect_num_outputs="2">
        <param name="sequences" value="test-sequences.fasta" />
        <param name="bg_file" value="test-background.fasta" />
        <param name="out_group" value="KJ642615" />
        <param name="apobec3" value="false" />
        <output name="alignment" file="sequences.aln.fasta" />
        <output name="tree">
            <assert_contents>
                <has_line_matching expression="#NEXUS"/>
            </assert_contents>
        </output>
    </test>

    <!-- APOBEC3 run: the two standard outputs plus figures and reconstruction tables -->
    <test expect_num_outputs="6">
        <param name="sequences" value="test-sequences.fasta" />
        <param name="bg_file" value="test-background.fasta" />
        <param name="out_group" value="KJ642615" />
        <param name="apobec3" value="true" />
        <output name="alignment" file="sequences.aln.fasta" />
        <output name="tree">
            <assert_contents>
                <has_line_matching expression="#NEXUS"/>
            </assert_contents>
        </output>
        <!-- The SVG is checked by content rather than against a fixture file -->
        <output name="svg">
            <assert_contents>
                <has_text text="svg xmlns:"/>
                <has_text text="DQ011155"/>
            </assert_contents>
        </output>
        <!-- The PNG is compared by size only, within the given delta -->
        <output name="png" ftype="png" file="sequences.tree.png" compare="sim_size" delta="1000" />
        <output name="aa_recon" file="sequences.tree.amino_acid.reconstruction.csv" />
        <output name="branch_snps" file="sequences.tree.branch_snps.reconstruction.csv" />
    </test>
</tests>
186 <help><![CDATA[
187 Squirrel rapidly produces reliable alignments for MPXV and also provides a maximum-likelihood phylogenetics pipeline for tree estimation.
188
189 Ensure your input sequences are of a singular clade and not mixed CladeI/CladeII. CladeI and CladeIa/b are fine to combine.
190
191 **Alignment**
192 Squirrel maps each query genome in the input file against a reference genome specific to each clade using minimap2. Using gofasta, the mapping file is then converted into a multiple sequence alignment.
193
194 For Clade II, the reference used is NC_063383 and for Clade I, we use NC_003310. This means that all coordinates within an alignment will be relative to these references. A benefit of this is that within a clade, alignment files can be combined without having to recalculate the alignment. Note however that insertions relative to the reference sequence will not be included in the alignment.
195
196 Squirrel by default creates a single alignment fasta file. Using the genbank coordinates for NC_063383 it also has the ability to extract the aligned coding sequences either as separate records or as a concatenated alignment. This can facilitate codon-aware phylogenetic or sequence analysis.
197
198 **APOBEC3**
199 Enrichment of APOBEC3-mutations in the MPXV population is a signature of sustained human-to-human transmission. Identifying APOBEC3-like mutations in MPXV genomes from samples in a new outbreak can be a piece of evidence to support sustained human transmission of mpox. Squirrel can run an APOBEC3-reconstruction and map these mutations onto the phylogeny.
200
201 **Default Masking**
202 Squirrel performs masking (replacement with N) on low-complexity or repetitive regions that have been characterised for Clade I and II. These regions are defined in to_mask.cladeii.csv and to_mask.cladei.csv (see github: https://github.com/aineniamh/squirrel/blob/main/squirrel/data/).
203
204 **Additional Masking**
205 An additional mask file can be provided to mask sites in addition to the default masking. To generate an additional masking file, run the Galaxy tool *squirrel-qc*.
206
207
208 ]]></help>
209
210 <expand macro="citations" />
211 </tool>