Mercurial > repos > iuc > squirrel_phylo
comparison squirrel-phylo.xml @ 0:14936593e454 draft
planemo upload for repository https://github.com/aineniamh/squirrel commit ed19e40212d1e6651efb3a032d1170f4fd03b989
| author | iuc |
|---|---|
| date | Thu, 16 Jan 2025 07:07:17 +0000 |
| parents | |
| children | 153c1ee28c48 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:14936593e454 |
|---|---|
| 1 <tool id="squirrel_phylo" name="Squirrel Phylo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05"> | |
| 2 <description>Phylogenetic and APOBEC3 analysis of MPXV (Mpox virus)</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"/> | |
| 7 <expand macro="version_command"/> | |
| 8 | |
| 9 <command detect_errors="exit_code"><![CDATA[ | |
| 10 #set $alignment_output = 'input.aln.fasta' | |
| 11 #set $tree_output = 'input.tree' | |
| 12 ## The four extra file names below are only needed when the APOBEC3 pipeline is requested. | |
| 13 #if $apobec3 | |
| 14 #set $aa_recon_output = "input.tree.amino_acid.reconstruction.csv" | |
| 15 #set $branch_snps_output = "input.tree.branch_snps.reconstruction.csv" | |
| 16 #set $svg_output = "input.tree.svg" | |
| 17 #set $png_output = "input.tree.png" | |
| 18 #end if | |
| 19 ## Link the input to a fixed name; the results are collected from the matching input.* files further down. | |
| 20 ln -s '${sequences}' input.fasta && | |
| 21 | |
| 22 squirrel | |
| 23 #if $apobec3 | |
| 24 --run-apobec3-phylo | |
| 25 --fig-height $other_settings.fig_height | |
| 26 --fig-width $other_settings.fig_width | |
| 27 #else | |
| 28 --run-phylo | |
| 29 #end if | |
| 30 | |
| 31 --clade '$clade' | |
| 32 ## Params declared in the "other_settings" section use their full path; file paths and free text are single-quoted for the shell. | |
| 33 #if $other_settings.mask_file | |
| 34 --additional-mask '$other_settings.mask_file' | |
| 35 #end if | |
| 36 | |
| 37 #if $other_settings.bg_file | |
| 38 --background-file '$other_settings.bg_file' | |
| 39 #else | |
| 40 --include-background | |
| 41 #end if | |
| 42 | |
| 43 #if $other_settings.out_group | |
| 44 --outgroups '$other_settings.out_group' | |
| 45 #end if | |
| 46 | |
| 47 $other_settings.no_mask | |
| 48 $other_settings.no_iter_mask | |
| 49 | |
| 50 --threads \${GALAXY_SLOTS:-1} | |
| 51 input.fasta && | |
| 52 ## Move the results from the working directory onto the Galaxy output datasets. | |
| 53 mv '${alignment_output}' '$alignment' && | |
| 54 mv '${tree_output}' '$tree' | |
| 55 | |
| 56 #if $apobec3 | |
| 57 && mv '${aa_recon_output}' '$aa_recon' && | |
| 58 mv '${branch_snps_output}' '$branch_snps' && | |
| 59 mv '${svg_output}' '$svg' && | |
| 60 mv '${png_output}' '$png' | |
| 61 #end if | |
| 62 ]]></command> | |
| 63 | |
| 64 <inputs> <!-- Top-level params: input FASTA, APOBEC3 toggle and clade choice; all remaining params are nested in the "other_settings" section. --> | |
| 65 <param name="sequences" | |
| 66 type="data" | |
| 67 format="fasta" | |
| 68 label="Sequences in fasta format" | |
| 69 help="You can upload a FASTA sequence to the history and use it as reference" /> | |
| 70 <param name="apobec3" | |
| 71 type="boolean" | |
| 72 checked="false" | |
| 73 label="Run additional APOBEC3-mutation reconstruction pipeline" /> <!-- Also referenced by the output filters: the four APOBEC3 outputs exist only when this is true. --> | |
| 74 <param name="clade" | |
| 75 type="select" | |
| 76 label="Select MPXV Clade"> | |
| 77 <option value="cladei">Clade I</option> | |
| 78 <option value="cladeia">Clade Ia</option> | |
| 79 <option value="cladeib">Clade Ib</option> | |
| 80 <option value="cladeii">Clade II</option> | |
| 81 <option value="cladeiia">Clade IIa</option> | |
| 82 <option value="cladeiib">Clade IIb</option> | |
| 83 </param> | |
| 84 <section name="other_settings" expanded="false" title="Additional Settings"> <!-- Collapsed by default: masking switches, optional mask/background files, outgroups and figure size. --> | |
| 85 <param name="no_mask" | |
| 86 type="boolean" | |
| 87 truevalue="--no-mask" | |
| 88 falsevalue="" | |
| 89 label="SKIP masking repeat regions?" | |
| 90 help="Set to True to Skip masking of repetitive regions. Default: masks repeat regions." /> <!-- The truevalue is inserted verbatim into the command line; unchecked contributes nothing. --> | |
| 91 <param name="no_iter_mask" | |
| 92 type="boolean" | |
| 93 truevalue="--no-itr-mask" | |
| 94 falsevalue="" | |
| 95 label="SKIP masking of end ITR?" | |
| 96 help="Set to True to skip masking of end ITR. Default: masks ITR" /> <!-- NOTE(review): the param name says "iter" while the emitted flag says "itr"; the name is left as-is so saved workflows keep working. --> | |
| 97 <param name="mask_file" | |
| 98 type="data" | |
| 99 format="csv" | |
| 100 optional="true" | |
| 101 label="Mask additional sites" | |
| 102 help="Run squirrel in alignment with QC to generate the SNP mask file." /> | |
| 103 <param name="bg_file" | |
| 104 type="data" | |
| 105 format="fasta" | |
| 106 optional="true" | |
| 107 label="Background file - leave empty for automatic background sequences." | |
| 108 help="Include a default background set of sequences for the phylogenetics pipeline. The set will be determined by previous 'clade' setting"/> <!-- When no file is selected the command falls back to squirrel's include-background option. --> | |
| 109 <param name="out_group" | |
| 110 type="text" | |
| 111 label="Specify outgroup(s)" | |
| 112 help="Specify which MPXV outgroup(s) in the alignment to use in the phylogeny. These will get pruned out from the final tree."/> | |
| 113 <param name="fig_height" | |
| 114 label="Overwrite tree figure default height" | |
| 115 type="integer" | |
| 116 min="0" | |
| 117 value="25" | |
| 118 optional="true"> | |
| 119 </param> <!-- Only placed on the command line when the apobec3 param is true. --> | |
| 120 <param name="fig_width" | |
| 121 label="Overwrite tree figure default width" | |
| 122 type="integer" | |
| 123 min="0" | |
| 124 value="40" | |
| 125 optional="true"> | |
| 126 </param> <!-- Only placed on the command line when the apobec3 param is true. --> | |
| 127 </section> | |
| 128 </inputs> | |
| 129 | |
| 130 <outputs> | |
| 131 <!-- Standard outputs: produced on every run. --> | |
| 132 <data name="tree" format="newick" label="${tool.name} - phylogenetic tree" /> | |
| 133 <data name="alignment" format="fasta" label="${tool.name} - aligned sequences" /> | |
| 134 <!-- APOBEC3 outputs: created only when the apobec3 param is true. The two reconstruction tables are CSV files, so they are typed as csv rather than png. --> | |
| 135 <data name="svg" format="svg" label="${tool.name} - phylotree svg image"> | |
| 136 <filter>apobec3</filter> | |
| 137 </data> | |
| 138 <data name="png" format="png" label="${tool.name} - phylotree png image"> | |
| 139 <filter>apobec3</filter> | |
| 140 </data> | |
| 141 <data name="aa_recon" format="csv" label="${tool.name} - aa mutations ancestral reconstruction"> | |
| 142 <filter>apobec3</filter> | |
| 143 </data> | |
| 144 <data name="branch_snps" format="csv" label="${tool.name} - apobec3 nt mutations"> | |
| 145 <filter>apobec3</filter> | |
| 146 </data> | |
| 147 </outputs> | |
| 148 | |
| 149 <tests> | |
| 150 <test expect_num_outputs="2"> <!-- Phylogeny only (apobec3 off): just the alignment and tree outputs are expected. --> | |
| 151 <param name="sequences" value="test-sequences.fasta" /> | |
| 152 <param name="bg_file" value="test-background.fasta" /> | |
| 153 <param name="out_group" value="KJ642615" /> | |
| 154 <param name="apobec3" value="false" /> | |
| 155 <output name="alignment" file="sequences.aln.fasta" /> | |
| 156 <output name="tree"> | |
| 157 <assert_contents> | |
| 158 <has_line_matching expression="#NEXUS"/> <!-- NOTE(review): asserts a NEXUS header although the "tree" output is declared as newick; confirm the intended datatype. --> | |
| 159 </assert_contents> | |
| 160 </output> | |
| 161 </test> | |
| 162 | |
| 163 <test expect_num_outputs="6"> <!-- APOBEC3 mode: same inputs, plus the svg, png and the two reconstruction CSV outputs. --> | |
| 164 <param name="sequences" value="test-sequences.fasta" /> | |
| 165 <param name="bg_file" value="test-background.fasta" /> | |
| 166 <param name="out_group" value="KJ642615" /> | |
| 167 <param name="apobec3" value="true" /> | |
| 168 <output name="alignment" file="sequences.aln.fasta" /> | |
| 169 <output name="tree"> | |
| 170 <assert_contents> | |
| 171 <has_line_matching expression="#NEXUS"/> | |
| 172 </assert_contents> | |
| 173 </output> | |
| 174 <output name="svg"> | |
| 175 <assert_contents> | |
| 176 <has_text text="svg xmlns:"/> | |
| 177 <has_text text="DQ011155"/> | |
| 178 </assert_contents> | |
| 179 </output> | |
| 180 <output name="png" file="sequences.tree.png" ftype="png" compare="sim_size" delta="1000" /> <!-- Compared by file size within a 1000-byte tolerance rather than byte-for-byte. --> | |
| 181 <output name="aa_recon" file="sequences.tree.amino_acid.reconstruction.csv" /> | |
| 182 <output name="branch_snps" file="sequences.tree.branch_snps.reconstruction.csv" /> | |
| 183 </test> | |
| 184 | |
| 185 </tests> | |
| 186 <help><![CDATA[ | |
| 187 Squirrel rapidly produces reliable alignments for MPXV and also provides a maximum-likelihood phylogenetics pipeline for tree estimation. | |
| 188 | |
| 189 Ensure your input sequences belong to a single clade and are not a mix of Clade I and Clade II. Clade I and Clade Ia/Ib sequences are fine to combine. | |
| 190 | |
| 191 **Alignment** | |
| 192 Squirrel maps each query genome in the input file against a reference genome specific to each clade using minimap2. Using gofasta, the mapping file is then converted into a multiple sequence alignment. | |
| 193 | |
| 194 For Clade II, the reference used is NC_063383 and for Clade I, we use NC_003310. This means that all coordinates within an alignment will be relative to these references. A benefit of this is that, within a clade, alignment files can be combined without having to recalculate the alignment. Note, however, that insertions relative to the reference sequence will not be included in the alignment. | |
| 195 | |
| 196 Squirrel by default creates a single alignment fasta file. Using the genbank coordinates for NC_063383 it also has the ability to extract the aligned coding sequences either as separate records or as a concatenated alignment. This can facilitate codon-aware phylogenetic or sequence analysis. | |
| 197 | |
| 198 **APOBEC3** | |
| 199 Enrichment of APOBEC3 mutations in the MPXV population is a signature of sustained human-to-human transmission. Identifying APOBEC3-like mutations in MPXV genomes from samples in a new outbreak can be a piece of evidence to support sustained human transmission of mpox. Squirrel can run an APOBEC3 reconstruction and map these mutations onto the phylogeny. | |
| 200 | |
| 201 **Default Masking** | |
| 202 Squirrel performs masking (replacement with N) on low-complexity or repetitive regions that have been characterised for Clade I and II. These regions are defined in to_mask.cladeii.csv and to_mask.cladei.csv (see github: https://github.com/aineniamh/squirrel/blob/main/squirrel/data/). | |
| 203 | |
| 204 **Additional Masking** | |
| 205 An additional mask file can be provided to mask sites in addition to the default masking. To generate an additional mask file, run the Galaxy tool *squirrel-qc*. | |
| 206 | |
| 207 | |
| 208 ]]></help> | |
| 209 | |
| 210 <expand macro="citations" /> | |
| 211 </tool> |
