Mercurial > repos > iuc > squirrel_phylo
diff squirrel-phylo.xml @ 0:14936593e454 draft
planemo upload for repository https://github.com/aineniamh/squirrel commit ed19e40212d1e6651efb3a032d1170f4fd03b989
| author | iuc |
|---|---|
| date | Thu, 16 Jan 2025 07:07:17 +0000 |
| parents | |
| children | 153c1ee28c48 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/squirrel-phylo.xml Thu Jan 16 07:07:17 2025 +0000 @@ -0,0 +1,211 @@ +<tool id="squirrel_phylo" name="Squirrel Phylo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05"> + <description>Phylogenetic and APOBEC3 analysis of MPXV (Mpox virus)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="version_command"/> + + <command detect_errors="exit_code"><![CDATA[ + #set $alignment_output = 'input.aln.fasta' + #set $tree_output = 'input.tree' + + #if $apobec3 + #set $aa_recon_output = "input.tree.amino_acid.reconstruction.csv" + #set $branch_snps_output = "input.tree.branch_snps.reconstruction.csv" + #set $svg_output = "input.tree.svg" + #set $png_output = "input.tree.png" + #end if + + ln -s '${sequences}' input.fasta && + + squirrel + #if $apobec3 + --run-apobec3-phylo + --fig-height $fig_height + --fig-width $fig_width + #else + --run-phylo + #end if + + --clade $clade + + #if $mask_file + --additional-mask $mask_file + #end if + + #if $bg_file + --background-file '$bg_file' + #else + --include-background + #end if + + #if $out_group + --outgroups $out_group + #end if + + $no_mask + $no_iter_mask + + --threads \${GALAXY_SLOTS:-1} + input.fasta && + + mv '${alignment_output}' '$alignment' && + mv '${tree_output}' '$tree' + + #if $apobec3 + && mv '${aa_recon_output}' '$aa_recon' && + mv '${branch_snps_output}' '$branch_snps' && + mv '${svg_output}' '$svg' && + mv '${png_output}' '$png' + #end if + ]]></command> + + <inputs> + <param name="sequences" + type="data" + format="fasta" + label="Sequences in fasta format" + help="You can upload a FASTA sequence to the history and use it as reference" /> + <param name="apobec3" + type="boolean" + checked="false" + label="Run additional APOBEC3-mutation reconstruction pipeline" /> + <param name="clade" + type="select" + label="Select MPXV Clade"> + <option value="cladei">Clade I</option> + <option value="cladeia">Clade Ia</option> + <option value="cladeib">Clade Ib</option> + <option value="cladeii">Clade II</option> + <option value="cladeiia">Clade IIa</option> + <option value="cladeiib">Clade IIb</option> + </param> + <section name="other_settings" expanded="false" title="Additional Settings"> + <param name="no_mask" + type="boolean" + truevalue="--no-mask" + falsevalue="" + label="SKIP masking repeat regions?" + help="Set to True to Skip masking of repetitive regions. Default: masks repeat regions." /> + <param name="no_iter_mask" + type="boolean" + truevalue="--no-itr-mask" + falsevalue="" + label="SKIP masking of end ITR?" + help="Set to True to skip masking of end ITR. Default: masks ITR" /> + <param name="mask_file" + type="data" + format="csv" + optional="true" + label="Mask additional sites" + help="Run squirrel in alignment with QC to generate the SNP mask file." /> + <param name="bg_file" + type="data" + format="fasta" + optional="true" + label="Background file - leave empty for automatic background sequences." + help="Include a default background set of sequences for the phylogenetics pipeline. The set will be determined by previous 'clade' setting"/> + <param name="out_group" + type="text" + label="Specify outgroup(s)" + help="Specify which MPXV outgroup(s) in the alignment to use in the phylogeny. These will get pruned out from the final tree."/> + <param name="fig_height" + label="Overwrite tree figure default height" + type="integer" + min="0" + value="25" + optional="true"> + </param> + <param name="fig_width" + label="Overwrite tree figure default width" + type="integer" + min="0" + value="40" + optional="true"> + </param> + </section> + </inputs> + + <outputs> + <!-- standard outputs--> + <data name="tree" format="newick" label="${tool.name} - phylogenetic tree" /> + <data name="alignment" format="fasta" label="${tool.name} - aligned sequences" /> + <!-- apobec3 outputs--> + <data name="svg" format="svg" label="${tool.name} - phylotree svg image"> + <filter>apobec3</filter> + </data> + <data name="png" format="png" label="${tool.name} - phylotree png image"> + <filter>apobec3</filter> + </data> + <data name="aa_recon" format="png" label="${tool.name} - aa mutations ancestral reconstruction"> + <filter>apobec3</filter> + </data> + <data name="branch_snps" format="png" label="${tool.name} - apobec3 nt mutations"> + <filter>apobec3</filter> + </data> + </outputs> + + <tests> + <test expect_num_outputs="2"> + <param name="sequences" value="test-sequences.fasta" /> + <param name="bg_file" value="test-background.fasta" /> + <param name="out_group" value="KJ642615" /> + <param name="apobec3" value="false" /> + <output name="alignment" file="sequences.aln.fasta" /> + <output name="tree"> + <assert_contents> + <has_line_matching expression="#NEXUS"/> + </assert_contents> + </output> + </test> + + <test expect_num_outputs="6"> + <param name="sequences" value="test-sequences.fasta" /> + <param name="bg_file" value="test-background.fasta" /> + <param name="out_group" value="KJ642615" /> + <param name="apobec3" value="true" /> + <output name="alignment" file="sequences.aln.fasta" /> + <output name="tree"> + <assert_contents> + <has_line_matching expression="#NEXUS"/> + </assert_contents> + </output> + <output name="svg"> + <assert_contents> + <has_text text="svg xmlns:"/> + <has_text text="DQ011155"/> + </assert_contents> + </output> + <output name="png" file="sequences.tree.png" ftype="png" compare="sim_size" delta="1000" /> + <output name="aa_recon" file="sequences.tree.amino_acid.reconstruction.csv" /> + <output name="branch_snps" file="sequences.tree.branch_snps.reconstruction.csv" /> + </test> + + </tests> + <help><![CDATA[ + squirrel allows for rapidly producing reliable alignments for MPXV and also enable maximum-likelihood phylogenetics pipeline tree estimation. + + Ensure your input sequences are of a singular clade and not mixed CladeI/CladeII. CladeI and CladeIa/b are fine to combine. + + **Alignment** + Squirrel maps each query genome in the input file against a reference genome specific to each clade using minimap2. Using gofasta, the mapping file is then converted into a multiple sequence alignment. + + For Clade II, the reference used is NC_063383 and for Clade I, we use NC_003310. This means that all coordinates within an alignment will be relative to these references. A benefit of this is that within a clade, alignment files and be combined without having to recalculate the alignment. Note however that insertions relative to the reference sequence will not be included in the alignment. + + Squirrel by default creates a single alignment fasta file. Using the genbank coordinates for NC_063383 it also has the ability to extract the aligned coding sequences either as separate records or as a concatenated alignment. This can facilitate codon-aware phylogenetic or sequence analysis. + + **APOBEC3** + Enrichment of APOBEC3-mutations in the MPXV population are a signature of sustained human-to-human transmission. Identifying APOBEC3-like mutations in MPXV genomes from samples in a new outbreak can be a piece of evidence to support sustained human transmission of mpox. Squirrel can run an APOBEC3-reconstruction and map these mutations onto the phylogeny. + + **Default Masking** + Squirrel performs masking (replacement with N) on low-complexity or repetitive regions that have been characterised for Clade I and II. These regions are defined in to_mask.cladeii.csv and to_mask.cladei.csv (see github: https://github.com/aineniamh/squirrel/blob/main/squirrel/data/). + + **Additional Masking** + Additional mask file can be provided to mask sites in addition to default masking. To generate additional masking file, run the galaxy tool *squirrel-qc* + + + ]]></help> + +<expand macro="citations" /> +</tool>
