diff squirrel-phylo.xml @ 0:14936593e454 draft

planemo upload for repository https://github.com/aineniamh/squirrel commit ed19e40212d1e6651efb3a032d1170f4fd03b989
author iuc
date Thu, 16 Jan 2025 07:07:17 +0000
parents
children 153c1ee28c48
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/squirrel-phylo.xml	Thu Jan 16 07:07:17 2025 +0000
@@ -0,0 +1,211 @@
+<tool id="squirrel_phylo" name="Squirrel Phylo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
+    <!-- @TOOL_VERSION@ and @VERSION_SUFFIX@ are macro tokens; presumably defined in the imported macros.xml (not visible here). -->
+    <description>Phylogenetic and APOBEC3 analysis of MPXV (Mpox virus)</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <!-- Package requirements and the version command are shared through macros. -->
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    
+    <command detect_errors="exit_code"><![CDATA[
+      ## Squirrel is run on a symlink named input.fasta; every result file is
+      ## then expected under the fixed "input" prefix and moved to the
+      ## corresponding Galaxy dataset path.
+      ##
+      ## Parameters declared inside the "other_settings" section are addressed
+      ## through the section ($other_settings.*) instead of relying on
+      ## unqualified lookup.
+      #set $alignment_output = 'input.aln.fasta'
+      #set $tree_output = 'input.tree'
+
+      ln -s '${sequences}' input.fasta &&
+
+      squirrel
+      #if $apobec3
+        --run-apobec3-phylo
+        ## Both figure size fields are optional: only emit the flag when a
+        ## value is present, otherwise the flag would be left without argument.
+        #if str($other_settings.fig_height) != ''
+          --fig-height $other_settings.fig_height
+        #end if
+        #if str($other_settings.fig_width) != ''
+          --fig-width $other_settings.fig_width
+        #end if
+      #else
+        --run-phylo
+      #end if
+
+      --clade '$clade'
+
+      #if $other_settings.mask_file
+        --additional-mask '$other_settings.mask_file'
+      #end if
+
+      ## Without a user supplied background file the built-in background set
+      ## is requested instead.
+      #if $other_settings.bg_file
+        --background-file '$other_settings.bg_file'
+      #else
+        --include-background
+      #end if
+
+      #if $other_settings.out_group
+        --outgroups '$other_settings.out_group'
+      #end if
+
+      ## Boolean switches expand to their flag or to an empty string.
+      $other_settings.no_mask
+      $other_settings.no_iter_mask
+
+      --threads \${GALAXY_SLOTS:-1}
+      input.fasta &&
+
+      mv '${alignment_output}' '$alignment' &&
+      mv '${tree_output}' '$tree'
+
+      ## The reconstruction tables and tree figures are only collected in
+      ## APOBEC3 mode (matching the output filters).
+      #if $apobec3
+        #set $aa_recon_output = 'input.tree.amino_acid.reconstruction.csv'
+        #set $branch_snps_output = 'input.tree.branch_snps.reconstruction.csv'
+        #set $svg_output = 'input.tree.svg'
+        #set $png_output = 'input.tree.png'
+        && mv '${aa_recon_output}' '$aa_recon'
+        && mv '${branch_snps_output}' '$branch_snps'
+        && mv '${svg_output}' '$svg'
+        && mv '${png_output}' '$png'
+      #end if
+    ]]></command>
+
+    <inputs>
+        <!-- Query sequences; the command links this dataset as input.fasta. -->
+        <param name="sequences"
+          type="data"
+          format="fasta"
+          label="Sequences in fasta format"
+          help="MPXV genome sequences to align and to use for the phylogeny." />
+        <!-- Switches between the plain phylogeny run and the APOBEC3 reconstruction run. -->
+        <param name="apobec3"
+          type="boolean"
+          checked="false"
+          label="Run additional APOBEC3-mutation reconstruction pipeline" />
+        <param name="clade"
+          type="select"
+          label="Select MPXV Clade">
+          <option value="cladei">Clade I</option>
+          <option value="cladeia">Clade Ia</option>
+          <option value="cladeib">Clade Ib</option>
+          <option value="cladeii">Clade II</option>
+          <option value="cladeiia">Clade IIa</option>
+          <option value="cladeiib">Clade IIb</option>
+        </param>
+        <section name="other_settings" expanded="false" title="Additional Settings">
+          <param name="no_mask"
+            type="boolean"
+            truevalue="--no-mask"
+            falsevalue=""
+            label="SKIP masking repeat regions?"
+            help="Set to True to Skip masking of repetitive regions. Default: masks repeat regions." />
+          <param name="no_iter_mask"
+            type="boolean"
+            truevalue="--no-itr-mask"
+            falsevalue=""
+            label="SKIP masking of end ITR?"
+            help="Set to True to skip masking of end ITR. Default: masks ITR" />
+          <param name="mask_file"
+            type="data"
+            format="csv"
+            optional="true"
+            label="Mask additional sites"
+            help="Run squirrel in alignment with QC to generate the SNP mask file." />
+          <!-- When no dataset is selected the command requests the default background set instead. -->
+          <param name="bg_file"
+            type="data"
+            format="fasta"
+            optional="true"
+            label="Background file - leave empty for automatic background sequences."
+            help="Include a default background set of sequences for the phylogenetics pipeline. The set will be determined by previous 'clade' setting"/>
+          <param name="out_group"
+            type="text"
+            label="Specify outgroup(s)"
+            help="Specify which MPXV outgroup(s) in the alignment to use in the phylogeny. These will get pruned out from the final tree."/>
+          <!-- The figure size values are only passed to squirrel in APOBEC3 mode. -->
+          <param name="fig_height"
+            label="Overwrite tree figure default height"
+            type="integer"
+            min="0"
+            value="25"
+            optional="true"
+            help="Only used when the APOBEC3-mutation reconstruction pipeline is enabled." />
+          <param name="fig_width"
+            label="Overwrite tree figure default width"
+            type="integer"
+            min="0"
+            value="40"
+            optional="true"
+            help="Only used when the APOBEC3-mutation reconstruction pipeline is enabled." />
+        </section>
+    </inputs>
+
+    <outputs>
+      <!-- Standard outputs, produced by every run. -->
+      <!-- NOTE(review): the tests assert a #NEXUS header in the tree file; confirm whether the declared format should be Nexus rather than newick. -->
+      <data name="tree" format="newick" label="${tool.name} - phylogenetic tree" />
+      <data name="alignment" format="fasta" label="${tool.name} - aligned sequences" />
+      <!-- APOBEC3 outputs, only created when the apobec3 switch is enabled. -->
+      <data name="svg" format="svg" label="${tool.name} - phylotree svg image">
+        <filter>apobec3</filter>
+      </data>
+      <data name="png" format="png" label="${tool.name} - phylotree png image">
+        <filter>apobec3</filter>
+      </data>
+      <!-- Both reconstruction tables are moved from *.reconstruction.csv files, so they are declared as csv. -->
+      <data name="aa_recon" format="csv" label="${tool.name} - aa mutations ancestral reconstruction">
+        <filter>apobec3</filter>
+      </data>
+      <data name="branch_snps" format="csv" label="${tool.name} - apobec3 nt mutations">
+        <filter>apobec3</filter>
+      </data>
+    </outputs>
+    
+    <tests>
+        <!-- Plain phylogeny run: only the alignment and the tree are expected. -->
+        <test expect_num_outputs="2">
+            <param name="sequences" value="test-sequences.fasta"/>
+            <param name="apobec3" value="false"/>
+            <section name="other_settings">
+                <param name="bg_file" value="test-background.fasta"/>
+                <param name="out_group" value="KJ642615"/>
+            </section>
+            <output name="alignment" file="sequences.aln.fasta"/>
+            <output name="tree">
+                <assert_contents>
+                    <has_line_matching expression="#NEXUS"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- APOBEC3 run: the two figures and the two reconstruction tables are expected as well. -->
+        <test expect_num_outputs="6">
+            <param name="sequences" value="test-sequences.fasta"/>
+            <param name="apobec3" value="true"/>
+            <section name="other_settings">
+                <param name="bg_file" value="test-background.fasta"/>
+                <param name="out_group" value="KJ642615"/>
+            </section>
+            <output name="alignment" file="sequences.aln.fasta"/>
+            <output name="tree">
+                <assert_contents>
+                    <has_line_matching expression="#NEXUS"/>
+                </assert_contents>
+            </output>
+            <output name="svg">
+                <assert_contents>
+                    <has_text text="svg xmlns:"/>
+                    <has_text text="DQ011155"/>
+                </assert_contents>
+            </output>
+            <!-- The PNG is compared by size only, with a tolerance of 1000 bytes. -->
+            <output name="png" file="sequences.tree.png" ftype="png" compare="sim_size" delta="1000"/>
+            <output name="aa_recon" file="sequences.tree.amino_acid.reconstruction.csv"/>
+            <output name="branch_snps" file="sequences.tree.branch_snps.reconstruction.csv"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+      Squirrel rapidly produces reliable alignments for MPXV and also provides a maximum-likelihood phylogenetics pipeline for tree estimation.
+
+      Ensure your input sequences belong to a single clade and are not a mix of CladeI and CladeII. CladeI and CladeIa/b are fine to combine.
+
+      **Alignment**
+      Squirrel maps each query genome in the input file against a reference genome specific to each clade using minimap2. Using gofasta, the mapping file is then converted into a multiple sequence alignment.
+
+      For Clade II, the reference used is NC_063383 and for Clade I, we use NC_003310. This means that all coordinates within an alignment will be relative to these references. A benefit of this is that within a clade, alignment files can be combined without having to recalculate the alignment. Note however that insertions relative to the reference sequence will not be included in the alignment.
+
+      Squirrel by default creates a single alignment fasta file. Using the genbank coordinates for NC_063383 it also has the ability to extract the aligned coding sequences either as separate records or as a concatenated alignment. This can facilitate codon-aware phylogenetic or sequence analysis.
+
+      **APOBEC3**
+      Enrichment of APOBEC3-mutations in the MPXV population is a signature of sustained human-to-human transmission. Identifying APOBEC3-like mutations in MPXV genomes from samples in a new outbreak can be a piece of evidence to support sustained human transmission of mpox. Squirrel can run an APOBEC3-reconstruction and map these mutations onto the phylogeny.
+
+      **Default Masking**
+      Squirrel performs masking (replacement with N) on low-complexity or repetitive regions that have been characterised for Clade I and II. These regions are defined in to_mask.cladeii.csv and to_mask.cladei.csv (see github: https://github.com/aineniamh/squirrel/blob/main/squirrel/data/).
+
+      **Additional Masking**
+      An additional mask file can be provided to mask sites in addition to the default masking. To generate an additional mask file, run the Galaxy tool *squirrel-qc*.
+
+
+    ]]></help>
+
+<!-- Citation entries are supplied by the "citations" macro; presumably defined in macros.xml (not visible here). -->
+<expand macro="citations" />
+</tool>