comparison macros.xml @ 0:0e7bd9c4dd43 draft default tip

planemo upload for repository https://github.com/Helmholtz-UFZ/ufz-galaxy-tools/blob/main/tools/phabox commit 52385539f64c4e46c2e8953588efa3ea01bb99fd
author ufz
date Wed, 16 Apr 2025 09:42:52 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0e7bd9c4dd43
1 <macros>
2 <token name="@TOOL_VERSION@">2.1.11</token> <!-- presumably the packaged PhaBOX version; confirm against the tool requirements -->
3 <token name="@VERSION_SUFFIX@">0</token> <!-- presumably the wrapper revision appended to the tool version; confirm in the tool XML -->
4 <!-- Citation list macro; the yield element marks where child elements supplied at the expand site are inserted. --> <xml name="citations">
5     <citations>
6         <citation type="doi">10.1093/bioadv/vbad101</citation>
7         <yield/>
8     </citations>
9 </xml>
10
11 <!-- General inputs: database selection, contigs, optional predicted proteins and the minimum contig length. --> <xml name="general">
12     <param argument="--dbdir" type="select" label="Phabox2 database">
13         <options from_data_table="phabox"/>
14     </param>
15     <param argument="--contigs" type="data" format="fasta" optional="false" label="Contig sequences"/>
16     <param argument="--proteins" type="data" format="fasta" optional="true" label="Predicted proteins"/>
17     <param argument="--len" type="integer" value="3000" min="0" label="Minimum contig length" help="Contigs with length smaller than this value will not proceed"/>
18 </xml>
19 <!-- Command line fragment for the general inputs; results go to output/ and intermediate files to intermediate/. The proteins option is only emitted when a dataset was supplied. --> <token name="@GENERAL@"><![CDATA[
20 --dbdir '${dbdir.fields.path}'
21 --outpth output/
22 --contigs '${contigs}'
23 #if $proteins
24 --proteins '${proteins}'
25 #end if
26 --midfolder intermediate/
27 --len ${len}
28 --threads "\${GALAXY_SLOTS:-1}"
29 ]]></token>
30
31
32 <!-- Section holding the rejection threshold used for virus identification. --> <xml name="phamer">
33     <section name="phamer" title="Options for virus identification" help="">
34         <param argument="--reject" type="float" value="10" min="0" max="20" label="Minimum known proteins percentage" help="Reject sequences in which the percent proteins aligned to known phages is smaller than the value"/>
35     </section>
36 </xml>
37 <!-- Command line fragment for the phamer section. --> <token name="@PHAMER@"><![CDATA[
38 --reject ${phamer.reject}
39 ]]></token>
40
41
42 <!-- Section with the three thresholds used to build the virus-virus network. --> <xml name="network">
43     <section name="network" title="Options for virus-virus connections" help="The options below are used to generate a network for virus-virus connections. The current parameters are optimized for the ICTV 2024 and are highly accurate for grouping genus-level vOTUs. When making changes, make sure you understand what they are.">
44         <param argument="--aai" type="float" value="75" min="0" max="100" label="Average amino acids identity"/>
45         <param argument="--share" type="float" value="15" min="0" max="100" label="Minimum shared number of proteins"/>
46         <param argument="--pcov" type="float" value="80" min="0" max="100" label="Protein-based coverage"/>
47         <!-- \-\-draw not recommended to be used according to CLI help -->
48     </section>
49 </xml>
50 <!-- Command line fragment for the network section. --> <token name="@NETWORK@"><![CDATA[
51 --aai ${network.aai}
52 --share ${network.share}
53 --pcov ${network.pcov}
54 ]]></token>
55
56 <!-- Section for CRISPR-based host prediction. The MAG input accepts several FASTA datasets because the @CRISPR_PRE@ token loops over it to populate the MAG folder. --> <xml name="crispr">
57     <section name="crispr" title="Options used to predict CRISPRs based on MAGs" help="">
58         <param argument="--bfolder" type="data" format="fasta" multiple="true" optional="true" label="MAGS"/>
59         <param argument="--prophage" type="integer" value="1000" min="0" max="100000" label="Minimum alignment length for estimate potential prophage"/>
60         <param argument="--cpident" type="float" value="90" min="90" max="100" label="Alignment identity for CRISPRs"/>
61         <param argument="--ccov" type="float" value="90" min="0" max="100" label="Alignment coverage for CRISPRs"/>
62         <param argument="--blast" type="select" label="BLAST program for CRISPRs" help="blastn-short will lead to more sensitive results but require more time to execute the program">
63             <option value="blastn">blastn</option>
64             <option value="blastn-short">blastn-short</option>
65         </param>
66         <param argument="--magonly" type="boolean" truevalue="--magonly Y" falsevalue="--magonly N" label="Only predicting host based on the provided MAGs" help="Default is to predict the host based on the MAGs and the reference database"/>
67     </section>
68 </xml>
69 <!-- Shell preamble: when MAGs are given, create bfolder/ and symlink every MAG into it under a sanitized element identifier, so that the directory handed to the bfolder option actually contains the files. --> <token name="@CRISPR_PRE@"><![CDATA[
70 #if $crispr.bfolder
71 mkdir bfolder &&
72 #for b in $crispr.bfolder
73 #set bname = re.sub('[^\w\-_\.]', '_', $b.element_identifier)
74 ln -s '$b' 'bfolder/$bname' &&
75 #end for
76 #end if
77 ]]></token>
78 <!-- Command line fragment for the crispr section. Every parameter is addressed through its section, including the magonly boolean, whose true/false values already carry the full flag text. --> <token name="@CRISPR@"><![CDATA[
79 #if $crispr.bfolder
80 --bfolder bfolder
81 #end if
82 --prophage $crispr.prophage
83 --cpident $crispr.cpident
84 --ccov $crispr.ccov
85 --blast $crispr.blast
86 $crispr.magonly
87 ]]></token>
88
89 <!-- Section with the sensitive-search switch for contamination detection; the boolean values carry the complete flag text. --> <xml name="contamination">
90     <section name="contamination" title="Options for contamination detection" help="">
91         <param argument="--sensitive" type="boolean" truevalue="--sensitive Y" falsevalue="--sensitive N" label="Sensitive search for prokaryotic genes" help="Enabling this will lead to more sensitive results but require more time to execute the program"/>
92     </section>
93 </xml>
94 <!-- Command line fragment for the contamination section. --> <token name="@CONTAMINATION@"><![CDATA[
95 ${contamination.sensitive}
96 ]]></token>
97
98 <!-- Stand-alone AAI threshold parameter for genus grouping. --> <xml name="aai">
99     <param argument="--aai" type="float" value="75" min="0" max="100" label="Average amino acids identity for AAI based genus grouping"/>
100 </xml>
101
102 <!-- Section for vOTU grouping: the clustering mode selects which set of thresholds (ANI or AAI) is shown. --> <xml name="votu">
103     <section name="votu" title="Options for vOTU grouping" help="">
104         <conditional name="mode_cond">
105             <param argument="--mode" type="select" label="Clustering mode" >
106                 <option value="ANI">ANI</option>
107                 <option value="AAI">AAI</option>
108             </param>
109             <when value="ANI">
110                 <param argument="--ani" type="float" value="95" min="0" max="100" label="Alignment identity for ANI-based clustering"/>
111                 <param argument="--tcov" type="float" value="85" min="0" max="100" label="Alignment coverage for ANI-based clustering"/>
112             </when>
113             <when value="AAI">
114                 <param argument="--aai" type="float" value="75" min="0" max="100" label="Average amino acids identity for AAI based genus grouping"/>
115                 <param argument="--pcov" type="float" value="80" min="0" max="100" label="Protein-level coverage for AAI based genus grouping"/>
116                 <param argument="--share" type="float" value="15" min="0" max="100" label="Minimum shared number of proteins for AAI based genus grouping"/>
117             </when>
118         </conditional>
119     </section>
120 </xml>
121 <!-- Command line fragment for the votu section: emits the mode plus only the thresholds belonging to the selected mode. --> <token name="@VOTU@"><![CDATA[
122 --mode ${votu.mode_cond.mode}
123 #if $votu.mode_cond.mode == "ANI"
124 --ani ${votu.mode_cond.ani}
125 --tcov ${votu.mode_cond.tcov}
126 #else if $votu.mode_cond.mode == "AAI"
127 --aai ${votu.mode_cond.aai}
128 --pcov ${votu.mode_cond.pcov}
129 --share ${votu.mode_cond.share}
130 #end if
131 ]]></token>
132
133 <!-- Section for tree building: marker gene selection (portal and terl preselected) plus the coverage and identity thresholds for matching marker genes. --> <xml name="tree">
134     <section name="tree" title="Options for tree building" help="">
135         <param argument="--marker" type="select" multiple="true" label="Markers used to generate tree" help="Using combinations of these markers can improve the accuracy of the tree, but will decrease the number of sequences in the tree. Numbers in parentheses give the percentage of prokaryotic viruses that have the corresponding protein.">
136             <option value="endolysin">endolysin (91)</option>
137             <option value="holin">holin (75)</option>
138             <option value="head">major head (77)</option>
139             <option value="portal" selected="true">portal (84)</option>
140             <option value="terl" selected="true">terminase large subunit (92)</option>
141         </param>
142         <param argument="--mcov" type="float" value="50" min="0" max="100" label="Alignment coverage for matching marker genes"/>
143         <param argument="--mpident" type="float" value="25" min="0" max="100" label="Alignment identity for matching marker genes"/>
144     </section>
145 </xml>
146 <!-- Command line fragment for the tree section: the marker option followed by one selected marker per line, then the matching thresholds. --> <token name="@TREE@"><![CDATA[
147 --marker
148 #for gene in $tree.marker
149 ${gene}
150 #end for
151 --mcov ${tree.mcov}
152 --mpident ${tree.mpident}
153 ## Building the MSA and the tree
154 ## (the program would call mafft and fasttree for this)
155 ## can be done more flexibly with separate Galaxy tools
156 ## (leaving this here to ensure it won't be implemented)
157 ## --msa Y
158 ## --tree Y
159 ]]></token>
160
161 <!-- Output collection with the supplementary files of one task. Files are discovered by extension in the task's supplementary directory; the dot before each extension is escaped so it only matches a literal dot. The collection is only created when the task name is among the selected supplements. --> <xml name="supp_out" tokens="task">
162     <collection name="@TASK@_supp_out" type="list" label="${tool.name} on ${on_string}: @TASK@ supplement">
163         <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fa" format="fasta" directory="output/final_prediction/@TASK@_supplementary"/>
164         <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tsv" format="tabular" directory="output/final_prediction/@TASK@_supplementary"/>
165         <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tab" format="tabular" directory="output/final_prediction/@TASK@_supplementary"/>
166         <filter>supplements and "@TASK@" in supplements</filter>
167     </collection>
168 </xml>
169
170 <token name="@COMMON_OUTPUT_DOC@"><![CDATA[
171 A tabular dataset with the following columns:
172
173 - Accession: the accession or the name of the input contigs.
174 - Length: the length of input contigs.
175 ]]></token>
176 <token name="@PHAMER_OUTPUT_DOC@"><![CDATA[
177 - Pred: virus or non-virus.
178 - Proportion: the proportion of the proteins that can be aligned to the virus database (from 0 to 1).
179 - PhaMerScore: the prediction score given by the deep learning model.
180 - PhaMerConfidence: the confidence of the prediction, determined by both Proportion and PhaMerScore (high-confidence, medium-confidence, low-confidence, lower than reject threshold (according to the --reject parameter; default: 10 percent, i.e. a Proportion of 0.1)).
181 For viruses with low confidence or below the reject threshold, we recommend running the contamination task to check their sequence quality.
182 ]]></token>
183 <token name="@PHAGCN_OUTPUT_DOC@"><![CDATA[
184 - Lineage: the predicted taxonomy lineage (NCBI version) of the contigs. Each rank is separated by the ';'.
185 - PhaGCNScore: the predicted score for each rank in the lineage. Each rank is separated by the ';'.
186 - Genus: whether the contig has a genus level name ('-' means unknown).
187 - GenusCluster: if the Genus is '-', the program will assign a genus-level grouping result: group_idx (idx = 1, 2, 3, ...) or singleton. This can be viewed as genus-level OTUs based on the average shared protein identities between sequences.
188 - Prokaryotic virus (Bacteriophages and Archaeal virus): Y/N
189 ]]></token>
190 <token name="@PHATYP_OUTPUT_DOC@"><![CDATA[
191 - TYPE: virulent or temperate (virus).
192 - PhaTYPScore: the prediction score given by the deep learning model.
193 ]]></token>
194 <token name="@CHERRY_OUTPUT_DOC@"><![CDATA[
195 - Host: the predicted host (NCBI taxonomy) of the contigs. '-' means unknown host.
196 - CHERRYScore: the predicted score from the model.
197 - Method:
198 - CRISPR-based(MAG): CRISPRs alignment results from provided MAG (if any)
199 - CRISPR-based(DB): CRISPRs alignment results from database.
200 - AAI-based: predicting host based on virus-virus similarity (average amino acid identity).
201 - Host_NCBI_lineage
202 - Host_GTDB_lineage
203 ]]></token>
204
205 <token name="@COMMON_INPUT_DOC@"><![CDATA[
206 **Input**
207
208 - Contig sequences in FASTA format
209 - Optionally, your own predicted protein sequences can be provided (by default, the tool uses prodigal and diamond blastp for the prediction)
210 ]]></token>
211
212 </macros>