comparison load.xml @ 0:301ee8d3a0f4 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ampvis2 commit 9ed0c3078be166bd22136771f517ae91a5198ecf
author iuc
date Fri, 16 Aug 2024 08:49:16 +0000
parents
children 07e7ec7ab1ac
comparison
equal deleted inserted replaced
-1:000000000000 0:301ee8d3a0f4
1 <tool id="ampvis2_load" name="ampvis2 load" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
2 <description></description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="header"/>
7 <command detect_errors="exit_code"><![CDATA[
8 #if $otutable.is_of_type("biom1") or $otutable.is_of_type("biom2")
9 ln -s '$otutable' otutable.biom &&
10 #else if not $otutable.is_of_type("phyloseq")
11 ## ampvis2 offers no way to specify the ASV/OTU column, so if its header is
12 ## empty the sed call below names it "ASV" (see https://github.com/KasperSkytte/ampvis2/issues/166);
13 ## the same header fix-up is applied to the taxonomy table (taxonomy.tsv) further down
14 #if $asv_otu_col_empty
15 sed -e '1 s/^\t/ASV\t/' '$otutable' > otutable.tsv &&
16 #else
17 ln -s '$otutable' otutable.tsv &&
18 #end if
19 #end if
20 #if $taxonomy
21 #if $asv_otu_col_empty
22 sed -e '1 s/^\t/ASV\t/' '$taxonomy' > taxonomy.tsv &&
23 #else
24 ln -s '$taxonomy' taxonomy.tsv &&
25 #end if
26 #end if
27 Rscript '$rscript'
28 ]]></command>
29 <configfiles>
30 <configfile name="rscript"><![CDATA[
31 library(ampvis2, quietly = TRUE)
32 library(readr, quietly = TRUE)
33 ## load the metadata 'manually' so that all columns are read as character,
34 ## since giving colClasses to amp_load seems not to be possible
35 ## - check.names=F: leave an empty column name empty (it is fixed below)
36 #if $metadata
37 metadata <- read.table("$metadata", header = TRUE, sep = "\t", colClasses = "character", check.names=F)
38 ## the 1st metadata column is not required to be named "SampleID",
39 ## but its name must not be empty, so an empty name is replaced by "SampleID"
40 if(colnames(metadata)[1] == ""){
41 colnames(metadata)[1] <- "SampleID"
42 }
43 if(exists("SampleID", where = metadata)){
44 rownames(metadata) <- metadata[["SampleID"]]
45 }else{
46 rownames(metadata) <- metadata[[1]]
47 }
48 #end if
49
50 #if $otutable.is_of_type("phyloseq")
51 otutable <- readRDS("$otutable")
52 print(class(otutable))
53 #end if
54 data <- amp_load(
55 #if $otutable.is_of_type("phyloseq")
56 otutable = otutable,
57 #else if $otutable.is_of_type("biom1") or $otutable.is_of_type("biom2")
58 otutable = "otutable.biom",
59 #else
60 otutable = "otutable.tsv",
61 #end if
62 #if $metadata
63 metadata = metadata,
64 #end if
65 #if $taxonomy
66 taxonomy = "taxonomy.tsv",
67 #end if
68 #if $fasta
69 fasta = "$fasta",
70 #end if
71 #if $tree
72 tree = "$tree",
73 #end if
74 pruneSingletons = $pruneSingletons
75 )
76
77 #if $asv_sequences
78 library(ape, quietly = TRUE)
79
80 seq <- as.DNAbin(strsplit(rownames(data\$abund), ""))
81 names(seq) <- paste0("ASV", seq_along(seq))
82 data\$refseq <- seq
83 data <- matchOTUs(data, seq)
84 #end if
85
86 ## try to guess the metadata column types with readr::type_convert
87 #if $guess_column_types
88 data\$metadata <- readr::type_convert(data\$metadata, guess_integer=TRUE)
89 #end if
90
91 saveRDS(data, "$ampvis")
92 ## write the metadata list if selected in write_lists (e.g. for biom input or if metadata is given)
93 #if "metadata" in $write_lists
94 @SAVE_METADATA_LIST@
95 #end if
96
97 #if "tax" in $write_lists
98 @SAVE_TAX_LIST@
99 #end if
100 ## print overview of the data to stdout
101 data
102 ]]></configfile>
103 </configfiles>
104 <inputs>
105 <param argument="otutable" type="data" format="phyloseq,dada2_sequencetable,tabular,biom1,biom2" label="OTU table"/> <!-- biom and phyloseq data sets are handled by dedicated branches in the command and rscript sections -->
106 <param name="asv_otu_col_empty" type="boolean" checked="false" label="OTU/ASV column has empty header" help="By default ampvis2 expects a column named ASV or OTU containing the ASV or OTU identifiers. By checking this a column with an empty header will be used (as produced by dada2)."/> <!-- enables the sed header fix-up for the OTU table and the taxonomy table in the command section -->
107 <param name="asv_sequences" type="boolean" checked="false" label="ASV identifiers are the ASV sequences" help="By checking this the identifiers will be renamed to ASV1, ASV2, etc and the sequences will be stored in the ampvis2 object." /> <!-- enables the ape / matchOTUs step in the rscript configfile -->
108 <param argument="metadata" type="data" format="tabular,tsv" optional="true" label="Sample metadata">
109 <validator type="expression" message="Table must have at least 1 column"><![CDATA[value.metadata.columns > 0]]></validator>
110 </param>
111 <param name="guess_column_types" type="boolean" checked="true" label="Guess metadata column types" help="See help"/>
112 <param argument="taxonomy" type="data" format="tabular" optional="true" label="Taxonomy table"/>
113 <param argument="fasta" type="data" format="fasta" optional="true" label="Fasta file"/>
114 <param argument="tree" type="data" format="newick" optional="true" label="Phylogenetic tree"/>
115 <param argument="pruneSingletons" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Remove singleton OTUs"/> <!-- truevalue / falsevalue are R literals inserted verbatim into the amp_load() call -->
116 <param name="write_lists" type="select" optional="true" multiple="true" label="Output list data sets" help="Needed by most downstream tools. Select if the inputs contain taxonomic / metadata information."> <!-- the selected values are also used by the filters of the list outputs -->
117 <option value="tax" selected="true">Taxonomy list</option>
118 <option value="metadata" selected="true">Metadata list</option>
119 </param>
120 </inputs>
121 <outputs>
122 <data name="ampvis" format="ampvis2"/> <!-- written by saveRDS() in the rscript configfile -->
123 <data name="metadata_list_out" format="tabular" label="${tool.name} on ${on_string}: metadata list"> <!-- only created if "metadata" is selected in write_lists -->
124 <filter>write_lists and "metadata" in write_lists</filter>
125 </data>
126 <data name="taxonomy_list_out" format="tabular" label="${tool.name} on ${on_string}: taxonomy list"> <!-- only created if "tax" is selected in write_lists -->
127 <filter>write_lists and "tax" in write_lists</filter>
128 </data>
129 </outputs>
130 <tests>
131 <!-- load otu table + metadata + taxonomy -->
132 <test expect_num_outputs="3">
133 <param name="otutable" value="AalborgWWTPs.otu.csv"/>
134 <param name="metadata" value="AalborgWWTPs.tsv" ftype="tsv"/>
135 <param name="taxonomy" value="AalborgWWTPs.tax"/>
136 <output name="ampvis" value="AalborgWWTPs.rds" ftype="ampvis2" compare="sim_size"/>
137 <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/>
138 <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/>
139 <assert_stdout>
140 <has_text text="ampvis2 object with 3 elements."/>
141 <has_text text="575.79"/>
142 <has_text text="SampleID, Plant, Date, Year, Period"/>
143 <has_text text="200(100%) 194(97%) 177(88.5%) 170(85%) 152(76%) 113(56.5%) 2(1%)"/>
144 </assert_stdout>
145 </test>
146 <!-- load otu table + metadata + taxonomy + tree + fasta -->
147 <test expect_num_outputs="3">
148 <param name="otutable" value="AalborgWWTPs.otu.csv"/>
149 <param name="metadata" value="AalborgWWTPs.tsv" ftype="tsv"/>
150 <param name="taxonomy" value="AalborgWWTPs.tax"/>
151 <param name="fasta" value="AalborgWWTPs.fa" ftype="fasta"/>
152 <param name="tree" value="AalborgWWTPs.nwk" ftype="newick"/>
153 <output name="ampvis" value="AalborgWWTPs-complete.rds" ftype="ampvis2" compare="sim_size"/>
154 <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/>
155 <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/>
156 <assert_stdout>
157 <has_text text="ampvis2 object with 5 elements."/>
158 <has_text text="575.79"/>
159 <has_text text="SampleID, Plant, Date, Year, Period"/>
160 <has_text text="200(100%) 194(97%) 177(88.5%) 170(85%) 152(76%) 113(56.5%) 2(1%)"/>
161 </assert_stdout>
162 </test>
163 <!-- test biom 1/2 input (taken from https://github.com/biocore/biom-format/tree/master/examples)
164 metadata seems not to be loaded from a biom file https://github.com/MadsAlbertsen/ampvis2/issues/129
165 taxonomy is loaded from all but 1
166 -->
167 <test expect_num_outputs="1">
168 <param name="otutable" value="rich-dense.biom" ftype="biom1"/>
169 <param name="write_lists" value=""/> <!-- deselects both list outputs, so only the ampvis2 data set is expected -->
170 <output name="ampvis" ftype="ampvis2">
171 <assert_contents>
172 <has_size value="748"/>
173 </assert_contents>
174 </output>
175 <assert_stdout>
176 <has_text text="ampvis2 object with 3 elements."/>
177 <has_text text="4.5"/>
178 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/>
179 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/>
180 </assert_stdout>
181 </test>
182 <test expect_num_outputs="1"> <!-- same stdout checks as the previous test, for the rich-sparse biom1 example file -->
183 <param name="otutable" value="rich-sparse.biom" ftype="biom1"/>
184 <param name="write_lists" value=""/>
185 <output name="ampvis" ftype="ampvis2">
186 <assert_contents>
187 <has_size value="751"/>
188 </assert_contents>
189 </output>
190 <assert_stdout>
191 <has_text text="ampvis2 object with 3 elements."/>
192 <has_text text="4.5"/>
193 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/>
194 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/>
195 </assert_stdout>
196 </test>
197 <!-- input file seems to lack metadata: check that no metadata & taxonomy is loaded (ampvis2 adds dummy metadata) -->
198 <test expect_num_outputs="1">
199 <param name="otutable" value="min_sparse_otu_table_hdf5.biom" ftype="biom2"/>
200 <output name="ampvis" ftype="ampvis2">
201 <assert_contents>
202 <has_size value="395"/>
203 </assert_contents>
204 </output>
205 <param name="write_lists" value=""/>
206 <assert_stdout>
207 <has_text text="ampvis2 object with 3 elements."/>
208 <has_text text="4.5"/>
209 <has_text text="SampleID, DummyVariable"/>
210 <has_text text="0(0%) 0(0%) 0(0%) 0(0%) 0(0%) 0(0%) 0(0%)"/>
211 </assert_stdout>
212 </test>
213 <test expect_num_outputs="1"> <!-- biom2 example file; the asserted overview lists metadata variables and taxonomy counts -->
214 <param name="otutable" value="rich_sparse_otu_table_hdf5.biom" ftype="biom2"/>
215 <output name="ampvis" ftype="ampvis2">
216 <assert_contents>
217 <has_size value="753"/>
218 </assert_contents>
219 </output>
220 <param name="write_lists" value=""/>
221 <assert_stdout>
222 <has_text text="ampvis2 object with 3 elements."/>
223 <has_text text="4.5"/>
224 <has_text text="SampleID, BODY_SITE, BarcodeSequence, Description, LinkerPrimerSequence"/>
225 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/>
226 </assert_stdout>
227 </test>
228 <!-- load dada2 ASV table + metadata + taxonomy -->
229 <test expect_num_outputs="3">
230 <param name="otutable" value="dada2-removeBimeraDenovo.tab" ftype="dada2_sequencetable"/>
231 <param name="metadata" value="dada2-metadata.tsv" ftype="tsv"/>
232 <param name="taxonomy" value="dada2-assignTaxonomy.tabular"/>
233 <param name="asv_otu_col_empty" value="true"/>
234 <param name="asv_sequences" value="true"/>
235 <output name="ampvis" ftype="ampvis2">
236 <assert_contents>
237 <has_size min="100"/>
238 </assert_contents>
239 </output>
240 <output name="metadata_list_out">
241 <assert_contents>
242 <has_n_lines n="23"/>
243 <has_n_columns n="4"/>
244 <has_text text="Sample"/>
245 </assert_contents>
246 </output>
247 <output name="taxonomy_list_out">
248 <assert_contents>
249 <has_n_lines n="370"/>
250 <has_n_columns n="2"/>
251 <has_line line="Bacteria&#009;Kingdom"/>
252 </assert_contents>
253 </output>
254 <assert_stdout>
255 <has_text text="ampvis2 object with 4 elements."/> <!-- this also has fasta, i.e. 4 -->
256 <has_text text="6212.45"/>
257 <has_text text="Sample, time"/>
258 <has_text text="232(100%) 232(100%) 232(100%) 231(99.57%) 209(90.09%) 127(54.74%)"/>
259 </assert_stdout>
260 </test>
261 <!-- load data from phyloseq -->
262 <test expect_num_outputs="3">
263 <param name="otutable" value="output.phyloseq" ftype="phyloseq"/>
264 <output name="ampvis" ftype="ampvis2">
265 <assert_contents>
266 <has_size min="100"/>
267 </assert_contents>
268 </output>
269 <output name="metadata_list_out">
270 <assert_contents>
271 <has_n_lines n="6"/>
272 <has_n_columns n="4"/>
273 <has_text text="SampleID"/>
274 </assert_contents>
275 </output>
276 <output name="taxonomy_list_out">
277 <assert_contents>
278 <has_n_lines n="147"/>
279 <has_n_columns n="2"/>
280 <has_line line="Bacteria&#009;Kingdom"/>
281 </assert_contents>
282 </output>
283 <assert_stdout>
284 <has_text text="ampvis2 object with 4 elements."/> <!-- this also has fasta, i.e. 4 -->
285 <has_text text="SampleID, Property, Number"/>
286 <has_text text="64(100%) 64(100%) 64(100%) 64(100%) 62(96.88%) 56(87.5%) 0(0%)"/>
287 </assert_stdout>
288 </test>
289 </tests>
290 <help><![CDATA[
291
292 What it does
293 ============
294
295 This tool reads an OTU or ASV table and corresponding sample metadata, and returns
296 an RDS data set for use in all ampvis2 tools. It is therefore required to load
297 data with this tool before any other ampvis2 tools can be used.
298
299 The Galaxy tool calls the `amp_load <https://kasperskytte.github.io/ampvis2/reference/amp_load.html>`_
300 function of the ampvis2 package. This function validates and corrects the
301 provided data frames in different ways to make it suitable for the rest of the
302 ampvis2 tools. It is important that the provided data sets match the
303 requirements as described in the following to work properly.
304
305 Input
306 =====
307
308 **The OTU-table**
309
310 contains information about the OTU/ASVs, their read counts in each sample, and
311 optionally their assigned taxonomy. The OTU table can be given as
312
313 - Tabular data set
314 - BIOM version (1 and 2)
315
316 Metadata and taxonomy in the tabular or BIOM files that are given via the
317 ``OTU table`` parameter are overwritten by data provided via the
318 ``Sample metadata`` or ``Taxonomy table`` parameters.
319
320 If given in tabular format the provided OTU-table must be a table with the
321 following requirements:
322
323 - The rows are OTU IDs and the columns are samples.
324 - The OTU IDs are by default expected to be in a column called "OTU", "ASV", or "#OTU ID".
325 For data using an empty header for the OTU/ASV column enable the option *OTU/ASV column has empty header*
326 (this allows to process data as produced e.g. by dada2).
327 - The column names of the table are the sample IDs, exactly matching those in
328 the metadata
329 - The last 7 columns are optionally the corresponding taxonomy assigned to the
330 OTUs, named "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species".
331
332 If the ASV IDs are actually the ASV Sequences then enabling
333 *ASV identifiers are the ASV sequences* will rename the identifiers to ASV1, ASV2,...
334 (and save the sequences in the ampvis2 object).
335
336 Generally avoid special characters and spaces in row- and column names.
337
338 The OTU table can also contain the taxonomic information in additional columns:
339 Kingdom, Phylum, Class, Order, Family, Genus.
340
341 Check `here <https://biom-format.org/>`_ for information on the BIOM formats.
342
343 **The metadata**
344
345 contains additional information about the samples, for example where each sample
346 was taken, date, pH, treatment etc, which is used to compare and group the
347 samples during analysis. The amount of information in the metadata is unlimited,
348 it can contain any number of columns (variables), however there are a few
349 requirements:
350
351 - The sample IDs must be in the first column. The sample IDs must match exactly
352 to those in the OTU-table. Any unmatched samples between the otutable and
353 metadata will be removed with a warning.
354 - Generally avoid special characters and spaces in row- and column names.
355
356 By default the data types of metadata columns are guessed with
357 ``readr::type_convert``. The guessed column types can be seen in the last (4th)
358 column of the ``metadata list`` output and also stdout of the tool. Guessing of
359 data types can be disabled using the parameter ``Guess metadata column types``.
360 If disabled, metadata from separate tabular input is treated as character data,
361 and if loaded from biom files that data is used as is. Metadata types can be set
362 manually using the tool ``ampvis2: set metadata``.
363
364 Dates should be given in the format ``YYYY-MM-DD`` (Y: year, M: month, D: day).
365
366 In addition to the RDS data set a metadata (resp. taxonomy) list data set is returned
367 if metadata (resp. taxonomic information) is given to this tool. It contains
368 restructured metadata (taxonomic information) that is used in downstream ampvis2
369 Galaxy tools in order to select metadata / metadata values (resp. taxonomic levels).
370
371 **Taxonomy**
372
373 is a tabular data set with 7 columns and one row per ASV/OTU:
374
375 - the 1st column is identical to the 1st column of the OTU table parameter
376 - the remaining columns contain data for Kingdom, Phylum, Class, Order, Family, Genus
377
378 Note that the taxonomic information can also be embedded in the OTU table.
379
380 **Tree**
381
382 a tree with branch lengths in Newick format.
383
384 This is needed / useful only if the data is used as input of: ``ampvis2:
385 ordination plot`` for ordination methods NMDS / MMDS with (un)weighted UniFrac
386 distances. Note that the loaded tree is also filtered by the ``ampvis2: subset
387 ...`` tools.
388
389 **Fasta**
390
391 a fasta file containing the sequences of the OTUs. Note that this information is
392 only used in ``ampvis2: export fasta``. If the OTU table is modified by
393 ``ampvis2: mergereplicates`` or the ``ampvis2: subset ...`` tools this might be
394 useful to obtain a filtered list of sequences.
395
396
397 Output
398 ======
399
400 **RDS**
401
402 The main output of the tool is an RDS data set that contains the R representation of
403 the ampvis2 object containing the provided data (OTU table, metadata, taxonomy,
404 phylogenetic tree, and fasta).
405
406 **List files**
407
408 Summarize the metadata and taxonomy information:
409
410 - the taxonomy list file lists all taxa (column 1) and their taxonomic level (column 2)
411 - the metadata list file lists the Metadata variables (column 1), and the corresponding
412 available metadata values (column 2), if the variable is the SampleID (column 3), and
413 the data type of the corresponding metadata variable (column 4)
414
415 These files are auxiliary files that are needed in downstream ``ampvis2`` Galaxy tools
416 to allow selecting metadata and taxonomy. They are not passed to the underlying R functions.
417
418 Note that, if no taxonomy (or metadata) is given then the underlying ``ampvis2`` R
419 function adds dummy taxonomy (resp. metadata). In this case the output of the list datasets
420 can be disabled with the ``Output list data sets`` parameter.
421 ]]></help>
422 <expand macro="citations"/>
423 </tool>