Mercurial > repos > iuc > ampvis2_load
comparison load.xml @ 0:301ee8d3a0f4 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ampvis2 commit 9ed0c3078be166bd22136771f517ae91a5198ecf
| author | iuc |
|---|---|
| date | Fri, 16 Aug 2024 08:49:16 +0000 |
| parents | |
| children | 07e7ec7ab1ac |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:301ee8d3a0f4 |
|---|---|
| 1 <tool id="ampvis2_load" name="ampvis2 load" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT"> | |
| 2 <description></description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="header"/> | |
| 7 <command detect_errors="exit_code"><![CDATA[ | |
| 8 #if $otutable.is_of_type("biom1") or $otutable.is_of_type("biom2") | |
| 9 ln -s '$otutable' otutable.biom && | |
| 10 #else if not $otutable.is_of_type("phyloseq") | |
| 11 ## the ASV/OTU column can not be specified in ampvis2, so if its header is | |
| 12 ## empty set the needed name (ASV), see https://github.com/KasperSkytte/ampvis2/issues/166 | |
| 13 ## the same is done for taxonomy.tsv below | |
| 14 #if $asv_otu_col_empty | |
| 15 sed -e '1 s/^\t/ASV\t/' '$otutable' > otutable.tsv && | |
| 16 #else | |
| 17 ln -s '$otutable' otutable.tsv && | |
| 18 #end if | |
| 19 #end if | |
| 20 #if $taxonomy | |
| 21 #if $asv_otu_col_empty | |
| 22 sed -e '1 s/^\t/ASV\t/' '$taxonomy' > taxonomy.tsv && | |
| 23 #else | |
| 24 ln -s '$taxonomy' taxonomy.tsv && | |
| 25 #end if | |
| 26 #end if | |
| 27 Rscript '$rscript' | |
| 28 ]]></command> | |
| 29 <configfiles> | |
| 30 <configfile name="rscript"><![CDATA[ | |
| 31 library(ampvis2, quietly = TRUE) | |
| 32 library(readr, quietly = TRUE) | |
| 33 ## 'manually' load metadata treating all columns as character (giving | |
| 34 ## colClasses to amp_load seems not possible); row names are taken from the | |
| 35 ## "SampleID" column if there is one and from the 1st column otherwise | |
| 36 #if $metadata | |
| 37 metadata <- read.table("$metadata", header = TRUE, sep = "\t", colClasses = "character", check.names = FALSE) | |
| 38 ## check.names = FALSE leaves an empty 1st column name empty: the 1st column | |
| 39 ## need not be named "SampleID", but its name must not be empty .. fixed here | |
| 40 if (colnames(metadata)[1] == "") { | |
| 41 colnames(metadata)[1] <- "SampleID" | |
| 42 } | |
| 43 if ("SampleID" %in% colnames(metadata)) { | |
| 44 rownames(metadata) <- metadata[["SampleID"]] | |
| 45 } else { | |
| 46 rownames(metadata) <- metadata[[1]] | |
| 47 } | |
| 48 #end if | |
| 49 ## phyloseq input is read from its RDS file and given to amp_load as an object; biom / tabular input is given by the file name staged in the command section | |
| 50 #if $otutable.is_of_type("phyloseq") | |
| 51 otutable <- readRDS("$otutable") | |
| 52 print(class(otutable)) | |
| 53 #end if | |
| 54 data <- amp_load( | |
| 55 #if $otutable.is_of_type("phyloseq") | |
| 56 otutable = otutable, | |
| 57 #else if $otutable.is_of_type("biom1") or $otutable.is_of_type("biom2") | |
| 58 otutable = "otutable.biom", | |
| 59 #else | |
| 60 otutable = "otutable.tsv", | |
| 61 #end if | |
| 62 #if $metadata | |
| 63 metadata = metadata, | |
| 64 #end if | |
| 65 #if $taxonomy | |
| 66 taxonomy = "taxonomy.tsv", | |
| 67 #end if | |
| 68 #if $fasta | |
| 69 fasta = "$fasta", | |
| 70 #end if | |
| 71 #if $tree | |
| 72 tree = "$tree", | |
| 73 #end if | |
| 74 pruneSingletons = $pruneSingletons | |
| 75 ) | |
| 76 ## the ASV identifiers (row names of the abundance table) are the sequences: keep them as refseq under the names ASV1, ASV2, ... | |
| 77 #if $asv_sequences | |
| 78 library(ape, quietly = TRUE) | |
| 79 ## each identifier is split into its single characters before the conversion with as.DNAbin | |
| 80 seq <- as.DNAbin(strsplit(rownames(data\$abund), "")) | |
| 81 names(seq) <- paste0("ASV", seq_along(seq)) | |
| 82 data\$refseq <- seq | |
| 83 data <- matchOTUs(data, seq) | |
| 84 #end if | |
| 85 | |
| 86 ## try to guess the metadata column types with readr::type_convert | |
| 87 #if $guess_column_types | |
| 88 data\$metadata <- readr::type_convert(data\$metadata, guess_integer=TRUE) | |
| 89 #end if | |
| 90 ## store the loaded ampvis2 object in the RDS output | |
| 91 saveRDS(data, "$ampvis") | |
| 92 ## write the metadata list if it is selected in the write_lists parameter | |
| 93 #if "metadata" in $write_lists | |
| 94 @SAVE_METADATA_LIST@ | |
| 95 #end if | |
| 96 ## write the taxonomy list if it is selected in the write_lists parameter | |
| 97 #if "tax" in $write_lists | |
| 98 @SAVE_TAX_LIST@ | |
| 99 #end if | |
| 100 ## print an overview of the data to stdout (the tool tests assert on this text) | |
| 101 data | |
| 102 ]]></configfile> | |
| 103 </configfiles> | |
| 104 <inputs> | |
| 105 <param argument="otutable" type="data" format="phyloseq,dada2_sequencetable,tabular,biom1,biom2" label="OTU table"/> | |
| 106 <param name="asv_otu_col_empty" type="boolean" checked="false" label="OTU/ASV column has empty header" help="By default ampvis2 expects a column named ASV or OTU containing the ASV or OTU identifiers. By checking this a column with an empty header will be used (as produced by dada2)."/> | |
| 107 <param name="asv_sequences" type="boolean" checked="false" label="ASV identifiers are the ASV sequences" help="By checking this the identifiers will be renamed to ASV1, ASV2, etc and the sequences will be stored in the ampvis2 object." /> | |
| 108 <param argument="metadata" type="data" format="tabular,tsv" optional="true" label="Sample metadata"> | |
| 109 <validator type="expression" message="Table must have at least 1 column"><![CDATA[value.metadata.columns > 0]]></validator> | |
| 110 </param> | |
| 111 <param name="guess_column_types" type="boolean" checked="true" label="Guess metadata column types" help="See help"/> | |
| 112 <param argument="taxonomy" type="data" format="tabular" optional="true" label="Taxonomy table"/> | |
| 113 <param argument="fasta" type="data" format="fasta" optional="true" label="Fasta file"/> | |
| 114 <param argument="tree" type="data" format="newick" optional="true" label="Phylogenetic tree"/> | |
| 115 <param argument="pruneSingletons" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Remove singleton OTUs"/> | |
| 116 <param name="write_lists" type="select" optional="true" multiple="true" label="Output list data sets" help="Needed by most downstream tools. Select if the inputs contain taxonomic / metadata information."> | |
| 117 <option value="tax" selected="true">Taxonomy list</option> | |
| 118 <option value="metadata" selected="true">Metadata list</option> | |
| 119 </param> | |
| 120 </inputs> | |
| 121 <outputs> | |
| 122 <data name="ampvis" format="ampvis2"/> | |
| 123 <data name="metadata_list_out" format="tabular" label="${tool.name} on ${on_string}: metadata list"> | |
| 124 <filter>write_lists and "metadata" in write_lists</filter> | |
| 125 </data> | |
| 126 <data name="taxonomy_list_out" format="tabular" label="${tool.name} on ${on_string}: taxonomy list"> | |
| 127 <filter>write_lists and "tax" in write_lists</filter> | |
| 128 </data> | |
| 129 </outputs> | |
| 130 <tests> | |
| 131 <!-- load otu table + metadata + taxonomy --> | |
| 132 <test expect_num_outputs="3"> | |
| 133 <param name="otutable" value="AalborgWWTPs.otu.csv"/> | |
| 134 <param name="metadata" value="AalborgWWTPs.tsv" ftype="tsv"/> | |
| 135 <param name="taxonomy" value="AalborgWWTPs.tax"/> | |
| 136 <output name="ampvis" value="AalborgWWTPs.rds" ftype="ampvis2" compare="sim_size"/> | |
| 137 <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/> | |
| 138 <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/> | |
| 139 <assert_stdout> | |
| 140 <has_text text="ampvis2 object with 3 elements."/> | |
| 141 <has_text text="575.79"/> | |
| 142 <has_text text="SampleID, Plant, Date, Year, Period"/> | |
| 143 <has_text text="200(100%) 194(97%) 177(88.5%) 170(85%) 152(76%) 113(56.5%) 2(1%)"/> | |
| 144 </assert_stdout> | |
| 145 </test> | |
| 146 <!-- load otu table + metadata + taxonomy + tree + fasta --> | |
| 147 <test expect_num_outputs="3"> | |
| 148 <param name="otutable" value="AalborgWWTPs.otu.csv"/> | |
| 149 <param name="metadata" value="AalborgWWTPs.tsv" ftype="tsv"/> | |
| 150 <param name="taxonomy" value="AalborgWWTPs.tax"/> | |
| 151 <param name="fasta" value="AalborgWWTPs.fa" ftype="fasta"/> | |
| 152 <param name="tree" value="AalborgWWTPs.nwk" ftype="newick"/> | |
| 153 <output name="ampvis" value="AalborgWWTPs-complete.rds" ftype="ampvis2" compare="sim_size"/> | |
| 154 <output name="metadata_list_out" value="AalborgWWTPs-metadata.list"/> | |
| 155 <output name="taxonomy_list_out" value="AalborgWWTPs-taxonomy.list"/> | |
| 156 <assert_stdout> | |
| 157 <has_text text="ampvis2 object with 5 elements."/> | |
| 158 <has_text text="575.79"/> | |
| 159 <has_text text="SampleID, Plant, Date, Year, Period"/> | |
| 160 <has_text text="200(100%) 194(97%) 177(88.5%) 170(85%) 152(76%) 113(56.5%) 2(1%)"/> | |
| 161 </assert_stdout> | |
| 162 </test> | |
| 163 <!-- test biom 1/2 input (taken from https://github.com/biocore/biom-format/tree/master/examples) | |
| 164 metadata seems not to be loaded from a biom file https://github.com/MadsAlbertsen/ampvis2/issues/129 | |
| 165 taxonomy is loaded from all but 1 | |
| 166 --> | |
| 167 <test expect_num_outputs="1"> | |
| 168 <param name="otutable" value="rich-dense.biom" ftype="biom1"/> | |
| 169 <param name="write_lists" value=""/> | |
| 170 <output name="ampvis" ftype="ampvis2"> | |
| 171 <assert_contents> | |
| 172 <has_size value="748"/> | |
| 173 </assert_contents> | |
| 174 </output> | |
| 175 <assert_stdout> | |
| 176 <has_text text="ampvis2 object with 3 elements."/> | |
| 177 <has_text text="4.5"/> | |
| 178 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/> | |
| 179 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/> | |
| 180 </assert_stdout> | |
| 181 </test> | |
| 182 <test expect_num_outputs="1"> | |
| 183 <param name="otutable" value="rich-sparse.biom" ftype="biom1"/> | |
| 184 <param name="write_lists" value=""/> | |
| 185 <output name="ampvis" ftype="ampvis2"> | |
| 186 <assert_contents> | |
| 187 <has_size value="751"/> | |
| 188 </assert_contents> | |
| 189 </output> | |
| 190 <assert_stdout> | |
| 191 <has_text text="ampvis2 object with 3 elements."/> | |
| 192 <has_text text="4.5"/> | |
| 193 <has_text text="SampleID, BarcodeSequence, LinkerPrimerSequence, BODY_SITE, Description"/> | |
| 194 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/> | |
| 195 </assert_stdout> | |
| 196 </test> | |
| 197 <!-- input file seems to miss metadata: check that no metadata & taxonomy is loaded (ampvis2 adds dummy metadata) --> | |
| 198 <test expect_num_outputs="1"> | |
| 199 <param name="otutable" value="min_sparse_otu_table_hdf5.biom" ftype="biom2"/> | |
| 200 <output name="ampvis" ftype="ampvis2"> | |
| 201 <assert_contents> | |
| 202 <has_size value="395"/> | |
| 203 </assert_contents> | |
| 204 </output> | |
| 205 <param name="write_lists" value=""/> | |
| 206 <assert_stdout> | |
| 207 <has_text text="ampvis2 object with 3 elements."/> | |
| 208 <has_text text="4.5"/> | |
| 209 <has_text text="SampleID, DummyVariable"/> | |
| 210 <has_text text="0(0%) 0(0%) 0(0%) 0(0%) 0(0%) 0(0%) 0(0%)"/> | |
| 211 </assert_stdout> | |
| 212 </test> | |
| 213 <test expect_num_outputs="1"> | |
| 214 <param name="otutable" value="rich_sparse_otu_table_hdf5.biom" ftype="biom2"/> | |
| 215 <output name="ampvis" ftype="ampvis2"> | |
| 216 <assert_contents> | |
| 217 <has_size value="753"/> | |
| 218 </assert_contents> | |
| 219 </output> | |
| 220 <param name="write_lists" value=""/> | |
| 221 <assert_stdout> | |
| 222 <has_text text="ampvis2 object with 3 elements."/> | |
| 223 <has_text text="4.5"/> | |
| 224 <has_text text="SampleID, BODY_SITE, BarcodeSequence, Description, LinkerPrimerSequence"/> | |
| 225 <has_text text="5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 5(100%) 1(20%)"/> | |
| 226 </assert_stdout> | |
| 227 </test> | |
| 228 <!-- load dada2 ASV table + metadata + taxonomy --> | |
| 229 <test expect_num_outputs="3"> | |
| 230 <param name="otutable" value="dada2-removeBimeraDenovo.tab" ftype="dada2_sequencetable"/> | |
| 231 <param name="metadata" value="dada2-metadata.tsv" ftype="tsv"/> | |
| 232 <param name="taxonomy" value="dada2-assignTaxonomy.tabular"/> | |
| 233 <param name="asv_otu_col_empty" value="true"/> | |
| 234 <param name="asv_sequences" value="true"/> | |
| 235 <output name="ampvis" ftype="ampvis2"> | |
| 236 <assert_contents> | |
| 237 <has_size min="100"/> | |
| 238 </assert_contents> | |
| 239 </output> | |
| 240 <output name="metadata_list_out"> | |
| 241 <assert_contents> | |
| 242 <has_n_lines n="23"/> | |
| 243 <has_n_columns n="4"/> | |
| 244 <has_text text="Sample"/> | |
| 245 </assert_contents> | |
| 246 </output> | |
| 247 <output name="taxonomy_list_out"> | |
| 248 <assert_contents> | |
| 249 <has_n_lines n="370"/> | |
| 250 <has_n_columns n="2"/> | |
| 251 <has_line line="Bacteria	Kingdom"/> | |
| 252 </assert_contents> | |
| 253 </output> | |
| 254 <assert_stdout> | |
| 255 <has_text text="ampvis2 object with 4 elements."/> <!-- this also has fasta, i.e. 4 --> | |
| 256 <has_text text="6212.45"/> | |
| 257 <has_text text="Sample, time"/> | |
| 258 <has_text text="232(100%) 232(100%) 232(100%) 231(99.57%) 209(90.09%) 127(54.74%)"/> | |
| 259 </assert_stdout> | |
| 260 </test> | |
| 261 <!-- load data from phyloseq --> | |
| 262 <test expect_num_outputs="3"> | |
| 263 <param name="otutable" value="output.phyloseq" ftype="phyloseq"/> | |
| 264 <output name="ampvis" ftype="ampvis2"> | |
| 265 <assert_contents> | |
| 266 <has_size min="100"/> | |
| 267 </assert_contents> | |
| 268 </output> | |
| 269 <output name="metadata_list_out"> | |
| 270 <assert_contents> | |
| 271 <has_n_lines n="6"/> | |
| 272 <has_n_columns n="4"/> | |
| 273 <has_text text="SampleID"/> | |
| 274 </assert_contents> | |
| 275 </output> | |
| 276 <output name="taxonomy_list_out"> | |
| 277 <assert_contents> | |
| 278 <has_n_lines n="147"/> | |
| 279 <has_n_columns n="2"/> | |
| 280 <has_line line="Bacteria	Kingdom"/> | |
| 281 </assert_contents> | |
| 282 </output> | |
| 283 <assert_stdout> | |
| 284 <has_text text="ampvis2 object with 4 elements."/> <!-- this also has fasta, i.e. 4 --> | |
| 285 <has_text text="SampleID, Property, Number"/> | |
| 286 <has_text text="64(100%) 64(100%) 64(100%) 64(100%) 62(96.88%) 56(87.5%) 0(0%)"/> | |
| 287 </assert_stdout> | |
| 288 </test> | |
| 289 </tests> | |
| 290 <help><![CDATA[ | |
| 291 | |
| 292 What it does | |
| 293 ============ | |
| 294 | |
| 295 This tool reads an OTU or ASV table and corresponding sample metadata, and returns | |
| 296 a RDS data set for use in all ampvis2 tools. It is therefore required to load | |
| 297 data with this tool before any other ampvis2 tools can be used. | |
| 298 | |
| 299 The Galaxy tool calls the `amp_load <https://kasperskytte.github.io/ampvis2/reference/amp_load.html>`_ | |
| 300 function of the ampvis2 package. This function validates and corrects the | |
| 301 provided data frames in different ways to make it suitable for the rest of the | |
| 302 ampvis2 tools. It is important that the provided data sets match the | |
| 303 requirements as described in the following to work properly. | |
| 304 | |
| 305 Input | |
| 306 ===== | |
| 307 | |
| 308 **The OTU-table** | |
| 309 | |
| 310 contains information about the OTU/ASVs, their read counts in each sample, and | |
| 311 optionally their assigned taxonomy. The OTU table can be given as | |
| 312 | |
| 313 - Tabular data set | |
| 314 - BIOM version (1 and 2) | |
| 315 | |
| 316 Metadata and taxonomy in the tabular or BIOM files that are given via the | |
| 317 ``OTU table`` parameter are overwritten by data presented via the | |
| 318 ``Sample metadata`` or ``Taxonomy table`` parameters. | |
| 319 | |
| 320 If given in tabular format the provided OTU-table must be a table with the | |
| 321 following requirements: | |
| 322 | |
| 323 - The rows are OTU IDs and the columns are samples. | |
| 324 - The OTU IDs are by default expected to be in a column called "OTU", "ASV", or "#OTU ID". | |
| 325 For data using an empty header for the OTU/ASV column enable the option *OTU/ASV column has empty header* | |
| 326 (this allows to process data as produced e.g. by dada2). | |
| 327 - The column names of the table are the sample IDs, exactly matching those in | |
| 328 the metadata | |
| 329 - The last 7 columns are optionally the corresponding taxonomy assigned to the | |
| 330 OTUs, named "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species". | |
| 331 | |
| 332 If the ASV IDs are actually the ASV Sequences then enabling | |
| 333 *ASV identifiers are the ASV sequences* will rename the identifiers to ASV1, ASV2,... | |
| 334 (and save the sequences in the ampvis2 object). | |
| 335 | |
| 336 Generally avoid special characters and spaces in row- and column names. | |
| 337 | |
| 338 The OTU table can also contain the taxonomic information in additional columns: | |
| 339 Kingdom, Phylum, Class, Order, Family, Genus. | |
| 340 | |
| 341 Check `here <https://biom-format.org/>`_ for information on the BIOM formats. | |
| 342 | |
| 343 **The metadata** | |
| 344 | |
| 345 contains additional information about the samples, for example where each sample | |
| 346 was taken, date, pH, treatment etc, which is used to compare and group the | |
| 347 samples during analysis. The amount of information in the metadata is unlimited, | |
| 348 it can contain any number of columns (variables), however there are a few | |
| 349 requirements: | |
| 350 | |
| 351 - The sample IDs must be in the first column. The sample IDs must match exactly | |
| 352 to those in the OTU-table. Any unmatched samples between the otutable and | |
| 353 metadata will be removed with a warning. | |
| 354 - Generally avoid special characters and spaces in row- and column names. | |
| 355 | |
| 356 By default the data types of metadata columns are guessed with | |
| 357 ``readr::type_convert``. The guessed column types can be seen in the last (4th) | |
| 358 column of the ``metadata list`` output and also stdout of the tool. Guessing of | |
| 359 data types can be disabled using the parameter ``Guess metadata column types``. | |
| 360 If disabled, metadata from separate tabular input is treated as character data, | |
| 361 and if loaded from biom files that data is used as is. Metadata types can be set | |
| 362 manually using the tool ``ampvis2: set metadata``. | |
| 363 | |
| 364 Dates should be given in the format ``YYYY-MM-DD`` (Y: year, M: month, D: day). | |
| 365 | |
| 366 In addition to the RDS data set a metadata (resp. taxonomy) list data set is returned | |
| 367 if metadata (resp. taxonomic information) is given to this tool. It contains | |
| 368 restructured metadata (taxonomic information) that is used in downstream ampvis2 | |
| 369 Galaxy tools in order to select metadata / metadata values (resp. taxonomic levels). | |
| 370 | |
| 371 **Taxonomy** | |
| 372 | |
| 373 is a tabular data set with 7 columns and one row per ASV/OTU: | |
| 374 | |
| 375 - the 1st column is identical to the 1st column of the OTU table parameter | |
| 376 - the remaining columns contain data for Kingdom, Phylum, Class, Order, Family, Genus | |
| 377 | |
| 378 Note that the taxonomic information can also be embedded in the OTU table. | |
| 379 | |
| 380 **Tree** | |
| 381 | |
| 382 a tree with branch lengths in Newick format. | |
| 383 | |
| 384 This is needed / useful only if the data is used as input of: ``ampvis2: | |
| 385 ordination plot`` for ordination methods NMDS / MMDS with (un)weighted UniFrac | |
| 386 distances. Note that the loaded tree is also filtered by the ``ampvis2: subset | |
| 387 ...`` tools. | |
| 388 | |
| 389 **Fasta** | |
| 390 | |
| 391 a fasta file containing the sequences of the OTUs. Note that this information is | |
| 392 only used in ``ampvis2: export fasta``. If the OTU table is modified by | |
| 393 ``ampvis2: mergereplicates`` or the ``ampvis2: subset ...`` tools this might be | |
| 394 useful to obtain a filtered list of sequences. | |
| 395 | |
| 396 | |
| 397 Output | |
| 398 ====== | |
| 399 | |
| 400 **RDS** | |
| 401 | |
| 402 The main output of the tool is an RDS data set that contains the R representation of | |
| 403 the ampvis2 object containing the provided data (OTU table, metadata, taxonomy, | |
| 404 phylogenetic tree, and fasta). | |
| 405 | |
| 406 **List files** | |
| 407 | |
| 408 Summarize the metadata and taxonomy information: | |
| 409 | |
| 410 - the taxonomy list file lists all taxa (column 1) and their taxonomic level (column 2) in a tabular data set | |
| 411 - the metadata list file lists the Metadata variables (column 1), and the corresponding | |
| 412 available metadata values (column 2), if the variable is the SampleID (column 3), and | |
| 413 the data type of the corresponding metadata variable (column 4) | |
| 414 | |
| 415 These files are auxiliary files that are needed in downstream ``ampvis2`` Galaxy tools | |
| 416 to allow selecting metadata and taxonomy. They are not passed to the underlying R functions. | |
| 417 | |
| 418 Note that, if no taxonomy (or metadata) is given then the underlying ``ampvis2`` R | |
| 419 function adds dummy taxonomy (resp. metadata). In this case the output of the list datasets | |
| 420 can be disabled with the ``Output list data sets`` parameter. | |
| 421 ]]></help> | |
| 422 <expand macro="citations"/> | |
| 423 </tool> |
