Mercurial > repos > iuc > decontam
view decontam.xml @ 1:e64a5fc0af6e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontam commit 33bc2badb2a878bda5a40587d0b853a32253b8f2
| author | iuc |
|---|---|
| date | Wed, 23 Oct 2024 13:18:52 +0000 |
| parents | 86da5c894956 |
| children | 871214ac722e |
line wrap: on
line source
<tool id="decontam" name="Decontam" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper around the R package decontam (prevalence method only).
        Input is either a phyloseq object or a feature table plus metadata table;
        outputs are two diagnostic plots, a decontaminated feature table and a
        decontaminated phyloseq object.
        The @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens and the expanded
        macros are expected to be defined in macros.xml (not part of this file).
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <!-- The whole analysis lives in the generated R script below. -->
    <command detect_errors="exit_code"><![CDATA[
Rscript '$rscript'
    ]]></command>
    <configfiles>
        <!--
            Cheetah-templated R script. Lines starting with "##" are Cheetah comments
            and are stripped before R sees the script; "\$" is an escaped R "$".
        -->
        <configfile name="rscript"><![CDATA[
library(tidyverse)
library(phyloseq)
library(ggplot2)
library(decontam)

## Convert a control column to logical flags.
## as.logical() alone yields NA for character/factor values such as "0"/"1",
## so numeric-looking strings are converted through their numeric value first.
## Logical and numeric inputs, and strings such as "TRUE"/"FALSE", behave as before.
as_control_flag <- function(values) {
    if (is.logical(values)) {
        return(values)
    }
    if (is.factor(values)) {
        values <- as.character(values)
    }
    if (is.character(values)) {
        numeric_values <- suppressWarnings(as.numeric(values))
        return(ifelse(is.na(numeric_values), as.logical(values), numeric_values != 0))
    }
    as.logical(values)
}

#if $input_type.select_input == 'phyloseq':
ps <- readRDS("$input_type.phyloseq_object")
## store the control flag in a fixed column name used by the rest of the script
sample_data(ps)\$control <- as_control_flag(sample_data(ps)[["$input_type.control_metadata"]])
#else
## get OTU table (first column is the OTU/ASV ID)
otu <- read_tsv("$input_type.otu")
## use first column as rownames
otu2 <- otu %>% tibble::column_to_rownames(colnames(otu)[1])
OTU <- otu_table(otu2, taxa_are_rows = FALSE)

## get metadata table must have matching OTU/ASV ID in first column
meta <- read_tsv("$input_type.metadata")
## use first column as rownames
meta2 <- meta %>% tibble::column_to_rownames(colnames(meta)[1])

## remove one index since the dataframe uses the first column as index
control_column <- as.integer("$input_type.control") - 1
## the ID column itself cannot be the control column (it was moved to the rownames)
if (control_column < 1) {
    stop("The control column must not be the first (ID) column of the metadata table.")
}

## convert 0/1 to bool for the control column and store in control column
meta2\$control <- as_control_flag(meta2[[control_column]])

sampledata <- sample_data(meta2)
ps <- phyloseq(OTU, FALSE, sampledata)
#end if

## plot library_size_vs_control
## Put sample_data into a ggplot-friendly data.frame
df <- as.data.frame(sample_data(ps))
df\$LibrarySize <- sample_sums(ps)
df <- df[order(df\$LibrarySize),]
df\$Index <- seq(nrow(df))
ggplot(data=df, aes(x=Index, y=LibrarySize, color=control)) + geom_point()
ggsave("$library_size_vs_control", device = "png", width = 10, height = 8, units = "cm")

## plot prevalence
contamdf.prev <- isContaminant(ps, method="prevalence", neg="control", threshold=$threshold)
table(contamdf.prev\$contaminant)

## presence/absence transformation, then split into control and true samples
ps.pa <- transform_sample_counts(ps, function(abund) 1*(abund>0))
ps.pa.neg <- prune_samples(sample_data(ps.pa)\$control == TRUE, ps.pa)
ps.pa.pos <- prune_samples(sample_data(ps.pa)\$control == FALSE, ps.pa)

## Make data.frame of prevalence in positive and negative samples
df.pa <- data.frame(pa.pos=taxa_sums(ps.pa.pos), pa.neg=taxa_sums(ps.pa.neg),
                    contaminant=contamdf.prev\$contaminant)
ggplot(data=df.pa, aes(x=pa.neg, y=pa.pos, color=contaminant)) + geom_point() +
    xlab("Prevalence (Negative Controls)") + ylab("Prevalence (True Samples)")
ggsave("$prevalence", device = "png", width = 10, height = 8, units = "cm")

## remove contamination features from original data
#if $input_type.select_input == 'phyloseq':
id_name <- "SampleID"
#else
## we use the same name for the ID column as the OTU input
id_name <- colnames(otu)[1]
#end if

ps.noncontam <- prune_taxa(!contamdf.prev\$contaminant, ps)

decontam_table <- otu_table(ps.noncontam) %>%
    as.data.frame() %>%
    rownames_to_column(id_name)

write.table(decontam_table, file="$decontam_otu", sep = "\t", row.names=FALSE, quote = FALSE)
saveRDS(ps.noncontam, "$decontam_phyloseq")
        ]]></configfile>
    </configfiles>
    <inputs>
        <!-- Selects which of the two input branches of the R script is rendered. -->
        <conditional name="input_type">
            <param name="select_input" type="select" label="Phyloseq or Feature table input" help="This tool can work with phyloseq objects or feature table inputs.">
                <option value="phyloseq">Phyloseq</option>
                <option value="feature_table">Feature table</option>
            </param>
            <when value="phyloseq">
                <param name="phyloseq_object" type="data" format="phyloseq" label="Phyloseq object"/>
                <param name="control_metadata" type="text" label="Control column" help="Column in the phyloseq metadata specifying whether a sample is a negative control (0 for normal samples / 1 for control)"/>
            </when>
            <when value="feature_table">
                <param name="otu" type="data" format="tabular" label="Feature table" help="OTU/ASV or other feature table. The first column must have corresponding IDs to the metadata table."/>
                <param name="metadata" type="data" format="tabular" label="Metadata" help="Metadata that contains a column specifying whether a sample is a negative control (0 for normal samples / 1 for control). The first column must have corresponding IDs to the feature table."/>
                <param name="control" type="data_column" data_ref="metadata" use_header_names="true" multiple="false" optional="false" label="Control column" help="Column specifying whether a sample is a negative control (0 for normal samples / 1 for control)."/>
            </when>
        </conditional>
        <param name="threshold" type="float" label="Threshold to detect a contaminant" value="0.1" min="0" max="1" help="Probability threshold below which a feature is classified as a contaminant in the statistical test being performed."/>
    </inputs>
    <outputs>
        <data name="library_size_vs_control" format="png" label="${tool.name} on ${on_string}: Library Size vs Control Plot"/>
        <data name="prevalence" format="png" label="${tool.name} on ${on_string}: Prevalence Plot"/>
        <data name="decontam_otu" format="tabular" label="${tool.name} on ${on_string}: Removed Contaminants - Feature Table"/>
        <data name="decontam_phyloseq" format="phyloseq" label="${tool.name} on ${on_string}: Removed Contaminants - Phyloseq Object"/>
    </outputs>
    <tests>
        <!-- phyloseq object input -->
        <test>
            <conditional name="input_type">
                <param name="select_input" value="phyloseq"/>
                <param name="phyloseq_object" value="phyloseq_input.rds"/>
                <param name="control_metadata" value="control"/>
            </conditional>
            <param name="threshold" value="0.5"/>
            <output name="decontam_phyloseq" file="phyloseq_output.rds" ftype="phyloseq"/>
            <output name="decontam_otu" file="otu_output.tsv" ftype="tabular"/>
            <output name="prevalence" file="Prevalence_Plot.png" ftype="png"/>
            <output name="library_size_vs_control" file="Library_Size_vs_Control_Plot.png" ftype="png"/>
        </test>
        <!-- feature table + metadata input -->
        <test>
            <conditional name="input_type">
                <param name="select_input" value="feature_table"/>
                <param name="otu" value="otu_input.tsv"/>
                <param name="metadata" value="metadata_input.tsv"/>
                <!-- using the index of the column -->
                <param name="control" value="8"/>
            </conditional>
            <param name="threshold" value="0.5"/>
            <output name="decontam_phyloseq" file="phyloseq_output2.rds" ftype="phyloseq"/>
            <output name="decontam_otu" file="otu_output.tsv" ftype="tabular"/>
            <output name="prevalence" file="Prevalence_Plot.png" ftype="png"/>
            <output name="library_size_vs_control" file="Library_Size_vs_Control_Plot.png" ftype="png"/>
        </test>
    </tests>
    <help><![CDATA[
Simple Statistical Identification of Contaminating Sequence Features in Marker-Gene or Metagenomics Data
========================================================================================================

This tool identifies contaminating sequence features in marker-gene or metagenomics datasets.
It can be applied to any type of feature derived from environmental sequencing data (e.g., ASVs, OTUs, taxonomic groups, MAGs, etc.).
The method requires either DNA quantitation data or sequenced negative control samples.

.. note::

    Currently, only the negative control sample method is implemented in this wrapper.

**Output**

- If a phyloseq object is provided as input, the output will be a phyloseq object pruned to include only non-contaminant features.
- If a feature table and a metadata table are provided as input, the output will be a pruned phyloseq object containing only non-contaminant features, without the TAX table.

The output feature table will also be pruned to include only non-contaminant features.

**Threshold**

The default threshold for identifying a contaminant is a probability of 0.1 in the statistical test being performed.
In the prevalence test, a special threshold value of 0.5 is notable: this will identify as contaminants any sequences that are more prevalent in negative controls than in positive samples.
    ]]></help>
    <expand macro="citations"/>
</tool>
