Mercurial > repos > galaxyp > cardinal_classification
changeset 2:bf0eb536e4e5 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit f127be2141cf22e269c85282d226eb16fe14a9c1
line wrap: on
line diff
--- a/classification.xml Thu Oct 25 07:18:58 2018 -0400 +++ b/classification.xml Fri Feb 15 10:07:59 2019 -0500 @@ -1,4 +1,4 @@ -<tool id="cardinal_classification" name="MSI classification" version="@VERSION@.1"> +<tool id="cardinal_classification" name="MSI classification" version="@VERSION@.2"> <description>spatial classification of mass spectrometry imaging data</description> <macros> <import>macros.xml</import> @@ -28,12 +28,13 @@ library(lattice) library(ggplot2) -@READING_MSIDATA@ +@READING_MSIDATA_INRAM@ -## create full matrix to make processed imzML files compatible with classification -iData(msidata) <- iData(msidata)[] +## to make sure that processed files work as well: +iData(msidata) = iData(msidata)[] -@DATA_PROPERTIES@ +@DATA_PROPERTIES_INRAM@ + ######################################## PDF ################################### ################################################################################ @@ -60,7 +61,7 @@ grid.table(property_df, rows= NULL) -if (npeaks > 0 && sum(is.na(spectra(msidata)))==0){ +if (npeaks > 0 && sum(is.na(spectra(msidata)[]))==0){ opar <- par() @@ -141,6 +142,10 @@ ## PLS-cvApply: msidata.cv.pls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "PLS", ncomp = components) + ## remove msidata to clean up RAM space + rm(msidata) + gc() + ## create table with summary count = 1 summary_plscv = list() @@ -231,9 +236,13 @@ x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2] y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3] + ## remove msidata to clean up RAM space + rm(msidata) + gc() pls_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, pls_classes) colnames(pls_classes2) = c("pixel names", "x", "y","predicted condition") pls_toplabels = topLabels(msidata.pls, n=$type_cond.method_cond.analysis_cond.pls_toplabels) + pls_toplabels[,4:6] <-round(pls_toplabels[,4:6],6) write.table(pls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") write.table(pls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") @@ -275,6 +284,10 @@ ## OPLS-cvApply: msidata.cv.opls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew_cv) + ## remove msidata to clean up RAM space + rm(msidata) + gc() + ## create table with summary count = 1 summary_oplscv = list() @@ -369,7 +382,12 @@ opls_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, opls_classes) colnames(opls_classes2) = c("pixel names", "x", "y","predicted condition") + ## remove msidata to clean up RAM space + rm(msidata) + gc() + opls_toplabels = topLabels(msidata.opls, n=$type_cond.method_cond.opls_analysis_cond.opls_toplabels) + opls_toplabels[,4:6] <-round(opls_toplabels[,4:6],6) write.table(opls_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") write.table(opls_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") @@ -410,6 +428,10 @@ ## SSC-cvApply: msidata.cv.ssc <- cvApply(msidata, .y = y_vector,.fold = fold_vector,.fun = "spatialShrunkenCentroids", r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") + ## remove msidata to clean up RAM space + rm(msidata) + gc() + ## create table with summary count = 1 summary_ssccv = list() @@ -502,9 +524,15 @@ pixel_names = gsub(" = ", "y_", pixel_names) x_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,2] y_coordinates = matrix(unlist(strsplit(pixel_names, "_")), ncol=3, byrow=TRUE)[,3] + + ## remove msidata to clean up RAM space + rm(msidata) + gc() + ssc_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, ssc_classes) colnames(ssc_classes2) = c("pixel names", "x", "y","predicted condition") ssc_toplabels = topLabels(msidata.ssc, n=$type_cond.method_cond.ssc_analysis_cond.ssc_toplabels) + ssc_toplabels[,6:9] <-round(ssc_toplabels[,6:9],6) write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") write.table(ssc_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") @@ -571,6 +599,11 @@ predicted_classes2 = data.frame(pixel_names, x_coordinates, y_coordinates, predicted_classes) colnames(predicted_classes2) = c("pixel names", "x", "y","predicted condition") predicted_toplabels = topLabels(prediction, n=$type_cond.predicted_toplabels) + if (colnames(predicted_toplabels)[4] == "coefficients"){ + predicted_toplabels[,4:6] <-round(predicted_toplabels[,4:6],5) + + }else{ + predicted_toplabels[,6:9] <-round(predicted_toplabels[,6:9],5)} write.table(predicted_toplabels, file="$mzfeatures", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") write.table(predicted_classes2, file="$pixeloutput", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") @@ -745,7 +778,7 @@ <param name="output_rdata" type="boolean" label="Results as .RData output" help="Can be used to generate a classification prediction on new data"/> </inputs> <outputs> - <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "${tool.name} on ${on_string}"/> + <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "${tool.name} on ${on_string}: results"/> <data format="tabular" name="mzfeatures" label="${tool.name} on ${on_string}: features"/> <data format="tabular" name="pixeloutput" label="${tool.name} on ${on_string}: pixels"/> <data format="rdata" name="classification_rdata" label="${tool.name} on ${on_string}: results.RData">
--- a/macros.xml Thu Oct 25 07:18:58 2018 -0400 +++ b/macros.xml Fri Feb 15 10:07:59 2019 -0500 @@ -69,25 +69,92 @@ ## Range y coordinates minimumy = min(coord(msidata)[,2]) maximumy = max(coord(msidata)[,2]) + + + properties = c("Number of m/z features", + "Range of m/z values", + "Number of pixels", + "Range of x coordinates", + "Range of y coordinates") + + values = c(paste0(maxfeatures), + paste0(minmz, " - ", maxmz), + paste0(pixelcount), + paste0(minimumx, " - ", maximumx), + paste0(minimumy, " - ", maximumy)) + + property_df = data.frame(properties, values) + ]]></token> + + <token name="@READING_MSIDATA_INRAM@"><![CDATA[ + ## importing MSI data files + + ## function to read RData files independent of filename + loadRData <- function(fileName){ + load(fileName) + get(ls()[ls() != "fileName"]) + } + + #if $infile.ext == 'imzml' + #if str($processed_cond.processed_file) == "processed": + msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units") + centroided(msidata) = $centroids + #else + msidata <- readImzML('infile') + centroided(msidata) = $centroids + #end if + #elif $infile.ext == 'analyze75' + msidata = readAnalyze('infile') + centroided(msidata) = $centroids + #else + msidata = loadRData('infile.RData') + #end if + + ]]></token> + + <token name="@DATA_PROPERTIES_INRAM@"><![CDATA[ +########################### QC numbers ######################## +## including intensity calculations which need data in RAM + ## Number of features (mz) + maxfeatures = length(features(msidata)) + ## Range mz + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + ## Number of spectra (pixels) + pixelcount = length(pixels(msidata)) + ## Range x coordinates + minimumx = min(coord(msidata)[,1]) + maximumx = max(coord(msidata)[,1]) + ## Range y coordinates + minimumy = min(coord(msidata)[,2]) + maximumy = max(coord(msidata)[,2]) ## Range of intensities minint = round(min(spectra(msidata)[], na.rm=TRUE), digits=2) maxint = round(max(spectra(msidata)[], na.rm=TRUE), digits=2) ## Number of intensities > 0, for if conditions npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE) + ## Number of NA in spectra matrix + NAcount = sum(is.na(spectra(msidata)[])) + ## Number of NA in spectra matrix + infcount = sum(is.infinite(spectra(msidata)[])) properties = c("Number of m/z features", "Range of m/z values", "Number of pixels", "Range of x coordinates", "Range of y coordinates", - "Range of intensities") + "Range of intensities", + "Number of NA intensities", + "Number of Inf intensities") values = c(paste0(maxfeatures), paste0(minmz, " - ", maxmz), paste0(pixelcount), paste0(minimumx, " - ", maximumx), paste0(minimumy, " - ", maximumy), - paste0(minint, " - ", maxint)) + paste0(minint, " - ", maxint), + paste0(NAcount), + paste0(infcount)) property_df = data.frame(properties, values) ]]></token> @@ -144,9 +211,9 @@ <token name="@SPECTRA_TABULAR_INPUT_DESCRIPTION@"><