Mercurial > repos > galaxyp > msi_classification
comparison msi_classification.xml @ 0:b282225ccbe1 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_classification commit 8087490eb4dcaf4ead0f03eae4126780d21e5503
| author | galaxyp |
|---|---|
| date | Fri, 06 Jul 2018 14:10:12 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b282225ccbe1 |
|---|---|
| 1 <tool id="mass_spectrometry_imaging_classification" name="MSI classification" version="1.10.0.0"> | |
| 2 <description>spatial classification of mass spectrometry imaging data</description> | |
| 3 <requirements> | |
| 4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> | |
| 5 <requirement type="package" version="2.2.1">r-gridextra</requirement> | |
| 6 <requirement type="package" version="0.20-35">r-lattice</requirement> | |
| 7 <requirement type="package" version="2.2.1">r-ggplot2</requirement> | |
| 8 </requirements> | |
| 9 <command detect_errors="exit_code"> | |
| 10 <![CDATA[ | |
| 11 | |
| 12 #if $infile.ext == 'imzml' | |
| 13 ln -s '${infile.extra_files_path}/imzml' infile.imzML && | |
| 14 ln -s '${infile.extra_files_path}/ibd' infile.ibd && | |
| 15 #elif $infile.ext == 'analyze75' | |
| 16 ln -s '${infile.extra_files_path}/hdr' infile.hdr && | |
| 17 ln -s '${infile.extra_files_path}/img' infile.img && | |
| 18 ln -s '${infile.extra_files_path}/t2m' infile.t2m && | |
| 19 #else | |
| 20 ln -s $infile infile.RData && | |
| 21 #end if | |
| 22 cat '${MSI_segmentation}' && | |
| 23 echo ${MSI_segmentation} && | |
| 24 Rscript '${MSI_segmentation}' | |
| 25 | |
| 26 ]]> | |
| 27 </command> | |
| 28 <configfiles> | |
| 29 <configfile name="MSI_segmentation"><![CDATA[ | |
| 30 | |
| 31 | |
| 32 ################################# load libraries and read file ######################### | |
| 33 | |
| 34 library(Cardinal) | |
| 35 library(gridExtra) | |
| 36 library(lattice) | |
| 37 library(ggplot2) | |
| 38 | |
| 39 | |
| 40 #if $infile.ext == 'imzml' | |
| 41 #if str($processed_cond.processed_file) == "processed": | |
| 42 msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units") | |
| 43 #else | |
| 44 msidata <- readImzML('infile') | |
| 45 #end if | |
| 46 #elif $infile.ext == 'analyze75' | |
| 47 msidata = readAnalyze('infile') | |
| 48 #else | |
| 49 load('infile.RData') | |
| 50 #end if | |
| 51 | |
| 52 ## function to later read RData reference files in | |
| 53 | |
| 54 loadRData <- function(fileName){ | |
| 55 #loads an RData file, and returns it | |
| 56 load(fileName) | |
| 57 get(ls()[ls() != "fileName"]) | |
| 58 } | |
| 59 | |
| 60 ## create full matrix to make processed imzML files compatible with classification | |
| 61 iData(msidata) <- iData(msidata)[] | |
| 62 | |
| 63 ###################################### file properties in numbers ############## | |
| 64 | |
| 65 ## Number of features (mz) | |
| 66 maxfeatures = length(features(msidata)) | |
| 67 ## Range mz | |
| 68 minmz = round(min(mz(msidata)), digits=2) | |
| 69 maxmz = round(max(mz(msidata)), digits=2) | |
| 70 ## Number of spectra (pixels) | |
| 71 pixelcount = length(pixels(msidata)) | |
| 72 ## Range x coordinates | |
| 73 minimumx = min(coord(msidata)[,1]) | |
| 74 maximumx = max(coord(msidata)[,1]) | |
| 75 ## Range y coordinates | |
| 76 minimumy = min(coord(msidata)[,2]) | |
| 77 maximumy = max(coord(msidata)[,2]) | |
| 78 ## Range of intensities | |
| 79 minint = round(min(spectra(msidata)[]), digits=2) | |
| 80 maxint = round(max(spectra(msidata)[]), digits=2) | |
| 81 medint = round(median(spectra(msidata)[]), digits=2) | |
| 82 ## Number of intensities > 0 | |
| 83 npeaks= sum(spectra(msidata)[]>0) | |
| 84 ## Spectra multiplied with mz (potential number of peaks) | |
| 85 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) | |
| 86 ## Percentage of intensities > 0 | |
| 87 percpeaks = round(npeaks/numpeaks*100, digits=2) | |
| 88 ## Number of empty TICs | |
| 89 TICs = colSums(spectra(msidata)[]) | |
| 90 NumemptyTIC = sum(TICs == 0) | |
| 91 | |
| 92 | |
| 93 ## Processing informations | |
| 94 processinginfo = processingData(msidata) | |
| 95 centroidedinfo = processinginfo@centroided # TRUE or FALSE | |
| 96 | |
| 97 ## if TRUE write processinginfo if no write FALSE | |
| 98 | |
| 99 ## normalization | |
| 100 if (length(processinginfo@normalization) == 0) { | |
| 101 normalizationinfo='FALSE' | |
| 102 } else { | |
| 103 normalizationinfo=processinginfo@normalization | |
| 104 } | |
| 105 ## smoothing | |
| 106 if (length(processinginfo@smoothing) == 0) { | |
| 107 smoothinginfo='FALSE' | |
| 108 } else { | |
| 109 smoothinginfo=processinginfo@smoothing | |
| 110 } | |
| 111 ## baseline | |
| 112 if (length(processinginfo@baselineReduction) == 0) { | |
| 113 baselinereductioninfo='FALSE' | |
| 114 } else { | |
| 115 baselinereductioninfo=processinginfo@baselineReduction | |
| 116 } | |
| 117 ## peak picking | |
| 118 if (length(processinginfo@peakPicking) == 0) { | |
| 119 peakpickinginfo='FALSE' | |
| 120 } else { | |
| 121 peakpickinginfo=processinginfo@peakPicking | |
| 122 } | |
| 123 | |
| 124 ############################################################################# | |
| 125 | |
| 126 properties = c("Number of mz features", | |
| 127 "Range of mz values", | |
| 128 "Number of pixels", | |
| 129 "Range of x coordinates", | |
| 130 "Range of y coordinates", | |
| 131 "Range of intensities", | |
| 132 "Median of intensities", | |
| 133 "Intensities > 0", | |
| 134 "Number of empty spectra", | |
| 135 "Preprocessing", | |
| 136 "Normalization", | |
| 137 "Smoothing", | |
| 138 "Baseline reduction", | |
| 139 "Peak picking", | |
| 140 "Centroided") | |
| 141 | |
| 142 values = c(paste0(maxfeatures), | |
| 143 paste0(minmz, " - ", maxmz), | |
| 144 paste0(pixelcount), | |
| 145 paste0(minimumx, " - ", maximumx), | |
| 146 paste0(minimumy, " - ", maximumy), | |
| 147 paste0(minint, " - ", maxint), | |
| 148 paste0(medint), | |
| 149 paste0(percpeaks, " %"), | |
| 150 paste0(NumemptyTIC), | |
| 151 paste0(" "), | |
| 152 paste0(normalizationinfo), | |
| 153 paste0(smoothinginfo), | |
| 154 paste0(baselinereductioninfo), | |
| 155 paste0(peakpickinginfo), | |
| 156 paste0(centroidedinfo)) | |
| 157 | |
| 158 property_df = data.frame(properties, values) | |
| 159 | |
| 160 | |
| 161 ######################################## PDF ################################### | |
| 162 ################################################################################ | |
| 163 ################################################################################ | |
| 164 | |
| 165 Title = "Prediction" | |
| 166 | |
| 167 #if str( $type_cond.type_method) == "training": | |
| 168 #if str( $type_cond.method_cond.class_method) == "PLS": | |
| 169 Title = "PLS" | |
| 170 #elif str( $type_cond.method_cond.class_method) == "OPLS": | |
| 171 Title = "OPLS" | |
| 172 #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids": | |
| 173 Title = "SSC" | |
| 174 #end if | |
| 175 #end if | |
| 176 | |
| 177 pdf("classificationpdf.pdf", fonts = "Times", pointsize = 12) | |
| 178 plot(0,type='n',axes=FALSE,ann=FALSE) | |
| 179 | |
| 180 | |
| 181 title(main=paste0(Title," for file: \n\n", "$infile.display_name")) | |
| 182 | |
| 183 | |
| 184 | |
| 185 ##################### I) numbers and control plots ############################# | |
| 186 ############################################################################### | |
| 187 | |
| 188 ## table with values | |
| 189 grid.table(property_df, rows= NULL) | |
| 190 | |
| 191 if (npeaks > 0){ | |
| 192 | |
| 193 opar <- par() | |
| 194 | |
| 195 ######################## II) Training ############################# | |
| 196 ############################################################################# | |
| 197 #if str( $type_cond.type_method) == "training": | |
| 198 print("training") | |
| 199 | |
| 200 | |
| 201 ## load y response (will be needed in every training scenario) | |
| 202 | |
| 203 #if str($type_cond.y_cond.y_vector) == "y_internal": | |
| 204 y_vector = msidata\$$type_cond.y_cond.y_name | |
| 205 #elif str($type_cond.y_cond.y_vector) == "y_external": | |
| 206 y_tabular = read.delim("$type_cond.y_cond.y_data", header = FALSE, stringsAsFactors = FALSE) | |
| 207 y_vector = as.factor(y_tabular[,$type_cond.y_cond.y_column]) | |
| 208 number_pixels = length(y_vector) ## should be same as in data | |
| 209 #end if | |
| 210 | |
| 211 ## plot of y vector | |
| 212 | |
| 213 position_df = cbind(coord(msidata)[,1:2], y_vector) | |
| 214 y_plot = ggplot(position_df, aes(x=x, y=y, fill=y_vector))+ | |
| 215 geom_tile() + | |
| 216 coord_fixed()+ | |
| 217 ggtitle("Distribution of the response variable y")+ | |
| 218 theme_bw()+ | |
| 219 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
| 220 theme(legend.position="bottom",legend.direction="vertical")+ | |
| 221 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
| 222 coord_labels = aggregate(cbind(x,y)~y_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | |
| 223 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$y_vector) | |
| 224 print(y_plot) | |
| 225 | |
| 226 | |
| 227 ######################## PLS ############################# | |
| 228 #if str( $type_cond.method_cond.class_method) == "PLS": | |
| 229 print("PLS") | |
| 230 | |
| 231 ######################## PLS - CV ############################# | |
| 232 #if str( $type_cond.method_cond.analysis_cond.PLS_method) == "cvapply": | |
| 233 print("PLS cv") | |
| 234 | |
| 235 ## folds | |
| 236 #if str($type_cond.method_cond.analysis_cond.fold_cond.fold_vector) == "fold_internal": | |
| 237 | |
| 238 fold_vector = msidata\$$type_cond.method_cond.analysis_cond.fold_cond.fold_name | |
| 239 #elif str($type_cond.method_cond.analysis_cond.fold_cond.fold_vector) == "fold_external": | |
| 240 fold_tabular = read.delim("$type_cond.method_cond.analysis_cond.fold_cond.fold_data", header = FALSE, stringsAsFactors = FALSE) | |
| 241 fold_vector = as.factor(fold_tabular[,$type_cond.method_cond.analysis_cond.fold_cond.fold_column]) | |
| 242 number_pixels = length(fold_vector) ## should be same as in data | |
| 243 #end if | |
| 244 | |
| 245 ## plot of folds | |
| 246 | |
| 247 position_df = cbind(coord(msidata)[,1:2], fold_vector) | |
| 248 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+ | |
| 249 geom_tile() + | |
| 250 coord_fixed()+ | |
| 251 ggtitle("Distribution of the fold variable")+ | |
| 252 theme_bw()+ | |
| 253 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
| 254 theme(legend.position="bottom",legend.direction="vertical")+ | |
| 255 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
| 256 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | |
| 257 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) | |
| 258 print(fold_plot) | |
| 259 | |
| 260 ## number of components | |
| 261 components = c($type_cond.method_cond.analysis_cond.plscv_comp) | |
| 262 | |
| 263 ## PLS-cvApply: | |
| 264 msidata.cv.pls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "PLS", ncomp = components) | |
| 265 | |
| 266 ## create table with summary | |
| 267 count = 1 | |
| 268 summary_plscv = list() | |
| 269 accuracy_vector = numeric() | |
| 270 for (iteration in components){ | |
| 271 | |
| 272 summary_iteration = summary(msidata.cv.pls)\$accuracy[[paste0("ncomp = ", iteration)]] | |
| 273 summary_iteration = cbind(rownames(summary_iteration), summary_iteration) ## include rownames in table | |
| 274 accuracy_vector[count] = summary_iteration[1,2] ## vector with accuracies to find later maximum for plot | |
| 275 empty_row = c(paste0("ncomp = ", iteration), rep( "", length(levels(y_vector)))) ## add line with ncomp for each iteration | |
| 276 ##rownames(labeled_iteration)[1] = paste0("ncomp = ", iteration) | |
| 277 ##labeled_iteration = cbind(rownames(labeled_iteration), labeled_iteration) | |
| 278 labeled_iteration = rbind(empty_row, summary_iteration) | |
| 279 | |
| 280 summary_plscv[[count]] = labeled_iteration | |
| 281 count = count+1} ## create list with summary table for each component | |
| 282 ## create dataframe from list | |
| 283 summary_plscv = do.call(rbind, summary_plscv) | |
| 284 summary_df = as.data.frame(summary_plscv) | |
| 285 rownames(summary_df) = NULL | |
| 286 | |
| 287 ## plots | |
| 288 ## plot to find ncomp with highest accuracy | |
| 289 plot(summary(msidata.cv.pls), main="Accuracy of PLS classification") | |
| 290 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy | |
| 291 ## one image for each sample/fold, 4 images per page | |
| 292 image(msidata.cv.pls, model = list(ncomp = ncomp_max), layout = c(2, 2)) | |
| 293 | |
| 294 par(opar) | |
| 295 ## print table with summary in pdf | |
| 296 plot(0,type='n',axes=FALSE,ann=FALSE) | |
| 297 title(main="Summary for the different components\n", adj=0.5) | |
| 298 ## summary for 4 components (20 rows) fits in one page: | |
| 299 if (length(components)<5){ | |
| 300 grid.table(summary_df, rows= NULL) | |
| 301 }else{ | |
| 302 grid.table(summary_df[1:20,], rows= NULL) | |
| 303 mincount = 21 | |
| 304 maxcount = 40 | |
| 305 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){ | |
| 306 plot(0,type='n',axes=FALSE,ann=FALSE) | |
| 307 if (maxcount <= nrow(summary_df)){ | |
| 308 grid.table(summary_df[mincount:maxcount,], rows= NULL) | |
| 309 mincount = mincount+20 | |
| 310 maxcount = maxcount+20 | |
| 311 }else{### stop last page with last sample otherwise NA in table | |
| 312 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} | |
| 313 } | |
| 314 } | |
| 315 | |
| 316 ## optional output as .RData | |
| 317 #if $output_rdata: | |
| 318 save(msidata.cv.pls, file="$classification_rdata") | |
| 319 #end if | |
| 320 ######################## PLS - analysis ########################### | |
| 321 #elif str( $type_cond.method_cond.analysis_cond.PLS_method) == "PLS_analysis": | |
| 322 print("PLS analysis") | |
| 323 | |
| 324 ## number of components | |
| 325 component = c($type_cond.method_cond.analysis_cond.pls_comp) | |
| 326 | |
| 327 ### pls analysis | |
| 328 msidata.pls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.analysis_cond.pls_scale) | |
| 329 | |
| 330 ### plot of PLS coefficients | |
| 331 plot(msidata.pls, main="PLS coefficients per m/z") | |
| 332 | |
| 333 ### summary table of PLS | |
| 334 summary_table = summary(msidata.pls)\$accuracy[[paste0("ncomp = ",component)]] | |
| 335 summary_table = cbind(rownames(summary_table), data.frame(summary_table)) | |
| 336 rownames(summary_table) = NULL | |
| 337 print(summary_table) | |
| 338 ###plot(0,type='n',axes=FALSE,ann=FALSE) | |
| 339 ###grid.table(test, rows= TRUE) | |
| 340 | |
| 341 ### image of the best m/z | |
| 342 print(image(msidata, mz = topLabels(msidata.pls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", main="best m/z heatmap")) | |
| 343 | |
| 344 ## m/z and pixel information output | |
| 345 pls_classes = data.frame(msidata.pls\$classes[[1]]) | |
| 346 rownames(pls_classes) = names(pixels(msidata)) | |
| 347 colnames(pls_classes) = "predicted diagnosis" | |
| 348 pls_toplabels = topLabels(msidata.pls, n=$type_cond.method_cond.analysis_cond.pls_toplabels) | |
| 349 | |
| 350 write.table(pls_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | |
| 351 write.table(pls_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | |
| 352 | |
| 353 ## optional output as .RData | |
| 354 #if $output_rdata: | |
| 355 save(msidata.pls, file="$classification_rdata") | |
| 356 #end if | |
| 357 | |
| 358 #end if | |
| 359 | |
| 360 | |
| 361 ######################## OPLS ############################# | |
| 362 #elif str( $type_cond.method_cond.class_method) == "OPLS": | |
| 363 print("OPLS") | |
| 364 | |
| 365 ######################## OPLS -CV ############################# | |
| 366 #if str( $type_cond.method_cond.opls_analysis_cond.opls_method) == "opls_cvapply": | |
| 367 print("OPLS cv") | |
| 368 | |
| 369 ## folds | |
| 370 #if str($type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_vector) == "opls_fold_internal": | |
| 371 fold_vector = msidata\$$type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_name | |
| 372 #elif str($type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_vector) == "opls_fold_external": | |
| 373 fold_tabular = read.delim("$type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_data", header = FALSE, stringsAsFactors = FALSE) | |
| 374 fold_vector = as.factor(fold_tabular[,$type_cond.method_cond.opls_analysis_cond.opls_fold_cond.opls_fold_column]) | |
| 375 number_pixels = length(fold_vector) ## should be same as in data | |
| 376 #end if | |
| 377 | |
| 378 ## plot of folds | |
| 379 | |
| 380 position_df = cbind(coord(msidata)[,1:2], fold_vector) | |
| 381 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+ | |
| 382 geom_tile() + | |
| 383 coord_fixed()+ | |
| 384 ggtitle("Distribution of the fold variable")+ | |
| 385 theme_bw()+ | |
| 386 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
| 387 theme(legend.position="bottom",legend.direction="vertical")+ | |
| 388 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
| 389 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | |
| 390 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) | |
| 391 print(fold_plot) | |
| 392 | |
| 393 ## number of components | |
| 394 components = c($type_cond.method_cond.opls_analysis_cond.opls_cvcomp) | |
| 395 | |
| 396 ## OPLS-cvApply: | |
| 397 msidata.cv.opls <- cvApply(msidata, .y = y_vector, .fold = fold_vector, .fun = "OPLS", ncomp = components, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew_cv) | |
| 398 | |
| 399 ## create table with summary | |
| 400 count = 1 | |
| 401 summary_oplscv = list() | |
| 402 accuracy_vector = numeric() | |
| 403 for (iteration in components){ | |
| 404 summary_iteration = summary(msidata.cv.opls)\$accuracy[[paste0("ncomp = ", iteration)]] | |
| 405 summary_iteration = cbind(rownames(summary_iteration), summary_iteration) ## include rownames in table | |
| 406 accuracy_vector[count] = summary_iteration[1,2] ## vector with accuracies to find later maximum for plot | |
| 407 empty_row = c(paste0("ncomp = ", iteration), rep( "", length(levels(y_vector)))) ## add line with ncomp for each iteration | |
| 408 ##rownames(labeled_iteration)[1] = paste0("ncomp = ", iteration) | |
| 409 ##labeled_iteration = cbind(rownames(labeled_iteration), labeled_iteration) | |
| 410 labeled_iteration = rbind(empty_row, summary_iteration) | |
| 411 summary_oplscv[[count]] = labeled_iteration ## create list with summary table for each component | |
| 412 count = count+1} | |
| 413 ## create dataframe from list | |
| 414 summary_oplscv = do.call(rbind, summary_oplscv) | |
| 415 summary_df = as.data.frame(summary_oplscv) | |
| 416 rownames(summary_df) = NULL | |
| 417 | |
| 418 ## plots | |
| 419 ## plot to find ncomp with highest accuracy | |
| 420 plot(summary(msidata.cv.opls), main="Accuracy of OPLS classification") | |
| 421 ncomp_max = components[which.max(accuracy_vector)] ## find ncomp with max. accuracy | |
| 422 ## one image for each sample/fold, 4 images per page | |
| 423 image(msidata.cv.opls, model = list(ncomp = ncomp_max), layout = c(2, 2)) | |
| 424 | |
| 425 par(opar) | |
| 426 ## print table with summary in pdf | |
| 427 plot(0,type='n',axes=FALSE,ann=FALSE) | |
| 428 title(main="Summary for the different components\n", adj=0.5) | |
| 429 ## summary for 4 components (20 rows) fits in one page: | |
| 430 if (length(components)<5){ | |
| 431 grid.table(summary_df, rows= NULL) | |
| 432 }else{ | |
| 433 grid.table(summary_df[1:20,], rows= NULL) | |
| 434 mincount = 21 | |
| 435 maxcount = 40 | |
| 436 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){ | |
| 437 plot(0,type='n',axes=FALSE,ann=FALSE) | |
| 438 if (maxcount <= nrow(summary_df)){ | |
| 439 grid.table(summary_df[mincount:maxcount,], rows= NULL) | |
| 440 mincount = mincount+20 | |
| 441 maxcount = maxcount+20 | |
| 442 }else{### stop last page with last sample otherwise NA in table | |
| 443 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} | |
| 444 } | |
| 445 } | |
| 446 | |
| 447 ## optional output as .RData | |
| 448 #if $output_rdata: | |
| 449 save(msidata.cv.opls, file="$classification_rdata") | |
| 450 #end if | |
| 451 | |
| 452 ######################## OPLS -analysis ########################### | |
| 453 #elif str( $type_cond.method_cond.opls_analysis_cond.opls_method) == "opls_analysis": | |
| 454 print("OPLS analysis") | |
| 455 | |
| 456 ## number of components | |
| 457 component = c($type_cond.method_cond.opls_analysis_cond.opls_comp) | |
| 458 | |
| 459 ### opls analysis | |
| 460 msidata.opls <- PLS(msidata, y = y_vector, ncomp = component, scale=$type_cond.method_cond.opls_analysis_cond.opls_scale, keep.Xnew = $type_cond.method_cond.opls_analysis_cond.xnew) | |
| 461 | |
| 462 ### plot of OPLS coefficients | |
| 463 plot(msidata.opls, main="OPLS coefficients per m/z") | |
| 464 | |
| 465 ### summary table of OPLS | |
| 466 summary_table = summary(msidata.opls)\$accuracy[[paste0("ncomp = ",component)]] | |
| 467 summary_table = cbind(rownames(summary_table), summary_table) | |
| 468 rownames(summary_table) = NULL | |
| 469 summary_table = data.frame(summary_table) | |
| 470 print(summary_table) | |
| 471 ###plot(0,type='n',axes=FALSE,ann=FALSE) | |
| 472 ###grid.table(test, rows= TRUE) | |
| 473 | |
| 474 ### image of the best m/z | |
| 475 print(image(msidata, mz = topLabels(msidata.opls)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", main="best m/z heatmap")) | |
| 476 | |
| 477 ## m/z and pixel information output | |
| 478 opls_classes = data.frame(msidata.opls\$classes[[1]]) | |
| 479 rownames(opls_classes) = names(pixels(msidata)) | |
| 480 colnames(opls_classes) = "predicted diagnosis" | |
| 481 opls_toplabels = topLabels(msidata.opls, n=$type_cond.method_cond.opls_analysis_cond.opls_toplabels) | |
| 482 | |
| 483 write.table(opls_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | |
| 484 write.table(opls_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | |
| 485 | |
| 486 ## optional output as .RData | |
| 487 #if $output_rdata: | |
| 488 save(msidata.opls, file="$classification_rdata") | |
| 489 #end if | |
| 490 | |
| 491 #end if | |
| 492 | |
| 493 ######################## SSC ############################# | |
| 494 #elif str( $type_cond.method_cond.class_method) == "spatialShrunkenCentroids": | |
| 495 print("SSC") | |
| 496 | |
| 497 ######################## SSC - CV ############################# | |
| 498 #if str( $type_cond.method_cond.ssc_analysis_cond.ssc_method) == "ssc_cvapply": | |
| 499 print("SSC cv") | |
| 500 | |
| 501 ## folds | |
| 502 #if str($type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_vector) == "ssc_fold_internal": | |
| 503 fold_vector = msidata\$$type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_name | |
| 504 | |
| 505 #elif str($type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_vector) == "ssc_fold_external": | |
| 506 fold_tabular = read.delim("$type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_data", header = FALSE, stringsAsFactors = FALSE) | |
| 507 fold_vector = as.factor(fold_tabular[,$type_cond.method_cond.ssc_analysis_cond.ssc_fold_cond.ssc_fold_column]) | |
| 508 number_pixels = length(fold_vector) ## should be same as in data | |
| 509 #end if | |
| 510 | |
| 511 ## plot of folds | |
| 512 position_df = cbind(coord(msidata)[,1:2], fold_vector) | |
| 513 fold_plot = ggplot(position_df, aes(x=x, y=y, fill=fold_vector))+ | |
| 514 geom_tile() + | |
| 515 coord_fixed()+ | |
| 516 ggtitle("Distribution of the fold variable")+ | |
| 517 theme_bw()+ | |
| 518 theme(text=element_text(family="ArialMT", face="bold", size=15))+ | |
| 519 theme(legend.position="bottom",legend.direction="vertical")+ | |
| 520 guides(fill=guide_legend(ncol=4,byrow=TRUE)) | |
| 521 coord_labels = aggregate(cbind(x,y)~fold_vector, data=position_df, mean, na.rm=TRUE, na.action="na.pass") | |
| 522 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$fold_vector) | |
| 523 print(fold_plot) | |
| 524 | |
| 525 ## SSC-cvApply: | |
| 526 msidata.cv.ssc <- cvApply(msidata, .y = y_vector,.fold = fold_vector,.fun = "spatialShrunkenCentroids", r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") | |
| 527 | |
| 528 ## create table with summary | |
| 529 count = 1 | |
| 530 summary_ssccv = list() | |
| 531 accuracy_vector = numeric() | |
| 532 | |
| 533 for (iteration in names(msidata.cv.ssc@resultData[[1]][,1])){ | |
| 534 summary_iteration = summary(msidata.cv.ssc)\$accuracy[[iteration]] | |
| 535 summary_iteration = cbind(rownames(summary_iteration), summary_iteration) ## include rownames in table | |
| 536 accuracy_vector[count] = summary_iteration[1,2] ## vector with accuracies to find later maximum for plot | |
| 537 empty_row = c(iteration, rep( "", length(levels(y_vector)))) ## add line with ncomp for each iteration | |
| 538 labeled_iteration = rbind(empty_row, summary_iteration) | |
| 539 summary_ssccv[[count]] = labeled_iteration ## create list with summary table for each component | |
| 540 count = count+1 | |
| 541 } | |
| 542 | |
| 543 ##create dataframe from list | |
| 544 summary_ssccv = do.call(rbind, summary_ssccv) | |
| 545 summary_df = as.data.frame(summary_ssccv) | |
| 546 rownames(summary_df) = NULL | |
| 547 | |
| 548 ## plot to find parameters with highest accuracy | |
| 549 plot(summary(msidata.cv.ssc), main="Accuracy of SSC classification") | |
| 550 best_params = names(msidata.cv.ssc@resultData[[1]][,1])[which.max(accuracy_vector)] ## find parameters with max. accuracy | |
| 551 r_value = as.numeric(substring(unlist(strsplit(best_params, ","))[1], 4)) | |
| 552 s_value = as.numeric(substring(unlist(strsplit(best_params, ","))[3], 5)) ## remove space | |
| 553 | |
| 554 image(msidata.cv.ssc, model = list( r = r_value, s = s_value ), layout=c(2,2)) | |
| 555 | |
| 556 par(opar) | |
| 557 ## print table with summary in pdf | |
| 558 plot(0,type='n',axes=FALSE,ann=FALSE) | |
| 559 title(main="Summary for the different parameters\n", adj=0.5) | |
| 560 ## summary for 4 parameters (20 rows) fits in one page: | |
| 561 if (length(names(msidata.cv.ssc@resultData[[1]][,1]))<5){ | |
| 562 grid.table(summary_df, rows= NULL) | |
| 563 }else{ | |
| 564 grid.table(summary_df[1:20,], rows= NULL) | |
| 565 mincount = 21 | |
| 566 maxcount = 40 | |
| 567 for (count20 in 1:(ceiling(nrow(summary_df)/20)-1)){ | |
| 568 plot(0,type='n',axes=FALSE,ann=FALSE) | |
| 569 if (maxcount <= nrow(summary_df)){ | |
| 570 grid.table(summary_df[mincount:maxcount,], rows= NULL) | |
| 571 mincount = mincount+20 | |
| 572 maxcount = maxcount+20 | |
| 573 }else{### stop last page with last sample otherwise NA in table | |
| 574 grid.table(summary_df[mincount:nrow(summary_df),], rows= NULL)} | |
| 575 } | |
| 576 } | |
| 577 | |
| 578 ## optional output as .RData | |
| 579 #if $output_rdata: | |
| 580 save(msidata.cv.opls, file="$classification_rdata") | |
| 581 #end if | |
| 582 | |
| 583 ######################## SSC -analysis ########################### | |
| 584 #elif str( $type_cond.method_cond.ssc_analysis_cond.ssc_method) == "ssc_analysis": | |
| 585 print("SSC analysis") | |
| 586 | |
| 587 ## SSC analysis | |
| 588 msidata.ssc <- spatialShrunkenCentroids(msidata, y = y_vector, .fold = fold_vector, | |
| 589 r = c($type_cond.method_cond.ssc_r), s = c($type_cond.method_cond.ssc_s), method = "$type_cond.method_cond.ssc_kernel_method") | |
| 590 | |
| 591 plot(msidata.ssc, mode = "tstatistics", model = list("r" = c($type_cond.method_cond.ssc_r), "s" = c($type_cond.method_cond.ssc_s))) | |
| 592 | |
| 593 ### summary table SSC | |
| 594 | |
| 595 ##summary(msidata.ssc)\$accuracy[[names(msidata.ssc@resultData)]] | |
| 596 summary_table = summary(msidata.ssc) | |
| 597 print(summary_table) | |
| 598 ##summary_table = cbind(rownames(summary_table), summary_table) | |
| 599 ##rownames(summary_table) = NULL | |
| 600 | |
| 601 ###plot(0,type='n',axes=FALSE,ann=FALSE) | |
| 602 ###grid.table(summary_table, rows= TRUE) | |
| 603 | |
| 604 ### image of the best m/z | |
| 605 print(image(msidata, mz = topLabels(msidata.ssc)[1,1], normalize.image = "linear", contrast.enhance = "histogram",smooth.image="gaussian", main="best m/z heatmap")) | |
| 606 | |
| 607 ## m/z and pixel information output | |
| 608 ssc_classes = data.frame(msidata.ssc\$classes[[1]]) | |
| 609 rownames(ssc_classes) = names(pixels(msidata)) | |
| 610 colnames(ssc_classes) = "predicted diagnosis" | |
| 611 ssc_toplabels = topLabels(msidata.ssc) | |
| 612 | |
| 613 write.table(ssc_toplabels, file="$mzfeatures", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | |
| 614 write.table(ssc_classes, file="$pixeloutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | |
| 615 | |
| 616 ## optional output as .RData | |
| 617 #if $output_rdata: | |
| 618 save(msidata.ssc, file="$classification_rdata") | |
| 619 #end if | |
| 620 | |
| 621 #end if | |
| 622 #end if | |
| 623 | |
| 624 | |
| 625 ######################## II) Prediction ############################# | |
| 626 ############################################################################# | |
| 627 #elif str( $type_cond.type_method) == "prediction": | |
| 628 print("prediction") | |
| 629 | |
| 630 #if str($type_cond.new_y.new_y_values) == "no_new_y": | |
| 631 new_y_vector = FALSE | |
| 632 #elif str($type_cond.new_y.new_y_values) == "new_y_internal": | |
| 633 new_y_vector = msidata\$$type_cond.new_y.new_y_name | |
| 634 #elif str($type_cond.new_y.new_y_values) == "new_y_external": | |
| 635 | |
| 636 new_y_tabular = read.delim("$type_cond.new_y.new_y_data", header = FALSE, stringsAsFactors = FALSE) | |
| 637 new_y_vector = new_y_tabular[,$type_cond.new_y.new_y_column] | |
| 638 number_pixels = length(new_y_vector) ## should be same as in data | |
| 639 #end if | |
| 640 | |
| 641 training_data = loadRData("$type_cond.training_result") | |
| 642 prediction = predict(training_data,msidata, newy = new_y_vector) | |
| 643 | |
| 644 ## optional output as .RData | |
| 645 #if $output_rdata: | |
| 646 msidata = prediction | |
| 647 save(msidata, file="$classification_rdata") | |
| 648 #end if | |
| 649 #end if | |
| 650 | |
| 651 dev.off() | |
| 652 }else{ | |
| 653 print("Inputfile has no intensities > 0") | |
| 654 dev.off() | |
| 655 } | |
| 656 | |
| 657 ]]></configfile> | |
| 658 </configfiles> | |
| 659 <inputs> | |
| 660 <param name="infile" type="data" format="imzml, rdata, analyze75" | |
| 661 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" | |
| 662 help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/> | |
| 663 <conditional name="processed_cond"> | |
| 664 <param name="processed_file" type="select" label="Is the input file a processed imzML file "> | |
| 665 <option value="no_processed" selected="True">not a processed imzML</option> | |
| 666 <option value="processed">processed imzML</option> | |
| 667 </param> | |
| 668 <when value="no_processed"/> | |
| 669 <when value="processed"> | |
| 670 <param name="accuracy" type="float" value="50" label="Mass accuracy to which the m/z values will be binned" help="This should be set to the native accuracy of the mass spectrometer, if known"/> | |
| 671 <param name="units" display="radio" type="select" label="Unit of the mass accuracy" help="either m/z or ppm"> | |
| 672 <option value="mz" >mz</option> | |
| 673 <option value="ppm" selected="True" >ppm</option> | |
| 674 </param> | |
| 675 </when> | |
| 676 </conditional> | |
| 677 | |
| 678 <conditional name="type_cond"> | |
| 679 <param name="type_method" type="select" label="Analysis step to perform"> | |
| 680 <option value="training" selected="True">training</option> | |
| 681 <option value="prediction">prediction</option> | |
| 682 </param> | |
| 683 <when value="training"> | |
| 684 | |
| 685 <conditional name="method_cond"> | |
| 686 <param name="class_method" type="select" label="Select the method for classification"> | |
| 687 <option value="PLS" selected="True">PLS</option> | |
| 688 <option value="OPLS">OPLS</option> | |
| 689 <option value="spatialShrunkenCentroids">spatial shrunken centroids</option> | |
| 690 </param> | |
| 691 <when value="PLS"> | |
| 692 | |
| 693 <conditional name="analysis_cond"> | |
| 694 <param name="PLS_method" type="select" label="Crossvalidation or analysis"> | |
| 695 <option value="cvapply" selected="True">cvApply</option> | |
| 696 <option value="PLS_analysis">PLS analysis</option> | |
| 697 </param> | |
| 698 <when value="cvapply"> | |
| 699 | |
| 700 <param name="plscv_comp" type="text" value="1:2" | |
| 701 label="The number of PLS components" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/> | |
| 702 <conditional name="fold_cond"> | |
| 703 <param name="fold_vector" type="select" label="Define the fold variable"> | |
| 704 <option value="fold_internal" selected="True">dataset contains already fold</option> | |
| 705 <option value="fold_external">use fold from tabular file</option> | |
| 706 </param> | |
| 707 <when value="fold_internal"> | |
| 708 <param name="fold_name" type="text" value="sample" label="Name of the pData slot where fold is stored" help="each fold must contain pixels of all categories"/> | |
| 709 </when> | |
| 710 <when value="fold_external"> | |
| 711 <param name="fold_data" type="data" format="tabular" label="Tabular file with column for folds" help="Number of rows must be number of pixels"/> | |
| 712 <param name="fold_column" data_ref="fold_data" label="Column with folds" type="data_column"/> | |
| 713 </when> | |
| 714 </conditional> | |
| 715 </when> | |
| 716 | |
| 717 <when value="PLS_analysis"> | |
| 718 <param name="pls_comp" type="integer" value="5" | |
| 719 label="The optimal number of PLS components as indicated by cross-validations" help="Run cvApply first to optain optiaml number of PLS components"/> | |
| 720 <param name="pls_scale" type="boolean" display="radio" label="data scaling" truevalue="TRUE" falsevalue="FALSE"/> | |
| 721 <param name="pls_toplabels" type="integer" value="100" | |
| 722 label="Number of toplabels (masses) which should be written in tabular output"/> | |
| 723 </when> | |
| 724 </conditional> | |
| 725 </when> | |
| 726 | |
| 727 <when value="OPLS"> | |
| 728 | |
| 729 <conditional name="opls_analysis_cond"> | |
| 730 <param name="opls_method" type="select" label="Analysis step to perform"> | |
| 731 <option value="opls_cvapply" selected="True">cvApply</option> | |
| 732 <option value="opls_analysis">OPLS analysis</option> | |
| 733 </param> | |
| 734 | |
| 735 <when value="opls_cvapply"> | |
| 736 <param name="opls_cvcomp" type="text" value="1:2" | |
| 737 label="The number of OPLS components" help="Multiple values are allowed (e.g. 1,2,3 or 2:5)"/> | |
| 738 <param name="xnew_cv" type="boolean" display="radio" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/> | |
| 739 <conditional name="opls_fold_cond"> | |
| 740 <param name="opls_fold_vector" type="select" label="Define the fold variable"> | |
| 741 <option value="opls_fold_internal" selected="True">dataset contains already fold</option> | |
| 742 <option value="opls_fold_external">use fold from tabular file</option> | |
| 743 </param> | |
| 744 <when value="opls_fold_internal"> | |
| 745 <param name="opls_fold_name" type="text" value="sample" label="Name of the pData slot where fold is stored" help="each fold must contain pixels of all categories"/> | |
| 746 </when> | |
| 747 <when value="opls_fold_external"> | |
| 748 <param name="opls_fold_data" type="data" format="tabular" label="Tabular file with column for folds" help="Number of rows must be number of pixels"/> | |
| 749 <param name="opls_fold_column" data_ref="opls_fold_data" label="Column with folds" type="data_column"/> | |
| 750 </when> | |
| 751 </conditional> | |
| 752 </when> | |
| 753 | |
| 754 <when value="opls_analysis"> | |
| 755 <param name="opls_comp" type="integer" value="5" | |
| 756 label="The optimal number of PLS components as indicated by cross-validations" help="Run cvApply first to optain optiaml number of PLS components"/> | |
| 757 <param name="xnew" type="boolean" display="radio" truevalue="TRUE" falsevalue="FALSE" label="Keep new matrix"/> | |
| 758 <param name="opls_scale" type="select" label="data scaling" display="radio" optional="False"> | |
| 759 <option value="TRUE">yes</option> | |
| 760 <option value="FALSE" selected="True">no</option> | |
| 761 </param> | |
| 762 <param name="opls_toplabels" type="integer" value="100" | |
| 763 label="Number of toplabels (features) which should be written in tabular output"/> | |
| 764 </when> | |
| 765 </conditional> | |
| 766 </when> | |
| 767 | |
| 768 <when value="spatialShrunkenCentroids"> | |
| 769 <conditional name="ssc_analysis_cond"> | |
| 770 <param name="ssc_method" type="select" label="Analysis step to perform"> | |
| 771 <option value="ssc_cvapply" selected="True">cvApply</option> | |
| 772 <option value="ssc_analysis">spatial shrunken centroids analysis</option> | |
| 773 </param> | |
| 774 <when value="ssc_cvapply"> | |
| 775 | |
| 776 <conditional name="ssc_fold_cond"> | |
| 777 <param name="ssc_fold_vector" type="select" label="Define the fold variable"> | |
| 778 <option value="ssc_fold_internal" selected="True">dataset contains already fold</option> | |
| 779 <option value="ssc_fold_external">use fold from tabular file</option> | |
| 780 </param> | |
| 781 <when value="ssc_fold_internal"> | |
| 782 <param name="ssc_fold_name" type="text" value="sample" label="Name of the pData slot where fold is stored" help="each fold must contain pixels of all categories"/> | |
| 783 </when> | |
| 784 <when value="ssc_fold_external"> | |
| 785 <param name="ssc_fold_data" type="data" format="tabular" label="Tabular file with column for folds" help="Number of rows must be number of pixels"/> | |
| 786 <param name="ssc_fold_column" data_ref="ssc_fold_data" label="Column with folds" type="data_column"/> | |
| 787 </when> | |
| 788 </conditional> | |
| 789 </when> | |
| 790 | |
| 791 <when value="ssc_analysis"> | |
| 792 | |
| 793 <param name="ssc_toplabels" type="integer" value="100" | |
| 794 label="Number of toplabels (features) which should be written in tabular output"/> | |
| 795 </when> | |
| 796 </conditional> | |
| 797 <param name="ssc_r" type="text" value="2" | |
| 798 label="The spatial neighborhood radius of nearby pixels to consider (r)" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"/> | |
| 799 <param name="ssc_s" type="text" value="2" | |
| 800 label="The sparsity thresholding parameter by which to shrink the t-statistics (s)" help="For cvapply multiple values are allowed (e.g. 1,2,3 or 2:5)"/> | |
| 801 <param name="ssc_kernel_method" type="select" display="radio" label = "The method to use to calculate the spatial smoothing kernels for the embedding. The 'gaussian' method refers to spatially-aware (SA) weights, and 'adaptive' refers to spatially-aware structurally-adaptive (SASA) weights"> | |
| 802 <option value="gaussian">gaussian</option> | |
| 803 <option value="adaptive" selected="True">adaptive</option> | |
| 804 </param> | |
| 805 | |
| 806 </when> | |
| 807 </conditional> | |
| 808 <conditional name="y_cond"> | |
| 809 <param name="y_vector" type="select" label="Define the response variable y"> | |
| 810 <option value="y_internal" selected="True">dataset contains already y</option> | |
| 811 <option value="y_external">use y from tabular file</option> | |
| 812 </param> | |
| 813 <when value="y_internal"> | |
| 814 <param name="y_name" type="text" value="combined_sample" label="Name of the pData slot where y is stored" help="Outputs of MSI_combine tool have 'combined_sample' as name"/> | |
| 815 </when> | |
| 816 <when value="y_external"> | |
| 817 <param name="y_data" type="data" format="tabular" label="Tabular file with column for y response"/> | |
| 818 <param name="y_column" data_ref="y_data" label="Column with y response" type="data_column"/> | |
| 819 </when> | |
| 820 </conditional> | |
| 821 </when> | |
| 822 | |
| 823 <when value="prediction"> | |
| 824 <param name="training_result" type="data" format="rdata" label="Result from previous classification training"/> | |
| 825 <conditional name="new_y"> | |
| 826 <param name="new_y_values" type="select" label="Define the new response y"> | |
| 827 <option value="no_new_y" >no new y response</option> | |
| 828 <option value="new_y_internal" selected="True">dataset contains already y</option> | |
| 829 <option value="new_y_external">use y from tabular file</option> | |
| 830 </param> | |
| 831 <when value="no_new_y"/> | |
| 832 <when value="new_y_internal"> | |
| 833 <param name="new_y_name" type="text" value="combined_sample" label="Name of the pData slot where y is stored" help="data merged with MSI_combine tool has 'combined_sample' as name"/> | |
| 834 </when> | |
| 835 | |
| 836 <when value="new_y_external"> | |
| 837 <param name="new_y_data" type="data" format="tabular" label="Tabular file with column for y response"/> | |
| 838 <param name="new_y_column" data_ref="new_y_data" label="Column with y response" type="data_column"/> | |
| 839 </when> | |
| 840 </conditional> | |
| 841 </when> | |
| 842 </conditional> | |
| 843 <param name="output_rdata" type="boolean" display="radio" label="Results as .RData output"/> | |
| 844 </inputs> | |
| 845 <outputs> | |
| 846 <data format="pdf" name="classification_images" from_work_dir="classificationpdf.pdf" label = "$infile.display_name classification"/> | |
| 847 <data format="tabular" name="mzfeatures" label="$infile.display_name features"/> | |
| 848 <data format="tabular" name="pixeloutput" label="$infile.display_name pixels"/> | |
| 849 <data format="rdata" name="classification_rdata" label="$infile.display_name classification"> | |
| 850 <filter>output_rdata</filter> | |
| 851 </data> | |
| 852 </outputs> | |
| 853 <tests> | |
| 854 <test expect_num_outputs="3"> | |
| 855 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
| 856 <conditional name="type_cond"> | |
| 857 <param name="type_method" value="training"/> | |
| 858 <conditional name="method_cond"> | |
| 859 <param name="class_method" value="PLS"/> | |
| 860 <conditional name="analysis_cond"> | |
| 861 <param name="PLS_method" value="cvapply"/> | |
| 862 | |
| 863 <param name="plscv_comp" value="2:4"/> | |
| 864 <conditional name="fold_cond"> | |
| 865 <param name="fold_vector" value="fold_external"/> | |
| 866 <param name="fold_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
| 867 <param name="fold_column" value="1"/> | |
| 868 </conditional> | |
| 869 | |
| 870 </conditional> | |
| 871 </conditional> | |
| 872 <conditional name="y_cond"> | |
| 873 <param name="y_vector" value="y_external"/> | |
| 874 <param name="y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
| 875 <param name="y_column" value="2"/> | |
| 876 </conditional> | |
| 877 </conditional> | |
| 878 <output name="mzfeatures" file="features_test1.tabular"/> | |
| 879 <output name="pixeloutput" file="pixels_test1.tabular"/> | |
| 880 <output name="classification_images" file="test1.pdf" compare="sim_size" delta="20000"/> | |
| 881 </test> | |
| 882 | |
| 883 <test expect_num_outputs="4"> | |
| 884 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
| 885 <conditional name="type_cond"> | |
| 886 <param name="type_method" value="training"/> | |
| 887 <conditional name="method_cond"> | |
| 888 <param name="class_method" value="PLS"/> | |
| 889 <conditional name="analysis_cond"> | |
| 890 <param name="PLS_method" value="PLS_analysis"/> | |
| 891 | |
| 892 <param name="pls_comp" value="2"/> | |
| 893 <param name="pls_scale" value="TRUE"/> | |
| 894 <param name="pls_toplabels" value="100"/> | |
| 895 <conditional name="fold_cond"> | |
| 896 <param name="fold_vector" value="fold_external"/> | |
| 897 <param name="fold_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
| 898 <param name="fold_column" value="1"/> | |
| 899 </conditional> | |
| 900 | |
| 901 </conditional> | |
| 902 </conditional> | |
| 903 <conditional name="y_cond"> | |
| 904 <param name="y_vector" value="y_external"/> | |
| 905 <param name="y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
| 906 <param name="y_column" value="2"/> | |
| 907 </conditional> | |
| 908 </conditional> | |
| 909 <param name="output_rdata" value="True"/> | |
| 910 <output name="mzfeatures" file="features_test2.tabular"/> | |
| 911 <output name="pixeloutput" file="pixels_test2.tabular"/> | |
| 912 <output name="classification_images" file="test2.pdf" compare="sim_size" delta="20000"/> | |
| 913 <output name="classification_rdata" file="test2.rdata" compare="sim_size" /> | |
| 914 </test> | |
| 915 | |
| 916 <test expect_num_outputs="3"> | |
| 917 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
| 918 <conditional name="type_cond"> | |
| 919 <param name="type_method" value="training"/> | |
| 920 <conditional name="method_cond"> | |
| 921 <param name="class_method" value="OPLS"/> | |
| 922 <conditional name="opls_analysis_cond"> | |
| 923 <param name="opls_method" value="opls_analysis"/> | |
| 924 | |
| 925 <param name="opls_cvcomp" value="1:2"/> | |
| 926 <param name="xnew_cv" value="FALSE"/> | |
| 927 <conditional name="opls_fold_cond"> | |
| 928 <param name="opls_fold_vector" value="opls_fold_external"/> | |
| 929 <param name="opls_fold_data" ftype="tabular" value="random_factors.tabular"/> | |
| 930 <param name="opls_fold_column" value="1"/> | |
| 931 </conditional> | |
| 932 </conditional> | |
| 933 </conditional> | |
| 934 <conditional name="y_cond"> | |
| 935 <param name="y_vector" value="y_external"/> | |
| 936 <param name="y_data" value="random_factors.tabular" ftype="tabular"/> | |
| 937 <param name="y_column" value="2"/> | |
| 938 </conditional> | |
| 939 </conditional> | |
| 940 <output name="mzfeatures" file="features_test3.tabular"/> | |
| 941 <output name="pixeloutput" file="pixels_test3.tabular"/> | |
| 942 <output name="classification_images" file="test3.pdf" compare="sim_size" delta="20000"/> | |
| 943 </test> | |
| 944 | |
| 945 <test expect_num_outputs="4"> | |
| 946 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
| 947 <conditional name="type_cond"> | |
| 948 <param name="type_method" value="training"/> | |
| 949 <conditional name="method_cond"> | |
| 950 <param name="class_method" value="OPLS"/> | |
| 951 <conditional name="opls_analysis_cond"> | |
| 952 | |
| 953 <param name="opls_method" value="opls_analysis"/> | |
| 954 <param name="opls_comp" value="3"/> | |
| 955 <param name="xnew" value="FALSE"/> | |
| 956 <param name="opls_scale" value="FALSE"/> | |
| 957 <param name="opls_toplabels" value="100"/> | |
| 958 </conditional> | |
| 959 | |
| 960 </conditional> | |
| 961 <conditional name="y_cond"> | |
| 962 <param name="y_vector" value="y_external"/> | |
| 963 <param name="y_data" value="random_factors.tabular" ftype="tabular"/> | |
| 964 <param name="y_column" value="2"/> | |
| 965 </conditional> | |
| 966 </conditional> | |
| 967 <param name="output_rdata" value="True"/> | |
| 968 <output name="mzfeatures" file="features_test4.tabular"/> | |
| 969 <output name="pixeloutput" file="pixels_test4.tabular"/> | |
| 970 <output name="classification_images" file="test4.pdf" compare="sim_size" delta="20000"/> | |
| 971 <output name="classification_rdata" file="test4.rdata" compare="sim_size" /> | |
| 972 </test> | |
| 973 | |
| 974 <test expect_num_outputs="3"> | |
| 975 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
| 976 <conditional name="type_cond"> | |
| 977 <param name="type_method" value="training"/> | |
| 978 <conditional name="method_cond"> | |
| 979 <param name="class_method" value="spatialShrunkenCentroids"/> | |
| 980 <conditional name="ssc_analysis_cond"> | |
| 981 <param name="ssc_method" value="ssc_cvapply"/> | |
| 982 <conditional name="ssc_fold_cond"> | |
| 983 <param name="ssc_fold_vector" value="ssc_fold_external"/> | |
| 984 <param name="ssc_fold_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
| 985 <param name="ssc_fold_column" value="1"/> | |
| 986 </conditional> | |
| 987 <param name="ssc_r" value="1:2"/> | |
| 988 <param name="ssc_s" value="2:3"/> | |
| 989 <param name="ssc_kernel_method" value="adaptive"/> | |
| 990 </conditional> | |
| 991 </conditional> | |
| 992 <conditional name="y_cond"> | |
| 993 <param name="y_vector" value="y_external"/> | |
| 994 <param name="y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
| 995 <param name="y_column" value="2"/> | |
| 996 </conditional> | |
| 997 </conditional> | |
| 998 <output name="mzfeatures" file="features_test5.tabular"/> | |
| 999 <output name="pixeloutput" file="pixels_test5.tabular"/> | |
| 1000 <output name="classification_images" file="test5.pdf" compare="sim_size" delta="20000"/> | |
| 1001 </test> | |
| 1002 | |
| 1003 <test expect_num_outputs="4"> | |
| 1004 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
| 1005 <conditional name="type_cond"> | |
| 1006 <param name="type_method" value="training"/> | |
| 1007 <conditional name="method_cond"> | |
| 1008 <param name="class_method" value="spatialShrunkenCentroids"/> | |
| 1009 <conditional name="ssc_analysis_cond"> | |
| 1010 <param name="ssc_method" value="ssc_analysis"/> | |
| 1011 <param name="ssc_toplabels" value="100"/> | |
| 1012 </conditional> | |
| 1013 <param name="ssc_r" value="2"/> | |
| 1014 <param name="ssc_s" value="2"/> | |
| 1015 <param name="ssc_kernel_method" value="adaptive"/> | |
| 1016 </conditional> | |
| 1017 <conditional name="y_cond"> | |
| 1018 <param name="y_vector" value="y_external"/> | |
| 1019 <param name="y_data" value="random_factors.tabular" ftype="tabular"/> | |
| 1020 <param name="y_column" value="2"/> | |
| 1021 </conditional> | |
| 1022 </conditional> | |
| 1023 <param name="output_rdata" value="True"/> | |
| 1024 <output name="mzfeatures" file="features_test6.tabular"/> | |
| 1025 <output name="pixeloutput" file="pixels_test6.tabular"/> | |
| 1026 <output name="classification_images" file="test6.pdf" compare="sim_size" delta="20000"/> | |
| 1027 <output name="classification_rdata" file="test6.rdata" compare="sim_size" /> | |
| 1028 </test> | |
| 1029 | |
| 1030 <test expect_num_outputs="4"> | |
| 1031 <param name="infile" value="testfile_squares.rdata" ftype="rdata"/> | |
| 1032 <conditional name="type_cond"> | |
| 1033 <param name="type_method" value="prediction"/> | |
| 1034 <param name="training_result" value="test2.rdata" ftype="rdata"/> | |
| 1035 <conditional name="new_y"> | |
| 1036 <param name="new_y_values" value="new_y_external"/> | |
| 1037 <param name="new_y_data" value="pixel_annotation_file1.tabular" ftype="tabular"/> | |
| 1038 <param name="new_y_column" value="2"/> | |
| 1039 </conditional> | |
| 1040 </conditional> | |
| 1041 <param name="output_rdata" value="True"/> | |
| 1042 <output name="mzfeatures" file="features_test7.tabular"/> | |
| 1043 <output name="pixeloutput" file="pixels_test7.tabular"/> | |
| 1044 <output name="classification_images" file="test7.pdf" compare="sim_size" delta="20000"/> | |
| 1045 <output name="classification_rdata" file="test7.rdata" compare="sim_size" /> | |
| 1046 </test> | |
| 1047 | |
| 1048 </tests> | |
| 1049 <help> | |
| 1050 <![CDATA[ | |
| 1051 | |
| 1052 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_ | |
| 1053 | |
| 1054 This tool provides three different Cardinal functions for supervised classification of mass-spectrometry imaging data. | |
| 1055 | |
| 1056 Input data: 3 types of input data can be used: | |
| 1057 | |
| 1058 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ | |
| 1059 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) | |
| 1060 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) | |
| 1061 | |
| 1062 Options: | |
| 1063 | |
| 1064 - PLS(-DA): partial least square (discriminant analysis) | |
| 1065 - O-PLS(-DA): Orthogonal partial least squares (discriminant analysis) | |
| 1066 - Spatial shrunken centroids | |
| 1067 | |
| 1068 Output: | |
| 1069 | |
| 1070 - Pdf with the heatmaps and plots for the classification | |
| 1071 - Tabular file with information on masses and pixels: toplabels/classes (PLS, spatial shrunken centroids) | |
| 1072 - optional RData output to further explore the results with Cardinal in R | |
| 1073 | |
| 1074 ]]> | |
| 1075 </help> | |
| 1076 <citations> | |
| 1077 <citation type="doi">10.1093/bioinformatics/btv146</citation> | |
| 1078 </citations> | |
| 1079 </tool> |
