Mercurial > repos > galaxyp > cardinal_data_exporter
comparison data_exporter.xml @ 0:b15bb1daeaa6 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 0825a4ccd3ebf4ca8a298326d14f3e7b25ae8415
| author | galaxyp |
|---|---|
| date | Mon, 01 Oct 2018 00:58:37 -0400 |
| parents | |
| children | 3dcfb4eb7738 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b15bb1daeaa6 |
|---|---|
| 1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.0"> | |
| 2 <description> | |
| 3 exports imzML and Analyze7.5 to tabular files | |
| 4 </description> | |
| 5 <macros> | |
| 6 <import>macros.xml</import> | |
| 7 </macros> | |
| 8 <expand macro="requirements"/> | |
| 9 <command detect_errors="exit_code"> | |
| 10 <![CDATA[ | |
| 11 ## print the generated R script to stdout, then execute it with Rscript | |
| 12 @INPUT_LINKING@ | |
| 13 cat '${cardinal_imzml_exporter}' && | |
| 14 Rscript '${cardinal_imzml_exporter}' | |
| 15 | |
| 16 ]]> | |
| 17 </command> | |
| 18 <configfiles> | |
| 19 <configfile name="cardinal_imzml_exporter"><![CDATA[ | |
| 20 | |
| 21 ################################# load libraries and read file ################# | |
| 22 | |
| 23 library(Cardinal) | |
| 24 | |
| 25 @READING_MSIDATA@ | |
| 26 | |
| 27 npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE) | |
| 28 | |
| 29 if (npeaks > 0){ | |
| 30 | |
| 31 ###################### Intensity matrix output ################################ | |
| 32 | |
| 33 #if "int_matrix" in str($output_options).split(","): | |
| 34 print("intensity matrix output") | |
| 35 | |
| 36 ## row labels: feature names with " = " replaced by "_" and "/" removed | |
| 37 feature_labels = gsub("/", "", gsub(" = ", "_", names(features(msidata)))) | |
| 38 ## column labels: pixel names with ", y = " replaced by "_" and the remaining " = " replaced by "y_" | |
| 39 spectrum_labels = gsub(" = ", "y_", gsub(", y = ", "_", names(pixels(msidata)))) | |
| 40 | |
| 41 ## first column holds the m/z labels, first row holds the pixel labels; written without R row/column names | |
| 42 labelled_rows = cbind(feature_labels, spectra(msidata)[]) | |
| 43 output_table = rbind(c("mz_name", spectrum_labels), labelled_rows) | |
| 44 write.table(output_table, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t") | |
| 45 | |
| 46 #end if | |
| 47 | |
| 48 | |
| 49 ############################## m/z feature output ########################## | |
| 50 #if "mz_tabular" in str($output_options).split(","): | |
| 51 print("mz feature output") | |
| 52 | |
| 53 mz_names = gsub(" = ", "_", names(features(msidata))) | |
| 54 mz_names = gsub("/", "", mz_names) | |
| 55 feature_intensities = spectra(msidata)[] ## extract the intensity matrix once and reuse it for all statistics below | |
| 56 ## mean, median, sd and SEM intensity per m/z over all spectra of the file | |
| 57 full_sample_mean = apply(feature_intensities,1,mean, na.rm=TRUE) | |
| 58 full_sample_median = apply(feature_intensities,1,median, na.rm=TRUE) | |
| 59 full_sample_sd = apply(feature_intensities,1,sd, na.rm=TRUE) | |
| 60 full_sample_sem = full_sample_sd/sqrt(rowSums(!is.na(feature_intensities))) ## SEM = sd/sqrt(n) with n = number of non-NA intensities per m/z (sd/mean*100 would be the coefficient of variation in percent, not the SEM) | |
| 61 ## npeaks and sum of all intensities per m/z | |
| 62 mzTIC = rowSums(feature_intensities, na.rm=TRUE) ## calculate intensity sum for each m/z | |
| 63 peakspermz = rowSums(feature_intensities > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra) | |
| 64 | |
| 65 ## combine into dataframe, order is the same for all vectors | |
| 66 mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz) | |
| 67 colnames(mz_df) = c("mz_names", "mz", "sample_mean", "sample_median", "sample_sd", "sample_sem", "intensity_sum", "number_peaks") | |
| 68 write.table(mz_df, file="$feature_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
| 69 #end if | |
| 70 | |
| 71 ###################### summarized m/z feature output ####################### | |
| 72 | |
| 73 #if str($tabular_annotation.load_annotation) == 'yes_annotation': | |
| 74 print("summarized annotation output") | |
| 75 | |
| 76 ## read and extract x,y,annotation information | |
| 77 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) | |
| 78 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] | |
| 79 colnames(annotation_input) = c("x", "y", "annotation") | |
| 80 | |
| 81 ## merge with coordinate information of msidata; all.x=TRUE keeps pixels that have no annotation | |
| 82 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata))) | |
| 83 colnames(msidata_coordinates)[3] = "pixel_index" | |
| 84 merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE) | |
| 85 merged_annotation[is.na(merged_annotation)] = "NA" ## unannotated pixels get the literal string NA and form their own group | |
| 86 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] ## restore the original pixel order after the merge | |
| 87 msidata\$annotation = as.factor(merged_annotation[,4]) | |
| 88 | |
| 89 ## create m/z feature name | |
| 90 mz_names = gsub(" = ", "_", names(features(msidata))) | |
| 91 mz_names = gsub("/", "", mz_names) | |
| 92 | |
| 93 #if "mean" in str($tabular_annotation.summary_type).split(","): | |
| 94 print("summarized mean") | |
| 95 | |
| 96 ## calculate mean per annotation group | |
| 97 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) | |
| 98 ## one column is appended per annotation group, in the order of the factor levels | |
| 99 for (subsample in levels(msidata\$annotation)){ | |
| 100 subsample_pixels = msidata[,msidata\$annotation == subsample] | |
| 101 subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE) | |
| 102 sample_matrix = cbind(sample_matrix, subsample_calc) | |
| 103 } | |
| 104 sample_matrix_mean = cbind(mz_names,sample_matrix) | |
| 105 sample_matrix_mean = rbind(c("mz_name", levels(msidata\$annotation)), sample_matrix_mean) | |
| 106 write.table(sample_matrix_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | |
| 107 #end if | |
| 108 | |
| 109 #if "median" in str($tabular_annotation.summary_type).split(","): | |
| 110 print("summarized median") | |
| 111 ## calculate median per annotation group | |
| 112 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) | |
| 113 ## one column is appended per annotation group, in the order of the factor levels | |
| 114 for (subsample in levels(msidata\$annotation)){ | |
| 115 subsample_pixels = msidata[,msidata\$annotation == subsample] | |
| 116 subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE) | |
| 117 sample_matrix = cbind(sample_matrix, subsample_calc) | |
| 118 } | |
| 119 sample_matrix_median = cbind(mz_names,sample_matrix) | |
| 120 sample_matrix_median = rbind(c("mz_name", levels(msidata\$annotation)), sample_matrix_median) | |
| 121 write.table(sample_matrix_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | |
| 122 #end if | |
| 123 | |
| 124 #if "sd" in str($tabular_annotation.summary_type).split(","): | |
| 125 print("summarized sd") | |
| 126 ## calculate standard deviation per annotation group | |
| 127 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) | |
| 128 ## one column is appended per annotation group, in the order of the factor levels | |
| 129 for (subsample in levels(msidata\$annotation)){ | |
| 130 subsample_pixels = msidata[,msidata\$annotation == subsample] | |
| 131 subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE) | |
| 132 sample_matrix = cbind(sample_matrix, subsample_calc) | |
| 133 } | |
| 134 sample_matrix_sd = cbind(mz_names,sample_matrix) | |
| 135 sample_matrix_sd = rbind(c("mz_name", levels(msidata\$annotation)), sample_matrix_sd) | |
| 136 write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | |
| 137 #end if | |
| 138 | |
| 139 #end if | |
| 140 | |
| 141 | |
| 142 ############################ spectra (pixel) output ############################ | |
| 143 #if "pixel_tabular" in str($output_options).split(","): | |
| 144 print("pixel output") | |
| 145 | |
| 146 ## coordinates | |
| 147 xycoordinates = coord(msidata)[,1:2] | |
| 148 | |
| 149 ## pixel name | |
| 150 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) | |
| 151 pixel_names = gsub(" = ", "y_", pixel_names) | |
| 152 | |
| 153 ## pixel order | |
| 154 pixelxyarray=1:length(pixels(msidata)) | |
| 155 | |
| 156 ## number of peaks per spectrum: every intensity value > 0 counts as peak | |
| 157 peaksperpixel = colSums(spectra(msidata)[]> 0, na.rm=TRUE) | |
| 158 | |
| 159 ## total ion current (TIC) per spectrum: sum of all intensities, rounded to 2 digits | |
| 160 TICs = round(colSums(spectra(msidata)[], na.rm=TRUE), digits = 2) | |
| 161 | |
| 162 ## most abundant m/z per spectrum: the m/z with the highest intensity | |
| 163 highestmz = apply(spectra(msidata)[],2,which.max) | |
| 164 highestmz_data = mz(msidata)[highestmz] | |
| 165 | |
| 166 ## Combine into dataframe; order is the same for all vectors | |
| 167 spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, TICs, highestmz_data) | |
| 168 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz") | |
| 169 | |
| 170 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": | |
| 171 | |
| 172 calibrant_list = read.delim("$counting_calibrants.mz_tabular", header = $counting_calibrants.feature_header, na.strings=c("","NA"), stringsAsFactors = FALSE) | |
| 173 calibrant_list = calibrant_list[,$counting_calibrants.feature_column, drop=FALSE] | |
| 174 ## keep only input m/z strictly inside the m/z range of the data; which() drops NA entries (e.g. empty cells) instead of turning them into NA rows | |
| 175 inputcalibrants = calibrant_list[which(calibrant_list[,1]>min(mz(msidata)) & calibrant_list[,1]<max(mz(msidata))),,drop = FALSE] | |
| 176 inputcalibrantmasses = inputcalibrants[,1] | |
| 177 | |
| 178 ## number of calibrants per spectrum | |
| 179 | |
| 180 ## logical matrix: one row per calibrant, one column per spectrum; TRUE if an intensity > 0 lies in the calibrant m/z window | |
| 181 pixelmatrix = matrix(ncol=ncol(msidata), nrow = 0) | |
| 182 | |
| 183 if (length(inputcalibrantmasses) != 0){ | |
| 184 | |
| 185 ## calculate plusminus values in m/z for each calibrant | |
| 186 plusminusvalues = rep($counting_calibrants.plusminus_ppm/1000000, length(inputcalibrantmasses))*inputcalibrantmasses | |
| 187 | |
| 188 ## filter for m/z window of each calibrant and calculate if sum of peak intensities > 0 | |
| 189 | |
| 190 for (mass in seq_along(inputcalibrantmasses)){ | |
| 191 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] | |
| 192 if (nrow(filtered_data) > 1 && sum(spectra(filtered_data)[],na.rm=TRUE) > 0){ | |
| 193 ## several m/z in the window: TRUE where the summed intensity is > 0 | |
| 194 intensity_sum = colSums(spectra(filtered_data)[], na.rm=TRUE) > 0 | |
| 195 }else if(nrow(filtered_data) == 1 && sum(spectra(filtered_data)[], na.rm=TRUE) > 0){ | |
| 196 ## single m/z in the window: TRUE where its intensity is > 0 | |
| 197 intensity_sum = spectra(filtered_data)[] > 0 | |
| 198 }else{ | |
| 199 intensity_sum = rep(FALSE, ncol(filtered_data))} | |
| 200 ## append one row for this calibrant | |
| 201 pixelmatrix = rbind(pixelmatrix, intensity_sum) | |
| 202 } | |
| 203 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE) | |
| 204 countvector= as.factor(colSums(pixelmatrix, na.rm=TRUE)) | |
| 205 }else{countvector = rep(0,ncol(msidata))} | |
| 206 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts | |
| 207 colnames(countdf) = c("x_values", "y_values", "input m/z count") | |
| 208 spectra_df = merge(spectra_df, countdf, by=c("x_values", "y_values")) | |
| 209 | |
| 210 ## sort columns to have spectra_names as rowname in first column | |
| 211 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "input m/z count")] | |
| 212 | |
| 213 #end if | |
| 214 #if str($tabular_annotation.load_annotation) == 'yes_annotation': | |
| 215 ## annotation_input was read in the summarized annotation section; all.y=TRUE keeps spectra without annotation (annotation is NA) instead of dropping them | |
| 216 colnames(annotation_input) = c("x_values", "y_values", "annotation") | |
| 217 spectra_df = merge(annotation_input,spectra_df, by=c("x_values", "y_values"), all.y=TRUE) | |
| 218 | |
| 219 ## sort columns to have spectra_names as rowname in first column | |
| 220 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": | |
| 221 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "input m/z count", "annotation")] | |
| 222 #else | |
| 223 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "annotation")] | |
| 224 #end if | |
| 225 | |
| 226 #end if | |
| 227 ## sort rows according to original pixel order (the merges above reorder rows by coordinates) | |
| 228 spectra_df = spectra_df[match(pixel_names, spectra_df\$spectra_names),] | |
| 229 | |
| 230 ## Create list and output tabular | |
| 231 write.table(spectra_df, file="$pixel_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | |
| 232 #end if | |
| 233 | |
| 234 | |
| 235 }else{ | |
| 236 print("file has no features or pixels left") | |
| 237 } | |
| 238 | |
| 239 | |
| 240 ]]></configfile> | |
| 241 </configfiles> | |
| 242 <inputs> | |
| 243 <expand macro="reading_msidata"/> | |
| 244 <param name="output_options" type="select" display="checkboxes" optional="False" multiple="true" label="Multiple output files can be selected"> <!-- the selected values switch on the matching sections of the R script and the matching output filters --> | |
| 245 <option value="int_matrix" selected="True" >intensity matrix</option> | |
| 246 <option value="mz_tabular">mz feature output</option> | |
| 247 <option value="pixel_tabular">pixel output</option> | |
| 248 </param> | |
| 249 <conditional name="counting_calibrants"> <!-- optional extra column for the pixel output: number of input m/z found per spectrum --> | |
| 250 <param name="pixel_with_calibrants" type="select" label="Add number of m/z of interest per spectrum to pixel output"> | |
| 251 <option value="no_calibrants" selected="True">no</option> | |
| 252 <option value="yes_calibrants">yes</option> | |
| 253 </param> | |
| 254 <when value="no_calibrants"/> | |
| 255 <when value="yes_calibrants"> | |
| 256 <expand macro="reading_1_column_mz_tabular" label="For each spectrum the occurrence of the provided m/z values is counted"/> | |
| 257 <param name="plusminus_ppm" value="200" type="float" label="ppm range will be added in both directions to input m/z" help="The m/z window is used to search for peaks, if intensity > 0 found in the window the m/z is considered present, if all intensities are 0 the m/z is considered not present"/> | |
| 258 </when> | |
| 259 </conditional> | |
| 260 <conditional name="tabular_annotation"> <!-- optional pixel annotations: used for the per-group summaries and added as a column to the pixel output --> | |
| 261 <param name="load_annotation" type="select" label="Pixel annotation can be used to summarize intensities per annotation group"> | |
| 262 <option value="no_annotation" selected="True">no</option> | |
| 263 <option value="yes_annotation">yes</option> | |
| 264 </param> | |
| 265 <when value="no_annotation"/> | |
| 266 <when value="yes_annotation"> | |
| 267 <expand macro="reading_pixel_annotations"/> | |
| 268 <param name="summary_type" type="select" display="checkboxes" optional="False" multiple="true" label="Calculation for each m/z and all pixels of an annotation group" help="This step will only work if pixel annotations are provided"> | |
| 269 <option value="mean">mean</option> | |
| 270 <option value="median">median</option> | |
| 271 <option value="sd">standard deviation</option> | |
| 272 </param> | |
| 273 </when> | |
| 274 </conditional> | |
| 275 </inputs> | |
| 276 <outputs> <!-- each filter tests the same selection that guards the corresponding section of the R script --> | |
| 277 <data format="tabular" name="intensity_matrix" label="${tool.name} on ${on_string}: intensity_matrix"> | |
| 278 <filter>"int_matrix" in output_options</filter> | |
| 279 </data> | |
| 280 <data format="tabular" name="pixel_output" label="${tool.name} on ${on_string}: spectra"> | |
| 281 <filter>"pixel_tabular" in output_options</filter> | |
| 282 </data> | |
| 283 <data format="tabular" name="feature_output" label="${tool.name} on ${on_string}: features"> | |
| 284 <filter>"mz_tabular" in output_options</filter> | |
| 285 </data> | |
| 286 <data format="tabular" name="summarized_mean" label="${tool.name} on ${on_string}: group_mean"> <!-- the three summarized outputs additionally require that annotations are loaded --> | |
| 287 <filter>tabular_annotation['load_annotation'] == 'yes_annotation' and 'mean' in tabular_annotation['summary_type']</filter> | |
| 288 </data> | |
| 289 <data format="tabular" name="summarized_median" label="${tool.name} on ${on_string}: group_median"> | |
| 290 <filter>tabular_annotation['load_annotation'] == 'yes_annotation' and 'median' in tabular_annotation['summary_type']</filter> | |
| 291 </data> | |
| 292 <data format="tabular" name="summarized_sd" label="${tool.name} on ${on_string}: group_sd"> | |
| 293 <filter>tabular_annotation['load_annotation'] == 'yes_annotation' and 'sd' in tabular_annotation['summary_type']</filter> | |
| 294 </data> | |
| 295 </outputs> | |
| 296 <tests> | |
| 297 <test expect_num_outputs="2"> <!-- intensity matrix and m/z feature output only --> | |
| 298 <expand macro="infile_imzml"/> | |
| 299 <param name="output_options" value="int_matrix,mz_tabular"/> | |
| 300 <output name="intensity_matrix" file="int_matrix1.tabular"/> | |
| 301 <output name="feature_output" file="features_out1.tabular"/> | |
| 302 </test> | |
| 303 <test expect_num_outputs="3"> <!-- pixel output plus annotation-based mean and sd summaries --> | |
| 304 <expand macro="infile_analyze75"/> | |
| 305 <param name="output_options" value="pixel_tabular"/> | |
| 306 <conditional name="tabular_annotation"> | |
| 307 <param name="load_annotation" value="yes_annotation"/> | |
| 308 <param name="annotation_file" value="annotations.tabular"/> | |
| 309 <param name="column_x" value="1"/> | |
| 310 <param name="column_y" value="2"/> | |
| 311 <param name="column_names" value="4"/> | |
| 312 <param name="tabular_header" value="True"/> | |
| 313 <param name="summary_type" value="mean,sd"/> | |
| 314 </conditional> | |
| 315 <output name="pixel_output" file="pixel_out2.tabular"/> | |
| 316 <output name="summarized_mean" file="mean_out2.tabular"/> | |
| 317 <output name="summarized_sd" file="sd_out2.tabular"/> | |
| 318 </test> | |
| 319 <test expect_num_outputs="3"> <!-- all three basic outputs, with counting of input m/z (200 ppm window) added to the pixel output --> | |
| 320 <expand macro="infile_imzml"/> | |
| 321 <param name="output_options" value="int_matrix,pixel_tabular,mz_tabular"/> | |
| 322 <conditional name="counting_calibrants"> | |
| 323 <param name="pixel_with_calibrants" value="yes_calibrants"/> | |
| 324 <param name="mz_tabular" value="inputcalibrantfile2.txt"/> | |
| 325 <param name="feature_column" value="1"/> | |
| 326 <param name="feature_header" value="False"/> | |
| 327 <param name="plusminus_ppm" value="200"/> | |
| 328 </conditional> | |
| 329 <output name="intensity_matrix" file="int_matrix3.tabular"/> | |
| 330 <output name="feature_output" file="features_out3.tabular"/> | |
| 331 <output name="pixel_output" file="pixel_out3.tabular"/> | |
| 332 </test> | |
| 333 </tests> | |
| 334 <help> | |
| 335 <![CDATA[ | |
| 336 | |
| 337 @CARDINAL_DESCRIPTION@ | |
| 338 | |
| 339 ----- | |
| 340 | |
| 341 This tool provides multiple tabular output options for mass spectrometry imaging data files. | |
| 342 | |
| 343 @MSIDATA_INPUT_DESCRIPTION@ | |
| 344 | |
| 345 @SPECTRA_TABULAR_INPUT_DESCRIPTION@ | |
| 346 | |
| 347 @MZ_TABULAR_INPUT_DESCRIPTION@ | |
| 348 | |
| 349 **Output options** | |
| 350 | |
| 351 - intensity matrix: m/z in rows, spectra in columns, filled with intensity values | |
| 352 - spectra output: spectra in rows - for each spectrum: name, x and y coordinates, order, number of peaks (intensities > 0), total ion current (TIC), most abundant m/z feature per spectrum (m/z with the highest intensity), optional count of input m/z per spectrum, optional spectrum annotation | |
| 353 - mz feature output: m/z in rows - for each m/z: name, m/z, mean, median, standard deviation (sd), standard error of the mean (sem), sum of all intensities per m/z, number of peaks (intensity > 0) per m/z | |
| 354 - summarized intensities: pixel annotations will be used to group spectra into annotation groups and calculate mean, median and sd of the intensities per group | |
| 355 | |
| 356 ]]> | |
| 357 </help> | |
| 358 <expand macro="citations"/> | |
| 359 </tool> |
