Mercurial > repos > galaxyp > cardinal_data_exporter
comparison data_exporter.xml @ 2:3dcfb4eb7738 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit f127be2141cf22e269c85282d226eb16fe14a9c1
| author | galaxyp |
|---|---|
| date | Fri, 15 Feb 2019 10:06:56 -0500 |
| parents | b15bb1daeaa6 |
| children | 7f4830f0bcf3 |
comparison
equal
deleted
inserted
replaced
| 1:818a0717bd6f | 2:3dcfb4eb7738 |
|---|---|
| 1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.0"> | 1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.1"> |
| 2 <description> | 2 <description> |
| 3 exports imzML and Analyze7.5 to tabular files | 3 exports imzML and Analyze7.5 to tabular files |
| 4 </description> | 4 </description> |
| 5 <macros> | 5 <macros> |
| 6 <import>macros.xml</import> | 6 <import>macros.xml</import> |
| 20 | 20 |
| 21 ################################# load libraries and read file ################# | 21 ################################# load libraries and read file ################# |
| 22 | 22 |
| 23 library(Cardinal) | 23 library(Cardinal) |
| 24 | 24 |
| 25 @READING_MSIDATA@ | 25 @READING_MSIDATA_INRAM@ |
| 26 | 26 |
| 27 npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE) | 27 ## to make sure that processed files work as well: |
| 28 | 28 iData(msidata) = iData(msidata)[] |
| 29 if (npeaks > 0){ | |
| 30 | 29 |
| 31 ###################### Intensity matrix output ################################ | 30 ###################### Intensity matrix output ################################ |
| 32 | 31 |
| 33 #if "int_matrix" in str($output_options).split(","): | 32 #if "int_matrix" in str($output_options).split(","): |
| 34 print("intensity matrix output") | 33 print("intensity matrix output") |
| 35 | 34 |
| 36 spectramatrix = spectra(msidata)[] | |
| 37 mz_names = gsub(" = ", "_", names(features(msidata))) | 35 mz_names = gsub(" = ", "_", names(features(msidata))) |
| 38 mz_names = gsub("/", "", mz_names) | 36 mz_names = gsub("/", "", mz_names) |
| 39 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) | 37 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) |
| 40 pixel_names = gsub(" = ", "y_", pixel_names) | 38 pixel_names = gsub(" = ", "y_", pixel_names) |
| 41 | 39 |
| 42 spectramatrix = cbind(mz_names,spectramatrix) | 40 spectramatrix = cbind(mz_names,spectra(msidata)[]) |
| 43 newmatrix = rbind(c("mz_name", pixel_names), spectramatrix) | 41 newmatrix = rbind(c("mz_name", pixel_names), spectramatrix) |
| 44 write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | 42 write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") |
| 43 ## free up RAM space in case further steps will be run: | |
| 44 rm(newmatrix) | |
| 45 rm(spectramatrix) | |
| 46 gc() | |
| 45 | 47 |
| 46 #end if | 48 #end if |
| 47 | 49 |
| 48 | 50 |
| 49 ############################## m/z feature output ########################## | 51 ############################## m/z feature output ########################## |
| 57 full_sample_mean = apply(spectra(msidata)[],1,mean, na.rm=TRUE) | 59 full_sample_mean = apply(spectra(msidata)[],1,mean, na.rm=TRUE) |
| 58 full_sample_median = apply(spectra(msidata)[],1,median, na.rm=TRUE) | 60 full_sample_median = apply(spectra(msidata)[],1,median, na.rm=TRUE) |
| 59 full_sample_sd = apply(spectra(msidata)[],1,sd, na.rm=TRUE) | 61 full_sample_sd = apply(spectra(msidata)[],1,sd, na.rm=TRUE) |
| 60 full_sample_sem = full_sample_sd/full_sample_mean*100 | 62 full_sample_sem = full_sample_sd/full_sample_mean*100 |
| 61 ## npeaks and sum of all intensities per spectrum and mz | 63 ## npeaks and sum of all intensities per spectrum and mz |
| 64 npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE) | |
| 62 mzTIC = rowSums(spectra(msidata)[], na.rm=TRUE) ## calculate intensity sum for each m/z | 65 mzTIC = rowSums(spectra(msidata)[], na.rm=TRUE) ## calculate intensity sum for each m/z |
| 63 peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra) | 66 peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra) |
| 64 | 67 |
| 65 ## combine into dataframe, order is the same for all vectors | 68 ## combine into dataframe, order is the same for all vectors |
| 66 mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz) | 69 mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz) |
| 145 | 148 |
| 146 ## coordinates | 149 ## coordinates |
| 147 xycoordinates = coord(msidata)[,1:2] | 150 xycoordinates = coord(msidata)[,1:2] |
| 148 | 151 |
| 149 ## pixel name | 152 ## pixel name |
| 150 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) | 153 pixel_names = paste0("xy_", xycoordinates\$x, "_", xycoordinates\$y) |
| 151 pixel_names = gsub(" = ", "y_", pixel_names) | |
| 152 | 154 |
| 153 ## pixel order | 155 ## pixel order |
| 154 pixelxyarray=1:length(pixels(msidata)) | 156 pixelxyarray=1:length(pixels(msidata)) |
| 155 | 157 |
| 156 ## number of peaks per spectrum: every intensity value > 0 counts as peak | 158 ## number of peaks per spectrum: every intensity value > 0 counts as peak |
| 157 peaksperpixel = colSums(spectra(msidata)[]> 0, na.rm=TRUE) | 159 peaksperpixel = apply(spectra(msidata)[]> 0, 2, sum, na.rm=TRUE) |
| 158 | 160 |
| 159 ## Total ion chromatogram per spectrum | 161 ## Total ion chromatogram per spectrum |
| 160 TICs = round(colSums(spectra(msidata)[], na.rm=TRUE), digits = 2) | 162 TICs = round(apply(spectra(msidata)[],2, sum, na.rm=TRUE), digits = 2) |
| 163 | |
| 164 ## Median ion intensity per spectrum | |
| 165 med_int = round(apply(spectra(msidata)[], 2, median, na.rm=TRUE), digits = 2) | |
| 166 | |
| 167 ## Maximum ion intensity per spectrum | |
| 168 max_int = round(apply(spectra(msidata)[], 2, max, na.rm=TRUE), digits = 2) | |
| 161 | 169 |
| 162 ## Highest m/z per spectrum | 170 ## Highest m/z per spectrum |
| 163 highestmz = apply(spectra(msidata)[],2,which.max) | 171 highestmz = apply(spectra(msidata)[],2,which.max) |
| 164 highestmz_data = mz(msidata)[highestmz] | 172 highestmz_data = mz(msidata)[highestmz] |
| 165 | 173 |
| 166 ## Combine into dataframe; order is the same for all vectors | 174 ## Combine into dataframe; order is the same for all vectors |
| 167 spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, TICs, highestmz_data) | 175 spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, med_int, TICs, max_int, highestmz_data) |
| 168 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz") | 176 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz") |
| 169 | 177 |
| 170 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": | 178 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": |
| 171 | 179 |
| 172 calibrant_list = read.delim("$counting_calibrants.mz_tabular", header = $counting_calibrants.feature_header, na.strings=c("","NA"), stringsAsFactors = FALSE) | 180 calibrant_list = read.delim("$counting_calibrants.mz_tabular", header = $counting_calibrants.feature_header, na.strings=c("","NA"), stringsAsFactors = FALSE) |
| 173 calibrant_list = calibrant_list[,$counting_calibrants.feature_column, drop=FALSE] | 181 calibrant_list = calibrant_list[,$counting_calibrants.feature_column, drop=FALSE] |
| 189 | 197 |
| 190 for (mass in 1:length(inputcalibrantmasses)){ | 198 for (mass in 1:length(inputcalibrantmasses)){ |
| 191 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] | 199 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] |
| 192 if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){ | 200 if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){ |
| 193 ## intensity of all m/z > 0 | 201 ## intensity of all m/z > 0 |
| 194 intensity_sum = colSums(spectra(filtered_data)[], na.rm=TRUE) > 0 | 202 intensity_sum = apply(spectra(filtered_data)[],2,sum, na.rm=TRUE) > 0 |
| 203 | |
| 195 }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){ | 204 }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){ |
| 196 ## intensity of only m/z > 0 | 205 ## intensity of only m/z > 0 |
| 197 intensity_sum = spectra(filtered_data)[] > 0 | 206 intensity_sum = spectra(filtered_data)[] > 0 |
| 198 }else{ | 207 }else{ |
| 199 intensity_sum = rep(FALSE, ncol(filtered_data))} | 208 intensity_sum = rep(FALSE, ncol(filtered_data))} |
| 200 ## for each pixel add sum of intensities > 0 in the given m/z range | 209 ## for each pixel add sum of intensities > 0 in the given m/z range |
| 201 pixelmatrix = rbind(pixelmatrix, intensity_sum) | 210 pixelmatrix = rbind(pixelmatrix, intensity_sum) |
| 202 } | 211 } |
| 203 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE) | 212 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE) |
| 204 countvector= as.factor(colSums(pixelmatrix, na.rm=TRUE)) | 213 countvector= as.factor(apply(pixelmatrix, 2,sum,na.rm=TRUE)) |
| 214 | |
| 205 }else{countvector = rep(0,ncol(msidata))} | 215 }else{countvector = rep(0,ncol(msidata))} |
| 206 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts | 216 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts |
| 207 colnames(countdf) = c("x_values", "y_values", "input m/z count") | 217 colnames(countdf) = c("x_values", "y_values", "m/z count") |
| 208 spectra_df = merge(spectra_df, countdf, by=c("x_values", "y_values")) | 218 spectra_df = merge(spectra_df, countdf, by=c("x_values", "y_values")) |
| 209 | 219 |
| 210 ## sort columns to have spectra_names as rowname in first column | 220 ## sort columns to have spectra_names as rowname in first column |
| 211 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "input m/z count")] | 221 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "m/z count")] |
| 212 | 222 |
| 213 #end if | 223 #end if |
| 214 #if str($tabular_annotation.load_annotation) == 'yes_annotation': | 224 #if str($tabular_annotation.load_annotation) == 'yes_annotation': |
| 215 | 225 |
| 216 colnames(annotation_input) = c("x_values", "y_values", "annotation") | 226 colnames(annotation_input) = c("x_values", "y_values", "annotation") |
| 217 spectra_df = merge(annotation_input,spectra_df, by=c("x_values", "y_values")) | 227 spectra_df = merge(annotation_input,spectra_df, by=c("x_values", "y_values")) |
| 218 | 228 |
| 219 ## sort columns to have spectra_names as rowname in first column | 229 ## sort columns to have spectra_names as rowname in first column |
| 220 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": | 230 #if str($counting_calibrants.pixel_with_calibrants) == "yes_calibrants": |
| 221 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "input m/z count", "annotation")] | 231 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "m/z count", "annotation")] |
| 222 #else | 232 #else |
| 223 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "spectrum_TIC", "most_abundant_mz", "annotation")] | 233 spectra_df = spectra_df[c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz", "annotation")] |
| 224 #end if | 234 #end if |
| 225 | 235 |
| 226 #end if | 236 #end if |
| 227 ## sort rows according to original pixel order | 237 ## sort rows according to original pixel order |
| 228 spectra_df = spectra_df[match(pixel_names, spectra_df\$spectra_names),] | 238 spectra_df = spectra_df[match(pixel_names, spectra_df\$spectra_names),] |
| 229 | 239 |
| 230 ## Create list and output tabular | 240 ## Create list and output tabular |
| 231 write.table(spectra_df, file="$pixel_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 241 write.table(spectra_df, file="$pixel_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
| 232 #end if | 242 #end if |
| 233 | |
| 234 | |
| 235 }else{ | |
| 236 print("file has no features or pixels left") | |
| 237 } | |
| 238 | 243 |
| 239 | 244 |
| 240 ]]></configfile> | 245 ]]></configfile> |
| 241 </configfiles> | 246 </configfiles> |
| 242 <inputs> | 247 <inputs> |
| 245 <option value="int_matrix" selected="True" >intensity matrix</option> | 250 <option value="int_matrix" selected="True" >intensity matrix</option> |
| 246 <option value="mz_tabular">mz feature output</option> | 251 <option value="mz_tabular">mz feature output</option> |
| 247 <option value="pixel_tabular">pixel output</option> | 252 <option value="pixel_tabular">pixel output</option> |
| 248 </param> | 253 </param> |
| 249 <conditional name="counting_calibrants"> | 254 <conditional name="counting_calibrants"> |
| 250 <param name="pixel_with_calibrants" type="select" label="Add number of m/z of interest per spectrum to pixel output"> | 255 <param name="pixel_with_calibrants" type="select" label="Use file with m/z of interest to calculate their occurrence in each spectrum"> |
| 251 <option value="no_calibrants" selected="True">no</option> | 256 <option value="no_calibrants" selected="True">no</option> |
| 252 <option value="yes_calibrants">yes</option> | 257 <option value="yes_calibrants">yes</option> |
| 253 </param> | 258 </param> |
| 254 <when value="no_calibrants"/> | 259 <when value="no_calibrants"/> |
| 255 <when value="yes_calibrants"> | 260 <when value="yes_calibrants"> |
| 347 @MZ_TABULAR_INPUT_DESCRIPTION@ | 352 @MZ_TABULAR_INPUT_DESCRIPTION@ |
| 348 | 353 |
| 349 **Output options** | 354 **Output options** |
| 350 | 355 |
| 351 - intensity matrix: m/z in rows, spectra in columns, filled with intensity values | 356 - intensity matrix: m/z in rows, spectra in columns, filled with intensity values |
| 352 - spectra output: spectra in rows - for each spectrum: name, x and y coordinates, order, number of peaks (intensities > 0), total ion chromatogram (TIC), highest m/z feature per spectrum, optional count of input m/z per spectrum, optional spectrum annotation | 357 - spectra output: spectra in rows - for each spectrum: name, x and y coordinates, order, number of peaks (intensities > 0), total ion chromatogram (TIC), median intensity, maximum intensity, highest m/z feature per spectrum, optional count of m/z per spectrum, optional spectrum annotation |
| 353 - mz feature output: m/z in rows - for each m/z: name, m/z, mean, median, standard deviation (sd), standard error of the mean (sem; computed as sd/mean*100, i.e. the relative standard deviation in percent), sum of all intensities per m/z, number of peaks (intensity > 0) per m/z | 358 - mz feature output: m/z in rows - for each m/z: name, m/z, mean, median, standard deviation (sd), standard error of the mean (sem; computed as sd/mean*100, i.e. the relative standard deviation in percent), sum of all intensities per m/z, number of peaks (intensity > 0) per m/z |
| 354 - summarized intensities: pixel annotations will be used to group spectra into annotation groups and calculate mean, median and sd of the intensities per group | 359 - summarized intensities: pixel annotations will be used to group spectra into annotation groups and calculate mean, median and sd of the intensities per group |
| 355 | 360 |
| 356 ]]> | 361 ]]> |
| 357 </help> | 362 </help> |
