Mercurial > repos > galaxyp > cardinal_quality_report
changeset 10:e4dda61bb5c8 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit c8d3adac445b4e08e2724e22d7201bfc38bbf40f"
line wrap: on
line diff
--- a/macros.xml Wed May 13 17:55:17 2020 +0000 +++ b/macros.xml Sun Aug 29 07:16:49 2021 +0000 @@ -1,10 +1,10 @@ <macros> - <token name="@VERSION@">2.4.0</token> + <token name="@VERSION@">2.10.0</token> <xml name="requirements"> <requirements> <requirement type="package" version="@VERSION@">bioconductor-cardinal</requirement> - <requirement type="package" version="3.6.1">r-base</requirement> + <!--requirement type="package" version="3.6.1">r-base</requirement--> <yield/> </requirements> </xml> @@ -117,6 +117,13 @@ <token name="@DATA_PROPERTIES_INRAM@"><![CDATA[ ########################### QC numbers ######################## ## including intensity calculations which need data in RAM + + int_matrix = as.matrix(spectra(msidata)) ## only load once into RAM, then re-use + ## Number of NA in spectra matrix + NAcount = sum(is.na(int_matrix)) + ## replace NA with zero to calculate data properties based on intensity matrix, no change in msidata + int_matrix[is.na(int_matrix)] <- 0 + ## Number of features (mz) maxfeatures = length(features(msidata)) ## Range mz @@ -131,14 +138,12 @@ minimumy = min(coord(msidata)[,2]) maximumy = max(coord(msidata)[,2]) ## Range of intensities - minint = round(min(as.matrix(spectra(msidata)), na.rm=TRUE), digits=2) - maxint = round(max(as.matrix(spectra(msidata)), na.rm=TRUE), digits=2) + minint = round(min(int_matrix), digits=2) + maxint = round(max(int_matrix), digits=2) ## Number of intensities > 0, for if conditions - npeaks= sum(as.matrix(spectra(msidata))>0, na.rm=TRUE) + npeaks= sum(int_matrix>0) - ## Number of NA in spectra matrix - NAcount = sum(is.na(spectra(msidata))) - ## Number of NA in spectra matrix - infcount = sum(is.infinite(as.matrix(spectra(msidata)))) + ## Number of infinite values in spectra matrix + infcount = sum(is.infinite(int_matrix)) ## Number of duplicated coordinates dupl_coord = sum(duplicated(coord(msidata))) properties = c("Number of m/z features", @@ -175,7 +180,7 @@ - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the 
imzml format <https://ms-imaging.org/wp/imzml/>`_ - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) - - Cardinal "MSImageSet" data saved as .RData + - Cardinal "MSImageSet" or "MSImagingExperiment" saved as .RData ]]></token> <token name="@MZ_TABULAR_INPUT_DESCRIPTION@"><![CDATA[ - Optional tabular file with m/z values: @@ -243,7 +248,7 @@ </param> <when value="no_processed"/> <when value="processed"> - <param name="accuracy" type="float" value="50" label="Mass accuracy to which the m/z values will be binned" help="This should be set to the native accuracy of the mass spectrometer, if known"/> + <param name="accuracy" type="float" value="50" label="Mass accuracy to which the m/z values will be binned"/> <param name="units" display="radio" type="select" label="Unit of the mass accuracy" help="either m/z or ppm"> <option value="mz" >mz</option> <option value="ppm" selected="True" >ppm</option>
--- a/quality_report.xml Wed May 13 17:55:17 2020 +0000 +++ b/quality_report.xml Sun Aug 29 07:16:49 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.1"> +<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.0"> <description> mass spectrometry imaging QC </description> @@ -7,11 +7,11 @@ </macros> <expand macro="requirements"> <requirement type="package" version="2.3">r-gridextra</requirement> - <requirement type="package" version="3.2.1">r-ggplot2</requirement> + <requirement type="package" version="3.3.5">r-ggplot2</requirement> <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement> - <requirement type="package" version="2.23_16">r-kernsmooth</requirement> - <requirement type="package" version="1.1.0">r-scales</requirement> - <requirement type="package" version="1.0.12"> r-pheatmap</requirement> + <requirement type="package" version="2.23_20">r-kernsmooth</requirement> + <requirement type="package" version="1.1.1">r-scales</requirement> + <requirement type="package" version="1.0.12">r-pheatmap</requirement> </expand> <command detect_errors="exit_code"> <![CDATA[ @@ -60,7 +60,7 @@ merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE) merged_annotation[is.na(merged_annotation)] = "NA" merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] - msidata\$annotation = as.factor(merged_annotation[,4]) + msidata\$annotation = as.character(merged_annotation[,4]) #end if @@ -68,13 +68,13 @@ @DATA_PROPERTIES_INRAM@ ## Median intensities -medint = round(median(spectra(msidata), na.rm=TRUE), digits=2) +medint = round(median(int_matrix), digits=2) ## Spectra multiplied with m/z (potential number of peaks) -numpeaks = ncol(msidata)*nrow(msidata) +numpeaks = as.numeric(ncol(msidata)*nrow(msidata)) ## Percentage of intensities > 0 percpeaks = round(npeaks/numpeaks*100, digits=2) ## Number of empty TICs -TICs = 
pixelApply(msidata, sum) +TICs = pixelApply(msidata, sum, na.rm=TRUE) NumemptyTIC = sum(TICs == 0) ## Median und sd TIC medTIC = round(median(TICs), digits=1) @@ -82,6 +82,8 @@ ## Median and sd # peaks per spectrum medpeaks = round(median(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0) sdpeaks = round(sd(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0) +##max window size +max_window = round(mz(msidata)[nrow(msidata)]-mz(msidata)[nrow(msidata)-1], digits=2) ## Processing informations centroidedinfo = centroided(msidata) @@ -137,6 +139,7 @@ "Number of empty spectra", "Median TIC ± sd", "Median # peaks per spectrum ± sd", + "maximum m/z window size", "Centroided", paste0("input m/z (#valid/#input) in \n", "$calibrant_file.display_name")) @@ -145,6 +148,7 @@ paste0(NumemptyTIC), paste0(medTIC, " ± ", sdTIC), paste0(medpeaks, " ± ",sdpeaks), + paste0(max_window), paste0(centroidedinfo), paste0(number_calibrants_valid, " / ", number_calibrants_in)) @@ -183,12 +187,13 @@ ### only for previously combined data, same plot as in combine QC pdf - if (!is.null(levels(msidata\$annotation))){ + if (!is.null(unique(msidata\$annotation))){ - number_combined = length(levels(msidata\$annotation)) + number_combined = length(unique(msidata\$annotation)) position_df = data.frame(coord(msidata)\$x, coord(msidata)\$y, msidata\$annotation) colnames(position_df) = c("x", "y","annotation") + print(position_df) combine_plot = ggplot(position_df, aes(x=x, y=y, fill=annotation))+ geom_tile() + @@ -220,7 +225,7 @@ pixelxyarray=data.frame(coord(msidata)\$x, coord(msidata)\$y,pixelnumber) colnames(pixelxyarray) = c("x", "y", "pixelnumber") gg_title = "Pixel order" - + print(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber))+ geom_tile() + coord_fixed()+ ggtitle(gg_title) + theme_bw()+ @@ -354,24 +359,29 @@ #end if #################### 4) m/z heatmaps ####################################### - par(mfrow=c(1,1), mar=c(5.1, 4.1, 4.1, 2.1), mgp=c(3, 1, 0), las=0) - if 
(length(inputcalibrants[,1]) != 0){ - for (mass in 1:length(inputcalibrants[,1])){ - par(oma=c(0,0,0,1))## margin for image legend + + #if $report_depth: + + par(mfrow=c(1,1), mar=c(5.1, 4.1, 4.1, 2.1), mgp=c(3, 1, 0), las=0) + if (length(inputcalibrants[,1]) != 0){ + for (mass in 1:length(inputcalibrants[,1])){ + par(oma=c(0,0,0,1))## margin for image legend - tryCatch( - { - print(image(msidata, mz=inputcalibrants[,1][mass], plusminus=plusminusvalues[mass], - main= paste0(inputcalibrants[,2][mass], ": ", round(inputcalibrants[,1][mass], digits = 2)," (±",$plusminus_ppm, " ppm)"), - contrast.enhance = "histogram", strip=FALSE, ylim= c(maximumy,minimumy))) - }, - error=function(cond) { - ## if there are not enough intensities in the mz range skip creating an image - print(paste0("Not enough intensities > 0 for m/z ", inputcalibrants[,1][mass])) - } - ) - } - } else {print("4) The input peptide and calibrant m/z were not provided or outside the m/z range")} + tryCatch( + { + print(image(msidata, mz=inputcalibrants[,1][mass], plusminus=plusminusvalues[mass], + main= paste0(inputcalibrants[,2][mass], ": ", round(inputcalibrants[,1][mass], digits = 2)," (±",$plusminus_ppm, " ppm)"), + contrast.enhance = "histogram", strip=FALSE, ylim= c(maximumy,minimumy))) + }, + error=function(cond) { + ## if there are not enough intensities in the mz range skip creating an image + print(paste0("Not enough intensities > 0 for m/z ", inputcalibrants[,1][mass])) + } + ) + } + } else {print("4) The input peptide and calibrant m/z were not provided or outside the m/z range")} + + #end if #################### 5) Number of peaks per pixel - image ################## @@ -414,72 +424,75 @@ ############################### 6b) median int image ############################### - median_int = pixelApply(msidata, median) + #if $report_depth: + + median_int = pixelApply(msidata, median, na.rm=TRUE) - median_coordarray=data.frame(coord(msidata)\$x, coord(msidata)\$y, median_int) - 
colnames(median_coordarray) = c("x", "y", "median_int") - print(ggplot(median_coordarray, aes(x=x, y=y, fill=median_int))+ - geom_tile() + coord_fixed() + - ggtitle("Median intensity per spectrum")+ - theme_bw() + - theme(plot.title = element_text(hjust = 0.5))+ - theme(text=element_text(family="ArialMT", face="bold", size=12))+ - scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") - ,space = "Lab", na.value = "black", name = "median\nintensity")) + median_coordarray=data.frame(coord(msidata)\$x, coord(msidata)\$y, median_int) + colnames(median_coordarray) = c("x", "y", "median_int") + print(ggplot(median_coordarray, aes(x=x, y=y, fill=median_int))+ + geom_tile() + coord_fixed() + + ggtitle("Median intensity per spectrum")+ + theme_bw() + + theme(plot.title = element_text(hjust = 0.5))+ + theme(text=element_text(family="ArialMT", face="bold", size=12))+ + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") + ,space = "Lab", na.value = "black", name = "median\nintensity")) - ## remove median_coordarray to clean up RAM space - rm(median_coordarray) - gc() + ## remove median_coordarray to clean up RAM space + rm(median_coordarray) + gc() - ############################### 6c) max int image ############################### - - max_int = pixelApply(msidata, max) + ############################### 6c) max int image ############################### + + max_int = pixelApply(msidata, max, na.rm=TRUE) - max_coordarray=data.frame(coord(msidata)\$x, coord(msidata)\$y, max_int) - colnames(max_coordarray) = c("x", "y", "max_int") - print(ggplot(max_coordarray, aes(x=x, y=y, fill=max_int))+ - geom_tile() + coord_fixed() + - ggtitle("Maximum intensity per spectrum")+ - theme_bw() + - theme(plot.title = element_text(hjust = 0.5))+ - theme(text=element_text(family="ArialMT", face="bold", size=12))+ - scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") - ,space = "Lab", na.value = "black", name = "max\nintensity")) + 
max_coordarray=data.frame(coord(msidata)\$x, coord(msidata)\$y, max_int) + colnames(max_coordarray) = c("x", "y", "max_int") + print(ggplot(max_coordarray, aes(x=x, y=y, fill=max_int))+ + geom_tile() + coord_fixed() + + ggtitle("Maximum intensity per spectrum")+ + theme_bw() + + theme(plot.title = element_text(hjust = 0.5))+ + theme(text=element_text(family="ArialMT", face="bold", size=12))+ + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") + ,space = "Lab", na.value = "black", name = "max\nintensity")) - ## remove median_coordarray to clean up RAM space - rm(max_coordarray) - gc() + ## remove median_coordarray to clean up RAM space + rm(max_coordarray) + gc() + + ############################### 7) Most abundant m/z image ################# + + ## for each spectrum find the row (m/z) with the highest intensity + highestmz = pixelApply(msidata, which.max) - ############################### 7) Most abundant m/z image ################# - - ## for each spectrum find the row (m/z) with the highest intensity - highestmz = pixelApply(msidata, which.max) + ## in case for some spectra max returns integer(0), highestmz is a list and integer(0) have to be replaced with NA and unlisted + if (class(highestmz) == "list"){ + ##find zero-length values + zero_entry <- !(sapply(highestmz, length)) + ### replace these values with NA + highestmz[zero_entry] <- NA + ### unlist list to get a vector + highestmz = unlist(highestmz)} - ## in case for some spectra max returns integer(0), highestmz is a list and integer(0) have to be replaced with NA and unlisted - if (class(highestmz) == "list"){ - ##find zero-length values - zero_entry <- !(sapply(highestmz, length)) - ### replace these values with NA - highestmz[zero_entry] <- NA - ### unlist list to get a vector - highestmz = unlist(highestmz)} - - highestmz_matrix = data.frame(coord(msidata)\$x, coord(msidata)\$y,mz(msidata)[highestmz]) - colnames(highestmz_matrix) = c("x", "y", "highestmzinDa") + highestmz_matrix = 
data.frame(coord(msidata)\$x, coord(msidata)\$y,mz(msidata)[highestmz]) + colnames(highestmz_matrix) = c("x", "y", "highestmzinDa") - print(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa))+ - geom_tile() + coord_fixed() + - ggtitle("Most abundant m/z in each spectrum")+ - theme_bw() + - theme(plot.title = element_text(hjust = 0.5))+ - scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z", - limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa)))+ - theme(text=element_text(family="ArialMT", face="bold", size=12))) + print(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa))+ + geom_tile() + coord_fixed() + + ggtitle("Most abundant m/z in each spectrum")+ + theme_bw() + + theme(plot.title = element_text(hjust = 0.5))+ + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z", + limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa)))+ + theme(text=element_text(family="ArialMT", face="bold", size=12))) - ## remove highestmz_matrix to clean up RAM space - rm(highestmz_matrix) - gc() + ## remove highestmz_matrix to clean up RAM space + rm(highestmz_matrix) + gc() + #end if ########################## 8) optional pca image for two components ################# @@ -495,7 +508,7 @@ par(oma=c(0,0,0,1))## margin for image legend print(image(pca, column = "PC1" , strip=FALSE, superpose = FALSE, main="PC1", col.regions = risk.colors(100), layout=c(2,1), ylim= c(maximumy+0.2*maximumy,minimumy-1))) print(image(pca, column = "PC2" , strip=FALSE, superpose = FALSE, main="PC2", col.regions = risk.colors(100), layout=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-1))) - ## remove pca to clean up RAM space + ## remove pca to clean up space rm(pca) gc() @@ -508,38 +521,44 @@ ########################## 9) number of peaks per spectrum ################# ## 9a) scatterplot + + #if 
$report_depth: - plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="Number of peaks per spectrum") - title(xlab="Spectra index", line=3) - title(ylab="Number of peaks", line=4) + plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="Number of peaks per spectrum") + title(xlab="Spectra index", line=3) + title(ylab="Number of peaks", line=4) - if (!is.null(levels(msidata\$annotation))){ - abline(v=abline_vector, lty = 3)} - - ## 9b) histogram + if (!is.null(unique(msidata\$annotation))){ + abline(v=abline_vector, lty = 3)} + + ## 9b) histogram + - hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="") - title(main="Number of peaks per spectrum", line=2) - title(ylab="Frequency = # spectra", line=4) - abline(v=median(peaksperpixel), col="blue") + + hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="") + title(main="Number of peaks per spectrum", line=2) + title(ylab="Frequency = # spectra", line=4) + abline(v=median(peaksperpixel), col="blue") - ## 9c) additional histogram to show contribution of annotation groups + ## 9c) additional histogram to show contribution of annotation groups - if (!is.null(levels(msidata\$annotation))){ - - df_9 = data.frame(peaksperpixel, msidata\$annotation) - colnames(df_9) = c("Npeaks", "annotation") + if (!is.null(unique(msidata\$annotation))){ - hist_9 = ggplot(df_9, aes(x=Npeaks, fill=annotation)) + - geom_histogram()+ theme_bw()+ - theme(text=element_text(family="ArialMT", face="bold", size=12))+ - theme(plot.title = element_text(hjust = 0.5))+ - theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 8))+ - theme(legend.position="bottom",legend.direction="vertical")+ - labs(title="Number of peaks per spectrum and annotation group", x="Number of peaks per spectrum", y = "Frequency = # spectra") + - guides(fill=guide_legend(ncol=5,byrow=TRUE))+ - geom_vline(xintercept = median(peaksperpixel), size = 1, 
colour = "black",linetype = "dashed") - print(hist_9)} + df_9 = data.frame(peaksperpixel, msidata\$annotation) + colnames(df_9) = c("Npeaks", "annotation") + + hist_9 = ggplot(df_9, aes(x=Npeaks, fill=annotation)) + + geom_histogram()+ theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=12))+ + theme(plot.title = element_text(hjust = 0.5))+ + theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 8))+ + theme(legend.position="bottom",legend.direction="vertical")+ + labs(title="Number of peaks per spectrum and annotation group", x="Number of peaks per spectrum", y = "Frequency = # spectra") + + guides(fill=guide_legend(ncol=5,byrow=TRUE))+ + geom_vline(xintercept = median(peaksperpixel), size = 1, colour = "black",linetype = "dashed") + print(hist_9)} + + #end if ########################## 10) TIC per spectrum ########################### @@ -555,17 +574,17 @@ title(xlab="Spectra index", line=3) title(ylab = "Total ion current intensity", line=4) - if (!is.null(levels(msidata\$annotation))){ + if (!is.null(unique(msidata\$annotation))){ abline(v=abline_vector, lty = 3)} ## 10b) histogram - hist((TICs), main="", las=1, xlab = "TIC per spectrum", ylab="") + hist(TICs, main="", las=1, xlab = "TIC per spectrum", ylab="") title(main= "TIC per spectrum", line=2) title(ylab="Frequency = # spectra", line=4) abline(v=median(TICs[TICs>0]), col="blue") ## 10c) additional histogram to show annotation contributions - if (!is.null(levels(msidata\$annotation))){ + if (!is.null(unique(msidata\$annotation))){ df_10 = data.frame((TICs), msidata\$annotation) colnames(df_10) = c("TICs", "annotation") @@ -591,68 +610,71 @@ ########################## 12) Number of peaks per m/z ##################### - peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE) + #if $report_depth: + + peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE) - par(mfrow = c(2,1), mar=c(5,6,4,4.5)) - ## 12a) scatterplot - plot_colorByDensity(mz(msidata),peakspermz, main= 
"Number of peaks per m/z", ylab ="") - title(xlab="m/z", line=2.5) - title(ylab = "Number of peaks", line=4) - axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1) - mtext("Coverage of spectra [%]", 4, line=3, adj=1) + par(mfrow = c(2,1), mar=c(5,6,4,4.5)) + ## 12a) scatterplot + plot_colorByDensity(mz(msidata),peakspermz, main= "Number of peaks per m/z", ylab ="") + title(xlab="m/z", line=2.5) + title(ylab = "Number of peaks", line=4) + axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1) + mtext("Coverage of spectra [%]", 4, line=3, adj=1) - ## 12b) histogram - hist(peakspermz, main="", las=1, ylab="", xlab="") - title(ylab = "Frequency", line=4) - title(main="Number of peaks per m/z", xlab = "Number of peaks per m/z", line=2) - abline(v=median(peakspermz), col="blue") + ## 12b) histogram + hist(peakspermz, main="", las=1, ylab="", xlab="") + title(ylab = "Frequency", line=4) + title(main="Number of peaks per m/z", xlab = "Number of peaks per m/z", line=2) + abline(v=median(peakspermz), col="blue") - ########################## 13) Sum of intensities per m/z ################## + ########################## 13) Sum of intensities per m/z ################## - ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel) - mzTIC = featureApply(msidata, sum, na.rm=TRUE) ## calculate intensity sum for each m/z + ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel) + mzTIC = featureApply(msidata, sum, na.rm=TRUE) ## calculate intensity sum for each m/z - par(mfrow = c(2,1), mar=c(5,6,4,2)) - ## 13a) scatterplot - plot_colorByDensity(mz(msidata),mzTIC, main= "Sum of intensities per m/z", ylab ="") - title(xlab="m/z", line=2.5) - title(ylab="Intensity sum", line=4) + par(mfrow = c(2,1), mar=c(5,6,4,2)) + ## 13a) scatterplot + plot_colorByDensity(mz(msidata),mzTIC, main= "Sum of intensities per m/z", ylab ="") + 
title(xlab="m/z", line=2.5) + title(ylab="Intensity sum", line=4) - ## 13b) histogram - hist(mzTIC, main="", xlab = "", las=1, ylab="") - title(main="Sum of intensities per m/z", line=2, ylab="") - title(xlab = "sum of intensities per m/z") - title(ylab = "Frequency", line=4) - abline(v=median(mzTIC[mzTIC>0]), col="blue") + ## 13b) histogram + hist(mzTIC, main="", xlab = "", las=1, ylab="") + title(main="Sum of intensities per m/z", line=2, ylab="") + title(xlab = "sum of intensities per m/z") + title(ylab = "Frequency", line=4) + abline(v=median(mzTIC[mzTIC>0]), col="blue") - ################################## V) intensity plots ######################## - ############################################################################ - print("intensity plots") - ########################## 14) Intensity distribution ###################### + ################################## V) intensity plots ######################## + ############################################################################ + print("intensity plots") + ########################## 14) Intensity distribution ###################### - par(mfrow = c(2,1), mar=c(5,6,4,2)) + par(mfrow = c(2,1), mar=c(5,6,4,2)) - ## 14a) Median intensity over spectra - medianint_spectra = pixelApply(msidata, median) - plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index", ylab="") - title(ylab="Median spectrum intensity", line=4) - if (!is.null(levels(msidata\$annotation))){ - abline(v=abline_vector, lty = 3)} + ## 14a) Median intensity over spectra + medianint_spectra = pixelApply(msidata, median, na.rm=TRUE) + plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index", ylab="") + title(ylab="Median spectrum intensity", line=4) + if (!is.null(unique(msidata\$annotation))){ + abline(v=abline_vector, lty = 3)} - ## 14b) histogram: - hist(as.matrix(spectra(msidata)), main="", xlab = "", ylab="", las=1) - title(main="Intensity histogram", line=2) - 
title(xlab="intensities") - title(ylab="Frequency", line=4) - abline(v=median(as.matrix(spectra(msidata))[(as.matrix(spectra(msidata))>0)], na.rm=TRUE), col="blue") + ## 14b) histogram (blue line = median of all intensities > 0): + hist(int_matrix, main="", xlab = "", ylab="", las=1) + title(main="Intensity histogram", line=2) + title(xlab="intensities") + title(ylab="Frequency", line=4) + abline(v=median(int_matrix[int_matrix>0]), col="blue") + #end if ## 14c) histogram to show contribution of annotation groups - if (!is.null(levels(msidata\$annotation))){ + if (!is.null(unique(msidata\$annotation))){ df_13 = data.frame(matrix(,ncol=2, nrow=0)) - for (subsample in levels(msidata\$annotation)){ + for (subsample in unique(msidata\$annotation)){ log2_int_subsample = spectra(msidata)[,msidata\$annotation==subsample] df_subsample = data.frame(as.numeric(log2_int_subsample)) df_subsample\$annotation = subsample @@ -668,43 +690,43 @@ theme(legend.position="bottom",legend.direction="vertical")+ theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = 8))+ guides(fill=guide_legend(ncol=5,byrow=TRUE))+ - geom_vline(xintercept = median(spectra(msidata)[(spectra(msidata)>0)]), size = 1, colour = "black",linetype = "dashed") + geom_vline(xintercept = median(int_matrix[int_matrix>0]), size = 1, colour = "black",linetype = "dashed") print(hist_13) ## 14d) boxplots to visualize in a different way the intensity distributions - par(mfrow = c(1,1), cex.axis=1.3, cex.lab=1.3, mar=c(13.1,4.1,5.1,2.1)) + par(mfrow = c(1,1), cex.axis=1.3, cex.lab=1.3, mar=c(10,4.1,5.1,2.1)) mean_matrix = matrix(,ncol=0, nrow = nrow(msidata)) - for (subsample in levels(msidata\$annotation)){ + for (subsample in unique(msidata\$annotation)){ mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$annotation==subsample],na.rm=TRUE) mean_matrix = cbind(mean_matrix, mean_mz_sample)} - - boxplot(log10(mean_matrix), ylab = "Log10 mean intensity per m/z", main="Log10 mean m/z intensities per annotation group", xaxt = 
"n") - (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), las=2)) + + boxplot(log10(as.data.frame(mean_matrix)), ylab = "Log10 mean intensity per m/z", main="Log10 mean m/z intensities per annotation group", xaxt = "n") + (axis(1, at = c(1:number_combined), cex.axis=0.9, labels=unique(msidata\$annotation), las=2)) ## 14e) Heatmap of mean intensities of annotation groups - colnames(mean_matrix) = levels(msidata\$annotation) + colnames(mean_matrix) = unique(msidata\$annotation) mean_matrix[is.na(mean_matrix)] = 0 heatmap.parameters <- list(mean_matrix, show_rownames = T, show_colnames = T, main = "Heatmap of mean intensities per annotation group") - par(oma=c(3,0,0,0)) - print(heatmap(mean_matrix),margins = c(10, 10)) + par(oma=c(5,0,0,0)) + heatmap(mean_matrix) ## 14f) PCA of mean intensities of annotation groups - + par(mar=c(4.1, 4.1, 4.1, 8.5)) ## define annotation by colour - annotation_colour = rainbow(length(levels(msidata\$annotation)))[as.factor(levels(msidata\$annotation))] + annotation_colour = rainbow(length(unique(msidata\$annotation)))[as.factor(unique(msidata\$annotation))] ## transform and scale dataframe pca = prcomp(t(mean_matrix),center=FALSE,scale.=FALSE) ## plot single plot plot(pca\$x[,c(1,2)],col=annotation_colour,pch=19) + legend("topright",xpd=TRUE, bty="n", inset=c(-0.3,0), cex=0.8, legend=unique(msidata\$annotation), col=rainbow(length(unique(msidata\$annotation))), pch=19) ## plot pca with colours for max first 5 PCs pc_comp = ifelse(ncol(pca\$x)<5 , ncol(pca\$x), 5) pairs(pca\$x[,1:pc_comp],col=annotation_colour,pch=19) - legend("bottom", horiz = TRUE, legend=levels(msidata\$annotation), col=rainbow(length(levels(msidata\$annotation))), pch=19) } @@ -714,36 +736,40 @@ ############################ 15) Mass spectra ############################## + ## replace any NA with 0, otherwise plot function will not work at all msidata_no_NA = msidata + + #if $report_depth: - ## find three equal m/z ranges for the average mass 
spectra plots: - third_mz_range = round(nrow(msidata_no_NA)/3,0) + ## find three equal m/z ranges for the average mass spectra plots: + third_mz_range = round(nrow(msidata_no_NA)/3,0) - par(cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1)) - print(plot(msidata_no_NA, run="infile", layout=c(2,2), strip=FALSE, main= "Average spectrum")) - print(plot(msidata_no_NA[1:third_mz_range,], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum")) - print(plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum")) - print(plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum")) + par(cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1)) + print(plot(msidata_no_NA, run="infile", layout=c(2,2), strip=FALSE, main= "Average spectrum", col="black")) + print(plot(msidata_no_NA[1:third_mz_range,], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum", col="black")) + print(plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum", col="black")) + print(plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum", col="black")) - ## plot one average mass spectrum for each pixel annotation group + ## plot one average mass spectrum for each pixel annotation group - if (!is.null(levels(msidata\$annotation))){ - ## print legend only for less than 10 samples - if (length(levels(msidata\$annotation)) < 10){ - key_legend = TRUE - }else{key_legend = FALSE} - par(mfrow = c(1,1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1)) - print(plot(msidata, run="infile", pixel.groups=msidata\$annotation, key=key_legend, col=hue_pal()(length(levels(msidata\$annotation))),superpose=TRUE, main="Average mass spectra for annotation groups")) - } + if (!is.null(unique(msidata\$annotation))){ + ## 
print legend only for less than 10 samples + if (length(unique(msidata\$annotation)) < 10){ + key_legend = TRUE + }else{key_legend = FALSE} + par(mfrow = c(1,1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1)) + print(plot(msidata, run="infile", pixel.groups=msidata\$annotation, key=key_legend, col=hue_pal()(length(unique(msidata\$annotation))),superpose=TRUE, main="Average mass spectra for annotation groups")) + } - ## plot 4 random mass spectra - ## find four random, not empty pixel to plot their spectra in the following plots: - pixel_vector = sample(which(TICs != 0),4) + ## plot 4 random mass spectra + ## find four random, not empty pixel to plot their spectra in the following plots: + pixel_vector = sample(which(TICs != 0),4) - par(mfrow = c(2, 2), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1)) - print(plot(msidata_no_NA, pixel = pixel_vector)) + par(mfrow = c(2, 2), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1)) + print(plot(msidata_no_NA, pixel = pixel_vector, col="black")) + #end if ################### 16) Zoomed in mass spectra for calibrants ############## @@ -753,6 +779,7 @@ if (length(inputcalibrantmasses) != 0){ + ### calculate plusminus values in m/z for each calibrant, this is used for all following plots plusminusvalues = rep($plusminus_ppm/1000000, length(inputcalibrantmasses)) * inputcalibrantmasses @@ -765,6 +792,17 @@ maxmasspixel2 = features(msidata_no_NA, mz=inputcalibrantmasses[mass]+0.5) minmasspixel3 = features(msidata_no_NA, mz=inputcalibrantmasses[mass]-1.5) maxmasspixel3 = features(msidata_no_NA, mz=inputcalibrantmasses[mass]+3) + + ## test if some values are lower than min(mz) + minmasspixel1 = ifelse(length(minmasspixel1)>0, minmasspixel1, 1) + minmasspixel2 = ifelse(length(minmasspixel2)>0, minmasspixel2, 1) + minmasspixel3 = ifelse(length(minmasspixel3)>0, minmasspixel3, 1) + + ## test if min and max are same (more likely for centroided data): + maxmasspixel1 = ifelse(minmasspixel1 != maxmasspixel1, maxmasspixel1, maxmasspixel1 + 1) + 
maxmasspixel2 = ifelse(minmasspixel2 != maxmasspixel2, maxmasspixel2, maxmasspixel2 + 1) ## widen window 2 from its own upper bound + maxmasspixel3 = ifelse(minmasspixel3 != maxmasspixel3, maxmasspixel3, maxmasspixel3 + 1) ## widen window 3 from its own upper bound + ### find m/z with the highest mean intensity in m/z range (red line in plot 16) and calculate ppm difference for plot 17 filtered_data = msidata_no_NA[mz(msidata_no_NA) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata_no_NA) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] @@ -790,20 +828,20 @@ par(oma=c(0,0,2,0)) ## average plot - print(plot(msidata_no_NA[minmasspixel1:maxmasspixel1,], run="infile", layout=c(2,2), strip=FALSE, main= "Average spectrum")) + print(plot(msidata_no_NA[minmasspixel1:maxmasspixel1,], run="infile", layout=c(2,2), strip=FALSE, main= "Average spectrum", col="black")) abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3)) abline(v=c(maxvalue), col="red", lty=2) abline(v=c(mzvalue), col="green2", lty=4) ## average plot including points per data point - print(plot(msidata_no_NA[minmasspixel1:maxmasspixel1,], run="infile", layout=FALSE, strip=FALSE, main="Average spectrum with data points")) + print(plot(msidata_no_NA[minmasspixel1:maxmasspixel1,], run="infile", layout=FALSE, strip=FALSE, main="Average spectrum with data points", col="black")) points(mz(msidata_no_NA[minmasspixel1:maxmasspixel1,]), rowMeans(spectra(msidata_no_NA)[minmasspixel1:maxmasspixel1,,drop=FALSE]), col="blue", pch=20) ## plot of third average plot - print(plot(msidata_no_NA[minmasspixel2:maxmasspixel2,], run="infile", layout=FALSE, strip=FALSE, main= "Average spectrum")) + print(plot(msidata_no_NA[minmasspixel2:maxmasspixel2,], run="infile", layout=FALSE, strip=FALSE, main= "Average spectrum", col="black")) abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), 
col="blue", lty=c(3,5,3)) abline(v=c(maxvalue), col="red", lty=2) abline(v=c(mzvalue), col="green2", lty=4) ## plot of fourth average plot - print(plot(msidata_no_NA[minmasspixel3:maxmasspixel3,], run="infile", layout=FALSE, strip=FALSE, main= "Average spectrum")) + print(plot(msidata_no_NA[minmasspixel3:maxmasspixel3,], run="infile", layout=FALSE, strip=FALSE, main= "Average spectrum", col="black")) abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3)) abline(v=c(maxvalue), col="red", lty=2) abline(v=c(mzvalue), col="green2", lty=4) @@ -813,7 +851,7 @@ ### 16b) one large extra plot with different colours for different pixel annotation groups - if (!is.null(levels(msidata\$annotation))){ + if (!is.null(unique(msidata\$annotation))){ if (number_combined < 10){ key_zoomed = TRUE }else{key_zoomed = FALSE} @@ -831,12 +869,16 @@ ######### 17) ppm difference input calibrant m/z and m/z with max intensity in given m/z range######### + #if $report_depth: + par(mfrow = c(1,1)) ### plot the ppm difference calculated above: theor. 
m/z value to highest m/z value: calibrant_names = as.character(inputcalibrants[,2]) + diff_df = data.frame(differencevector, calibrant_names) + if (sum(is.na(diff_df[,1])) == nrow(diff_df)){ plot(0,type='n',axes=FALSE,ann=FALSE) title(main=paste("plot 17: no peaks in the chosen region, repeat with higher ppm range")) @@ -866,6 +908,8 @@ theme(axis.text.x = element_text(angle = 90, hjust = 1, size=14)) print(diff_plot2) + + #end if #################### 19) ppm difference over pixels ##################### @@ -910,11 +954,12 @@ for (each_cal in 1:ncol(ppm_df)){ lines(ppm_df[,each_cal], col=mycolours[each_cal], type="p")} legend("topright", inset=c(-0.2,0), xpd = TRUE, bty="n", cex=0.8,legend=inputcalibrantmasses, col=mycolours[1:ncol(ppm_df)],lty=1) - if (!is.null(levels(msidata\$annotation))){ + if (!is.null(unique(msidata\$annotation))){ abline(v=abline_vector, lty = 3)}} ### make x-y-images for mz accuracy + #if $report_depth: ppm_dataframe = data.frame(coord(msidata)\$x, coord(msidata)\$y, ppm_df) colnames(ppm_dataframe) = c("x", "y", "ppm_df") @@ -931,6 +976,7 @@ theme(text=element_text(family="ArialMT", face="bold", size=12))+ scale_fill_gradient2(low = "navy", mid = "grey", high = "red", midpoint = 0 ,space = "Lab", na.value = "black", name = "ppm\nerror"))} + #end if }else{print("plot 16+17+18+19) The inputcalibrant m/z were not provided or outside the m/z range")} }else{ @@ -957,6 +1003,7 @@ <expand macro="reading_2_column_mz_tabular" optional="true"/> <param name="plusminus_ppm" value="200" type="float" label="ppm range" help="Will be added in both directions to input calibrant m/z"/> <param name="do_pca" type="boolean" label="PCA with 2 components"/> + <param name="report_depth" type="boolean" label="Generate full QC report" truevalue="TRUE" falsevalue="FALSE" checked="True" help="No: does not generate all plots but only the most informatives"/> <repeat name="calibrantratio" title="Plot fold change of two m/z" min="0" max="10"> <param name="mass1" 
value="1111" type="float" label="M/z 1" help="First m/z"/> <param name="mass2" value="2222" type="float" label="M/z 2" help="Second m/z"/> @@ -982,7 +1029,7 @@ </param> <conditional name="processed_cond"> <param name="processed_file" value="processed"/> - <param name="accuracy" value="200"/> + <param name="accuracy" value="400"/> <param name="units" value="ppm"/> </conditional> <conditional name="tabular_annotation"> @@ -1002,7 +1049,6 @@ </repeat> <output name="QC_report" file="QC_imzml.pdf" compare="sim_size"/> </test> - <test> <expand macro="infile_analyze75"/> <conditional name="tabular_annotation"> @@ -1012,7 +1058,6 @@ <param name="do_pca" value="True"/> <output name="QC_report" file="QC_analyze75.pdf" compare="sim_size"/> </test> - <test> <param name="infile" value="3_files_combined.RData" ftype="rdata"/> <conditional name="tabular_annotation"> @@ -1043,6 +1088,25 @@ <param name="do_pca" value="False"/> <output name="QC_report" file="QC_empty_spectra.pdf" compare="sim_size"/> </test> + <test> + <param name="infile" value="" ftype="imzml"> + <composite_data value="Example_Processed.imzML"/> + <composite_data value="Example_Processed.ibd"/> + </param> + <conditional name="processed_cond"> + <param name="processed_file" value="processed"/> + <param name="accuracy" value="200"/> + <param name="units" value="ppm"/> + </conditional> + <conditional name="tabular_annotation"> + <param name="load_annotation" value="no_annotation"/> + </conditional> + <param name="calibrant_file" value="inputcalibrantfile1.tabular" ftype="tabular"/> + <param name="mz_column" value="1"/> + <param name="name_column" value="1"/> + <param name="report_depth" value="False"/> + <output name="QC_report" file="QC_imzml_shortreport.pdf" compare="sim_size"/> + </test> </tests> <help> <