Mercurial > repos > tomnl > create_msp
comparison anticipated_purity_dims.R @ 0:4b417094bf71 draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 0aa10df0ec1ed71601f932cfb11d7d4d4f620d80-dirty
| author | tomnl |
|---|---|
| date | Wed, 02 May 2018 13:09:23 -0400 |
| parents | |
| children | 3f1f2d545f3a |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4b417094bf71 |
|---|---|
| 1 library(msPurity) | |
| 2 library(optparse) | |
| 3 print(sessionInfo()) | |
| 4 | |
| 5 option_list <- list( | |
| 6 make_option("--mzML_file", type = "character"), | |
| 7 make_option("--peaks_file", type = "character"), | |
| 8 make_option(c("-o", "--out_dir"), type = "character"), | |
| 9 make_option("--minOffset", default = 0.5), | |
| 10 make_option("--maxOffset", default = 0.5), | |
| 11 make_option("--ilim", default = 0.05), | |
| 12 make_option("--ppm", default = 4), | |
| 13 make_option("--dimspy", action = "store_true"), | |
| 14 make_option("--sim", action = "store_true"), | |
| 15 make_option("--remove_nas", action = "store_true"), | |
| 16 make_option("--iwNorm", type = "character", default = "none"), | |
| 17 make_option("--file_num_dimspy", default = 1), | |
| 18 make_option("--exclude_isotopes", action = "store_true"), | |
| 19 make_option("--isotope_matrix", type = "character") | |
| 20 ) | |
| 21 | |
| 22 # build the parser from the option list, then parse the command line into opt | |
| 23 parser <- OptionParser(option_list = option_list) | |
| 24 opt <- parse_args(parser) | |
| 25 print(sessionInfo()) | |
| 26 print(opt) | |
| 27 | |
| 28 if (is.null(opt$dimspy)){ | |
| 29 # no --dimspy flag: the peak file is read as a plain tab-separated table | |
| 30 df <- read.table(opt$peaks_file, header = TRUE, sep='\t') | |
| 31 filename <- NA | |
| 32 }else{ | |
| 33 indf <- read.table(opt$peaks_file, | |
| 34 header = TRUE, sep='\t', stringsAsFactors = FALSE) | |
| 35 | |
| 36 filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy] | |
| 37 # check if the data file is mzML or RAW (can only use mzML currently) so | |
| 38 # we expect an mzML file of the same name in the same folder | |
| 39 indf$i <- indf[,colnames(indf)==filename] | |
| 40 indf[,colnames(indf)==filename] <- as.numeric(indf[,colnames(indf)==filename]) | |
| 41 | |
| 42 # swap only a trailing .raw extension; an unanchored "raw" pattern would also | |
| 43 # rewrite names that merely contain those letters (e.g. raw_01.mzML) | |
| 44 filename <- sub("\\.raw$", ".mzML", filename, ignore.case = TRUE) | |
| 45 | |
| 46 df <- indf[4:nrow(indf),] | |
| 47 if ('blank_flag' %in% colnames(df)){ | |
| 48 df <- df[df$blank_flag==1,] | |
| 49 } | |
| 50 | |
| 51 colnames(df)[colnames(df)=='m.z'] <- 'mz' | |
| 52 | |
| 53 # %in% never yields NA, so this works when mz already contains NA values | |
| 54 # and is a harmless no-op when no 'nan' entry is present | |
| 55 df$mz[df$mz %in% 'nan'] <- NA | |
| 56 df$mz <- as.numeric(df$mz) | |
| 57 | |
| 58 | |
| 59 } | |
| 60 | |
| 61 # when opt$remove_nas is set, keep only the rows whose mz is not NA | |
| 62 if (!is.null(opt$remove_nas)){ | |
| 63 has_mz <- !is.na(df$mz) | |
| 64 df <- df[has_mz,] | |
| 65 } | |
| 66 | |
| 67 # im stays NULL unless an isotope matrix file was supplied | |
| 68 im <- NULL | |
| 69 if (!is.null(opt$isotope_matrix)){ | |
| 70 im <- read.table(opt$isotope_matrix, | |
| 71 header = TRUE, sep='\t', stringsAsFactors = FALSE) | |
| 72 } | |
| 73 | |
| 74 # isotopes is TRUE only when opt$exclude_isotopes is set | |
| 75 isotopes <- !is.null(opt$exclude_isotopes) | |
| 76 | |
| 77 | |
| 78 | |
| 79 if (dir.exists(opt$mzML_file)){ | |
| 80 # if directory then we need to add a file name | |
| 81 print(filename) | |
| 82 if (is.na(filename)){ | |
| 83 # stop() rather than print() + quit(): quit() defaults to status 0, so the | |
| 84 # caller would see a failed run as a success | |
| 85 stop('ERROR: If a directory is provided then a filename needs to be entered directly or automatically obtained by using a dimspy output', call. = FALSE) | |
| 86 }else{ | |
| 87 mzml_file <- file.path(opt$mzML_file, filename) | |
| 88 } | |
| 89 }else{ | |
| 90 mzml_file <- opt$mzML_file | |
| 91 } | |
| 92 | |
| 93 # sim mirrors the --sim flag: TRUE when opt$sim is set, FALSE when it is | |
| 94 # absent (NULL) | |
| 95 sim <- !is.null(opt$sim) | |
| 96 | |
| 97 | |
| 98 | |
| 99 minOffset <- as.numeric(opt$minOffset) | |
| 100 maxOffset <- as.numeric(opt$maxOffset) | |
| 101 | |
| 102 # choose the normalisation function named by --iwNorm; unknown names are rejected here so iwNorm/iwNormFun are never left undefined | |
| 103 if (opt$iwNorm=='none'){ | |
| 104 iwNorm <- FALSE | |
| 105 iwNormFun <- NULL | |
| 106 }else if (opt$iwNorm=='gauss'){ | |
| 107 iwNorm <- TRUE | |
| 108 iwNormFun <- msPurity::iwNormGauss(minOff=-minOffset, maxOff=maxOffset) | |
| 109 }else if (opt$iwNorm=='rcosine'){ | |
| 110 iwNorm <- TRUE | |
| 111 iwNormFun <- msPurity::iwNormRcosine(minOff=-minOffset, maxOff=maxOffset) | |
| 112 }else if (opt$iwNorm=='QE5'){ | |
| 113 iwNorm <- TRUE | |
| 114 iwNormFun <- msPurity::iwNormQE.5() | |
| 115 }else{ | |
| 116 stop("Unknown --iwNorm value '", opt$iwNorm, "'; expected one of: none, gauss, rcosine, QE5", call. = FALSE) | |
| 117 } | |
| 118 print('FIRST ROWS OF PEAK FILE') | |
| 119 print(head(df)) | |
| 120 | |
| 121 # pass the mz column and the settings gathered above to | |
| 122 # msPurity::dimsPredictPuritySingle | |
| 123 predicted <- msPurity::dimsPredictPuritySingle( | |
| 124 df$mz, filepth = mzml_file, mzML = TRUE, | |
| 125 minOffset = minOffset, maxOffset = maxOffset, | |
| 126 ppm = opt$ppm, ilim = opt$ilim, sim = sim, | |
| 127 isotopes = isotopes, im = im, | |
| 128 iwNorm = iwNorm, iwNormFun = iwNormFun | |
| 129 ) | |
| 130 # place the returned columns next to the peak table they were computed from | |
| 131 predicted <- cbind(df, predicted) | |
| 132 | |
| 133 print(head(predicted)) | |
| 134 | |
| 135 # build the output path once, echo it, then write the table as tab-separated text | |
| 136 out_file <- file.path(opt$out_dir, 'anticipated_purity_dims.tsv') | |
| 137 print(out_file) | |
| 138 write.table(predicted, out_file, row.names = FALSE, sep = '\t') |
