| 0 | 1 #!/usr/bin/env Rscript | 
|  | 2 library(optparse, quiet = TRUE) | 
|  | 3 library(parallel) | 
|  | 4 initial.options <- commandArgs(trailingOnly = FALSE) | 
|  | 5 file.arg.name <- "--file=" | 
|  | 6 script.name <- sub(file.arg.name,"", | 
|  | 7                    initial.options[grep(file.arg.name, initial.options)] | 
|  | 8 ) | 
|  | 9 script.dir <- normalizePath(dirname(script.name)) | 
|  | 10 oridir=getwd() | 
|  | 11 options(OGDF = paste0(script.dir,"/OGDF/runOGDFlayout2015.5")) | 
|  | 12 CPU =  detectCores() | 
|  | 13 source(paste(script.dir,"/","methods.R", sep='')) | 
|  | 14 source(paste(script.dir,"/","logo_methods.R", sep='')) | 
|  | 15 source(paste(script.dir,"/","htmlheader.R", sep='')) | 
|  | 16 | 
|  | 17 option_list = list( | 
|  | 18     make_option(c('-i', '--input_sequences_list'), | 
|  | 19                 action='store',type='character', | 
|  | 20                 help='list of fasta sequences file for tarean analysis' | 
|  | 21                 ), | 
|  | 22     make_option(c('-o', '--output_dir'), | 
|  | 23                 action='store',type='character', | 
|  | 24                 help='output directory', | 
|  | 25                 default="./kmer_analysis"), | 
|  | 26     make_option(c('-t', '--tRNA_database'), | 
|  | 27                 action='store',type='character', | 
|  | 28                 help='path to tRNA database', | 
|  | 29                 default=NULL), | 
|  | 30     make_option(c('-p', '--parallel'), | 
|  | 31                 action='store_true', | 
|  | 32                 type='logical', | 
|  | 33                 help='run in parallel (faster but can exhaust RAM)', | 
|  | 34                 default=FALSE), | 
|  | 35     make_option(c('-N', '--not_paired'), | 
|  | 36                 action='store_true', | 
|  | 37                 type='logical', | 
|  | 38                 help='reads are not paired', | 
|  | 39                 default=FALSE) | 
|  | 40 | 
|  | 41     ) | 
|  | 42 | 
|  | 43 description = paste (strwrap(" put decription here"), collapse ="\n") | 
|  | 44 epilogue = paste (strwrap(" put epilogue here"), collapse ="\n") | 
|  | 45 parser=OptionParser( | 
|  | 46     option_list=option_list, | 
|  | 47     epilogue=epilogue, | 
|  | 48     description=description, | 
|  | 49     ) | 
|  | 50 | 
|  | 51 opt = parse_args(parser, args=commandArgs(TRUE)) | 
|  | 52 paired = !opt$not_paired | 
|  | 53 print(opt) | 
|  | 54 dir.create(opt$output_dir) | 
|  | 55 fl = readLines(opt$input_sequences_list) | 
|  | 56 ## reorder to avoid running large top graphs at once | 
|  | 57 ord = sample(seq_along(fl), length(fl)) | 
|  | 58 | 
|  | 59 | 
|  | 60 index=0 | 
|  | 61 info=list() | 
|  | 62 save.image(paste0(opt$output_dir,"/info.RData")) # for debugin purposes | 
|  | 63 if (opt$parallel){ | 
|  | 64     cat("processing in parallel") | 
|  | 65     info=mcmapply( | 
|  | 66         FUN=tarean, | 
|  | 67         input_sequences = fl[ord], | 
|  | 68         output_dir = paste0(opt$output_dir,"/",sprintf("%04d",ord)), | 
|  | 69         min_kmer_length = 11, | 
|  | 70         max_kmer_length = 27, | 
|  | 71         CPU = CPU, | 
|  | 72         sample_size = 30000, | 
|  | 73         reorient_reads = TRUE, | 
|  | 74         tRNA_database_path = opt$tRNA_database, | 
|  | 75         paired = paired, | 
|  | 76         include_layout=FALSE, | 
|  | 77         mc.cores=round(1+detectCores()/9), | 
|  | 78         mc.set.seed = TRUE, | 
|  | 79         mc.preschedule = FALSE, | 
|  | 80         SIMPLIFY = FALSE | 
|  | 81     ) | 
|  | 82 }else{ | 
|  | 83     for (i in fl){ | 
|  | 84         index = index + 1 | 
|  | 85         dirout=paste0(opt$output_dir,"/",sprintf("%04d",index)) | 
|  | 86         try({ | 
|  | 87             info[[i]] = tarean(i, dirout, 11, 27, CPU, 30000, TRUE, opt$tRNA_database, include_layout=FALSE) | 
|  | 88             cat("-----------------------------------------------------\n") | 
|  | 89             print(info[[i]]) | 
|  | 90         }) | 
|  | 91     } | 
|  | 92 } | 
|  | 93 save(info, file = paste0(opt$output_dir,"/info.RData")) | 
|  | 94 save.image("tmp.RData") | 
|  | 95 ## export as csv table | 
|  | 96 ## 'graph_info' is always include: | 
|  | 97 | 
|  | 98 tr_info = data.frame(do.call(rbind, info[sapply(info,length)>1])) | 
|  | 99 if (nrow(tr_info)>0){ | 
|  | 100     ## TR detected | 
|  | 101     graph_info = data.frame (do.call(rbind, lapply(info, "[[", "graph_info"))) | 
|  | 102     graph_info$source=rownames(graph_info) | 
|  | 103     tr_info$graph_info=NULL | 
|  | 104     tr_info$source = rownames(tr_info) | 
|  | 105     graph_tr_info = merge(graph_info, tr_info, all=TRUE, by='source') | 
|  | 106     if (any(sapply(graph_tr_info,class)=='list')){ | 
|  | 107         for (i in colnames(graph_tr_info)){ | 
|  | 108             graph_tr_info[,i] = unname(unlist(graph_tr_info[,i])) | 
|  | 109         } | 
|  | 110     } | 
|  | 111     write.table(graph_tr_info, file=paste0(opt$output_dir,"/info.csv"), row.names=FALSE,sep="\t", quote= TRUE) | 
|  | 112 }else{ | 
|  | 113     ## TR not detected | 
|  | 114     graph_info = data.frame (do.call(rbind, lapply(info, function(x) unlist(x[['graph_info']])))) | 
|  | 115     graph_info$source=rownames(graph_info) | 
|  | 116     write.table(graph_info, file=paste0(opt$output_dir,"/info.csv"), row.names=FALSE,sep="\t", quote = FALSE) | 
|  | 117 } | 
|  | 118 | 
|  | 119 |