Mercurial > repos > petrn > repeatexplorer
comparison lib/tarean/tarean_batch_mode.R @ 8:3bc73f5dc785 draft
Uploaded
| author | petrn |
|---|---|
| date | Fri, 20 Dec 2019 14:17:59 +0000 |
| parents | f6ebec6e235e |
| children |
comparison
equal
deleted
inserted
replaced
| 7:c56807be3b72 | 8:3bc73f5dc785 |
|---|---|
| 1 #!/usr/bin/env Rscript | |
| 2 library(optparse, quiet = TRUE) | |
| 3 library(parallel) | |
| 4 initial.options <- commandArgs(trailingOnly = FALSE) | |
| 5 file.arg.name <- "--file=" | |
| 6 script.name <- sub(file.arg.name,"", | |
| 7 initial.options[grep(file.arg.name, initial.options)] | |
| 8 ) | |
| 9 script.dir <- normalizePath(dirname(script.name)) | |
| 10 oridir=getwd() | |
| 11 options(OGDF = paste0(script.dir,"/OGDF/runOGDFlayout2015.5")) | |
| 12 CPU = detectCores() | |
| 13 source(paste(script.dir,"/","methods.R", sep='')) | |
| 14 source(paste(script.dir,"/","logo_methods.R", sep='')) | |
| 15 source(paste(script.dir,"/","htmlheader.R", sep='')) | |
| 16 | |
| 17 option_list = list( | |
| 18 make_option(c('-i', '--input_sequences_list'), | |
| 19 action='store',type='character', | |
| 20 help='list of fasta sequences file for tarean analysis' | |
| 21 ), | |
| 22 make_option(c('-o', '--output_dir'), | |
| 23 action='store',type='character', | |
| 24 help='output directory', | |
| 25 default="./kmer_analysis"), | |
| 26 make_option(c('-t', '--tRNA_database'), | |
| 27 action='store',type='character', | |
| 28 help='path to tRNA database', | |
| 29 default=NULL), | |
| 30 make_option(c('-p', '--parallel'), | |
| 31 action='store_true', | |
| 32 type='logical', | |
| 33 help='run in parallel (faster but can exhaust RAM)', | |
| 34 default=FALSE), | |
| 35 make_option(c('-N', '--not_paired'), | |
| 36 action='store_true', | |
| 37 type='logical', | |
| 38 help='reads are not paired', | |
| 39 default=FALSE) | |
| 40 | |
| 41 ) | |
| 42 | |
| 43 description = paste (strwrap(" put decription here"), collapse ="\n") | |
| 44 epilogue = paste (strwrap(" put epilogue here"), collapse ="\n") | |
| 45 parser=OptionParser( | |
| 46 option_list=option_list, | |
| 47 epilogue=epilogue, | |
| 48 description=description, | |
| 49 ) | |
| 50 | |
| 51 opt = parse_args(parser, args=commandArgs(TRUE)) | |
| 52 paired = !opt$not_paired | |
| 53 print(opt) | |
| 54 dir.create(opt$output_dir) | |
| 55 fl = readLines(opt$input_sequences_list) | |
| 56 ## reorder to avoid running large top graphs at once | |
| 57 ord = sample(seq_along(fl), length(fl)) | |
| 58 | |
| 59 | |
| 60 index=0 | |
| 61 info=list() | |
| 62 save.image(paste0(opt$output_dir,"/info.RData")) # for debugin purposes | |
| 63 if (opt$parallel){ | |
| 64 cat("processing in parallel") | |
| 65 info=mcmapply( | |
| 66 FUN=tarean, | |
| 67 input_sequences = fl[ord], | |
| 68 output_dir = paste0(opt$output_dir,"/",sprintf("%04d",ord)), | |
| 69 min_kmer_length = 11, | |
| 70 max_kmer_length = 27, | |
| 71 CPU = CPU, | |
| 72 sample_size = 30000, | |
| 73 reorient_reads = TRUE, | |
| 74 tRNA_database_path = opt$tRNA_database, | |
| 75 paired = paired, | |
| 76 include_layout=FALSE, | |
| 77 mc.cores=round(1+detectCores()/9), | |
| 78 mc.set.seed = TRUE, | |
| 79 mc.preschedule = FALSE, | |
| 80 SIMPLIFY = FALSE | |
| 81 ) | |
| 82 }else{ | |
| 83 for (i in fl){ | |
| 84 index = index + 1 | |
| 85 dirout=paste0(opt$output_dir,"/",sprintf("%04d",index)) | |
| 86 try({ | |
| 87 info[[i]] = tarean(i, dirout, 11, 27, CPU, 30000, TRUE, opt$tRNA_database, include_layout=FALSE) | |
| 88 cat("-----------------------------------------------------\n") | |
| 89 print(info[[i]]) | |
| 90 }) | |
| 91 } | |
| 92 } | |
| 93 save(info, file = paste0(opt$output_dir,"/info.RData")) | |
| 94 save.image("tmp.RData") | |
| 95 ## export as csv table | |
| 96 ## 'graph_info' is always include: | |
| 97 | |
| 98 tr_info = data.frame(do.call(rbind, info[sapply(info,length)>1])) | |
| 99 if (nrow(tr_info)>0){ | |
| 100 ## TR detected | |
| 101 graph_info = data.frame (do.call(rbind, lapply(info, "[[", "graph_info"))) | |
| 102 graph_info$source=rownames(graph_info) | |
| 103 tr_info$graph_info=NULL | |
| 104 tr_info$source = rownames(tr_info) | |
| 105 graph_tr_info = merge(graph_info, tr_info, all=TRUE, by='source') | |
| 106 if (any(sapply(graph_tr_info,class)=='list')){ | |
| 107 for (i in colnames(graph_tr_info)){ | |
| 108 graph_tr_info[,i] = unname(unlist(graph_tr_info[,i])) | |
| 109 } | |
| 110 } | |
| 111 write.table(graph_tr_info, file=paste0(opt$output_dir,"/info.csv"), row.names=FALSE,sep="\t", quote= TRUE) | |
| 112 }else{ | |
| 113 ## TR not detected | |
| 114 graph_info = data.frame (do.call(rbind, lapply(info, function(x) unlist(x[['graph_info']])))) | |
| 115 graph_info$source=rownames(graph_info) | |
| 116 write.table(graph_info, file=paste0(opt$output_dir,"/info.csv"), row.names=FALSE,sep="\t", quote = FALSE) | |
| 117 } | |
| 118 | |
| 119 |
