| 
0
 | 
     1 #!/usr/bin/env Rscript
 | 
| 
 | 
     2 library(optparse, quiet = TRUE)
 | 
| 
 | 
     3 library(parallel)
 | 
| 
 | 
     4 initial.options <- commandArgs(trailingOnly = FALSE)
 | 
| 
 | 
     5 file.arg.name <- "--file="
 | 
| 
 | 
     6 script.name <- sub(file.arg.name,"",
 | 
| 
 | 
     7                    initial.options[grep(file.arg.name, initial.options)]
 | 
| 
 | 
     8 )
 | 
| 
 | 
     9 script.dir <- normalizePath(dirname(script.name))
 | 
| 
 | 
    10 oridir=getwd()
 | 
| 
 | 
    11 options(OGDF = paste0(script.dir,"/OGDF/runOGDFlayout2015.5"))
 | 
| 
 | 
    12 CPU =  detectCores()
 | 
| 
 | 
    13 source(paste(script.dir,"/","methods.R", sep=''))
 | 
| 
 | 
    14 source(paste(script.dir,"/","logo_methods.R", sep=''))
 | 
| 
 | 
    15 source(paste(script.dir,"/","htmlheader.R", sep=''))
 | 
| 
 | 
    16 
 | 
| 
 | 
    17 option_list = list(
 | 
| 
 | 
    18     make_option(c('-i', '--input_sequences_list'),
 | 
| 
 | 
    19                 action='store',type='character',
 | 
| 
 | 
    20                 help='list of fasta sequences file for tarean analysis'
 | 
| 
 | 
    21                 ),
 | 
| 
 | 
    22     make_option(c('-o', '--output_dir'),
 | 
| 
 | 
    23                 action='store',type='character',
 | 
| 
 | 
    24                 help='output directory',
 | 
| 
 | 
    25                 default="./kmer_analysis"),
 | 
| 
 | 
    26     make_option(c('-t', '--tRNA_database'),
 | 
| 
 | 
    27                 action='store',type='character',
 | 
| 
 | 
    28                 help='path to tRNA database',
 | 
| 
 | 
    29                 default=NULL),
 | 
| 
 | 
    30     make_option(c('-p', '--parallel'),
 | 
| 
 | 
    31                 action='store_true',
 | 
| 
 | 
    32                 type='logical',
 | 
| 
 | 
    33                 help='run in parallel (faster but can exhaust RAM)',
 | 
| 
 | 
    34                 default=FALSE),
 | 
| 
 | 
    35     make_option(c('-N', '--not_paired'),
 | 
| 
 | 
    36                 action='store_true',
 | 
| 
 | 
    37                 type='logical',
 | 
| 
 | 
    38                 help='reads are not paired',
 | 
| 
 | 
    39                 default=FALSE)
 | 
| 
 | 
    40 
 | 
| 
 | 
    41     )
 | 
| 
 | 
    42 
 | 
| 
 | 
    43 description = paste (strwrap(" put decription here"), collapse ="\n")
 | 
| 
 | 
    44 epilogue = paste (strwrap(" put epilogue here"), collapse ="\n")
 | 
| 
 | 
    45 parser=OptionParser(
 | 
| 
 | 
    46     option_list=option_list,
 | 
| 
 | 
    47     epilogue=epilogue,
 | 
| 
 | 
    48     description=description,
 | 
| 
 | 
    49     )
 | 
| 
 | 
    50 
 | 
| 
 | 
    51 opt = parse_args(parser, args=commandArgs(TRUE))
 | 
| 
 | 
    52 paired = !opt$not_paired
 | 
| 
 | 
    53 print(opt)
 | 
| 
 | 
    54 dir.create(opt$output_dir)
 | 
| 
 | 
    55 fl = readLines(opt$input_sequences_list)
 | 
| 
 | 
    56 ## reorder to avoid running large top graphs at once
 | 
| 
 | 
    57 ord = sample(seq_along(fl), length(fl))
 | 
| 
 | 
    58 
 | 
| 
 | 
    59 
 | 
| 
 | 
    60 index=0
 | 
| 
 | 
    61 info=list()
 | 
| 
 | 
    62 save.image(paste0(opt$output_dir,"/info.RData")) # for debugin purposes
 | 
| 
 | 
    63 if (opt$parallel){
 | 
| 
 | 
    64     cat("processing in parallel")
 | 
| 
 | 
    65     info=mcmapply(
 | 
| 
 | 
    66         FUN=tarean,
 | 
| 
 | 
    67         input_sequences = fl[ord],
 | 
| 
 | 
    68         output_dir = paste0(opt$output_dir,"/",sprintf("%04d",ord)),
 | 
| 
 | 
    69         min_kmer_length = 11,
 | 
| 
 | 
    70         max_kmer_length = 27,
 | 
| 
 | 
    71         CPU = CPU,
 | 
| 
 | 
    72         sample_size = 30000,
 | 
| 
 | 
    73         reorient_reads = TRUE,
 | 
| 
 | 
    74         tRNA_database_path = opt$tRNA_database,
 | 
| 
 | 
    75         paired = paired,
 | 
| 
 | 
    76         include_layout=FALSE,
 | 
| 
 | 
    77         mc.cores=round(1+detectCores()/9),
 | 
| 
 | 
    78         mc.set.seed = TRUE,
 | 
| 
 | 
    79         mc.preschedule = FALSE,
 | 
| 
 | 
    80         SIMPLIFY = FALSE
 | 
| 
 | 
    81     )
 | 
| 
 | 
    82 }else{
 | 
| 
 | 
    83     for (i in fl){
 | 
| 
 | 
    84         index = index + 1
 | 
| 
 | 
    85         dirout=paste0(opt$output_dir,"/",sprintf("%04d",index))
 | 
| 
 | 
    86         try({
 | 
| 
 | 
    87             info[[i]] = tarean(i, dirout, 11, 27, CPU, 30000, TRUE, opt$tRNA_database, include_layout=FALSE)
 | 
| 
 | 
    88             cat("-----------------------------------------------------\n")
 | 
| 
 | 
    89             print(info[[i]])
 | 
| 
 | 
    90         })
 | 
| 
 | 
    91     }
 | 
| 
 | 
    92 }
 | 
| 
 | 
    93 save(info, file = paste0(opt$output_dir,"/info.RData"))
 | 
| 
 | 
    94 save.image("tmp.RData")
 | 
| 
 | 
    95 ## export as csv table
 | 
| 
 | 
    96 ## 'graph_info' is always include:
 | 
| 
 | 
    97 
 | 
| 
 | 
    98 tr_info = data.frame(do.call(rbind, info[sapply(info,length)>1]))
 | 
| 
 | 
    99 if (nrow(tr_info)>0){
 | 
| 
 | 
   100     ## TR detected
 | 
| 
 | 
   101     graph_info = data.frame (do.call(rbind, lapply(info, "[[", "graph_info")))
 | 
| 
 | 
   102     graph_info$source=rownames(graph_info)
 | 
| 
 | 
   103     tr_info$graph_info=NULL
 | 
| 
 | 
   104     tr_info$source = rownames(tr_info)
 | 
| 
 | 
   105     graph_tr_info = merge(graph_info, tr_info, all=TRUE, by='source')
 | 
| 
 | 
   106     if (any(sapply(graph_tr_info,class)=='list')){
 | 
| 
 | 
   107         for (i in colnames(graph_tr_info)){
 | 
| 
 | 
   108             graph_tr_info[,i] = unname(unlist(graph_tr_info[,i]))
 | 
| 
 | 
   109         }
 | 
| 
 | 
   110     }
 | 
| 
 | 
   111     write.table(graph_tr_info, file=paste0(opt$output_dir,"/info.csv"), row.names=FALSE,sep="\t", quote= TRUE)
 | 
| 
 | 
   112 }else{
 | 
| 
 | 
   113     ## TR not detected
 | 
| 
 | 
   114     graph_info = data.frame (do.call(rbind, lapply(info, function(x) unlist(x[['graph_info']]))))
 | 
| 
 | 
   115     graph_info$source=rownames(graph_info)
 | 
| 
 | 
   116     write.table(graph_info, file=paste0(opt$output_dir,"/info.csv"), row.names=FALSE,sep="\t", quote = FALSE)
 | 
| 
 | 
   117 }
 | 
| 
 | 
   118 
 | 
| 
 | 
   119 
 |