annotate tools/dada2-filter-and-trim.R @ 0:44579a4814b7 draft

planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
author dfornika
date Tue, 01 May 2018 19:10:14 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
1 #!/usr/bin/env Rscript
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
2
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
3 suppressMessages(library(optparse))
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
4
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
5 options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)  # option names are case-sensitive; the previous spelling "stringAsfactors" only set an unused option
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
6
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
7 args <- commandArgs(trailingOnly = TRUE)
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
8
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
9 option_list <- list(
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
10 make_option(c("", "--fwd"), dest="fnFs",
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
11 help="The path(s) to the input fastq file(s)."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
12 make_option(c("", "--rev"), dest="fnRs",
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
13 help="The path(s) to the input reverse fastq file(s) from paired-end sequence data corresponding to those provided to the fwd argument. If NULL, the fwd files are processed as single-reads."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
14 make_option(c("", "--filt"), dest="filtFs",
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
15 help="The path(s) to the output filtered file(s) corresponding to the fwd input files. If containing directory does not exist, it will be created."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
16 make_option(c("", "--filt.rev"), dest="filtRs",
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
17 help="The path(s) to the output fastq file(s) corresponding to the rev input. Can also provide a directory, which if not existing will be created (how to differentiate between dir/file in len1 case?)."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
18 make_option(c("", "--truncQ"), dest="truncQ", default=2,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
19 help="Truncate reads at the first instance of a quality score less than or equal to truncQ."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
20 make_option(c("", "--truncLen"), dest="truncLen", default=0,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
21 help="Truncate reads after truncLen bases. Reads shorter than this are discarded."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
22 make_option(c("", "--trimLeft"), dest="trimLeft", default=0,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
23 help="The number of nucleotides to remove from the start of each read. If both truncLen and trimLeft are provided, filtered reads will have length truncLen-trimLeft."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
24 make_option(c("", "--maxLen"), dest="maxLen", default=Inf,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
25 help="Remove reads with length greater than maxLen. maxLen is enforced before trimming and truncation."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
26 make_option(c("", "--minLen"), dest="minLen", default=20,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
27 help="Remove reads with length less than minLen. minLen is enforced after trimming and truncation."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
28 make_option(c("", "--maxN"), dest="maxN", default=0,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
29 help="After truncation, sequences with more than maxN Ns will be discarded. Note that dada does not allow Ns."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
30 make_option(c("", "--minQ"), dest="minQ", default=0,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
31 help="After truncation, reads contain a quality score less than minQ will be discarded."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
32 make_option(c("", "--maxEE"), dest="maxEE", default=Inf,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
33 help="After truncation, reads with higher than maxEE \"expected errors\" will be discarded. Expected errors are calculated from the nominal definition of the quality score: EE = sum(10^(-Q/10))"),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
34 make_option(c("", "--primer.fwd"), dest="primer.fwd", default=NULL,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
35 help="Paired-read filtering only. A character string defining the forward primer. Only allows unambiguous nucleotides. The primer will be compared to the first len(primer.fwd) nucleotides at the start of the read. If there is not an exact match, the read is filtered out. For paired reads, the reverse read is also interrogated, and if the primer is detected on the reverse read, the forward/reverse reads are swapped."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
36 make_option(c("", "--matchIDs"), dest="matchIDs", action="store_true", default=FALSE,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
37 help="Paired-read filtering only. Whether to enforce matching between the id-line sequence identifiers of the forward and reverse fastq files. If TRUE, only paired reads that share id fields (see below) are output. If FALSE, no read ID checking is done. Note: matchIDs=FALSE essentially assumes matching order between forward and reverse reads. If that matched order is not present future processing steps may break (in particular mergePairs)."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
38 make_option(c("", "--id.sep"), dest="id.sep", default="\\s",
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
39 help="Paired-read filtering only. The separator between fields in the id-line of the input fastq files. Passed to the strsplit."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
40 make_option(c("", "--id.field"), dest="id.field", default=NULL,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
41 help="Paired-read filtering only. The field of the id-line containing the sequence identifier. If NULL (the default) and matchIDs is TRUE, the function attempts to automatically detect the sequence identifier field under the assumption of Illumina formatted output."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
42 make_option(c("", "--multithreaded"), dest="multithreaded", action="store_true", default=FALSE,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
43 help="If TRUE, input files are filtered in parallel via mclapply. If an integer is provided, it is passed to the mc.cores argument of mclapply. Note that the parallelization here is by forking, and each process is loading another fastq file into memory. This option is ignored in Windows, as Windows does not support forking, with mc.cores set to 1. If memory is an issue, execute in a clean environment and reduce the chunk size n and/or the number of threads."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
44 make_option(c("", "--n"), dest="n", default=1e5,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
45 help="The number of records (reads) to read in and filter at any one time. This controls the peak memory requirement so that very large fastq files are supported. See FastqStreamer for details."),
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
46 make_option(c("", "--verbose"), dest="verbose", action="store_true", default=FALSE,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
47 help="Whether to output status messages.")
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
48 )
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
49
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
50 opt <- parse_args(OptionParser(option_list=option_list), args)
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
51
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
52 opt$fnFs <- strsplit(as.character(opt$fnFs), ',')[[1]]
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
53 if (!is.null(opt$fnRs)) opt$fnRs <- strsplit(as.character(opt$fnRs), ',')[[1]]  # --rev is optional (single-read mode); unguarded, strsplit(character(0), ',')[[1]] stops with "subscript out of bounds"
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
54 if(!is.null(opt$filtFs)){
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
55 opt$filtFs <- strsplit(as.character(opt$filtFs), ',')[[1]]
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
56 }
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
57 if(!is.null(opt$filtRs)){
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
58 opt$filtRs <- strsplit(as.character(opt$filtRs), ',')[[1]]
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
59 }
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
60 print(opt)
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
61
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
62 suppressMessages(library(dada2))
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
63
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
64 filterAndTrim(
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
65 opt$fnFs,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
66 opt$filtFs,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
67 opt$fnRs,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
68 opt$filtRs,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
69 truncLen=opt$truncLen,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
70 maxN=opt$maxN,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
71 maxEE=opt$maxEE,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
72 truncQ=opt$truncQ,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
73 rm.phix=TRUE,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
74 compress=TRUE,
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
75 multithread=opt$multithreaded, trimLeft=opt$trimLeft, maxLen=opt$maxLen, minLen=opt$minLen, minQ=opt$minQ, matchIDs=opt$matchIDs, id.sep=opt$id.sep, n=opt$n, verbose=opt$verbose  # forward the parsed CLI options that were previously ignored; primer.fwd and id.field are still not forwarded (TODO: confirm argument name/type against the installed dada2)
44579a4814b7 planemo upload for repository https://github.com/dfornika/dada2-galaxy commit a2ae309ba8ff0ef18a1e4abbe5d1ef412e4f69a9
dfornika
parents:
diff changeset
76 )