Mercurial > repos > iuc > dada2_plotcomplexity
comparison test-data/gentest.R @ 2:10cd92444a38 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
| author | iuc |
|---|---|
| date | Mon, 16 Mar 2020 11:31:39 +0000 |
| parents | 923d45f161cf |
| children | 58a426392326 |
Comparison legend: equal, deleted, inserted, replaced

| 1:adb016654604 | 2:10cd92444a38 |
|---|---|
| 1 library(dada2, quietly=T) | 1 library(dada2, quietly=T) |
| 2 library(ggplot2, quietly=T) | 2 library(ggplot2, quietly=T) |
| 3 | 3 |
| 4 fwd <- c('F3D0_S188_L001_R1_001.fastq.gz') | 4 sample.names <- c('F3D0_S188_L001', 'F3D141_S207_L001') |
| 5 rev <- c('F3D0_S188_L001_R2_001.fastq.gz') | 5 fwd <- c('F3D0_S188_L001_R1_001.fastq.gz', 'F3D141_S207_L001_R1_001.fastq.gz') |
| 6 rev <- c('F3D0_S188_L001_R2_001.fastq.gz', 'F3D141_S207_L001_R2_001.fastq.gz') | |
| 6 | 7 |
| 7 sample.names <- c('F3D0_S188_L001') | 8 filt.fwd <- c('filterAndTrim_F3D0_R1.fq.gz', 'filterAndTrim_F3D141_R1.fq.gz') |
| 9 filt.rev <- c('filterAndTrim_F3D0_R2.fq.gz', 'filterAndTrim_F3D141_R2.fq.gz') | |
| 10 | |
| 11 print("filterAndTrim") | |
| 12 | |
| 13 for(i in 1:length(fwd)){ | |
| 14 ftout <- filterAndTrim(fwd[i], filt.fwd[i], rev[i], filt.rev[i]) | |
| 15 b <- paste(strsplit(fwd[i], ".", fixed=T)[[1]][1], "tab", sep=".") | |
| 16 write.table(ftout, b, quote=F, sep="\t", col.names=NA) | |
| 17 } | |
| 18 | |
| 19 # In the test only the 1st data set is used | |
| 20 t <- data.frame() | |
| 21 t <- rbind(t, ftout[1,]) | |
| 22 colnames(t) <- colnames(ftout) | |
| 23 rownames(t) <- rownames(ftout)[1] | |
| 24 write.table(t, "filterAndTrim.tab", quote=F, sep="\t", col.names=NA) | |
| 8 | 25 |
| 9 names(fwd) <- sample.names | 26 names(fwd) <- sample.names |
| 10 names(rev) <- sample.names | 27 names(rev) <- sample.names |
| 11 | 28 names(filt.fwd) <- sample.names |
| 12 | 29 names(filt.rev) <- sample.names |
| 13 filt.fwd <- c('filterAndTrim_F3D0_R1.fq.gz') | |
| 14 filt.rev <- c('filterAndTrim_F3D0_R2.fq.gz') | |
| 15 | |
| 16 ftout <- filterAndTrim(fwd, filt.fwd, rev, filt.rev) | |
| 17 | |
| 18 # In the test no name can be given to the collection | |
| 19 rownames(ftout) <- c( 'Unnamed Collection' ) | |
| 20 write.table(ftout, "filterAndTrim_F3D0.tab", quote=F, sep="\t", col.names=NA) | |
| 21 | 30 |
| 22 # Plot quality profile (just for one file, Galaxy compares with sim_size) | 31 # Plot quality profile (just for one file, Galaxy compares with sim_size) |
| 23 | 32 print("plots") |
| 24 qp <- plotQualityProfile(fwd) | 33 qp <- plotQualityProfile(fwd) |
| 34 ggsave('qualityProfile_fwd.pdf', qp, width = 20,height = 15,units = c("cm")) | |
| 35 qp <- plotQualityProfile(rev) | |
| 36 ggsave('qualityProfile_rev.pdf', qp, width = 20,height = 15,units = c("cm")) | |
| 37 qp <- plotQualityProfile(fwd[1]) | |
| 25 ggsave('qualityProfile.pdf', qp, width = 20,height = 15,units = c("cm")) | 38 ggsave('qualityProfile.pdf', qp, width = 20,height = 15,units = c("cm")) |
| 26 | 39 |
| 27 # Plot complexity (just for one file, Galaxy compares with sim_size) | 40 # Plot complexity (just for one file, Galaxy compares with sim_size) |
| 28 | 41 |
| 29 cp <- plotComplexity(fwd) | 42 cp <- plotComplexity(fwd) |
| 43 ggsave('complexity_fwd.pdf', cp, width = 20,height = 15,units = c("cm")) | |
| 44 cp <- plotComplexity(rev) | |
| 45 ggsave('complexity_rev.pdf', cp, width = 20,height = 15,units = c("cm")) | |
| 46 cp <- plotComplexity(fwd[1]) | |
| 30 ggsave('complexity.pdf', cp, width = 20,height = 15,units = c("cm")) | 47 ggsave('complexity.pdf', cp, width = 20,height = 15,units = c("cm")) |
| 31 | 48 |
| 32 | 49 |
| 33 # learn Errors | 50 # learn Errors |
| 51 print("learnErrors") | |
| 34 err.fwd <- learnErrors(filt.fwd) | 52 err.fwd <- learnErrors(filt.fwd) |
| 35 saveRDS(err.fwd, file='learnErrors_F3D0_R1.Rdata') | 53 saveRDS(err.fwd, file='learnErrors_R1.Rdata') |
| 36 plot <- plotErrors(err.fwd) | 54 plot <- plotErrors(err.fwd) |
| 37 ggsave('learnErrors_F3D0_R1.pdf', plot, width = 20,height = 15,units = c("cm")) | 55 ggsave('learnErrors_R1.pdf', plot, width = 20,height = 15,units = c("cm")) |
| 38 | 56 |
| 39 err.rev <- learnErrors(filt.fwd) | 57 err.rev <- learnErrors(filt.rev) |
| 40 saveRDS(err.rev, file='learnErrors_F3D0_R2.Rdata') | 58 saveRDS(err.rev, file='learnErrors_R2.Rdata') |
| 41 plot <- plotErrors(err.rev) | 59 plot <- plotErrors(err.rev) |
| 42 ggsave('learnErrors_F3D0_R2.pdf', plot, width = 20,height = 15,units = c("cm")) | 60 ggsave('learnErrors.pdf', plot, width = 20,height = 15,units = c("cm")) |
| 43 | 61 |
| 44 # dada | 62 # dada |
| 63 print("dada") | |
| 45 dada.fwd <- dada(filt.fwd, err.fwd) | 64 dada.fwd <- dada(filt.fwd, err.fwd) |
| 46 saveRDS(dada.fwd, file="dada_F3D0_R1.Rdata") | |
| 47 dada.rev <- dada(filt.rev, err.rev) | 65 dada.rev <- dada(filt.rev, err.rev) |
| 48 saveRDS(dada.rev, file="dada_F3D0_R2.Rdata") | 66 for( id in sample.names ){ |
| 67 saveRDS(dada.fwd[[id]], file=paste("dada_", id,"_R1.Rdata", sep="")) | |
| 68 saveRDS(dada.rev[[id]], file=paste("dada_", id,"_R2.Rdata", sep="")) | |
| 69 } | |
| 49 | 70 |
| 50 # merge pairs | 71 # merge pairs |
| 72 print("mergePairs") | |
| 51 merged <- mergePairs(dada.fwd, filt.fwd, dada.rev, filt.rev) | 73 merged <- mergePairs(dada.fwd, filt.fwd, dada.rev, filt.rev) |
| 52 saveRDS(merged, file='mergePairs_F3D0.Rdata') | 74 for( id in sample.names ){ |
| 75 saveRDS(merged[[id]], file=paste("mergePairs_", id,".Rdata", sep="")) | |
| 76 } | |
| 77 | |
| 53 | 78 |
| 54 # make sequence table | 79 # make sequence table |
| 80 print("makeSequenceTable") | |
| 55 seqtab <- makeSequenceTable(merged) | 81 seqtab <- makeSequenceTable(merged) |
| 56 write.table(t(seqtab), file="makeSequenceTable_F3D0.tab", quote=F, sep="\t", row.names = T, col.names = NA) | 82 write.table(t(seqtab), file="makeSequenceTable.tab", quote=F, sep="\t", row.names = T, col.names = NA) |
| 57 | 83 |
| 58 reads.per.seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum) | 84 reads.per.seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum) |
| 59 df <- data.frame(length=as.numeric(names(reads.per.seqlen)), count=reads.per.seqlen) | 85 df <- data.frame(length=as.numeric(names(reads.per.seqlen)), count=reads.per.seqlen) |
| 60 pdf( 'makeSequenceTable_F3D0.pdf' ) | 86 pdf( 'makeSequenceTable.pdf' ) |
| 61 ggplot(data=df, aes(x=length, y=count)) + | 87 ggplot(data=df, aes(x=length, y=count)) + |
| 62 geom_col() + | 88 geom_col() + |
| 63 theme_bw() | 89 theme_bw() |
| 64 bequiet <- dev.off() | 90 bequiet <- dev.off() |
| 65 | 91 |
| 66 # remove bimera | 92 # remove bimera |
| 93 print("removeBimera") | |
| 67 seqtab.nochim <- removeBimeraDenovo(seqtab) | 94 seqtab.nochim <- removeBimeraDenovo(seqtab) |
| 68 write.table(t(seqtab), file="removeBimeraDenovo_F3D0.tab", quote=F, sep="\t", row.names = T, col.names = NA) | 95 write.table(t(seqtab), file="removeBimeraDenovo.tab", quote=F, sep="\t", row.names = T, col.names = NA) |
| 69 | 96 |
| 70 # assign taxonomy/species | 97 # assign taxonomy/species |
| 71 tl <- 'Level1,Level2,Level3,Level4,Level5' | 98 tl <- 'Level1,Level2,Level3,Level4,Level5' |
| 72 tl <- strsplit(tl, ",")[[1]] | 99 tl <- strsplit(tl, ",")[[1]] |
| 73 | 100 |
| 74 taxa <- assignTaxonomy(seqtab.nochim, 'reference.fa', outputBootstraps = T, taxLevels=c('Level1','Level2','Level3','Level4','Level5')) | 101 set.seed(42) |
| 102 print("assignTaxonomyAndSpecies") | |
| 103 taxa <- assignTaxonomy(seqtab.nochim, 'reference.fa.gz', outputBootstraps = T, taxLevels=tl, multithread = 1) | |
| 75 | 104 |
| 76 taxa$tax <- addSpecies(taxa$tax, 'reference_species.fa') | 105 taxa$tax <- addSpecies(taxa$tax, 'reference_species.fa.gz') |
| 77 write.table(taxa$tax, file = 'assignTaxonomyAddspecies_F3D0.tab', quote = F, sep = "\t", row.names = T, col.names = NA) | 106 write.table(taxa$tax, file = 'assignTaxonomyAddspecies.tab', quote = F, sep = "\t", row.names = T, col.names = NA) |
| 78 | 107 |
| 79 write.table(taxa$boot, file = 'assignTaxonomyAddspecies_F3D0_boot.tab', quote = F, sep = "\t", row.names = T, col.names = NA) | 108 write.table(taxa$boot, file = 'assignTaxonomyAddspecies_boot.tab', quote = F, sep = "\t", row.names = T, col.names = NA) |
| 80 | |
| 81 | 109 |
| 82 | 110 |
| 83 ## Generate extra test data for parameter testing | 111 ## Generate extra test data for parameter testing |
| 112 print("alternatives") | |
| 113 filterAndTrim(fwd, c('filterAndTrim_single_F3D0_R1.fq.gz', 'filterAndTrim_single_F3D141_R1.fq.gz'), rm.phix = T, orient.fwd = 'TACGG') | |
| 84 | 114 |
| 85 filterAndTrim(fwd, c('filterAndTrim_single_F3D0_R1.fq.gz'), rm.phix = T, orient.fwd = 'TACGG') | 115 filterAndTrim(fwd, c('filterAndTrim_single_trimmers_F3D0_R1.fq.gz', 'filterAndTrim_single_trimmers_F3D141_R1.fq.gz'), truncQ = 30, truncLen = 2, trimLeft = 150, trimRight = 2) |
| 86 | 116 |
| 87 filterAndTrim(fwd, c('filterAndTrim_single_trimmers_F3D0_R1.fq.gz'), truncQ = 30, truncLen = 2, trimLeft = 150, trimRight = 2) | 117 filterAndTrim(fwd, c('filterAndTrim_single_filters_F3D0_R1.fq.gz', 'filterAndTrim_single_filters_F3D141_R1.fq.gz'), maxLen = 255, minLen = 60, maxN = 100, minQ = 13, maxEE = 1) |
| 88 | |
| 89 filterAndTrim(fwd, c('filterAndTrim_single_filters_F3D0_R1.fq.gz'), maxLen = 255, minLen = 60, maxN = 100, minQ = 13, maxEE = 1) | |
| 90 | 118 |
| 91 | 119 |
| 92 merged_nondef <- mergePairs(dada.fwd, filt.fwd, dada.rev, filt.rev, minOverlap = 8, maxMismatch = 1, justConcatenate = TRUE, trimOverhang = TRUE) | 120 merged_nondef <- mergePairs(dada.fwd, filt.fwd, dada.rev, filt.rev, minOverlap = 8, maxMismatch = 1, justConcatenate = TRUE, trimOverhang = TRUE) |
| 93 saveRDS(merged_nondef, file='mergePairs_F3D0_nondefault.Rdata') | 121 for( id in sample.names ){ |
| 94 | 122 saveRDS(merged_nondef[[id]], file=paste("mergePairs_", id,"_nondefault.Rdata", sep="")) |
| 95 rb.dada.fwd <- removeBimeraDenovo(dada.fwd) | 123 } |
| 124 rb.dada.fwd <- removeBimeraDenovo(dada.fwd[["F3D0_S188_L001"]]) | |
| 96 write.table(rb.dada.fwd, file = 'removeBimeraDenovo_F3D0_dada_uniques.tab', quote = F, sep = "\t", row.names = T, col.names = F) | 125 write.table(rb.dada.fwd, file = 'removeBimeraDenovo_F3D0_dada_uniques.tab', quote = F, sep = "\t", row.names = T, col.names = F) |
| 97 | 126 |
| 98 rb.merged <- removeBimeraDenovo(merged, method="pooled") | 127 rb.merged <- removeBimeraDenovo(merged, method="pooled") |
| 99 saveRDS(rb.merged, file='removeBimeraDenovo_F3D0_mergepairs.Rdata') | 128 saveRDS(rb.merged, file='removeBimeraDenovo_F3D0_mergepairs.Rdata') |
| 129 | |
| 130 # SeqCounts | |
| 131 getN <- function(x){ sum(getUniques(x)) } | |
| 132 | |
| 133 read.uniques <- function ( fname ) { | |
| 134 p <- read.table(fname, header=F, sep="\t") | |
| 135 n <-x[,2] | |
| 136 names(n)<-x[,1] | |
| 137 } | |
| 138 | |
| 139 | |
| 140 print("seqCounts ft") | |
| 141 samples = list() | |
| 142 samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header=T, sep="\t", row.names=1) | |
| 143 dname <- "filter" | |
| 144 tdf <- samples[["F3D0_S188_L001_R1_001.tab"]] | |
| 145 names(tdf) <- paste( dname, names(tdf) ) | |
| 146 tdf <- cbind( data.frame(samples=names( samples )), tdf) | |
| 147 write.table(tdf, "seqCounts_filter.tab", quote=F, sep="\t", row.names = F, col.names = T) | |
| 148 | |
| 149 samples = list() | |
| 150 samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header=T, sep="\t", row.names=1) | |
| 151 samples[["F3D141_S207_L001_R1_001.tab"]] <- read.table("F3D141_S207_L001_R1_001.tab", header=T, sep="\t", row.names=1) | |
| 152 dname <- "filter" | |
| 153 tdf <- samples[["F3D0_S188_L001_R1_001.tab"]] | |
| 154 tdf <- rbind(tdf, samples[["F3D141_S207_L001_R1_001.tab"]]) | |
| 155 names(tdf) <- paste( dname, names(tdf) ) | |
| 156 tdf <- cbind( data.frame(samples=names( samples )), tdf) | |
| 157 write.table(tdf, "seqCounts_filter_both.tab", quote=F, sep="\t", row.names = F, col.names = T) | |
| 158 | |
| 159 print("seqCounts dada") | |
| 160 samples = list() | |
| 161 samples[["dada_F3D0_S188_L001_R1.Rdata"]] <- readRDS('dada_F3D0_S188_L001_R1.Rdata') | |
| 162 samples[["dada_F3D141_S207_L001_R1.Rdata"]] <- readRDS('dada_F3D141_S207_L001_R1.Rdata') | |
| 163 dname <- "dadaF" | |
| 164 tdf <- data.frame( samples = names(samples) ) | |
| 165 tdf[[ dname ]] <- sapply(samples, getN) | |
| 166 write.table(tdf, "seqCounts_dadaF.tab", quote=F, sep="\t", row.names = F, col.names = T) | |
| 167 | |
| 168 print("seqCounts mp") | |
| 169 samples = list() | |
| 170 samples[["mergePairs_F3D0_S188_L001.Rdata"]] <- readRDS('mergePairs_F3D0_S188_L001.Rdata') | |
| 171 samples[["mergePairs_F3D141_S207_L001.Rdata"]] <- readRDS('mergePairs_F3D141_S207_L001.Rdata') | |
| 172 dname <- "merge" | |
| 173 tdf <- data.frame( samples = names(samples) ) | |
| 174 tdf[[ dname ]] <- sapply(samples, getN) | |
| 175 write.table(tdf, "seqCounts_merge.tab", quote=F, sep="\t", row.names = F, col.names = T) | |
| 176 | |
| 177 print("seqCounts st") | |
| 178 samples = list() | |
| 179 samples <- t(as.matrix( read.table("makeSequenceTable.tab", header=T, sep="\t", row.names=1) )) | |
| 180 dname <- "seqtab" | |
| 181 tdf <- data.frame( samples = row.names(samples) ) | |
| 182 tdf[[ dname ]] <- rowSums(samples) | |
| 183 write.table(tdf, "seqCounts_seqtab.tab", quote=F, sep="\t", row.names = F, col.names = T) | |
| 184 | |
| 185 print("seqCounts rb") | |
| 186 samples = list() | |
| 187 samples <- t(as.matrix( read.table("removeBimeraDenovo.tab", header=T, sep="\t", row.names=1) )) | |
| 188 dname <- "nochim" | |
| 189 tdf <- data.frame( samples = row.names(samples) ) | |
| 190 tdf[[ dname ]] <- rowSums(samples) | |
| 191 write.table(tdf, "seqCounts_nochim.tab", quote=F, sep="\t", row.names = F, col.names = T) | |
| 192 |
