annotate exomedepth.R @ 9:a73cf7ce47e7 draft default tip

Uploaded
author crs4
date Thu, 16 Mar 2017 12:41:03 -0400
parents 5a3ac3ff6062
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
1 # Load ExomeDepth library (without warnings)
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
2 suppressMessages(library(ExomeDepth))
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
3
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
4 # Import parameters
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
5 args <- commandArgs(trailingOnly=TRUE)
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
6 eval(parse(text=args[[1]]))
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
7 param <- read.table(mypars,sep="=", as.is=TRUE)
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
8
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
9 # Set common parameters
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
10 target <- param[match("target",param[,1]),2]
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
11 trans_prob <- as.numeric(param[match("trans_prob",param[,1]),2])
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
12 output <- param[match("output",param[,1]),2]
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
13 test_vs_ref <- as.logical(param[match("test_vs_ref",param[,1]),2])
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
14
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
15 # Create symbolic links for multiple bam and bai
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
16 bam <- param[param[,1]=="bam",2]
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
17 bam_bai <- param[param[,1]=="bam_bai",2]
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
18 bam_label <- param[param[,1]=="bam_label",2]
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
19 bam_label <- gsub(" ", "_", bam_label)
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
20
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
21 for(i in 1:length(bam)){
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
22 stopifnot(file.symlink(bam[i], paste(bam_label[i], "bam", sep=".")))
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
23 stopifnot(file.symlink(bam_bai[i], paste(bam_label[i], "bam.bai", sep=".")))
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
24 }
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
25
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
26 # Generate read count data
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
27 BAMFiles <- paste(bam_label, "bam", sep=".")
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
28 sink("/dev/null")
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
29 ExomeCount <- suppressMessages(getBamCounts(bed.file=target, bam.files = BAMFiles))
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
30 sink()
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
31
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
32 # Convert counts in a data frame
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
33 ExomeCount.dafr <- as(ExomeCount[, colnames(ExomeCount)], 'data.frame')
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
34
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
35 # Prepare the main matrix of read count data
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
36 ExomeCount.mat <- as.matrix(ExomeCount.dafr[, grep(names(ExomeCount.dafr), pattern='.bam')])
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
37
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
38 # Remove .bam from sample name
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
39 colnames(ExomeCount.mat) <- gsub(".bam", "", colnames(ExomeCount.mat))
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
40
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
41 # Set nsamples == 1 if mode is test vs reference, assuming test is sample 1
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
42 nsamples <- ifelse(test_vs_ref, 1, ncol(ExomeCount.mat))
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
43
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
44 # Loop over samples
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
45 for (i in 1:nsamples){
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
46
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
47 # Create the aggregate reference set for this sample
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
48 my.choice <- suppressWarnings(suppressMessages(
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
49 select.reference.set(test.counts = ExomeCount.mat[,i],
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
50 reference.counts = subset(ExomeCount.mat, select=-i),
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
51 bin.length = (ExomeCount.dafr$end - ExomeCount.dafr$start)/1000,
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
52 n.bins.reduced = 10000)))
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
53
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
54 my.reference.selected <- apply(X = ExomeCount.mat[, my.choice$reference.choice, drop=FALSE], MAR = 1, FUN = sum)
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
55
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
56 # Now create the ExomeDepth object for the CNVs call
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
57 all.exons<-suppressWarnings(suppressMessages(new('ExomeDepth',
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
58 test = ExomeCount.mat[,i],
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
59 reference = my.reference.selected,
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
60 formula = 'cbind(test,reference)~1')))
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
61
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
62
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
63 # Now call the CNVs
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
64 result <- try(all.exons<-suppressMessages(CallCNVs(x=all.exons,
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
65 transition.probability = trans_prob,
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
66 chromosome = ExomeCount.dafr$space,
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
67 start = ExomeCount.dafr$start,
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
68 end = ExomeCount.dafr$end,
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
69 name = ExomeCount.dafr$names)), silent=T)
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
70
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
71 # Next if CNVs are not detected
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
72 if (class(result)=="try-error"){
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
73 next
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
74 }
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
75
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
76 # Compute correlation between ref and test
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
77 my.cor <- cor(all.exons@reference, all.exons@test)
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
78 n.call <- nrow(all.exons@CNV.calls)
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
79
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
80 # Write results
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
81 my.results <- cbind(all.exons@CNV.calls[,c(7,5,6,3)],
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
82 sample=colnames(ExomeCount.mat)[i],
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
83 corr=my.cor,
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
84 all.exons@CNV.calls[,c(4,9,12)])
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
85
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
86 # Re-order by chr and position
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
87 chrOrder<-c(paste("chr",1:22,sep=""),"chrX","chrY","chrM")
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
88 my.results[,1] <- factor(my.results[,1], levels=chrOrder)
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
89 my.results <- my.results[order(my.results[,1], my.results[,2], my.results[,3]),]
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
90
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
91 write.table(sep='\t', quote=FALSE, file = output,
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
92 x = my.results,
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
93 row.names = FALSE, col.names = FALSE, dec=".", append=TRUE)
5a3ac3ff6062 Uploaded
crs4
parents:
diff changeset
94 }