comparison RScript.r @ 8:e2972f0935e9 draft

Uploaded
author davidvanzessen
date Wed, 26 Mar 2014 11:55:07 -0400
parents 18ac92a69ef1
children 06777331fbd8
comparison
equal deleted inserted replaced
7:1d0ed31089c6 8:e2972f0935e9
132 VGenes = data.frame(data.table(VGenes)[, list(Count=.N), by=c("Sample", "Top.V.Gene")]) 132 VGenes = data.frame(data.table(VGenes)[, list(Count=.N), by=c("Sample", "Top.V.Gene")])
133 TotalPerSample = data.frame(data.table(VGenes)[, list(total=sum(.SD$Count)), by=Sample]) 133 TotalPerSample = data.frame(data.table(VGenes)[, list(total=sum(.SD$Count)), by=Sample])
134 VGenes = merge(VGenes, TotalPerSample, by="Sample") 134 VGenes = merge(VGenes, TotalPerSample, by="Sample")
135 VGenes$Frequency = VGenes$Count * 100 / VGenes$total 135 VGenes$Frequency = VGenes$Count * 100 / VGenes$total
136 VPlot = ggplot(VGenes) 136 VPlot = ggplot(VGenes)
137 VPlot = VPlot + geom_bar(aes( x = Top.V.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 137 VPlot = VPlot + geom_bar(aes( x = Top.V.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
138 ggtitle("Distribution of V gene families") +
139 ylab("Percentage of sequences")
138 png("VFPlot.png") 140 png("VFPlot.png")
139 VPlot 141 VPlot
140 dev.off(); 142 dev.off();
141 write.table(x=VGenes, file="VFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) 143 write.table(x=VGenes, file="VFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T)
142 144
145 DGenes = data.frame(data.table(DGenes)[, list(Count=.N), by=c("Sample", "Top.D.Gene")]) 147 DGenes = data.frame(data.table(DGenes)[, list(Count=.N), by=c("Sample", "Top.D.Gene")])
146 TotalPerSample = data.frame(data.table(DGenes)[, list(total=sum(.SD$Count)), by=Sample]) 148 TotalPerSample = data.frame(data.table(DGenes)[, list(total=sum(.SD$Count)), by=Sample])
147 DGenes = merge(DGenes, TotalPerSample, by="Sample") 149 DGenes = merge(DGenes, TotalPerSample, by="Sample")
148 DGenes$Frequency = DGenes$Count * 100 / DGenes$total 150 DGenes$Frequency = DGenes$Count * 100 / DGenes$total
149 DPlot = ggplot(DGenes) 151 DPlot = ggplot(DGenes)
150 DPlot = DPlot + geom_bar(aes( x = Top.D.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 152 DPlot = DPlot + geom_bar(aes( x = Top.D.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
153 ggtitle("Distribution of D gene families") +
154 ylab("Percentage of sequences")
151 png("DFPlot.png") 155 png("DFPlot.png")
152 DPlot 156 DPlot
153 dev.off(); 157 dev.off();
154 write.table(x=DGenes, file="DFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) 158 write.table(x=DGenes, file="DFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T)
155 159
158 JGenes = data.frame(data.table(JGenes)[, list(Count=.N), by=c("Sample", "Top.J.Gene")]) 162 JGenes = data.frame(data.table(JGenes)[, list(Count=.N), by=c("Sample", "Top.J.Gene")])
159 TotalPerSample = data.frame(data.table(JGenes)[, list(total=sum(.SD$Count)), by=Sample]) 163 TotalPerSample = data.frame(data.table(JGenes)[, list(total=sum(.SD$Count)), by=Sample])
160 JGenes = merge(JGenes, TotalPerSample, by="Sample") 164 JGenes = merge(JGenes, TotalPerSample, by="Sample")
161 JGenes$Frequency = JGenes$Count * 100 / JGenes$total 165 JGenes$Frequency = JGenes$Count * 100 / JGenes$total
162 JPlot = ggplot(JGenes) 166 JPlot = ggplot(JGenes)
163 JPlot = JPlot + geom_bar(aes( x = Top.J.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 167 JPlot = JPlot + geom_bar(aes( x = Top.J.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
168 ggtitle("Distribution of J gene families") +
169 ylab("Percentage of sequences")
164 png("JFPlot.png") 170 png("JFPlot.png")
165 JPlot 171 JPlot
166 dev.off(); 172 dev.off();
167 write.table(x=JGenes, file="JFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) 173 write.table(x=JGenes, file="JFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T)
174
175 CDR3Length = data.frame(data.table(PRODF)[, list(Count=.N), by=c("Sample", "CDR3.Length.DNA")])
176 TotalPerSample = data.frame(data.table(CDR3Length)[, list(total=sum(.SD$Count)), by=Sample])
177 CDR3Length = merge(CDR3Length, TotalPerSample, by="Sample")
178 CDR3Length$Frequency = CDR3Length$Count * 100 / CDR3Length$total
179 CDR3LengthPlot = ggplot(CDR3Length)
180 CDR3LengthPlot = CDR3LengthPlot + geom_bar(aes( x = CDR3.Length.DNA, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
181 ggtitle("Length distribution of CDR3") +
182 xlab("CDR3 Length") +
183 ylab("Percentage of sequences")
184 png("CDR3LengthPlot.png",width = 1280, height = 720)
185 CDR3LengthPlot
186 dev.off()
187 write.table(x=CDR3Length, file="CDR3LengthPlot.csv", sep=",",quote=F,row.names=F,col.names=T)
168 188
169 revVchain = Vchain 189 revVchain = Vchain
170 revDchain = Dchain 190 revDchain = Dchain
171 revVchain$chr.orderV = rev(revVchain$chr.orderV) 191 revVchain$chr.orderV = rev(revVchain$chr.orderV)
172 revDchain$chr.orderD = rev(revDchain$chr.orderD) 192 revDchain$chr.orderD = rev(revDchain$chr.orderD)