Mercurial > repos > davidvanzessen > report_clonality_igg
comparison RScript.r @ 8:e2972f0935e9 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Wed, 26 Mar 2014 11:55:07 -0400 |
| parents | 18ac92a69ef1 |
| children | 06777331fbd8 |
comparison
equal
deleted
inserted
replaced
| 7:1d0ed31089c6 | 8:e2972f0935e9 |
|---|---|
| 132 VGenes = data.frame(data.table(VGenes)[, list(Count=.N), by=c("Sample", "Top.V.Gene")]) | 132 VGenes = data.frame(data.table(VGenes)[, list(Count=.N), by=c("Sample", "Top.V.Gene")]) |
| 133 TotalPerSample = data.frame(data.table(VGenes)[, list(total=sum(.SD$Count)), by=Sample]) | 133 TotalPerSample = data.frame(data.table(VGenes)[, list(total=sum(.SD$Count)), by=Sample]) |
| 134 VGenes = merge(VGenes, TotalPerSample, by="Sample") | 134 VGenes = merge(VGenes, TotalPerSample, by="Sample") |
| 135 VGenes$Frequency = VGenes$Count * 100 / VGenes$total | 135 VGenes$Frequency = VGenes$Count * 100 / VGenes$total |
| 136 VPlot = ggplot(VGenes) | 136 VPlot = ggplot(VGenes) |
| 137 VPlot = VPlot + geom_bar(aes( x = Top.V.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 137 VPlot = VPlot + geom_bar(aes( x = Top.V.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 138 ggtitle("Distribution of V gene families") + | |
| 139 ylab("Percentage of sequences") | |
| 138 png("VFPlot.png") | 140 png("VFPlot.png") |
| 139 VPlot | 141 VPlot |
| 140 dev.off(); | 142 dev.off(); |
| 141 write.table(x=VGenes, file="VFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 143 write.table(x=VGenes, file="VFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) |
| 142 | 144 |
| 145 DGenes = data.frame(data.table(DGenes)[, list(Count=.N), by=c("Sample", "Top.D.Gene")]) | 147 DGenes = data.frame(data.table(DGenes)[, list(Count=.N), by=c("Sample", "Top.D.Gene")]) |
| 146 TotalPerSample = data.frame(data.table(DGenes)[, list(total=sum(.SD$Count)), by=Sample]) | 148 TotalPerSample = data.frame(data.table(DGenes)[, list(total=sum(.SD$Count)), by=Sample]) |
| 147 DGenes = merge(DGenes, TotalPerSample, by="Sample") | 149 DGenes = merge(DGenes, TotalPerSample, by="Sample") |
| 148 DGenes$Frequency = DGenes$Count * 100 / DGenes$total | 150 DGenes$Frequency = DGenes$Count * 100 / DGenes$total |
| 149 DPlot = ggplot(DGenes) | 151 DPlot = ggplot(DGenes) |
| 150 DPlot = DPlot + geom_bar(aes( x = Top.D.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 152 DPlot = DPlot + geom_bar(aes( x = Top.D.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 153 ggtitle("Distribution of D gene families") + | |
| 154 ylab("Percentage of sequences") | |
| 151 png("DFPlot.png") | 155 png("DFPlot.png") |
| 152 DPlot | 156 DPlot |
| 153 dev.off(); | 157 dev.off(); |
| 154 write.table(x=DGenes, file="DFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 158 write.table(x=DGenes, file="DFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) |
| 155 | 159 |
| 158 JGenes = data.frame(data.table(JGenes)[, list(Count=.N), by=c("Sample", "Top.J.Gene")]) | 162 JGenes = data.frame(data.table(JGenes)[, list(Count=.N), by=c("Sample", "Top.J.Gene")]) |
| 159 TotalPerSample = data.frame(data.table(JGenes)[, list(total=sum(.SD$Count)), by=Sample]) | 163 TotalPerSample = data.frame(data.table(JGenes)[, list(total=sum(.SD$Count)), by=Sample]) |
| 160 JGenes = merge(JGenes, TotalPerSample, by="Sample") | 164 JGenes = merge(JGenes, TotalPerSample, by="Sample") |
| 161 JGenes$Frequency = JGenes$Count * 100 / JGenes$total | 165 JGenes$Frequency = JGenes$Count * 100 / JGenes$total |
| 162 JPlot = ggplot(JGenes) | 166 JPlot = ggplot(JGenes) |
| 163 JPlot = JPlot + geom_bar(aes( x = Top.J.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 167 JPlot = JPlot + geom_bar(aes( x = Top.J.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 168 ggtitle("Distribution of J gene families") + | |
| 169 ylab("Percentage of sequences") | |
| 164 png("JFPlot.png") | 170 png("JFPlot.png") |
| 165 JPlot | 171 JPlot |
| 166 dev.off(); | 172 dev.off(); |
| 167 write.table(x=JGenes, file="JFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 173 write.table(x=JGenes, file="JFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) |
| 174 | |
| 175 CDR3Length = data.frame(data.table(PRODF)[, list(Count=.N), by=c("Sample", "CDR3.Length.DNA")]) | |
| 176 TotalPerSample = data.frame(data.table(CDR3Length)[, list(total=sum(.SD$Count)), by=Sample]) | |
| 177 CDR3Length = merge(CDR3Length, TotalPerSample, by="Sample") | |
| 178 CDR3Length$Frequency = CDR3Length$Count * 100 / CDR3Length$total | |
| 179 CDR3LengthPlot = ggplot(CDR3Length) | |
| 180 CDR3LengthPlot = CDR3LengthPlot + geom_bar(aes( x = CDR3.Length.DNA, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
| 181 ggtitle("Length distribution of CDR3") + | |
| 182 xlab("CDR3 Length") + | |
| 183 ylab("Percentage of sequences") | |
| 184 png("CDR3LengthPlot.png",width = 1280, height = 720) | |
| 185 CDR3LengthPlot | |
| 186 dev.off() | |
| 187 write.table(x=CDR3Length, file="CDR3LengthPlot.csv", sep=",",quote=F,row.names=F,col.names=T) | |
| 168 | 188 |
| 169 revVchain = Vchain | 189 revVchain = Vchain |
| 170 revDchain = Dchain | 190 revDchain = Dchain |
| 171 revVchain$chr.orderV = rev(revVchain$chr.orderV) | 191 revVchain$chr.orderV = rev(revVchain$chr.orderV) |
| 172 revDchain$chr.orderD = rev(revDchain$chr.orderD) | 192 revDchain$chr.orderD = rev(revDchain$chr.orderD) |
