annotate from_imgt.r @ 0:5560672b1ca4 draft default tip

Uploaded
author davidvanzessen
date Fri, 24 Jul 2015 04:44:39 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5560672b1ca4 Uploaded
davidvanzessen
parents:
diff changeset
1 library(data.table)
5560672b1ca4 Uploaded
davidvanzessen
parents:
diff changeset
2
5560672b1ca4 Uploaded
davidvanzessen
parents:
diff changeset
# Command-line interface: six positional arguments are required, in this order:
#   1. infile     - path of the tab-separated input table to read
#   2. patient    - value stored in the Patient column
#   3. sample     - value stored in the Sample column
#   4. cell.count - value stored in the Cell_Count column (kept as text)
#   5. receptor   - value stored in the Receptor column
#   6. output     - path of the tab-separated file to write
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a clear message: indexing past the end of `args` yields NA,
# which would otherwise only surface later as an obscure file I/O error.
if (length(args) < 6) {
  stop(
    "Usage: Rscript from_imgt.r <infile> <patient> <sample> <cell.count> ",
    "<receptor> <output>",
    call. = FALSE
  )
}

infile <- args[1]
patient <- args[2]
sample <- args[3]
cell.count <- args[4]
receptor <- args[5]
output <- args[6]
5560672b1ca4 Uploaded
davidvanzessen
parents:
diff changeset
18
5560672b1ca4 Uploaded
davidvanzessen
parents:
diff changeset
# Read the tab-separated input table. `fill = TRUE` pads rows that have fewer
# fields than the header instead of failing on them.
dat <- read.table(
  infile,
  header = TRUE,
  sep = "\t",
  fill = TRUE,
  stringsAsFactors = FALSE
)

# Keep only the columns used further down.
dat <- dat[
  ,
  c("V.GENE.and.allele", "J.GENE.and.allele", "AA.JUNCTION", "Sequence")
]

# TRUE where a value is present. NA is tested explicitly because `x != ""`
# evaluates to NA for missing values, and indexing rows with NA would insert
# all-NA rows instead of dropping them.
has_value <- function(x) {
  !is.na(x) & x != ""
}

# Drop rows that lack a V gene, a J gene or a sequence.
dat <- dat[
  has_value(dat$V.GENE.and.allele) &
    has_value(dat$J.GENE.and.allele) &
    has_value(dat$Sequence),
]

# Reduce a gene call to a bare gene name:
#   * keep only the first of several ", "-separated alternatives,
#   * remove the "Homsap " prefix (other prefixes are left untouched),
#   * cut everything from the first "*" onwards (presumably the allele suffix).
# Returns a character vector of the same length as `calls`.
clean_gene_name <- function(calls) {
  alternatives <- strsplit(as.character(calls), ", ")
  first_call <- vapply(alternatives, `[[`, character(1), 1)
  gsub("\\*.*", "", gsub("Homsap ", "", first_call))
}

dat$V.GENE.and.allele <- clean_gene_name(dat$V.GENE.and.allele)
dat$J.GENE.and.allele <- clean_gene_name(dat$J.GENE.and.allele)
5560672b1ca4 Uploaded
davidvanzessen
parents:
diff changeset
34
5560672b1ca4 Uploaded
davidvanzessen
parents:
diff changeset
# Collapse identical (V gene, J gene, junction, sequence) rows into one row
# each; `.N` is the number of input rows that fell into the group.
dat <- data.frame(
  data.table(dat)[
    ,
    list(Clone_Molecule_Count_From_Spikes = .N),
    by = c("V.GENE.and.allele", "J.GENE.and.allele", "AA.JUNCTION", "Sequence")
  ]
)

# Largest groups first.
dat <- dat[order(-dat$Clone_Molecule_Count_From_Spikes), ]

# Share of each group in the total number of counted rows, in percent.
# The denominator has to be the summed counts: after the aggregation above,
# nrow(dat) is the number of distinct groups, not the number of input rows,
# so dividing by it gives values that do not add up to 100.
total_reads <- sum(dat$Clone_Molecule_Count_From_Spikes)
dat$perc <- 100 * dat$Clone_Molecule_Count_From_Spikes / total_reads

# log10 of the same share expressed as a fraction (0..1).
dat$Log10_Frequency <- log10(dat$perc / 100)
5560672b1ca4 Uploaded
davidvanzessen
parents:
diff changeset
41
5560672b1ca4 Uploaded
davidvanzessen
parents:
diff changeset
# Run-level metadata, repeated on every row. rep(..., nrow(dat)) keeps the
# assignment valid when `dat` has no rows; assigning a bare scalar to a
# zero-row data frame is an error.
n_rows <- nrow(dat)
dat$Patient <- rep(patient, n_rows)
dat$Sample <- rep(sample, n_rows)
dat$Receptor <- rep(receptor, n_rows)
dat$Cell_Count <- rep(cell.count, n_rows)
dat$Total_Read_Count <- dat$Clone_Molecule_Count_From_Spikes
dat$Related_to_leukemia_clone <- rep(FALSE, n_rows)

# Output layout in final column order: each name is the output column name,
# each value is the column of `dat` it is taken from. Keeping both in a single
# mapping prevents the selection and the renaming from drifting apart.
column_map <- c(
  Patient = "Patient",
  Receptor = "Receptor",
  Sample = "Sample",
  Cell_Count = "Cell_Count",
  Clone_Molecule_Count_From_Spikes = "Clone_Molecule_Count_From_Spikes",
  Log10_Frequency = "Log10_Frequency",
  Total_Read_Count = "Total_Read_Count",
  V_Segment_Major_Gene = "V.GENE.and.allele",
  J_Segment_Major_Gene = "J.GENE.and.allele",
  Clone_Sequence = "Sequence",
  CDR3_Sense_Sequence = "AA.JUNCTION",
  Related_to_leukemia_clone = "Related_to_leukemia_clone"
)
dat <- dat[, unname(column_map), drop = FALSE]
names(dat) <- names(column_map)

# Tab-separated, unquoted, NA written as an empty field. No header line is
# written (col.names = FALSE), so the names above only fix the column order.
write.table(
  dat,
  output,
  quote = FALSE,
  sep = "\t",
  na = "",
  dec = ".",
  row.names = FALSE,
  col.names = FALSE
)

# Top-level expression: auto-prints the output path when the script is run.
output
5560672b1ca4 Uploaded
davidvanzessen
parents:
diff changeset
56