Mercurial > repos > davidvanzessen > from_imgt_clonal_pairs
comparison from_imgt.r @ 0:5560672b1ca4 draft default tip
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 24 Jul 2015 04:44:39 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5560672b1ca4 |
---|---|
# Convert an IMGT summary table (tab-separated, with the columns
# "V.GENE.and.allele", "J.GENE.and.allele", "AA.JUNCTION" and "Sequence")
# into a headerless, tab-separated clone table: one row per unique
# V gene / J gene / AA junction / sequence combination, with its read count
# and log10 frequency.
#
# Usage:
#   Rscript from_imgt.r <infile> <patient> <sample> <cell.count> \
#     <receptor> <output>
#
# Output columns, in order (no header row is written):
#   Patient, Receptor, Sample, Cell_Count, Clone_Molecule_Count_From_Spikes,
#   Log10_Frequency, Total_Read_Count, V_Segment_Major_Gene,
#   J_Segment_Major_Gene, Clone_Sequence, CDR3_Sense_Sequence,
#   Related_to_leukemia_clone

library(data.table)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 6) {
  stop(
    "expected 6 arguments: infile patient sample cell.count receptor output",
    call. = FALSE
  )
}

infile <- args[1]
patient <- args[2]
sample_name <- args[3]
# Kept as the raw string on purpose: it is only echoed into the output, and
# converting to numeric could change how it is written (e.g. "1e+05").
cell_count <- args[4]
receptor <- args[5]
output <- args[6]

key_cols <- c("V.GENE.and.allele", "J.GENE.and.allele", "AA.JUNCTION",
              "Sequence")

# TRUE where a field holds a usable value. A plain `x != ""` yields NA for
# missing values, which would turn those rows into all-NA rows when used as
# a row index instead of dropping them.
has_value <- function(x) {
  !is.na(x) & x != ""
}

# Reduce a gene call such as "Homsap IGHV3-23*01 F, or Homsap IGHV3-23D*01 F"
# to the name of the first listed gene without species prefix or allele
# ("IGHV3-23").
# NOTE(review): only the "Homsap " prefix is stripped; calls for other species
# keep their prefix -- confirm whether that is intended.
major_gene <- function(calls) {
  parts <- strsplit(as.character(calls), ", ", fixed = TRUE)
  first_call <- vapply(parts, function(p) p[1L], character(1))
  first_call <- gsub("Homsap ", "", first_call)
  gsub("\\*.*", "", first_call)
}

dat <- read.table(infile, header = TRUE, sep = "\t", fill = TRUE,
                  stringsAsFactors = FALSE)
dat <- dat[, key_cols]

# Only rows with a V call, a J call and a sequence are counted.
keep <- has_value(dat$V.GENE.and.allele) &
  has_value(dat$J.GENE.and.allele) &
  has_value(dat$Sequence)
dat <- dat[keep, ]

dat$V.GENE.and.allele <- major_gene(dat$V.GENE.and.allele)
dat$J.GENE.and.allele <- major_gene(dat$J.GENE.and.allele)

# Collapse identical reads into clones; .N is the number of reads per clone.
dat <- data.frame(
  data.table(dat)[, list(Clone_Molecule_Count_From_Spikes = .N),
                  by = key_cols]
)

# Most abundant clones first.
dat <- dat[order(-dat$Clone_Molecule_Count_From_Spikes), ]

# Frequency of a clone = its reads / all retained reads. The denominator must
# be the total read count, not the number of distinct clones (nrow after
# aggregation), otherwise frequencies can exceed 1.
total_reads <- sum(dat$Clone_Molecule_Count_From_Spikes)
log10_frequency <- log10(dat$Clone_Molecule_Count_From_Spikes / total_reads)

# rep() keeps the constant columns valid even when no clone survived the
# filters, so an empty input produces an empty output file instead of an
# error.
n_clones <- nrow(dat)
result <- data.frame(
  Patient = rep(patient, n_clones),
  Receptor = rep(receptor, n_clones),
  Sample = rep(sample_name, n_clones),
  Cell_Count = rep(cell_count, n_clones),
  Clone_Molecule_Count_From_Spikes = dat$Clone_Molecule_Count_From_Spikes,
  Log10_Frequency = log10_frequency,
  Total_Read_Count = dat$Clone_Molecule_Count_From_Spikes,
  V_Segment_Major_Gene = dat$V.GENE.and.allele,
  J_Segment_Major_Gene = dat$J.GENE.and.allele,
  Clone_Sequence = dat$Sequence,
  CDR3_Sense_Sequence = dat$AA.JUNCTION,
  Related_to_leukemia_clone = rep(FALSE, n_clones),
  stringsAsFactors = FALSE
)

write.table(result, output, quote = FALSE, sep = "\t", na = "", dec = ".",
            row.names = FALSE, col.names = FALSE)

# Echo the output path on stdout, as the original script did.
print(output)