# HG changeset patch
# User davidvanzessen
# Date 1470741641 14400
# Node ID e7b550d52eb731942b593a714515f473ecb436b3
# Parent b84477f57318b12081fd22c8b75638c6346cee99
Uploaded
diff -r b84477f57318 -r e7b550d52eb7 baseline/Baseline_Functions.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/baseline/Baseline_Functions.r Tue Aug 09 07:20:41 2016 -0400
@@ -0,0 +1,2287 @@
+#########################################################################################
+# License Agreement
+#
+# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE
+# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER
+# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE
+# OR COPYRIGHT LAW IS PROHIBITED.
+#
+# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE
+# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED
+# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN
+# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
+#
+# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
+# Coded by: Mohamed Uduman & Gur Yaari
+# Copyright 2012 Kleinstein Lab
+# Version: 1.3 (01/23/2014)
+#########################################################################################
+
+# Global variables
+
+ FILTER_BY_MUTATIONS = 1000
+
+ # Nucleotides
+ NUCLEOTIDES = c("A","C","G","T")
+
+ # Amino Acids
+ AMINO_ACIDS <- c("F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "*", "*", "C", "C", "*", "W", "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R", "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R", "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G")
+ names(AMINO_ACIDS) <- c("TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TAA", "TAG", "TGT", "TGC", "TGA", "TGG", "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG", "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG", "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG")
+ names(AMINO_ACIDS) <- names(AMINO_ACIDS)
+
+ # Amino acid traits; entries follow the order given by sort(unique(AMINO_ACIDS)), expected to be:
+ #"*" "A" "C" "D" "E" "F" "G" "H" "I" "K" "L" "M" "N" "P" "Q" "R" "S" "T" "V" "W" "Y"
+ # B = "Hydrophobic/Buried", N = "Intermediate/Neutral", S = "Hydrophilic/Surface"
+ TRAITS_AMINO_ACIDS_CHOTHIA98 <- c("*","N","B","S","S","B","N","N","B","S","B","B","S","N","S","S","N","N","B","B","N")
+ names(TRAITS_AMINO_ACIDS_CHOTHIA98) <- sort(unique(AMINO_ACIDS))
+ TRAITS_AMINO_ACIDS <- array(NA,21) # placeholder; initializeTraitChange() assigns the real table
+
+ # Codon Table
+ CODON_TABLE <- as.data.frame(matrix(NA,ncol=64,nrow=12))
+
+ # Substitution Model: Smith DS et al. 1996
+ substitution_Literature_Mouse <- matrix(c(0, 0.156222928, 0.601501588, 0.242275484, 0.172506739, 0, 0.241239892, 0.586253369, 0.54636291, 0.255795364, 0, 0.197841727, 0.290240811, 0.467680608, 0.24207858, 0),nrow=4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
+ substitution_Flu_Human <- matrix(c(0,0.2795596,0.5026927,0.2177477,0.1693210,0,0.3264723,0.5042067,0.4983549,0.3328321,0,0.1688130,0.2021079,0.4696077,0.3282844,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
+ substitution_Flu25_Human <- matrix(c(0,0.2580641,0.5163685,0.2255674,0.1541125,0,0.3210224,0.5248651,0.5239281,0.3101292,0,0.1659427,0.1997207,0.4579444,0.3423350,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
+ load("FiveS_Substitution.RData")
+
+ # Mutability Models: Shapiro GS et al. 2002
+ triMutability_Literature_Human <- matrix(c(0.24, 1.2, 0.96, 0.43, 2.14, 2, 1.11, 1.9, 0.85, 1.83, 2.36, 1.31, 0.82, 0.52, 0.89, 1.33, 1.4, 0.82, 1.83, 0.73, 1.83, 1.62, 1.53, 0.57, 0.92, 0.42, 0.42, 1.47, 3.44, 2.58, 1.18, 0.47, 0.39, 1.12, 1.8, 0.68, 0.47, 2.19, 2.35, 2.19, 1.05, 1.84, 1.26, 0.28, 0.98, 2.37, 0.66, 1.58, 0.67, 0.92, 1.76, 0.83, 0.97, 0.56, 0.75, 0.62, 2.26, 0.62, 0.74, 1.11, 1.16, 0.61, 0.88, 0.67, 0.37, 0.07, 1.08, 0.46, 0.31, 0.94, 0.62, 0.57, 0.29, NA, 1.44, 0.46, 0.69, 0.57, 0.24, 0.37, 1.1, 0.99, 1.39, 0.6, 2.26, 1.24, 1.36, 0.52, 0.33, 0.26, 1.25, 0.37, 0.58, 1.03, 1.2, 0.34, 0.49, 0.33, 2.62, 0.16, 0.4, 0.16, 0.35, 0.75, 1.85, 0.94, 1.61, 0.85, 2.09, 1.39, 0.3, 0.52, 1.33, 0.29, 0.51, 0.26, 0.51, 3.83, 2.01, 0.71, 0.58, 0.62, 1.07, 0.28, 1.2, 0.74, 0.25, 0.59, 1.09, 0.91, 1.36, 0.45, 2.89, 1.27, 3.7, 0.69, 0.28, 0.41, 1.17, 0.56, 0.93, 3.41, 1, 1, NA, 5.9, 0.74, 2.51, 2.24, 2.24, 1.95, 3.32, 2.34, 1.3, 2.3, 1, 0.66, 0.73, 0.93, 0.41, 0.65, 0.89, 0.65, 0.32, NA, 0.43, 0.85, 0.43, 0.31, 0.31, 0.23, 0.29, 0.57, 0.71, 0.48, 0.44, 0.76, 0.51, 1.7, 0.85, 0.74, 2.23, 2.08, 1.16, 0.51, 0.51, 1, 0.5, NA, NA, 0.71, 2.14), nrow=64,byrow=T)
+ triMutability_Literature_Mouse <- matrix(c(1.31, 1.35, 1.42, 1.18, 2.02, 2.02, 1.02, 1.61, 1.99, 1.42, 2.01, 1.03, 2.02, 0.97, 0.53, 0.71, 1.19, 0.83, 0.96, 0.96, 0, 1.7, 2.22, 0.59, 1.24, 1.07, 0.51, 1.68, 3.36, 3.36, 1.14, 0.29, 0.33, 0.9, 1.11, 0.63, 1.08, 2.07, 2.27, 1.74, 0.22, 1.19, 2.37, 1.15, 1.15, 1.56, 0.81, 0.34, 0.87, 0.79, 2.13, 0.49, 0.85, 0.97, 0.36, 0.82, 0.66, 0.63, 1.15, 0.94, 0.85, 0.25, 0.93, 1.19, 0.4, 0.2, 0.44, 0.44, 0.88, 1.06, 0.77, 0.39, 0, 0, 0, 0, 0, 0, 0.43, 0.43, 0.86, 0.59, 0.59, 0, 1.18, 0.86, 2.9, 1.66, 0.4, 0.2, 1.54, 0.43, 0.69, 1.71, 0.68, 0.55, 0.91, 0.7, 1.71, 0.09, 0.27, 0.63, 0.2, 0.45, 1.01, 1.63, 0.96, 1.48, 2.18, 1.2, 1.31, 0.66, 2.13, 0.49, 0, 0, 0, 2.97, 2.8, 0.79, 0.4, 0.5, 0.4, 0.11, 1.68, 0.42, 0.13, 0.44, 0.93, 0.71, 1.11, 1.19, 2.71, 1.08, 3.43, 0.4, 0.67, 0.47, 1.02, 0.14, 1.56, 1.98, 0.53, 0.33, 0.63, 2.06, 1.77, 1.46, 3.74, 2.93, 2.1, 2.18, 0.78, 0.73, 2.93, 0.63, 0.57, 0.17, 0.85, 0.52, 0.31, 0.31, 0, 0, 0.51, 0.29, 0.83, 0.54, 0.28, 0.47, 0.9, 0.99, 1.24, 2.47, 0.73, 0.23, 1.13, 0.24, 2.12, 0.24, 0.33, 0.83, 1.41, 0.62, 0.28, 0.35, 0.77, 0.17, 0.72, 0.58, 0.45, 0.41), nrow=64,byrow=T)
+ triMutability_Names <- c("AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT")
+ load("FiveS_Mutability.RData")
+
+# Functions
+
+ # Look up the amino acid letter(s) for the given codon name(s) in AMINO_ACIDS
+ translateCodonToAminoAcid <- function(Codon) {
+   AMINO_ACIDS[Codon]
+ }
+
+ # Map amino acid letter(s) to their entry in the global TRAITS_AMINO_ACIDS table
+ translateAminoAcidToTraitChange <- function(AminoAcid) {
+   TRAITS_AMINO_ACIDS[AminoAcid]
+ }
+
+ # Set the global TRAITS_AMINO_ACIDS from a tab-delimited file when one is named, else from
+ # the built-in Chothia 98 classes. traitChangeModel and species are not used here.
+ initializeTraitChange <- function(traitChangeModel=1,species=1,traitChangeFileName=NULL){
+   if(is.null(traitChangeFileName)){
+     traitChange <- TRAITS_AMINO_ACIDS_CHOTHIA98
+   }else{
+     # A failed read prints the message below and then quits the R session
+     readFailed <- function(ex){
+       cat("Error|Error reading trait changes. Please check file name/path and format.\n")
+       q()
+     }
+     traitChange <- tryCatch(read.delim(traitChangeFileName,sep="\t",header=TRUE), error = readFailed)
+   }
+   TRAITS_AMINO_ACIDS <<- traitChange
+ }
+
+ # Build the nucleotide substitution matrix.
+ #   substitutionModel: 0 = uniform (1/3 for every change), 1 = substitution_Literature_Mouse,
+ #                      2 = substitution_Flu_Human, 3 = substitution_Flu25_Human,
+ #                      5 = FiveS_Substitution, returned exactly as loaded
+ #   species:           accepted but not used here
+ #   subsMatFileName:   optional tab-delimited file (with header) used instead of models 1-3;
+ #                      its rows are rescaled to sum to 1 unless all four already do
+ # Except for model 5, returns a 6x4 matrix: rows A,C,G,T,N,- (the N and - rows are NA)
+ # and columns A,C,G,T.
+ initializeSubstitutionMatrix <- function(substitutionModel,species,subsMatFileName=NULL){
+   if(is.null(subsMatFileName)){
+     if(substitutionModel==1) subsMat <- substitution_Literature_Mouse
+     if(substitutionModel==2) subsMat <- substitution_Flu_Human
+     if(substitutionModel==3) subsMat <- substitution_Flu25_Human
+   }else{
+     # A failed read prints the message below and then quits the R session
+     readFailed <- function(ex){
+       cat("Error|Error reading substitution matrix. Please check file name/path and format.\n")
+       q()
+     }
+     subsMat <- tryCatch(read.delim(subsMatFileName,sep="\t",header=TRUE), error = readFailed)
+     rowTotals <- apply(subsMat,1,sum)
+     if(sum(rowTotals==1)!=4) subsMat = t(apply(subsMat,1,function(x)x/sum(x)))
+   }
+
+   # Model 0 replaces anything chosen above with the uniform matrix
+   if(substitutionModel==0){
+     subsMat <- matrix(1/3,4,4)
+     diag(subsMat) <- 0
+   }
+   if(substitutionModel==5){
+     return(FiveS_Substitution)
+   }
+   # Add the all-NA rows for "N" and "-", then attach the row and column names
+   subsMat <- rbind(subsMat,rep(NA,4),rep(NA,4))
+   matrix(data.matrix(subsMat),6,4,dimnames=list(c(NUCLEOTIDES,"N", "-"),NUCLEOTIDES))
+ }
+
+
+ # Build the mutability matrix.
+ #   mutabilityModel:       0 = flat (all ones), 5 = FiveS_Mutability returned as loaded;
+ #                          any other value keeps the file / literature matrix
+ #   species:               2 selects the mouse literature matrix, anything else the human one
+ #   mutabilityMatFileName: optional tab-delimited file (with header) used instead of the
+ #                          literature matrices
+ # Except for model 5, returns a 64x3 matrix with rows named by triMutability_Names and
+ # columns 1:3.
+ initializeMutabilityMatrix <- function(mutabilityModel=1, species=1,mutabilityMatFileName=NULL){
+   if(is.null(mutabilityMatFileName)){
+     mutabilityMat <- if(species==2) triMutability_Literature_Mouse else triMutability_Literature_Human
+   }else{
+     # A failed read prints the message below and then quits the R session
+     readFailed <- function(ex){
+       cat("Error|Error reading mutability matrix. Please check file name/path and format.\n")
+       q()
+     }
+     mutabilityMat <- tryCatch(read.delim(mutabilityMatFileName,sep="\t",header=TRUE), error = readFailed)
+   }
+
+   if(mutabilityModel==0) mutabilityMat <- matrix(1,64,3)
+   if(mutabilityModel==5) return(FiveS_Mutability)
+   matrix( data.matrix(mutabilityMat), 64, 3, dimnames=list(triMutability_Names,1:3))
+ }
+
+ # Read a FASTA file, optionally divided into groups by ">>>" header lines, into a named list of sequences.
+ # Modified from read.fasta from the seqinR package
+ baseline.read.fasta <-
+ function (file = system.file("sequences/sample.fasta", package = "seqinr"),
+ seqtype = c("DNA", "AA"), as.string = FALSE, forceDNAtolower = TRUE, # as.string=FALSE: each sequence is split with s2c()
+ set.attributes = TRUE, legacy.mode = TRUE, seqonly = FALSE, # seqonly=TRUE: return only the sequence strings
+ strip.desc = FALSE, sizeof.longlong = .Machine$sizeof.longlong, # sizeof.longlong, endian and apply.mask
+ endian = .Platform$endian, apply.mask = TRUE) # are not referenced in the body
+ {
+ seqtype <- match.arg(seqtype)
+
+ lines <- readLines(file)
+ # Legacy mode: discard comment lines, i.e. those starting with ";"
+ if (legacy.mode) {
+ comments <- grep("^;", lines)
+ if (length(comments) > 0)
+ lines <- lines[-comments]
+ }
+
+ # Guarantee at least one ">>>" group header: prepend a catch-all group when the file has none
+ ind_groups<-which(substr(lines, 1L, 3L) == ">>>")
+ lines_mod<-lines
+
+ if(!length(ind_groups)){
+ lines_mod<-c(">>>All sequences combined",lines)
+ }
+
+ ind_groups<-which(substr(lines_mod, 1L, 3L) == ">>>")
+ # Rebuild `lines` with one placeholder line inserted directly after every group header
+ lines <- array("BLA",dim=(length(ind_groups)+length(lines_mod)))
+ id<-sapply(1:length(ind_groups),function(i)ind_groups[i]+i-1)+1 # slots of the placeholders in the longer array
+ lines[id] <- "THIS IS A FAKE SEQUENCE"
+ lines[-id] <- lines_mod
+ rm(lines_mod)
+ # Every line starting with ">" is a record header; the lines up to the next header are its sequence
+ ind <- which(substr(lines, 1L, 1L) == ">")
+ nseq <- length(ind)
+ if (nseq == 0) {
+ stop("no line starting with a > character found")
+ }
+ start <- ind + 1
+ end <- ind - 1
+ # Drop headers that are immediately followed by another header (they have no sequence lines)
+ while( any(which(ind%in%end)) ){
+ ind=ind[-which(ind%in%end)]
+ nseq <- length(ind)
+ if (nseq == 0) {
+ stop("no line starting with a > character found")
+ }
+ start <- ind + 1
+ end <- ind - 1
+ }
+ # A record ends on the line before the next header; the last record ends with the file
+ end <- c(end[-1], length(lines))
+ sequences <- lapply(seq_len(nseq), function(i) paste(lines[start[i]:end[i]], collapse = ""))
+ if (seqonly)
+ return(sequences)
+ nomseq <- lapply(seq_len(nseq), function(i) {
+ # Record name = the whole header line minus its first character
+ #firstword <- strsplit(lines[ind[i]], " ")[[1]][1]
+ substr(lines[ind[i]], 2, nchar(lines[ind[i]]))
+
+ })
+ if (seqtype == "DNA") {
+ if (forceDNAtolower) {
+ sequences <- as.list(tolower(chartr(".","-",sequences))) # "." is rewritten to "-" before the case change
+ }else{
+ sequences <- as.list(toupper(chartr(".","-",sequences)))
+ }
+ }
+ if (as.string == FALSE)
+ sequences <- lapply(sequences, s2c)
+ if (set.attributes) {
+ for (i in seq_len(nseq)) {
+ Annot <- lines[ind[i]]
+ if (strip.desc)
+ Annot <- substr(Annot, 2L, nchar(Annot))
+ attributes(sequences[[i]]) <- list(name = nomseq[[i]],
+ Annot = Annot, class = switch(seqtype, AA = "SeqFastaAA",
+ DNA = "SeqFastadna"))
+ }
+ }
+ names(sequences) <- nomseq
+ return(sequences)
+ }
+
+
+ # Replace every character that is not A/C/G/T/N, a/c/g/t or punctuation (which includes "-" and ".") with "N"
+ replaceNonFASTAChars <-function(inSeq="ACGTN-AApA"){
+   gsub('[^ACGTNacgt[:punct:]]','N',inSeq,perl=TRUE) # no hyphen between POSIX classes: PCRE2 rejects that as an invalid range
+ }
+
+ # Flag the germline entries: IDs that start with ">" but not with ">>"
+ germlinesInFile <- function(seqIDs){
+   vapply(seqIDs, function(id){
+     substr(id,1,1)==">" && substr(id,2,2)!=">"
+   }, logical(1))
+ }
+
+ # Flag the group entries: IDs that start with ">>"
+ groupsInFile <- function(seqIDs){
+   vapply(seqIDs, function(id) substr(id,1,2)==">>", logical(1))
+ }
+
+ # Given a named logical vector flagging header entries (germlines or groups), label every entry with the running index of its block (values) and the block's ">"-stripped ID (names)
+ expandTillNext <- function(vecPosToID){
+ IDs = names(vecPosToID)
+ posOfInterests = which(vecPosToID)
+ # A block runs from one flagged entry up to just before the next; the first block always starts at entry 1
+ expandedID = rep(NA,length(IDs)) # overwritten below
+ expandedIDNames = gsub(">","",IDs[posOfInterests])
+ startIndexes = c(1,posOfInterests[-1])
+ stopIndexes = c(posOfInterests[-1]-1,length(IDs))
+ expandedID = unlist(sapply(1:length(startIndexes),function(i){ # block index repeated once per entry of the block
+ rep(i,stopIndexes[i]-startIndexes[i]+1)
+ }))
+ names(expandedID) = unlist(sapply(1:length(startIndexes),function(i){ # block ID repeated the same way
+ rep(expandedIDNames[i],stopIndexes[i]-startIndexes[i]+1)
+ }))
+ return(expandedID)
+ }
+
+ # Process FASTA (list) to return a matrix [input, germline] plus per-sequence germline/group labels and any conservation rows
+ processInputAdvanced <- function(inputFASTA){
+ # Entries are classified by name: ">..." = germline, ">>..." = group, anything else = input sequence
+ seqIDs = names(inputFASTA)
+ numbSeqs = length(seqIDs)
+ posGermlines1 = germlinesInFile(seqIDs)
+ numbGermlines = sum(posGermlines1)
+ posGroups1 = groupsInFile(seqIDs)
+ numbGroups = sum(posGroups1)
+ consDef = NA
+
+ if(numbGermlines==0){
+ posGermlines = 2 # NOTE(review): overwritten by the expandTillNext() call further down
+ numbGermlines = 1
+ }
+ # Running germline index per entry, and how many entries carry each index
+ glPositionsSum = cumsum(posGermlines1)
+ glPositions = table(glPositionsSum)
+ # An index carried by a single entry is followed (index + 1) by what is taken as the conservation row - NOTE(review): confirm
+ consDefPos = as.numeric(names(glPositions[names(glPositions)!=0 & glPositions==1]))+1
+ if( length(consDefPos)> 0 ){
+ consDefID = match(consDefPos, glPositionsSum)
+ # The conservation rows need to be pulled out and stored separately
+ consDef = inputFASTA[consDefID]
+ inputFASTA = inputFASTA[-consDefID]
+ # Recompute the bookkeeping now that those rows are gone
+ seqIDs = names(inputFASTA)
+ numbSeqs = length(seqIDs)
+ posGermlines1 = germlinesInFile(seqIDs)
+ numbGermlines = sum(posGermlines1)
+ posGroups1 = groupsInFile(seqIDs)
+ numbGroups = sum(posGroups1)
+ if(numbGermlines==0){
+ posGermlines = 2
+ numbGermlines = 1
+ }
+ }
+ # Label every entry with the group block and germline block it falls in
+ posGroups <- expandTillNext(posGroups1)
+ posGermlines <- expandTillNext(posGermlines1)
+ posGermlines[posGroups1] = 0
+ names(posGermlines)[posGroups1] = names(posGroups)[posGroups1]
+ posInput = rep(TRUE,numbSeqs)
+ posInput[posGroups1 | posGermlines1] = FALSE
+ # Result matrix: one row per input sequence (entries that are neither group nor germline)
+ matInput = matrix(NA, nrow=sum(posInput), ncol=2)
+ rownames(matInput) = seqIDs[posInput]
+ colnames(matInput) = c("Input","Germline")
+ # Column 2: among the entries whose name is a referenced germline ID (">" re-prefixed), pick the one at each input's germline index
+ vecInputFASTA = unlist(inputFASTA) # assumes one string per entry - TODO confirm against the reader settings used by the caller
+ matInput[,1] = vecInputFASTA[posInput]
+ matInput[,2] = vecInputFASTA[ which( names(inputFASTA)%in%paste(">",names(posGermlines)[posInput],sep="") )[ posGermlines[posInput]] ]
+
+ germlines = posGermlines[posInput]
+ groups = posGroups[posInput]
+
+ return( list("matInput"=matInput, "germlines"=germlines, "groups"=groups, "conservationDefinition"=consDef ))
+ }
+
+
+ # Replace leading and trailing dashes in the sequence
+ replaceLeadingTrailingDashes <- function(x,readEnd){
+ iiGap = unlist(gregexpr("-",x[1]))
+ ggGap = unlist(gregexpr("-",x[2]))
+ #posToChange = intersect(iiGap,ggGap)
+
+
+ seqIn = replaceLeadingTrailingDashesHelper(x[1])
+ seqGL = replaceLeadingTrailingDashesHelper(x[2])
+ seqTemplate = rep('N',readEnd)
+ seqIn <- c(seqIn,seqTemplate[(length(seqIn)+1):readEnd])
+ seqGL <- c(seqGL,seqTemplate[(length(seqGL)+1):readEnd])
+# if(posToChange!=-1){
+# seqIn[posToChange] = "-"
+# seqGL[posToChange] = "-"
+# }
+
+ seqIn = c2s(seqIn[1:readEnd])
+ seqGL = c2s(seqGL[1:readEnd])
+
+ lenGL = nchar(seqGL)
+ if(lenGL seqLen )
+ trimmedSeq = substr(seqToTrim,1, ( (getCodonPos(seqLen)[1])-1 ) )
+
+ return(trimmedSeq)
+ }
+
+ # Given a nucleotide position, returns the positions of the 3 nucs that make up its codon
+ # e.g. nuc 86 belongs to the codon at nucs 85,86,87
+ getCodonPos <- function(nucPos){
+   lastNuc <- 3 * ceiling(nucPos/3)
+   (lastNuc-2):lastNuc
+ }
+
+ # Given a nucleotide position, returns the number of the codon that contains it
+ # e.g. nuc 86 = codon 29
+ getCodonNumb <- function(nucPos){
+   ceiling(nucPos/3)
+ }
+
+ # Given a codon number, returns the three nucleotide positions that make up that codon
+ getCodonNucs <- function(codonNumb){
+   return(getCodonPos(3 * codonNumb))
+ }
+
+ # Fill the global CODON_TABLE with the mutation type of every single-nucleotide change.
+ # Each column is named after a codon and holds the 12 results of mutationTypeOptimized()
+ # for the codons listed by permutateAllCodon() (3 positions x 4 nucleotides); triplets
+ # containing "N" or "-" get all-NA columns. The table is stored as a matrix.
+ # Nothing is computed when testID > 4.
+ # NOTE(review): mutationType() reads the global testID, not this argument - confirm
+ # that callers keep the two in step.
+ computeCodonTable <- function(testID=1){
+   if(testID<=4){
+     # The 64 real codons, first position varying slowest
+     nextCol = 1
+     for(first in NUCLEOTIDES){
+       for(second in NUCLEOTIDES){
+         for(third in NUCLEOTIDES){
+           codon = paste0(first,second,third)
+           colnames(CODON_TABLE)[nextCol] = codon
+           nextCol = nextCol + 1
+           neighbours = permutateAllCodon(codon)
+           CODON_TABLE[,codon] = mutationTypeOptimized(cbind(neighbours,rep(codon,12)))
+         }
+       }
+     }
+
+     # Add an all-NA column for every triplet over `alphabet` that contains `marker`
+     addBlankCodons <- function(tab,alphabet,marker){
+       for(p1 in alphabet){
+         for(p2 in alphabet){
+           for(p3 in alphabet){
+             if(marker %in% c(p1,p2,p3)){
+               tab[,paste0(p1,p2,p3)] = rep(NA,12)
+             }
+           }
+         }
+       }
+       tab
+     }
+     CODON_TABLE = addBlankCodons(CODON_TABLE,c("N","A","C","G","T", "-"),"N")
+     CODON_TABLE = addBlankCodons(CODON_TABLE,c("-","A","C","G","T"),"-")
+     CODON_TABLE <<- as.matrix(CODON_TABLE)
+   }
+ }
+
+ collapseClone <- function(vecInputSeqs,glSeq,readEnd,nonTerminalOnly=0){
+ # Collapse the distinct sequences of a clone into one; returns list(c(collapsed sequence, glSeq), flag) where the flag marks positions that mixed "N" with other nucleotides
+ vecInputSeqs = unique(vecInputSeqs)
+ if(length(vecInputSeqs)==1){
+ return( list( c(vecInputSeqs,glSeq), F) ) # a single distinct sequence is returned unchanged
+ }else{
+ charInputSeqs <- sapply(vecInputSeqs, function(x){ # readEnd x nSeqs character matrix
+ s2c(x)[1:readEnd]
+ })
+ charGLSeq <- s2c(glSeq)
+ matClone <- sapply(1:readEnd, function(i){ # per position: c(chosen nucleotide, error flag), both stored as strings
+ posNucs = unique(charInputSeqs[i,])
+ posGL = charGLSeq[i]
+ error = FALSE
+ if(posGL=="-" & sum(!(posNucs%in%c("-","N")))==0 ){ # germline gap and nothing but "-"/"N" observed
+ return(c("-",error))
+ }
+ if(length(posNucs)==1) # all sequences agree
+ return(c(posNucs[1],error))
+ else{
+ if("N"%in%posNucs){
+ error=TRUE
+ }
+ if(sum(!posNucs[posNucs!="N"]%in%posGL)==0){ # every non-"N" nucleotide equals the germline
+ return( c(posGL,error) )
+ }else{
+ # Otherwise draw one mutated nucleotide at random; sampling the whole column weights each nucleotide by how often it occurs
+ if(nonTerminalOnly==0){
+ return( c(sample(charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL],1),error) )
+ }else{
+ posNucs = charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL]
+ posNucsTable = table(posNucs)
+ if(sum(posNucsTable>1)==0){ # no mutated nucleotide is seen in more than one sequence: keep the germline
+ return( c(posGL,error) )
+ }else{
+ return( c(sample( posNucs[posNucs%in%names(posNucsTable)[posNucsTable>1]],1),error) )
+ }
+ }
+
+ }
+ }
+ })
+
+
+ # Row 1 of matClone holds the chosen nucleotides, row 2 the error flags ("TRUE"/"FALSE" strings)
+ return(list(c(c2s(matClone[1,]),glSeq),"TRUE"%in%matClone[2,]))
+ }
+ }
+
+ # Compute the expected mutation frequencies for each sequence-germline pair.
+ #   matInput: character matrix, one row per sequence; column 1 is the input sequence and
+ #             column 2 its germline, each as a single string.
+ # Returns a numeric matrix with one row per sequence and the four columns produced by
+ # computeExpected(): R_CDR, S_CDR, R_FWR, S_FWR. An empty matInput gives a 0x4 matrix.
+ # The parallel and serial paths share one pipeline; only the apply function differs.
+ # Uses computeMutabilities(), computeTargeting(), computeMutationTypes() and
+ # computeExpected() from this file, plus s2c() (presumably seqinr's - confirm).
+ getExpectedIndividual <- function(matInput){
+
+   nSeqs = nrow(matInput)
+   if(nSeqs==0){
+     # Nothing to do; avoids iterating over 1:0 below
+     return(matrix(numeric(0),ncol=4,nrow=0))
+   }
+
+   # Use mclapply when something with "multicore" in its name is on the search path
+   if( any(grep("multicore",search())) ){
+     applyFun <- mclapply
+   }else{
+     applyFun <- lapply
+   }
+
+   # Germline mutabilities are computed once per distinct germline ...
+   facGL <- factor(matInput[,2])
+   facLevels = levels(facGL)
+   LisGLs_MutabilityU = applyFun(seq_along(facLevels), function(x){
+     computeMutabilities(facLevels[x])
+   })
+   # Row -> index of its germline among the distinct germlines
+   facIndex = match(facGL,facLevels)
+
+   # ... and then masked per sequence: positions where the input has "N" become NA
+   LisGLs_Mutability = applyFun(seq_len(nSeqs), function(x){
+     cInput = rep(NA,nchar(matInput[x,1]))
+     cInput[s2c(matInput[x,1])!="N"] = 1
+     LisGLs_MutabilityU[[facIndex[x]]] * cInput
+   })
+
+   # Targeting = mutability x substitution probability, per germline position
+   LisGLs_Targeting = applyFun(seq_len(nSeqs), function(x){
+     computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
+   })
+
+   # Mutation type of every possible change in each germline.
+   # NOTE(review): this depends only on the germline, so it could be computed once per
+   # distinct germline like the mutabilities; kept per row to leave results untouched.
+   LisGLs_MutationTypes = applyFun(seq_len(nSeqs), function(x){
+     computeMutationTypes(matInput[x,2])
+   })
+
+   # Expected R/S totals for CDR and FWR: four values per sequence
+   LisGLs_Exp = applyFun(seq_len(nSeqs), function(x){
+     computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]])
+   })
+
+   # unlist() yields the four values of each sequence in turn, so filling by row
+   # restores one row per sequence
+   ul_LisGLs_Exp = unlist(LisGLs_Exp)
+
+   return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=T))
+
+ }
+
+ # Compute the per-position mutability of a sequence (tri-nucleotide model, or 5-mer lookup when the global mutabilityModel is 5); gap positions stay NA
+ computeMutabilities <- function(paramSeq){
+ seqLen = nchar(paramSeq)
+ seqMutabilites = rep(NA,seqLen)
+ # Work on the sequence with "-" removed, then map the values back to the gapped positions
+ gaplessSeq = gsub("-", "", paramSeq)
+ gaplessSeqLen = nchar(gaplessSeq)
+ gaplessSeqMutabilites = rep(NA,gaplessSeqLen)
+
+ if(mutabilityModel!=5){
+ pos<- 3:(gaplessSeqLen)
+ subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2)) # window of up to 5 nucleotides centred on each position
+ gaplessSeqMutabilites[pos] =
+ tapply( c(
+ getMutability( substr(subSeq,1,3), 3) , # position is 3rd in the left triplet,
+ getMutability( substr(subSeq,2,4), 2), # 2nd in the middle triplet,
+ getMutability( substr(subSeq,3,5), 1) # 1st in the right triplet
+ ),rep(1:(gaplessSeqLen-2),3),mean,na.rm=TRUE # average the three values per position, ignoring NA
+ )
+ #Pos 1
+ subSeq = substr(gaplessSeq,1,3)
+ gaplessSeqMutabilites[1] = getMutability(subSeq , 1)
+ #Pos 2
+ subSeq = substr(gaplessSeq,1,4)
+ gaplessSeqMutabilites[2] = mean( c(
+ getMutability( substr(subSeq,1,3), 2) ,
+ getMutability( substr(subSeq,2,4), 1)
+ ),na.rm=T
+ )
+ seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites
+ return(seqMutabilites)
+ }else{
+ # 5-mer model: positions 1 and 2 are never assigned and stay NA
+ pos<- 3:(gaplessSeqLen)
+ subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))
+ gaplessSeqMutabilites[pos] = sapply(subSeq,function(x){ getMutability5(x) }, simplify=T)
+ seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites
+ return(seqMutabilites)
+ }
+
+ }
+
+ # Returns the mutability of triplet(s) at the given position(s), read from the global `mutability` matrix
+ getMutability <- function(codon, pos=1:3){
+   rowIdx <- match(codon, rownames(mutability))
+   return(mutability[rowIdx, pos])
+ }
+
+ getMutability5 <- function(fivemer){
+   mutability[fivemer] # element(s) of the global `mutability` indexed by the 5-mer
+ }
+
+ # Returns the row of the global `substitution` matrix for the given nucleotide
+ getTransistionProb <- function(nuc){
+   return(substitution[nuc, ])
+ }
+
+ getTransistionProb5 <- function(fivemer){
+   # Returns the column of the global `substitution` named by the 5-mer;
+   # a 5-mer with no matching column gives four NAs
+   if(!(fivemer %in% colnames(substitution))){
+     return(array(NA,4))
+   }
+   substitution[,fivemer] }
+
+ # Given a nuc, returns the other 3 nucs it can mutate to
+ canMutateTo <- function(nuc){
+ NUCLEOTIDES[- which(NUCLEOTIDES==nuc)] # NOTE(review): if nuc is not in NUCLEOTIDES, which() is empty and x[-integer(0)] yields character(0), not all four
+ }
+
+ # Given a nucleotide, returns the probability of each other nucleotide it can mutate to
+ canMutateToProb <- function(nuc){
+   return(substitution[nuc, canMutateTo(nuc)])
+ }
+
+ # Compute targeting (4 x sequence-length matrix, rows A,C,G,T) from precomputed mutability & the global substitution model
+ computeTargeting <- function(param_strSeq,param_vecMutabilities){
+ # Models other than 5: targeting at a position = its mutability times the substitution row of its nucleotide
+ if(substitutionModel!=5){
+ vecSeq = s2c(param_strSeq)
+ matTargeting = sapply( 1:length(vecSeq), function(x) { param_vecMutabilities[x] * getTransistionProb(vecSeq[x]) } )
+ #matTargeting = apply( rbind(vecSeq,param_vecMutabilities),2, function(x) { as.vector(as.numeric(x[2]) * getTransistionProb(x[1])) } )
+ dimnames( matTargeting ) = list(NUCLEOTIDES,1:(length(vecSeq)))
+ return (matTargeting)
+ }else{
+ # Model 5: substitution probabilities are looked up per 5-mer on the gapless sequence, then mapped back to gapped positions
+ seqLen = nchar(param_strSeq)
+ seqsubstitution = matrix(NA,ncol=seqLen,nrow=4)
+ paramSeq <- param_strSeq
+ gaplessSeq = gsub("-", "", paramSeq)
+ gaplessSeqLen = nchar(gaplessSeq)
+ gaplessSeqSubstitution = matrix(NA,ncol=gaplessSeqLen,nrow=4)
+ # Positions 1 and 2 are never assigned and stay NA
+ pos<- 3:(gaplessSeqLen)
+ subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))
+ gaplessSeqSubstitution[,pos] = sapply(subSeq,function(x){ getTransistionProb5(x) }, simplify=T)
+ seqsubstitution[,which(s2c(paramSeq)!="-")]<- gaplessSeqSubstitution
+ #matTargeting <- param_vecMutabilities %*% seqsubstitution
+ matTargeting <- sweep(seqsubstitution,2,param_vecMutabilities,`*`) # scale each column by that position's mutability
+ dimnames( matTargeting ) = list(NUCLEOTIDES,1:(seqLen))
+ return (matTargeting)
+ }
+ }
+
+ # Mutation type of every possible single-nucleotide change along a sequence: a 4 x nchar
+ # matrix (rows A,C,G,T = target nucleotide) read from CODON_TABLE, codons cut from position 1
+ computeMutationTypes <- function(param_strSeq){
+   nNucs = nchar(param_strSeq)
+   codonStarts = 3*(1:(nNucs/3)) - 2
+   codons = substring(param_strSeq, codonStarts, codonStarts+2)
+   typesMat = matrix( unlist(CODON_TABLE[,codons]), nrow=4, ncol=nNucs, byrow=FALSE)
+   dimnames(typesMat) = list(NUCLEOTIDES, 1:nNucs)
+   typesMat
+ }
+ computeMutationTypesFast <- function(param_strSeq){
+   # param_strSeq selects CODON_TABLE column(s) by name; the entries are laid out
+   # column-wise as 4 rows x 3 columns, without dimnames
+   matrix( CODON_TABLE[,param_strSeq], nrow=4, ncol=3, byrow=FALSE)
+ }
+ mutationTypeOptimized <- function( matOfCodons ){
+   apply(matOfCodons, 1, function(pair) mutationType(codonFrom=pair[2], codonTo=pair[1])) # column 2 = from, column 1 = to
+ }
+
+ # Returns the 12 codons obtained by setting each of the 3 positions to A, C, G and T in turn
+ # (so the given codon itself appears once per position)
+ permutateAllCodon <- function(codon){
+   variants = matrix(s2c(codon), nrow=12, ncol=3, byrow=TRUE)
+   for(pos in 1:3){
+     variants[(4*pos-3):(4*pos), pos] = NUCLEOTIDES
+   }
+   apply(variants,1,c2s)
+ }
+
+ # Given two codons, tells you if the mutation is R or S (based on your definition)
+ # Returns "R", "S" or "Stop", or NA when either codon is NA or has no entry in AMINO_ACIDS.
+ # The definition is selected by the global testID:
+ #   4     - "R" when the amino-acid trait class changes, otherwise "S"
+ #   5     - equal codons are "S"; otherwise the codonFrom nucleotide at the differing
+ #           position decides: "C" gives "R", "G" gives "S", anything else "Stop"
+ #   other - "R" when the amino acid changes, otherwise "S"
+ # For every testID except 5, a stop codon ("*") on either side yields "Stop".
+ mutationType <- function(codonFrom,codonTo){
+   test = testID
+   aaFrom = translateCodonToAminoAcid(codonFrom)
+   aaTo = translateCodonToAminoAcid(codonTo)
+
+   # Undefined input: the same check applies to every test
+   if( is.na(codonFrom) | is.na(codonTo) | is.na(aaFrom) | is.na(aaTo) ){
+     return(NA)
+   }
+
+   if(test==5){
+     if(codonFrom==codonTo){
+       return("S")
+     }
+     fromNucs = s2c(codonFrom)
+     toNucs = s2c(codonTo)
+     nucOfI = fromNucs[which(toNucs!=fromNucs)]
+     if(nucOfI=="C"){
+       return("R")
+     }
+     if(nucOfI=="G"){
+       return("S")
+     }
+     return("Stop")
+   }
+
+   # Remaining tests: compare trait classes (test 4) or the amino acids themselves
+   if(test==4){
+     changed = translateAminoAcidToTraitChange(aaFrom) != translateAminoAcidToTraitChange(aaTo)
+   }else{
+     changed = aaFrom != aaTo
+   }
+   result = "S"
+   if(changed){
+     result = "R"
+   }
+   if(aaTo=="*" | aaFrom=="*"){
+     result = "Stop"
+   }
+   return(result)
+ }
+
+
+ # Given a matrix of targeting & its corresponding mutation types, returns
+ # a vector of Exp_RCDR,Exp_SCDR,Exp_RFWR,Exp_SFWR
+ # Region membership comes from the global masks CDR_Nuc_Mat and FWR_Nuc_Mat, which are
+ # indexed with the same cell positions as the two input matrices.
+ computeExpected <- function(paramTargeting,paramMutationTypes){
+   # Total targeting over the cells of one mutation type that lie inside a region mask
+   regionTotal <- function(cells,regionMask){
+     inRegion = cells[which(regionMask[cells]==TRUE)]
+     sum(paramTargeting[inRegion],na.rm=TRUE)
+   }
+   replacementCells = which(paramMutationTypes=="R")
+   silentCells = which(paramMutationTypes=="S")
+
+   c(regionTotal(replacementCells,CDR_Nuc_Mat),
+     regionTotal(silentCells,CDR_Nuc_Mat),
+     regionTotal(replacementCells,FWR_Nuc_Mat),
+     regionTotal(silentCells,FWR_Nuc_Mat))
+ }
+
+ # Count the mutations in a sequence
+ # each mutation is treated independently
+ # rev_in_matrix: two-row character matrix, one nucleotide per column. Row 2 is bound to
+ # paramGL and row 1 to paramSeq, and the two are stacked in that order (row 2 first)
+ # before being handed to analyzeMutations2NucUri().
+ analyzeMutations2NucUri_website <- function( rev_in_matrix ){
+   paramSeq = rev_in_matrix[1,]
+   paramGL = rev_in_matrix[2,]
+
+   # Disabled: fill gapped ("-") or unknown ("N") sequence positions from the germline
+   #if( any(paramSeq=="-") ){
+   # gapPos_Seq = which(paramSeq=="-")
+   # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "-"]
+   # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
+   #}
+   #if( any(paramSeq=="N") ){
+   # gapPos_Seq = which(paramSeq=="N")
+   # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
+   # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
+   #}
+   swapped = matrix(c( paramGL, paramSeq ),nrow=2,ncol=length(paramGL),byrow=TRUE)
+   return(analyzeMutations2NucUri( swapped ))
+ }
+
+ # Classify every difference between two aligned sequences (one nucleotide per matrix column); returns a vector of mutation types named by position, or NA when there is none
+ # Original note: "1 = GL, 2 = Seq". NOTE(review): the code binds row 2 to paramGL and row 1 to paramSeq - confirm which orientation is intended
+ analyzeMutations2NucUri <- function( in_matrix=matrix(c(c("A","A","A","C","C","C"),c("A","G","G","C","C","A")),2,6,byrow=T) ){
+ paramGL = in_matrix[2,]
+ paramSeq = in_matrix[1,]
+ paramSeqUri = paramGL # not referenced again in this function
+ #mutations = apply(rbind(paramGL,paramSeq), 2, function(x){!x[1]==x[2]})
+ mutations_val = paramGL != paramSeq
+ if(any(mutations_val)){
+ mutationPos = {1:length(mutations_val)}[mutations_val]
+ mutationPos = mutationPos[sapply(mutationPos, function(x){!any(paramSeq[getCodonPos(x)]=="N")})] # skip positions whose codon in paramSeq contains "N"
+ length_mutations =length(mutationPos)
+ mutationInfo = rep(NA,length_mutations)
+ if(any(mutationPos)){
+ # Each mutation is judged alone: the paramGL codon against the same codon with only that position taken from paramSeq
+ pos<- mutationPos
+ pos_array<-array(sapply(pos,getCodonPos))
+ codonGL = paramGL[pos_array]
+
+ codonSeq = sapply(pos,function(x){
+ seqP = paramGL[getCodonPos(x)]
+ muCodonPos = {x-1}%%3+1 # position (1-3) of the mutation inside its codon
+ seqP[muCodonPos] = paramSeq[x]
+ return(seqP)
+ })
+ GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
+ Seqcodons = apply(codonSeq,2,c2s)
+ mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) # codonFrom = paramGL codon, codonTo = modified codon
+ names(mutationInfo) = mutationPos
+ }
+ if(any(!is.na(mutationInfo))){
+ return(mutationInfo[!is.na(mutationInfo)]) # only the classified mutations are returned
+ }else{
+ return(NA)
+ }
+
+
+ }else{
+ return (NA)
+ }
+ }
+
+ # Tally the observed mutations by type and region.
+ #   mu: named character vector of mutation types ("R", "S", ...) whose names are the
+ #       nucleotide positions, as returned by analyzeMutations2NucUri(); a single NA
+ #       (or an empty vector) means "no mutations".
+ # Returns c(R_CDR, S_CDR, R_FWR, S_FWR); region membership is read from the global
+ # masks CDR_Nuc and FWR_Nuc, indexed by nucleotide position.
+ processNucMutations2 <- function(mu){
+   # Only the first element is tested: `mu` usually holds several mutations, and a
+   # condition of length > 1 inside if() is an error from R 4.2.0 onwards
+   if(length(mu)==0 || is.na(mu[1])){
+     return(rep(0,4))
+   }
+
+   # c(CDR, FWR) counts of the mutations of one type
+   countByRegion <- function(type){
+     if(!any(mu==type)){
+       return(c(0,0))
+     }
+     nucNumbs = as.numeric(names(mu[mu==type]))
+     c(sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=TRUE),
+       sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=TRUE))
+   }
+
+   # Replacement (R) and silent (S) mutations are counted separately
+   R = countByRegion("R")
+   S = countByRegion("S")
+
+   # Same order as computeExpected(): R_CDR, S_CDR, R_FWR, S_FWR
+   retVec = c(R[1],S[1],R[2],S[2])
+   retVec[is.na(retVec)]=0
+   return(retVec)
+ }
+
+
+ ## Z-score Test
+ # Z-score of an observed count against a binomial expectation, per row of `mat`.
+ #   mat:  numeric matrix, one row per sequence. Columns 1-4 are used as observed counts
+ #         and columns 5-8 as expected values.
+ #         NOTE(review): both presumably in the order R_CDR, S_CDR, R_FWR, S_FWR, as
+ #         produced by processNucMutations2() / computeExpected() - confirm with caller.
+ #   test: "Focused", "Local" or "Imbalanced"; selects the columns that form the number
+ #         of trials N and the success probability P (see the branches below).
+ # Returns a matrix with one row per row of `mat`: column 1 from column 1 of `mat`
+ # (CDR), column 2 from column 3 (FWR). NaN results are reported as NA, and any other
+ # value of `test` leaves the whole result NA.
+ computeZScore <- function(mat, test="Focused"){
+   # Row-wise sum over the given columns of mat
+   sumCols <- function(cols){
+     apply(mat[,cols],1,sum)
+   }
+   # z = (observed - N*P) / sqrt(N*P*(1-P))
+   zScore <- function(observed,P,N){
+     R_mean = P*N
+     (observed-R_mean)/sqrt(R_mean*(1-P))
+   }
+
+   matRes <- matrix(NA,ncol=2,nrow=(nrow(mat)))
+
+   if(test=="Focused"){
+     # N: columns 1,2,4 (CDR) or 3,2,4 (FWR); P: column 5 (or 7) over columns 5,6,8 (or 7,6,8)
+     matRes[,1] = zScore(mat[,1], mat[,5]/sumCols(c(5,6,8)), sumCols(c(1,2,4)))
+     matRes[,2] = zScore(mat[,3], mat[,7]/sumCols(c(7,6,8)), sumCols(c(3,2,4)))
+   }
+
+   if(test=="Local"){
+     # N: columns 1,2 (CDR) or 3,4 (FWR); P: column 5 over 5,6 or column 7 over 7,8
+     matRes[,1] = zScore(mat[,1], mat[,5]/sumCols(c(5,6)), sumCols(c(1,2)))
+     matRes[,2] = zScore(mat[,3], mat[,7]/sumCols(c(7,8)), sumCols(c(3,4)))
+   }
+
+   if(test=="Imbalanced"){
+     # N: columns 1-4; P: columns 5+6 (CDR) or 7+8 (FWR) over columns 5-8.
+     # NOTE(review): the observed value is still column 1 (or 3) alone, not the sum of
+     # the two columns that P describes - confirm this is intended.
+     matRes[,1] = zScore(mat[,1],
+                         (mat[,5]+mat[,6])/sumCols(5:8),
+                         sumCols(1:4))
+     matRes[,2] = zScore(mat[,3],
+                         (mat[,7]+mat[,8])/sumCols(5:8),
+                         sumCols(1:4))
+   }
+
+   # 0/0 cases
+   matRes[is.nan(matRes)] = NA
+
+   return(matRes)
+ }
+
+ # Return a p-value for a z-score
+ # Positive z gives the upper-tail probability; negative z gives the lower-tail
+ # probability with a minus sign; z == 0 gives 0.5; NA or NaN gives NA.
+ z2p <- function(z){
+   if( is.nan(z) || is.na(z) ){
+     return(NA)
+   }
+
+   if(z>0){
+     return(1 - pnorm(z,0,1))
+   }
+   if(z<0){
+     return(-1 * pnorm(z,0,1))
+   }
+   0.5
+ }
+
+
+ ## Bayesian Test
+
+ # Fitted parameter for the bayesian framework
+ # A vector of 60 fitted values. calculate_bayes() indexes it with min(N,60)
+ # and adds the selected value to both shape parameters of its Beta density.
+BAYESIAN_FITTED<-c(0.407277142798302, 0.554007336744485, 0.63777155771234, 0.693989162719009, 0.735450014674917, 0.767972534429806, 0.794557287143399, 0.816906816601605, 0.83606796225341, 0.852729446430296, 0.867370424541641, 0.880339760590323, 0.891900995024999, 0.902259181289864, 0.911577919359,0.919990301665853, 0.927606458124537, 0.934518806350661, 0.940805863754375, 0.946534836475715, 0.951763691199255, 0.95654428191308, 0.960920179487397, 0.964930893680829, 0.968611312149038, 0.971992459313836, 0.975102110004818, 0.977964943023096, 0.980603428208439, 0.983037660179428, 0.985285800977406, 0.987364285326685, 0.989288037855441, 0.991070478823525, 0.992723699729969, 0.994259575477392, 0.995687688867975, 0.997017365051493, 0.998257085153047, 0.999414558305388, 1.00049681357804, 1.00151036237481, 1.00246080204981, 1.00335370751909, 1.0041939329768, 1.0049859393417, 1.00573382091263, 1.00644127217376, 1.00711179729107, 1.00774845526417, 1.00835412715854, 1.00893143010366, 1.00948275846309, 1.01001030293661, 1.01051606798079, 1.01100188771288, 1.01146944044216, 1.01192026195449, 1.01235575766094, 1.01277721370986)
+ # Sorted probability grid in (0,1), used as the default `i` of calculate_bayes():
+ #   - 200 values 2^e/2, e on a 201-point grid over [-39,0] (the last point, 0.5, is dropped),
+ #   - the values (k+0.5)/100 for k in 0:99 except k=12,
+ #   - 201 values 1 - 2^e/2 over the same exponent grid (this set does include 0.5).
+ # NOTE(review): why k=12 is skipped is not evident from this file -- confirm.
+ CONST_i <- sort(c(((2^(seq(-39,0,length.out=201)))/2)[1:200],(c(0:11,13:99)+0.5)/100,1-(2^(seq(-39,0,length.out=201)))/2))
+
+ # Given x, N & p, return a pdf over the sigma grid.
+ #   x, N : enter the Beta density as dbeta(i, x+a, N+a-x), where a is the
+ #          BAYESIAN_FITTED entry at index min(N,60)
+ #   p    : probability that the log-odds of each grid point are measured against
+ #   i    : probability grid on which the density is evaluated
+ #   max_sigma, length_sigma : the output grid is
+ #          seq(-max_sigma, max_sigma, length.out=length_sigma)
+ # Returns a vector of length_sigma densities, or NA when N contains a 0 or
+ # the density has an NA on the i grid.
+ calculate_bayes <- function ( x=3, N=10, p=0.33,
+   i=CONST_i,
+   max_sigma=20,length_sigma=4001
+ ){
+   # No observations: there is nothing to estimate
+   if(0 %in% N) return(NA)
+
+   G <- max(length(x),length(N),length(p))
+   x <- array(x,dim=G)
+   N <- array(N,dim=G)
+   p <- array(p,dim=G)
+
+   sigma_grid <- seq(-max_sigma,max_sigma,length.out=length_sigma)
+   # log-odds of every grid probability relative to the odds of p
+   sigma_at_i <- log((i/(1-i))/(p/(1-p)))
+   fitted <- BAYESIAN_FITTED[min(N,60)]
+   growth <- exp(sigma_at_i)
+   denom <- (1-p)^2+2*p*(1-p)*growth+(p^2)*exp(2*sigma_at_i)
+   dens <- dbeta(i,x+fitted,N+fitted-x)*(1-p)*p*growth/denom
+   if(any(is.na(dens))) return(NA)
+
+   # Move the density onto the regular sigma grid, then rescale by the grid step
+   onGrid <- approx(sigma_at_i,dens,sigma_grid)$y
+   onGrid/sum(onGrid)/(2*max_sigma/(length_sigma-1))
+ }
+ # Given a mat of observed & expected, return a list of CDR & FWR pdf for selection.
+ #   mat  : one row per sequence. Columns 1:4 are read as observed counts and
+ #          columns 5:8 as the matching expected values (column k+4 pairs with
+ #          column k).
+ #          NOTE(review): presumably ordered CDR_R, CDR_S, FWR_R, FWR_S -- confirm
+ #          against the caller.
+ #   test : one of "Focused", "Local", "Imbalanced", "ImbalancedSilent";
+ #          any other value raises an error
+ #   max_sigma, length_sigma : forwarded to calculate_bayes()
+ # Returns list(CDR=, FWR=) holding one calculate_bayes() result per row of mat.
+ computeBayesianScore <- function(mat, test="Focused", max_sigma=20,length_sigma=4001){
+   flagOneSeq <- FALSE
+   if(nrow(mat)==1){
+     # mat[,cols] must stay a matrix for apply(), so a single row is duplicated
+     # here and the duplicate is dropped again before returning
+     mat <- rbind(mat,mat)
+     flagOneSeq <- TRUE
+   }
+
+   # Per-test recipe for each region: the observed columns that make up N, and
+   # which of them (by position in that set) make up X. The expected columns
+   # are always the observed columns shifted by 4.
+   recipe <- switch(test,
+     "Focused"          = list(CDR=list(cols=c(1,2,4), pick=1),   FWR=list(cols=c(3,2,4), pick=1)),
+     "Local"            = list(CDR=list(cols=c(1,2),   pick=1),   FWR=list(cols=c(3,4),   pick=1)),
+     "Imbalanced"       = list(CDR=list(cols=1:4,      pick=1:2), FWR=list(cols=1:4,      pick=3:4)),
+     "ImbalancedSilent" = list(CDR=list(cols=c(2,4),   pick=1),   FWR=list(cols=c(2,4),   pick=2)),
+     stop("computeBayesianScore: unknown test '", test, "'", call.=FALSE)
+   )
+
+   # One posterior per row of mat for a single region
+   regionPdfs <- function(cols, pick){
+     # The trailing (X=0, N=0, P=0.5) row makes calculate_bayes() return NA for
+     # the last element, so apply() always gives back a list; it is removed below
+     X <- c(apply(mat[,cols],1,function(r){sum(r[pick])}),0)
+     N <- c(apply(mat[,cols],1,function(r){sum(r)}),0)
+     P <- c(apply(mat[,cols+4],1,function(r){sum(r[pick])/sum(r)}),0.5)
+     pdfs <- apply(cbind(X,N,P),1,function(r){calculate_bayes(x=r[1],N=r[2],p=r[3],max_sigma=max_sigma,length_sigma=length_sigma)})
+     pdfs[-length(pdfs)]
+   }
+
+   bayesCDR <- regionPdfs(recipe$CDR$cols, recipe$CDR$pick)
+   bayesFWR <- regionPdfs(recipe$FWR$cols, recipe$FWR$pick)
+
+   if(flagOneSeq){
+     bayesCDR <- bayesCDR[1]
+     bayesFWR <- bayesFWR[1]
+   }
+   return( list("CDR"=bayesCDR, "FWR"=bayesFWR) )
+ }
+
+ ## Convolution
+
+ # Split G items into chunks whose size is the power of two nearest (in log2)
+ # to sqrt(G). Every chunk has that size except the last, which also absorbs
+ # the remainder. The chunk sizes add up to G.
+ break2chunks<-function(G=1000){
+   chunk <- 2^round(log(sqrt(G),2),0)
+   nWhole <- floor(G/chunk)
+   c(rep(chunk,nWhole-1),chunk+G-(nWhole*chunk))
+ }
+
+ # Decompose G into a sum of distinct powers of two and return the exponents,
+ # largest first (e.g. 100 = 2^6 + 2^5 + 2^2 gives 6, 5, 2).
+ # For G <= 0 the untouched array() (a single NA) is returned.
+ PowersOfTwo <- function(G=100){
+   exps <- array()
+   remaining <- G
+   k <- 0
+   repeat{
+     if(!(remaining > 0)) break
+     k <- k + 1
+     # the largest power of two that still fits
+     exps[k] <- floor( log2(remaining) )
+     remaining <- remaining - 2^exps[k]
+   }
+   exps
+ }
+
+ # Reduce the columns of `cons` to a single column by repeated pairwise
+ # weighted_conv(): at each level, columns (2,1), (4,3), ... are combined and
+ # written back into columns 1, 2, ...
+ # The loop bounds are log(G,2):1, so G = ncol(cons) is expected to be a power
+ # of two. Returns the first column after the last level.
+ convolutionPowersOfTwo <- function( cons, length_sigma=4001 ){
+   G <- ncol(cons)
+   if(G>1){
+     for(level in log(G,2):1){
+       for(right in seq(from=2,to=2^level,by=2)){
+         # pair (right, right-1) collapses into column right/2
+         cons[,right/2] <- weighted_conv(cons[,right],cons[,right-1],length_sigma=length_sigma)
+       }
+     }
+   }
+   cons[,1]
+ }
+
+ # Split the columns of `cons` into consecutive groups whose sizes are the
+ # powers of two given by PowersOfTwo(G), and reduce each group to one column
+ # with convolutionPowersOfTwo(). A group of size one is copied as is.
+ #   cons : matrix with one column per pdf, or a plain vector (then the G
+ #          argument is used, 1 by default)
+ # Returns list(matrix with one column per group, exponents of the group sizes).
+ convolutionPowersOfTwoByTwos <- function( cons, length_sigma=4001,G=1 ){
+   nCols <- ncol(cons)
+   if(length(nCols)) G <- nCols
+   groups <- PowersOfTwo(G)
+   matG <- matrix(NA, ncol=length(groups), nrow=length(cons)/G )
+   first <- 1
+   for( k in seq_along(groups) ){
+     last <- 2^groups[k] + first - 1
+     if(last==first){
+       # group of one column: nothing to convolve (`first` is not advanced here)
+       if(G>1) matG[,k] <- cons[,first:last]
+       else matG[,k] <- cons
+     }else{
+       matG[,k] <- convolutionPowersOfTwo( cons[,first:last], length_sigma=length_sigma )
+       first <- last + 1
+     }
+   }
+   list( matG, groups )
+ }
+
+ weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){
+ lx<-length(x)
+ ly<-length(y)
+ if({lx1){
+ while( i1 & Length_Postrior<=Threshold){
+ cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors))
+ listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma)
+ y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma)
+ return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
+ }else if(Length_Postrior==1) return(listPosteriors[[1]])
+ else if(Length_Postrior==0) return(NA)
+ else {
+ cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors))
+ y = fastConv(cons,max_sigma=max_sigma, length_sigma=length_sigma )
+ return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
+ }
+ }
+
+ # Convolve all columns of `cons` in two stages: the columns are cut into the
+ # chunks given by break2chunks(), each chunk is reduced to one column, and the
+ # per-chunk results are combined with calculate_bayesGHelper().
+ #   cons         : matrix with one pdf per column
+ #   max_sigma    : not used in this function (kept for the callers' signature)
+ #   length_sigma : grid length, forwarded to every helper call so that a
+ #                  non-default value is honoured throughout
+ # Returns the combined result as a plain vector (not normalised here).
+ fastConv<-function(cons, max_sigma=20, length_sigma=4001){
+   chunks<-break2chunks(G=ncol(cons))
+   # three columns are always split as 2 + 1
+   if(ncol(cons)==3) chunks<-2:1
+   index_chunks_end <- cumsum(chunks)
+   index_chunks_start <- c(1,index_chunks_end[-length(index_chunks_end)]+1)
+   index_chunks <- cbind(index_chunks_start,index_chunks_end)
+   nChunks <- length(index_chunks)/2
+
+   # case counts the chunks whose size differs from the first chunk's size
+   case <- sum(chunks!=chunks[1])
+   if(case==1) End <- max(1,(nChunks-1))
+   else End <- max(1,nChunks)
+
+   # one reduced column per chunk handled in the first stage
+   firsts <- sapply(1:End,function(i){
+     indexes<-index_chunks[i,1]:index_chunks[i,2]
+     convolutionPowersOfTwoByTwos(cons[ ,indexes],length_sigma=length_sigma)[[1]]
+   })
+   if(case==0){
+     result<-calculate_bayesGHelper( convolutionPowersOfTwoByTwos(firsts,length_sigma=length_sigma),length_sigma=length_sigma )
+   }else if(case==1){
+     # the odd-sized last chunk is reduced on its own and then joined with the
+     # result of the other chunks, each side labelled by log2 of its column count
+     last<-list(calculate_bayesGHelper(
+       convolutionPowersOfTwoByTwos( cons[ ,index_chunks[nChunks,1]:index_chunks[nChunks,2]],length_sigma=length_sigma ),
+       length_sigma=length_sigma
+     ),0)
+     result_first<-calculate_bayesGHelper(convolutionPowersOfTwoByTwos(firsts,length_sigma=length_sigma),length_sigma=length_sigma)
+     result<-calculate_bayesGHelper(
+       list(
+         cbind(
+           result_first,last[[1]]),
+         c(log(index_chunks_end[nChunks-1],2),log(index_chunks[nChunks,2]-index_chunks[nChunks,1]+1,2))
+       ),
+       length_sigma=length_sigma
+     )
+   }
+   return(as.vector(result))
+ }
+
+ # Computes the CI (95% by default) for a pdf given on the sigma grid.
+ #   Pdf     : densities on seq(-max_sigma, max_sigma, length.out=length_sigma)
+ #   low, up : cumulative probabilities that bound the interval
+ # Returns c(lower, upper) on the sigma scale, or NA when Pdf does not have
+ # length_sigma elements.
+ calcBayesCI <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
+   if(length(Pdf)!=length_sigma) return(NA)
+   sigma_s <- seq(-max_sigma,max_sigma,length.out=length_sigma)
+   cdf <- cumsum(Pdf)
+   cdf <- cdf/cdf[length(cdf)]
+   # findInterval() returns 0 when the probability lies below cdf[1]; without
+   # clamping, an index of -1 or 0 would select 4000 grid points or none
+   clampIndex <- function(k) min(max(k,1),length_sigma)
+   lowIndex <- clampIndex(findInterval(low,cdf)-1)
+   upIndex <- clampIndex(findInterval(up,cdf))
+   return( c(sigma_s[lowIndex] , sigma_s[upIndex]) )
+ }
+
+ # Mean of a pdf given on the sigma grid: the inner product of the densities
+ # with the grid values, divided by the number of grid points per sigma unit.
+ # Returns a 1x1 matrix (the result of %*%), or NA when Pdf does not have
+ # length_sigma elements.
+ calcBayesMean <- function(Pdf,max_sigma=20,length_sigma=4001){
+   if(length(Pdf)==length_sigma){
+     grid <- seq(-max_sigma,max_sigma,length.out=length_sigma)
+     pointsPerUnit <- (length_sigma-1)/2/max_sigma
+     return( (Pdf%*%grid)/pointsPerUnit )
+   }
+   NA
+ }
+
+ # Returns the mean, and the CI (95% by default) for a pdf.
+ #   Pdf : densities on the sigma grid, or NA when no pdf is available
+ # Returns c(mean, lower, upper); three NAs when Pdf is empty or starts with NA.
+ calcBayesOutputInfo <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
+   # Only the first element is tested: a condition built from the whole vector
+   # has length > 1, which is an error in R >= 4.2
+   if(length(Pdf)==0 || is.na(Pdf[1]))
+     return(rep(NA,3))
+   bCI <- calcBayesCI(Pdf=Pdf,low=low,up=up,max_sigma=max_sigma,length_sigma=length_sigma)
+   bMean <- calcBayesMean(Pdf=Pdf,max_sigma=max_sigma,length_sigma=length_sigma)
+   return(c(bMean, bCI))
+ }
+
+ # Signed p-value of a pdf given on the sigma grid: the probability mass below
+ # the centre of the grid plus half the mass of the centre point. Values above
+ # 0.5 are reported as (value - 1), i.e. as a negative number.
+ # Returns NA when Pdf has at most one element.
+ computeSigmaP <- function(Pdf, length_sigma=4001, max_sigma=20){
+   if(length(Pdf)<=1) return(NA)
+   pointsPerUnit <- (length_sigma-1)/2/max_sigma
+   lastBelowCentre <- (length_sigma-1)/2
+   centre <- (length_sigma+1)/2
+   massBelow <- (sum(Pdf[1:lastBelowCentre]) + Pdf[centre]/2)/pointsPerUnit
+   if(massBelow>0.5) massBelow-1 else massBelow
+ }
+
+ # Compare two distributions given on the same grid.
+ # Both densities are normalised to sum to 1, then each weight of dens1 is
+ # multiplied by the mid-point cumulative weight of dens2 at the same grid
+ # position, and the products are summed. Values above 0.5 are reported as
+ # (value - 1).
+ #   sigma_S, N : not used by the current implementation
+ # Returns NA unless both densities have more than one element.
+ compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){
+   if(!(length(dens1)>1 & length(dens2)>1)) return(NA)
+   w1 <- dens1/sum(dens1)
+   w2 <- dens2/sum(dens2)
+   # cumulative weight of dens2 up to the middle of each grid cell
+   midCdf2 <- cumsum(w2)-w2/2
+   idx <- 1:length(w1)
+   overlap <- sum(w1[idx]*midCdf2[idx])
+   if(overlap>0.5) overlap <- overlap-1
+   return( overlap )
+ }
+
+ # Get the number of sequences contributing to the sigma (i.e. sequences with mutations).
+ #   matMutations : matrix with one row per sequence; columns 1:4 are summed
+ #   test         : 1 uses columns (1,2,4) for CDR and (3,4,2) for FWR;
+ #                  2 uses (1,2) and (3,4); 4 is treated as 2;
+ #                  any other value leaves both counts at 0
+ # Returns c(CDR=, FWR=): the number of rows with a positive sum.
+ numberOfSeqsWithMutations <- function(matMutations,test=1){
+   if(test==4) test <- 2
+
+   # Number of rows whose sum over `cols` is positive (0 when there is none)
+   rowsWithMutations <- function(cols){
+     totals <- apply(matMutations, 1, function(x){ sum(x[cols]) })
+     if(length(which(totals>0))>0) sum(totals>0) else 0
+   }
+
+   cdrSeqs <- 0
+   fwrSeqs <- 0
+   if(test==1){#focused
+     cdrSeqs <- rowsWithMutations(c(1,2,4))
+     fwrSeqs <- rowsWithMutations(c(3,4,2))
+   }
+   if(test==2){#local
+     cdrSeqs <- rowsWithMutations(c(1,2))
+     fwrSeqs <- rowsWithMutations(c(3,4))
+   }
+   c("CDR"=cdrSeqs, "FWR"=fwrSeqs)
+ }
+
+
+
+# Background colour for a result cell.
+#   sigmaVal : its sign picks the palette (negative: greens, otherwise: reds);
+#              when missing, the sign of pVal is used instead
+#   pVal     : its absolute value picks the shade; smaller means more saturated
+# Returns NA when both inputs are missing, white ("#FFFFFF") when pVal is
+# missing, 0 or 1, and otherwise one of five shades with cut-offs
+# <=0.005, <=0.01, <=0.05, <=0.1 and >0.1. Both palettes use the same
+# inclusive cut-offs.
+shadeColor <- function(sigmaVal=NA,pVal=NA){
+  if(is.na(sigmaVal) && is.na(pVal)) return(NA)
+  if(is.na(sigmaVal) && !is.na(pVal)) sigmaVal <- sign(pVal)
+  if(is.na(pVal) || pVal==1 || pVal==0) return("#FFFFFF")
+
+  colVal <- abs(pVal)
+  # strongest shade first
+  if(sigmaVal<0){
+    shades <- c("#00FF00","#33FF33","#66FF66","#99FF99","#CCFFCC")
+  }else{
+    shades <- c("#FF0000","#FF3333","#FF6666","#FF9999","#FFCCCC")
+  }
+  cutoffs <- c(0.005,0.010,0.050,0.1)
+  # every cut-off that colVal exceeds moves one step towards the palest shade
+  returnColor <- shades[sum(colVal>cutoffs)+1]
+  return(returnColor)
+}
+
+
+
+# Coordinates of a point placed relative to the current plot region:
+# xfrac of the plot width to the left of the left edge and yfrac of the plot
+# height above the top edge, computed from par()$usr.
+#   log : FALSE for linear axes; 1, 2 or 3 raises the x, the y or both
+#         coordinates to the power of 10
+# Returns c(x=, y=).
+plotHelp <- function(xfrac=0.05,yfrac=0.05,log=FALSE){
+  usr <- par()$usr
+  xLinear <- usr[1]-(usr[2]-usr[1])*xfrac
+  yLinear <- usr[4]+(usr[4]-usr[3])*yfrac
+  if(!log){
+    x <- xLinear
+    y <- yLinear
+  }else{
+    if(log==2){
+      x <- xLinear
+      y <- 10^yLinear
+    }
+    if(log==1){
+      x <- 10^xLinear
+      y <- yLinear
+    }
+    if(log==3){
+      x <- 10^xLinear
+      y <- 10^yLinear
+    }
+  }
+  c("x"=x,"y"=y)
+}
+
+# SHMulation
+
+ # Based on targeting, introduce a single mutation & then update the targeting.
+ # One cell is drawn from the 4 x position targeting values (as.vector order,
+ # i.e. four consecutive cells per nucleotide position); the cell fixes both
+ # the position and the nucleotide written there. The global seqSim is
+ # overwritten with the mutated sequence and updateMutabilityNTargeting() is
+ # called for the position. Returns the mutated position.
+ oneMutation <- function(){
+   # Pick a position + mutation
+   drawnCell <- sample(1:(seqGermlineLen*4),1,replace=F,prob=as.vector(seqTargeting))
+   nucPosition <- ceiling(drawnCell/4) # Nucleotide number
+   nucKind <- 4 - ( (nucPosition*4) - drawnCell ) # Index into NUCLEOTIDES of the new base
+
+   # Write the new base into the simulated sequence
+   simChars <- s2c(seqSim)
+   simChars[nucPosition] <- NUCLEOTIDES[nucKind]
+   seqSim <<- c2s(simChars)
+
+   # Refresh mutability, targeting & mutation types around the position
+   updateMutabilityNTargeting(nucPosition)
+
+   nucPosition
+ }
+
+ # After a mutation at `position`, refresh the global simulation state in a
+ # window of up to two nucleotides on each side of it:
+ #   seqMutability    - recomputed with computeMutabilities()
+ #   seqTargeting     - recomputed with computeTargeting(), NAs set to 0,
+ #                      "Stop" mutations set to 0, then replacement ("R")
+ #                      mutations reweighted for selection
+ #   seqMutationTypes - recomputed for the codon containing `position`
+ # All results are written with <<-; nothing meaningful is returned.
+ updateMutabilityNTargeting <- function(position){
+ # window [min_i, max_i], clipped to the sequence
+ min_i<-max((position-2),1)
+ max_i<-min((position+2),nchar(seqSim))
+ # first element to take from the computeMutabilities() result below
+ # NOTE(review): the mapping between the substr() window (position-4 .. position+4)
+ # and this offset is not obvious near the sequence start -- confirm.
+ min_ii<-min(min_i,3)
+
+ #mutability - update locally
+ seqMutability[(min_i):(max_i)] <<- computeMutabilities(substr(seqSim,position-4,position+4))[(min_ii):(max_i-min_i+min_ii)]
+
+
+ #targeting - compute locally
+ seqTargeting[,min_i:max_i] <<- computeTargeting(substr(seqSim,min_i,max_i),seqMutability[min_i:max_i])
+ seqTargeting[is.na(seqTargeting)] <<- 0
+ #mutCodonPos = getCodonPos(position)
+ # all positions from the first base of min_i's codon to the last base of max_i's codon
+ mutCodonPos = seq(getCodonPos(min_i)[1],getCodonPos(max_i)[3])
+ #cat(mutCodonPos,"\n")
+ # mutation types are only recomputed for the codon that holds the mutation
+ mutTypeCodon = getCodonPos(position)
+ seqMutationTypes[,mutTypeCodon] <<- computeMutationTypesFast( substr(seqSim,mutTypeCodon[1],mutTypeCodon[3]) )
+ # Stop = 0
+ if(any(seqMutationTypes[,mutCodonPos]=="Stop",na.rm=T )){
+ seqTargeting[,mutCodonPos][seqMutationTypes[,mutCodonPos]=="Stop"] <<- 0
+ }
+
+
+ #Selection
+ # linear (column-major) indices into the 4-row matrices of the "R" cells in the window
+ selectedPos = (min_i*4-4)+(which(seqMutationTypes[,min_i:max_i]=="R"))
+ # CDR
+ # scale by exp(selCDR), then divide by baseLineCDR_K
+ selectedCDR = selectedPos[which(matCDR[selectedPos]==T)]
+ seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR] * exp(selCDR)
+ seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR]/baseLineCDR_K
+
+ # FWR
+ # same reweighting with selFWR and baseLineFWR_K
+ selectedFWR = selectedPos[which(matFWR[selectedPos]==T)]
+ seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR] * exp(selFWR)
+ seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR]/baseLineFWR_K
+
+ }
+
+
+
+ # Validate the mutation stored in the global mutatedPos.
+ # A position that has not been mutated before is appended to mutatedPositions
+ # and counted in uniqueMutationsIntroduced. A position seen before is removed
+ # again (and the counter decremented) only when the simulated sequence now
+ # matches the germline at that position; otherwise nothing changes.
+ validateMutation <- function(){
+   if( mutatedPos%in%mutatedPositions ){
+     # repeated hit: it only matters when it reverted the base to germline
+     if(substr(seqSim,mutatedPos,mutatedPos)==substr(seqGermline,mutatedPos,mutatedPos)){
+       mutatedPositions <<- mutatedPositions[-which(mutatedPositions==mutatedPos)]
+       uniqueMutationsIntroduced <<- uniqueMutationsIntroduced - 1
+     }
+   }else{
+     # first hit at this position
+     uniqueMutationsIntroduced <<- uniqueMutationsIntroduced + 1
+     mutatedPositions[uniqueMutationsIntroduced] <<- mutatedPos
+   }
+ }
+
+
+
+ # Places text (labels) at normalized coordinates: xfrac of the plot width to
+ # the left of the left edge and yfrac of the plot height above the top edge.
+ #   log : FALSE for linear axes; 1, 2 or 3 raises the x, the y or both
+ #         coordinates to the power of 10; any other value draws nothing
+ #   w, cex, adj, thecol : label text and the matching text() arguments
+ # Clipping is switched off (xpd=TRUE) while drawing and set to FALSE afterwards.
+ myaxis <- function(xfrac=0.05,yfrac=0.05,log=FALSE,w="text",cex=1,adj=1,thecol="black"){
+   par(xpd=TRUE)
+   usr <- par()$usr
+   xLinear <- usr[1]-(usr[2]-usr[1])*xfrac
+   yLinear <- usr[4]+(usr[4]-usr[3])*yfrac
+   putLabel <- function(x,y) text(x,y,w,cex=cex,adj=adj,col=thecol)
+   if(!log){
+     putLabel(xLinear,yLinear)
+   }else{
+     if(log==2) putLabel(xLinear,10^yLinear)
+     if(log==1) putLabel(10^xLinear,yLinear)
+     if(log==3) putLabel(10^xLinear,10^yLinear)
+   }
+   par(xpd=FALSE)
+ }
+
+
+
+ # Count the mutations in a sequence
+ # Classify every nucleotide difference between row `inputMatrixIndex` of the
+ # global matInput (column 1: input sequence, column 2: germline) and filter
+ # the result.
+ #   model            : 1 keeps only mutations labelled "S"
+ #   multipleMutation : 1 drops mutations in codons holding more than one
+ #                      mutation and writes the input sequence back to matInput
+ #                      with those codons replaced by "N"
+ #   seqWithStops     : 1 drops mutations whose whole observed codon is
+ #                      labelled "Stop"; 2 returns NA when there is no such
+ #                      codon; 3 returns NA when there is one
+ # Returns a vector of mutationType() labels named by nucleotide position, or NA.
+ analyzeMutations <- function( inputMatrixIndex, model = 0 , multipleMutation=0, seqWithStops=0){
+
+ paramGL = s2c(matInput[inputMatrixIndex,2])
+ paramSeq = s2c(matInput[inputMatrixIndex,1])
+
+ #if( any(paramSeq=="N") ){
+ # gapPos_Seq = which(paramSeq=="N")
+ # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
+ # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
+ #}
+ mutations_val = paramGL != paramSeq
+
+ if(any(mutations_val)){
+ mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val]
+ length_mutations =length(mutationPos)
+ mutationInfo = rep(NA,length_mutations)
+
+ pos<- mutationPos
+ # positions of the codon around each mutation, flattened into one vector
+ pos_array<-array(sapply(pos,getCodonPos))
+ codonGL = paramGL[pos_array]
+ # the codon exactly as observed (may carry several mutations)
+ codonSeqWhole = paramSeq[pos_array]
+ # the germline codon with only the one mutated nucleotide swapped in
+ codonSeq = sapply(pos,function(x){
+ seqP = paramGL[getCodonPos(x)]
+ muCodonPos = {x-1}%%3+1
+ seqP[muCodonPos] = paramSeq[x]
+ return(seqP)
+ })
+ GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
+ SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)
+ Seqcodons = apply(codonSeq,2,c2s)
+
+ # label of each mutation taken on its own
+ mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
+ names(mutationInfo) = mutationPos
+
+ # label of the whole observed codon at each mutated position
+ mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
+ names(mutationInfoWhole) = mutationPos
+
+ mutationInfo <- mutationInfo[!is.na(mutationInfo)]
+ mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]
+
+ if(any(!is.na(mutationInfo))){
+
+ #Filter based on Stop (at the codon level)
+ if(seqWithStops==1){
+ # despite the name, these are the positions whose whole codon is NOT a "Stop"
+ nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"])
+ mutationInfo = mutationInfo[nucleotidesAtStopCodons]
+ # NOTE(review): this takes the subset from mutationInfo, not from mutationInfoWhole -- confirm intended.
+ mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons]
+ }else{
+ countStops = sum(mutationInfoWhole=="Stop")
+ if(seqWithStops==2 & countStops==0) mutationInfo = NA
+ if(seqWithStops==3 & countStops>0) mutationInfo = NA
+ }
+
+ if(any(!is.na(mutationInfo))){
+ #Filter mutations based on multipleMutation
+ # NOTE(review): !is.na(mutationInfo) is a vector used in a scalar condition -- confirm on current R.
+ if(multipleMutation==1 & !is.na(mutationInfo)){
+ mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
+ tableMutationCodons <- table(mutationCodons)
+ codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
+ if(any(codonsWithMultipleMutations)){
+ #remove the nucleotide mutations in the codons with multiple mutations
+ mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
+ #replace those codons with Ns in the input sequence
+ paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
+ # side effect: the masked sequence replaces the input row in the global matInput
+ matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
+ }
+ }
+
+ #Filter mutations based on the model
+ # NOTE(review): mutationInfo is compared with "S" and "Stop" elsewhere, so any() is
+ # applied to a character vector here -- confirm this test behaves as intended.
+ if(any(mutationInfo)==T | is.na(any(mutationInfo))){
+
+ if(model==1 & !is.na(mutationInfo)){
+ mutationInfo <- mutationInfo[mutationInfo=="S"]
+ }
+ if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(mutationInfo)
+ else return(NA)
+ }else{
+ return(NA)
+ }
+ }else{
+ return(NA)
+ }
+
+
+ }else{
+ return(NA)
+ }
+
+
+ }else{
+ return (NA)
+ }
+ }
+
+ # Variant of the mutation classifier that takes the sequences as an argument
+ # and does not write to the global matInput.
+ #   inputArray       : element 1 is the input sequence, element 2 the germline
+ #   model            : 1 keeps only mutations labelled "S"
+ #   multipleMutation : 1 drops mutations in codons holding more than one
+ #                      mutation and replaces those codons by "N" in the
+ #                      returned sequence
+ #   seqWithStops     : 1 drops mutations whose whole observed codon is
+ #                      labelled "Stop"; 2 gives NA when there is no such
+ #                      codon; 3 gives NA when there is one
+ # Returns list(labels named by nucleotide position or NA, input sequence
+ # possibly masked with "N").
+ analyzeMutationsFixed <- function( inputArray, model = 0 , multipleMutation=0, seqWithStops=0){
+
+ paramGL = s2c(inputArray[2])
+ paramSeq = s2c(inputArray[1])
+ # returned as the second list element; only changed by the multipleMutation filter
+ inputSeq <- inputArray[1]
+ #if( any(paramSeq=="N") ){
+ # gapPos_Seq = which(paramSeq=="N")
+ # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
+ # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
+ #}
+ mutations_val = paramGL != paramSeq
+
+ if(any(mutations_val)){
+ mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val]
+ length_mutations =length(mutationPos)
+ mutationInfo = rep(NA,length_mutations)
+
+ pos<- mutationPos
+ # positions of the codon around each mutation, flattened into one vector
+ pos_array<-array(sapply(pos,getCodonPos))
+ codonGL = paramGL[pos_array]
+ # the codon exactly as observed (may carry several mutations)
+ codonSeqWhole = paramSeq[pos_array]
+ # the germline codon with only the one mutated nucleotide swapped in
+ codonSeq = sapply(pos,function(x){
+ seqP = paramGL[getCodonPos(x)]
+ muCodonPos = {x-1}%%3+1
+ seqP[muCodonPos] = paramSeq[x]
+ return(seqP)
+ })
+ GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
+ SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)
+ Seqcodons = apply(codonSeq,2,c2s)
+
+ # label of each mutation taken on its own
+ mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
+ names(mutationInfo) = mutationPos
+
+ # label of the whole observed codon at each mutated position
+ mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
+ names(mutationInfoWhole) = mutationPos
+
+ mutationInfo <- mutationInfo[!is.na(mutationInfo)]
+ mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]
+
+ if(any(!is.na(mutationInfo))){
+
+ #Filter based on Stop (at the codon level)
+ if(seqWithStops==1){
+ # despite the name, these are the positions whose whole codon is NOT a "Stop"
+ nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"])
+ mutationInfo = mutationInfo[nucleotidesAtStopCodons]
+ # NOTE(review): this takes the subset from mutationInfo, not from mutationInfoWhole -- confirm intended.
+ mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons]
+ }else{
+ countStops = sum(mutationInfoWhole=="Stop")
+ if(seqWithStops==2 & countStops==0) mutationInfo = NA
+ if(seqWithStops==3 & countStops>0) mutationInfo = NA
+ }
+
+ if(any(!is.na(mutationInfo))){
+ #Filter mutations based on multipleMutation
+ # NOTE(review): !is.na(mutationInfo) is a vector used in a scalar condition -- confirm on current R.
+ if(multipleMutation==1 & !is.na(mutationInfo)){
+ mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
+ tableMutationCodons <- table(mutationCodons)
+ codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
+ if(any(codonsWithMultipleMutations)){
+ #remove the nucleotide mutations in the codons with multiple mutations
+ mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
+ #replace those codons with Ns in the input sequence
+ paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
+ #matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
+ # the masked sequence is handed back to the caller instead of written to a global
+ inputSeq <- c2s(paramSeq)
+ }
+ }
+
+ #Filter mutations based on the model
+ # NOTE(review): mutationInfo is compared with "S" and "Stop" elsewhere, so any() is
+ # applied to a character vector here -- confirm this test behaves as intended.
+ if(any(mutationInfo)==T | is.na(any(mutationInfo))){
+
+ if(model==1 & !is.na(mutationInfo)){
+ mutationInfo <- mutationInfo[mutationInfo=="S"]
+ }
+ if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(list(mutationInfo,inputSeq))
+ else return(list(NA,inputSeq))
+ }else{
+ return(list(NA,inputSeq))
+ }
+ }else{
+ return(list(NA,inputSeq))
+ }
+
+
+ }else{
+ return(list(NA,inputSeq))
+ }
+
+
+ }else{
+ return (list(NA,inputSeq))
+ }
+ }
+
+ # triMutability Background Count
+ # For row `inputMatrixIndex` of the global matInput (column 1: input sequence,
+ # column 2: germline, "-" removed from both), count per germline 5-mer how
+ # often it occurs (background) and how often its middle nucleotide is mutated.
+ #   model         : 0 counts all mutations, otherwise only those labelled "S"
+ #   stopMutations : 1 excludes "Stop" mutations and weights the background by
+ #                   probNonStopMutations
+ #   multipleMutation, seqWithStops : forwarded to analyzeMutationsFixed()
+ # Returns list(seqMutability, numbMutations, seqMutabilityCount,
+ # BackgroundMatrix). Nothing is returned explicitly when the sequence has no
+ # usable mutations or the filter condition below is not met.
+ buildMutabilityModel <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){
+
+ #rowOrigMatInput = matInput[inputMatrixIndex,]
+ seqGL = gsub("-", "", matInput[inputMatrixIndex,2])
+ seqInput = gsub("-", "", matInput[inputMatrixIndex,1])
+ #matInput[inputMatrixIndex,] <<- cbind(seqInput,seqGL)
+ tempInput <- cbind(seqInput,seqGL)
+ seqLength = nchar(seqGL)
+ list_analyzeMutationsFixed<- analyzeMutationsFixed(tempInput, model, multipleMutation, seqWithStops)
+ mutationCount <- list_analyzeMutationsFixed[[1]]
+ # input sequence, possibly with multi-mutation codons masked by "N"
+ seqInput <- list_analyzeMutationsFixed[[2]]
+ # all three accumulators start from the global mutabilityMatrix template
+ BackgroundMatrix = mutabilityMatrix
+ MutationMatrix = mutabilityMatrix
+ MutationCountMatrix = mutabilityMatrix
+ if(!is.na(mutationCount)){
+ # NOTE(review): the next line is corrupted in this copy of the file (text
+ # between a "<" and a ">" appears to be missing) and does not parse; restore
+ # it from the upstream source.
+ if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")0)) ){
+
+ fivermerStartPos = 1:(seqLength-4)
+ fivemerLength <- length(fivermerStartPos)
+ # every 5-mer of the germline and of the input sequence, by start position
+ fivemerGL <- substr(rep(seqGL,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
+ fivemerSeq <- substr(rep(seqInput,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
+
+ #Background
+ for(fivemerIndex in 1:fivemerLength){
+ fivemer = fivemerGL[fivemerIndex]
+ if(!any(grep("N",fivemer))){
+ # reading-frame codon that contains the middle nucleotide of the 5-mer
+ fivemerCodonPos = fivemerCodon(fivemerIndex)
+ fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3])
+ fivemerReadingFrameCodonInputSeq = substr(fivemerSeq[fivemerIndex],fivemerCodonPos[1],fivemerCodonPos[3])
+
+ # All mutations model
+ #if(!any(grep("N",fivemerReadingFrameCodon))){
+ if(model==0){
+ if(stopMutations==0){
+ if(!any(grep("N",fivemerReadingFrameCodonInputSeq)))
+ BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + 1)
+ }else{
+ if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" ){
+ # position of the 5-mer's middle nucleotide (index 3) inside its codon
+ positionWithinCodon = which(fivemerCodonPos==3)#positionsWithinCodon[(fivemerCodonPos[1]%%3)+1]
+ BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probNonStopMutations[fivemerReadingFrameCodon,positionWithinCodon])
+ }
+ }
+ }else{ # Only silent mutations
+ if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" & translateCodonToAminoAcid(fivemerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(fivemerReadingFrameCodon)){
+ positionWithinCodon = which(fivemerCodonPos==3)
+ BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probSMutations[fivemerReadingFrameCodon,positionWithinCodon])
+ }
+ }
+ #}
+ }
+ }
+
+ #Mutations
+ if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
+ if(model==1) mutationCount = mutationCount[mutationCount=="S"]
+ mutationPositions = as.numeric(names(mutationCount))
+ # keep only mutations that have two flanking nucleotides on both sides
+ mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)]
+ mutationPositions = mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
+ countMutations = 0
+ for(mutationPosition in mutationPositions){
+ # the 5-mer centred on the mutation starts two positions earlier
+ fivemerIndex = mutationPosition-2
+ fivemer = fivemerSeq[fivemerIndex]
+ GLfivemer = fivemerGL[fivemerIndex]
+ fivemerCodonPos = fivemerCodon(fivemerIndex)
+ fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3])
+ fivemerReadingFrameCodonGL = substr(GLfivemer,fivemerCodonPos[1],fivemerCodonPos[3])
+ if(!any(grep("N",fivemer)) & !any(grep("N",GLfivemer))){
+ if(model==0){
+ countMutations = countMutations + 1
+ MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + 1)
+ MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)
+ }else{
+ if( translateCodonToAminoAcid(fivemerReadingFrameCodonGL)!="*" ){
+ countMutations = countMutations + 1
+ positionWithinCodon = which(fivemerCodonPos==3)
+ glNuc = substr(fivemerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
+ inputNuc = substr(fivemerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
+ # weighted by the substitution rate from the germline to the observed base
+ MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + substitution[glNuc,inputNuc])
+ MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)
+ }
+ }
+ }
+ }
+
+ # mutations per unit of background, rescaled to sum to 1
+ seqMutability = MutationMatrix/BackgroundMatrix
+ seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
+ #cat(inputMatrixIndex,"\t",countMutations,"\n")
+ return(list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))
+
+ }
+ }
+
+ }
+
+ # Positions, inside a 5-mer, of the reading-frame codon that contains the
+ # 5-mer's middle nucleotide.
+ #   fivemerIndex : start position of the 5-mer in the sequence
+ # Returns 1:3, 3:5 or 2:4 for fivemerIndex %% 3 equal to 1, 2 or 0.
+ fivemerCodon <- function(fivemerIndex){
+   # entry (fivemerIndex %% 3) + 1 is the answer for that reading frame
+   positionsByFrame <- list(2:4,1:3,3:5)
+   positionsByFrame[[(fivemerIndex%%3)+1]]
+ }
+
+ # Returns probability values for one mutation in codons resulting in R, S or Stop.
+ #   typeOfMutation : the mutationType() label to accumulate
+ # Returns a 125 x 3 matrix (rows: all codons over NUCLEOTIDES plus "N",
+ # columns: position in the codon). Each cell is the sum of the substitution
+ # rates of the three single-base changes at that position whose label equals
+ # typeOfMutation. Codons containing "N" stay at 0.
+ probMutations <- function(typeOfMutation){
+   codonNames <- words(alphabet = c(NUCLEOTIDES,"N"), length=3)
+   matMutationProb <- matrix(0,ncol=3,nrow=125,dimnames=list(codonNames,c(1:3)))
+   for(codon in rownames(matMutationProb)){
+     if( any(grep("N",codon)) ) next
+     for(muPos in 1:3){
+       # three copies of the codon, one per alternative base at muPos
+       variants <- matrix(rep(s2c(codon),3),nrow=3,ncol=3,byrow=TRUE)
+       glNuc <- variants[1,muPos]
+       variants[,muPos] <- canMutateTo(glNuc)
+       rates <- substitution[glNuc,variants[,muPos]]
+       variantCodons <- apply(variants,1,c2s)
+       labels <- apply(rbind(rep(codon,3),variantCodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
+       matMutationProb[codon,muPos] <- sum(rates[labels==typeOfMutation])
+     }
+   }
+   matMutationProb
+ }
+
+
+
+
+# Mapping trinucleotides to fivemers.
+# For each of the 1024 5-mers, the mutability of the middle nucleotide is the
+# sum of three trinucleotide values: the one where it is the first base
+# (column 1), the second base (column 2) and the third base (column 3).
+# Missing values are ignored and the result is rescaled to sum to 1.
+#   triMutability : table with one row per trinucleotide (rows are renamed
+#                   with triMutability_Names) and one column per position
+mapTriToFivemer <- function(triMutability=triMutability_Literature_Human){
+  rownames(triMutability) <- triMutability_Names
+  template <- rep(NA,1024)
+  names(template) <- words(alphabet=NUCLEOTIDES,length=5)
+  middleMutability <- function(Word){
+    asFirst <- triMutability[substring(Word,3,5),1]
+    asSecond <- triMutability[substring(Word,2,4),2]
+    asThird <- triMutability[substring(Word,1,3),3]
+    sum( c(asFirst,asSecond,asThird),na.rm=TRUE)
+  }
+  scores <- sapply(names(template),middleMutability)
+  scores/sum(scores)
+}
+
+# Collapse 5-mer values to trinucleotide values.
+# Only 5-mers whose middle base is NUC are used. They are grouped by the
+# trinucleotide at characters (4-position):(6-position) of the 5-mer name,
+# and each group is summarised by the Weights-weighted mean of its values.
+collapseFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
+  centreIsNUC <- substring(names(Fivemer),3,3)==NUC
+  triLabels <- substring(names(Fivemer[centreIsNUC]),(4-position),(6-position))
+  groupMean <- function(members) weighted.mean(Fivemer[members],Weights[members],na.rm=TRUE)
+  tapply(which(centreIsNUC),triLabels,groupMean)
+}
+
+
+
+# Total weight behind each trinucleotide value.
+# Uses the same grouping as collapseFivemerToTri() (5-mers with middle base
+# NUC, grouped by characters (4-position):(6-position) of their name), but
+# returns the sum of Weights in each group.
+CountFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
+  centreIsNUC <- substring(names(Fivemer),3,3)==NUC
+  triLabels <- substring(names(Fivemer[centreIsNUC]),(4-position),(6-position))
+  groupWeight <- function(members) sum(Weights[members],na.rm=TRUE)
+  tapply(which(centreIsNUC),triLabels,groupWeight)
+}
+
+# Uses the real counts of the mutated fivemers.
+# Same grouping as CountFivemerToTri() (5-mers with middle base NUC, grouped
+# by characters (4-position):(6-position) of their name); returns the sum of
+# Counts in each group.
+CountFivemerToTri2<-function(Fivemer,Counts=MutabilityCounts,position=1,NUC="A"){
+  centreIsNUC <- substring(names(Fivemer),3,3)==NUC
+  triLabels <- substring(names(Fivemer[centreIsNUC]),(4-position),(6-position))
+  groupCount <- function(members) sum(Counts[members],na.rm=TRUE)
+  tapply(which(centreIsNUC),triLabels,groupCount)
+}
+
+# Bootstrap interval for the proportions of a vector of counts.
+# M multinomial samples of size sum(x) are drawn with probabilities x/sum(x).
+# For each category the alpha/2/k and 1-alpha/2/k quantiles of the simulated
+# proportions are returned, with k = length(x)-1.
+# Returns a 2 x length(x) matrix (all zeros when sum(x) is 0).
+bootstrap<-function(x=c(33,12,21),M=10000,alpha=0.05){
+  N <- sum(x)
+  if(!N) return(matrix(0,2,length(x)))
+  k <- length(x)-1
+  tailProb <- alpha/2/k
+  draws <- rmultinom(M, size = N, prob=x/N)
+  proportions <- apply(draws,2,function(y)y/N)
+  apply(proportions,1,function(y)quantile(y,c(tailProb,1-tailProb)))
+}
+
+
+
+
+# Bootstrap interval for category proportions in samples of size n drawn
+# without replacement from a pool holding x[i] items of category i.
+# M samples are drawn. For each category the alpha/2/(k-1) and
+# 1-alpha/2/(k-1) quantiles of the simulated proportions are returned,
+# with k = length(x).
+# Returns a 2 x length(x) matrix.
+bootstrap2<-function(x=c(33,12,21),n=10,M=10000,alpha=0.05){
+  k <- length(x)
+  pool <- rep(1:k,x)
+  draws <- sapply(1:M,function(i)sample(pool,n))
+  # occurrences of every category in one sample
+  tally <- function(picked) sapply(1:k,function(i)sum(picked==i))
+  # draws is a matrix (one column per sample) for n>1 and a plain vector for n==1
+  if(n>1) proportions <- sapply(1:M,function(j)tally(draws[,j]))/n
+  if(n==1) proportions <- sapply(1:M,function(j)tally(draws[j]))/n
+  tailProb <- alpha/2/(k-1)
+  apply(proportions,1,function(z)quantile(z,c(tailProb,1-tailProb)))
+}
+
+
+
+# Simulated signed p-value for observed category counts.
+#   x     : pool composition; x[i] items of category i
+#   M     : number of simulated samples
+#   x_obs : observed counts, one per category; sum(x_obs) is the sample size
+# M samples of size sum(x_obs) are drawn without replacement from the pool.
+# For each category the fraction of samples with a count >= the observed one
+# (upper) and <= the observed one (lower) is computed. The result is -lower
+# when upper >= lower and upper otherwise.
+# Returns one value per category of x (any number of categories is handled).
+p_value<-function(x=c(33,12,21),M=100000,x_obs=c(2,5,3)){
+  n <- sum(x_obs)
+  k <- length(x)
+  y <- rep(1:k,x)
+  tmp <- sapply(1:M,function(i)sample(y,n))
+  # tmp is a matrix (one column per sample) for n>1 and a plain vector for n==1
+  if(n>1) tmp_p <- sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i)))
+  if(n==1) tmp_p <- sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i)))
+  # sapply() returns a plain vector when k is 1; keep one row per category
+  tmp_p <- matrix(tmp_p,nrow=k)
+  categories <- seq_len(k)
+  upper <- sapply(categories,function(i)sum(tmp_p[i,]>=x_obs[i])/M)
+  lower <- sapply(categories,function(i)sum(tmp_p[i,]<=x_obs[i])/M)
+  sapply(categories,function(i){if(upper[i]>=lower[i])return(-lower[i])else return(upper[i])})
+}
+
+#"D:\\Sequences\\IMGT Germlines\\Human_SNPless_IGHJ.FASTA"
+# Remove SNPs from IMGT germline segment alleles.
+# Reads a FASTA file, groups the records by gene segment (the second
+# "|"-separated field of the header, cut at "*"), finds the positions within
+# the first 320 characters where any allele differs from the first allele of
+# its segment (positions holding "." in either sequence are ignored), replaces
+# those positions by "N" in every allele of the segment, and writes the result
+# to repertoireOutFile.
+generateUnambiguousRepertoire <- function(repertoireInFile,repertoireOutFile){
+  repertoireIn <- read.fasta(repertoireInFile, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F)
+
+  alleleOfHeader <- function(header) strsplit(header,"|",fixed=TRUE)[[1]][2]
+  segmentOfAllele <- function(allele) strsplit(allele,"*",fixed=TRUE)[[1]][1]
+
+  alleleNames <- sapply(names(repertoireIn),alleleOfHeader)
+  segmentNames <- sapply(alleleNames,segmentOfAllele)
+
+  # per gene segment: sorted positions where some allele disagrees with the first one
+  SNPs <- tapply(repertoireIn,segmentNames,function(alleles){
+    reference <- s2c(alleles[[1]])[1:320]
+    differing <- NULL
+    for(allele in alleles){
+      candidate <- s2c(allele)[1:320]
+      differing <- c(differing,which(reference!=candidate & reference!="." & candidate!="." ))
+    }
+    sort(unique(differing))
+  })
+
+  repertoireOut <- lapply(names(repertoireIn), function(repertoireName){
+    geneSegmentName <- segmentOfAllele(alleleOfHeader(repertoireName))
+    alleleSeq <- s2c(repertoireIn[[repertoireName]])
+    alleleSeq[as.numeric(unlist(SNPs[geneSegmentName]))] <- "N"
+    c2s(alleleSeq)
+  })
+  names(repertoireOut) <- names(repertoireIn)
+  write.fasta(repertoireOut,names(repertoireOut),file.out=repertoireOutFile)
+
+}
+
+
+
+
+
+
+############
+# Group posterior pdfs (Focused test) for the rows `indexes` of param_resultMat.
+# For each region, calculate_bayesG() receives
+#   x : the region's own column (1 for CDR, 3 for FWR),
+#   N : the row sums over that column plus columns 2 and 4 (NAs removed),
+#   p : the region's share of columns 5:8, i.e. value/(value + col 2 + col 4)
+#       after taking columns 5:8 of the row.
+# The Local and Global variants that used to sit here as commented-out code
+# are not computed.
+# Returns list(BayesGDist_Focused_CDR=, BayesGDist_Focused_FWR=).
+groupBayes2 = function(indexes, param_resultMat){
+
+  focusedGroupPdf <- function(target, obsCols){
+    calculate_bayesG(
+      x=param_resultMat[indexes,target],
+      N=apply(param_resultMat[indexes,obsCols],1,sum,na.rm=T),
+      p=apply(param_resultMat[indexes,5:8],1,function(x){x[target]/(x[target]+x[2]+x[4])}))
+  }
+
+  BayesGDist_Focused_CDR <- focusedGroupPdf(1, c(1,2,4))
+  BayesGDist_Focused_FWR <- focusedGroupPdf(3, c(3,2,4))
+
+  return ( list("BayesGDist_Focused_CDR"=BayesGDist_Focused_CDR,
+                "BayesGDist_Focused_FWR"=BayesGDist_Focused_FWR) )
+}
+
+
+# calculate_bayesG: combined posterior PDF for a group of (x, N, p) triplets.
+#
+# Arguments:
+#   x, N, p      - vectors passed element-wise to calculate_bayes(); they are recycled
+#                  to the length of the longest of the three
+#   max_sigma    - forwarded to calculate_bayes() and used in the final scaling
+#   length_sigma - number of points of each PDF (rows of the working matrix)
+# Value:
+#   NA when no triplet has N > 0 and p > 0;
+#   the result of calculate_bayes() when exactly one triplet qualifies;
+#   otherwise the convolved group PDF, normalised to sum to 1 and then divided by
+#   2*max_sigma/(length_sigma-1) (NOTE(review): presumably the sigma grid spacing - confirm).
+calculate_bayesG <- function( x=array(), N=array(), p=array(), max_sigma=20, length_sigma=4001){
+  G <- max(length(x),length(N),length(p))
+  x <- array(x,dim=G)
+  N <- array(N,dim=G)
+  p <- array(p,dim=G)
+
+  # Keep only informative triplets. A comparison involving NA/NaN yields NA, and
+  # subsetting with an NA index would inject an NA triplet into calculate_bayes();
+  # such entries are treated as not usable instead.
+  usable <- N>0 & p>0
+  usable[is.na(usable)] <- FALSE
+  N <- N[usable]
+  x <- x[usable]
+  p <- p[usable]
+  G <- length(x)
+
+  if(G==0){
+    return(NA)
+  }
+  if(G==1){
+    return(calculate_bayes(x=x[G],N=N[G],p=p[G],max_sigma=max_sigma,length_sigma=length_sigma))
+  }
+
+  # One column per triplet, then convolve the individual PDFs into the group PDF
+  cons <- array( dim=c(length_sigma,G) )
+  for(g in seq_len(G)) cons[,g] <- calculate_bayes(x=x[g],N=N[g],p=p[g],max_sigma=max_sigma,length_sigma=length_sigma)
+  listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma)
+  y <- calculate_bayesGHelper(listMatG,length_sigma=length_sigma)
+  return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
+}
+
+
+calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){
+ matG <- listMatG[[1]]
+ groups <- listMatG[[2]]
+ i = 1
+ resConv <- matG[,i]
+ denom <- 2^groups[i]
+ if(length(groups)>1){
+ while( i0)) ){
+
+# ONEmerStartPos = 1:(seqLength)
+# ONEmerLength <- length(ONEmerStartPos)
+ ONEmerGL <- s2c(seqGL)
+ ONEmerSeq <- s2c(seqInput)
+
+ #Background
+ for(ONEmerIndex in 1:seqLength){
+ ONEmer = ONEmerGL[ONEmerIndex]
+ if(ONEmer!="N"){
+ ONEmerCodonPos = getCodonPos(ONEmerIndex)
+ ONEmerReadingFrameCodon = c2s(ONEmerGL[ONEmerCodonPos])
+ ONEmerReadingFrameCodonInputSeq = c2s(ONEmerSeq[ONEmerCodonPos] )
+
+ # All mutations model
+ #if(!any(grep("N",ONEmerReadingFrameCodon))){
+ if(model==0){
+ if(stopMutations==0){
+ if(!any(grep("N",ONEmerReadingFrameCodonInputSeq)))
+ BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + 1)
+ }else{
+ if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*"){
+ positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)#positionsWithinCodon[(ONEmerCodonPos[1]%%3)+1]
+ BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probNonStopMutations[ONEmerReadingFrameCodon,positionWithinCodon])
+ }
+ }
+ }else{ # Only silent mutations
+ if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*" & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(ONEmerReadingFrameCodon) ){
+ positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
+ BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probSMutations[ONEmerReadingFrameCodon,positionWithinCodon])
+ }
+ }
+ }
+ }
+ }
+
+ #Mutations
+ if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
+ if(model==1) mutationCount = mutationCount[mutationCount=="S"]
+ mutationPositions = as.numeric(names(mutationCount))
+ mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)]
+ mutationPositions = mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
+ countMutations = 0
+ for(mutationPosition in mutationPositions){
+ ONEmerIndex = mutationPosition
+ ONEmer = ONEmerSeq[ONEmerIndex]
+ GLONEmer = ONEmerGL[ONEmerIndex]
+ ONEmerCodonPos = getCodonPos(ONEmerIndex)
+ ONEmerReadingFrameCodon = c2s(ONEmerSeq[ONEmerCodonPos])
+ ONEmerReadingFrameCodonGL =c2s(ONEmerGL[ONEmerCodonPos])
+ if(!any(grep("N",ONEmer)) & !any(grep("N",GLONEmer))){
+ if(model==0){
+ countMutations = countMutations + 1
+ MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + 1)
+ MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)
+ }else{
+ if( translateCodonToAminoAcid(ONEmerReadingFrameCodonGL)!="*" ){
+ countMutations = countMutations + 1
+ positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
+ glNuc = substr(ONEmerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
+ inputNuc = substr(ONEmerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
+ MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + substitution[glNuc,inputNuc])
+ MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)
+ }
+ }
+ }
+ }
+
+ seqMutability = MutationMatrix/BackgroundMatrix
+ seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
+ #cat(inputMatrixIndex,"\t",countMutations,"\n")
+ return(list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))
+# tmp<-list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix)
+ }
+ }
+
+################
+# $Id: trim.R 989 2006-10-29 15:28:26Z ggorjan $
+
+# trim: remove leading and trailing space characters (only " ", not tabs or newlines).
+# S3 generic; the method is chosen from the class of s.
+#   s             - object to trim
+#   recode.factor - for factors: re-order the levels after trimming
+#   ...           - passed on to the methods
+trim <- function(s, recode.factor=TRUE, ...)
+  UseMethod("trim")
+
+# Fallback: objects of any other class are returned untouched.
+trim.default <- function(s, recode.factor=TRUE, ...)
+{
+  s
+}
+
+# Character vectors: drop the run of spaces at the start, then the run at the end.
+trim.character <- function(s, recode.factor=TRUE, ...)
+{
+  withoutLeading <- sub("^ +", "", s)
+  sub(" +$", "", withoutLeading)
+}
+
+# Factors: trim the level labels; with recode.factor the factor is then handed to
+# reorder.factor() together with any extra arguments (base sort() is supplied as
+# `sort` unless the caller passed one).
+# NOTE(review): reorder.factor is not defined in the visible part of this file -
+# presumably gdata::reorder.factor; confirm it is available before trimming factors.
+trim.factor <- function(s, recode.factor=TRUE, ...)
+{
+  levels(s) <- trim(levels(s))
+  if(!recode.factor) return(s)
+  reorderArgs <- list(x=s, ...)
+  if(is.null(reorderArgs$sort)) reorderArgs$sort <- sort
+  do.call(what=reorder.factor, args=reorderArgs)
+}
+
+# Lists: trim every element according to its own class.
+trim.list <- function(s, recode.factor=TRUE, ...)
+  lapply(s, function(element) trim(element, recode.factor=recode.factor, ...))
+
+# Data frames: trim column by column, keeping the data-frame structure.
+trim.data.frame <- function(s, recode.factor=TRUE, ...)
+{
+  trimmedColumns <- trim.list(s, recode.factor=recode.factor, ...)
+  s[] <- trimmedColumns
+  s
+}
+#######################################
+# Compute the expected R and S mutation frequencies of each sequence-germline pair, by codon.
+#
+# Arguments:
+#   matInput - matrix with one row per pair; column 1 is read as the input sequence
+#              string and column 2 as the germline sequence string
+# Globals read:
+#   readEnd  - positions 1:readEnd are walked in windows of three; the result has
+#              readEnd/3 columns
+# Value:
+#   list("Expected_R", "Expected_S"): matrices with nrow(matInput) rows and readEnd/3 columns
+#
+# When an entry of search() contains "multicore" the per-sequence steps run through
+# mclapply(), otherwise through lapply(); the computation is the same in both cases.
+getExpectedIndividualByCodon <- function(matInput){
+  applyFun <- if( any(grepl("multicore",search())) ) mclapply else lapply
+  seqIndex <- seq_len(nrow(matInput))
+
+  # Mutabilities are computed once per distinct germline and looked up per row
+  facGL <- factor(matInput[,2])
+  facLevels <- levels(facGL)
+  LisGLs_MutabilityU <- applyFun(seq_along(facLevels), function(x){
+    computeMutabilities(facLevels[x])
+  })
+  facIndex <- match(facGL,facLevels)
+
+  # Multiply by 1 where the input sequence has a base and by NA where it has "N",
+  # so positions that are "N" in the input get an NA mutability
+  LisGLs_Mutability <- applyFun(seqIndex, function(x){
+    cInput <- rep(NA,nchar(matInput[x,1]))
+    cInput[s2c(matInput[x,1])!="N"] <- 1
+    LisGLs_MutabilityU[[facIndex[x]]] * cInput
+  })
+
+  LisGLs_Targeting <- applyFun(seqIndex, function(x){
+    computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
+  })
+
+  LisGLs_MutationTypes <- applyFun(seqIndex, function(x){
+    computeMutationTypes(matInput[x,2])
+  })
+
+  # For every sequence and every codon (three consecutive positions), add up the
+  # targeting entries whose mutation type equals mutationType, ignoring NAs.
+  expectedByCodon <- function(mutationType){
+    perSequence <- applyFun(seqIndex, function(x){
+      rollapply(as.zoo(1:readEnd),width=3,by=3,
+        function(codonNucs){
+          typePos <- which(LisGLs_MutationTypes[[x]][,codonNucs]==mutationType)
+          sum( LisGLs_Targeting[[x]][,codonNucs][typePos], na.rm=TRUE )
+        }
+      )
+    })
+    # One row per sequence, one column per codon
+    matrix(unlist(perSequence),nrow=nrow(matInput),ncol=readEnd/3,byrow=TRUE)
+  }
+
+  return( list( "Expected_R"=expectedByCodon("R"), "Expected_S"=expectedByCodon("S") ) )
+}
+
+# getObservedMutationsByCodon <- function(listMutations){
+# numbSeqs <- length(listMutations)
+# obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))
+# obsMu_S <- obsMu_R
+# temp <- mclapply(1:length(listMutations), function(i){
+# arrMutations = listMutations[[i]]
+# RPos = as.numeric(names(arrMutations)[arrMutations=="R"])
+# RPos <- sapply(RPos,getCodonNumb)
+# if(any(RPos)){
+# tabR <- table(RPos)
+# obsMu_R[i,as.numeric(names(tabR))] <<- tabR
+# }
+#
+# SPos = as.numeric(names(arrMutations)[arrMutations=="S"])
+# SPos <- sapply(SPos,getCodonNumb)
+# if(any(SPos)){
+# tabS <- table(SPos)
+# obsMu_S[i,names(tabS)] <<- tabS
+# }
+# }
+# )
+# return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) )
+# }
+
+# Count the observed R and S mutations of each sequence, by codon.
+#
+# Arguments:
+#   listMutations - list with one entry per sequence; each entry is a vector whose
+#                   names are nucleotide positions and whose values are mutation
+#                   types (entries equal to "R" or "S" are counted)
+# Globals read:
+#   readEnd       - the result has readEnd/3 codon columns
+#   getCodonNumb  - maps a nucleotide position to its codon number
+# Value:
+#   list("Observed_R", "Observed_S"): numeric matrices with one row per sequence and
+#   one column per codon, row and column names being the running numbers.
+#   An empty listMutations gives matrices with zero rows.
+getObservedMutationsByCodon <- function(listMutations){
+  numbSeqs <- length(listMutations)
+  numbCodons <- readEnd/3
+  obsMu_R <- matrix(0,nrow=numbSeqs,ncol=numbCodons,dimnames=list(seq_len(numbSeqs),seq_len(numbCodons)))
+  obsMu_S <- obsMu_R
+
+  # Table of codon numbers hit by mutations of the given type, or NULL when there are none
+  tallyByCodon <- function(arrMutations, mutationType){
+    positions <- as.numeric(names(arrMutations)[arrMutations==mutationType])
+    codons <- unlist(lapply(positions,getCodonNumb))
+    if(length(codons)==0) return(NULL)
+    table(codons)
+  }
+
+  # A plain loop fills the matrices directly, so no assignment into an enclosing
+  # environment is needed.
+  for(i in seq_along(listMutations)){
+    tabR <- tallyByCodon(listMutations[[i]],"R")
+    if(!is.null(tabR)){
+      obsMu_R[i,as.numeric(names(tabR))] <- tabR
+    }
+
+    tabS <- tallyByCodon(listMutations[[i]],"S")
+    if(!is.null(tabS)){
+      obsMu_S[i,as.numeric(names(tabS))] <- tabS
+    }
+  }
+  return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) )
+}
+
diff -r b84477f57318 -r e7b550d52eb7 baseline/Baseline_Main.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/baseline/Baseline_Main.r Tue Aug 09 07:20:41 2016 -0400
@@ -0,0 +1,388 @@
+#########################################################################################
+# License Agreement
+#
+# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE
+# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER
+# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE
+# OR COPYRIGHT LAW IS PROHIBITED.
+#
+# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE
+# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED
+# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN
+# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
+#
+# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
+# Coded by: Mohamed Uduman & Gur Yaari
+# Copyright 2012 Kleinstein Lab
+# Version: 1.3 (01/23/2014)
+#########################################################################################
+
+# Script setup: keep a copy of the current options in `op`, then silence warnings
+# (warn=-1) and the call stacks normally printed with warnings and errors.
+op <- options();
+options(showWarnCalls=FALSE, showErrorCalls=FALSE, warn=-1)
+library('seqinr')
+# `F & ...` is FALSE on every platform as long as F keeps its default value, so the
+# multicore package is not loaded here.
+if( F & Sys.info()[1]=="Linux"){
+ library("multicore")
+}
+
+# Load functions and initialize global variables
+# (the path is relative: Baseline_Functions.r is looked up in the working directory)
+source("Baseline_Functions.r")
+
+# Initialize parameters with user provided arguments
+# The arguments are positional: arg[1] to arg[10] are read unconditionally, arg[11] is
+# optional and only read when testID is 5.
+ arg <- commandArgs(TRUE)
+ #arg = c(2,1,5,5,0,1,"1:26:38:55:65:104:116", "test.fasta","","sample")
+ #arg = c(1,1,5,5,0,1,"1:38:55:65:104:116:200", "test.fasta","","sample")
+ #arg = c(1,1,5,5,1,1,"1:26:38:55:65:104:116", "/home/mu37/Wu/Wu_Cloned_gapped_sequences_D-masked.fasta","/home/mu37/Wu/","Wu")
+ testID <- as.numeric(arg[1]) # 1 = Focused, 2 = Local
+ species <- as.numeric(arg[2]) # 1 = Human. 2 = Mouse
+ substitutionModel <- as.numeric(arg[3]) # 0 = Uniform substitution, 1 = Smith DS et al. 1996, 5 = FiveS
+ mutabilityModel <- as.numeric(arg[4]) # 0 = Uniform mutablity, 1 = Tri-nucleotide (Shapiro GS et al. 2002) , 5 = FiveS
+ clonal <- as.numeric(arg[5]) # 0 = Independent sequences, 1 = Clonally related, 2 = Clonally related & only non-terminal mutations
+ fixIndels <- as.numeric(arg[6]) # 0 = Do nothing, 1 = Try and fix Indels
+ region <- as.numeric(strsplit(arg[7],":")[[1]]) # StartPos:LastNucleotideF1:C1:F2:C2:F3:C3
+ inputFilePath <- arg[8] # Full path to input file
+ outputPath <- arg[9] # Full path to location of output files
+ outputID <- arg[10] # ID for session output
+
+
+ # Trait-change test: model 1 unless an 11th argument names another one.
+ # Indexing past the end of arg yields NA, so is.na() alone tells whether the
+ # argument was supplied. The former any(arg[11]) was applied to a character value,
+ # which is not meaningful, so a numeric string never passed the check.
+ if(testID==5){
+ traitChangeModel <- 1
+ if( !is.na(arg[11]) ) traitChangeModel <- as.numeric(arg[11]) # 1 <- Chothia 1998
+ initializeTraitChange(traitChangeModel)
+ }
+
+# Initialize other parameters/variables
+# Everything below is assigned at top level, i.e. as script-wide variables.
+
+ # Initialize the codon table ( definitions of R/S )
+ computeCodonTable(testID)
+
+ # Initialize
+ # Test Name (any testID other than 2, 3 or 4 keeps "Focused")
+ testName<-"Focused"
+ if(testID==2) testName<-"Local"
+ if(testID==3) testName<-"Imbalanced"
+ if(testID==4) testName<-"ImbalancedSilent"
+
+ # Indel placeholders initialization
+ indelPos <- NULL
+ delPos <- NULL
+ insPos <- NULL
+
+ # Initialize the Substitution & Mutability matrices
+ substitution <- initializeSubstitutionMatrix(substitutionModel,species)
+ mutability <- initializeMutabilityMatrix(mutabilityModel,species)
+
+ # FWR/CDR boundaries
+ # region holds up to seven codon positions (start, then the end of F1, C1, F2, C2, F3, C3).
+ # When the seventh is missing, flagTrim is set and the sixth is reused, which makes
+ # the last CDR segment empty.
+ flagTrim <- F
+ if( is.na(region[7])){
+ flagTrim <- T
+ region[7]<-region[6]
+ }
+ # readStart/readEnd are taken from the boundaries as supplied, before the shift below
+ readStart = min(region,na.rm=T)
+ readEnd = max(region,na.rm=T)
+ # Shift the boundaries so that the smallest one becomes codon 1
+ if(readStart>1){
+ region = region - (readStart - 1)
+ }
+ # Nucleotide boundaries: first base of the start codon, last base of every other codon
+ region_Nuc = c( (region[1]*3-2) , (region[2:7]*3) )
+ region_Cod = region
+
+ # Convert readStart/readEnd from codon numbers to nucleotide positions
+ readStart = (readStart*3)-2
+ readEnd = (readEnd*3)
+
+ # Per-nucleotide mask: TRUE in the three FWR segments, FALSE in the three CDR segments
+ FWR_Nuc <- c( rep(TRUE,(region_Nuc[2])),
+ rep(FALSE,(region_Nuc[3]-region_Nuc[2])),
+ rep(TRUE,(region_Nuc[4]-region_Nuc[3])),
+ rep(FALSE,(region_Nuc[5]-region_Nuc[4])),
+ rep(TRUE,(region_Nuc[6]-region_Nuc[5])),
+ rep(FALSE,(region_Nuc[7]-region_Nuc[6]))
+ )
+ # CDR mask is the complement of the FWR mask
+ CDR_Nuc <- (1-FWR_Nuc)
+ CDR_Nuc <- as.logical(CDR_Nuc)
+ # The same masks repeated on four rows (one column per nucleotide position)
+ FWR_Nuc_Mat <- matrix( rep(FWR_Nuc,4), ncol=length(FWR_Nuc), nrow=4, byrow=T)
+ CDR_Nuc_Mat <- matrix( rep(CDR_Nuc,4), ncol=length(CDR_Nuc), nrow=4, byrow=T)
+
+ # Per-codon masks built the same way from the codon boundaries
+ FWR_Codon <- c( rep(TRUE,(region[2])),
+ rep(FALSE,(region[3]-region[2])),
+ rep(TRUE,(region[4]-region[3])),
+ rep(FALSE,(region[5]-region[4])),
+ rep(TRUE,(region[6]-region[5])),
+ rep(FALSE,(region[7]-region[6]))
+ )
+ CDR_Codon <- (1-FWR_Codon)
+ CDR_Codon <- as.logical(CDR_Codon)
+
+
+# Read input FASTA file
+# Any error while reading prints an "Error|..." line on standard output and ends the session.
+ tryCatch(
+ inputFASTA <- baseline.read.fasta(inputFilePath, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F)
+ , error = function(ex){
+ cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n")
+ q()
+ }
+ )
+
+ # A result with a single entry is reported with the same error message
+ if (length(inputFASTA)==1) {
+ cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n")
+ q()
+ }
+
+ # Process sequence IDs/names (strip leading/trailing spaces)
+ names(inputFASTA) <- sapply(names(inputFASTA),function(x){trim(x)})
+
+ # Convert non nucleotide characters to N
+ # (tab characters are first removed from the last entry only)
+ inputFASTA[length(inputFASTA)] = gsub("\t","",inputFASTA[length(inputFASTA)])
+ inputFASTA <- lapply(inputFASTA,replaceNonFASTAChars)
+
+ # Process the FASTA file and convert to Matrix[inputSequence, germlineSequence]
+ # processInputAdvanced() returns, in this order: the matrix, the germline
+ # assignment and the group assignment
+ processedInput <- processInputAdvanced(inputFASTA)
+ matInput <- processedInput[[1]]
+ germlines <- processedInput[[2]]
+ lenGermlines = length(unique(germlines))
+ groups <- processedInput[[3]]
+ lenGroups = length(unique(groups))
+ # The intermediate objects are no longer needed
+ rm(processedInput)
+ rm(inputFASTA)
+
+# # remove clones with less than 2 seqeunces
+# tableGL <- table(germlines)
+# singletons <- which(tableGL<8)
+# rowsToRemove <- match(singletons,germlines)
+# if(any(rowsToRemove)){
+# matInput <- matInput[-rowsToRemove,]
+# germlines <- germlines[-rowsToRemove]
+# groups <- groups[-rowsToRemove]
+# }
+#
+# # remove unproductive seqs
+# nonFuctionalSeqs <- sapply(rownames(matInput),function(x){any(grep("unproductive",x))})
+# if(any(nonFuctionalSeqs)){
+# if(sum(nonFuctionalSeqs)==length(germlines)){
+# write.table("Unproductive",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
+# q()
+# }
+# matInput <- matInput[-which(nonFuctionalSeqs),]
+# germlines <- germlines[-which(nonFuctionalSeqs)]
+# germlines[1:length(germlines)] <- 1:length(germlines)
+# groups <- groups[-which(nonFuctionalSeqs)]
+# }
+#
+# if(class(matInput)=="character"){
+# write.table("All unproductive seqs",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
+# q()
+# }
+#
+# if(nrow(matInput)<10 | is.null(nrow(matInput))){
+# write.table(paste(nrow(matInput), "seqs only",sep=""),file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
+# q()
+# }
+
+# Replace leading & trailing "-" with "N":
+# each row of matInput is passed to replaceLeadingTrailingDashes() together with readEnd;
+# t() puts the per-row results back into one row per sequence
+ matInput <- t(apply(matInput,1,replaceLeadingTrailingDashes,readEnd))
+
+ # Trim (nucleotide) input sequences to the last codon (disabled)
+ #matInput[,1] <- apply(matrix(matInput[,1]),1,trimToLastCodon)
+
+# # Check for Indels
+# if(fixIndels){
+# delPos <- fixDeletions(matInput)
+# insPos <- fixInsertions(matInput)
+# }else{
+# # Check for indels
+# indelPos <- checkForInDels(matInput)
+# indelPos <- apply(cbind(indelPos[[1]],indelPos[[2]]),1,function(x){(x[1]==T & x[2]==T)})
+# }
+
+ # If indels are present, remove mutations in the sequence & throw warning at end (disabled)
+ #matInput[indelPos,] <- apply(matrix(matInput[indelPos,],nrow=sum(indelPos),ncol=2),1,function(x){x[1]=x[2]; return(x) })
+
+ colnames(matInput)=c("Input","Germline")
+
+ # If sequences are clonal, create effective sequence for each clone & modify germline/group definitions
+ # (runs for any non-zero value of clonal; clonal-1 is passed on as nonTerminalOnly)
+ germlinesOriginal = NULL
+ if(clonal){
+ germlinesOriginal <- germlines
+ # One collapseClone() call per germline value: all input sequences of the clone
+ # plus the germline of the clone's first row
+ collapseCloneResults <- tapply(1:nrow(matInput),germlines,function(i){
+ collapseClone(matInput[i,1],matInput[i[1],2],readEnd,nonTerminalOnly=(clonal-1))
+ })
+ # Element 1 of every result becomes the clone's row in matInput
+ matInput = t(sapply(collapseCloneResults,function(x){return(x[[1]])}))
+ # Keep one group entry per clone (the first), named after that first entry
+ names_groups = tapply(groups,germlines,function(x){names(x[1])})
+ groups = tapply(groups,germlines,function(x){array(x[1],dimnames=names(x[1]))})
+ names(groups) = names_groups
+
+ # Same reduction for the germline assignment itself
+ names_germlines = tapply(germlines,germlines,function(x){names(x[1])})
+ germlines = tapply( germlines,germlines,function(x){array(x[1],dimnames=names(x[1]))} )
+ names(germlines) = names_germlines
+ # Element 2 of every result is collected per clone
+ matInputErrors = sapply(collapseCloneResults,function(x){return(x[[2]])})
+ }
+
+
+# Selection Analysis
+
+
+# if (length(germlines)>sequenceLimit) {
+# # Code to parallelize processing goes here
+# stop( paste("Error: Cannot process more than ", Upper_limit," sequences",sep="") )
+# }
+
+# if (length(germlines)1){
+ groups <- c(groups,lenGroups+1)
+ names(groups)[length(groups)] = "All sequences combined"
+ bayesPDF_groups_cdr[[lenGroups+1]] = groupPosteriors(bayesPDF_groups_cdr,length_sigma=4001)
+ bayesPDF_groups_fwr[[lenGroups+1]] = groupPosteriors(bayesPDF_groups_fwr,length_sigma=4001)
+ }
+
+ #Bayesian Outputs
+ bayes_cdr = t(sapply(bayesPDF_cdr,calcBayesOutputInfo))
+ bayes_fwr = t(sapply(bayesPDF_fwr,calcBayesOutputInfo))
+ bayes_germlines_cdr = t(sapply(bayesPDF_germlines_cdr,calcBayesOutputInfo))
+ bayes_germlines_fwr = t(sapply(bayesPDF_germlines_fwr,calcBayesOutputInfo))
+ bayes_groups_cdr = t(sapply(bayesPDF_groups_cdr,calcBayesOutputInfo))
+ bayes_groups_fwr = t(sapply(bayesPDF_groups_fwr,calcBayesOutputInfo))
+
+ #P-values
+ simgaP_cdr = sapply(bayesPDF_cdr,computeSigmaP)
+ simgaP_fwr = sapply(bayesPDF_fwr,computeSigmaP)
+
+ simgaP_germlines_cdr = sapply(bayesPDF_germlines_cdr,computeSigmaP)
+ simgaP_germlines_fwr = sapply(bayesPDF_germlines_fwr,computeSigmaP)
+
+ simgaP_groups_cdr = sapply(bayesPDF_groups_cdr,computeSigmaP)
+ simgaP_groups_fwr = sapply(bayesPDF_groups_fwr,computeSigmaP)
+
+
+ #Format output
+
+ # Round expected mutation frequencies to 3 decimal places
+ matMutationInfo[germlinesOriginal[indelPos],] = NA
+ if(nrow(matMutationInfo)==1){
+ matMutationInfo[5:8] = round(matMutationInfo[,5:8]/sum(matMutationInfo[,5:8],na.rm=T),3)
+ }else{
+ matMutationInfo[,5:8] = t(round(apply(matMutationInfo[,5:8],1,function(x){ return(x/sum(x,na.rm=T)) }),3))
+ }
+
+ listPDFs = list()
+ nRows = length(unique(groups)) + length(unique(germlines)) + length(groups)
+
+ matOutput = matrix(NA,ncol=18,nrow=nRows)
+ rowNumb = 1
+ for(G in unique(groups)){
+ #print(G)
+ matOutput[rowNumb,c(1,2,11:18)] = c("Group",names(groups)[groups==G][1],bayes_groups_cdr[G,],bayes_groups_fwr[G,],simgaP_groups_cdr[G],simgaP_groups_fwr[G])
+ listPDFs[[rowNumb]] = list("CDR"=bayesPDF_groups_cdr[[G]],"FWR"=bayesPDF_groups_fwr[[G]])
+ names(listPDFs)[rowNumb] = names(groups[groups==paste(G)])[1]
+ #if(names(groups)[which(groups==G)[1]]!="All sequences combined"){
+ gs = unique(germlines[groups==G])
+ rowNumb = rowNumb+1
+ if( !is.na(gs) ){
+ for( g in gs ){
+ matOutput[rowNumb,c(1,2,11:18)] = c("Germline",names(germlines)[germlines==g][1],bayes_germlines_cdr[g,],bayes_germlines_fwr[g,],simgaP_germlines_cdr[g],simgaP_germlines_fwr[g])
+ listPDFs[[rowNumb]] = list("CDR"=bayesPDF_germlines_cdr[[g]],"FWR"=bayesPDF_germlines_fwr[[g]])
+ names(listPDFs)[rowNumb] = names(germlines[germlines==paste(g)])[1]
+ rowNumb = rowNumb+1
+ indexesOfInterest = which(germlines==g)
+ numbSeqsOfInterest = length(indexesOfInterest)
+ rowNumb = seq(rowNumb,rowNumb+(numbSeqsOfInterest-1))
+ matOutput[rowNumb,] = matrix( c( rep("Sequence",numbSeqsOfInterest),
+ rownames(matInput)[indexesOfInterest],
+ c(matMutationInfo[indexesOfInterest,1:4]),
+ c(matMutationInfo[indexesOfInterest,5:8]),
+ c(bayes_cdr[indexesOfInterest,]),
+ c(bayes_fwr[indexesOfInterest,]),
+ c(simgaP_cdr[indexesOfInterest]),
+ c(simgaP_fwr[indexesOfInterest])
+ ), ncol=18, nrow=numbSeqsOfInterest,byrow=F)
+ increment=0
+ for( ioi in indexesOfInterest){
+ listPDFs[[min(rowNumb)+increment]] = list("CDR"=bayesPDF_cdr[[ioi]] , "FWR"=bayesPDF_fwr[[ioi]])
+ names(listPDFs)[min(rowNumb)+increment] = rownames(matInput)[ioi]
+ increment = increment + 1
+ }
+ rowNumb=max(rowNumb)+1
+
+ }
+ }
+ }
+ colsToFormat = 11:18
+ matOutput[,colsToFormat] = formatC( matrix(as.numeric(matOutput[,colsToFormat]), nrow=nrow(matOutput), ncol=length(colsToFormat)) , digits=3)
+ matOutput[matOutput== " NaN"] = NA
+
+
+
+ colnames(matOutput) = c("Type", "ID", "Observed_CDR_R", "Observed_CDR_S", "Observed_FWR_R", "Observed_FWR_S",
+ "Expected_CDR_R", "Expected_CDR_S", "Expected_FWR_R", "Expected_FWR_S",
+ paste( rep(testName,6), rep(c("Sigma","CIlower","CIupper"),2),rep(c("CDR","FWR"),each=3), sep="_"),
+ paste( rep(testName,2), rep("P",2),c("CDR","FWR"), sep="_")
+ )
+ fileName = paste(outputPath,outputID,".txt",sep="")
+ write.table(matOutput,file=fileName,quote=F,sep="\t",row.names=T,col.names=NA)
+ fileName = paste(outputPath,outputID,".RData",sep="")
+ save(listPDFs,file=fileName)
+
+indelWarning = FALSE
+if(sum(indelPos)>0){
+ indelWarning = "Warning: The following sequences have either gaps and/or deletions, and have been ommited from the analysis.";
+ indelWarning = paste( indelWarning , "
", sep="" )
+ for(indels in names(indelPos)[indelPos]){
+ indelWarning = paste( indelWarning , "", indels, " ", sep="" )
+ }
+ indelWarning = paste( indelWarning , "
", sep="" )
+}
+
+cloneWarning = FALSE
+if(clonal==1){
+ if(sum(matInputErrors)>0){
+ cloneWarning = "Warning: The following clones have sequences of unequal length.";
+ cloneWarning = paste( cloneWarning , "
", sep="" )
+ for(clone in names(matInputErrors)[matInputErrors]){
+ cloneWarning = paste( cloneWarning , "", names(germlines)[as.numeric(clone)], " ", sep="" )
+ }
+ cloneWarning = paste( cloneWarning , " ", sep="" )
+ }
+}
+cat(paste("Success",outputID,indelWarning,cloneWarning,sep="|"))
diff -r b84477f57318 -r e7b550d52eb7 baseline/FiveS_Mutability.RData
Binary file baseline/FiveS_Mutability.RData has changed
diff -r b84477f57318 -r e7b550d52eb7 baseline/FiveS_Substitution.RData
Binary file baseline/FiveS_Substitution.RData has changed
diff -r b84477f57318 -r e7b550d52eb7 baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa Tue Aug 09 07:20:41 2016 -0400
@@ -0,0 +1,703 @@
+>IGHV1-18*01
+caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
+>IGHV1-18*02
+caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc
+>IGHV1-18*03
+caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga
+>IGHV1-18*04
+caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
+>IGHV1-2*01
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
+>IGHV1-2*02
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
+>IGHV1-2*03
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
+>IGHV1-2*04
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
+>IGHV1-2*05
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
+>IGHV1-24*01
+caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
+>IGHV1-3*01
+caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga
+>IGHV1-3*02
+caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga
+>IGHV1-38-4*01
+caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca
+>IGHV1-45*01
+cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana
+>IGHV1-45*02
+cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata
+>IGHV1-45*03
+.....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga
+>IGHV1-46*01
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-46*02
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-46*03
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga
+>IGHV1-58*01
+caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
+>IGHV1-58*02
+caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
+>IGHV1-68*01
+caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata
+>IGHV1-69*01
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*02
+caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
+>IGHV1-69*03
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc
+>IGHV1-69*04
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*05
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
+>IGHV1-69*06
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*07
+.....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag
+>IGHV1-69*08
+caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*09
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*10
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*11
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*12
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*13
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*14
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69-2*01
+gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
+>IGHV1-69-2*02
+.....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag
+>IGHV1-69D*01
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-8*01
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
+>IGHV1-8*02
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
+>IGHV1-NL1*01
+caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga
+>IGHV1/OR15-1*01
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga
+>IGHV1/OR15-1*02
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
+>IGHV1/OR15-1*03
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga
+>IGHV1/OR15-1*04
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
+>IGHV1/OR15-2*01
+caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga
+>IGHV1/OR15-2*02
+caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
+>IGHV1/OR15-2*03
+caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
+>IGHV1/OR15-3*01
+caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
+>IGHV1/OR15-3*02
+caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1/OR15-3*03
+caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
+>IGHV1/OR15-4*01
+caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
+>IGHV1/OR15-5*01
+.....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
+>IGHV1/OR15-5*02
+caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
+>IGHV1/OR15-9*01
+caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga
+>IGHV1/OR21-1*01
+caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga
+>IGHV2-10*01
+caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac
+>IGHV2-26*01
+caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac
+>IGHV2-5*01
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
+>IGHV2-5*02
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
+>IGHV2-5*03
+................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt
+>IGHV2-5*04|
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac
+>IGHV2-5*05
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
+>IGHV2-5*06
+cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga
+>IGHV2-5*08
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
+>IGHV2-5*09
+caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
+>IGHV2-70*01
+caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
+>IGHV2-70*02
+caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
+>IGHV2-70*03
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
+>IGHV2-70*04
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac
+>IGHV2-70*05
+..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga
+>IGHV2-70*06
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
+>IGHV2-70*07
+caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
+>IGHV2-70*08
+caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
+>IGHV2-70*09
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg
+>IGHV2-70*10
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
+>IGHV2-70*11
+cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
+>IGHV2-70*12
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
+>IGHV2-70*13
+caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac
+>IGHV2-70D*04
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
+>IGHV2-70D*14
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
+>IGHV2/OR16-5*01
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag
+>IGHV3-11*01
+caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-11*03
+caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga
+>IGHV3-11*04
+caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-11*05
+caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-11*06
+caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-13*01
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
+>IGHV3-13*02
+gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
+>IGHV3-13*03
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga
+>IGHV3-13*04
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
+>IGHV3-13*05
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
+>IGHV3-15*01
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*02
+gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*03
+gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*04
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*05
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*06
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*07
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*08
+gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
+>IGHV3-16*01
+gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
+>IGHV3-16*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
+>IGHV3-19*01
+acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa
+>IGHV3-20*01
+gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
+>IGHV3-20*02
+gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
+>IGHV3-21*01
+gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-21*02
+gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-21*03
+gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga
+>IGHV3-21*04
+gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-22*01
+gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
+>IGHV3-22*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
+>IGHV3-23*01
+gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-23*02
+gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-23*03
+gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-23*04
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-23*05
+gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa
+>IGHV3-23D*01
+gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-23D*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-25*01
+gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
+>IGHV3-25*02
+gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
+>IGHV3-25*03
+gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga
+>IGHV3-25*04
+gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga
+>IGHV3-25*05
+gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
+>IGHV3-29*01
+gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
+>IGHV3-30*01
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*02
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-30*03
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*04
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*05
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga
+>IGHV3-30*06
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*07
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*08
+caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
+>IGHV3-30*09
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*10
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*11
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*12
+caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*13
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*14
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*15
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*16
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*17
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*18
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-30*19
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30-2*01
+gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca
+>IGHV3-30-22*01
+gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc
+>IGHV3-30-3*01
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30-3*02
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-30-3*03
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30-33*01
+gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg
+>IGHV3-30-42*01
+gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
+>IGHV3-30-5*01
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-30-5*02
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-30-52*01
+gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg
+>IGHV3-32*01
+gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc
+>IGHV3-33*01
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-33*02
+caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-33*03
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-33*04
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-33*05
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-33*06
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-33-2*01
+gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca
+>IGHV3-35*01
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa
+>IGHV3-38*01
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata
+>IGHV3-38*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
+>IGHV3-38*03
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
+>IGHV3-38-3*01
+gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga
+>IGHV3-43*01
+gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
+>IGHV3-43*02
+gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
+>IGHV3-43D*01
+gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata
+>IGHV3-47*01
+gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga
+>IGHV3-47*02
+gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga
+>IGHV3-48*01
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-48*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-48*03
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
+>IGHV3-48*04
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-49*01
+gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
+>IGHV3-49*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
+>IGHV3-49*03
+gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
+>IGHV3-49*04
+gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
+>IGHV3-49*05
+gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
+>IGHV3-52*01
+gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg
+>IGHV3-52*02
+gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
+>IGHV3-52*03
+gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
+>IGHV3-53*01
+gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-53*02
+gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-53*03
+gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga
+>IGHV3-53*04
+gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-54*01
+gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
+>IGHV3-54*02
+gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg
+>IGHV3-54*04
+gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
+>IGHV3-62*01
+gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga
+>IGHV3-63*01
+gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt
+>IGHV3-63*02
+gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa
+>IGHV3-64*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
+>IGHV3-64*02
+gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
+>IGHV3-64*03
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
+>IGHV3-64*04
+caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-64*05
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
+>IGHV3-64D*06
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
+>IGHV3-66*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-66*02
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
+>IGHV3-66*03
+gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-66*04
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca
+>IGHV3-69-1*01
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-69-1*02
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
+>IGHV3-7*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-7*02
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga
+>IGHV3-7*03
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-71*01
+gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-71*02
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga
+>IGHV3-71*03
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-72*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga
+>IGHV3-72*02
+....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat
+>IGHV3-73*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
+>IGHV3-73*02
+gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
+>IGHV3-74*01
+gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
+>IGHV3-74*02
+gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga
+>IGHV3-74*03
+gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
+>IGHV3-9*01
+gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
+>IGHV3-9*02
+gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
+>IGHV3-9*03
+gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata
+>IGHV3-NL1*01
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3/OR15-7*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga
+>IGHV3/OR15-7*02
+gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
+>IGHV3/OR15-7*03
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
+>IGHV3/OR15-7*05
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga
+>IGHV3/OR16-10*01
+gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
+>IGHV3/OR16-10*02
+gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
+>IGHV3/OR16-10*03
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga
+>IGHV3/OR16-12*01
+gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga
+>IGHV3/OR16-13*01
+gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
+>IGHV3/OR16-14*01
+gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
+>IGHV3/OR16-15*01
+gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa
+>IGHV3/OR16-15*02
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga
+>IGHV3/OR16-16*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga
+>IGHV3/OR16-6*02
+gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
+>IGHV3/OR16-8*01
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa
+>IGHV3/OR16-8*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca
+>IGHV3/OR16-9*01
+gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa
+>IGHV4-28*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
+>IGHV4-28*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
+>IGHV4-28*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga
+>IGHV4-28*04
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga
+>IGHV4-28*05
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
+>IGHV4-28*06
+caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa
+>IGHV4-28*07
+caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
+>IGHV4-30-2*01
+cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
+>IGHV4-30-2*02
+cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
+>IGHV4-30-2*03
+cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca
+>IGHV4-30-2*04
+...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
+>IGHV4-30-2*05
+cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
+>IGHV4-30-2*06
+cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
+>IGHV4-30-4*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
+>IGHV4-30-4*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga
+>IGHV4-30-4*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
+>XIGHV4-30-4*04
+caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg
+>IGHV4-30-4*05
+..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
+>IGHV4-30-4*06
+...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
+>IGHV4-30-4*07
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
+>IGHV4-31*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-31*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-31*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-31*04
+caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
+>IGHV4-31*05
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg
+>IGHV4-31*06
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
+>IGHV4-31*07
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
+>IGHV4-31*08
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
+>IGHV4-31*09
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-31*10
+caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga
+>IGHV4-34*01
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
+>IGHV4-34*02
+caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
+>IGHV4-34*03
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-34*04
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
+>IGHV4-34*05
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
+>IGHV4-34*06
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-34*07
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-34*08
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg
+>IGHV4-34*09
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-34*10
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
+>IGHV4-34*11
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
+>IGHV4-34*12
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga
+>IGHV4-34*13
+...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
+>IGHV4-38-2*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga
+>IGHV4-38-2*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
+>IGHV4-39*01
+cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca
+>IGHV4-39*02
+cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga
+>IGHV4-39*03
+cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
+>IGHV4-39*04
+..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac
+>IGHV4-39*05
+cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
+>IGHV4-39*06
+cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-39*07
+cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-4*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
+>IGHV4-4*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-4*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-4*04
+caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-4*05
+caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-4*06
+............................................................
+...............tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-4*07
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-4*08
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
+>IGHV4-55*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
+>IGHV4-55*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
+>IGHV4-55*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-55*04
+caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-55*05
+caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-55*06
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg
+>IGHV4-55*07
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
+>IGHV4-55*08
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-55*09
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
+>IGHV4-59*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-59*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-59*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
+>IGHV4-59*04
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
+>IGHV4-59*05
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
+>IGHV4-59*06
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
+>IGHV4-59*07
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga
+>IGHV4-59*08
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca
+>IGHV4-59*09
+...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg
+>IGHV4-59*10
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
+>IGHV4-61*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-61*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
+>IGHV4-61*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-61*04
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg
+>IGHV4-61*05
+cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga
+>IGHV4-61*06
+...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
+>IGHV4-61*07
+...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca
+>IGHV4-61*08
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
+>IGHV4/OR15-8*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4/OR15-8*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4/OR15-8*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV5-10-1*01
+gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
+>IGHV5-10-1*02
+gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca
+>IGHV5-10-1*03
+gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
+>IGHV5-10-1*04
+gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
+>IGHV5-51*01
+gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
+>IGHV5-51*02
+gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
+>IGHV5-51*03
+gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
+>IGHV5-51*04
+gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
+>IGHV5-51*05
+.....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg
+>IGHV5-78*01
+gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga
+>IGHV6-1*01
+caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
+>IGHV6-1*02
+caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
+>IGHV7-34-1*01
+...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
+>IGHV7-34-1*02
+...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
+>IGHV7-4-1*01
+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga
+>IGHV7-4-1*02
+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
+>IGHV7-4-1*03
+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg
+>IGHV7-4-1*04
+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
+>IGHV7-4-1*05
+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga
+>AIGHV7-40*03|
+ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga
+>IGHV7-81*01
+caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
diff -r b84477f57318 -r e7b550d52eb7 baseline/comparePDFs.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/baseline/comparePDFs.r Tue Aug 09 07:20:41 2016 -0400
@@ -0,0 +1,225 @@
+options("warn"=-1)  # a negative warn level makes R ignore all warnings for the rest of the script
+
+#from http://selection.med.yale.edu/baseline/Archive/Baseline%20Version%201.3/Baseline_Functions_Version1.3.r
+# Signed p-value for two densities tabulated on the same grid: P(X1 > X2) + P(X1 = X2)/2 for X1 ~ dens1,
+# X2 ~ dens2, minus 1 when above 0.5. sigma_S and N are unused (left over from the sampling version).
+# Returns NA when either density has length <= 1 or the statistic is NA/NaN (e.g. an all-zero density).
+compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){
+  if(length(dens1)<=1 || length(dens2)<=1){
+    return(NA)
+  }
+  dens1 <- dens1/sum(dens1)
+  dens2 <- dens2/sum(dens2)
+  # mass of dens2 strictly below each bin plus half of the bin itself
+  cum2 <- cumsum(dens2)-dens2/2
+  tmp <- sum(dens1*cum2[seq_along(dens1)])
+  # the original reached if(NA) here and stopped with "missing value where TRUE/FALSE needed"
+  if(is.na(tmp)) return(NA)
+  if(tmp>0.5) tmp <- tmp-1
+  return(tmp)
+}
+
+
+# Command line: <input .RData> <output .pdf> <row id> [<row id> ...]; row ids may carry a "chkbx" prefix.
+library(grid)
+arg <- commandArgs(TRUE)
+# Drop any "clonal" argument. The original wrote `arg[!arg=="clonal"]` without assigning the
+# result, so the argument stayed in place and was later parsed as an NA row id.
+arg <- arg[arg != "clonal"]
+input <- arg[1]
+output <- arg[2]
+rowIDs <- as.numeric(gsub("chkbx", "", arg[3:max(3, length(arg))]))
+
+numbSeqs <- length(rowIDs)
+
+if (is.na(rowIDs[1]) || numbSeqs > 10) {
+  stop("Error: Please select between one and 10 sequences to compare.")
+}
+
+# listPDFs, read as a global by the code below, is presumably defined by this workspace - confirm.
+load(input)
+xMarks <- seq(-20, 20, length.out = 4001)
+
+plot_grid_s<-function(pdf1,pdf2,Sample=100,cex=1,xlim=NULL,xMarks = seq(-20,20,length.out=4001)){
+  # Draw, in the current grid viewport, the CDR and FWR densities of two entries of the global
+  # list listPDFs: CDR curves in the upper half, FWR curves mirrored into the lower half.
+  #   pdf1, pdf2  indices into listPDFs; pdf1 is drawn with the default line type, pdf2 with lty=2
+  #   Sample      number of x positions the curves are resampled to
+  #   cex         text scaling factor
+  #   xlim        c(min, max) of the x axis; any other length selects the automatic range
+  #   xMarks      grid on which the stored densities are tabulated
+  # When pdf1 equals pdf2 and both densities exist, the CDR-vs-FWR p-value is printed as well.
+  entry1 <- listPDFs[pdf1][[1]]
+  entry2 <- listPDFs[pdf2][[1]]
+  yMax = max(c(abs(as.numeric(unlist(listPDFs[pdf1]))),abs(as.numeric(unlist(listPDFs[pdf2]))),0),na.rm=TRUE) * 1.1
+  # First / last grid position at which a density exceeds 0.001 (NA or empty when none does).
+  firstAbove <- function(dens) xMarks[dens>0.001][1]
+  lastAbove <- function(dens){ above <- xMarks[dens>0.001]; above[length(above)] }
+  # Fix: the original tested length(xlim==2), which is also true for an xlim of length 1 or 3+.
+  if(length(xlim)==2){
+    xMin=xlim[1]
+    xMax=xlim[2]
+  } else {
+    allDens <- list(entry1[["CDR"]], entry1[["FWR"]], entry2[["CDR"]], entry2[["FWR"]])
+    xMin=min(c(unlist(lapply(allDens, firstAbove)),0),na.rm=TRUE)
+    xMax=max(c(unlist(lapply(allDens, lastAbove)),0),na.rm=TRUE)
+  }
+
+  x <- seq(xMin,xMax,length.out=Sample)
+  grid.rect(gp = gpar(col=gray(0.6),fill="white",cex=cex))
+  pushViewport(viewport(x=0.175,y=0.175,width=0.825,height=0.825,just=c("left","bottom"),default.units="npc"))
+  pushViewport(dataViewport(x, c(yMax,-yMax),gp = gpar(cex=cex),extension=c(0.05)))
+  grid.polygon(c(0,0,1,1),c(0,0.5,0.5,0),gp=gpar(col=grey(0.95),fill=grey(0.95)),default.units="npc")
+  grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.9),fill=grey(0.9)),default.units="npc")
+  grid.rect()
+  grid.xaxis(gp = gpar(cex=cex/1.1))
+  yticks = pretty(c(-yMax,yMax),8)
+  yticks = yticks[yticks>(-yMax) & yticks<(yMax)]
+  # FWR values are plotted negated, so the tick labels show magnitudes.
+  grid.yaxis(at=yticks,label=abs(yticks),gp = gpar(cex=cex/1.1))
+
+  # Resample one density onto x and draw it; flip = -1 mirrors it below the axis. Densities of length <= 1 are skipped.
+  drawCurve <- function(dens,flip,gp){
+    if(length(dens)>1){
+      y <- approx(xMarks,dens,xout=x,yleft=0,yright=0)$y
+      grid.lines(unit(x,"native"), unit(flip*y,"native"),gp=gp)
+    }
+  }
+  drawCurve(entry1[["CDR"]], 1,gpar(col=2,lwd=2))
+  drawCurve(entry1[["FWR"]],-1,gpar(col=4,lwd=2))
+  drawCurve(entry2[["CDR"]], 1,gpar(col=2,lwd=2,lty=2))
+  drawCurve(entry2[["FWR"]],-1,gpar(col=4,lwd=2,lty=2))
+
+  # Horizontal line through the middle of the panel and a dotted vertical line at x = 0.
+  grid.lines(unit(c(0,1),"npc"), unit(c(0.5,0.5),"npc"),gp=gpar(col=1))
+  grid.lines(unit(c(0,0),"native"), unit(c(0,1),"npc"),gp=gpar(col=1,lwd=1,lty=3))
+
+  grid.text("Density", x = unit(-2.5, "lines"), rot = 90,gp = gpar(cex=cex))
+  grid.text( expression(paste("Selection Strength (", Sigma, ")", sep="")) , y = unit(-2.5, "lines"),gp = gpar(cex=cex))
+
+  # Same entry on both axes: annotate the panel with its CDR-vs-FWR p-value.
+  if(pdf1==pdf2 && length(entry2[["FWR"]])>1 && length(entry2[["CDR"]])>1 ){
+    pCDRFWR = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=entry1[["CDR"]], dens2=entry1[["FWR"]])
+    pval = formatC(as.numeric(pCDRFWR),digits=3)
+    grid.text( substitute(expression(paste(P[CDR/FWR], "=", x, sep="")),list(x=pval))[[2]] , x = unit(0.02, "npc"),y = unit(0.98, "npc"),just=c("left", "top"),gp = gpar(cex=cex*1.2))
+  }
+  grid.text(paste("CDR"), x = unit(0.98, "npc"),y = unit(0.98, "npc"),just=c("right", "top"),gp = gpar(cex=cex*1.5))
+  grid.text(paste("FWR"), x = unit(0.98, "npc"),y = unit(0.02, "npc"),just=c("right", "bottom"),gp = gpar(cex=cex*1.5))
+  popViewport(2)
+}
+#plot_grid_s(1)
+
+
+p2col<-function(p=0.01){
+  # Colour for a signed p-value: greens below zero, reds from zero up, strongest nearest zero.
+  fade <- c(0.8,0.6,0.4,0.2,0)
+  shades <- c(rgb(fade,1,fade), rgb(1,rev(fade),rev(fade)))
+  edges <- c(-.51,-0.1,-.05,-0.01,-0.005,0,0.005,0.01,0.05,0.1,0.51)
+  return(shades[findInterval(p,edges)])
+}
+
+
+plot_pvals<-function(pdf1,pdf2,cex=1,upper=TRUE){
+  # Fills the current viewport with a 2x2 panel of p-values comparing listPDFs[[pdf1]] with
+  # listPDFs[[pdf2]]: rows are CDR (top) / FWR (bottom) of pdf1, columns are CDR (left) / FWR
+  # (right) of pdf2. Each quadrant is shaded by p2col() and labelled with its value.
+  # Uses the globals listPDFs and xMarks; nothing is drawn when upper is FALSE.
+  if(!upper) return(NULL)
+  pOf <- function(region1,region2){
+    compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][[region1]], dens2=listPDFs[[pdf2]][[region2]])
+  }
+  shade <- function(p,xEdge,yEdge){
+    grid.polygon(c(0.5,0.5,xEdge,xEdge),c(yEdge,0.5,0.5,yEdge),gp=gpar(col=p2col(p),fill=p2col(p)),default.units="npc")
+  }
+  label <- function(p,xMid,yMid){
+    grid.text(formatC(as.numeric(p),digits=3), x = unit(xMid, "npc"),y = unit(yMid, "npc"),just=c("center", "center"),gp = gpar(cex=cex))
+  }
+  pCC <- pOf("CDR","CDR"); pCF <- pOf("CDR","FWR")
+  pFC <- pOf("FWR","CDR"); pFF <- pOf("FWR","FWR")
+  # quadrants in drawing order: bottom-right, top-right, top-left, bottom-left
+  shade(pFF,1,0); shade(pCF,1,1)
+  shade(pCC,0,1); shade(pFC,0,0)
+  grid.lines(c(0,1),0.5,gp=gpar(lty=2,col=gray(0.925)))
+  grid.lines(0.5,c(0,1),gp=gpar(lty=2,col=gray(0.925)))
+  label(pFF,0.75,0.25); label(pCF,0.75,0.75)
+  label(pCC,0.25,0.75)
+  label(pFC,0.25,0.25)
+}
+
+
+##################################################################################
+################## The whole OCD's matrix ########################################
+##################################################################################
+
+# Open the output PDF; the page is square with a side of 4*numbSeqs + 1/3.
+pdf( output ,width=4*numbSeqs+1/3,height=4*numbSeqs+1/3)
+# (numbSeqs+1) x (numbSeqs+1) layout: first row is 4 lines high, last column is 4 lines wide, the rest share the remaining space equally.
+pushViewport(viewport(x=0.02,y=0.02,just = c("left", "bottom"),w =0.96,height=0.96,layout = grid.layout(numbSeqs+1,numbSeqs+1,widths=unit.c(unit(rep(1,numbSeqs),"null"),unit(4,"lines")),heights=unit.c(unit(4,"lines"),unit(rep(1,numbSeqs),"null")))))
+# Header row: sequence name (first 16 characters) above each column; columns after the first also get FWR/CDR sub-headings.
+for(colIdx in seq_len(numbSeqs)){
+  pushViewport(viewport(layout.pos.col = colIdx, layout.pos.row = 1))
+  if(colIdx>1){
+    grid.polygon(c(0,0,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc")
+    grid.polygon(c(1,1,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc")
+    grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.5)),default.units="npc")
+    grid.text(y=.25,x=0.75,"FWR",gp = gpar(cex=1.5),just="center")
+    grid.text(y=.25,x=0.25,"CDR",gp = gpar(cex=1.5),just="center")
+  }
+  seqName <- substr(paste(names(listPDFs)[rowIDs[colIdx]]),1,16)
+  grid.rect(gp = gpar(col=grey(0.9)))
+  grid.text(y=.75,seqName,gp = gpar(cex=2),just="center")
+  popViewport(1)
+}
+# Right-hand label column: rotated sequence name beside each row; every row but the last also gets CDR/FWR sub-labels.
+for(rowIdx in seq_len(numbSeqs)){
+  pushViewport(viewport(layout.pos.row = rowIdx+1, layout.pos.col = numbSeqs+1))
+  if(rowIdx<numbSeqs){
+    grid.polygon(c(0,0.5,0.5,0),c(0,0,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc")
+    grid.polygon(c(0,0.5,0.5,0),c(1,1,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc")
+    grid.polygon(c(1,0.5,0.5,1),c(0,0,1,1),gp=gpar(col=grey(0.5)),default.units="npc")
+    grid.text(x=.25,y=0.75,"CDR",gp = gpar(cex=1.5),just="center",rot=270)
+    grid.text(x=.25,y=0.25,"FWR",gp = gpar(cex=1.5),just="center",rot=270)
+  }
+  grid.rect(gp = gpar(col=grey(0.9)))
+  grid.text(x=0.75,substr(paste(names(listPDFs)[rowIDs[rowIdx]]),1,16),gp = gpar(cex=2),rot=270,just="center")
+  popViewport(1)
+}
+# Upper triangle: p-value panel for every pair (a, b) with b > a, placed in layout row a+1, column b.
+for(a in seq_len(numbSeqs)){
+  for(b in seq_len(numbSeqs)){
+    pushViewport(viewport(layout.pos.col = b, layout.pos.row = a+1))
+    if(b>a){
+      plot_pvals(rowIDs[a],rowIDs[b],cex=2)
+      grid.rect()
+    }
+    popViewport(1)
+  }
+}
+# Overall x range: widest span, over all selected entries, where the CDR or FWR density exceeds 0.001.
+# NOTE(review): plot_grid_s() is called below without xlim and derives its own range, so xMin/xMax appear unused - confirm.
+xMin=0
+xMax=0.01
+for(pdf1 in rowIDs){
+ xMin_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][1]
+ xMin_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][1]
+ xMax_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001])]
+ xMax_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001])]
+ xMin=min(c(xMin_CDR,xMin_FWR,xMin),na.rm=TRUE)
+ xMax=max(c(xMax_CDR,xMax_FWR,xMax),na.rm=TRUE)
+}
+# Lower triangle (diagonal included): density plot for every pair (a, b) with b >= a,
+# placed in layout column a, row b+1.
+for(a in seq_len(numbSeqs)){
+  for(b in a:numbSeqs){
+    pushViewport(viewport(layout.pos.col = a, layout.pos.row = b+1))
+    grid.rect()
+    plot_grid_s(rowIDs[a],rowIDs[b],cex=1)
+    popViewport(1)
+  }
+}
+
+dev.off()
+
+# Status line on stdout: "Success:" followed by the row ids joined with "_".
+cat("Success", paste(rowIDs,collapse="_"),sep=":")
+
diff -r b84477f57318 -r e7b550d52eb7 baseline/filter.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/baseline/filter.r Tue Aug 09 07:20:41 2016 -0400
@@ -0,0 +1,62 @@
+# Prepare one IMGT sample: attach the AA junction to the gapped-nt table,
+# derive V/D/J gene names, and drop duplicate and non-productive rows.
+#
+# Command-line arguments:
+#   1. summaryfile - tab-separated table providing the AA.JUNCTION column
+#   2. gappedfile  - tab-separated table with the same rows in the same order
+#   3. selection   - comma-separated column names that define a duplicate
+#   4. output      - path of the tab-separated result file
+arg = commandArgs(TRUE)
+summaryfile = arg[1]
+gappedfile = arg[2]
+selection = arg[3]
+output = arg[4]
+print(paste("selection = ", selection))
+
+# quote="" stops apostrophes inside fields from being read as quote characters;
+# together with fill=TRUE an unbalanced quote would silently swallow rows.
+summarydat = read.table(summaryfile, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
+gappeddat = read.table(gappedfile, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
+
+#dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T))
+
+# The junction is attached by position, so a missing column would rename the
+# wrong column below and a different row count would misalign the junctions.
+if(!("AA.JUNCTION" %in% names(summarydat))){
+  stop(paste("No AA.JUNCTION column in", summaryfile))
+}
+if(nrow(summarydat) != nrow(gappeddat)){
+  stop(paste("Row count differs between", summaryfile, "and", gappedfile))
+}
+
+dat = cbind(gappeddat, summarydat$AA.JUNCTION)
+
+colnames(dat)[length(dat)] = "AA.JUNCTION"
+
+# "Homsap IGHV1-2*01 F" -> "IGHV1-2": strip the species prefix, then everything from the "*".
+dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele)
+dat$VGene = gsub("[*].*", "", dat$VGene)
+
+dat$DGene = gsub("^Homsap ", "", dat$D.GENE.and.allele)
+dat$DGene = gsub("[*].*", "", dat$DGene)
+
+dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele)
+dat$JGene = gsub("[*].*", "", dat$JGene)
+
+#print(str(dat))
+
+# Rows sharing the same values in all selected columns count as duplicates;
+# only the first occurrence is kept.
+selection.columns = unlist(strsplit(selection, ","))
+missing.columns = setdiff(selection.columns, names(dat))
+if(length(missing.columns) > 0){
+  stop(paste("Unknown selection column(s):", paste(missing.columns, collapse=", ")))
+}
+dat$past = do.call(paste, c(dat[selection.columns], sep = ":"))
+
+dat = dat[!duplicated(dat$past), ]
+
+# %in% never yields NA, so a missing Functionality cannot inject all-NA rows.
+dat = dat[!(dat$Functionality %in% c("No results", "unproductive")),]
+
+write.table(x=dat, file=output, sep="\t",quote=FALSE,row.names=FALSE,col.names=TRUE)
diff -r b84477f57318 -r e7b550d52eb7 baseline/script_imgt.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/baseline/script_imgt.py Tue Aug 09 07:20:41 2016 -0400
@@ -0,0 +1,79 @@
+#import xlrd #avoid dep
+import argparse
+import re
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence")
+parser.add_argument("--ref", help="Reference file")
+parser.add_argument("--output", help="Output file")
+parser.add_argument("--id", help="ID to be used at the '>>>' line in the output")
+
+args = parser.parse_args()
+
+refdic = dict()
+with open(args.ref, 'r') as ref:
+ currentSeq = ""
+ currentId = ""
+ for line in ref:
+ if line[0] is ">":
+ if currentSeq is not "" and currentId is not "":
+ refdic[currentId[1:]] = currentSeq
+ currentId = line.rstrip()
+ currentSeq = ""
+ else:
+ currentSeq += line.rstrip()
+ refdic[currentId[1:]] = currentSeq
+
+
+vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#,
+# r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)",
+# r"(IGKV[0-3]D?-[0-9]{1,2})",
+# r"(IGLV[0-9]-[0-9]{1,2})",
+# r"(TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])?)",
+# r"(TRGV[234589])",
+# r"(TRDV[1-3])"]
+
+#vPattern = re.compile(r"|".join(vPattern))
+vPattern = re.compile("|".join(vPattern))
+
+def filterGene(s, pattern):
+ if type(s) is not str:
+ return None
+ res = pattern.search(s)
+ if res:
+ return res.group(0)
+ return None
+
+
+
+currentSeq = ""
+currentId = ""
+first=True
+with open(args.input, 'r') as i:
+ with open(args.output, 'a') as o:
+ o.write(">>>" + args.id + "\n")
+ outputdic = dict()
+ for line in i:
+ if first:
+ first = False
+ continue
+ linesplt = line.split("\t")
+ ref = filterGene(linesplt[1], vPattern)
+ if not ref or not linesplt[2].rstrip():
+ continue
+ if ref in outputdic:
+ outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
+ else:
+ outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
+ #print outputdic
+
+ for k in outputdic.keys():
+ if k in refdic:
+ o.write(">>" + k + "\n")
+ o.write(refdic[k] + "\n")
+ for seq in outputdic[k]:
+ #print seq
+ o.write(">" + seq[0] + "\n")
+ o.write(seq[1] + "\n")
+ else:
+ print k + " not in reference, skipping " + k
diff -r b84477f57318 -r e7b550d52eb7 baseline/script_xlsx.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/baseline/script_xlsx.py Tue Aug 09 07:20:41 2016 -0400
@@ -0,0 +1,58 @@
+import xlrd
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence")
+parser.add_argument("--ref", help="Reference file")
+parser.add_argument("--output", help="Output file")
+
+args = parser.parse_args()
+
+gene_column = 6
+id_column = 7
+seq_column = 8
+LETTERS = [x for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]
+
+
+refdic = dict()
+with open(args.ref, 'r') as ref:
+ currentSeq = ""
+ currentId = ""
+ for line in ref.readlines():
+ if line[0] is ">":
+ if currentSeq is not "" and currentId is not "":
+ refdic[currentId[1:]] = currentSeq
+ currentId = line.rstrip()
+ currentSeq = ""
+ else:
+ currentSeq += line.rstrip()
+ refdic[currentId[1:]] = currentSeq
+
+currentSeq = ""
+currentId = ""
+with xlrd.open_workbook(args.input, 'r') as wb:
+ with open(args.output, 'a') as o:
+ for sheet in wb.sheets():
+ if sheet.cell(1,gene_column).value.find("IGHV") < 0:
+ print "Genes not in column " + LETTERS[gene_column] + ", skipping sheet " + sheet.name
+ continue
+ o.write(">>>" + sheet.name + "\n")
+ outputdic = dict()
+ for rowindex in range(1, sheet.nrows):
+ ref = sheet.cell(rowindex, gene_column).value.replace(">", "")
+ if ref in outputdic:
+ outputdic[ref] += [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)]
+ else:
+ outputdic[ref] = [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)]
+ #print outputdic
+
+ for k in outputdic.keys():
+ if k in refdic:
+ o.write(">>" + k + "\n")
+ o.write(refdic[k] + "\n")
+ for seq in outputdic[k]:
+ #print seq
+ o.write(">" + seq[0] + "\n")
+ o.write(seq[1] + "\n")
+ else:
+ print k + " not in reference, skipping " + k
diff -r b84477f57318 -r e7b550d52eb7 baseline/wrapper.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/baseline/wrapper.sh Tue Aug 09 07:20:41 2016 -0400
@@ -0,0 +1,106 @@
+#!/bin/bash
+# Builds one combined fasta file from all inputs, runs Baseline_Main.r on it
+# and compares the resulting groups with comparePDFs.r.
+#
+# Arguments: testID species substitutionModel mutabilityModel clonal fixIndels
+# region inputs IDs ref output selection output_table
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+testID=$1
+species=$2
+substitutionModel=$3
+mutabilityModel=$4
+clonal=$5
+fixIndels=$6
+region=$7
+inputs=$8
+inputs=($inputs)
+IDs=$9
+IDs=($IDs)
+ref=${10}
+output=${11}
+selection=${12}
+output_table=${13}
+outID="result"
+
+echo "$PWD"
+
+echo "testID = $testID"
+echo "species = $species"
+echo "substitutionModel = $substitutionModel"
+echo "mutabilityModel = $mutabilityModel"
+echo "clonal = $clonal"
+echo "fixIndels = $fixIndels"
+echo "region = $region"
+echo "inputs = ${inputs[@]}"
+echo "IDs = ${IDs[@]}"
+echo "ref = $ref"
+echo "output = $output"
+echo "outID = $outID"
+
+fasta="$PWD/baseline.fasta"
+
+
+count=0
+for current in "${inputs[@]}"
+do
+	f=$(file "$current")
+	zipType="Zip archive"
+	if [[ "$f" == *"$zipType"* ]] || [[ "$f" == *"XZ compressed data"* ]]
+	then
+		id=${IDs[$count]}
+		echo "id=$id"
+		if [[ "$f" == *"Zip archive"* ]] ; then
+			echo "Zip archive"
+			echo "unzip $current -d $PWD/$id/"
+			unzip "$current" -d "$PWD/$id/"
+		elif [[ "$f" == *"XZ compressed data"* ]] ; then
+			echo "XZ archive"
+			echo "tar -xJf $current -C $PWD/$id/files/"
+			mkdir -p "$PWD/$id/files"
+			tar -xJf "$current" -C "$PWD/$id/files/"
+		fi
+		summaryfile="$PWD/summary_${id}.txt"
+		gappedfile="$PWD/gappednt_${id}.txt"
+		filtered="$PWD/filtered_${id}.txt"
+		# "ls -l" prints a "total" line first, so 2 lines mean a single entry: the files sit one directory deeper
+		filecount=$(ls -l "$PWD/$id/" | wc -l)
+		if [[ "$filecount" -eq "2" ]]
+		then
+			cat "$PWD/$id"/*/1_* > "$summaryfile"
+			cat "$PWD/$id"/*/2_* > "$gappedfile"
+		else
+			cat "$PWD/$id"/1_* > "$summaryfile"
+			cat "$PWD/$id"/2_* > "$gappedfile"
+		fi
+		Rscript "$dir/filter.r" "$summaryfile" "$gappedfile" "$selection" "$filtered" 2>&1
+
+		final="$PWD/final_${id}.txt"
+		cut -f2,4,7 "$filtered" > "$final"
+		python "$dir/script_imgt.py" --input "$final" --ref "$ref" --output "$fasta" --id "$id"
+	else
+		python "$dir/script_xlsx.py" --input "$current" --ref "$ref" --output "$fasta"
+	fi
+	count=$((count+1))
+done
+
+# A missing or empty fasta file holds no sequences either, so it is treated like the one-line case.
+if [[ ! -s "$fasta" ]] || [[ $(wc -l < "$fasta") -eq "1" ]]; then
+	echo "No sequences in the fasta file, exiting"
+	exit 0
+fi
+
+workdir="$PWD"
+cd "$dir"
+echo "file: ${inputs[0]}"
+#Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region ${inputs[0]} $workdir/ $outID 2>&1
+Rscript --verbose "$dir/Baseline_Main.r" "$testID" "$species" "$substitutionModel" "$mutabilityModel" "$clonal" "$fixIndels" "$region" "$fasta" "$workdir/" "$outID" 2>&1
+
+echo "$workdir/${outID}.txt"
+
+rows=`tail -n +2 "$workdir/${outID}.txt" | grep -v "All sequences combined" | grep -n 'Group' | grep -Eoh '^[0-9]+' | tr '\n' ' '`
+rows=($rows)
+#unset rows[${#rows[@]}-1]
+
+Rscript --verbose "$dir/comparePDFs.r" "$workdir/${outID}.RData" "$output" "${rows[@]}" 2>&1
+cp "$workdir/${outID}.txt" "${output_table}"
diff -r b84477f57318 -r e7b550d52eb7 datatypes_conf.xml
--- a/datatypes_conf.xml Thu Aug 04 04:52:51 2016 -0400
+++ b/datatypes_conf.xml Tue Aug 09 07:20:41 2016 -0400
@@ -1,6 +1,6 @@
-
+
diff -r b84477f57318 -r e7b550d52eb7 merge_and_filter.r
--- a/merge_and_filter.r Thu Aug 04 04:52:51 2016 -0400
+++ b/merge_and_filter.r Tue Aug 09 07:20:41 2016 -0400
@@ -12,9 +12,10 @@
unmatchedfile = args[9]
method=args[10]
functionality=args[11]
-unique_type=args[12]
-filter_unique=args[13]
-class_filter=args[14]
+unique.type=args[12]
+filter.unique=args[13]
+class.filter=args[14]
+empty.region.filter=args[15]
summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
@@ -65,7 +66,7 @@
filtering.steps = rbind(filtering.steps, c("After productive filter", nrow(summ)))
-splt = strsplit(class_filter, "_")[[1]]
+splt = strsplit(class.filter, "_")[[1]]
chunk_hit_threshold = as.numeric(splt[1])
nt_hit_threshold = as.numeric(splt[2])
@@ -101,15 +102,15 @@
print(paste("Number of sequences after merging with hotspots:", nrow(result)))
#result$past = paste(result$AA.JUNCTION, result$VGene, result$JGene, (result$FR1.IMGT.Nb.of.mutations + result$CDR1.IMGT.Nb.of.mutations + result$FR2.IMGT.Nb.of.mutations + result$CDR2.IMGT.Nb.of.mutations + result$FR3.IMGT.Nb.of.mutations), result$best_match)
-if(unique_type == "AA.JUNCTION_V_subclass"){
+if(unique.type == "AA.JUNCTION_V_subclass"){
result$past = paste(result$AA.JUNCTION, result$VGene, result$best_match)
-} else if (unique_type == "AA.JUNCTION_subclass"){
+} else if (unique.type == "AA.JUNCTION_subclass"){
result$past = paste(result$AA.JUNCTION, result$best_match)
-} else if (unique_type == "V_subclass"){
+} else if (unique.type == "V_subclass"){
result$past = paste(result$VGene, result$best_match)
-} else if (unique_type == "AA.JUNCTION_V"){
+} else if (unique.type == "AA.JUNCTION_V"){
result$past = paste(result$AA.JUNCTION, result$VGene)
-} else if (unique_type == "AA.JUNCTION"){
+} else if (unique.type == "AA.JUNCTION"){
result$past = paste(result$AA.JUNCTION)
} else {
result$past = 1:nrow(result)
@@ -119,7 +120,7 @@
result = result[,!(names(result) %in% c("past"))]
-print(paste("Number of sequences in result after", unique_type, "filtering:", nrow(result)))
+print(paste("Number of sequences in result after", unique.type, "filtering:", nrow(result)))
filtering.steps = rbind(filtering.steps, c("After duplicate filter", nrow(result)))
@@ -135,10 +136,19 @@
print(paste("Number of empty CDR2 sequences:", sum(result$CDR2.IMGT.seq == "")))
print(paste("Number of empty FR3 sequences:", sum(result$FR3.IMGT.seq == "")))
-result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-
-print(paste("Number of sequences after empty CDR1, FR2, CDR2 and FR3 column filter:", nrow(result)))
-filtering.steps = rbind(filtering.steps, c("After empty CDR1, FR2, CDR2, FR3 filter", nrow(result)))
+if(empty.region.filter == "FR1"){ # "FR1": keep rows whose CDR1, FR2, CDR2 and FR3 sequences are all non-empty
+ result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
+ print(paste("Number of sequences after empty CDR1, FR2, CDR2 and FR3 column filter:", nrow(result)))
+ filtering.steps = rbind(filtering.steps, c("After empty CDR1, FR2, CDR2, FR3 filter", nrow(result)))
+} else if(empty.region.filter == "CDR1"){ # "CDR1": keep rows whose FR2, CDR2 and FR3 sequences are all non-empty
+ result = result[result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
+ print(paste("Number of sequences after empty FR2, CDR2 and FR3 column filter:", nrow(result)))
+ filtering.steps = rbind(filtering.steps, c("After empty FR2, CDR2, FR3 filter", nrow(result)))
+} else if(empty.region.filter == "FR2"){ # "FR2": keep rows whose CDR2 and FR3 sequences are both non-empty
+ result = result[result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
+ print(paste("Number of sequences after empty CDR2 and FR3 column filter:", nrow(result)))
+ filtering.steps = rbind(filtering.steps, c("After empty CDR2, FR3 filter", nrow(result)))
+} # any other value: no rows are removed here and no filtering step is recorded
result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
@@ -164,10 +174,10 @@
write.table(result, before.unique.file, sep="\t", quote=F,row.names=F,col.names=T)
-if(filter_unique != "no"){
+if(filter.unique != "no"){
clmns = names(result)
- if(grepl("_c", filter_unique)){
+ if(grepl("_c", filter.unique)){
result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq, result$best_match)
} else {
result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
@@ -175,7 +185,7 @@
#fltr = result$unique.def %in% result.filtered$unique.def
- if(grepl("keep", filter_unique)){
+ if(grepl("keep", filter.unique)){
result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes
result = result[!duplicated(result$unique.def),]
} else {
diff -r b84477f57318 -r e7b550d52eb7 mutation_analysis.r
--- a/mutation_analysis.r Thu Aug 04 04:52:51 2016 -0400
+++ b/mutation_analysis.r Tue Aug 09 07:20:41 2016 -0400
@@ -169,6 +169,8 @@
setwd(outputdir)
+base.order = data.frame(base=c("A", "T", "C", "G"), order=1:4)
+
calculate_result = function(i, gene, dat, matrx, f, fname, name){
tmp = dat[grepl(paste("^", gene, ".*", sep=""), dat$best_match),]
@@ -179,67 +181,67 @@
if(nrow(tmp) > 0){
- if(fname == "sum"){
+ if(fname == "sum"){
matrx[1,x] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
matrx[1,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
matrx[1,z] = round(f(matrx[1,x] / matrx[1,y]) * 100, digits=1)
- } else {
+ } else {
matrx[1,x] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
matrx[1,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
matrx[1,z] = round(f(tmp$VRegionMutations / tmp$VRegionNucleotides) * 100, digits=1)
- }
-
- matrx[2,x] = round(f(tmp$transitionMutations, na.rm=T), digits=1)
- matrx[2,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
- matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1)
-
- matrx[3,x] = round(f(tmp$transversionMutations, na.rm=T), digits=1)
- matrx[3,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
- matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1)
-
- matrx[4,x] = round(f(tmp$transitionMutationsAtGC, na.rm=T), digits=1)
- matrx[4,y] = round(f(tmp$totalMutationsAtGC, na.rm=T), digits=1)
- matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1)
-
- matrx[5,x] = round(f(tmp$totalMutationsAtGC, na.rm=T), digits=1)
- matrx[5,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
- matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1)
-
- matrx[6,x] = round(f(tmp$transitionMutationsAtAT, na.rm=T), digits=1)
- matrx[6,y] = round(f(tmp$totalMutationsAtAT, na.rm=T), digits=1)
- matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1)
-
- matrx[7,x] = round(f(tmp$totalMutationsAtAT, na.rm=T), digits=1)
- matrx[7,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
- matrx[7,z] = round(matrx[7,x] / matrx[7,y] * 100, digits=1)
-
- matrx[8,x] = round(f(tmp$nonSilentMutationsFR, na.rm=T), digits=1)
- matrx[8,y] = round(f(tmp$silentMutationsFR, na.rm=T), digits=1)
- matrx[8,z] = round(matrx[8,x] / matrx[8,y], digits=1)
-
- matrx[9,x] = round(f(tmp$nonSilentMutationsCDR, na.rm=T), digits=1)
- matrx[9,y] = round(f(tmp$silentMutationsCDR, na.rm=T), digits=1)
- matrx[9,z] = round(matrx[9,x] / matrx[9,y], digits=1)
-
- if(fname == "sum"){
- matrx[10,x] = round(f(rowSums(tmp[,c("FR2.IMGT.Nb.of.nucleotides", "FR3.IMGT.Nb.of.nucleotides")], na.rm=T)), digits=1)
- matrx[10,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
- matrx[10,z] = round(matrx[10,x] / matrx[10,y], digits=1)
-
- matrx[11,x] = round(f(rowSums(tmp[,c("CDR1.IMGT.Nb.of.nucleotides", "CDR2.IMGT.Nb.of.nucleotides")], na.rm=T)), digits=1)
- matrx[11,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
- matrx[11,z] = round(matrx[11,x] / matrx[11,y], digits=1)
- }
- }
+ }
+
+ matrx[2,x] = round(f(tmp$transitionMutations, na.rm=T), digits=1)
+ matrx[2,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
+ matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1)
+
+ matrx[3,x] = round(f(tmp$transversionMutations, na.rm=T), digits=1)
+ matrx[3,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
+ matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1)
+
+ matrx[4,x] = round(f(tmp$transitionMutationsAtGC, na.rm=T), digits=1)
+ matrx[4,y] = round(f(tmp$totalMutationsAtGC, na.rm=T), digits=1)
+ matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1)
+
+ matrx[5,x] = round(f(tmp$totalMutationsAtGC, na.rm=T), digits=1)
+ matrx[5,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
+ matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1)
+
+ matrx[6,x] = round(f(tmp$transitionMutationsAtAT, na.rm=T), digits=1)
+ matrx[6,y] = round(f(tmp$totalMutationsAtAT, na.rm=T), digits=1)
+ matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1)
+
+ matrx[7,x] = round(f(tmp$totalMutationsAtAT, na.rm=T), digits=1)
+ matrx[7,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
+ matrx[7,z] = round(matrx[7,x] / matrx[7,y] * 100, digits=1)
+
+ matrx[8,x] = round(f(tmp$nonSilentMutationsFR, na.rm=T), digits=1)
+ matrx[8,y] = round(f(tmp$silentMutationsFR, na.rm=T), digits=1)
+ matrx[8,z] = round(matrx[8,x] / matrx[8,y], digits=1)
+
+ matrx[9,x] = round(f(tmp$nonSilentMutationsCDR, na.rm=T), digits=1)
+ matrx[9,y] = round(f(tmp$silentMutationsCDR, na.rm=T), digits=1)
+ matrx[9,z] = round(matrx[9,x] / matrx[9,y], digits=1)
+
+ if(fname == "sum"){
+ matrx[10,x] = round(f(rowSums(tmp[,c("FR2.IMGT.Nb.of.nucleotides", "FR3.IMGT.Nb.of.nucleotides")], na.rm=T)), digits=1)
+ matrx[10,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
+ matrx[10,z] = round(matrx[10,x] / matrx[10,y], digits=1)
+
+ matrx[11,x] = round(f(rowSums(tmp[,c("CDR1.IMGT.Nb.of.nucleotides", "CDR2.IMGT.Nb.of.nucleotides")], na.rm=T)), digits=1)
+ matrx[11,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
+ matrx[11,z] = round(matrx[11,x] / matrx[11,y], digits=1)
+ }
+ }
- transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros)
- row.names(transitionTable) = c("A", "C", "G", "T")
- transitionTable["A","A"] = NA
- transitionTable["C","C"] = NA
- transitionTable["G","G"] = NA
- transitionTable["T","T"] = NA
+ transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros)
+ row.names(transitionTable) = c("A", "C", "G", "T")
+ transitionTable["A","A"] = NA
+ transitionTable["C","C"] = NA
+ transitionTable["G","G"] = NA
+ transitionTable["T","T"] = NA
- if(nrow(tmp) > 0){
+ if(nrow(tmp) > 0){
for(nt1 in nts){
for(nt2 in nts){
if(nt1 == nt2){
@@ -259,20 +261,40 @@
}
}
}
- }
-
-
- print(paste("writing value file: ", name, "_", fname, "_value.txt" ,sep=""))
-
- write.table(x=transitionTable, file=paste("transitions_", name ,"_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA)
- write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", name , "_", fname, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
-
- cat(matrx[1,x], file=paste(name, "_", fname, "_value.txt" ,sep=""))
- cat(nrow(tmp), file=paste(name, "_", fname, "_n.txt" ,sep=""))
-
- print(paste(fname, name, nrow(tmp)))
-
- matrx
+ transition = transitionTable
+ transition$id = names(transition)
+
+ transition2 = melt(transition, id.vars="id")
+
+ transition2 = merge(transition2, base.order, by.x="id", by.y="base")
+ transition2 = merge(transition2, base.order, by.x="variable", by.y="base")
+
+ transition2[is.na(transition2$value),]$value = 0
+
+ png(filename=paste("transitions_stacked_", name, ".png", sep=""))
+ p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity") #stacked bar
+ p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL))
+ print(p)
+ dev.off()
+
+ png(filename=paste("transitions_heatmap_", name, ".png", sep=""))
+ p = ggplot(transition2, aes(factor(reorder(id, order.x)), factor(reorder(variable, order.y)))) + geom_tile(aes(fill = value), colour="white") + scale_fill_gradient(low="white", high="steelblue") #heatmap
+ p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base")
+ print(p)
+ dev.off()
+ }
+
+ #print(paste("writing value file: ", name, "_", fname, "_value.txt" ,sep=""))
+
+ write.table(x=transitionTable, file=paste("transitions_", name ,"_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA)
+ write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", name , "_", fname, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
+
+ cat(matrx[1,x], file=paste(name, "_", fname, "_value.txt" ,sep=""))
+ cat(nrow(tmp), file=paste(name, "_", fname, "_n.txt" ,sep=""))
+
+ #print(paste(fname, name, nrow(tmp)))
+
+ matrx
}
nts = c("a", "c", "g", "t")
@@ -322,12 +344,6 @@
write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F)
-
-
-if (!("ggplot2" %in% rownames(installed.packages()))) {
- install.packages("ggplot2", repos="http://cran.xl-mirror.nl/")
-}
-
dat = dat[!grepl("^unmatched", dat$best_match),]
#blegh
diff -r b84477f57318 -r e7b550d52eb7 mutation_analysis.xml
--- a/mutation_analysis.xml Thu Aug 04 04:52:51 2016 -0400
+++ b/mutation_analysis.xml Tue Aug 09 07:20:41 2016 -0400
@@ -1,7 +1,7 @@
- wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1} $functionality $unique $naive_output_ca $naive_output_cg $naive_output_cm $filter_uniques $class_filter
+ wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1} $functionality $unique $naive_output_ca $naive_output_cg $naive_output_cm $filter_uniques $class_filter $empty_region_filter
@@ -28,19 +28,24 @@
Top.V.Gene, CDR3.Seq, C region
- CDR3.Seq + C region
- CDR3.seq + Top.V.Gene
+ CDR3.Seq, C region
+ CDR3.seq, Top.V.Gene
CDR3.seq
Don't remove duplicates
>70% class and >70% subclass
- >60% class and >55% subclass
+ >60% class and >55% subclass
>70% class
>60% class
+
+ FR1 : exclude empty CDR1,FR2,CDR2,FR3
+ CDR1: exclude empty FR2,CDR2,FR3
+ FR2: exclude empty CDR2,FR3
+
-
+
Yes
No
@@ -48,13 +53,13 @@
-
+
naive_output_cond['naive_output'] == "yes"
-
+
naive_output_cond['naive_output'] == "yes"
-
+
naive_output_cond['naive_output'] == "yes"
diff -r b84477f57318 -r e7b550d52eb7 naive_output.r
--- a/naive_output.r Thu Aug 04 04:52:51 2016 -0400
+++ b/naive_output.r Tue Aug 09 07:20:41 2016 -0400
@@ -43,9 +43,3 @@
write.table(final.cg, output.file.cg, quote=F, sep="\t", row.names=F, col.names=T)
write.table(final.cm, output.file.cm, quote=F, sep="\t", row.names=F, col.names=T)
-
-
-
-
-
-
diff -r b84477f57318 -r e7b550d52eb7 new_imgt.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/new_imgt.r Tue Aug 09 07:20:41 2016 -0400
@@ -0,0 +1,35 @@
+# Keeps, in every ".txt" table of imgt.dir, only the sequences listed in the merged table (the files are rewritten in place).
+#
+# Command-line arguments:
+#   1. imgt.dir    - directory containing the tables
+#   2. merged.file - tab-separated table with Sequence.ID and best_match columns
+#   3. gene        - regular expression matched against best_match; "-" disables this filter
+args <- commandArgs(trailingOnly = TRUE)
+
+imgt.dir = args[1]
+merged.file = args[2]
+gene = args[3]
+
+merged = read.table(merged.file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE)
+
+if(gene != "-"){
+  merged = merged[grepl(gene, merged$best_match),]
+}
+
+merged = merged[!grepl("unmatched", merged$best_match),]
+
+# list.files() takes a regular expression, not a glob: match a literal ".txt" suffix.
+for(f in list.files(imgt.dir, pattern="\\.txt$")){
+  #print(paste("filtering", f))
+  # file.path() inserts the separator, so imgt.dir works with or without a trailing slash
+  path = file.path(imgt.dir, f)
+  dat = read.table(path, header=TRUE, sep="\t", fill=TRUE, quote="", stringsAsFactors=FALSE, check.names=FALSE)
+
+  dat = dat[dat[,"Sequence ID"] %in% merged$Sequence.ID,]
+
+  if(nrow(dat) > 0 && grepl("^8_", f)){ #change the FR1 columns to 0 in the "8_..." file
+    dat[,grepl("^FR1", names(dat))] = 0
+  }
+
+  write.table(dat, path, quote=FALSE, sep="\t", row.names=FALSE, col.names=TRUE, na="")
+}
diff -r b84477f57318 -r e7b550d52eb7 tmp/IgAT.xlsm
Binary file tmp/IgAT.xlsm has changed
diff -r b84477f57318 -r e7b550d52eb7 tmp/baseline/Baseline_Functions.r
--- a/tmp/baseline/Baseline_Functions.r Thu Aug 04 04:52:51 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,2287 +0,0 @@
-#########################################################################################
-# License Agreement
-#
-# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE
-# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER
-# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE
-# OR COPYRIGHT LAW IS PROHIBITED.
-#
-# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE
-# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED
-# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN
-# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
-#
-# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
-# Coded by: Mohamed Uduman & Gur Yaari
-# Copyright 2012 Kleinstein Lab
-# Version: 1.3 (01/23/2014)
-#########################################################################################
-
-# Global variables
-
- FILTER_BY_MUTATIONS = 1000
-
- # Nucleotides
- NUCLEOTIDES = c("A","C","G","T")
-
- # Amino Acids
- AMINO_ACIDS <- c("F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "*", "*", "C", "C", "*", "W", "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R", "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R", "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G")
- names(AMINO_ACIDS) <- c("TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TAA", "TAG", "TGT", "TGC", "TGA", "TGG", "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG", "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG", "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG")
- names(AMINO_ACIDS) <- names(AMINO_ACIDS)
-
- #Amino Acid Traits
- #"*" "A" "C" "D" "E" "F" "G" "H" "I" "K" "L" "M" "N" "P" "Q" "R" "S" "T" "V" "W" "Y"
- #B = "Hydrophobic/Burried" N = "Intermediate/Neutral" S="Hydrophilic/Surface")
- TRAITS_AMINO_ACIDS_CHOTHIA98 <- c("*","N","B","S","S","B","N","N","B","S","B","B","S","N","S","S","N","N","B","B","N")
- names(TRAITS_AMINO_ACIDS_CHOTHIA98) <- sort(unique(AMINO_ACIDS))
- TRAITS_AMINO_ACIDS <- array(NA,21)
-
- # Codon Table
- CODON_TABLE <- as.data.frame(matrix(NA,ncol=64,nrow=12))
-
- # Substitution Model: Smith DS et al. 1996
- substitution_Literature_Mouse <- matrix(c(0, 0.156222928, 0.601501588, 0.242275484, 0.172506739, 0, 0.241239892, 0.586253369, 0.54636291, 0.255795364, 0, 0.197841727, 0.290240811, 0.467680608, 0.24207858, 0),nrow=4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
- substitution_Flu_Human <- matrix(c(0,0.2795596,0.5026927,0.2177477,0.1693210,0,0.3264723,0.5042067,0.4983549,0.3328321,0,0.1688130,0.2021079,0.4696077,0.3282844,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
- substitution_Flu25_Human <- matrix(c(0,0.2580641,0.5163685,0.2255674,0.1541125,0,0.3210224,0.5248651,0.5239281,0.3101292,0,0.1659427,0.1997207,0.4579444,0.3423350,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
- load("FiveS_Substitution.RData")
-
- # Mutability Models: Shapiro GS et al. 2002
- triMutability_Literature_Human <- matrix(c(0.24, 1.2, 0.96, 0.43, 2.14, 2, 1.11, 1.9, 0.85, 1.83, 2.36, 1.31, 0.82, 0.52, 0.89, 1.33, 1.4, 0.82, 1.83, 0.73, 1.83, 1.62, 1.53, 0.57, 0.92, 0.42, 0.42, 1.47, 3.44, 2.58, 1.18, 0.47, 0.39, 1.12, 1.8, 0.68, 0.47, 2.19, 2.35, 2.19, 1.05, 1.84, 1.26, 0.28, 0.98, 2.37, 0.66, 1.58, 0.67, 0.92, 1.76, 0.83, 0.97, 0.56, 0.75, 0.62, 2.26, 0.62, 0.74, 1.11, 1.16, 0.61, 0.88, 0.67, 0.37, 0.07, 1.08, 0.46, 0.31, 0.94, 0.62, 0.57, 0.29, NA, 1.44, 0.46, 0.69, 0.57, 0.24, 0.37, 1.1, 0.99, 1.39, 0.6, 2.26, 1.24, 1.36, 0.52, 0.33, 0.26, 1.25, 0.37, 0.58, 1.03, 1.2, 0.34, 0.49, 0.33, 2.62, 0.16, 0.4, 0.16, 0.35, 0.75, 1.85, 0.94, 1.61, 0.85, 2.09, 1.39, 0.3, 0.52, 1.33, 0.29, 0.51, 0.26, 0.51, 3.83, 2.01, 0.71, 0.58, 0.62, 1.07, 0.28, 1.2, 0.74, 0.25, 0.59, 1.09, 0.91, 1.36, 0.45, 2.89, 1.27, 3.7, 0.69, 0.28, 0.41, 1.17, 0.56, 0.93, 3.41, 1, 1, NA, 5.9, 0.74, 2.51, 2.24, 2.24, 1.95, 3.32, 2.34, 1.3, 2.3, 1, 0.66, 0.73, 0.93, 0.41, 0.65, 0.89, 0.65, 0.32, NA, 0.43, 0.85, 0.43, 0.31, 0.31, 0.23, 0.29, 0.57, 0.71, 0.48, 0.44, 0.76, 0.51, 1.7, 0.85, 0.74, 2.23, 2.08, 1.16, 0.51, 0.51, 1, 0.5, NA, NA, 0.71, 2.14), nrow=64,byrow=T)
- triMutability_Literature_Mouse <- matrix(c(1.31, 1.35, 1.42, 1.18, 2.02, 2.02, 1.02, 1.61, 1.99, 1.42, 2.01, 1.03, 2.02, 0.97, 0.53, 0.71, 1.19, 0.83, 0.96, 0.96, 0, 1.7, 2.22, 0.59, 1.24, 1.07, 0.51, 1.68, 3.36, 3.36, 1.14, 0.29, 0.33, 0.9, 1.11, 0.63, 1.08, 2.07, 2.27, 1.74, 0.22, 1.19, 2.37, 1.15, 1.15, 1.56, 0.81, 0.34, 0.87, 0.79, 2.13, 0.49, 0.85, 0.97, 0.36, 0.82, 0.66, 0.63, 1.15, 0.94, 0.85, 0.25, 0.93, 1.19, 0.4, 0.2, 0.44, 0.44, 0.88, 1.06, 0.77, 0.39, 0, 0, 0, 0, 0, 0, 0.43, 0.43, 0.86, 0.59, 0.59, 0, 1.18, 0.86, 2.9, 1.66, 0.4, 0.2, 1.54, 0.43, 0.69, 1.71, 0.68, 0.55, 0.91, 0.7, 1.71, 0.09, 0.27, 0.63, 0.2, 0.45, 1.01, 1.63, 0.96, 1.48, 2.18, 1.2, 1.31, 0.66, 2.13, 0.49, 0, 0, 0, 2.97, 2.8, 0.79, 0.4, 0.5, 0.4, 0.11, 1.68, 0.42, 0.13, 0.44, 0.93, 0.71, 1.11, 1.19, 2.71, 1.08, 3.43, 0.4, 0.67, 0.47, 1.02, 0.14, 1.56, 1.98, 0.53, 0.33, 0.63, 2.06, 1.77, 1.46, 3.74, 2.93, 2.1, 2.18, 0.78, 0.73, 2.93, 0.63, 0.57, 0.17, 0.85, 0.52, 0.31, 0.31, 0, 0, 0.51, 0.29, 0.83, 0.54, 0.28, 0.47, 0.9, 0.99, 1.24, 2.47, 0.73, 0.23, 1.13, 0.24, 2.12, 0.24, 0.33, 0.83, 1.41, 0.62, 0.28, 0.35, 0.77, 0.17, 0.72, 0.58, 0.45, 0.41), nrow=64,byrow=T)
- triMutability_Names <- c("AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT")
- load("FiveS_Mutability.RData")
-
-# Functions
-
- # Translate codon to amino acid
- translateCodonToAminoAcid<-function(Codon){
- return(AMINO_ACIDS[Codon])
- }
-
- # Translate amino acid to trait change
- translateAminoAcidToTraitChange<-function(AminoAcid){
- return(TRAITS_AMINO_ACIDS[AminoAcid])
- }
-
- # Initialize Amino Acid Trait Changes
- initializeTraitChange <- function(traitChangeModel=1,species=1,traitChangeFileName=NULL){
- if(!is.null(traitChangeFileName)){
- tryCatch(
- traitChange <- read.delim(traitChangeFileName,sep="\t",header=T)
- , error = function(ex){
- cat("Error|Error reading trait changes. Please check file name/path and format.\n")
- q()
- }
- )
- }else{
- traitChange <- TRAITS_AMINO_ACIDS_CHOTHIA98
- }
- TRAITS_AMINO_ACIDS <<- traitChange
- }
-
- # Read in formatted nucleotide substitution matrix
- initializeSubstitutionMatrix <- function(substitutionModel,species,subsMatFileName=NULL){
- if(!is.null(subsMatFileName)){
- tryCatch(
- subsMat <- read.delim(subsMatFileName,sep="\t",header=T)
- , error = function(ex){
- cat("Error|Error reading substitution matrix. Please check file name/path and format.\n")
- q()
- }
- )
- if(sum(apply(subsMat,1,sum)==1)!=4) subsMat = t(apply(subsMat,1,function(x)x/sum(x)))
- }else{
- if(substitutionModel==1)subsMat <- substitution_Literature_Mouse
- if(substitutionModel==2)subsMat <- substitution_Flu_Human
- if(substitutionModel==3)subsMat <- substitution_Flu25_Human
-
- }
-
- if(substitutionModel==0){
- subsMat <- matrix(1,4,4)
- subsMat[,] = 1/3
- subsMat[1,1] = 0
- subsMat[2,2] = 0
- subsMat[3,3] = 0
- subsMat[4,4] = 0
- }
-
-
- NUCLEOTIDESN = c(NUCLEOTIDES,"N", "-")
- if(substitutionModel==5){
- subsMat <- FiveS_Substitution
- return(subsMat)
- }else{
- subsMat <- rbind(subsMat,rep(NA,4),rep(NA,4))
- return( matrix(data.matrix(subsMat),6,4,dimnames=list(NUCLEOTIDESN,NUCLEOTIDES) ) )
- }
- }
-
-
- # Read in formatted Mutability file
- initializeMutabilityMatrix <- function(mutabilityModel=1, species=1,mutabilityMatFileName=NULL){
- if(!is.null(mutabilityMatFileName)){
- tryCatch(
- mutabilityMat <- read.delim(mutabilityMatFileName,sep="\t",header=T)
- , error = function(ex){
- cat("Error|Error reading mutability matrix. Please check file name/path and format.\n")
- q()
- }
- )
- }else{
- mutabilityMat <- triMutability_Literature_Human
- if(species==2) mutabilityMat <- triMutability_Literature_Mouse
- }
-
- if(mutabilityModel==0){ mutabilityMat <- matrix(1,64,3)}
-
- if(mutabilityModel==5){
- mutabilityMat <- FiveS_Mutability
- return(mutabilityMat)
- }else{
- return( matrix( data.matrix(mutabilityMat), 64, 3, dimnames=list(triMutability_Names,1:3)) )
- }
- }
-
- # Read FASTA file formats
- # Modified from read.fasta from the seqinR package
- baseline.read.fasta <-
- function (file = system.file("sequences/sample.fasta", package = "seqinr"),
- seqtype = c("DNA", "AA"), as.string = FALSE, forceDNAtolower = TRUE,
- set.attributes = TRUE, legacy.mode = TRUE, seqonly = FALSE,
- strip.desc = FALSE, sizeof.longlong = .Machine$sizeof.longlong,
- endian = .Platform$endian, apply.mask = TRUE)
- {
- seqtype <- match.arg(seqtype)
-
- lines <- readLines(file)
-
- if (legacy.mode) {
- comments <- grep("^;", lines)
- if (length(comments) > 0)
- lines <- lines[-comments]
- }
-
-
- ind_groups<-which(substr(lines, 1L, 3L) == ">>>")
- lines_mod<-lines
-
- if(!length(ind_groups)){
- lines_mod<-c(">>>All sequences combined",lines)
- }
-
- ind_groups<-which(substr(lines_mod, 1L, 3L) == ">>>")
-
- lines <- array("BLA",dim=(length(ind_groups)+length(lines_mod)))
- id<-sapply(1:length(ind_groups),function(i)ind_groups[i]+i-1)+1
- lines[id] <- "THIS IS A FAKE SEQUENCE"
- lines[-id] <- lines_mod
- rm(lines_mod)
-
- ind <- which(substr(lines, 1L, 1L) == ">")
- nseq <- length(ind)
- if (nseq == 0) {
- stop("no line starting with a > character found")
- }
- start <- ind + 1
- end <- ind - 1
-
- while( any(which(ind%in%end)) ){
- ind=ind[-which(ind%in%end)]
- nseq <- length(ind)
- if (nseq == 0) {
- stop("no line starting with a > character found")
- }
- start <- ind + 1
- end <- ind - 1
- }
-
- end <- c(end[-1], length(lines))
- sequences <- lapply(seq_len(nseq), function(i) paste(lines[start[i]:end[i]], collapse = ""))
- if (seqonly)
- return(sequences)
- nomseq <- lapply(seq_len(nseq), function(i) {
-
- #firstword <- strsplit(lines[ind[i]], " ")[[1]][1]
- substr(lines[ind[i]], 2, nchar(lines[ind[i]]))
-
- })
- if (seqtype == "DNA") {
- if (forceDNAtolower) {
- sequences <- as.list(tolower(chartr(".","-",sequences)))
- }else{
- sequences <- as.list(toupper(chartr(".","-",sequences)))
- }
- }
- if (as.string == FALSE)
- sequences <- lapply(sequences, s2c)
- if (set.attributes) {
- for (i in seq_len(nseq)) {
- Annot <- lines[ind[i]]
- if (strip.desc)
- Annot <- substr(Annot, 2L, nchar(Annot))
- attributes(sequences[[i]]) <- list(name = nomseq[[i]],
- Annot = Annot, class = switch(seqtype, AA = "SeqFastaAA",
- DNA = "SeqFastadna"))
- }
- }
- names(sequences) <- nomseq
- return(sequences)
- }
-
-
- # Replaces non FASTA characters in input files with N
- replaceNonFASTAChars <-function(inSeq="ACGTN-AApA"){
- gsub('[^ACGTNacgt[:punct:]-[:punct:].]','N',inSeq,perl=TRUE)
- }
-
- # Find the germlines in the FASTA list
- germlinesInFile <- function(seqIDs){
- firstChar = sapply(seqIDs,function(x){substr(x,1,1)})
- secondChar = sapply(seqIDs,function(x){substr(x,2,2)})
- return(firstChar==">" & secondChar!=">")
- }
-
- # Find the groups in the FASTA list
- groupsInFile <- function(seqIDs){
- sapply(seqIDs,function(x){substr(x,1,2)})==">>"
- }
-
- # In the process of finding germlines/groups, expand from the start to end of the group
- expandTillNext <- function(vecPosToID){
- IDs = names(vecPosToID)
- posOfInterests = which(vecPosToID)
-
- expandedID = rep(NA,length(IDs))
- expandedIDNames = gsub(">","",IDs[posOfInterests])
- startIndexes = c(1,posOfInterests[-1])
- stopIndexes = c(posOfInterests[-1]-1,length(IDs))
- expandedID = unlist(sapply(1:length(startIndexes),function(i){
- rep(i,stopIndexes[i]-startIndexes[i]+1)
- }))
- names(expandedID) = unlist(sapply(1:length(startIndexes),function(i){
- rep(expandedIDNames[i],stopIndexes[i]-startIndexes[i]+1)
- }))
- return(expandedID)
- }
-
- # Process FASTA (list) to return a matrix[input, germline)
- processInputAdvanced <- function(inputFASTA){
-
- seqIDs = names(inputFASTA)
- numbSeqs = length(seqIDs)
- posGermlines1 = germlinesInFile(seqIDs)
- numbGermlines = sum(posGermlines1)
- posGroups1 = groupsInFile(seqIDs)
- numbGroups = sum(posGroups1)
- consDef = NA
-
- if(numbGermlines==0){
- posGermlines = 2
- numbGermlines = 1
- }
-
- glPositionsSum = cumsum(posGermlines1)
- glPositions = table(glPositionsSum)
- #Find the position of the conservation row
- consDefPos = as.numeric(names(glPositions[names(glPositions)!=0 & glPositions==1]))+1
- if( length(consDefPos)> 0 ){
- consDefID = match(consDefPos, glPositionsSum)
- #The coservation rows need to be pulled out and stores seperately
- consDef = inputFASTA[consDefID]
- inputFASTA = inputFASTA[-consDefID]
-
- seqIDs = names(inputFASTA)
- numbSeqs = length(seqIDs)
- posGermlines1 = germlinesInFile(seqIDs)
- numbGermlines = sum(posGermlines1)
- posGroups1 = groupsInFile(seqIDs)
- numbGroups = sum(posGroups1)
- if(numbGermlines==0){
- posGermlines = 2
- numbGermlines = 1
- }
- }
-
- posGroups <- expandTillNext(posGroups1)
- posGermlines <- expandTillNext(posGermlines1)
- posGermlines[posGroups1] = 0
- names(posGermlines)[posGroups1] = names(posGroups)[posGroups1]
- posInput = rep(TRUE,numbSeqs)
- posInput[posGroups1 | posGermlines1] = FALSE
-
- matInput = matrix(NA, nrow=sum(posInput), ncol=2)
- rownames(matInput) = seqIDs[posInput]
- colnames(matInput) = c("Input","Germline")
-
- vecInputFASTA = unlist(inputFASTA)
- matInput[,1] = vecInputFASTA[posInput]
- matInput[,2] = vecInputFASTA[ which( names(inputFASTA)%in%paste(">",names(posGermlines)[posInput],sep="") )[ posGermlines[posInput]] ]
-
- germlines = posGermlines[posInput]
- groups = posGroups[posInput]
-
- return( list("matInput"=matInput, "germlines"=germlines, "groups"=groups, "conservationDefinition"=consDef ))
- }
-
-
- # Replace leading and trailing dashes in the sequence
- replaceLeadingTrailingDashes <- function(x,readEnd){
- iiGap = unlist(gregexpr("-",x[1]))
- ggGap = unlist(gregexpr("-",x[2]))
- #posToChange = intersect(iiGap,ggGap)
-
-
- seqIn = replaceLeadingTrailingDashesHelper(x[1])
- seqGL = replaceLeadingTrailingDashesHelper(x[2])
- seqTemplate = rep('N',readEnd)
- seqIn <- c(seqIn,seqTemplate[(length(seqIn)+1):readEnd])
- seqGL <- c(seqGL,seqTemplate[(length(seqGL)+1):readEnd])
-# if(posToChange!=-1){
-# seqIn[posToChange] = "-"
-# seqGL[posToChange] = "-"
-# }
-
- seqIn = c2s(seqIn[1:readEnd])
- seqGL = c2s(seqGL[1:readEnd])
-
- lenGL = nchar(seqGL)
- if(lenGL seqLen )
- trimmedSeq = substr(seqToTrim,1, ( (getCodonPos(seqLen)[1])-1 ) )
-
- return(trimmedSeq)
- }
-
- # Given a nuclotide position, returns the pos of the 3 nucs that made the codon
- # e.g. nuc 86 is part of nucs 85,86,87
- getCodonPos <- function(nucPos){
- codonNum = (ceiling(nucPos/3))*3
- return( (codonNum-2):codonNum)
- }
-
- # Given a nuclotide position, returns the codon number
- # e.g. nuc 86 = codon 29
- getCodonNumb <- function(nucPos){
- return( ceiling(nucPos/3) )
- }
-
- # Given a codon, returns all the nuc positions that make the codon
- getCodonNucs <- function(codonNumb){
- getCodonPos(codonNumb*3)
- }
-
- computeCodonTable <- function(testID=1){
-
- if(testID<=4){
- # Pre-compute every codons
- intCounter = 1
- for(pOne in NUCLEOTIDES){
- for(pTwo in NUCLEOTIDES){
- for(pThree in NUCLEOTIDES){
- codon = paste(pOne,pTwo,pThree,sep="")
- colnames(CODON_TABLE)[intCounter] = codon
- intCounter = intCounter + 1
- CODON_TABLE[,codon] = mutationTypeOptimized(cbind(permutateAllCodon(codon),rep(codon,12)))
- }
- }
- }
- chars = c("N","A","C","G","T", "-")
- for(a in chars){
- for(b in chars){
- for(c in chars){
- if(a=="N" | b=="N" | c=="N"){
- #cat(paste(a,b,c),sep="","\n")
- CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12)
- }
- }
- }
- }
-
- chars = c("-","A","C","G","T")
- for(a in chars){
- for(b in chars){
- for(c in chars){
- if(a=="-" | b=="-" | c=="-"){
- #cat(paste(a,b,c),sep="","\n")
- CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12)
- }
- }
- }
- }
- CODON_TABLE <<- as.matrix(CODON_TABLE)
- }
- }
-
- collapseClone <- function(vecInputSeqs,glSeq,readEnd,nonTerminalOnly=0){
- #print(length(vecInputSeqs))
- vecInputSeqs = unique(vecInputSeqs)
- if(length(vecInputSeqs)==1){
- return( list( c(vecInputSeqs,glSeq), F) )
- }else{
- charInputSeqs <- sapply(vecInputSeqs, function(x){
- s2c(x)[1:readEnd]
- })
- charGLSeq <- s2c(glSeq)
- matClone <- sapply(1:readEnd, function(i){
- posNucs = unique(charInputSeqs[i,])
- posGL = charGLSeq[i]
- error = FALSE
- if(posGL=="-" & sum(!(posNucs%in%c("-","N")))==0 ){
- return(c("-",error))
- }
- if(length(posNucs)==1)
- return(c(posNucs[1],error))
- else{
- if("N"%in%posNucs){
- error=TRUE
- }
- if(sum(!posNucs[posNucs!="N"]%in%posGL)==0){
- return( c(posGL,error) )
- }else{
- #return( c(sample(posNucs[posNucs!="N"],1),error) )
- if(nonTerminalOnly==0){
- return( c(sample(charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL],1),error) )
- }else{
- posNucs = charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL]
- posNucsTable = table(posNucs)
- if(sum(posNucsTable>1)==0){
- return( c(posGL,error) )
- }else{
- return( c(sample( posNucs[posNucs%in%names(posNucsTable)[posNucsTable>1]],1),error) )
- }
- }
-
- }
- }
- })
-
-
- #print(length(vecInputSeqs))
- return(list(c(c2s(matClone[1,]),glSeq),"TRUE"%in%matClone[2,]))
- }
- }
-
- # Compute the expected for each sequence-germline pair
- getExpectedIndividual <- function(matInput){
- if( any(grep("multicore",search())) ){
- facGL <- factor(matInput[,2])
- facLevels = levels(facGL)
- LisGLs_MutabilityU = mclapply(1:length(facLevels), function(x){
- computeMutabilities(facLevels[x])
- })
- facIndex = match(facGL,facLevels)
-
- LisGLs_Mutability = mclapply(1:nrow(matInput), function(x){
- cInput = rep(NA,nchar(matInput[x,1]))
- cInput[s2c(matInput[x,1])!="N"] = 1
- LisGLs_MutabilityU[[facIndex[x]]] * cInput
- })
-
- LisGLs_Targeting = mclapply(1:dim(matInput)[1], function(x){
- computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
- })
-
- LisGLs_MutationTypes = mclapply(1:length(matInput[,2]),function(x){
- #print(x)
- computeMutationTypes(matInput[x,2])
- })
-
- LisGLs_Exp = mclapply(1:dim(matInput)[1], function(x){
- computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]])
- })
-
- ul_LisGLs_Exp = unlist(LisGLs_Exp)
- return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=T))
- }else{
- facGL <- factor(matInput[,2])
- facLevels = levels(facGL)
- LisGLs_MutabilityU = lapply(1:length(facLevels), function(x){
- computeMutabilities(facLevels[x])
- })
- facIndex = match(facGL,facLevels)
-
- LisGLs_Mutability = lapply(1:nrow(matInput), function(x){
- cInput = rep(NA,nchar(matInput[x,1]))
- cInput[s2c(matInput[x,1])!="N"] = 1
- LisGLs_MutabilityU[[facIndex[x]]] * cInput
- })
-
- LisGLs_Targeting = lapply(1:dim(matInput)[1], function(x){
- computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
- })
-
- LisGLs_MutationTypes = lapply(1:length(matInput[,2]),function(x){
- #print(x)
- computeMutationTypes(matInput[x,2])
- })
-
- LisGLs_Exp = lapply(1:dim(matInput)[1], function(x){
- computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]])
- })
-
- ul_LisGLs_Exp = unlist(LisGLs_Exp)
- return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=T))
-
- }
- }
-
- # Compute mutabilities of sequence based on the tri-nucleotide model
- computeMutabilities <- function(paramSeq){
- seqLen = nchar(paramSeq)
- seqMutabilites = rep(NA,seqLen)
-
- gaplessSeq = gsub("-", "", paramSeq)
- gaplessSeqLen = nchar(gaplessSeq)
- gaplessSeqMutabilites = rep(NA,gaplessSeqLen)
-
- if(mutabilityModel!=5){
- pos<- 3:(gaplessSeqLen)
- subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))
- gaplessSeqMutabilites[pos] =
- tapply( c(
- getMutability( substr(subSeq,1,3), 3) ,
- getMutability( substr(subSeq,2,4), 2),
- getMutability( substr(subSeq,3,5), 1)
- ),rep(1:(gaplessSeqLen-2),3),mean,na.rm=TRUE
- )
- #Pos 1
- subSeq = substr(gaplessSeq,1,3)
- gaplessSeqMutabilites[1] = getMutability(subSeq , 1)
- #Pos 2
- subSeq = substr(gaplessSeq,1,4)
- gaplessSeqMutabilites[2] = mean( c(
- getMutability( substr(subSeq,1,3), 2) ,
- getMutability( substr(subSeq,2,4), 1)
- ),na.rm=T
- )
- seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites
- return(seqMutabilites)
- }else{
-
- pos<- 3:(gaplessSeqLen)
- subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))
- gaplessSeqMutabilites[pos] = sapply(subSeq,function(x){ getMutability5(x) }, simplify=T)
- seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites
- return(seqMutabilites)
- }
-
- }
-
- # Returns the mutability of a triplet at a given position
- getMutability <- function(codon, pos=1:3){
- triplets <- rownames(mutability)
- mutability[ match(codon,triplets) ,pos]
- }
-
- getMutability5 <- function(fivemer){
- return(mutability[fivemer])
- }
-
- # Returns the substitution probabilty
- getTransistionProb <- function(nuc){
- substitution[nuc,]
- }
-
- getTransistionProb5 <- function(fivemer){
- if(any(which(fivemer==colnames(substitution)))){
- return(substitution[,fivemer])
- }else{
- return(array(NA,4))
- }
- }
-
- # Given a nuc, returns the other 3 nucs it can mutate to
- canMutateTo <- function(nuc){
- NUCLEOTIDES[- which(NUCLEOTIDES==nuc)]
- }
-
- # Given a nucleotide, returns the probabilty of other nucleotide it can mutate to
- canMutateToProb <- function(nuc){
- substitution[nuc,canMutateTo(nuc)]
- }
-
- # Compute targeting, based on precomputed mutatbility & substitution
- computeTargeting <- function(param_strSeq,param_vecMutabilities){
-
- if(substitutionModel!=5){
- vecSeq = s2c(param_strSeq)
- matTargeting = sapply( 1:length(vecSeq), function(x) { param_vecMutabilities[x] * getTransistionProb(vecSeq[x]) } )
- #matTargeting = apply( rbind(vecSeq,param_vecMutabilities),2, function(x) { as.vector(as.numeric(x[2]) * getTransistionProb(x[1])) } )
- dimnames( matTargeting ) = list(NUCLEOTIDES,1:(length(vecSeq)))
- return (matTargeting)
- }else{
-
- seqLen = nchar(param_strSeq)
- seqsubstitution = matrix(NA,ncol=seqLen,nrow=4)
- paramSeq <- param_strSeq
- gaplessSeq = gsub("-", "", paramSeq)
- gaplessSeqLen = nchar(gaplessSeq)
- gaplessSeqSubstitution = matrix(NA,ncol=gaplessSeqLen,nrow=4)
-
- pos<- 3:(gaplessSeqLen)
- subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))
- gaplessSeqSubstitution[,pos] = sapply(subSeq,function(x){ getTransistionProb5(x) }, simplify=T)
- seqsubstitution[,which(s2c(paramSeq)!="-")]<- gaplessSeqSubstitution
- #matTargeting <- param_vecMutabilities %*% seqsubstitution
- matTargeting <- sweep(seqsubstitution,2,param_vecMutabilities,`*`)
- dimnames( matTargeting ) = list(NUCLEOTIDES,1:(seqLen))
- return (matTargeting)
- }
- }
-
- # Compute the mutations types
- computeMutationTypes <- function(param_strSeq){
- #cat(param_strSeq,"\n")
- #vecSeq = trimToLastCodon(param_strSeq)
- lenSeq = nchar(param_strSeq)
- vecCodons = sapply({1:(lenSeq/3)}*3-2,function(x){substr(param_strSeq,x,x+2)})
- matMutationTypes = matrix( unlist(CODON_TABLE[,vecCodons]) ,ncol=lenSeq,nrow=4, byrow=F)
- dimnames( matMutationTypes ) = list(NUCLEOTIDES,1:(ncol(matMutationTypes)))
- return(matMutationTypes)
- }
- computeMutationTypesFast <- function(param_strSeq){
- matMutationTypes = matrix( CODON_TABLE[,param_strSeq] ,ncol=3,nrow=4, byrow=F)
- #dimnames( matMutationTypes ) = list(NUCLEOTIDES,1:(length(vecSeq)))
- return(matMutationTypes)
- }
- mutationTypeOptimized <- function( matOfCodons ){
- apply( matOfCodons,1,function(x){ mutationType(x[2],x[1]) } )
- }
-
- # Returns a vector of codons 1 mutation away from the given codon
- permutateAllCodon <- function(codon){
- cCodon = s2c(codon)
- matCodons = t(array(cCodon,dim=c(3,12)))
- matCodons[1:4,1] = NUCLEOTIDES
- matCodons[5:8,2] = NUCLEOTIDES
- matCodons[9:12,3] = NUCLEOTIDES
- apply(matCodons,1,c2s)
- }
-
- # Given two codons, tells you if the mutation is R or S (based on your definition)
- mutationType <- function(codonFrom,codonTo){
- if(testID==4){
- if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
- return(NA)
- }else{
- mutationType = "S"
- if( translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonFrom)) != translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonTo)) ){
- mutationType = "R"
- }
- if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){
- mutationType = "Stop"
- }
- return(mutationType)
- }
- }else if(testID==5){
- if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
- return(NA)
- }else{
- if(codonFrom==codonTo){
- mutationType = "S"
- }else{
- codonFrom = s2c(codonFrom)
- codonTo = s2c(codonTo)
- mutationType = "Stop"
- nucOfI = codonFrom[which(codonTo!=codonFrom)]
- if(nucOfI=="C"){
- mutationType = "R"
- }else if(nucOfI=="G"){
- mutationType = "S"
- }
- }
- return(mutationType)
- }
- }else{
- if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
- return(NA)
- }else{
- mutationType = "S"
- if( translateCodonToAminoAcid(codonFrom) != translateCodonToAminoAcid(codonTo) ){
- mutationType = "R"
- }
- if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){
- mutationType = "Stop"
- }
- return(mutationType)
- }
- }
- }
-
-
- #given a mat of targeting & it's corresponding mutationtypes returns
- #a vector of Exp_RCDR,Exp_SCDR,Exp_RFWR,Exp_RFWR
- computeExpected <- function(paramTargeting,paramMutationTypes){
- # Replacements
- RPos = which(paramMutationTypes=="R")
- #FWR
- Exp_R_FWR = sum(paramTargeting[ RPos[which(FWR_Nuc_Mat[RPos]==T)] ],na.rm=T)
- #CDR
- Exp_R_CDR = sum(paramTargeting[ RPos[which(CDR_Nuc_Mat[RPos]==T)] ],na.rm=T)
- # Silents
- SPos = which(paramMutationTypes=="S")
- #FWR
- Exp_S_FWR = sum(paramTargeting[ SPos[which(FWR_Nuc_Mat[SPos]==T)] ],na.rm=T)
- #CDR
- Exp_S_CDR = sum(paramTargeting[ SPos[which(CDR_Nuc_Mat[SPos]==T)] ],na.rm=T)
-
- return(c(Exp_R_CDR,Exp_S_CDR,Exp_R_FWR,Exp_S_FWR))
- }
-
- # Count the mutations in a sequence
- # each mutation is treated independently
- analyzeMutations2NucUri_website <- function( rev_in_matrix ){
- paramGL = rev_in_matrix[2,]
- paramSeq = rev_in_matrix[1,]
-
- #Fill seq with GL seq if gapped
- #if( any(paramSeq=="-") ){
- # gapPos_Seq = which(paramSeq=="-")
- # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "-"]
- # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
- #}
-
-
- #if( any(paramSeq=="N") ){
- # gapPos_Seq = which(paramSeq=="N")
- # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
- # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
- #}
-
- analyzeMutations2NucUri( matrix(c( paramGL, paramSeq ),2,length(paramGL),byrow=T) )
-
- }
-
- #1 = GL
- #2 = Seq
- analyzeMutations2NucUri <- function( in_matrix=matrix(c(c("A","A","A","C","C","C"),c("A","G","G","C","C","A")),2,6,byrow=T) ){
- paramGL = in_matrix[2,]
- paramSeq = in_matrix[1,]
- paramSeqUri = paramGL
- #mutations = apply(rbind(paramGL,paramSeq), 2, function(x){!x[1]==x[2]})
- mutations_val = paramGL != paramSeq
- if(any(mutations_val)){
- mutationPos = {1:length(mutations_val)}[mutations_val]
- mutationPos = mutationPos[sapply(mutationPos, function(x){!any(paramSeq[getCodonPos(x)]=="N")})]
- length_mutations =length(mutationPos)
- mutationInfo = rep(NA,length_mutations)
- if(any(mutationPos)){
-
- pos<- mutationPos
- pos_array<-array(sapply(pos,getCodonPos))
- codonGL = paramGL[pos_array]
-
- codonSeq = sapply(pos,function(x){
- seqP = paramGL[getCodonPos(x)]
- muCodonPos = {x-1}%%3+1
- seqP[muCodonPos] = paramSeq[x]
- return(seqP)
- })
- GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
- Seqcodons = apply(codonSeq,2,c2s)
- mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
- names(mutationInfo) = mutationPos
- }
- if(any(!is.na(mutationInfo))){
- return(mutationInfo[!is.na(mutationInfo)])
- }else{
- return(NA)
- }
-
-
- }else{
- return (NA)
- }
- }
-
- processNucMutations2 <- function(mu){
- if(!is.na(mu)){
- #R
- if(any(mu=="R")){
- Rs = mu[mu=="R"]
- nucNumbs = as.numeric(names(Rs))
- R_CDR = sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=T)
- R_FWR = sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=T)
- }else{
- R_CDR = 0
- R_FWR = 0
- }
-
- #S
- if(any(mu=="S")){
- Ss = mu[mu=="S"]
- nucNumbs = as.numeric(names(Ss))
- S_CDR = sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=T)
- S_FWR = sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=T)
- }else{
- S_CDR = 0
- S_FWR = 0
- }
-
-
- retVec = c(R_CDR,S_CDR,R_FWR,S_FWR)
- retVec[is.na(retVec)]=0
- return(retVec)
- }else{
- return(rep(0,4))
- }
- }
-
-
- ## Z-score Test
- computeZScore <- function(mat, test="Focused"){
- matRes <- matrix(NA,ncol=2,nrow=(nrow(mat)))
- if(test=="Focused"){
- #Z_Focused_CDR
- #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T )
- P = apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))})
- R_mean = apply(cbind(mat[,c(1,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))})
- R_sd=sqrt(R_mean*(1-P))
- matRes[,1] = (mat[,1]-R_mean)/R_sd
-
- #Z_Focused_FWR
- #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T )
- P = apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))})
- R_mean = apply(cbind(mat[,c(3,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))})
- R_sd=sqrt(R_mean*(1-P))
- matRes[,2] = (mat[,3]-R_mean)/R_sd
- }
-
- if(test=="Local"){
- #Z_Focused_CDR
- #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T )
- P = apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))})
- R_mean = apply(cbind(mat[,c(1,2)],P),1,function(x){x[3]*(sum(x[1:2]))})
- R_sd=sqrt(R_mean*(1-P))
- matRes[,1] = (mat[,1]-R_mean)/R_sd
-
- #Z_Focused_FWR
- #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T )
- P = apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))})
- R_mean = apply(cbind(mat[,c(3,4)],P),1,function(x){x[3]*(sum(x[1:2]))})
- R_sd=sqrt(R_mean*(1-P))
- matRes[,2] = (mat[,3]-R_mean)/R_sd
- }
-
- if(test=="Imbalanced"){
- #Z_Focused_CDR
- #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T )
- P = apply(mat[,5:8],1,function(x){((x[1]+x[2])/sum(x))})
- R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))})
- R_sd=sqrt(R_mean*(1-P))
- matRes[,1] = (mat[,1]-R_mean)/R_sd
-
- #Z_Focused_FWR
- #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T )
- P = apply(mat[,5:8],1,function(x){((x[3]+x[4])/sum(x))})
- R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))})
- R_sd=sqrt(R_mean*(1-P))
- matRes[,2] = (mat[,3]-R_mean)/R_sd
- }
-
- matRes[is.nan(matRes)] = NA
- return(matRes)
- }
-
- # Return a p-value for a z-score
- z2p <- function(z){
- p=NA
- if( !is.nan(z) && !is.na(z)){
- if(z>0){
- p = (1 - pnorm(z,0,1))
- } else if(z<0){
- p = (-1 * pnorm(z,0,1))
- } else{
- p = 0.5
- }
- }else{
- p = NA
- }
- return(p)
- }
-
-
- ## Bayesian Test
-
- # Fitted parameter for the bayesian framework.
- # Table of 60 values; calculate_bayes() picks entry min(N,60) and adds it to both
- # shape arguments of dbeta().
-BAYESIAN_FITTED<-c(0.407277142798302, 0.554007336744485, 0.63777155771234, 0.693989162719009, 0.735450014674917, 0.767972534429806, 0.794557287143399, 0.816906816601605, 0.83606796225341, 0.852729446430296, 0.867370424541641, 0.880339760590323, 0.891900995024999, 0.902259181289864, 0.911577919359,0.919990301665853, 0.927606458124537, 0.934518806350661, 0.940805863754375, 0.946534836475715, 0.951763691199255, 0.95654428191308, 0.960920179487397, 0.964930893680829, 0.968611312149038, 0.971992459313836, 0.975102110004818, 0.977964943023096, 0.980603428208439, 0.983037660179428, 0.985285800977406, 0.987364285326685, 0.989288037855441, 0.991070478823525, 0.992723699729969, 0.994259575477392, 0.995687688867975, 0.997017365051493, 0.998257085153047, 0.999414558305388, 1.00049681357804, 1.00151036237481, 1.00246080204981, 1.00335370751909, 1.0041939329768, 1.0049859393417, 1.00573382091263, 1.00644127217376, 1.00711179729107, 1.00774845526417, 1.00835412715854, 1.00893143010366, 1.00948275846309, 1.01001030293661, 1.01051606798079, 1.01100188771288, 1.01146944044216, 1.01192026195449, 1.01235575766094, 1.01277721370986)
- # Sorted grid of probabilities, used as the default `i` of calculate_bayes(). Built from three pieces:
- #   - 2^seq(-39,0,length.out=201)/2, first 200 values (geometric spacing from 2^-40 upwards),
- #   - the mirror image 1 - 2^seq(-39,0,length.out=201)/2,
- #   - (k+0.5)/100 for k in 0..99 with k=12 left out (NOTE(review): reason for skipping 12 is not visible here).
- CONST_i <- sort(c(((2^(seq(-39,0,length.out=201)))/2)[1:200],(c(0:11,13:99)+0.5)/100,1-(2^(seq(-39,0,length.out=201)))/2))
-
- # Given x, N & p, returns a pdf evaluated on the regular grid
- # seq(-max_sigma, max_sigma, length.out=length_sigma).
- # Returns NA when N contains a zero or when the density has undefined values.
- calculate_bayes <- function ( x=3, N=10, p=0.33,
-                               i=CONST_i,
-                               max_sigma=20,length_sigma=4001
- ){
-   # Nothing to compute when N contains a zero
-   if(0%in%N) return(NA)
-
-   # Bring x, N and p to a common length (array() recycles its data)
-   G <- max(length(x),length(N),length(p))
-   x <- array(x,dim=G)
-   N <- array(N,dim=G)
-   p <- array(p,dim=G)
-
-   # Output grid, and the log odds ratio of every probability in i against p
-   sigma_s <- seq(-max_sigma,max_sigma,length.out=length_sigma)
-   sigma_1 <- log((i/(1-i))/(p/(1-p)))
-
-   # Fitted term looked up by the smallest N, capped at table entry 60
-   fitted <- BAYESIAN_FITTED[min(N,60)]
-   expSigma <- exp(sigma_1)
-   betaDensity <- dbeta(i,x+fitted,N+fitted-x)
-   denominator <- (1-p)^2+2*p*(1-p)*expSigma+(p^2)*exp(2*sigma_1)
-   y <- betaDensity*(1-p)*p*expSigma/denominator
-
-   # A single undefined value invalidates the whole pdf
-   if(sum(is.na(y))) return(NA)
-
-   # Interpolate onto the regular grid, then normalise by sum and grid step
-   tmp <- approx(sigma_1,y,sigma_s)$y
-   tmp/sum(tmp)/(2*max_sigma/(length_sigma-1))
- }
- # Given a mat of observed (columns 1-4) & expected (columns 5-8) values, return a list
- # with the CDR & FWR results of calculate_bayes() for every row.
- # `test` selects which columns feed x, N and p: "Focused", "Local", "Imbalanced" or "ImbalancedSilent".
- computeBayesianScore <- function(mat, test="Focused", max_sigma=20,length_sigma=4001){
-   # A one-row matrix is duplicated so that the row-wise apply() calls still receive a matrix;
-   # only the first result is kept at the end.
-   flagOneSeq <- (nrow(mat)==1)
-   if(flagOneSeq) mat <- rbind(mat,mat)
-
-   # Small row-wise summaries shared by the tests below
-   rowTotal <- function(x){(sum(x))}
-   shareOfFirst <- function(x){(x[1]/sum(x))}
-
-   # Run calculate_bayes() once per row. A dummy row (X=0, N=0, P=0.5) is appended before
-   # apply() and its result dropped afterwards
-   # (NOTE(review): presumably this keeps apply() from simplifying its result to a matrix - confirm).
-   pdfPerRow <- function(X,N,P){
-     X <- c(X,0)
-     N <- c(N,0)
-     P <- c(P,0.5)
-     res <- apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
-     res[-length(res)]
-   }
-
-   if(test=="Focused"){
-     bayesCDR <- pdfPerRow( X=mat[,1],
-                            N=apply(mat[,c(1,2,4)],1,rowTotal),
-                            P=apply(mat[,c(5,6,8)],1,shareOfFirst) )
-     bayesFWR <- pdfPerRow( X=mat[,3],
-                            N=apply(mat[,c(3,2,4)],1,rowTotal),
-                            P=apply(mat[,c(7,6,8)],1,shareOfFirst) )
-   }else if(test=="Local"){
-     bayesCDR <- pdfPerRow( X=mat[,1],
-                            N=apply(mat[,c(1,2)],1,rowTotal),
-                            P=apply(mat[,c(5,6)],1,shareOfFirst) )
-     bayesFWR <- pdfPerRow( X=mat[,3],
-                            N=apply(mat[,c(3,4)],1,rowTotal),
-                            P=apply(mat[,c(7,8)],1,shareOfFirst) )
-   }else if(test=="Imbalanced"){
-     bayesCDR <- pdfPerRow( X=apply(mat[,c(1:2)],1,rowTotal),
-                            N=apply(mat[,c(1:4)],1,rowTotal),
-                            P=apply(mat[,c(5:8)],1,function(x){((x[1]+x[2])/sum(x))}) )
-     bayesFWR <- pdfPerRow( X=apply(mat[,c(3:4)],1,rowTotal),
-                            N=apply(mat[,c(1:4)],1,rowTotal),
-                            P=apply(mat[,c(5:8)],1,function(x){((x[3]+x[4])/sum(x))}) )
-   }else if(test=="ImbalancedSilent"){
-     bayesCDR <- pdfPerRow( X=apply(mat[,c(2,4)],1,function(x){(x[1])}),
-                            N=apply(mat[,c(2,4)],1,rowTotal),
-                            P=apply(mat[,c(6,8)],1,function(x){((x[1])/sum(x))}) )
-     bayesFWR <- pdfPerRow( X=apply(mat[,c(2,4)],1,function(x){(x[2])}),
-                            N=apply(mat[,c(2,4)],1,rowTotal),
-                            P=apply(mat[,c(6,8)],1,function(x){((x[2])/sum(x))}) )
-   }
-
-   # Undo the duplication of a one-row input
-   if(flagOneSeq){
-     bayesCDR <- bayesCDR[1]
-     bayesFWR <- bayesFWR[1]
-   }
-   return( list("CDR"=bayesCDR, "FWR"=bayesFWR) )
- }
-
- ## Convolution
- # Split G items into chunks whose size is the power of two nearest (in log2) to sqrt(G).
- # Returns the vector of chunk sizes; the last chunk also absorbs the remainder.
- break2chunks<-function(G=1000){
-   chunkSize <- 2^round(log(sqrt(G),2),0)
-   nChunks <- floor(G/chunkSize)
-   lastChunk <- chunkSize+G-(nChunks*chunkSize)
-   c(rep(chunkSize,nChunks-1),lastChunk)
- }
-
- # Decompose G into a sum of powers of two and return the exponents, largest first
- # (e.g. 100 = 2^6 + 2^5 + 2^2 gives 6, 5, 2). For G <= 0 the initial NA is returned.
- PowersOfTwo <- function(G=100){
-   exponents <- array()
-   remaining <- G
-   k <- 0
-   repeat{
-     if(!(remaining > 0)) break
-     k <- k+1
-     # Largest power of two that still fits into what is left
-     exponents[k] <- floor( log2(remaining) )
-     remaining <- remaining-2^exponents[k]
-   }
-   return(exponents)
- }
-
- # Pairwise reduction of the columns of cons with weighted_conv():
- # in every round, columns (l, l-1) are combined into column l/2, until column 1 holds the result.
- # A single-column input is returned unchanged.
- convolutionPowersOfTwo <- function( cons, length_sigma=4001 ){
-   G <- ncol(cons)
-   if(!(G>1)) return( cons[,1] )
-   for(gen in log(G,2):1){
-     # Even column indices handled in this round
-     for(l in seq(from=2,to=2^gen,by=2)){
-       cons[,l/2] <- weighted_conv(cons[,l],cons[,l-1],length_sigma=length_sigma)
-     }
-   }
-   return( cons[,1] )
- }
-
- # Cut the columns of cons into consecutive groups whose sizes are the powers of two given by
- # PowersOfTwo(G), and reduce every group to one column with convolutionPowersOfTwo().
- # Returns list(matrix with one column per group, exponents of the group sizes).
- convolutionPowersOfTwoByTwos <- function( cons, length_sigma=4001,G=1 ){
-   # A matrix input overrides G with its column count; a plain vector keeps the G argument
-   nColumns <- ncol(cons)
-   if(!is.null(nColumns)) G <- nColumns
-   groups <- PowersOfTwo(G)
-   matG <- matrix(NA, ncol=length(groups), nrow=length(cons)/G )
-   firstCol <- 1
-   for( grp in seq_along(groups) ){
-     lastCol <- 2^groups[grp] + firstCol - 1
-     if(lastCol==firstCol){
-       # Group made of a single column: copied as is; the column cursor is not advanced here
-       if(G>1) matG[,grp] <- cons[,firstCol:lastCol]
-       else matG[,grp] <- cons
-     }else{
-       matG[,grp] <- convolutionPowersOfTwo( cons[,firstCol:lastCol], length_sigma=length_sigma )
-       firstCol <- lastCol + 1
-     }
-   }
-   return( list( matG, groups ) )
- }
-
- weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){
- lx<-length(x)
- ly<-length(y)
- if({lx1){
- while( i1 & Length_Postrior<=Threshold){
- cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors))
- listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma)
- y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma)
- return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
- }else if(Length_Postrior==1) return(listPosteriors[[1]])
- else if(Length_Postrior==0) return(NA)
- else {
- cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors))
- y = fastConv(cons,max_sigma=max_sigma, length_sigma=length_sigma )
- return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
- }
- }
-
- # Convolve all columns of cons in two stages: the columns are cut into chunks
- # (break2chunks), every chunk is reduced with convolutionPowersOfTwoByTwos(), and the
- # per-chunk results are then combined with calculate_bayesGHelper(). Returns a plain vector.
- # NOTE(review): max_sigma and length_sigma are accepted but never forwarded to the helpers,
- # which therefore run with their own defaults - confirm this is intended.
- fastConv<-function(cons, max_sigma=20, length_sigma=4001){
- chunks<-break2chunks(G=ncol(cons))
- # Three columns are special-cased into chunks of sizes 2 and 1
- if(ncol(cons)==3) chunks<-2:1
- # First/last column index of every chunk, one row per chunk
- index_chunks_end <- cumsum(chunks)
- index_chunks_start <- c(1,index_chunks_end[-length(index_chunks_end)]+1)
- index_chunks <- cbind(index_chunks_start,index_chunks_end)
-
- # Number of chunks whose size differs from the first one.
- # length(index_chunks)/2 is the number of chunks (the matrix has two columns).
- case <- sum(chunks!=chunks[1])
- # With one odd-sized chunk, that (last) chunk is left out of the first stage
- if(case==1) End <- max(1,((length(index_chunks)/2)-1))
- else End <- max(1,((length(index_chunks)/2)))
-
- # First stage: reduce every regular chunk
- firsts <- sapply(1:End,function(i){
- indexes<-index_chunks[i,1]:index_chunks[i,2]
- convolutionPowersOfTwoByTwos(cons[ ,indexes])[[1]]
- })
- if(case==0){
- result<-calculate_bayesGHelper( convolutionPowersOfTwoByTwos(firsts) )
- }else if(case==1){
- # Reduce the odd-sized last chunk on its own ...
- last<-list(calculate_bayesGHelper(
- convolutionPowersOfTwoByTwos( cons[ ,index_chunks[length(index_chunks)/2,1]:index_chunks[length(index_chunks)/2,2]] )
- ),0)
- result_first<-calculate_bayesGHelper(convolutionPowersOfTwoByTwos(firsts))
- # ... then combine it with the result of the regular chunks; the second list element
- # holds log2 of the number of columns behind each of the two inputs
- result<-calculate_bayesGHelper(
- list(
- cbind(
- result_first,last[[1]]),
- c(log(index_chunks_end[length(index_chunks)/2-1],2),log(index_chunks[length(index_chunks)/2,2]-index_chunks[length(index_chunks)/2,1]+1,2))
- )
- )
- }
- # NOTE(review): `result` is only assigned for case 0 or 1; any other value of `case` fails here
- return(as.vector(result))
- }
-
- # Computes the credible interval (95% by default) for a pdf.
- #   Pdf          density values on seq(-max_sigma, max_sigma, length.out=length_sigma)
- #   low, up      lower / upper cumulative probabilities of the interval
- # Returns c(lower, upper) on the sigma grid, or NA when Pdf does not have length_sigma values.
- calcBayesCI <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
-   if(length(Pdf)!=length_sigma) return(NA)
-   sigma_s <- seq(-max_sigma,max_sigma,length.out=length_sigma)
-   cdf <- cumsum(Pdf)
-   cdf <- cdf/cdf[length(cdf)]
-   # findInterval() returns 0 when the probability lies below cdf[1]. Unclamped, the lower
-   # index could become -1 (which drops an element instead of selecting one) or 0 (which
-   # selects nothing), so both indices are kept at 1 or above.
-   lowIndex <- max(findInterval(low,cdf)-1, 1)
-   upIndex <- max(findInterval(up,cdf), 1)
-   return( c(sigma_s[lowIndex] , sigma_s[upIndex]) )
- }
-
- # Computes the mean of a pdf given on seq(-max_sigma, max_sigma, length.out=length_sigma).
- # Returns a 1x1 matrix (result of %*%), or NA when Pdf does not have length_sigma values.
- calcBayesMean <- function(Pdf,max_sigma=20,length_sigma=4001){
-   if(length(Pdf)!=length_sigma) return(NA)
-   grid <- seq(-max_sigma,max_sigma,length.out=length_sigma)
-   # Number of grid intervals per unit of sigma
-   pointsPerUnit <- (length_sigma-1)/2/max_sigma
-   weightedSum <- Pdf%*%grid
-   weightedSum/pointsPerUnit
- }
-
- # Returns the mean, and the credible interval (95% by default) for a pdf,
- # as c(mean, lower, upper). A missing pdf (empty, or starting with NA) gives three NAs.
- calcBayesOutputInfo <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
-   # Only the first element is tested: if() on a whole vector is an error in R >= 4.2
-   # (earlier versions silently used the first element, which this reproduces).
-   if(length(Pdf)==0 || is.na(Pdf[1]))
-     return(rep(NA,3))
-   bCI <- calcBayesCI(Pdf=Pdf,low=low,up=up,max_sigma=max_sigma,length_sigma=length_sigma)
-   bMean <- calcBayesMean(Pdf=Pdf,max_sigma=max_sigma,length_sigma=length_sigma)
-   return(c(bMean, bCI))
- }
-
- # Computes the signed p-value of a pdf: the probability mass below the centre of the grid
- # (plus half of the central point). Values above 0.5 are reported as value-1, i.e. negative.
- # A Pdf of length <= 1 gives NA.
- computeSigmaP <- function(Pdf, length_sigma=4001, max_sigma=20){
-   if(!(length(Pdf)>1)) return(NA)
-   norm <- (length_sigma-1)/2/max_sigma
-   lowerHalf <- 1:((length_sigma-1)/2)
-   centre <- (length_sigma+1)/2
-   pVal <- (sum(Pdf[lowerHalf]) + Pdf[centre]/2)/norm
-   if(pVal>0.5){
-     pVal <- pVal-1
-   }
-   return(pVal)
- }
-
- # Compute the signed p-value of two distributions given on the same grid:
- # sum over the grid of dens1 times the mid-point cdf of dens2 (both normalised to sum 1).
- # Values above 0.5 are reported as value-1. Returns NA unless both inputs have length > 1.
- # sigma_S and N are not used by the computation.
- compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){
-   if( !(length(dens1)>1 & length(dens2)>1) ) return(NA)
-   weights1 <- dens1/sum(dens1)
-   weights2 <- dens2/sum(dens2)
-   # cdf of the second distribution, taken at the middle of every grid cell
-   midCdf2 <- cumsum(weights2)-weights2/2
-   tmp <- sum(weights1*midCdf2[seq_along(weights1)])
-   if(tmp>0.5) tmp <- tmp-1
-   return( tmp )
- }
-
- # Get the number of sequences (rows of matMutations) that have at least one mutation
- # in the columns relevant to the CDR and to the FWR.
- #   test 1 (focused): CDR = columns 1,2,4; FWR = columns 3,4,2
- #   test 2 (local), also used for test 4: CDR = columns 1,2; FWR = columns 3,4
- # Any other test value gives zero for both regions.
- numberOfSeqsWithMutations <- function(matMutations,test=1){
-   if(test==4) test <- 2
-   cdrSeqs <- 0
-   fwrSeqs <- 0
-   cdrCols <- NULL
-   fwrCols <- NULL
-   if(test==1){ cdrCols <- c(1,2,4); fwrCols <- c(3,4,2) }
-   if(test==2){ cdrCols <- c(1,2);   fwrCols <- c(3,4) }
-   if(!is.null(cdrCols)){
-     cdrMutations <- apply(matMutations, 1, function(x){ sum(x[cdrCols]) })
-     fwrMutations <- apply(matMutations, 1, function(x){ sum(x[fwrCols]) })
-     # Count only when at least one row is known to be positive
-     if( length(which(cdrMutations>0))>0 ) cdrSeqs <- sum(cdrMutations>0)
-     if( length(which(fwrMutations>0))>0 ) fwrSeqs <- sum(fwrMutations>0)
-   }
-   return(c("CDR"=cdrSeqs, "FWR"=fwrSeqs))
- }
-
-
-
-# Pick a background colour for a (sigma, p-value) pair.
-#   sigmaVal  sign decides the hue: negative -> greens, otherwise -> reds;
-#             when NA, the sign of pVal is used instead
-#   pVal      its absolute value decides the intensity (thresholds 0.1, 0.05, 0.01, 0.005)
-# Returns NA when both inputs are NA, white when pVal is NA, 0 or 1, else a hex colour string.
-shadeColor <- function(sigmaVal=NA,pVal=NA){
-  if(is.na(sigmaVal) && is.na(pVal)) return(NA)
-  if(is.na(sigmaVal) && !is.na(pVal)) sigmaVal <- sign(pVal)
-  if(is.na(pVal) || pVal==1 || pVal==0) return("#FFFFFF")
-
-  # Palettes ordered from weakest to strongest shade
-  greens <- c("#CCFFCC","#99FF99","#66FF66","#33FF33","#00FF00")
-  reds   <- c("#FFCCCC","#FF9999","#FF6666","#FF3333","#FF0000")
-  shades <- if(sigmaVal<0) greens else reds
-
-  # One step stronger for every threshold the p-value passes. All thresholds are inclusive;
-  # the strongest red used to be exclusive (<0.005), unlike every other threshold.
-  colVal <- abs(pVal)
-  level <- 1 + sum(colVal <= c(0.1,0.05,0.01,0.005))
-  return(shades[level])
-}
-
-
-
-# Coordinates of a point placed outside the current plot region: xfrac of the plot width
-# to the left of the left edge and yfrac of the plot height above the top edge.
-#   log  FALSE for linear axes; 1 = log x axis, 2 = log y axis, 3 = both
-# Returns c(x=..., y=...) in user coordinates.
-plotHelp <- function(xfrac=0.05,yfrac=0.05,log=FALSE){
-  # Positions in the units of par("usr") (powers of ten on a log axis)
-  leftOfPlot <- function(){ usr <- par()$usr; usr[1]-(usr[2]-usr[1])*xfrac }
-  abovePlot  <- function(){ usr <- par()$usr; usr[4]+(usr[4]-usr[3])*yfrac }
-  if(!log){
-    x <- leftOfPlot()
-    y <- abovePlot()
-  }else{
-    if(log==2){
-      x <- leftOfPlot()
-      y <- 10^abovePlot()
-    }
-    if(log==1){
-      x <- 10^leftOfPlot()
-      y <- abovePlot()
-    }
-    if(log==3){
-      x <- 10^leftOfPlot()
-      y <- 10^abovePlot()
-    }
-  }
-  return(c("x"=x,"y"=y))
-}
-
-# SHMulation
-
- # Based on targeting, introduce a single mutation into the global seqSim & then update
- # the targeting. Returns the mutated nucleotide position.
- oneMutation <- function(){
-   # Draw one of the seqGermlineLen*4 (position, nucleotide) slots, weighted by seqTargeting
-   slot <- sample(1:(seqGermlineLen*4),1,replace=F,prob=as.vector(seqTargeting))
-   nucPosition <- ceiling(slot/4)                # nucleotide number
-   nucKind <- 4 - ( (nucPosition*4) - slot )     # index into NUCLEOTIDES of the new base
-
-   # Write the new base into the simulated sequence
-   simChars <- s2c(seqSim)
-   simChars[nucPosition] <- NUCLEOTIDES[nucKind]
-   seqSim <<- c2s(simChars)
-
-   # Refresh mutability, targeting & mutation types around the mutated position
-   updateMutabilityNTargeting(nucPosition)
-
-   return(nucPosition)
- }
-
- # After a mutation at `position`, refresh the global simulation state in a window of
- # +/- 2 nucleotides: seqMutability, seqTargeting and seqMutationTypes (all written with <<-).
- # Reads the globals seqSim, matCDR, matFWR, selCDR, selFWR, baseLineCDR_K and baseLineFWR_K.
- updateMutabilityNTargeting <- function(position){
- # Window of affected positions, clipped to the sequence
- min_i<-max((position-2),1)
- max_i<-min((position+2),nchar(seqSim))
- # Offset of the window inside the mutabilities computed below
- # NOTE(review): presumably compensates for the substring starting at position-4 - confirm
- min_ii<-min(min_i,3)
-
- #mutability - update locally
- seqMutability[(min_i):(max_i)] <<- computeMutabilities(substr(seqSim,position-4,position+4))[(min_ii):(max_i-min_i+min_ii)]
-
-
- #targeting - compute locally
- seqTargeting[,min_i:max_i] <<- computeTargeting(substr(seqSim,min_i,max_i),seqMutability[min_i:max_i])
- seqTargeting[is.na(seqTargeting)] <<- 0
- #mutCodonPos = getCodonPos(position)
- # All positions of the codons touched by the window
- mutCodonPos = seq(getCodonPos(min_i)[1],getCodonPos(max_i)[3])
- #cat(mutCodonPos,"\n")
- # Mutation types are recomputed for the codon containing the mutation only
- mutTypeCodon = getCodonPos(position)
- seqMutationTypes[,mutTypeCodon] <<- computeMutationTypesFast( substr(seqSim,mutTypeCodon[1],mutTypeCodon[3]) )
- # Stop = 0
- if(any(seqMutationTypes[,mutCodonPos]=="Stop",na.rm=T )){
- seqTargeting[,mutCodonPos][seqMutationTypes[,mutCodonPos]=="Stop"] <<- 0
- }
-
-
- #Selection
- # Linear indices into seqTargeting of the "R" entries inside the window
- selectedPos = (min_i*4-4)+(which(seqMutationTypes[,min_i:max_i]=="R"))
- # CDR
- selectedCDR = selectedPos[which(matCDR[selectedPos]==T)]
- seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR] * exp(selCDR)
- seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR]/baseLineCDR_K
-
- # FWR
- selectedFWR = selectedPos[which(matFWR[selectedPos]==T)]
- seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR] * exp(selFWR)
- seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR]/baseLineFWR_K
-
- }
-
-
-
- # Validate the mutation at the global mutatedPos:
- #  - a position not sampled before is appended to mutatedPositions and counted;
- #  - a position sampled before that is now back at its germline base is removed and uncounted;
- #  - otherwise nothing changes.
- validateMutation <- function(){
-   seenBefore <- mutatedPos%in%mutatedPositions
-   if( !seenBefore ){
-     uniqueMutationsIntroduced <<- uniqueMutationsIntroduced + 1
-     mutatedPositions[uniqueMutationsIntroduced] <<- mutatedPos
-   }else if( substr(seqSim,mutatedPos,mutatedPos)==substr(seqGermline,mutatedPos,mutatedPos) ){
-     # back to germline mutation
-     mutatedPositions <<- mutatedPositions[-which(mutatedPositions==mutatedPos)]
-     uniqueMutationsIntroduced <<- uniqueMutationsIntroduced - 1
-   }
- }
-
-
-
- # Places text (labels) at normalized coordinates: xfrac of the plot width to the left of
- # the plot region and yfrac of its height above it. Clipping is switched off while drawing.
- #   log  FALSE for linear axes; 1 = log x axis, 2 = log y axis, 3 = both
- myaxis <- function(xfrac=0.05,yfrac=0.05,log=FALSE,w="text",cex=1,adj=1,thecol="black"){
-   par(xpd=TRUE)
-   usr <- par()$usr
-   # Label position in the units of par("usr") (powers of ten on a log axis)
-   xPos <- usr[1]-(usr[2]-usr[1])*xfrac
-   yPos <- usr[4]+(usr[4]-usr[3])*yfrac
-   if(!log){
-     text(xPos,yPos,w,cex=cex,adj=adj,col=thecol)
-   }else{
-     if(log==2) text(xPos,10^yPos,w,cex=cex,adj=adj,col=thecol)
-     if(log==1) text(10^xPos,yPos,w,cex=cex,adj=adj,col=thecol)
-     if(log==3) text(10^xPos,10^yPos,w,cex=cex,adj=adj,col=thecol)
-   }
-   par(xpd=FALSE)
- }
-
-
-
- # Count the mutations in a sequence.
- # Compares row `inputMatrixIndex` of the global matInput (column 1 = sequence, column 2 = germline)
- # nucleotide by nucleotide and classifies every differing position with mutationType().
- #   model            1 keeps only mutations of type "S"
- #   multipleMutation 1 drops mutations in codons that carry more than one mutation and
- #                    overwrites those codons with "N" in matInput (global side effect)
- #   seqWithStops     1 drops positions whose whole-codon type is "Stop";
- #                    2 discards the sequence unless a "Stop" is present;
- #                    3 discards the sequence if a "Stop" is present
- # Returns the mutation types named by position, or NA when nothing is left.
- analyzeMutations <- function( inputMatrixIndex, model = 0 , multipleMutation=0, seqWithStops=0){
-
- paramGL = s2c(matInput[inputMatrixIndex,2])
- paramSeq = s2c(matInput[inputMatrixIndex,1])
-
- #if( any(paramSeq=="N") ){
- # gapPos_Seq = which(paramSeq=="N")
- # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
- # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
- #}
- mutations_val = paramGL != paramSeq
-
- if(any(mutations_val)){
- mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val]
- length_mutations =length(mutationPos)
- mutationInfo = rep(NA,length_mutations)
-
- pos<- mutationPos
- pos_array<-array(sapply(pos,getCodonPos))
- codonGL = paramGL[pos_array]
- # Codon as observed in the sequence (may contain several mutations)
- codonSeqWhole = paramSeq[pos_array]
- # Germline codon with only the one mutated nucleotide replaced
- codonSeq = sapply(pos,function(x){
- seqP = paramGL[getCodonPos(x)]
- muCodonPos = {x-1}%%3+1
- seqP[muCodonPos] = paramSeq[x]
- return(seqP)
- })
- GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
- SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)
- Seqcodons = apply(codonSeq,2,c2s)
-
- # Type of each mutation taken on its own ...
- mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
- names(mutationInfo) = mutationPos
-
- # ... and type of the whole observed codon
- mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
- names(mutationInfoWhole) = mutationPos
-
- mutationInfo <- mutationInfo[!is.na(mutationInfo)]
- mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]
-
- if(any(!is.na(mutationInfo))){
-
- #Filter based on Stop (at the codon level)
- if(seqWithStops==1){
- nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"])
- mutationInfo = mutationInfo[nucleotidesAtStopCodons]
- # NOTE(review): this indexes the already filtered mutationInfo, not mutationInfoWhole - confirm intent
- mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons]
- }else{
- countStops = sum(mutationInfoWhole=="Stop")
- if(seqWithStops==2 & countStops==0) mutationInfo = NA
- if(seqWithStops==3 & countStops>0) mutationInfo = NA
- }
-
- if(any(!is.na(mutationInfo))){
- #Filter mutations based on multipleMutation
- # NOTE(review): !is.na(mutationInfo) is a vector inside if(); an error in R >= 4.2 when longer than 1
- if(multipleMutation==1 & !is.na(mutationInfo)){
- mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
- tableMutationCodons <- table(mutationCodons)
- codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
- if(any(codonsWithMultipleMutations)){
- #remove the nucleotide mutations in the codons with multiple mutations
- mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
- #replace those codons with Ns in the input sequence
- paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
- matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
- }
- }
-
- #Filter mutations based on the model
- # NOTE(review): any() is applied to what appears to be a character vector; the is.na() arm
- # is what lets this condition pass - verify with a maintainer before touching it
- if(any(mutationInfo)==T | is.na(any(mutationInfo))){
-
- if(model==1 & !is.na(mutationInfo)){
- mutationInfo <- mutationInfo[mutationInfo=="S"]
- }
- if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(mutationInfo)
- else return(NA)
- }else{
- return(NA)
- }
- }else{
- return(NA)
- }
-
-
- }else{
- return(NA)
- }
-
-
- }else{
- return (NA)
- }
- }
-
- # Variant of analyzeMutations() that takes the pair c(sequence, germline) as `inputArray`
- # and does not write to matInput: the (possibly N-masked) sequence is returned instead.
- #   model            1 keeps only mutations of type "S"
- #   multipleMutation 1 drops mutations in codons that carry more than one mutation and
- #                    masks those codons with "N" in the returned sequence
- #   seqWithStops     1 drops positions whose whole-codon type is "Stop";
- #                    2 discards the sequence unless a "Stop" is present;
- #                    3 discards the sequence if a "Stop" is present
- # Returns list(mutation types named by position or NA, sequence string).
- analyzeMutationsFixed <- function( inputArray, model = 0 , multipleMutation=0, seqWithStops=0){
-
- paramGL = s2c(inputArray[2])
- paramSeq = s2c(inputArray[1])
- inputSeq <- inputArray[1]
- #if( any(paramSeq=="N") ){
- # gapPos_Seq = which(paramSeq=="N")
- # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
- # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
- #}
- mutations_val = paramGL != paramSeq
-
- if(any(mutations_val)){
- mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val]
- length_mutations =length(mutationPos)
- mutationInfo = rep(NA,length_mutations)
-
- pos<- mutationPos
- pos_array<-array(sapply(pos,getCodonPos))
- codonGL = paramGL[pos_array]
- # Codon as observed in the sequence (may contain several mutations)
- codonSeqWhole = paramSeq[pos_array]
- # Germline codon with only the one mutated nucleotide replaced
- codonSeq = sapply(pos,function(x){
- seqP = paramGL[getCodonPos(x)]
- muCodonPos = {x-1}%%3+1
- seqP[muCodonPos] = paramSeq[x]
- return(seqP)
- })
- GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
- SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)
- Seqcodons = apply(codonSeq,2,c2s)
-
- # Type of each mutation taken on its own ...
- mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
- names(mutationInfo) = mutationPos
-
- # ... and type of the whole observed codon
- mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
- names(mutationInfoWhole) = mutationPos
-
- mutationInfo <- mutationInfo[!is.na(mutationInfo)]
- mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]
-
- if(any(!is.na(mutationInfo))){
-
- #Filter based on Stop (at the codon level)
- if(seqWithStops==1){
- nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"])
- mutationInfo = mutationInfo[nucleotidesAtStopCodons]
- # NOTE(review): this indexes the already filtered mutationInfo, not mutationInfoWhole - confirm intent
- mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons]
- }else{
- countStops = sum(mutationInfoWhole=="Stop")
- if(seqWithStops==2 & countStops==0) mutationInfo = NA
- if(seqWithStops==3 & countStops>0) mutationInfo = NA
- }
-
- if(any(!is.na(mutationInfo))){
- #Filter mutations based on multipleMutation
- # NOTE(review): !is.na(mutationInfo) is a vector inside if(); an error in R >= 4.2 when longer than 1
- if(multipleMutation==1 & !is.na(mutationInfo)){
- mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
- tableMutationCodons <- table(mutationCodons)
- codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
- if(any(codonsWithMultipleMutations)){
- #remove the nucleotide mutations in the codons with multiple mutations
- mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
- #replace those codons with Ns in the input sequence
- paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
- #matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
- inputSeq <- c2s(paramSeq)
- }
- }
-
- #Filter mutations based on the model
- # NOTE(review): any() is applied to what appears to be a character vector; the is.na() arm
- # is what lets this condition pass - verify with a maintainer before touching it
- if(any(mutationInfo)==T | is.na(any(mutationInfo))){
-
- if(model==1 & !is.na(mutationInfo)){
- mutationInfo <- mutationInfo[mutationInfo=="S"]
- }
- if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(list(mutationInfo,inputSeq))
- else return(list(NA,inputSeq))
- }else{
- return(list(NA,inputSeq))
- }
- }else{
- return(list(NA,inputSeq))
- }
-
-
- }else{
- return(list(NA,inputSeq))
- }
-
-
- }else{
- return (list(NA,inputSeq))
- }
- }
-
- # triMutability Background Count
- # For row `inputMatrixIndex` of the global matInput: strips "-" from sequence and germline,
- # obtains the mutations from analyzeMutationsFixed(), then counts per germline fivemer
- #  - how often it occurs as background (BackgroundMatrix), and
- #  - how often its middle nucleotide is mutated (MutationMatrix, MutationCountMatrix).
- # Returns list(seqMutability, numbMutations, seqMutabilityCount, BackgroundMatrix);
- # nothing is returned when there are no mutations or the model/stop condition fails.
- # All three matrices start as copies of the global mutabilityMatrix.
- buildMutabilityModel <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){
-
- #rowOrigMatInput = matInput[inputMatrixIndex,]
- seqGL = gsub("-", "", matInput[inputMatrixIndex,2])
- seqInput = gsub("-", "", matInput[inputMatrixIndex,1])
- #matInput[inputMatrixIndex,] <<- cbind(seqInput,seqGL)
- tempInput <- cbind(seqInput,seqGL)
- seqLength = nchar(seqGL)
- list_analyzeMutationsFixed<- analyzeMutationsFixed(tempInput, model, multipleMutation, seqWithStops)
- mutationCount <- list_analyzeMutationsFixed[[1]]
- seqInput <- list_analyzeMutationsFixed[[2]]
- BackgroundMatrix = mutabilityMatrix
- MutationMatrix = mutabilityMatrix
- MutationCountMatrix = mutabilityMatrix
- # NOTE(review): is.na() of a vector inside if() is an error in R >= 4.2 when mutationCount has more than one element
- if(!is.na(mutationCount)){
- # NOTE(review): the next condition is corrupted in this copy of the file - ")0))" is not valid R.
- # Text between a "<" and a ">" appears to have been lost; restore it from a pristine copy of the file.
- if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")0)) ){
-
- fivermerStartPos = 1:(seqLength-4)
- fivemerLength <- length(fivermerStartPos)
- fivemerGL <- substr(rep(seqGL,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
- fivemerSeq <- substr(rep(seqInput,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
-
- #Background
- for(fivemerIndex in 1:fivemerLength){
- fivemer = fivemerGL[fivemerIndex]
- if(!any(grep("N",fivemer))){
- # Codon (in reading frame) that contains the middle nucleotide of the fivemer
- fivemerCodonPos = fivemerCodon(fivemerIndex)
- fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3])
- fivemerReadingFrameCodonInputSeq = substr(fivemerSeq[fivemerIndex],fivemerCodonPos[1],fivemerCodonPos[3])
-
- # All mutations model
- #if(!any(grep("N",fivemerReadingFrameCodon))){
- if(model==0){
- if(stopMutations==0){
- if(!any(grep("N",fivemerReadingFrameCodonInputSeq)))
- BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + 1)
- }else{
- if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" ){
- # Position of the fivemer's middle nucleotide (index 3) inside its codon
- positionWithinCodon = which(fivemerCodonPos==3)#positionsWithinCodon[(fivemerCodonPos[1]%%3)+1]
- BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probNonStopMutations[fivemerReadingFrameCodon,positionWithinCodon])
- }
- }
- }else{ # Only silent mutations
- if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" & translateCodonToAminoAcid(fivemerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(fivemerReadingFrameCodon)){
- positionWithinCodon = which(fivemerCodonPos==3)
- BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probSMutations[fivemerReadingFrameCodon,positionWithinCodon])
- }
- }
- #}
- }
- }
-
- #Mutations
- if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
- if(model==1) mutationCount = mutationCount[mutationCount=="S"]
- mutationPositions = as.numeric(names(mutationCount))
- # Keep only positions that are the middle of a complete fivemer
- mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)]
- mutationPositions = mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
- countMutations = 0
- for(mutationPosition in mutationPositions){
- # The fivemer centred on the mutation starts two positions earlier
- fivemerIndex = mutationPosition-2
- fivemer = fivemerSeq[fivemerIndex]
- GLfivemer = fivemerGL[fivemerIndex]
- fivemerCodonPos = fivemerCodon(fivemerIndex)
- fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3])
- fivemerReadingFrameCodonGL = substr(GLfivemer,fivemerCodonPos[1],fivemerCodonPos[3])
- if(!any(grep("N",fivemer)) & !any(grep("N",GLfivemer))){
- if(model==0){
- countMutations = countMutations + 1
- MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + 1)
- MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)
- }else{
- if( translateCodonToAminoAcid(fivemerReadingFrameCodonGL)!="*" ){
- countMutations = countMutations + 1
- positionWithinCodon = which(fivemerCodonPos==3)
- glNuc = substr(fivemerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
- inputNuc = substr(fivemerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
- # Weighted by the global substitution matrix entry for germline -> observed nucleotide
- MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + substitution[glNuc,inputNuc])
- MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)
- }
- }
- }
- }
-
- # Mutations per background occurrence, normalised to sum to 1
- seqMutability = MutationMatrix/BackgroundMatrix
- seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
- #cat(inputMatrixIndex,"\t",countMutations,"\n")
- return(list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))
-
- }
- }
-
- }
-
- # Returns the positions, within a fivemer, of the codon that contains its middle nucleotide.
- # The reading frame follows from the fivemer's start index modulo 3:
- #   remainder 0 -> 2:4, remainder 1 -> 1:3, remainder 2 -> 3:5
- fivemerCodon <- function(fivemerIndex){
-   frame <- fivemerIndex%%3
-   codonByFrame <- list(2:4,1:3,3:5)
-   codonByFrame[[frame+1]]
- }
-
- # Returns, for every codon (rows) and codon position (columns 1-3), the summed substitution
- # rate of the single-nucleotide mutations at that position whose mutationType() equals
- # typeOfMutation. Codons containing "N" keep a value of 0.
- probMutations <- function(typeOfMutation){
-   codonNames <- words(alphabet = c(NUCLEOTIDES,"N"), length=3)
-   matMutationProb <- matrix(0,ncol=3,nrow=125,dimnames=list(codonNames,c(1:3)))
-   for(codon in rownames(matMutationProb)){
-     if( any(grep("N",codon)) ) next
-     for(muPos in 1:3){
-       # Three copies of the codon, one per possible replacement nucleotide
-       matCodon <- matrix(rep(s2c(codon),3),nrow=3,ncol=3,byrow=T)
-       glNuc <- matCodon[1,muPos]
-       matCodon[,muPos] <- canMutateTo(glNuc)
-       substitutionRate <- substitution[glNuc,matCodon[,muPos]]
-       mutatedCodons <- apply(matCodon,1,c2s)
-       typeOfMutations <- apply(rbind(rep(codon,3),mutatedCodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
-       matMutationProb[codon,muPos] <- sum(substitutionRate[typeOfMutations==typeOfMutation])
-     }
-   }
-
-   return(matMutationProb)
- }
-
-
-
-
-# Mapping trinucleotides to fivemers: the value of a fivemer is the sum of
-#   column 1 of triMutability for its nucleotides 3-5,
-#   column 2 for its nucleotides 2-4 and
-#   column 3 for its nucleotides 1-3 (missing entries ignored),
-# and the 1024 values are then normalised to sum to 1.
-mapTriToFivemer <- function(triMutability=triMutability_Literature_Human){
-  rownames(triMutability) <- triMutability_Names
-  template <- rep(NA,1024)
-  names(template) <- words(alphabet=NUCLEOTIDES,length=5)
-  fivemerValue <- function(Word){
-    parts <- c(triMutability[substring(Word,3,5),1],
-               triMutability[substring(Word,2,4),2],
-               triMutability[substring(Word,1,3),3])
-    return(sum(parts,na.rm=TRUE))
-  }
-  Fivemer <- sapply(names(template),fivemerValue)
-  Fivemer/sum(Fivemer)
-}
-
-# Collapse fivemer values to trinucleotides: among the fivemers whose middle nucleotide is NUC,
-# group by the trinucleotide at fivemer positions (4-position):(6-position) and take the
-# Weights-weighted mean of each group.
-collapseFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
-  middleIsNuc <- substring(names(Fivemer),3,3)==NUC
-  members <- which(middleIsNuc)
-  triLabels <- substring(names(Fivemer[middleIsNuc]),(4-position),(6-position))
-  tapply(members,triLabels,function(i)weighted.mean(Fivemer[i],Weights[i],na.rm=TRUE))
-}
-
-
-
-# Same grouping as collapseFivemerToTri(), but returns the summed Weights of every
-# trinucleotide group instead of a weighted mean.
-CountFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
-  middleIsNuc <- substring(names(Fivemer),3,3)==NUC
-  members <- which(middleIsNuc)
-  triLabels <- substring(names(Fivemer[middleIsNuc]),(4-position),(6-position))
-  tapply(members,triLabels,function(i)sum(Weights[i],na.rm=TRUE))
-}
-
-# Uses the real counts of the mutated fivemers: same grouping as CountFivemerToTri(),
-# summing Counts instead of Weights.
-CountFivemerToTri2<-function(Fivemer,Counts=MutabilityCounts,position=1,NUC="A"){
-  middleIsNuc <- substring(names(Fivemer),3,3)==NUC
-  members <- which(middleIsNuc)
-  triLabels <- substring(names(Fivemer[middleIsNuc]),(4-position),(6-position))
-  tapply(members,triLabels,function(i)sum(Counts[i],na.rm=TRUE))
-}
-
-# Bootstrap intervals for the proportions behind the counts x:
-# M multinomial resamples of size sum(x), then per category the quantiles at
-# alpha/2/(k) and 1-alpha/2/(k), where k = length(x)-1.
-# Returns a 2 x length(x) matrix; all zeros when the counts sum to zero.
-bootstrap<-function(x=c(33,12,21),M=10000,alpha=0.05){
-  N <- sum(x)
-  if(!N) return(matrix(0,2,length(x)))
-  p <- x/N
-  k <- length(x)-1
-  resamples <- rmultinom(M, size = N, prob=p)
-  proportions <- apply(resamples,2,function(y)y/N)
-  tailProb <- alpha/2/k
-  apply(proportions,1,function(y)quantile(y,c(tailProb,1-tailProb)))
-}
-
-
-
-
-# Bootstrap intervals for category proportions in samples of size n drawn without
-# replacement from the pool described by the counts x. M samples are drawn; per category
-# the quantiles at alpha/2/(k-1) and 1-alpha/2/(k-1) are returned (k = length(x)).
-bootstrap2<-function(x=c(33,12,21),n=10,M=10000,alpha=0.05){
-  N <- sum(x)
-  k <- length(x)
-  # One entry per counted item, labelled with its category
-  pool <- rep(1:k,x)
-  draws <- sapply(1:M,function(i)sample(pool,n))
-  countCategories <- function(v) sapply(1:k,function(i)sum(v==i))
-  # draws is a matrix (one column per sample) for n>1 and a plain vector for n==1
-  if(n>1) tmp_p <- sapply(1:M,function(j)countCategories(draws[,j]))/n
-  if(n==1) tmp_p <- sapply(1:M,function(j)countCategories(draws[j]))/n
-  tailProb <- alpha/2/(k-1)
-  apply(tmp_p,1,function(z)quantile(z,c(tailProb,1-tailProb)))
-}
-
-
-
-# Simulated signed p-value per category: draws M samples of size sum(x_obs) without
-# replacement from the pool described by the counts x and compares the simulated counts
-# with x_obs.
-#   x      counts defining the pool (one entry per category)
-#   M      number of simulated samples
-#   x_obs  observed counts, one per category of x
-# Returns one value per category: the upper-tail frequency P(sim >= obs) when it is the
-# smaller of the two tails, otherwise minus the lower-tail frequency P(sim <= obs).
-p_value<-function(x=c(33,12,21),M=100000,x_obs=c(2,5,3)){
-  n <- sum(x_obs)
-  k <- length(x)
-  categories <- seq_len(k)
-  pool <- rep(categories,x)
-  # Index-based sampling: sample(pool,n) would sample from 1:pool when pool has one element
-  draws <- lapply(seq_len(M),function(i)pool[sample.int(length(pool),n)])
-  # k x M matrix of simulated counts; tabulate() also covers n==0 and n==1
-  counts <- matrix(vapply(draws,function(d)tabulate(d,nbins=k),integer(k)),nrow=k)
-  # Category indices follow length(x); they used to be hard-coded to 1:3
-  upperTail <- sapply(categories,function(i)sum(counts[i,]>=x_obs[i])/M)
-  lowerTail <- sapply(categories,function(i)sum(counts[i,]<=x_obs[i])/M)
-  sapply(categories,function(i){if(upperTail[i]>=lowerTail[i])return(-lowerTail[i])else return(upperTail[i])})
-}
-
-# Example input: "D:\\Sequences\\IMGT Germlines\\Human_SNPless_IGHJ.FASTA"
-# Remove SNPs from IMGT germline segment alleles: every position (within the first 320)
-# at which an allele differs from the first allele of its gene segment is replaced by "N"
-# in all alleles of that segment. Positions where either allele has "." are not compared.
-# Reads repertoireInFile and writes the masked sequences to repertoireOutFile (FASTA).
-generateUnambiguousRepertoire <- function(repertoireInFile,repertoireOutFile){
-  repertoireIn <- read.fasta(repertoireInFile, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F)
-  # Headers are "|"-separated, allele name in field 2; the gene segment precedes the "*"
-  alleleNames <- sapply(names(repertoireIn),function(x)strsplit(x,"|",fixed=TRUE)[[1]][2])
-  segmentOfAllele <- sapply(alleleNames,function(x)strsplit(x,"*",fixed=TRUE)[[1]][1])
-  compared <- 1:320
-  SNPs <- tapply(repertoireIn,segmentOfAllele,function(x){
-    firstSeq <- s2c(x[[1]])
-    Indices <- NULL
-    for(allele in x){
-      iSeq <- s2c(allele)
-      Indices <- c(Indices,which(firstSeq[compared]!=iSeq[compared] & firstSeq[compared]!="." & iSeq[compared]!="." ))
-    }
-    return(sort(unique(Indices)))
-  })
-  # Mask the SNP positions of every allele
-  repertoireOut <- lapply(names(repertoireIn), function(repertoireName){
-    alleleName <- strsplit(repertoireName,"|",fixed=TRUE)[[1]][2]
-    geneSegmentName <- strsplit(alleleName,"*",fixed=TRUE)[[1]][1]
-    alleleSeq <- s2c(repertoireIn[[repertoireName]])
-    alleleSeq[as.numeric(unlist(SNPs[geneSegmentName]))] <- "N"
-    c2s(alleleSeq)
-  })
-  names(repertoireOut) <- names(repertoireIn)
-  write.fasta(repertoireOut,names(repertoireOut),file.out=repertoireOutFile)
-
-}
-
-
-
-
-
-
-############
-# Combined "Focused" pdfs for the rows `indexes` of param_resultMat
-# (columns 1-4 observed, columns 5-8 expected), computed with calculate_bayesG().
-# Returns list(BayesGDist_Focused_CDR, BayesGDist_Focused_FWR).
-groupBayes2 = function(indexes, param_resultMat){
-
-  # x = observed column, N = sum of the given observed columns,
-  # p = expected[expCol] / (expected[expCol] + expected[2] + expected[4])
-  focusedDist <- function(obsCol, totalCols, expCol){
-    calculate_bayesG( x=param_resultMat[indexes,obsCol],
-                      N=apply(param_resultMat[indexes,totalCols],1,sum,na.rm=T),
-                      p=apply(param_resultMat[indexes,5:8],1,function(x){x[expCol]/(x[expCol]+x[2]+x[4])}))
-  }
-
-  BayesGDist_Focused_CDR = focusedDist(obsCol=1, totalCols=c(1,2,4), expCol=1)
-  BayesGDist_Focused_FWR = focusedDist(obsCol=3, totalCols=c(3,2,4), expCol=3)
-  # Disabled variants, kept for reference:
-  #BayesGDist_Local_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2])}))
-  #BayesGDist_Local_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[4])}))
-  #BayesGDist_Global_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[3]+x[4])}))
-  #BayesGDist_Global_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[1]+x[2]+x[3]+x[4])}))
-  return ( list("BayesGDist_Focused_CDR"=BayesGDist_Focused_CDR,
-                "BayesGDist_Focused_FWR"=BayesGDist_Focused_FWR) )
-}
-
-
-calculate_bayesG <- function( x=array(), N=array(), p=array(), max_sigma=20, length_sigma=4001){
- G <- max(length(x),length(N),length(p))
- x=array(x,dim=G)
- N=array(N,dim=G)
- p=array(p,dim=G)
-
- indexOfZero = N>0 & p>0
- N = N[indexOfZero]
- x = x[indexOfZero]
- p = p[indexOfZero]
- G <- length(x)
-
- if(G){
-
- cons<-array( dim=c(length_sigma,G) )
- if(G==1) {
- return(calculate_bayes(x=x[G],N=N[G],p=p[G],max_sigma=max_sigma,length_sigma=length_sigma))
- }
- else {
- for(g in 1:G) cons[,g] <- calculate_bayes(x=x[g],N=N[g],p=p[g],max_sigma=max_sigma,length_sigma=length_sigma)
- listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma)
- y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma)
- return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
- }
- }else{
- return(NA)
- }
-}
-
-
-calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){
- matG <- listMatG[[1]]
- groups <- listMatG[[2]]
- i = 1
- resConv <- matG[,i]
- denom <- 2^groups[i]
- if(length(groups)>1){
- while( i0)) ){
-
-# ONEmerStartPos = 1:(seqLength)
-# ONEmerLength <- length(ONEmerStartPos)
- ONEmerGL <- s2c(seqGL)
- ONEmerSeq <- s2c(seqInput)
-
- #Background
- for(ONEmerIndex in 1:seqLength){
- ONEmer = ONEmerGL[ONEmerIndex]
- if(ONEmer!="N"){
- ONEmerCodonPos = getCodonPos(ONEmerIndex)
- ONEmerReadingFrameCodon = c2s(ONEmerGL[ONEmerCodonPos])
- ONEmerReadingFrameCodonInputSeq = c2s(ONEmerSeq[ONEmerCodonPos] )
-
- # All mutations model
- #if(!any(grep("N",ONEmerReadingFrameCodon))){
- if(model==0){
- if(stopMutations==0){
- if(!any(grep("N",ONEmerReadingFrameCodonInputSeq)))
- BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + 1)
- }else{
- if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*"){
- positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)#positionsWithinCodon[(ONEmerCodonPos[1]%%3)+1]
- BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probNonStopMutations[ONEmerReadingFrameCodon,positionWithinCodon])
- }
- }
- }else{ # Only silent mutations
- if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*" & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(ONEmerReadingFrameCodon) ){
- positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
- BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probSMutations[ONEmerReadingFrameCodon,positionWithinCodon])
- }
- }
- }
- }
- }
-
- #Mutations
- if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
- if(model==1) mutationCount = mutationCount[mutationCount=="S"]
- mutationPositions = as.numeric(names(mutationCount))
- mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)]
- mutationPositions = mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
- countMutations = 0
- for(mutationPosition in mutationPositions){
- ONEmerIndex = mutationPosition
- ONEmer = ONEmerSeq[ONEmerIndex]
- GLONEmer = ONEmerGL[ONEmerIndex]
- ONEmerCodonPos = getCodonPos(ONEmerIndex)
- ONEmerReadingFrameCodon = c2s(ONEmerSeq[ONEmerCodonPos])
- ONEmerReadingFrameCodonGL =c2s(ONEmerGL[ONEmerCodonPos])
- if(!any(grep("N",ONEmer)) & !any(grep("N",GLONEmer))){
- if(model==0){
- countMutations = countMutations + 1
- MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + 1)
- MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)
- }else{
- if( translateCodonToAminoAcid(ONEmerReadingFrameCodonGL)!="*" ){
- countMutations = countMutations + 1
- positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
- glNuc = substr(ONEmerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
- inputNuc = substr(ONEmerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
- MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + substitution[glNuc,inputNuc])
- MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)
- }
- }
- }
- }
-
- seqMutability = MutationMatrix/BackgroundMatrix
- seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
- #cat(inputMatrixIndex,"\t",countMutations,"\n")
- return(list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))
-# tmp<-list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix)
- }
- }
-
-################
-# $Id: trim.R 989 2006-10-29 15:28:26Z ggorjan $
-
-trim <- function(s, recode.factor=TRUE, ...)
- UseMethod("trim", s)
-
-trim.default <- function(s, recode.factor=TRUE, ...)
- s
-
-trim.character <- function(s, recode.factor=TRUE, ...)
-{
- s <- sub(pattern="^ +", replacement="", x=s)
- s <- sub(pattern=" +$", replacement="", x=s)
- s
-}
-
-trim.factor <- function(s, recode.factor=TRUE, ...)
-{
- levels(s) <- trim(levels(s))
- if(recode.factor) {
- dots <- list(x=s, ...)
- if(is.null(dots$sort)) dots$sort <- sort
- s <- do.call(what=reorder.factor, args=dots)
- }
- s
-}
-
-trim.list <- function(s, recode.factor=TRUE, ...)
- lapply(s, trim, recode.factor=recode.factor, ...)
-
-trim.data.frame <- function(s, recode.factor=TRUE, ...)
-{
- s[] <- trim.list(s, recode.factor=recode.factor, ...)
- s
-}
-#######################################
-# Compute the expected for each sequence-germline pair by codon
-getExpectedIndividualByCodon <- function(matInput){
-if( any(grep("multicore",search())) ){
- facGL <- factor(matInput[,2])
- facLevels = levels(facGL)
- LisGLs_MutabilityU = mclapply(1:length(facLevels), function(x){
- computeMutabilities(facLevels[x])
- })
- facIndex = match(facGL,facLevels)
-
- LisGLs_Mutability = mclapply(1:nrow(matInput), function(x){
- cInput = rep(NA,nchar(matInput[x,1]))
- cInput[s2c(matInput[x,1])!="N"] = 1
- LisGLs_MutabilityU[[facIndex[x]]] * cInput
- })
-
- LisGLs_Targeting = mclapply(1:dim(matInput)[1], function(x){
- computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
- })
-
- LisGLs_MutationTypes = mclapply(1:length(matInput[,2]),function(x){
- #print(x)
- computeMutationTypes(matInput[x,2])
- })
-
- LisGLs_R_Exp = mclapply(1:nrow(matInput), function(x){
- Exp_R <- rollapply(as.zoo(1:readEnd),width=3,by=3,
- function(codonNucs){
- RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R")
- sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T )
- }
- )
- })
-
- LisGLs_S_Exp = mclapply(1:nrow(matInput), function(x){
- Exp_S <- rollapply(as.zoo(1:readEnd),width=3,by=3,
- function(codonNucs){
- SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S")
- sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T )
- }
- )
- })
-
- Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)
- Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)
- return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) )
- }else{
- facGL <- factor(matInput[,2])
- facLevels = levels(facGL)
- LisGLs_MutabilityU = lapply(1:length(facLevels), function(x){
- computeMutabilities(facLevels[x])
- })
- facIndex = match(facGL,facLevels)
-
- LisGLs_Mutability = lapply(1:nrow(matInput), function(x){
- cInput = rep(NA,nchar(matInput[x,1]))
- cInput[s2c(matInput[x,1])!="N"] = 1
- LisGLs_MutabilityU[[facIndex[x]]] * cInput
- })
-
- LisGLs_Targeting = lapply(1:dim(matInput)[1], function(x){
- computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
- })
-
- LisGLs_MutationTypes = lapply(1:length(matInput[,2]),function(x){
- #print(x)
- computeMutationTypes(matInput[x,2])
- })
-
- LisGLs_R_Exp = lapply(1:nrow(matInput), function(x){
- Exp_R <- rollapply(as.zoo(1:readEnd),width=3,by=3,
- function(codonNucs){
- RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R")
- sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T )
- }
- )
- })
-
- LisGLs_S_Exp = lapply(1:nrow(matInput), function(x){
- Exp_S <- rollapply(as.zoo(1:readEnd),width=3,by=3,
- function(codonNucs){
- SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S")
- sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T )
- }
- )
- })
-
- Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)
- Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)
- return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) )
- }
-}
-
-# getObservedMutationsByCodon <- function(listMutations){
-# numbSeqs <- length(listMutations)
-# obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))
-# obsMu_S <- obsMu_R
-# temp <- mclapply(1:length(listMutations), function(i){
-# arrMutations = listMutations[[i]]
-# RPos = as.numeric(names(arrMutations)[arrMutations=="R"])
-# RPos <- sapply(RPos,getCodonNumb)
-# if(any(RPos)){
-# tabR <- table(RPos)
-# obsMu_R[i,as.numeric(names(tabR))] <<- tabR
-# }
-#
-# SPos = as.numeric(names(arrMutations)[arrMutations=="S"])
-# SPos <- sapply(SPos,getCodonNumb)
-# if(any(SPos)){
-# tabS <- table(SPos)
-# obsMu_S[i,names(tabS)] <<- tabS
-# }
-# }
-# )
-# return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) )
-# }
-
-getObservedMutationsByCodon <- function(listMutations){
- numbSeqs <- length(listMutations)
- obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))
- obsMu_S <- obsMu_R
- temp <- lapply(1:length(listMutations), function(i){
- arrMutations = listMutations[[i]]
- RPos = as.numeric(names(arrMutations)[arrMutations=="R"])
- RPos <- sapply(RPos,getCodonNumb)
- if(any(RPos)){
- tabR <- table(RPos)
- obsMu_R[i,as.numeric(names(tabR))] <<- tabR
- }
-
- SPos = as.numeric(names(arrMutations)[arrMutations=="S"])
- SPos <- sapply(SPos,getCodonNumb)
- if(any(SPos)){
- tabS <- table(SPos)
- obsMu_S[i,names(tabS)] <<- tabS
- }
- }
- )
- return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) )
-}
-
diff -r b84477f57318 -r e7b550d52eb7 tmp/baseline/Baseline_Main.r
--- a/tmp/baseline/Baseline_Main.r Thu Aug 04 04:52:51 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,388 +0,0 @@
-#########################################################################################
-# License Agreement
-#
-# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE
-# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER
-# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE
-# OR COPYRIGHT LAW IS PROHIBITED.
-#
-# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE
-# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED
-# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN
-# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
-#
-# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
-# Coded by: Mohamed Uduman & Gur Yaari
-# Copyright 2012 Kleinstein Lab
-# Version: 1.3 (01/23/2014)
-#########################################################################################
-
-op <- options();
-options(showWarnCalls=FALSE, showErrorCalls=FALSE, warn=-1)
-library('seqinr')
-if( F & Sys.info()[1]=="Linux"){
- library("multicore")
-}
-
-# Load functions and initialize global variables
-source("Baseline_Functions.r")
-
-# Initialize parameters with user provided arguments
- arg <- commandArgs(TRUE)
- #arg = c(2,1,5,5,0,1,"1:26:38:55:65:104:116", "test.fasta","","sample")
- #arg = c(1,1,5,5,0,1,"1:38:55:65:104:116:200", "test.fasta","","sample")
- #arg = c(1,1,5,5,1,1,"1:26:38:55:65:104:116", "/home/mu37/Wu/Wu_Cloned_gapped_sequences_D-masked.fasta","/home/mu37/Wu/","Wu")
- testID <- as.numeric(arg[1]) # 1 = Focused, 2 = Local
- species <- as.numeric(arg[2]) # 1 = Human. 2 = Mouse
- substitutionModel <- as.numeric(arg[3]) # 0 = Uniform substitution, 1 = Smith DS et al. 1996, 5 = FiveS
- mutabilityModel <- as.numeric(arg[4]) # 0 = Uniform mutablity, 1 = Tri-nucleotide (Shapiro GS et al. 2002) , 5 = FiveS
- clonal <- as.numeric(arg[5]) # 0 = Independent sequences, 1 = Clonally related, 2 = Clonally related & only non-terminal mutations
- fixIndels <- as.numeric(arg[6]) # 0 = Do nothing, 1 = Try and fix Indels
- region <- as.numeric(strsplit(arg[7],":")[[1]]) # StartPos:LastNucleotideF1:C1:F2:C2:F3:C3
- inputFilePath <- arg[8] # Full path to input file
- outputPath <- arg[9] # Full path to location of output files
- outputID <- arg[10] # ID for session output
-
-
- if(testID==5){
- traitChangeModel <- 1
- if( !is.na(any(arg[11])) ) traitChangeModel <- as.numeric(arg[11]) # 1 <- Chothia 1998
- initializeTraitChange(traitChangeModel)
- }
-
-# Initialize other parameters/variables
-
- # Initialzie the codon table ( definitions of R/S )
- computeCodonTable(testID)
-
- # Initialize
- # Test Name
- testName<-"Focused"
- if(testID==2) testName<-"Local"
- if(testID==3) testName<-"Imbalanced"
- if(testID==4) testName<-"ImbalancedSilent"
-
- # Indel placeholders initialization
- indelPos <- NULL
- delPos <- NULL
- insPos <- NULL
-
- # Initialize in Tranistion & Mutability matrixes
- substitution <- initializeSubstitutionMatrix(substitutionModel,species)
- mutability <- initializeMutabilityMatrix(mutabilityModel,species)
-
- # FWR/CDR boundaries
- flagTrim <- F
- if( is.na(region[7])){
- flagTrim <- T
- region[7]<-region[6]
- }
- readStart = min(region,na.rm=T)
- readEnd = max(region,na.rm=T)
- if(readStart>1){
- region = region - (readStart - 1)
- }
- region_Nuc = c( (region[1]*3-2) , (region[2:7]*3) )
- region_Cod = region
-
- readStart = (readStart*3)-2
- readEnd = (readEnd*3)
-
- FWR_Nuc <- c( rep(TRUE,(region_Nuc[2])),
- rep(FALSE,(region_Nuc[3]-region_Nuc[2])),
- rep(TRUE,(region_Nuc[4]-region_Nuc[3])),
- rep(FALSE,(region_Nuc[5]-region_Nuc[4])),
- rep(TRUE,(region_Nuc[6]-region_Nuc[5])),
- rep(FALSE,(region_Nuc[7]-region_Nuc[6]))
- )
- CDR_Nuc <- (1-FWR_Nuc)
- CDR_Nuc <- as.logical(CDR_Nuc)
- FWR_Nuc_Mat <- matrix( rep(FWR_Nuc,4), ncol=length(FWR_Nuc), nrow=4, byrow=T)
- CDR_Nuc_Mat <- matrix( rep(CDR_Nuc,4), ncol=length(CDR_Nuc), nrow=4, byrow=T)
-
- FWR_Codon <- c( rep(TRUE,(region[2])),
- rep(FALSE,(region[3]-region[2])),
- rep(TRUE,(region[4]-region[3])),
- rep(FALSE,(region[5]-region[4])),
- rep(TRUE,(region[6]-region[5])),
- rep(FALSE,(region[7]-region[6]))
- )
- CDR_Codon <- (1-FWR_Codon)
- CDR_Codon <- as.logical(CDR_Codon)
-
-
-# Read input FASTA file
- tryCatch(
- inputFASTA <- baseline.read.fasta(inputFilePath, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F)
- , error = function(ex){
- cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n")
- q()
- }
- )
-
- if (length(inputFASTA)==1) {
- cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n")
- q()
- }
-
- # Process sequence IDs/names
- names(inputFASTA) <- sapply(names(inputFASTA),function(x){trim(x)})
-
- # Convert non nucleotide characters to N
- inputFASTA[length(inputFASTA)] = gsub("\t","",inputFASTA[length(inputFASTA)])
- inputFASTA <- lapply(inputFASTA,replaceNonFASTAChars)
-
- # Process the FASTA file and conver to Matrix[inputSequence, germlineSequence]
- processedInput <- processInputAdvanced(inputFASTA)
- matInput <- processedInput[[1]]
- germlines <- processedInput[[2]]
- lenGermlines = length(unique(germlines))
- groups <- processedInput[[3]]
- lenGroups = length(unique(groups))
- rm(processedInput)
- rm(inputFASTA)
-
-# # remove clones with less than 2 seqeunces
-# tableGL <- table(germlines)
-# singletons <- which(tableGL<8)
-# rowsToRemove <- match(singletons,germlines)
-# if(any(rowsToRemove)){
-# matInput <- matInput[-rowsToRemove,]
-# germlines <- germlines[-rowsToRemove]
-# groups <- groups[-rowsToRemove]
-# }
-#
-# # remove unproductive seqs
-# nonFuctionalSeqs <- sapply(rownames(matInput),function(x){any(grep("unproductive",x))})
-# if(any(nonFuctionalSeqs)){
-# if(sum(nonFuctionalSeqs)==length(germlines)){
-# write.table("Unproductive",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
-# q()
-# }
-# matInput <- matInput[-which(nonFuctionalSeqs),]
-# germlines <- germlines[-which(nonFuctionalSeqs)]
-# germlines[1:length(germlines)] <- 1:length(germlines)
-# groups <- groups[-which(nonFuctionalSeqs)]
-# }
-#
-# if(class(matInput)=="character"){
-# write.table("All unproductive seqs",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
-# q()
-# }
-#
-# if(nrow(matInput)<10 | is.null(nrow(matInput))){
-# write.table(paste(nrow(matInput), "seqs only",sep=""),file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
-# q()
-# }
-
-# replace leading & trailing "-" with "N:
- matInput <- t(apply(matInput,1,replaceLeadingTrailingDashes,readEnd))
-
- # Trim (nucleotide) input sequences to the last codon
- #matInput[,1] <- apply(matrix(matInput[,1]),1,trimToLastCodon)
-
-# # Check for Indels
-# if(fixIndels){
-# delPos <- fixDeletions(matInput)
-# insPos <- fixInsertions(matInput)
-# }else{
-# # Check for indels
-# indelPos <- checkForInDels(matInput)
-# indelPos <- apply(cbind(indelPos[[1]],indelPos[[2]]),1,function(x){(x[1]==T & x[2]==T)})
-# }
-
- # If indels are present, remove mutations in the seqeunce & throw warning at end
- #matInput[indelPos,] <- apply(matrix(matInput[indelPos,],nrow=sum(indelPos),ncol=2),1,function(x){x[1]=x[2]; return(x) })
-
- colnames(matInput)=c("Input","Germline")
-
- # If seqeunces are clonal, create effective sequence for each clone & modify germline/group definitions
- germlinesOriginal = NULL
- if(clonal){
- germlinesOriginal <- germlines
- collapseCloneResults <- tapply(1:nrow(matInput),germlines,function(i){
- collapseClone(matInput[i,1],matInput[i[1],2],readEnd,nonTerminalOnly=(clonal-1))
- })
- matInput = t(sapply(collapseCloneResults,function(x){return(x[[1]])}))
- names_groups = tapply(groups,germlines,function(x){names(x[1])})
- groups = tapply(groups,germlines,function(x){array(x[1],dimnames=names(x[1]))})
- names(groups) = names_groups
-
- names_germlines = tapply(germlines,germlines,function(x){names(x[1])})
- germlines = tapply( germlines,germlines,function(x){array(x[1],dimnames=names(x[1]))} )
- names(germlines) = names_germlines
- matInputErrors = sapply(collapseCloneResults,function(x){return(x[[2]])})
- }
-
-
-# Selection Analysis
-
-
-# if (length(germlines)>sequenceLimit) {
-# # Code to parallelize processing goes here
-# stop( paste("Error: Cannot process more than ", Upper_limit," sequences",sep="") )
-# }
-
-# if (length(germlines)1){
- groups <- c(groups,lenGroups+1)
- names(groups)[length(groups)] = "All sequences combined"
- bayesPDF_groups_cdr[[lenGroups+1]] = groupPosteriors(bayesPDF_groups_cdr,length_sigma=4001)
- bayesPDF_groups_fwr[[lenGroups+1]] = groupPosteriors(bayesPDF_groups_fwr,length_sigma=4001)
- }
-
- #Bayesian Outputs
- bayes_cdr = t(sapply(bayesPDF_cdr,calcBayesOutputInfo))
- bayes_fwr = t(sapply(bayesPDF_fwr,calcBayesOutputInfo))
- bayes_germlines_cdr = t(sapply(bayesPDF_germlines_cdr,calcBayesOutputInfo))
- bayes_germlines_fwr = t(sapply(bayesPDF_germlines_fwr,calcBayesOutputInfo))
- bayes_groups_cdr = t(sapply(bayesPDF_groups_cdr,calcBayesOutputInfo))
- bayes_groups_fwr = t(sapply(bayesPDF_groups_fwr,calcBayesOutputInfo))
-
- #P-values
- simgaP_cdr = sapply(bayesPDF_cdr,computeSigmaP)
- simgaP_fwr = sapply(bayesPDF_fwr,computeSigmaP)
-
- simgaP_germlines_cdr = sapply(bayesPDF_germlines_cdr,computeSigmaP)
- simgaP_germlines_fwr = sapply(bayesPDF_germlines_fwr,computeSigmaP)
-
- simgaP_groups_cdr = sapply(bayesPDF_groups_cdr,computeSigmaP)
- simgaP_groups_fwr = sapply(bayesPDF_groups_fwr,computeSigmaP)
-
-
- #Format output
-
- # Round expected mutation frequencies to 3 decimal places
- matMutationInfo[germlinesOriginal[indelPos],] = NA
- if(nrow(matMutationInfo)==1){
- matMutationInfo[5:8] = round(matMutationInfo[,5:8]/sum(matMutationInfo[,5:8],na.rm=T),3)
- }else{
- matMutationInfo[,5:8] = t(round(apply(matMutationInfo[,5:8],1,function(x){ return(x/sum(x,na.rm=T)) }),3))
- }
-
- listPDFs = list()
- nRows = length(unique(groups)) + length(unique(germlines)) + length(groups)
-
- matOutput = matrix(NA,ncol=18,nrow=nRows)
- rowNumb = 1
- for(G in unique(groups)){
- #print(G)
- matOutput[rowNumb,c(1,2,11:18)] = c("Group",names(groups)[groups==G][1],bayes_groups_cdr[G,],bayes_groups_fwr[G,],simgaP_groups_cdr[G],simgaP_groups_fwr[G])
- listPDFs[[rowNumb]] = list("CDR"=bayesPDF_groups_cdr[[G]],"FWR"=bayesPDF_groups_fwr[[G]])
- names(listPDFs)[rowNumb] = names(groups[groups==paste(G)])[1]
- #if(names(groups)[which(groups==G)[1]]!="All sequences combined"){
- gs = unique(germlines[groups==G])
- rowNumb = rowNumb+1
- if( !is.na(gs) ){
- for( g in gs ){
- matOutput[rowNumb,c(1,2,11:18)] = c("Germline",names(germlines)[germlines==g][1],bayes_germlines_cdr[g,],bayes_germlines_fwr[g,],simgaP_germlines_cdr[g],simgaP_germlines_fwr[g])
- listPDFs[[rowNumb]] = list("CDR"=bayesPDF_germlines_cdr[[g]],"FWR"=bayesPDF_germlines_fwr[[g]])
- names(listPDFs)[rowNumb] = names(germlines[germlines==paste(g)])[1]
- rowNumb = rowNumb+1
- indexesOfInterest = which(germlines==g)
- numbSeqsOfInterest = length(indexesOfInterest)
- rowNumb = seq(rowNumb,rowNumb+(numbSeqsOfInterest-1))
- matOutput[rowNumb,] = matrix( c( rep("Sequence",numbSeqsOfInterest),
- rownames(matInput)[indexesOfInterest],
- c(matMutationInfo[indexesOfInterest,1:4]),
- c(matMutationInfo[indexesOfInterest,5:8]),
- c(bayes_cdr[indexesOfInterest,]),
- c(bayes_fwr[indexesOfInterest,]),
- c(simgaP_cdr[indexesOfInterest]),
- c(simgaP_fwr[indexesOfInterest])
- ), ncol=18, nrow=numbSeqsOfInterest,byrow=F)
- increment=0
- for( ioi in indexesOfInterest){
- listPDFs[[min(rowNumb)+increment]] = list("CDR"=bayesPDF_cdr[[ioi]] , "FWR"=bayesPDF_fwr[[ioi]])
- names(listPDFs)[min(rowNumb)+increment] = rownames(matInput)[ioi]
- increment = increment + 1
- }
- rowNumb=max(rowNumb)+1
-
- }
- }
- }
- colsToFormat = 11:18
- matOutput[,colsToFormat] = formatC( matrix(as.numeric(matOutput[,colsToFormat]), nrow=nrow(matOutput), ncol=length(colsToFormat)) , digits=3)
- matOutput[matOutput== " NaN"] = NA
-
-
-
- colnames(matOutput) = c("Type", "ID", "Observed_CDR_R", "Observed_CDR_S", "Observed_FWR_R", "Observed_FWR_S",
- "Expected_CDR_R", "Expected_CDR_S", "Expected_FWR_R", "Expected_FWR_S",
- paste( rep(testName,6), rep(c("Sigma","CIlower","CIupper"),2),rep(c("CDR","FWR"),each=3), sep="_"),
- paste( rep(testName,2), rep("P",2),c("CDR","FWR"), sep="_")
- )
- fileName = paste(outputPath,outputID,".txt",sep="")
- write.table(matOutput,file=fileName,quote=F,sep="\t",row.names=T,col.names=NA)
- fileName = paste(outputPath,outputID,".RData",sep="")
- save(listPDFs,file=fileName)
-
-indelWarning = FALSE
-if(sum(indelPos)>0){
- indelWarning = "Warning: The following sequences have either gaps and/or deletions, and have been ommited from the analysis.";
- indelWarning = paste( indelWarning , "
", sep="" )
- for(indels in names(indelPos)[indelPos]){
- indelWarning = paste( indelWarning , "", indels, " ", sep="" )
- }
- indelWarning = paste( indelWarning , " ", sep="" )
-}
-
-cloneWarning = FALSE
-if(clonal==1){
- if(sum(matInputErrors)>0){
- cloneWarning = "Warning: The following clones have sequences of unequal length.";
- cloneWarning = paste( cloneWarning , "
", sep="" )
- for(clone in names(matInputErrors)[matInputErrors]){
- cloneWarning = paste( cloneWarning , "", names(germlines)[as.numeric(clone)], " ", sep="" )
- }
- cloneWarning = paste( cloneWarning , " ", sep="" )
- }
-}
-cat(paste("Success",outputID,indelWarning,cloneWarning,sep="|"))
diff -r b84477f57318 -r e7b550d52eb7 tmp/baseline/FiveS_Mutability.RData
Binary file tmp/baseline/FiveS_Mutability.RData has changed
diff -r b84477f57318 -r e7b550d52eb7 tmp/baseline/FiveS_Substitution.RData
Binary file tmp/baseline/FiveS_Substitution.RData has changed
diff -r b84477f57318 -r e7b550d52eb7 tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa
--- a/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa Thu Aug 04 04:52:51 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,703 +0,0 @@
->IGHV1-18*01
-caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
->IGHV1-18*02
-caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc
->IGHV1-18*03
-caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga
->IGHV1-18*04
-caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
->IGHV1-2*01
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
->IGHV1-2*02
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
->IGHV1-2*03
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
->IGHV1-2*04
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
->IGHV1-2*05
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
->IGHV1-24*01
-caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
->IGHV1-3*01
-caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga
->IGHV1-3*02
-caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga
->IGHV1-38-4*01
-caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca
->IGHV1-45*01
-cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana
->IGHV1-45*02
-cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata
->IGHV1-45*03
-.....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga
->IGHV1-46*01
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-46*02
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-46*03
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga
->IGHV1-58*01
-caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
->IGHV1-58*02
-caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
->IGHV1-68*01
-caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata
->IGHV1-69*01
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*02
-caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
->IGHV1-69*03
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc
->IGHV1-69*04
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*05
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
->IGHV1-69*06
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*07
-.....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag
->IGHV1-69*08
-caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*09
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*10
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*11
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*12
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*13
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*14
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69-2*01
-gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
->IGHV1-69-2*02
-.....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag
->IGHV1-69D*01
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-8*01
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
->IGHV1-8*02
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
->IGHV1-NL1*01
-caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga
->IGHV1/OR15-1*01
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga
->IGHV1/OR15-1*02
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
->IGHV1/OR15-1*03
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga
->IGHV1/OR15-1*04
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
->IGHV1/OR15-2*01
-caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga
->IGHV1/OR15-2*02
-caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
->IGHV1/OR15-2*03
-caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
->IGHV1/OR15-3*01
-caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
->IGHV1/OR15-3*02
-caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1/OR15-3*03
-caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
->IGHV1/OR15-4*01
-caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
->IGHV1/OR15-5*01
-.....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
->IGHV1/OR15-5*02
-caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
->IGHV1/OR15-9*01
-caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga
->IGHV1/OR21-1*01
-caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga
->IGHV2-10*01
-caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac
->IGHV2-26*01
-caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac
->IGHV2-5*01
-cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
->IGHV2-5*02
-cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
->IGHV2-5*03
-................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt
->IGHV2-5*04|
-cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac
->IGHV2-5*05
-cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
->IGHV2-5*06
-cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga
->IGHV2-5*08
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
->IGHV2-5*09
-caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
->IGHV2-70*01
-caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
->IGHV2-70*02
-caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
->IGHV2-70*03
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
->IGHV2-70*04
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac
->IGHV2-70*05
-..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga
->IGHV2-70*06
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
->IGHV2-70*07
-caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
->IGHV2-70*08
-caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
->IGHV2-70*09
-cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg
->IGHV2-70*10
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
->IGHV2-70*11
-cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
->IGHV2-70*12
-cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
->IGHV2-70*13
-caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac
->IGHV2-70D*04
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
->IGHV2-70D*14
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
->IGHV2/OR16-5*01
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag
->IGHV3-11*01
-caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-11*03
-caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga
->IGHV3-11*04
-caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-11*05
-caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-11*06
-caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-13*01
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
->IGHV3-13*02
-gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
->IGHV3-13*03
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga
->IGHV3-13*04
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
->IGHV3-13*05
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
->IGHV3-15*01
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*02
-gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*03
-gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*04
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*05
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*06
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*07
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*08
-gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
->IGHV3-16*01
-gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
->IGHV3-16*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
->IGHV3-19*01
-acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa
->IGHV3-20*01
-gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
->IGHV3-20*02
-gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
->IGHV3-21*01
-gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-21*02
-gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-21*03
-gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga
->IGHV3-21*04
-gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-22*01
-gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
->IGHV3-22*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
->IGHV3-23*01
-gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
->IGHV3-23*02
-gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
->IGHV3-23*03
-gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
->IGHV3-23*04
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
->IGHV3-23*05
-gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa
->IGHV3-23D*01
-gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
->IGHV3-23D*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
->IGHV3-25*01
-gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
->IGHV3-25*02
-gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
->IGHV3-25*03
-gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga
->IGHV3-25*04
-gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga
->IGHV3-25*05
-gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
->IGHV3-29*01
-gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
->IGHV3-30*01
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*02
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-30*03
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*04
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*05
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga
->IGHV3-30*06
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*07
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*08
-caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
->IGHV3-30*09
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*10
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*11
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*12
-caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*13
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*14
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*15
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*16
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*17
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*18
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-30*19
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30-2*01
-gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca
->IGHV3-30-22*01
-gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc
->IGHV3-30-3*01
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30-3*02
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-30-3*03
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30-33*01
-gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg
->IGHV3-30-42*01
-gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
->IGHV3-30-5*01
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-30-5*02
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-30-52*01
-gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg
->IGHV3-32*01
-gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc
->AIGHV3-33*01
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-33*02
-caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-33*03
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-33*04
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-33*05
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-33*06
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-33-2*01
-gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca
->IGHV3-35*01
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa
->IGHV3-38*01|
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata
->IGHV3-38*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
->IGHV3-38*03
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
->IGHV3-38-3*01
-gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga
->IGHV3-43*01
-gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
->IGHV3-43*02
-gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
->IGHV3-43D*01
-gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata
->IGHV3-47*01
-gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga
->IGHV3-47*02
-gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga
->IGHV3-48*01
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-48*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga
->IGHV3-48*03
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
->IGHV3-48*04
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-49*01
-gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
->IGHV3-49*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
->IGHV3-49*03
-gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
->IGHV3-49*04
-gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
->IGHV3-49*05
-gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
->IGHV3-52*01
-gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg
->IGHV3-52*02
-gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
->IGHV3-52*03
-gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
->IGHV3-53*01
-gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-53*02
-gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-53*03
-gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga
->IGHV3-53*04
-gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga
->IGHV3-54*01
-gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
->IGHV3-54*02
-gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg
->IGHV3-54*04
-gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
->IGHV3-62*01
-gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga
->IGHV3-63*01
-gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt
->IGHV3-63*02
-gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa
->IGHV3-64*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
->IGHV3-64*02
-gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
->IGHV3-64*03
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
->IGHV3-64*04
-caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-64*05
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
->IGHV3-64D*06
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
->IGHV3-66*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-66*02
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
->IGHV3-66*03
-gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-66*04
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca
->IGHV3-69-1*01
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-69-1*02
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
->IGHV3-7*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-7*02
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga
->IGHV3-7*03
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-71*01
-gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-71*02
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga
->IGHV3-71*03
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-72*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga
->IGHV3-72*02
-....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat
->IGHV3-73*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
->IGHV3-73*02
-gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
->IGHV3-74*01
-gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
->IGHV3-74*02
-gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga
->IGHV3-74*03
-gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
->IGHV3-9*01
-gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
->IGHV3-9*02
-gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
->IGHV3-9*03
-gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata
->IGHV3-NL1*01
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
->IGHV3/OR15-7*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga
->IGHV3/OR15-7*02
-gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
->IGHV3/OR15-7*03
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
->IGHV3/OR15-7*05
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga
->IGHV3/OR16-10*01
-gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
->IGHV3/OR16-10*02
-gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
->IGHV3/OR16-10*03
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga
->IGHV3/OR16-12*01
-gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga
->IGHV3/OR16-13*01
-gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
->IGHV3/OR16-14*01
-gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
->IGHV3/OR16-15*01
-gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa
->IGHV3/OR16-15*02
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga
->IGHV3/OR16-16*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga
->IGHV3/OR16-6*02
-gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
->IGHV3/OR16-8*01
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa
->IGHV3/OR16-8*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca
->IGHV3/OR16-9*01
-gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa
->IGHV4-28*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
->IGHV4-28*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
->IGHV4-28*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga
->IGHV4-28*04
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga
->IGHV4-28*05
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
->IGHV4-28*06
-caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa
->IGHV4-28*07
-caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
->IGHV4-30-2*01
-cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
->IGHV4-30-2*02
-cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
->IGHV4-30-2*03
-cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca
->IGHV4-30-2*04
-...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
->IGHV4-30-2*05
-cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
->IGHV4-30-2*06
-cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
->IGHV4-30-4*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
->IGHV4-30-4*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga
->IGHV4-30-4*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
->XIGHV4-30-4*04
-caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg
->IGHV4-30-4*05
-..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
->IGHV4-30-4*06
-...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
->IGHV4-30-4*07
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
->IGHV4-31*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-31*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-31*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-31*04
-caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
->IGHV4-31*05
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg
->IGHV4-31*06
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
->IGHV4-31*07
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
->IGHV4-31*08
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
->IGHV4-31*09
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-31*10
-caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga
->IGHV4-34*01
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
->IGHV4-34*02
-caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
->IGHV4-34*03
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-34*04
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
->IGHV4-34*05
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
->IGHV4-34*06
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-34*07
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-34*08
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg
->IGHV4-34*09
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-34*10
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
->IGHV4-34*11
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
->IGHV4-34*12
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga
->IGHV4-34*13
-...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
->IGHV4-38-2*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga
->IGHV4-38-2*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
->IGHV4-39*01
-cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca
->IGHV4-39*02
-cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga
->IGHV4-39*03
-cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
->IGHV4-39*04
-..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac
->IGHV4-39*05
-cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
->IGHV4-39*06
-cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-39*07
-cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-4*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
->IGHV4-4*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-4*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-4*04
-caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-4*05
-caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-4*06
-............................................................
-...............tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-4*07
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-4*08
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
->IGHV4-55*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
->IGHV4-55*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
->IGHV4-55*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-55*04
-caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-55*05
-caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-55*06
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg
->IGHV4-55*07
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
->IGHV4-55*08
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-55*09
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
->IGHV4-59*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
->IGHV4-59*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
->IGHV4-59*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
->IGHV4-59*04
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
->IGHV4-59*05
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
->IGHV4-59*06
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
->IGHV4-59*07
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga
->IGHV4-59*08
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca
->IGHV4-59*09
-...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg
->IGHV4-59*10
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
->IGHV4-61*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
->IGHV4-61*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
->IGHV4-61*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
->IGHV4-61*04
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg
->IGHV4-61*05
-cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga
->IGHV4-61*06
-...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
->IGHV4-61*07
-...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca
->IGHV4-61*08
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
->IGHV4/OR15-8*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4/OR15-8*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4/OR15-8*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV5-10-1*01
-gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
->IGHV5-10-1*02
-gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca
->IGHV5-10-1*03
-gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
->IGHV5-10-1*04
-gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
->IGHV5-51*01
-gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
->IGHV5-51*02
-gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
->IGHV5-51*03
-gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
->IGHV5-51*04
-gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
->IGHV5-51*05
-.....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg
->IGHV5-78*01
-gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga
->IGHV6-1*01
-caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
->IGHV6-1*02
-caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
->IGHV7-34-1*01
-...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
->IGHV7-34-1*02
-...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
->IGHV7-4-1*01
-caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga
->IGHV7-4-1*02
-caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
->IGHV7-4-1*03
-caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg
->IGHV7-4-1*04
-caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
->IGHV7-4-1*05
-caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga
->AIGHV7-40*03|
-ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga
->IGHV7-81*01
-caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
diff -r b84477f57318 -r e7b550d52eb7 tmp/baseline/comparePDFs.r
--- a/tmp/baseline/comparePDFs.r Thu Aug 04 04:52:51 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,225 +0,0 @@
-options("warn"=-1)
-
-#from http://selection.med.yale.edu/baseline/Archive/Baseline%20Version%201.3/Baseline_Functions_Version1.3.r
-# Compute p-value of two distributions
-compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){
-#print(c(length(dens1),length(dens2)))
-if(length(dens1)>1 & length(dens2)>1 ){
- dens1<-dens1/sum(dens1)
- dens2<-dens2/sum(dens2)
- cum2 <- cumsum(dens2)-dens2/2
- tmp<- sum(sapply(1:length(dens1),function(i)return(dens1[i]*cum2[i])))
- #print(tmp)
- if(tmp>0.5)tmp<-tmp-1
- return( tmp )
- }
- else {
- return(NA)
- }
- #return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N)
-}
-
-
-require("grid")
-arg <- commandArgs(TRUE)
-#arg <- c("300143","4","5")
-arg[!arg=="clonal"]
-input <- arg[1]
-output <- arg[2]
-rowIDs <- as.numeric( sapply(arg[3:(max(3,length(arg)))],function(x){ gsub("chkbx","",x) } ) )
-
-numbSeqs = length(rowIDs)
-
-if ( is.na(rowIDs[1]) | numbSeqs>10 ) {
- stop( paste("Error: Please select between one and 10 seqeunces to compare.") )
-}
-
-#load( paste("output/",sessionID,".RData",sep="") )
-load( input )
-#input
-
-xMarks = seq(-20,20,length.out=4001)
-
-plot_grid_s<-function(pdf1,pdf2,Sample=100,cex=1,xlim=NULL,xMarks = seq(-20,20,length.out=4001)){
- yMax = max(c(abs(as.numeric(unlist(listPDFs[pdf1]))),abs(as.numeric(unlist(listPDFs[pdf2]))),0),na.rm=T) * 1.1
-
- if(length(xlim==2)){
- xMin=xlim[1]
- xMax=xlim[2]
- } else {
- xMin_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][1]
- xMin_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][1]
- xMax_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001])]
- xMax_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001])]
-
- xMin_CDR2 = xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001][1]
- xMin_FWR2 = xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001][1]
- xMax_CDR2 = xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001])]
- xMax_FWR2 = xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001])]
-
- xMin=min(c(xMin_CDR,xMin_FWR,xMin_CDR2,xMin_FWR2,0),na.rm=TRUE)
- xMax=max(c(xMax_CDR,xMax_FWR,xMax_CDR2,xMax_FWR2,0),na.rm=TRUE)
- }
-
- sigma<-approx(xMarks,xout=seq(xMin,xMax,length.out=Sample))$x
- grid.rect(gp = gpar(col=gray(0.6),fill="white",cex=cex))
- x <- sigma
- pushViewport(viewport(x=0.175,y=0.175,width=0.825,height=0.825,just=c("left","bottom"),default.units="npc"))
- #pushViewport(plotViewport(c(1.8, 1.8, 0.25, 0.25)*cex))
- pushViewport(dataViewport(x, c(yMax,-yMax),gp = gpar(cex=cex),extension=c(0.05)))
- grid.polygon(c(0,0,1,1),c(0,0.5,0.5,0),gp=gpar(col=grey(0.95),fill=grey(0.95)),default.units="npc")
- grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.9),fill=grey(0.9)),default.units="npc")
- grid.rect()
- grid.xaxis(gp = gpar(cex=cex/1.1))
- yticks = pretty(c(-yMax,yMax),8)
- yticks = yticks[yticks>(-yMax) & yticks<(yMax)]
- grid.yaxis(at=yticks,label=abs(yticks),gp = gpar(cex=cex/1.1))
- if(length(listPDFs[pdf1][[1]][["CDR"]])>1){
- ycdr<-approx(xMarks,listPDFs[pdf1][[1]][["CDR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y
- grid.lines(unit(x,"native"), unit(ycdr,"native"),gp=gpar(col=2,lwd=2))
- }
- if(length(listPDFs[pdf1][[1]][["FWR"]])>1){
- yfwr<-approx(xMarks,listPDFs[pdf1][[1]][["FWR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y
- grid.lines(unit(x,"native"), unit(-yfwr,"native"),gp=gpar(col=4,lwd=2))
- }
-
- if(length(listPDFs[pdf2][[1]][["CDR"]])>1){
- ycdr2<-approx(xMarks,listPDFs[pdf2][[1]][["CDR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y
- grid.lines(unit(x,"native"), unit(ycdr2,"native"),gp=gpar(col=2,lwd=2,lty=2))
- }
- if(length(listPDFs[pdf2][[1]][["FWR"]])>1){
- yfwr2<-approx(xMarks,listPDFs[pdf2][[1]][["FWR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y
- grid.lines(unit(x,"native"), unit(-yfwr2,"native"),gp=gpar(col=4,lwd=2,lty=2))
- }
-
- grid.lines(unit(c(0,1),"npc"), unit(c(0.5,0.5),"npc"),gp=gpar(col=1))
- grid.lines(unit(c(0,0),"native"), unit(c(0,1),"npc"),gp=gpar(col=1,lwd=1,lty=3))
-
- grid.text("Density", x = unit(-2.5, "lines"), rot = 90,gp = gpar(cex=cex))
- grid.text( expression(paste("Selection Strength (", Sigma, ")", sep="")) , y = unit(-2.5, "lines"),gp = gpar(cex=cex))
-
- if(pdf1==pdf2 & length(listPDFs[pdf2][[1]][["FWR"]])>1 & length(listPDFs[pdf2][[1]][["CDR"]])>1 ){
- pCDRFWR = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["CDR"]], dens2=listPDFs[[pdf1]][["FWR"]])
- pval = formatC(as.numeric(pCDRFWR),digits=3)
- grid.text( substitute(expression(paste(P[CDR/FWR], "=", x, sep="")),list(x=pval))[[2]] , x = unit(0.02, "npc"),y = unit(0.98, "npc"),just=c("left", "top"),gp = gpar(cex=cex*1.2))
- }
- grid.text(paste("CDR"), x = unit(0.98, "npc"),y = unit(0.98, "npc"),just=c("right", "top"),gp = gpar(cex=cex*1.5))
- grid.text(paste("FWR"), x = unit(0.98, "npc"),y = unit(0.02, "npc"),just=c("right", "bottom"),gp = gpar(cex=cex*1.5))
- popViewport(2)
-}
-#plot_grid_s(1)
-
-
-p2col<-function(p=0.01){
- breaks=c(-.51,-0.1,-.05,-0.01,-0.005,0,0.005,0.01,0.05,0.1,0.51)
- i<-findInterval(p,breaks)
- cols = c( rgb(0.8,1,0.8), rgb(0.6,1,0.6), rgb(0.4,1,0.4), rgb(0.2,1,0.2) , rgb(0,1,0),
- rgb(1,0,0), rgb(1,.2,.2), rgb(1,.4,.4), rgb(1,.6,.6) , rgb(1,.8,.8) )
- return(cols[i])
-}
-
-
-plot_pvals<-function(pdf1,pdf2,cex=1,upper=TRUE){
- if(upper){
- pCDR1FWR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["CDR"]], dens2=listPDFs[[pdf2]][["FWR"]])
- pFWR1FWR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["FWR"]], dens2=listPDFs[[pdf2]][["FWR"]])
- pFWR1CDR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens2=listPDFs[[pdf2]][["CDR"]], dens1=listPDFs[[pdf1]][["FWR"]])
- pCDR1CDR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens2=listPDFs[[pdf2]][["CDR"]], dens1=listPDFs[[pdf1]][["CDR"]])
- grid.polygon(c(0.5,0.5,1,1),c(0,0.5,0.5,0),gp=gpar(col=p2col(pFWR1FWR2),fill=p2col(pFWR1FWR2)),default.units="npc")
- grid.polygon(c(0.5,0.5,1,1),c(1,0.5,0.5,1),gp=gpar(col=p2col(pCDR1FWR2),fill=p2col(pCDR1FWR2)),default.units="npc")
- grid.polygon(c(0.5,0.5,0,0),c(1,0.5,0.5,1),gp=gpar(col=p2col(pCDR1CDR2),fill=p2col(pCDR1CDR2)),default.units="npc")
- grid.polygon(c(0.5,0.5,0,0),c(0,0.5,0.5,0),gp=gpar(col=p2col(pFWR1CDR2),fill=p2col(pFWR1CDR2)),default.units="npc")
-
- grid.lines(c(0,1),0.5,gp=gpar(lty=2,col=gray(0.925)))
- grid.lines(0.5,c(0,1),gp=gpar(lty=2,col=gray(0.925)))
-
- grid.text(formatC(as.numeric(pFWR1FWR2),digits=3), x = unit(0.75, "npc"),y = unit(0.25, "npc"),just=c("center", "center"),gp = gpar(cex=cex))
- grid.text(formatC(as.numeric(pCDR1FWR2),digits=3), x = unit(0.75, "npc"),y = unit(0.75, "npc"),just=c("center", "center"),gp = gpar(cex=cex))
- grid.text(formatC(as.numeric(pCDR1CDR2),digits=3), x = unit(0.25, "npc"),y = unit(0.75, "npc"),just=c("center", "center"),gp = gpar(cex=cex))
- grid.text(formatC(as.numeric(pFWR1CDR2),digits=3), x = unit(0.25, "npc"),y = unit(0.25, "npc"),just=c("center", "center"),gp = gpar(cex=cex))
-
-
- # grid.text(paste("P = ",formatC(pCDRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.98, "npc"),just=c("center", "top"),gp = gpar(cex=cex))
- # grid.text(paste("P = ",formatC(pFWRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.02, "npc"),just=c("center", "bottom"),gp = gpar(cex=cex))
- }
- else{
- }
-}
-
-
-##################################################################################
-################## The whole OCD's matrix ########################################
-##################################################################################
-
-#pdf(width=4*numbSeqs+1/3,height=4*numbSeqs+1/3)
-pdf( output ,width=4*numbSeqs+1/3,height=4*numbSeqs+1/3)
-
-pushViewport(viewport(x=0.02,y=0.02,just = c("left", "bottom"),w =0.96,height=0.96,layout = grid.layout(numbSeqs+1,numbSeqs+1,widths=unit.c(unit(rep(1,numbSeqs),"null"),unit(4,"lines")),heights=unit.c(unit(4,"lines"),unit(rep(1,numbSeqs),"null")))))
-
-for( seqOne in 1:numbSeqs+1){
- pushViewport(viewport(layout.pos.col = seqOne-1, layout.pos.row = 1))
- if(seqOne>2){
- grid.polygon(c(0,0,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc")
- grid.polygon(c(1,1,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc")
- grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.5)),default.units="npc")
-
- grid.text(y=.25,x=0.75,"FWR",gp = gpar(cex=1.5),just="center")
- grid.text(y=.25,x=0.25,"CDR",gp = gpar(cex=1.5),just="center")
- }
- grid.rect(gp = gpar(col=grey(0.9)))
- grid.text(y=.75,substr(paste(names(listPDFs)[rowIDs[seqOne-1]]),1,16),gp = gpar(cex=2),just="center")
- popViewport(1)
-}
-
-for( seqOne in 1:numbSeqs+1){
- pushViewport(viewport(layout.pos.row = seqOne, layout.pos.col = numbSeqs+1))
- if(seqOne<=numbSeqs){
- grid.polygon(c(0,0.5,0.5,0),c(0,0,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc")
- grid.polygon(c(0,0.5,0.5,0),c(1,1,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc")
- grid.polygon(c(1,0.5,0.5,1),c(0,0,1,1),gp=gpar(col=grey(0.5)),default.units="npc")
- grid.text(x=.25,y=0.75,"CDR",gp = gpar(cex=1.5),just="center",rot=270)
- grid.text(x=.25,y=0.25,"FWR",gp = gpar(cex=1.5),just="center",rot=270)
- }
- grid.rect(gp = gpar(col=grey(0.9)))
- grid.text(x=0.75,substr(paste(names(listPDFs)[rowIDs[seqOne-1]]),1,16),gp = gpar(cex=2),rot=270,just="center")
- popViewport(1)
-}
-
-for( seqOne in 1:numbSeqs+1){
- for(seqTwo in 1:numbSeqs+1){
- pushViewport(viewport(layout.pos.col = seqTwo-1, layout.pos.row = seqOne))
- if(seqTwo>seqOne){
- plot_pvals(rowIDs[seqOne-1],rowIDs[seqTwo-1],cex=2)
- grid.rect()
- }
- popViewport(1)
- }
-}
-
-
-xMin=0
-xMax=0.01
-for(pdf1 in rowIDs){
- xMin_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][1]
- xMin_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][1]
- xMax_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001])]
- xMax_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001])]
- xMin=min(c(xMin_CDR,xMin_FWR,xMin),na.rm=TRUE)
- xMax=max(c(xMax_CDR,xMax_FWR,xMax),na.rm=TRUE)
-}
-
-
-
-for(i in 1:numbSeqs+1){
- for(j in (i-1):numbSeqs){
- pushViewport(viewport(layout.pos.col = i-1, layout.pos.row = j+1))
- grid.rect()
- plot_grid_s(rowIDs[i-1],rowIDs[j],cex=1)
- popViewport(1)
- }
-}
-
-dev.off()
-
-cat("Success", paste(rowIDs,collapse="_"),sep=":")
-
diff -r b84477f57318 -r e7b550d52eb7 tmp/baseline/filter.r
--- a/tmp/baseline/filter.r Thu Aug 04 04:52:51 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-arg = commandArgs(TRUE)
-summaryfile = arg[1]
-gappedfile = arg[2]
-selection = arg[3]
-output = arg[4]
-print(paste("selection = ", selection))
-
-
-summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F)
-gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F)
-
-#dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T))
-
-dat = cbind(gappeddat, summarydat$AA.JUNCTION)
-
-colnames(dat)[length(dat)] = "AA.JUNCTION"
-
-dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele)
-dat$VGene = gsub("[*].*", "", dat$VGene)
-
-dat$DGene = gsub("^Homsap ", "", dat$D.GENE.and.allele)
-dat$DGene = gsub("[*].*", "", dat$DGene)
-
-dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele)
-dat$JGene = gsub("[*].*", "", dat$JGene)
-
-#print(str(dat))
-
-dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":"))
-
-dat = dat[!duplicated(dat$past), ]
-
-dat = dat[dat$Functionality != "No results" & dat$Functionality != "unproductive",]
-
-write.table(x=dat, file=output, sep="\t",quote=F,row.names=F,col.names=T)
diff -r b84477f57318 -r e7b550d52eb7 tmp/baseline/script_imgt.py
--- a/tmp/baseline/script_imgt.py Thu Aug 04 04:52:51 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-#import xlrd #avoid dep
-import argparse
-import re
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence")
-parser.add_argument("--ref", help="Reference file")
-parser.add_argument("--output", help="Output file")
-parser.add_argument("--id", help="ID to be used at the '>>>' line in the output")
-
-args = parser.parse_args()
-
-refdic = dict()
-with open(args.ref, 'r') as ref:
- currentSeq = ""
- currentId = ""
- for line in ref:
- if line[0] is ">":
- if currentSeq is not "" and currentId is not "":
- refdic[currentId[1:]] = currentSeq
- currentId = line.rstrip()
- currentSeq = ""
- else:
- currentSeq += line.rstrip()
- refdic[currentId[1:]] = currentSeq
-
-
-vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#,
-# r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)",
-# r"(IGKV[0-3]D?-[0-9]{1,2})",
-# r"(IGLV[0-9]-[0-9]{1,2})",
-# r"(TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])?)",
-# r"(TRGV[234589])",
-# r"(TRDV[1-3])"]
-
-#vPattern = re.compile(r"|".join(vPattern))
-vPattern = re.compile("|".join(vPattern))
-
-def filterGene(s, pattern):
- if type(s) is not str:
- return None
- res = pattern.search(s)
- if res:
- return res.group(0)
- return None
-
-
-
-currentSeq = ""
-currentId = ""
-first=True
-with open(args.input, 'r') as i:
- with open(args.output, 'a') as o:
- o.write(">>>" + args.id + "\n")
- outputdic = dict()
- for line in i:
- if first:
- first = False
- continue
- linesplt = line.split("\t")
- ref = filterGene(linesplt[1], vPattern)
- if not ref or not linesplt[2].rstrip():
- continue
- if ref in outputdic:
- outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
- else:
- outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
- #print outputdic
-
- for k in outputdic.keys():
- if k in refdic:
- o.write(">>" + k + "\n")
- o.write(refdic[k] + "\n")
- for seq in outputdic[k]:
- #print seq
- o.write(">" + seq[0] + "\n")
- o.write(seq[1] + "\n")
- else:
- print k + " not in reference, skipping " + k
diff -r b84477f57318 -r e7b550d52eb7 tmp/baseline/script_xlsx.py
--- a/tmp/baseline/script_xlsx.py Thu Aug 04 04:52:51 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-import xlrd
-import argparse
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence")
-parser.add_argument("--ref", help="Reference file")
-parser.add_argument("--output", help="Output file")
-
-args = parser.parse_args()
-
-gene_column = 6
-id_column = 7
-seq_column = 8
-LETTERS = [x for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]
-
-
-refdic = dict()
-with open(args.ref, 'r') as ref:
- currentSeq = ""
- currentId = ""
- for line in ref.readlines():
- if line[0] is ">":
- if currentSeq is not "" and currentId is not "":
- refdic[currentId[1:]] = currentSeq
- currentId = line.rstrip()
- currentSeq = ""
- else:
- currentSeq += line.rstrip()
- refdic[currentId[1:]] = currentSeq
-
-currentSeq = ""
-currentId = ""
-with xlrd.open_workbook(args.input, 'r') as wb:
- with open(args.output, 'a') as o:
- for sheet in wb.sheets():
- if sheet.cell(1,gene_column).value.find("IGHV") < 0:
- print "Genes not in column " + LETTERS[gene_column] + ", skipping sheet " + sheet.name
- continue
- o.write(">>>" + sheet.name + "\n")
- outputdic = dict()
- for rowindex in range(1, sheet.nrows):
- ref = sheet.cell(rowindex, gene_column).value.replace(">", "")
- if ref in outputdic:
- outputdic[ref] += [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)]
- else:
- outputdic[ref] = [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)]
- #print outputdic
-
- for k in outputdic.keys():
- if k in refdic:
- o.write(">>" + k + "\n")
- o.write(refdic[k] + "\n")
- for seq in outputdic[k]:
- #print seq
- o.write(">" + seq[0] + "\n")
- o.write(seq[1] + "\n")
- else:
- print k + " not in reference, skipping " + k
diff -r b84477f57318 -r e7b550d52eb7 tmp/baseline/wrapper.sh
--- a/tmp/baseline/wrapper.sh Thu Aug 04 04:52:51 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-#!/bin/bash
-dir="$(cd "$(dirname "$0")" && pwd)"
-
-testID=$1
-species=$2
-substitutionModel=$3
-mutabilityModel=$4
-clonal=$5
-fixIndels=$6
-region=$7
-inputs=$8
-inputs=($inputs)
-IDs=$9
-IDs=($IDs)
-ref=${10}
-output=${11}
-selection=${12}
-output_table=${13}
-outID="result"
-
-echo "$PWD"
-
-echo "testID = $testID"
-echo "species = $species"
-echo "substitutionModel = $substitutionModel"
-echo "mutabilityModel = $mutabilityModel"
-echo "clonal = $clonal"
-echo "fixIndels = $fixIndels"
-echo "region = $region"
-echo "inputs = ${inputs[@]}"
-echo "IDs = ${IDs[@]}"
-echo "ref = $ref"
-echo "output = $output"
-echo "outID = $outID"
-
-fasta="$PWD/baseline.fasta"
-
-
-count=0
-for current in ${inputs[@]}
-do
- f=$(file $current)
- zipType="Zip archive"
- if [[ "$f" == *"$zipType"* ]] || [[ "$f" == *"XZ compressed data"* ]]
- then
- id=${IDs[$count]}
- echo "id=$id"
- if [[ "$f" == *"Zip archive"* ]] ; then
- echo "Zip archive"
- echo "unzip $input -d $PWD/files/"
- unzip $current -d "$PWD/$id/"
- elif [[ "$f" == *"XZ compressed data"* ]] ; then
- echo "ZX archive"
- echo "tar -xJf $input -C $PWD/files/"
- mkdir -p "$PWD/$id/files"
- tar -xJf $current -C "$PWD/$id/files/"
- fi
- summaryfile="$PWD/summary_${id}.txt"
- gappedfile="$PWD/gappednt_${id}.txt"
- filtered="$PWD/filtered_${id}.txt"
- filecount=`ls -l $PWD/$id/ | wc -l`
- if [[ "$filecount" -eq "2" ]]
- then
- cat $PWD/$id/*/1_* > $summaryfile
- cat $PWD/$id/*/2_* > $gappedfile
- else
- cat $PWD/$id/1_* > $summaryfile
- cat $PWD/$id/2_* > $gappedfile
- fi
- Rscript $dir/filter.r $summaryfile $gappedfile "$selection" $filtered 2>&1
-
- final="$PWD/final_${id}.txt"
- cat $filtered | cut -f2,4,7 > $final
- python $dir/script_imgt.py --input $final --ref $ref --output $fasta --id $id
- else
- python $dir/script_xlsx.py --input $current --ref $ref --output $fasta
- fi
- count=$((count+1))
-done
-
-if [[ $(wc -l < $fasta) -eq "1" ]]; then
- echo "No sequences in the fasta file, exiting"
- exit 0
-fi
-
-workdir="$PWD"
-cd $dir
-echo "file: ${inputs[0]}"
-#Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region ${inputs[0]} $workdir/ $outID 2>&1
-Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region $fasta $workdir/ $outID 2>&1
-
-echo "$workdir/${outID}.txt"
-
-rows=`tail -n +2 $workdir/${outID}.txt | grep -v "All sequences combined" | grep -n 'Group' | grep -Eoh '^[0-9]+' | tr '\n' ' '`
-rows=($rows)
-#unset rows[${#rows[@]}-1]
-
-cd $dir
-Rscript --verbose $dir/comparePDFs.r $workdir/${outID}.RData $output ${rows[@]} 2>&1
-cp $workdir/result.txt ${output_table}
-
-
-
-
diff -r b84477f57318 -r e7b550d52eb7 tmp/igat.r
--- a/tmp/igat.r Thu Aug 04 04:52:51 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-args <- commandArgs(trailingOnly = TRUE)
-
-imgt.dir = args[1]
-merged.file = args[2]
-gene = args[3]
-
-merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F)
-
-if(gene != "-"){
- merged = merged[grepl(gene, merged$best_match),]
-}
-
-merged = merged[!grepl("unmatched", merged$best_match),]
-
-for(f in list.files(imgt.dir, pattern="*.txt$")){
- #print(paste("filtering", f))
- path = paste(imgt.dir, f, sep="")
- dat = read.table(path, header=T, sep="\t", fill=T, quote="", stringsAsFactors=F, check.names=FALSE)
-
- dat = dat[dat[,"Sequence ID"] %in% merged$Sequence.ID,]
-
- if(nrow(dat) > 0 & grepl("^8_", f)){ #change the FR1 columns to 0 in the "8_..." file
- dat[,grepl("^FR1", names(dat))] = 0
- }
-
- write.table(dat, path, quote=F, sep="\t", row.names=F, col.names=T)
-}
diff -r b84477f57318 -r e7b550d52eb7 tool_dependencies.xml
--- a/tool_dependencies.xml Thu Aug 04 04:52:51 2016 -0400
+++ b/tool_dependencies.xml Tue Aug 09 07:20:41 2016 -0400
@@ -6,14 +6,14 @@
- ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.30/ncbi-blast-2.2.30+-x64-linux.tar.gz
-
- bin/blastn
- $INSTALL_DIR
-
-
- $INSTALL_DIR
-
+ ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.30/ncbi-blast-2.2.30+-x64-linux.tar.gz
+
+ bin/blastn
+ $INSTALL_DIR
+
+
+ $INSTALL_DIR
+
diff -r b84477f57318 -r e7b550d52eb7 wrapper.sh
--- a/wrapper.sh Thu Aug 04 04:52:51 2016 -0400
+++ b/wrapper.sh Tue Aug 09 07:20:41 2016 -0400
@@ -15,6 +15,7 @@
naive_output_cm=${11}
filter_unique=${12}
class_filter=${13}
+empty_region_filter=${14}
mkdir $outdir
tar -xzf $dir/style.tar.gz -C $outdir
@@ -80,7 +81,7 @@
echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ---------------- " >> $log
-Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} 2>&1
+Rscript "$dir/merge_and_filter.r" "$PWD/summary.txt" "$PWD/sequences.txt" "$PWD/mutationanalysis.txt" "$PWD/mutationstats.txt" "$PWD/hotspots.txt" "$outdir/identified_genes.txt" "$outdir/merged.txt" "$outdir/before_unique_filter.txt" "$outdir/unmatched.txt" "$method" "$functionality" "$unique" "${filter_unique}" "${class_filter}" "${empty_region_filter}" 2>&1 # every argument is quoted so an empty filter value cannot shift the later positional arguments
echo "---------------- creating new IMGT zip ----------------"
echo "---------------- creating new IMGT zip ---------------- " >> $log
@@ -107,32 +108,24 @@
mkdir $outdir/new_IMGT_cm
cp $outdir/new_IMGT/* $outdir/new_IMGT_cm
-Rscript $dir/tmp/igat.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1
-Rscript $dir/tmp/igat.r $outdir/new_IMGT_ca/ $outdir/merged.txt "ca" 2>&1
-Rscript $dir/tmp/igat.r $outdir/new_IMGT_cg/ $outdir/merged.txt "cg" 2>&1
-Rscript $dir/tmp/igat.r $outdir/new_IMGT_cm/ $outdir/merged.txt "cm" 2>&1
+Rscript $dir/new_imgt.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1
+Rscript $dir/new_imgt.r $outdir/new_IMGT_ca/ $outdir/merged.txt "ca" 2>&1
+Rscript $dir/new_imgt.r $outdir/new_IMGT_cg/ $outdir/merged.txt "cg" 2>&1
+Rscript $dir/new_imgt.r $outdir/new_IMGT_cm/ $outdir/merged.txt "cm" 2>&1
tmp="$PWD"
cd $outdir/new_IMGT/ #tar weirdness...
tar -cJf ../new_IMGT.txz *
-cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT/IgAT.xlsm
-zip -r ../IgAT.zip *
cd $outdir/new_IMGT_ca/
tar -cJf ../new_IMGT_ca.txz *
-cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT_ca/IgAT.xlsm
-zip -r ../IgAT_ca.zip *
cd $outdir/new_IMGT_cg/
tar -cJf ../new_IMGT_cg.txz *
-cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT_cg/IgAT.xlsm
-zip -r ../IgAT_cg.zip *
cd $outdir/new_IMGT_cm/
tar -cJf ../new_IMGT_cm.txz *
-cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT_cm/IgAT.xlsm
-zip -r ../IgAT_cm.zip *
cd $tmp
@@ -219,17 +212,17 @@
echo "${gene} (N = $tmp) " >> $output
done
+ tmp=`cat $outdir/all_${func}_n.txt`
+ echo "all (N = $tmp) " >> $output
tmp=`cat $outdir/unmatched_${func}_n.txt`
echo "unmatched (N = ${unmatched_count}) " >> $output
- tmp=`cat $outdir/all_${func}_n.txt`
- echo "all (N = $tmp) " >> $output
while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz
do
if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
echo "$name ${cax}/${cay} (${caz}) ${ca1x}/${ca1y} (${ca1z}) ${ca2x}/${ca2y} (${ca2z}) ${cgx}/${cgy} (${cgz}) ${cg1x}/${cg1y} (${cg1z}) ${cg2x}/${cg2y} (${cg2z}) ${cg3x}/${cg3y} (${cg3z}) ${cg4x}/${cg4y} (${cg4z}) ${cmx}/${cmy} (${cmz}) ${allx}/${ally} (${allz}) " >> $output
else
- echo "$name ${cax}/${cay} (${caz}%) ${ca1x}/${ca1y} (${ca1z}%) ${ca2x}/${ca2y} (${ca2z}%) ${cgx}/${cgy} (${cgz}%) ${cg1x}/${cg1y} (${cg1z}%) ${cg2x}/${cg2y} (${cg2z}%) ${cg3x}/${cg3y} (${cg3z}%) ${cg4x}/${cg4y} (${cg4z}%) ${cmx}/${cmy} (${cmz}%) ${unx}/${uny} (${unz}%) ${allx}/${ally} (${allz}%) " >> $output
+ echo "$name ${cax}/${cay} (${caz}%) ${ca1x}/${ca1y} (${ca1z}%) ${ca2x}/${ca2y} (${ca2z}%) ${cgx}/${cgy} (${cgz}%) ${cg1x}/${cg1y} (${cg1z}%) ${cg2x}/${cg2y} (${cg2z}%) ${cg3x}/${cg3y} (${cg3z}%) ${cg4x}/${cg4y} (${cg4z}%) ${cmx}/${cmy} (${cmz}%) ${allx}/${ally} (${allz}%) ${unx}/${uny} (${unz}%) " >> $output
fi
done < $outdir/data_${func}.txt
echo "" >> $output
@@ -259,21 +252,37 @@
echo "" >> $output
+echo "
" >> $output
+
for gene in ${genes[@]}
do
- echo "$gene transition table " >> $output
+ echo "" >> $output
+ echo "${gene} " >> $output
+ echo " " >> $output
+ echo " " >> $output
+ echo "" >> $output
while IFS=, read from a c g t
do
echo "$from $a $c $g $t " >> $output
done < $outdir/transitions_${gene}_sum.txt
- echo "
" >> $output
+ echo "
" >> $output
+
+ echo "" >> $output
done
-echo "All transition table " >> $output
+echo "" >> $output
+echo "All " >> $output
+echo " " >> $output
+echo " " >> $output
+echo "" >> $output
while IFS=, read from a c g t
do
echo "$from $a $c $g $t " >> $output
done < $outdir/transitions_all_sum.txt
+echo "
" >> $output
+
+echo " " >> $output
+
echo "
" >> $output
echo "" >> $output #transition tables tab end
@@ -315,30 +324,32 @@
echo "" >> $output #downloads tab end
@@ -356,7 +367,7 @@
mkdir $outdir/baseline/ca_cg_cm
if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then
cd $outdir/baseline/ca_cg_cm
- bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "ca_cg_cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"
+ bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "ca_cg_cm" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"
else
echo "No sequences" > "$outdir/baseline.txt"
fi
@@ -364,7 +375,7 @@
mkdir $outdir/baseline/ca
if [[ $(wc -l < $outdir/new_IMGT_ca/1_Summary.txt) -gt "1" ]]; then
cd $outdir/baseline/ca
- bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_ca.txz "ca" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_ca.pdf" "Sequence.ID" "$outdir/baseline_ca.txt"
+ bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_ca.txz "ca" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_ca.pdf" "Sequence.ID" "$outdir/baseline_ca.txt"
else
echo "No ca sequences" > "$outdir/baseline_ca.txt"
fi
@@ -372,7 +383,7 @@
mkdir $outdir/baseline/cg
if [[ $(wc -l < $outdir/new_IMGT_cg/1_Summary.txt) -gt "1" ]]; then
cd $outdir/baseline/cg
- bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cg.txz "cg" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cg.pdf" "Sequence.ID" "$outdir/baseline_cg.txt"
+ bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cg.txz "cg" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cg.pdf" "Sequence.ID" "$outdir/baseline_cg.txt"
else
echo "No cg sequences" > "$outdir/baseline_cg.txt"
fi
@@ -380,7 +391,7 @@
mkdir $outdir/baseline/cm
if [[ $(wc -l < $outdir/new_IMGT_cm/1_Summary.txt) -gt "1" ]]; then
cd $outdir/baseline/cm
- bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cm.txz "cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cm.pdf" "Sequence.ID" "$outdir/baseline_cm.txt"
+ bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cm.txz "cm" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cm.pdf" "Sequence.ID" "$outdir/baseline_cm.txt"
else
echo "No cm sequences" > "$outdir/baseline_cm.txt"
fi
@@ -392,14 +403,18 @@
if [[ "$naive_output" != "None" ]]
then
- echo "---------------- imgt_loader.r ----------------"
- echo "---------------- imgt_loader.r ---------------- " >> $log
+ #echo "---------------- imgt_loader.r ----------------"
+ #echo "---------------- imgt_loader.r ---------------- " >> $log
#python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
- Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1
+ #Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1
- echo "---------------- naive_output.r ----------------"
- echo "---------------- naive_output.r ---------------- " >> $log
- Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1
+ #echo "---------------- naive_output.r ----------------"
+ #echo "---------------- naive_output.r ---------------- " >> $log
+ #Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1
+
+ cp "$outdir/new_IMGT_ca.txz" "${naive_output_ca}" # quoted: paths may contain whitespace or glob characters
+ cp "$outdir/new_IMGT_cg.txz" "${naive_output_cg}"
+ cp "$outdir/new_IMGT_cm.txz" "${naive_output_cm}"
fi
echo "
" >> $outdir/base_overview.html