Mercurial > repos > davidvanzessen > mutation_analysis
changeset 95:a66eb1c5374c draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 08 Jun 2016 03:54:49 -0400 |
parents | e39176ccddc8 |
children | 925efcd00c58 |
files | tmp/IgAT.xlsm tmp/baseline/Baseline_Functions.r tmp/baseline/Baseline_Main.r tmp/baseline/FiveS_Mutability.RData tmp/baseline/FiveS_Substitution.RData tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa tmp/baseline/baseline.xml tmp/baseline/comparePDFs.r tmp/baseline/filter.r tmp/baseline/script_imgt.py tmp/baseline/script_xlsx.py tmp/baseline/wrapper.sh tmp/igat.r wrapper.sh |
diffstat | 14 files changed, 4006 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/tmp/baseline/Baseline_Functions.r Wed Jun 08 03:54:49 2016 -0400
# @@ -0,0 +1,2287 @@
#########################################################################################
# License Agreement
#
# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE
# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER
# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE
# OR COPYRIGHT LAW IS PROHIBITED.
#
# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE
# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED
# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN
# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
#
# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
# Coded by: Mohamed Uduman & Gur Yaari
# Copyright 2012 Kleinstein Lab
# Version: 1.3 (01/23/2014)
#########################################################################################

# Global variables

FILTER_BY_MUTATIONS <- 1000

# Nucleotides, in the order used for the rows/columns of every substitution matrix below
NUCLEOTIDES <- c("A", "C", "G", "T")

# Amino acids: one-letter code for each of the 64 codons ("*" marks a stop codon).
# The vector is named by codon so that AMINO_ACIDS["ATG"] yields "M".
AMINO_ACIDS <- c(
  "F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "*", "*", "C", "C", "*", "W",
  "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R",
  "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R",
  "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G"
)
names(AMINO_ACIDS) <- c(
  "TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TAA", "TAG", "TGT", "TGC", "TGA", "TGG",
  "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG",
  "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG",
  "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG"
)

# Amino acid traits (Chothia 98), listed in the order of sort(unique(AMINO_ACIDS)):
# "*" "A" "C" "D" "E" "F" "G" "H" "I" "K" "L" "M" "N" "P" "Q" "R" "S" "T" "V" "W" "Y"
# B = "Hydrophobic/Buried"  N = "Intermediate/Neutral"  S = "Hydrophilic/Surface"
TRAITS_AMINO_ACIDS_CHOTHIA98 <- c(
  "*", "N", "B", "S", "S", "B", "N", "N", "B", "S", "B",
  "B", "S", "N", "S", "S", "N", "N", "B", "B", "N"
)
names(TRAITS_AMINO_ACIDS_CHOTHIA98) <- sort(unique(AMINO_ACIDS))

# Placeholder for the active trait table; filled in by initializeTraitChange()
TRAITS_AMINO_ACIDS <- array(NA, 21)

# Codon table: 12 rows (one per single-nucleotide change) x 64 codons; filled in by
# computeCodonTable()
CODON_TABLE <- as.data.frame(matrix(NA, ncol = 64, nrow = 12))

# Substitution Model: Smith DS et al. 1996
# Each matrix is 4 x 4, filled row by row; rows and columns are labelled with NUCLEOTIDES.
substitution_Literature_Mouse <- matrix(
  c(0, 0.156222928, 0.601501588, 0.242275484,
    0.172506739, 0, 0.241239892, 0.586253369,
    0.54636291, 0.255795364, 0, 0.197841727,
    0.290240811, 0.467680608, 0.24207858, 0),
  nrow = 4, byrow = TRUE, dimnames = list(NUCLEOTIDES, NUCLEOTIDES)
)
substitution_Flu_Human <- matrix(
  c(0, 0.2795596, 0.5026927, 0.2177477,
    0.1693210, 0, 0.3264723, 0.5042067,
    0.4983549, 0.3328321, 0, 0.1688130,
    0.2021079, 0.4696077, 0.3282844, 0),
  4, 4, byrow = TRUE, dimnames = list(NUCLEOTIDES, NUCLEOTIDES)
)
substitution_Flu25_Human <- matrix(
  c(0, 0.2580641, 0.5163685, 0.2255674,
    0.1541125, 0, 0.3210224, 0.5248651,
    0.5239281, 0.3101292, 0, 0.1659427,
    0.1997207, 0.4579444, 0.3423350, 0),
  4, 4, byrow = TRUE, dimnames = list(NUCLEOTIDES, NUCLEOTIDES)
)
# Resolved against the current working directory; initializeSubstitutionMatrix()
# reads an object named FiveS_Substitution, presumably supplied by this file.
load("FiveS_Substitution.RData")

# Mutability Models: Shapiro GS et al.
# 2002
# Tri-nucleotide mutability tables: 64 rows (one per triplet, in the order of
# triMutability_Names) x 3 columns (position within the triplet).
triMutability_Literature_Human <- matrix(
  c(0.24, 1.2, 0.96, 0.43, 2.14, 2, 1.11, 1.9, 0.85, 1.83, 2.36, 1.31, 0.82, 0.52, 0.89, 1.33,
    1.4, 0.82, 1.83, 0.73, 1.83, 1.62, 1.53, 0.57, 0.92, 0.42, 0.42, 1.47, 3.44, 2.58, 1.18, 0.47,
    0.39, 1.12, 1.8, 0.68, 0.47, 2.19, 2.35, 2.19, 1.05, 1.84, 1.26, 0.28, 0.98, 2.37, 0.66, 1.58,
    0.67, 0.92, 1.76, 0.83, 0.97, 0.56, 0.75, 0.62, 2.26, 0.62, 0.74, 1.11, 1.16, 0.61, 0.88, 0.67,
    0.37, 0.07, 1.08, 0.46, 0.31, 0.94, 0.62, 0.57, 0.29, NA, 1.44, 0.46, 0.69, 0.57, 0.24, 0.37,
    1.1, 0.99, 1.39, 0.6, 2.26, 1.24, 1.36, 0.52, 0.33, 0.26, 1.25, 0.37, 0.58, 1.03, 1.2, 0.34,
    0.49, 0.33, 2.62, 0.16, 0.4, 0.16, 0.35, 0.75, 1.85, 0.94, 1.61, 0.85, 2.09, 1.39, 0.3, 0.52,
    1.33, 0.29, 0.51, 0.26, 0.51, 3.83, 2.01, 0.71, 0.58, 0.62, 1.07, 0.28, 1.2, 0.74, 0.25, 0.59,
    1.09, 0.91, 1.36, 0.45, 2.89, 1.27, 3.7, 0.69, 0.28, 0.41, 1.17, 0.56, 0.93, 3.41, 1, 1,
    NA, 5.9, 0.74, 2.51, 2.24, 2.24, 1.95, 3.32, 2.34, 1.3, 2.3, 1, 0.66, 0.73, 0.93, 0.41,
    0.65, 0.89, 0.65, 0.32, NA, 0.43, 0.85, 0.43, 0.31, 0.31, 0.23, 0.29, 0.57, 0.71, 0.48, 0.44,
    0.76, 0.51, 1.7, 0.85, 0.74, 2.23, 2.08, 1.16, 0.51, 0.51, 1, 0.5, NA, NA, 0.71, 2.14),
  nrow = 64, byrow = TRUE
)
triMutability_Literature_Mouse <- matrix(
  c(1.31, 1.35, 1.42, 1.18, 2.02, 2.02, 1.02, 1.61, 1.99, 1.42, 2.01, 1.03, 2.02, 0.97, 0.53, 0.71,
    1.19, 0.83, 0.96, 0.96, 0, 1.7, 2.22, 0.59, 1.24, 1.07, 0.51, 1.68, 3.36, 3.36, 1.14, 0.29,
    0.33, 0.9, 1.11, 0.63, 1.08, 2.07, 2.27, 1.74, 0.22, 1.19, 2.37, 1.15, 1.15, 1.56, 0.81, 0.34,
    0.87, 0.79, 2.13, 0.49, 0.85, 0.97, 0.36, 0.82, 0.66, 0.63, 1.15, 0.94, 0.85, 0.25, 0.93, 1.19,
    0.4, 0.2, 0.44, 0.44, 0.88, 1.06, 0.77, 0.39, 0, 0, 0, 0, 0, 0, 0.43, 0.43,
    0.86, 0.59, 0.59, 0, 1.18, 0.86, 2.9, 1.66, 0.4, 0.2, 1.54, 0.43, 0.69, 1.71, 0.68, 0.55,
    0.91, 0.7, 1.71, 0.09, 0.27, 0.63, 0.2, 0.45, 1.01, 1.63, 0.96, 1.48, 2.18, 1.2, 1.31, 0.66,
    2.13, 0.49, 0, 0, 0, 2.97, 2.8, 0.79, 0.4, 0.5, 0.4, 0.11, 1.68, 0.42, 0.13, 0.44,
    0.93, 0.71, 1.11, 1.19, 2.71, 1.08, 3.43, 0.4, 0.67, 0.47, 1.02, 0.14, 1.56, 1.98, 0.53, 0.33,
    0.63, 2.06, 1.77, 1.46, 3.74, 2.93, 2.1, 2.18, 0.78, 0.73, 2.93, 0.63, 0.57, 0.17, 0.85, 0.52,
    0.31, 0.31, 0, 0, 0.51, 0.29, 0.83, 0.54, 0.28, 0.47, 0.9, 0.99, 1.24, 2.47, 0.73, 0.23,
    1.13, 0.24, 2.12, 0.24, 0.33, 0.83, 1.41, 0.62, 0.28, 0.35, 0.77, 0.17, 0.72, 0.58, 0.45, 0.41),
  nrow = 64, byrow = TRUE
)
triMutability_Names <- c(
  "AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT",
  "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT",
  "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT",
  "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"
)
# Resolved against the current working directory; initializeMutabilityMatrix()
# reads an object named FiveS_Mutability, presumably supplied by this file.
load("FiveS_Mutability.RData")

# Functions

# Translate codon to amino acid (lookup by name in the global AMINO_ACIDS)
translateCodonToAminoAcid <- function(Codon) {
  AMINO_ACIDS[Codon]
}

# Translate amino acid to trait change (lookup by name in the global TRAITS_AMINO_ACIDS)
translateAminoAcidToTraitChange <- function(AminoAcid) {
  TRAITS_AMINO_ACIDS[AminoAcid]
}

# Initialize Amino Acid Trait Changes
# Stores the trait table in the global TRAITS_AMINO_ACIDS: the tab-delimited file
# `traitChangeFileName` when given, otherwise the built-in Chothia 98 table.
# `traitChangeModel` and `species` are accepted but not used by this function.
# On a read error an "Error|" line is printed and the R session is terminated.
initializeTraitChange <- function(traitChangeModel = 1, species = 1, traitChangeFileName = NULL) {
  if (is.null(traitChangeFileName)) {
    traitChange <- TRAITS_AMINO_ACIDS_CHOTHIA98
  } else {
    tryCatch(
      traitChange <- read.delim(traitChangeFileName, sep = "\t", header = TRUE),
      error = function(ex) {
        cat("Error|Error reading trait changes. Please check file name/path and format.\n")
        q()
      }
    )
  }
  TRAITS_AMINO_ACIDS <<- traitChange
}

# Read in formatted nucleotide substitution matrix
# Returns the substitution matrix selected by `substitutionModel`:
#   file given -> matrix read from file (rows re-normalised unless all four sum to 1)
#   1 / 2 / 3  -> Literature_Mouse / Flu_Human / Flu25_Human
#   0          -> uniform 1/3 off-diagonal, 0 on the diagonal (overrides any file)
#   5          -> FiveS_Substitution, returned as-is
# For every model except 5 the result is a 6 x 4 matrix whose rows are
# A, C, G, T, N, "-" (the last two rows are NA). `species` is not used here.
initializeSubstitutionMatrix <- function(substitutionModel, species, subsMatFileName = NULL) {
  if (is.null(subsMatFileName)) {
    if (substitutionModel == 1) subsMat <- substitution_Literature_Mouse
    if (substitutionModel == 2) subsMat <- substitution_Flu_Human
    if (substitutionModel == 3) subsMat <- substitution_Flu25_Human
  } else {
    tryCatch(
      subsMat <- read.delim(subsMatFileName, sep = "\t", header = TRUE),
      error = function(ex) {
        cat("Error|Error reading substitution matrix. Please check file name/path and format.\n")
        q()
      }
    )
    rowTotals <- apply(subsMat, 1, sum)
    if (sum(rowTotals == 1) != 4) {
      subsMat <- t(apply(subsMat, 1, function(x) x / sum(x)))
    }
  }

  if (substitutionModel == 0) {
    subsMat <- matrix(1, 4, 4)
    subsMat[, ] <- 1 / 3
    for (k in 1:4) subsMat[k, k] <- 0
  }

  if (substitutionModel == 5) {
    return(FiveS_Substitution)
  }

  NUCLEOTIDESN <- c(NUCLEOTIDES, "N", "-")
  subsMat <- rbind(subsMat, rep(NA, 4), rep(NA, 4))
  matrix(data.matrix(subsMat), 6, 4, dimnames = list(NUCLEOTIDESN, NUCLEOTIDES))
}


# Read in formatted Mutability file
# Returns the mutability table selected by `mutabilityModel`:
#   file given -> table read from file
#   otherwise  -> literature table (Human, or Mouse when species == 2)
#   0          -> all ones (overrides the above)
#   5          -> FiveS_Mutability, returned as-is
# For every model except 5 the result is a 64 x 3 matrix with triplet row names.
initializeMutabilityMatrix <- function(mutabilityModel = 1, species = 1, mutabilityMatFileName = NULL) {
  if (is.null(mutabilityMatFileName)) {
    mutabilityMat <- triMutability_Literature_Human
    if (species == 2) mutabilityMat <- triMutability_Literature_Mouse
  } else {
    tryCatch(
      mutabilityMat <- read.delim(mutabilityMatFileName, sep = "\t", header = TRUE),
      error = function(ex) {
        cat("Error|Error reading mutability matrix. Please check file name/path and format.\n")
        q()
      }
    )
  }

  if (mutabilityModel == 0) {
    mutabilityMat <- matrix(1, 64, 3)
  }

  if (mutabilityModel == 5) {
    return(FiveS_Mutability)
  }
  matrix(data.matrix(mutabilityMat), 64, 3, dimnames = list(triMutability_Names, 1:3))
}

# Read FASTA file formats
# Modified from read.fasta from the seqinR package
# Differences from a plain FASTA reader that are visible in this code:
#   * if the file has no ">>>" group header, ">>>All sequences combined" is prepended;
#   * a dummy sequence line is inserted after every ">>>" header so that a group
#     header is parsed like a record;
#   * a header line directly followed by another header line is dropped;
#   * "." is converted to "-" in DNA sequences.
# Record names keep everything after the first character of the header line
# (so group records are named ">>...").
baseline.read.fasta <- function(file = system.file("sequences/sample.fasta", package = "seqinr"),
                                seqtype = c("DNA", "AA"), as.string = FALSE, forceDNAtolower = TRUE,
                                set.attributes = TRUE, legacy.mode = TRUE, seqonly = FALSE,
                                strip.desc = FALSE, sizeof.longlong = .Machine$sizeof.longlong,
                                endian = .Platform$endian, apply.mask = TRUE) {
  seqtype <- match.arg(seqtype)

  lines <- readLines(file)

  # Legacy FASTA: lines starting with ";" are comments
  if (legacy.mode) {
    comments <- grep("^;", lines)
    if (length(comments) > 0) {
      lines <- lines[-comments]
    }
  }

  # Make sure there is at least one ">>>" group header
  lines_mod <- lines
  if (!length(which(substr(lines, 1L, 3L) == ">>>"))) {
    lines_mod <- c(">>>All sequences combined", lines)
  }
  ind_groups <- which(substr(lines_mod, 1L, 3L) == ">>>")

  # Rebuild `lines` with one placeholder line inserted right after each group header
  lines <- array("BLA", dim = (length(ind_groups) + length(lines_mod)))
  id <- ind_groups + seq_along(ind_groups)
  lines[id] <- "THIS IS A FAKE SEQUENCE"
  lines[-id] <- lines_mod
  rm(lines_mod)

  ind <- which(substr(lines, 1L, 1L) == ">")
  nseq <- length(ind)
  if (nseq == 0) {
    stop("no line starting with a > character found")
  }
  start <- ind + 1
  end <- ind - 1

  # Drop headers that are immediately followed by another header (no sequence lines)
  while (any(ind %in% end)) {
    ind <- ind[!(ind %in% end)]
    nseq <- length(ind)
    if (nseq == 0) {
      stop("no line starting with a > character found")
    }
    start <- ind + 1
    end <- ind - 1
  }

  end <- c(end[-1], length(lines))
  sequences <- lapply(seq_len(nseq), function(i) {
    paste(lines[start[i]:end[i]], collapse = "")
  })
  if (seqonly) {
    return(sequences)
  }

  nomseq <- lapply(seq_len(nseq), function(i) {
    header <- lines[ind[i]]
    substr(header, 2, nchar(header))
  })

  if (seqtype == "DNA") {
    gapped <- chartr(".", "-", sequences)
    sequences <- if (forceDNAtolower) as.list(tolower(gapped)) else as.list(toupper(gapped))
  }
  if (as.string == FALSE) {
    sequences <- lapply(sequences, s2c)
  }
  if (set.attributes) {
    seqClass <- switch(seqtype, AA = "SeqFastaAA", DNA = "SeqFastadna")
    for (i in seq_len(nseq)) {
      Annot <- lines[ind[i]]
      if (strip.desc) {
        Annot <- substr(Annot, 2L, nchar(Annot))
      }
      attributes(sequences[[i]]) <- list(name = nomseq[[i]], Annot = Annot, class = seqClass)
    }
  }
  names(sequences) <- nomseq
  return(sequences)
}


# Replaces non FASTA characters in input files with N
replaceNonFASTAChars <- function(inSeq = "ACGTN-AApA") {
  gsub('[^ACGTNacgt[:punct:]-[:punct:].]', 'N', inSeq, perl = TRUE)
}

# Find the germlines in the FASTA list
# TRUE for IDs of the form ">x..." (exactly one leading ">"); result is named by ID.
germlinesInFile <- function(seqIDs) {
  firstChar <- sapply(seqIDs, function(x) substr(x, 1, 1))
  secondChar <- sapply(seqIDs, function(x) substr(x, 2, 2))
  return(firstChar == ">" & secondChar != ">")
}

# Find the groups in the FASTA list
# TRUE for IDs starting with ">>"; result is named by ID.
groupsInFile <- function(seqIDs) {
  sapply(seqIDs, function(x) substr(x, 1, 2)) == ">>"
}

# In the process of finding germlines/groups, expand from the start to end of the group
# `vecPosToID` is a named logical vector marking the header positions. Every position
# receives the running number of the header it falls under, and is named with that
# header's ID stripped of ">". Positions before the second header all belong to run 1.
expandTillNext <- function(vecPosToID) {
  IDs <- names(vecPosToID)
  posOfInterests <- which(vecPosToID)

  expandedIDNames <- gsub(">", "", IDs[posOfInterests])
  startIndexes <- c(1, posOfInterests[-1])
  stopIndexes <- c(posOfInterests[-1] - 1, length(IDs))
  runs <- 1:length(startIndexes)

  expandedID <- unlist(sapply(runs, function(i) {
    rep(i, stopIndexes[i] - startIndexes[i] + 1)
  }))
  names(expandedID) <- unlist(sapply(runs, function(i) {
    rep(expandedIDNames[i], stopIndexes[i] - startIndexes[i] + 1)
  }))
  return(expandedID)
}

# Process FASTA (list) to return a matrix[input, germline]
# Returns list(matInput, germlines, groups, conservationDefinition) where matInput has
# one row per input sequence with columns "Input" and "Germline".
processInputAdvanced <- function(inputFASTA) {
  consDef <- NA

  # Find the position of the conservation row: the record right after a germline
  # whose run (germline + following records) has length one.
  glPositionsSum <- cumsum(germlinesInFile(names(inputFASTA)))
  glPositions <- table(glPositionsSum)
  loneGermlines <- glPositions[names(glPositions) != 0 & glPositions == 1]
  consDefPos <- as.numeric(names(loneGermlines)) + 1
  if (length(consDefPos) > 0) {
    consDefID <- match(consDefPos, glPositionsSum)
    # The conservation rows need to be pulled out and stored separately
    consDef <- inputFASTA[consDefID]
    inputFASTA <- inputFASTA[-consDefID]
  }

  # Classify the (remaining) records
  seqIDs <- names(inputFASTA)
  numbSeqs <- length(seqIDs)
  posGermlines1 <- germlinesInFile(seqIDs)
  posGroups1 <- groupsInFile(seqIDs)

  posGroups <- expandTillNext(posGroups1)
  posGermlines <- expandTillNext(posGermlines1)
  posGermlines[posGroups1] <- 0
  names(posGermlines)[posGroups1] <- names(posGroups)[posGroups1]

  # Input sequences are whatever is neither a group header nor a germline
  posInput <- rep(TRUE, numbSeqs)
  posInput[posGroups1 | posGermlines1] <- FALSE

  matInput <- matrix(NA, nrow = sum(posInput), ncol = 2)
  rownames(matInput) <- seqIDs[posInput]
  colnames(matInput) <- c("Input", "Germline")

  vecInputFASTA <- unlist(inputFASTA)
  germlines <- posGermlines[posInput]
  groups <- posGroups[posInput]

  matInput[, 1] <- vecInputFASTA[posInput]
  germlineRecords <- which(names(inputFASTA) %in% paste(">", names(germlines), sep = ""))
  matInput[, 2] <- vecInputFASTA[germlineRecords[germlines]]

  return(list("matInput" = matInput, "germlines" = germlines, "groups" = groups,
              "conservationDefinition" = consDef))
}


# Replace leading and trailing dashes in the
# sequence
# `x` is c(inputSequence, germlineSequence) as strings. In each, the leading and trailing
# runs of "-" become "N"; the result is then right-padded with "N" or truncated so that
# both returned strings are exactly `readEnd` characters long.
# Returns c(input, germline).
replaceLeadingTrailingDashes <- function(x, readEnd) {
  # Pad with "N" up to readEnd, then cut to readEnd
  fitToReadEnd <- function(chars) {
    if (length(chars) < readEnd) {
      chars <- c(chars, rep("N", readEnd - length(chars)))
    }
    c2s(chars[1:readEnd])
  }

  seqIn <- fitToReadEnd(replaceLeadingTrailingDashesHelper(x[1]))
  seqGL <- fitToReadEnd(replaceLeadingTrailingDashesHelper(x[2]))
  return(c(seqIn, seqGL))
}

# Converts the leading and trailing "-" runs of the string `x` to "N" and returns the
# sequence as a vector of single characters.
replaceLeadingTrailingDashesHelper <- function(x) {
  # "-*" also matches the empty string, so there is one match per scan position;
  # the first and last match lengths are read as the sizes of the leading and
  # trailing dash runs.
  grepResults <- gregexpr("-*", x)
  grepResultsLen <- attr(grepResults[[1]], "match.length")
  # NOTE(review): this prints every processed sequence to stdout and looks like
  # leftover debugging; kept because downstream tooling may capture stdout.
  print(paste("x = '", x, "'", sep = ""))
  x <- s2c(x)
  if (x[1] == "-") {
    x[1:grepResultsLen[1]] <- "N"
  }
  if (x[length(x)] == "-") {
    x[(length(x) - grepResultsLen[length(grepResultsLen)] + 1):length(x)] <- "N"
  }
  return(x)
}


# Check sequences for indels
# `matInputP`: character matrix, column 1 = input sequence, column 2 = germline.
# Returns list(Insertions, Deletions) of per-row logical flags.
checkForInDels <- function(matInputP) {
  insPos <- checkInsertion(matInputP)
  delPos <- checkDeletions(matInputP)
  return(list("Insertions" = insPos, "Deletions" = delPos))
}

# Check sequences for insertions
# A row is flagged when some germline gap position is not a gap in the input.
# gregexpr() reports -1 when a string has no "-", so a gap-free germline paired with
# a gapped input is flagged as well.
checkInsertion <- function(matInputP) {
  insertionCheck <- apply(matInputP, 1, function(x) {
    inputGaps <- as.vector(gregexpr("-", x[1])[[1]])
    glGaps <- as.vector(gregexpr("-", x[2])[[1]])
    is.finite(match(FALSE, glGaps %in% inputGaps))
  })
  return(as.vector(insertionCheck))
}

# Fix insertions
# For every flagged row, germline positions that are gaps only in the germline are set
# to "N". The corrected germline is written into column 2 of the GLOBAL `matInput`
# (not into `matInputP`). Returns the per-row insertion flags.
fixInsertions <- function(matInputP) {
  insPos <- checkInsertion(matInputP)
  for (rowIndex in (1:nrow(matInputP))[insPos]) {
    x <- matInputP[rowIndex, ]
    inputGaps <- gregexpr("-", x[1])[[1]]
    glGaps <- gregexpr("-", x[2])[[1]]
    posInsertions <- glGaps[!(glGaps %in% inputGaps)]
    inputInsertionToN <- s2c(x[2])
    inputInsertionToN[posInsertions] <- "N"
    matInput[rowIndex, 2] <<- c2s(inputInsertionToN)
  }
  return(insPos)
}

# Check sequences for deletions
# A row is flagged when some input gap position is not a gap in the germline
# (same -1 convention as checkInsertion).
checkDeletions <- function(matInputP) {
  deletionCheck <- apply(matInputP, 1, function(x) {
    inputGaps <- as.vector(gregexpr("-", x[1])[[1]])
    glGaps <- as.vector(gregexpr("-", x[2])[[1]])
    is.finite(match(FALSE, inputGaps %in% glGaps))
  })
  return(as.vector(deletionCheck))
}

# Fix sequences with deletions
# For every flagged row, input positions that are gaps only in the input are set to "N".
# The corrected input is written into column 1 of the GLOBAL `matInput`
# (not into `matInputP`). Returns the per-row deletion flags.
fixDeletions <- function(matInputP) {
  delPos <- checkDeletions(matInputP)
  for (rowIndex in (1:nrow(matInputP))[delPos]) {
    x <- matInputP[rowIndex, ]
    inputGaps <- gregexpr("-", x[1])[[1]]
    glGaps <- gregexpr("-", x[2])[[1]]
    posDeletions <- inputGaps[!(inputGaps %in% glGaps)]
    inputDeletionToN <- s2c(x[1])
    inputDeletionToN[posDeletions] <- "N"
    matInput[rowIndex, 1] <<- c2s(inputDeletionToN)
  }
  return(delPos)
}


# Trim DNA sequence to the last codon
# Strips the trailing run of "-" / "." and then cuts the sequence back to the last
# complete codon. A sequence that is empty or consists only of gap characters
# yields "" (the scan stops at the start of the string instead of indexing past it).
trimToLastCodon <- function(seqToTrim) {
  keepLen <- nchar(seqToTrim)
  while (keepLen > 0 && substr(seqToTrim, keepLen, keepLen) %in% c("-", ".")) {
    keepLen <- keepLen - 1
  }

  trimmedSeq <- substr(seqToTrim, 1, keepLen)
  # Trim sequence to last codon: if the codon containing the last nucleotide is
  # incomplete, cut just before that codon starts
  lastCodon <- getCodonPos(keepLen)
  if (lastCodon[3] > keepLen) {
    trimmedSeq <- substr(seqToTrim, 1, lastCodon[1] - 1)
  }

  return(trimmedSeq)
}

# Given a nuclotide position, returns the pos of the 3 nucs that made the codon
# e.g.
# nuc 86 is part of nucs 85,86,87
# Returns the three nucleotide positions of the codon that contains `nucPos`.
getCodonPos <- function(nucPos) {
  lastNuc <- ceiling(nucPos / 3) * 3
  (lastNuc - 2):lastNuc
}

# Given a nuclotide position, returns the codon number
# e.g. nuc 86 = codon 29
getCodonNumb <- function(nucPos) {
  ceiling(nucPos / 3)
}

# Given a codon, returns all the nuc positions that make the codon
getCodonNucs <- function(codonNumb) {
  getCodonPos(3 * codonNumb)
}

# Fills the global CODON_TABLE (only when testID <= 4; otherwise nothing happens).
# One column per codon; the 12 rows hold the mutation type of each single-nucleotide
# change, in the order produced by permutateAllCodon(). Codons containing "N" or "-"
# get all-NA columns. The finished table is stored globally as a character matrix.
computeCodonTable <- function(testID = 1) {
  if (testID > 4) {
    return(invisible(NULL))
  }

  # Pre-compute every codon
  columnIndex <- 1
  for (pOne in NUCLEOTIDES) {
    for (pTwo in NUCLEOTIDES) {
      for (pThree in NUCLEOTIDES) {
        codon <- paste(pOne, pTwo, pThree, sep = "")
        colnames(CODON_TABLE)[columnIndex] <- codon
        columnIndex <- columnIndex + 1
        oneStepNeighbours <- cbind(permutateAllCodon(codon), rep(codon, 12))
        CODON_TABLE[, codon] <- mutationTypeOptimized(oneStepNeighbours)
      }
    }
  }

  # All-NA columns for ambiguous codons: first every triplet over `alphabet`
  # containing "N", then every triplet (without "N") containing "-"
  ambiguous <- list(
    list(alphabet = c("N", "A", "C", "G", "T", "-"), marker = "N"),
    list(alphabet = c("-", "A", "C", "G", "T"), marker = "-")
  )
  for (spec in ambiguous) {
    for (a in spec$alphabet) {
      for (b in spec$alphabet) {
        for (c in spec$alphabet) {
          if (spec$marker %in% c(a, b, c)) {
            CODON_TABLE[, paste(a, b, c, sep = "")] <- rep(NA, 12)
          }
        }
      }
    }
  }
  CODON_TABLE <<- as.matrix(CODON_TABLE)
}

# Collapses the sequences of one clone into a single consensus-like sequence.
# Returns list(c(collapsedSequence, glSeq), errorFlag); errorFlag is TRUE when an "N"
# was met at a position where the clone members disagree.
# Per position (first `readEnd` characters):
#   * germline gap and only "-"/"N" in the clone        -> "-"
#   * all clone members agree                            -> that nucleotide
#   * every non-"N" member equals the germline           -> germline nucleotide
#   * otherwise a non-germline, non-"N" nucleotide is drawn at random with sample();
#     with nonTerminalOnly != 0 only nucleotides seen more than once are eligible
#     and the germline nucleotide is used when there are none.
collapseClone <- function(vecInputSeqs, glSeq, readEnd, nonTerminalOnly = 0) {
  vecInputSeqs <- unique(vecInputSeqs)
  if (length(vecInputSeqs) == 1) {
    return(list(c(vecInputSeqs, glSeq), FALSE))
  }

  # readEnd x nSeqs character matrix, one column per clone member
  charInputSeqs <- sapply(vecInputSeqs, function(x) {
    s2c(x)[1:readEnd]
  })
  charGLSeq <- s2c(glSeq)

  matClone <- sapply(1:readEnd, function(i) {
    column <- charInputSeqs[i, ]
    posNucs <- unique(column)
    posGL <- charGLSeq[i]
    error <- FALSE

    if (posGL == "-" && sum(!(posNucs %in% c("-", "N"))) == 0) {
      return(c("-", error))
    }
    if (length(posNucs) == 1) {
      return(c(posNucs[1], error))
    }

    if ("N" %in% posNucs) {
      error <- TRUE
    }
    if (sum(!posNucs[posNucs != "N"] %in% posGL) == 0) {
      return(c(posGL, error))
    }

    candidates <- column[column != "N" & column != posGL]
    if (nonTerminalOnly == 0) {
      return(c(sample(candidates, 1), error))
    }
    candidateCounts <- table(candidates)
    if (sum(candidateCounts > 1) == 0) {
      return(c(posGL, error))
    }
    repeated <- names(candidateCounts)[candidateCounts > 1]
    c(sample(candidates[candidates %in% repeated], 1), error)
  })

  return(list(c(c2s(matClone[1, ]), glSeq), "TRUE" %in% matClone[2, ]))
}

# Compute the expected for each sequence-germline pair
# `matInput`: character matrix, column 1 = input sequence, column 2 = germline.
# Returns a numeric matrix with one row per pair and the four columns produced by
# computeExpected(). mclapply is used when a package whose search-path entry contains
# "multicore" is attached, lapply otherwise; the computation is the same either way.
getExpectedIndividual <- function(matInput) {
  applyFun <- if (any(grep("multicore", search()))) mclapply else lapply
  rowIds <- 1:nrow(matInput)

  # Mutabilities are computed once per distinct germline
  facGL <- factor(matInput[, 2])
  facLevels <- levels(facGL)
  LisGLs_MutabilityU <- applyFun(1:length(facLevels), function(x) {
    computeMutabilities(facLevels[x])
  })
  facIndex <- match(facGL, facLevels)

  # Mask (NA) the positions where the input sequence is "N"
  LisGLs_Mutability <- applyFun(rowIds, function(x) {
    cInput <- rep(NA, nchar(matInput[x, 1]))
    cInput[s2c(matInput[x, 1]) != "N"] <- 1
    LisGLs_MutabilityU[[facIndex[x]]] * cInput
  })

  LisGLs_Targeting <- applyFun(rowIds, function(x) {
    computeTargeting(matInput[x, 2], LisGLs_Mutability[[x]])
  })

  LisGLs_MutationTypes <- applyFun(rowIds, function(x) {
    computeMutationTypes(matInput[x, 2])
  })

  LisGLs_Exp <- applyFun(rowIds, function(x) {
    computeExpected(LisGLs_Targeting[[x]], LisGLs_MutationTypes[[x]])
  })

  ul_LisGLs_Exp <- unlist(LisGLs_Exp)
  return(matrix(ul_LisGLs_Exp, ncol = 4, nrow = (length(ul_LisGLs_Exp) / 4), byrow = TRUE))
}

# Compute mutabilities of sequence based on the tri-nucleotide model
# Returns one value per character of `paramSeq`; gap ("-") positions stay NA. The model
# is chosen by the global `mutabilityModel`: 5 uses the 5-mer table (the first two
# gapless positions stay NA), anything else averages the tri-nucleotide mutabilities of
# the triplets overlapping each position.
computeMutabilities <- function(paramSeq) {
  seqMutabilites <- rep(NA, nchar(paramSeq))

  gaplessSeq <- gsub("-", "", paramSeq)
  gaplessSeqLen <- nchar(gaplessSeq)
  gaplessSeqMutabilites <- rep(NA, gaplessSeqLen)

  # 5-mer window centred on every position from the third onwards
  pos <- 3:(gaplessSeqLen)
  subSeq <- substr(rep(gaplessSeq, gaplessSeqLen - 2), (pos - 2), (pos + 2))

  if (mutabilityModel != 5) {
    # The centre nucleotide is position 3, 2 and 1 of the three triplets in the window
    windowId <- rep(1:(gaplessSeqLen - 2), 3)
    gaplessSeqMutabilites[pos] <- tapply(
      c(
        getMutability(substr(subSeq, 1, 3), 3),
        getMutability(substr(subSeq, 2, 4), 2),
        getMutability(substr(subSeq, 3, 5), 1)
      ),
      windowId, mean, na.rm = TRUE
    )
    # Pos 1
    gaplessSeqMutabilites[1] <- getMutability(substr(gaplessSeq, 1, 3), 1)
    # Pos 2
    firstFour <- substr(gaplessSeq, 1, 4)
    gaplessSeqMutabilites[2] <- mean(
      c(
        getMutability(substr(firstFour, 1, 3), 2),
        getMutability(substr(firstFour, 2, 4), 1)
      ),
      na.rm = TRUE
    )
  } else {
    gaplessSeqMutabilites[pos] <- sapply(subSeq, function(x) {
      getMutability5(x)
    }, simplify = TRUE)
  }

  seqMutabilites[which(s2c(paramSeq) != "-")] <- gaplessSeqMutabilites
  return(seqMutabilites)
}

# Returns the mutability of a triplet at a given position
# Row lookup in the global `mutability` table by triplet name; `pos` selects the
# column(s) (position within the triplet). Unknown triplets give NA.
getMutability <- function(codon, pos = 1:3) {
  triplets <- rownames(mutability)
  mutability[match(codon, triplets), pos]
}

# Lookup of a 5-mer by name in the global `mutability` object
getMutability5 <- function(fivemer) {
  return(mutability[fivemer])
}

# Returns the substitution probabilty
# (row `nuc` of the global `substitution` matrix)
getTransistionProb <- function(nuc) {
  substitution[nuc, ]
}

# Column `fivemer` of the global `substitution` matrix, or four NAs when the 5-mer is
# not one of its column names
getTransistionProb5 <- function(fivemer) {
  if (any(which(fivemer == colnames(substitution)))) {
    return(substitution[, fivemer])
  }
  array(NA, 4)
}

# Given a nuc, returns the other 3 nucs it can mutate to
# Note: for a value that is not in NUCLEOTIDES (e.g. "N") the result is empty,
# because a negative index built from an empty which() selects nothing.
canMutateTo <- function(nuc) {
  NUCLEOTIDES[-which(NUCLEOTIDES == nuc)]
}

# Given a nucleotide, returns the probabilty of other nucleotide it can mutate to
canMutateToProb <- function(nuc) {
  substitution[nuc, canMutateTo(nuc)]
}

# Compute targeting, based on precomputed mutatbility & substitution
# Returns a 4 x length(sequence) matrix (rows = NUCLEOTIDES): per-position mutability
# times the probability of substituting to each nucleotide. The global
# `substitutionModel` selects the single-nucleotide model (!= 5) or the 5-mer model (5).
computeTargeting <- function(param_strSeq, param_vecMutabilities) {
  if (substitutionModel != 5) {
    vecSeq <- s2c(param_strSeq)
    matTargeting <- sapply(1:length(vecSeq), function(x) {
      param_vecMutabilities[x] * getTransistionProb(vecSeq[x])
    })
    dimnames(matTargeting) <- list(NUCLEOTIDES, 1:(length(vecSeq)))
    return(matTargeting)
  }

  seqLen <- nchar(param_strSeq)
  seqsubstitution <- matrix(NA, ncol = seqLen, nrow = 4)
  gaplessSeq <- gsub("-", "", param_strSeq)
  gaplessSeqLen <- nchar(gaplessSeq)
  gaplessSeqSubstitution <- matrix(NA, ncol = gaplessSeqLen, nrow = 4)

  # 5-mer window centred on every gapless position from the third onwards
  pos <- 3:(gaplessSeqLen)
  subSeq <- substr(rep(gaplessSeq, gaplessSeqLen - 2), (pos - 2), (pos + 2))
  gaplessSeqSubstitution[, pos] <- sapply(subSeq, function(x) {
    getTransistionProb5(x)
  }, simplify = TRUE)
  seqsubstitution[, which(s2c(param_strSeq) != "-")] <- gaplessSeqSubstitution

  # Scale every column (position) by its mutability
  matTargeting <- sweep(seqsubstitution, 2, param_vecMutabilities, `*`)
  dimnames(matTargeting) <- list(NUCLEOTIDES, 1:(seqLen))
  return(matTargeting)
}

# Compute the mutations types
# Returns a 4 x nchar(sequence) matrix (rows = NUCLEOTIDES) holding, for every position,
# the CODON_TABLE entry for mutating that position to each nucleotide.
computeMutationTypes <- function(param_strSeq) {
  lenSeq <- nchar(param_strSeq)
  codonStarts <- {1:(lenSeq / 3)} * 3 - 2
  vecCodons <- sapply(codonStarts, function(x) {
    substr(param_strSeq, x, x + 2)
  })
  matMutationTypes <- matrix(unlist(CODON_TABLE[, vecCodons]), ncol = lenSeq, nrow = 4, byrow = FALSE)
  dimnames(matMutationTypes) <- list(NUCLEOTIDES, 1:(ncol(matMutationTypes)))
  return(matMutationTypes)
}

# Same lookup for a single codon: 4 x 3 matrix (rows follow NUCLEOTIDES order, unnamed)
computeMutationTypesFast <- function(param_strSeq) {
  matMutationTypes <- matrix(CODON_TABLE[, param_strSeq], ncol = 3, nrow = 4, byrow = FALSE)
  return(matMutationTypes)
}

# `matOfCodons`: one row per pair, column 1 = mutated codon, column 2 = original codon.
# Returns the mutation type of every row.
mutationTypeOptimized <- function(matOfCodons) {
  apply(matOfCodons, 1, function(x) {
    mutationType(x[2], x[1])
  })
}

# Returns a vector of codons 1 mutation away from the given codon
# (12 entries: 4 nucleotides at each of the 3 positions, so the codon itself
# appears three times)
permutateAllCodon <- function(codon) {
  cCodon <- s2c(codon)
  matCodons <- t(array(cCodon, dim = c(3, 12)))
  matCodons[1:4, 1] <- NUCLEOTIDES
  matCodons[5:8, 2] <- NUCLEOTIDES
  matCodons[9:12, 3] <- NUCLEOTIDES
  apply(matCodons, 1, c2s)
}

# Given two codons, tells you if the mutation is R or S (based on your definition)
# The definition is selected by the GLOBAL `testID`:
#   4     -> "R" when the amino-acid trait changes, "S" otherwise, "Stop" if either
#            codon is a stop codon
#   5     -> based on the mutated germline nucleotide: "C" -> "R", "G" -> "S",
#            anything else -> "Stop"; identical codons -> "S"
#   other -> "R" when the amino acid changes, "S" otherwise, "Stop" if either codon
#            is a stop codon
# Returns NA when either codon is NA or cannot be translated.
mutationType <- function(codonFrom, codonTo) {
  untranslatable <- function() {
    is.na(codonFrom) || is.na(codonTo) ||
      is.na(translateCodonToAminoAcid(codonFrom)) || is.na(translateCodonToAminoAcid(codonTo))
  }
  involvesStop <- function() {
    translateCodonToAminoAcid(codonTo) == "*" || translateCodonToAminoAcid(codonFrom) == "*"
  }

  if (testID == 4) {
    if (untranslatable()) {
      return(NA)
    }
    traitFrom <- translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonFrom))
    traitTo <- translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonTo))
    mutationType <- if (traitFrom != traitTo) "R" else "S"
    if (involvesStop()) {
      mutationType <- "Stop"
    }
    return(mutationType)
  }

  if (testID == 5) {
    if (untranslatable()) {
      return(NA)
    }
    if (codonFrom == codonTo) {
      return("S")
    }
    codonFrom <- s2c(codonFrom)
    codonTo <- s2c(codonTo)
    mutationType <- "Stop"
    nucOfI <- codonFrom[which(codonTo != codonFrom)]
    if (nucOfI == "C") {
      mutationType <- "R"
    } else if (nucOfI == "G") {
      mutationType <- "S"
    }
    return(mutationType)
  }

  if (untranslatable()) {
    return(NA)
  }
  mutationType <- if (translateCodonToAminoAcid(codonFrom) != translateCodonToAminoAcid(codonTo)) "R" else "S"
  if (involvesStop()) {
    mutationType <- "Stop"
  }
  return(mutationType)
}


# given a mat of targeting & it's corresponding mutationtypes returns
# a vector of Exp_RCDR,Exp_SCDR,Exp_RFWR,Exp_SFWR
# Region membership is read from the global logical matrices CDR_Nuc_Mat / FWR_Nuc_Mat.
computeExpected <- function(paramTargeting, paramMutationTypes) {
  # Sum of the targeting over the cells of one mutation type inside one region
  sumInRegion <- function(cells, regionMat) {
    sum(paramTargeting[cells[which(regionMat[cells] == TRUE)]], na.rm = TRUE)
  }

  # Replacements
  RPos <- which(paramMutationTypes == "R")
  Exp_R_FWR <- sumInRegion(RPos, FWR_Nuc_Mat)
  Exp_R_CDR <- sumInRegion(RPos, CDR_Nuc_Mat)
  # Silents
  SPos <- which(paramMutationTypes == "S")
  Exp_S_FWR <- sumInRegion(SPos, FWR_Nuc_Mat)
  Exp_S_CDR <- sumInRegion(SPos, CDR_Nuc_Mat)

  return(c(Exp_R_CDR, Exp_S_CDR, Exp_R_FWR, Exp_S_FWR))
}

# Count the mutations in a sequence
# each mutation is treated independently
# Swaps the two rows of `rev_in_matrix` and hands the result to analyzeMutations2NucUri().
analyzeMutations2NucUri_website <- function(rev_in_matrix) {
  paramGL <- rev_in_matrix[2, ]
  paramSeq <- rev_in_matrix[1, ]

  #Fill seq with GL seq if gapped
  #if( any(paramSeq=="-") ){
  #  gapPos_Seq = which(paramSeq=="-")
  #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "-"]
  #  paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
  #}


  #if( any(paramSeq=="N") ){
  #  gapPos_Seq = which(paramSeq=="N")
  #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
  #  paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
  #}

  analyzeMutations2NucUri(matrix(c(paramGL, paramSeq), 2, length(paramGL), byrow = TRUE))
}

#1 = GL
#2 = Seq
# NOTE(review): the code reads row 2 into `paramGL` and row 1 into `paramSeq`, which
# does not obviously match the two-line legend above - confirm against the callers.
# Each differing position is classified on its own: the codon around it is taken from
# `paramGL` and only that one nucleotide is replaced by the `paramSeq` value. Positions
# whose codon contains "N" in `paramSeq` are skipped.
# Returns a character vector of mutation types named by nucleotide position (NA
# entries removed), or a single NA when there is nothing to report.
analyzeMutations2NucUri <- function(in_matrix = matrix(c(c("A", "A", "A", "C", "C", "C"), c("A", "G", "G", "C", "C", "A")), 2, 6, byrow = TRUE)) {
  paramGL <- in_matrix[2, ]
  paramSeq <- in_matrix[1, ]

  mutations_val <- paramGL != paramSeq
  if (!any(mutations_val)) {
    return(NA)
  }

  mutationPos <- {1:length(mutations_val)}[mutations_val]
  mutationPos <- mutationPos[sapply(mutationPos, function(x) {
    !any(paramSeq[getCodonPos(x)] == "N")
  })]
  length_mutations <- length(mutationPos)
  mutationInfo <- rep(NA, length_mutations)

  # Explicit emptiness test (any() on integer positions only worked by coercion)
  if (length_mutations > 0) {
    pos <- mutationPos
    pos_array <- array(sapply(pos, getCodonPos))
    codonGL <- paramGL[pos_array]

    # 3 x n matrix: germline codon with only the mutated nucleotide swapped in
    codonSeq <- sapply(pos, function(x) {
      seqP <- paramGL[getCodonPos(x)]
      muCodonPos <- {x - 1} %% 3 + 1
      seqP[muCodonPos] <- paramSeq[x]
      return(seqP)
    })
    GLcodons <- apply(matrix(codonGL, length_mutations, 3, byrow = TRUE), 1, c2s)
    Seqcodons <- apply(codonSeq, 2, c2s)
    mutationInfo <- apply(rbind(GLcodons, Seqcodons), 2, function(x) {
      mutationType(c2s(x[1]), c2s(x[2]))
    })
    names(mutationInfo) <- mutationPos
  }

  if (any(!is.na(mutationInfo))) {
    return(mutationInfo[!is.na(mutationInfo)])
  }
  return(NA)
}

# Counts the observed mutations per type and region.
# `mu`: the result of analyzeMutations2NucUri() - a character vector of mutation types
# named by nucleotide position, or NA.
# Returns c(R_CDR, S_CDR, R_FWR, S_FWR); all zeros when `mu` is empty or entirely NA.
# Region membership is read from the global vectors CDR_Nuc / FWR_Nuc.
processNucMutations2 <- function(mu) {
  # `mu` is usually longer than one element, so it must not be used directly as an
  # if() condition (an error since R 4.2.0)
  if (length(mu) == 0 || all(is.na(mu))) {
    return(rep(0, 4))
  }

  # c(count in CDR, count in FWR) for one mutation type
  countByRegion <- function(type) {
    hits <- mu[which(mu == type)]
    if (length(hits) == 0) {
      return(c(0, 0))
    }
    nucNumbs <- as.numeric(names(hits))
    c(
      sum(as.integer(CDR_Nuc[nucNumbs]), na.rm = TRUE),
      sum(as.integer(FWR_Nuc[nucNumbs]), na.rm = TRUE)
    )
  }

  replacements <- countByRegion("R")
  silents <- countByRegion("S")

  retVec <- c(replacements[1], silents[1], replacements[2], silents[2])
  retVec[is.na(retVec)] <- 0
  return(retVec)
}


## Z-score Test
# `mat`: one row per sequence; columns 1-4 are the observed counts
# (R_CDR, S_CDR, R_FWR, S_FWR) and columns 5-8 the matching expected values.
# Returns an nrow(mat) x 2 matrix of z-scores (column 1 = CDR, column 2 = FWR);
# NaN results are turned into NA. An unrecognised `test` yields all NA.
# Column subsets use drop = FALSE so that a single-row `mat` is handled as well.
computeZScore <- function(mat, test = "Focused") {
  matRes <- matrix(NA, ncol = 2, nrow = (nrow(mat)))

  # z-score of `target` given the per-row probability P and the observed counts
  # the mutations are distributed over
  zFromP <- function(P, observed, target) {
    R_mean <- apply(cbind(observed, P), 1, function(x) {
      x[length(x)] * (sum(x[-length(x)]))
    })
    R_sd <- sqrt(R_mean * (1 - P))
    (target - R_mean) / R_sd
  }
  firstShare <- function(x) {
    (x[1] / sum(x))
  }

  if (test == "Focused") {
    # Z_Focused_CDR
    P <- apply(mat[, c(5, 6, 8), drop = FALSE], 1, firstShare)
    matRes[, 1] <- zFromP(P, mat[, c(1, 2, 4), drop = FALSE], mat[, 1])
    # Z_Focused_FWR
    P <- apply(mat[, c(7, 6, 8), drop = FALSE], 1, firstShare)
    matRes[, 2] <- zFromP(P, mat[, c(3, 2, 4), drop = FALSE], mat[, 3])
  }

  if (test == "Local") {
    # Z_Local_CDR
    P <- apply(mat[, c(5, 6), drop = FALSE], 1, firstShare)
    matRes[, 1] <- zFromP(P, mat[, c(1, 2), drop = FALSE], mat[, 1])
    # Z_Local_FWR
    P <- apply(mat[, c(7, 8), drop = FALSE], 1, firstShare)
    matRes[, 2] <- zFromP(P, mat[, c(3, 4), drop = FALSE], mat[, 3])
  }

  if (test == "Imbalanced") {
    # Z_Imbalanced_CDR
    P <- apply(mat[, 5:8, drop = FALSE], 1, function(x) {
      ((x[1] + x[2]) / sum(x))
    })
    matRes[, 1] <- zFromP(P, mat[, 1:4, drop = FALSE], mat[, 1])
    # Z_Imbalanced_FWR
    P <- apply(mat[, 5:8, drop = FALSE], 1, function(x) {
      ((x[3] + x[4]) / sum(x))
    })
    matRes[, 2] <- zFromP(P, mat[, 1:4, drop = FALSE], mat[, 3])
  }

  matRes[is.nan(matRes)] <- NA
  return(matRes)
}

# Return a p-value for a z-score
# z > 0 gives the upper-tail probability; z < 0 gives the lower-tail probability with
# a NEGATIVE sign; z == 0 gives 0.5; NA/NaN gives NA.
z2p <- function(z) {
  if (is.nan(z) || is.na(z)) {
    return(NA)
  }
  if (z > 0) {
    p <- (1 - pnorm(z, 0, 1))
  } else if (z < 0) {
    p <- (-1 * pnorm(z, 0, 1))
  } else {
    p <- 0.5
  }
  return(p)
}


## Bayesian Test

# Fitted parameter for the bayesian framework
BAYESIAN_FITTED<-c(0.407277142798302, 0.554007336744485, 0.63777155771234, 0.693989162719009, 0.735450014674917, 0.767972534429806, 0.794557287143399, 0.816906816601605, 0.83606796225341, 0.852729446430296, 0.867370424541641, 0.880339760590323, 0.891900995024999, 0.902259181289864, 0.911577919359,0.919990301665853, 0.927606458124537, 0.934518806350661, 0.940805863754375, 0.946534836475715, 0.951763691199255, 0.95654428191308, 0.960920179487397, 0.964930893680829, 0.968611312149038, 0.971992459313836, 0.975102110004818, 0.977964943023096, 0.980603428208439, 0.983037660179428, 0.985285800977406, 0.987364285326685, 0.989288037855441, 0.991070478823525, 0.992723699729969, 0.994259575477392, 0.995687688867975, 0.997017365051493, 0.998257085153047, 0.999414558305388, 1.00049681357804, 1.00151036237481, 1.00246080204981, 1.00335370751909, 1.0041939329768, 1.0049859393417, 1.00573382091263, 1.00644127217376, 1.00711179729107, 1.00774845526417, 1.00835412715854, 1.00893143010366, 1.00948275846309, 1.01001030293661, 1.01051606798079, 1.01100188771288, 1.01146944044216, 1.01192026195449, 1.01235575766094, 1.01277721370986)
CONST_i <- sort(c(((2^(seq(-39,0,length.out=201)))/2)[1:200],(c(0:11,13:99)+0.5)/100,1-(2^(seq(-39,0,length.out=201)))/2))

# Given x, M & p, returns a pdf
# (definition continues beyond this point; the lines below are reproduced unchanged)
calculate_bayes <- function ( x=3, N=10, p=0.33,
                              i=CONST_i,
                              max_sigma=20,length_sigma=4001
){
  if(!0%in%N){
    G <- max(length(x),length(N),length(p))
    x=array(x,dim=G)
    N=array(N,dim=G)
    p=array(p,dim=G)
    sigma_s<-seq(-max_sigma,max_sigma,length.out=length_sigma)
    sigma_1<-log({i/{1-i}}/{p/{1-p}})
    index<-min(N,60)
y<-dbeta(i,x+BAYESIAN_FITTED[index],N+BAYESIAN_FITTED[index]-x)*(1-p)*p*exp(sigma_1)/({1-p}^2+2*p*{1-p}*exp(sigma_1)+{p^2}*exp(2*sigma_1)) + if(!sum(is.na(y))){ + tmp<-approx(sigma_1,y,sigma_s)$y + tmp/sum(tmp)/{2*max_sigma/{length_sigma-1}} + }else{ + return(NA) + } + }else{ + return(NA) + } + } + # Given a mat of observed & expected, return a list of CDR & FWR pdf for selection + computeBayesianScore <- function(mat, test="Focused", max_sigma=20,length_sigma=4001){ + flagOneSeq = F + if(nrow(mat)==1){ + mat=rbind(mat,mat) + flagOneSeq = T + } + if(test=="Focused"){ + #CDR + P = c(apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))}),0.5) + N = c(apply(mat[,c(1,2,4)],1,function(x){(sum(x))}),0) + X = c(mat[,1],0) + bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesCDR = bayesCDR[-length(bayesCDR)] + + #FWR + P = c(apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))}),0.5) + N = c(apply(mat[,c(3,2,4)],1,function(x){(sum(x))}),0) + X = c(mat[,3],0) + bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesFWR = bayesFWR[-length(bayesFWR)] + } + + if(test=="Local"){ + #CDR + P = c(apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))}),0.5) + N = c(apply(mat[,c(1,2)],1,function(x){(sum(x))}),0) + X = c(mat[,1],0) + bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesCDR = bayesCDR[-length(bayesCDR)] + + #FWR + P = c(apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))}),0.5) + N = c(apply(mat[,c(3,4)],1,function(x){(sum(x))}),0) + X = c(mat[,3],0) + bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesFWR = bayesFWR[-length(bayesFWR)] + } + + if(test=="Imbalanced"){ + #CDR + P = c(apply(mat[,c(5:8)],1,function(x){((x[1]+x[2])/sum(x))}),0.5) + N = 
c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0) + X = c(apply(mat[,c(1:2)],1,function(x){(sum(x))}),0) + bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesCDR = bayesCDR[-length(bayesCDR)] + + #FWR + P = c(apply(mat[,c(5:8)],1,function(x){((x[3]+x[4])/sum(x))}),0.5) + N = c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0) + X = c(apply(mat[,c(3:4)],1,function(x){(sum(x))}),0) + bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesFWR = bayesFWR[-length(bayesFWR)] + } + + if(test=="ImbalancedSilent"){ + #CDR + P = c(apply(mat[,c(6,8)],1,function(x){((x[1])/sum(x))}),0.5) + N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0) + X = c(apply(mat[,c(2,4)],1,function(x){(x[1])}),0) + bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesCDR = bayesCDR[-length(bayesCDR)] + + #FWR + P = c(apply(mat[,c(6,8)],1,function(x){((x[2])/sum(x))}),0.5) + N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0) + X = c(apply(mat[,c(2,4)],1,function(x){(x[2])}),0) + bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesFWR = bayesFWR[-length(bayesFWR)] + } + + if(flagOneSeq==T){ + bayesCDR = bayesCDR[1] + bayesFWR = bayesFWR[1] + } + return( list("CDR"=bayesCDR, "FWR"=bayesFWR) ) + } + + ##Covolution + break2chunks<-function(G=1000){ + base<-2^round(log(sqrt(G),2),0) + return(c(rep(base,floor(G/base)-1),base+G-(floor(G/base)*base))) + } + + PowersOfTwo <- function(G=100){ + exponents <- array() + i = 0 + while(G > 0){ + i=i+1 + exponents[i] <- floor( log2(G) ) + G <- G-2^exponents[i] + } + return(exponents) + } + + convolutionPowersOfTwo <- function( cons, length_sigma=4001 ){ + G = ncol(cons) + if(G>1){ + for(gen in log(G,2):1){ + 
ll<-seq(from=2,to=2^gen,by=2) + sapply(ll,function(l){cons[,l/2]<<-weighted_conv(cons[,l],cons[,l-1],length_sigma=length_sigma)}) + } + } + return( cons[,1] ) + } + + convolutionPowersOfTwoByTwos <- function( cons, length_sigma=4001,G=1 ){ + if(length(ncol(cons))) G<-ncol(cons) + groups <- PowersOfTwo(G) + matG <- matrix(NA, ncol=length(groups), nrow=length(cons)/G ) + startIndex = 1 + for( i in 1:length(groups) ){ + stopIndex <- 2^groups[i] + startIndex - 1 + if(stopIndex!=startIndex){ + matG[,i] <- convolutionPowersOfTwo( cons[,startIndex:stopIndex], length_sigma=length_sigma ) + startIndex = stopIndex + 1 + } + else { + if(G>1) matG[,i] <- cons[,startIndex:stopIndex] + else matG[,i] <- cons + #startIndex = stopIndex + 1 + } + } + return( list( matG, groups ) ) + } + + weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){ + lx<-length(x) + ly<-length(y) + if({lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}){ + if(w<1){ + y1<-approx(1:ly,y,seq(1,ly,length.out=m))$y + x1<-approx(1:lx,x,seq(1,lx,length.out=m/w))$y + lx<-length(x1) + ly<-length(y1) + } + else { + y1<-approx(1:ly,y,seq(1,ly,length.out=m*w))$y + x1<-approx(1:lx,x,seq(1,lx,length.out=m))$y + lx<-length(x1) + ly<-length(y1) + } + } + else{ + x1<-x + y1<-approx(1:ly,y,seq(1,ly,length.out=floor(lx*w)))$y + ly<-length(y1) + } + tmp<-approx(x=1:(lx+ly-1),y=convolve(x1,rev(y1),type="open"),xout=seq(1,lx+ly-1,length.out=length_sigma))$y + tmp[tmp<=0] = 0 + return(tmp/sum(tmp)) + } + + calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){ + matG <- listMatG[[1]] + groups <- listMatG[[2]] + i = 1 + resConv <- matG[,i] + denom <- 2^groups[i] + if(length(groups)>1){ + while( i<length(groups) ){ + i = i + 1 + resConv <- weighted_conv(resConv, matG[,i], w= {{2^groups[i]}/denom} ,length_sigma=length_sigma) + #cat({{2^groups[i]}/denom},"\n") + denom <- denom + 2^groups[i] + } + } + return(resConv) + } + + # Given a list of PDFs, returns a convoluted PDF + groupPosteriors <- function( listPosteriors, 
max_sigma=20, length_sigma=4001 ,Threshold=2 ){ + listPosteriors = listPosteriors[ !is.na(listPosteriors) ] + Length_Postrior<-length(listPosteriors) + if(Length_Postrior>1 & Length_Postrior<=Threshold){ + cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors)) + listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma) + y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma) + return( y/sum(y)/(2*max_sigma/(length_sigma-1)) ) + }else if(Length_Postrior==1) return(listPosteriors[[1]]) + else if(Length_Postrior==0) return(NA) + else { + cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors)) + y = fastConv(cons,max_sigma=max_sigma, length_sigma=length_sigma ) + return( y/sum(y)/(2*max_sigma/(length_sigma-1)) ) + } + } + + fastConv<-function(cons, max_sigma=20, length_sigma=4001){ + chunks<-break2chunks(G=ncol(cons)) + if(ncol(cons)==3) chunks<-2:1 + index_chunks_end <- cumsum(chunks) + index_chunks_start <- c(1,index_chunks_end[-length(index_chunks_end)]+1) + index_chunks <- cbind(index_chunks_start,index_chunks_end) + + case <- sum(chunks!=chunks[1]) + if(case==1) End <- max(1,((length(index_chunks)/2)-1)) + else End <- max(1,((length(index_chunks)/2))) + + firsts <- sapply(1:End,function(i){ + indexes<-index_chunks[i,1]:index_chunks[i,2] + convolutionPowersOfTwoByTwos(cons[ ,indexes])[[1]] + }) + if(case==0){ + result<-calculate_bayesGHelper( convolutionPowersOfTwoByTwos(firsts) ) + }else if(case==1){ + last<-list(calculate_bayesGHelper( + convolutionPowersOfTwoByTwos( cons[ ,index_chunks[length(index_chunks)/2,1]:index_chunks[length(index_chunks)/2,2]] ) + ),0) + result_first<-calculate_bayesGHelper(convolutionPowersOfTwoByTwos(firsts)) + result<-calculate_bayesGHelper( + list( + cbind( + result_first,last[[1]]), + c(log(index_chunks_end[length(index_chunks)/2-1],2),log(index_chunks[length(index_chunks)/2,2]-index_chunks[length(index_chunks)/2,1]+1,2)) + ) + ) + } + 
return(as.vector(result)) + } + + # Computes the 95% CI for a pdf + calcBayesCI <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){ + if(length(Pdf)!=length_sigma) return(NA) + sigma_s=seq(-max_sigma,max_sigma,length.out=length_sigma) + cdf = cumsum(Pdf) + cdf = cdf/cdf[length(cdf)] + return( c(sigma_s[findInterval(low,cdf)-1] , sigma_s[findInterval(up,cdf)]) ) + } + + # Computes a mean for a pdf + calcBayesMean <- function(Pdf,max_sigma=20,length_sigma=4001){ + if(length(Pdf)!=length_sigma) return(NA) + sigma_s=seq(-max_sigma,max_sigma,length.out=length_sigma) + norm = {length_sigma-1}/2/max_sigma + return( (Pdf%*%sigma_s/norm) ) + } + + # Returns the mean, and the 95% CI for a pdf + calcBayesOutputInfo <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){ + if(is.na(Pdf)) + return(rep(NA,3)) + bCI = calcBayesCI(Pdf=Pdf,low=low,up=up,max_sigma=max_sigma,length_sigma=length_sigma) + bMean = calcBayesMean(Pdf=Pdf,max_sigma=max_sigma,length_sigma=length_sigma) + return(c(bMean, bCI)) + } + + # Computes the p-value of a pdf + computeSigmaP <- function(Pdf, length_sigma=4001, max_sigma=20){ + if(length(Pdf)>1){ + norm = {length_sigma-1}/2/max_sigma + pVal = {sum(Pdf[1:{{length_sigma-1}/2}]) + Pdf[{{length_sigma+1}/2}]/2}/norm + if(pVal>0.5){ + pVal = pVal-1 + } + return(pVal) + }else{ + return(NA) + } + } + + # Compute p-value of two distributions + compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){ + #print(c(length(dens1),length(dens2))) + if(length(dens1)>1 & length(dens2)>1 ){ + dens1<-dens1/sum(dens1) + dens2<-dens2/sum(dens2) + cum2 <- cumsum(dens2)-dens2/2 + tmp<- sum(sapply(1:length(dens1),function(i)return(dens1[i]*cum2[i]))) + #print(tmp) + if(tmp>0.5)tmp<-tmp-1 + return( tmp ) + } + else { + return(NA) + } + #return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N) + } + + # get number of seqeunces contributing 
# to the sigma (i.e. sequences with mutations)
#   matMutations: matrix with one row per sequence; columns 1:4 are summed.
#   test: 1 = focused, 2 = local (4 is treated as 2).
# Returns c("CDR"=n, "FWR"=n); both are 0 for any other test value.
numberOfSeqsWithMutations <- function(matMutations,test=1){
  if(test==4)test=2
  cdrSeqs <- 0
  fwrSeqs <- 0

  # Number of rows whose sum over cols is positive
  countMutated <- function(cols){
    perSeq <- apply(matMutations, 1, function(x){ sum(x[cols]) })
    if( length(which(perSeq>0)) > 0 ) sum(perSeq>0) else 0
  }

  if(test==1){#focused
    cdrSeqs <- countMutated(c(1,2,4))
    fwrSeqs <- countMutated(c(3,4,2))
  }
  if(test==2){#local
    cdrSeqs <- countMutated(c(1,2))
    fwrSeqs <- countMutated(c(3,4))
  }
  return(c("CDR"=cdrSeqs, "FWR"=fwrSeqs))
}



# Map a selection estimate and its signed p-value to an HTML colour.
#   sigmaVal < 0 selects the green ramp, otherwise the red ramp; when
#   sigmaVal is NA the sign of pVal is used. The shade deepens at
#   |pVal| <= 0.1, 0.05, 0.01 and 0.005.
# Returns NA when both inputs are NA and white when pVal is NA, 0 or 1.
shadeColor <- function(sigmaVal=NA,pVal=NA){
  if(is.na(sigmaVal) && is.na(pVal)) return(NA)
  if(is.na(sigmaVal) && !is.na(pVal)) sigmaVal=sign(pVal)
  if(is.na(pVal) || pVal==1 || pVal==0){
    return("#FFFFFF")
  }

  greens = c("#CCFFCC","#99FF99","#66FF66","#33FF33","#00FF00")
  reds   = c("#FFCCCC","#FF9999","#FF6666","#FF3333","#FF0000")
  # Both ramps use inclusive thresholds; the red ramp previously used
  # "< 0.005" for its last step while the green ramp used "<= 0.005".
  shade = 1 + sum(abs(pVal) <= c(0.1,0.05,0.01,0.005))

  if(sigmaVal<0){
    returnColor = greens[shade]
  }else{
    returnColor = reds[shade]
  }
  return(returnColor)
}



# Coordinates of a point placed relative to the current plot region:
# xfrac of the x-range left of the left edge and yfrac of the y-range above
# the top edge (both read from par()$usr).
#   log: FALSE for linear axes; 1 = x value is 10^, 2 = y value is 10^,
#        3 = both.
# Returns c("x"=..., "y"=...).
plotHelp <- function(xfrac=0.05,yfrac=0.05,log=FALSE){
  usr = par()$usr
  xPos = usr[1]-(usr[2]-usr[1])*xfrac
  yPos = usr[4]+(usr[4]-usr[3])*yfrac
  if(!log){
    x = xPos
    y = yPos
  }else {
    if(log==2){
      x = xPos
      y = 10^yPos
    }
    if(log==1){
      x = 10^xPos
      y = yPos
    }
    if(log==3){
      x = 10^xPos
      y = 10^yPos
    }
  }
  return(c("x"=x,"y"=y))
}

# SHMulation

# Based on targeting, introduce a single mutation & then update the targeting
# Works on the globals seqSim, seqTargeting and seqGermlineLen; seqSim is
# overwritten with the mutated sequence.
# Returns the mutated nucleotide position.
oneMutation <- function(){
  # Pick a postion + mutation
  posMutation = sample(1:(seqGermlineLen*4),1,replace=F,prob=as.vector(seqTargeting))
  posNucNumb = ceiling(posMutation/4)                    # Nucleotide number
  posNucKind = 4 - ( (posNucNumb*4) - posMutation )   # Nuc the position mutates to

  #mutate the simulation sequence
  seqSimVec <- s2c(seqSim)
  seqSimVec[posNucNumb] <- NUCLEOTIDES[posNucKind]
  seqSim <<- c2s(seqSimVec)

  #update Mutability, Targeting & MutationsTypes
  updateMutabilityNTargeting(posNucNumb)

  #return(c(posNucNumb,NUCLEOTIDES[posNucKind]))
  return(posNucNumb)
}

# Recompute the globals seqMutability, seqTargeting and seqMutationTypes in a
# +/-2 nucleotide window around position, zero the targeting of mutations
# typed "Stop", and rescale the targeting of "R" mutations using
# selCDR/selFWR and baseLineCDR_K/baseLineFWR_K.
updateMutabilityNTargeting <- function(position){
  min_i<-max((position-2),1)
  max_i<-min((position+2),nchar(seqSim))
  min_ii<-min(min_i,3)

  #mutability - update locally
  seqMutability[(min_i):(max_i)] <<- computeMutabilities(substr(seqSim,position-4,position+4))[(min_ii):(max_i-min_i+min_ii)]


  #targeting - compute locally
  seqTargeting[,min_i:max_i] <<- computeTargeting(substr(seqSim,min_i,max_i),seqMutability[min_i:max_i])
  seqTargeting[is.na(seqTargeting)] <<- 0
  #mutCodonPos = getCodonPos(position)
  mutCodonPos = seq(getCodonPos(min_i)[1],getCodonPos(max_i)[3])
  #cat(mutCodonPos,"\n")
  mutTypeCodon = getCodonPos(position)
  seqMutationTypes[,mutTypeCodon] <<- computeMutationTypesFast( substr(seqSim,mutTypeCodon[1],mutTypeCodon[3]) )
  # Stop = 0
  if(any(seqMutationTypes[,mutCodonPos]=="Stop",na.rm=T )){
    seqTargeting[,mutCodonPos][seqMutationTypes[,mutCodonPos]=="Stop"] <<- 0
  }


  #Selection
  selectedPos = (min_i*4-4)+(which(seqMutationTypes[,min_i:max_i]=="R"))
  # CDR
  selectedCDR = selectedPos[which(matCDR[selectedPos]==T)]
  seqTargeting[selectedCDR] <<-  seqTargeting[selectedCDR] *  exp(selCDR)
  seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR]/baseLineCDR_K

  # FWR
  selectedFWR = selectedPos[which(matFWR[selectedPos]==T)]
  seqTargeting[selectedFWR] <<-  seqTargeting[selectedFWR] *  exp(selFWR)
  seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR]/baseLineFWR_K

}



# Validate the mutation: if the mutation has not been sampled before validate it, else discard it.
# Reads the global mutatedPos and updates the globals mutatedPositions and
# uniqueMutationsIntroduced; a position that mutated back to the germline
# nucleotide is removed again.
validateMutation <- function(){
  if( !(mutatedPos%in%mutatedPositions) ){ # if it's a new mutation
    uniqueMutationsIntroduced <<- uniqueMutationsIntroduced + 1
    mutatedPositions[uniqueMutationsIntroduced] <<-  mutatedPos
  }else{
    if(substr(seqSim,mutatedPos,mutatedPos)==substr(seqGermline,mutatedPos,mutatedPos)){ # back to germline mutation
      mutatedPositions <<-  mutatedPositions[-which(mutatedPositions==mutatedPos)]
      uniqueMutationsIntroduced <<-  uniqueMutationsIntroduced - 1
    }
  }
}



# Places text (labels) at normalized coordinates
# The position is the one plotHelp(xfrac, yfrac, log) returns; nothing is
# drawn for a log value other than FALSE/0, 1, 2 or 3. Clipping is switched
# off while drawing and par(xpd) is left at FALSE afterwards.
myaxis <- function(xfrac=0.05,yfrac=0.05,log=FALSE,w="text",cex=1,adj=1,thecol="black"){
  par(xpd=TRUE)
  if(!log || log %in% 1:3){
    pos = plotHelp(xfrac=xfrac,yfrac=yfrac,log=log)
    text(pos[["x"]],pos[["y"]],w,cex=cex,adj=adj,col=thecol)
  }
  par(xpd=FALSE)
}



# Count the mutations in a sequence
# Same analysis as analyzeMutationsFixed(), applied to row inputMatrixIndex of
# the global matInput (column 1 = input sequence, column 2 = germline).
# When codons with multiple mutations were masked with "N", the masked
# sequence is written back to matInput[inputMatrixIndex,1].
# Returns the named vector of mutation types, or NA.
analyzeMutations <- function( inputMatrixIndex, model = 0 , multipleMutation=0, seqWithStops=0){
  originalSeq = matInput[inputMatrixIndex,1]
  res = analyzeMutationsFixed(c(originalSeq, matInput[inputMatrixIndex,2]), model, multipleMutation, seqWithStops)
  if(!identical(as.character(res[[2]]), as.character(originalSeq))){
    matInput[inputMatrixIndex,1] <<- res[[2]]
  }
  return(res[[1]])
}

# Classify the point mutations between an input sequence and its germline.
#   inputArray:       inputArray[1] = input sequence, inputArray[2] = germline
#                     (both single strings of equal length).
#   model:            1 keeps only "S" mutations.
#   multipleMutation: 1 drops mutations in codons that carry more than one
#                     mutation and masks those codons with "N".
#   seqWithStops:     1 drops mutations in codons that became "Stop";
#                     2 keeps only sequences with such codons;
#                     3 keeps only sequences without them.
# Returns list(mutationInfo, inputSeq): a character vector of mutation types
# named by nucleotide position (or NA), and the possibly masked sequence.
analyzeMutationsFixed <- function( inputArray, model = 0 , multipleMutation=0, seqWithStops=0){

  paramGL = s2c(inputArray[2])
  paramSeq = s2c(inputArray[1])
  inputSeq <- inputArray[1]
  #if( any(paramSeq=="N") ){
  #  gapPos_Seq =  which(paramSeq=="N")
  #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
  #  paramSeq[gapPos_Seq_ToReplace] =  paramGL[gapPos_Seq_ToReplace]
  #}
  mutations_val = paramGL != paramSeq
  if(!any(mutations_val)){
    return(list(NA,inputSeq))
  }

  mutationPos = which(mutations_val)
  length_mutations = length(mutationPos)

  pos_array<-array(sapply(mutationPos,getCodonPos))
  codonGL = paramGL[pos_array]
  codonSeqWhole = paramSeq[pos_array]
  # Germline codon with only the one mutated nucleotide swapped in
  codonSeq = sapply(mutationPos,function(x){
    seqP = paramGL[getCodonPos(x)]
    muCodonPos = {x-1}%%3+1
    seqP[muCodonPos] = paramSeq[x]
    return(seqP)
  })
  GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
  SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)
  Seqcodons = apply(codonSeq,2,c2s)

  # mutationInfo: type of each mutation taken on its own;
  # mutationInfoWhole: type of the whole observed codon.
  mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
  names(mutationInfo) = mutationPos

  mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
  names(mutationInfoWhole) = mutationPos

  mutationInfo <- mutationInfo[!is.na(mutationInfo)]
  mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]

  if(!any(!is.na(mutationInfo))){
    return(list(NA,inputSeq))
  }

  #Filter based on Stop (at the codon level)
  if(seqWithStops==1){
    nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"])
    mutationInfo = mutationInfo[nucleotidesAtStopCodons]
    # NOTE(review): this assigns from the already filtered mutationInfo, not
    # from mutationInfoWhole; only its names are used below -- confirm intent.
    mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons]
  }else{
    countStops = sum(mutationInfoWhole=="Stop")
    if(seqWithStops==2 && countStops==0) mutationInfo = NA
    if(seqWithStops==3 && countStops>0) mutationInfo = NA
  }

  if(!any(!is.na(mutationInfo))){
    return(list(NA,inputSeq))
  }

  #Filter mutations based on multipleMutation
  # Only the first element is tested: a vector-valued if() condition is an
  # error since R 4.2, and older versions used the first element anyway.
  if(multipleMutation==1 && !is.na(mutationInfo[1])){
    mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
    tableMutationCodons <- table(mutationCodons)
    codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
    if(length(codonsWithMultipleMutations) > 0){
      #remove the nucleotide mutations in the codons with multiple mutations
      mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
      #replace those codons with Ns in the input sequence
      paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
      inputSeq <- c2s(paramSeq)
    }
  }

  #Filter mutations based on the model
  # The original tested any(mutationInfo) on a character vector, which is NA
  # (with a coercion warning) whenever the vector is non-empty; the length
  # test states that directly.
  if(length(mutationInfo) == 0){
    return(list(NA,inputSeq))
  }
  if(model==1 && !is.na(mutationInfo[1])){
    mutationInfo <- mutationInfo[mutationInfo=="S"]
  }
  if(length(mutationInfo) > 0){
    return(list(mutationInfo,inputSeq))
  }
  return(list(NA,inputSeq))
}

# triMutability Background Count
# Build per-fivemer background and mutation counts for row inputMatrixIndex of
# the global matInput (gaps "-" removed first).
# Returns list(seqMutability, numbMutations, seqMutabilityCount,
# BackgroundMatrix); nothing is returned (NULL) when the sequence has no
# usable mutations for the chosen model/stopMutations combination.
# Reads the globals mutabilityMatrix, probNonStopMutations, probSMutations and
# substitution.
buildMutabilityModel <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){

  #rowOrigMatInput = matInput[inputMatrixIndex,]
  seqGL =  gsub("-", "", matInput[inputMatrixIndex,2])
  seqInput = gsub("-", "", matInput[inputMatrixIndex,1])
  #matInput[inputMatrixIndex,] <<- cbind(seqInput,seqGL)
  tempInput <- cbind(seqInput,seqGL)
  seqLength = nchar(seqGL)
  list_analyzeMutationsFixed<- analyzeMutationsFixed(tempInput, model, multipleMutation, seqWithStops)
  mutationCount <- list_analyzeMutationsFixed[[1]]
  seqInput <- list_analyzeMutationsFixed[[2]]
  BackgroundMatrix = mutabilityMatrix
  MutationMatrix = mutabilityMatrix
  MutationCountMatrix = mutabilityMatrix
  # First element only: a vector-valued if() condition is an error since R 4.2
  if(length(mutationCount) > 0 && !is.na(mutationCount[1])){
    if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){

      fivermerStartPos = 1:(seqLength-4)
      fivemerLength <- length(fivermerStartPos)
      fivemerGL <- substr(rep(seqGL,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
      fivemerSeq <- substr(rep(seqInput,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))

      #Background
      for(fivemerIndex in 1:fivemerLength){
        fivemer = fivemerGL[fivemerIndex]
        if(!any(grep("N",fivemer))){
          fivemerCodonPos = fivemerCodon(fivemerIndex)
          fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3])
          fivemerReadingFrameCodonInputSeq = substr(fivemerSeq[fivemerIndex],fivemerCodonPos[1],fivemerCodonPos[3])

          # All mutations model
          #if(!any(grep("N",fivemerReadingFrameCodon))){
          if(model==0){
            if(stopMutations==0){
              if(!any(grep("N",fivemerReadingFrameCodonInputSeq)))
                BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + 1)
            }else{
              if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" ){
                positionWithinCodon = which(fivemerCodonPos==3)#positionsWithinCodon[(fivemerCodonPos[1]%%3)+1]
                BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probNonStopMutations[fivemerReadingFrameCodon,positionWithinCodon])
              }
            }
          }else{ # Only silent mutations
            if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" & translateCodonToAminoAcid(fivemerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(fivemerReadingFrameCodon)){
              positionWithinCodon = which(fivemerCodonPos==3)
              BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probSMutations[fivemerReadingFrameCodon,positionWithinCodon])
            }
          }
          #}
        }
      }

      #Mutations
      if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
      if(model==1) mutationCount = mutationCount[mutationCount=="S"]
      mutationPositions = as.numeric(names(mutationCount))
      # Keep only positions that are the centre of a complete fivemer
      mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)]
      mutationPositions =  mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
      countMutations = 0
      for(mutationPosition in mutationPositions){
        fivemerIndex = mutationPosition-2
        fivemer = fivemerSeq[fivemerIndex]
        GLfivemer = fivemerGL[fivemerIndex]
        fivemerCodonPos = fivemerCodon(fivemerIndex)
        fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3])
        fivemerReadingFrameCodonGL = substr(GLfivemer,fivemerCodonPos[1],fivemerCodonPos[3])
        if(!any(grep("N",fivemer)) & !any(grep("N",GLfivemer))){
          if(model==0){
            countMutations = countMutations + 1
            MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + 1)
            MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)
          }else{
            if( translateCodonToAminoAcid(fivemerReadingFrameCodonGL)!="*" ){
              countMutations = countMutations + 1
              positionWithinCodon = which(fivemerCodonPos==3)
              glNuc =  substr(fivemerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
              inputNuc =  substr(fivemerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
              MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + substitution[glNuc,inputNuc])
              MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)
            }
          }
        }
      }

      seqMutability = MutationMatrix/BackgroundMatrix
      seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
      #cat(inputMatrixIndex,"\t",countMutations,"\n")
      return(list("seqMutability"  = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))

    }
  }

}

#Returns the codon position containing the middle nucleotide
# i.e. the three offsets within the fivemer (2:4, 1:3 or 3:5) selected by
# fivemerIndex %% 3.
fivemerCodon <- function(fivemerIndex){
  codonPos = list(2:4,1:3,3:5)
  fivemerType = fivemerIndex%%3
  return(codonPos[[fivemerType+1]])
}

#returns probability values for one mutation in codons resulting in R, S or Stop
#   typeOfMutation: the mutationType() value to accumulate.
# Returns a 125 x 3 matrix (codons over A/C/G/T/N by codon position) holding
# the summed substitution rates of the mutations of that type; rows for
# codons containing "N" stay 0. Reads the global substitution.
probMutations <- function(typeOfMutation){
  matMutationProb <- matrix(0,ncol=3,nrow=125,dimnames=list(words(alphabet = c(NUCLEOTIDES,"N"), length=3),c(1:3)))
  for(codon in rownames(matMutationProb)){
    if( !any(grep("N",codon)) ){
      for(muPos in 1:3){
        # One row per possible replacement nucleotide at muPos
        matCodon = matrix(rep(s2c(codon),3),nrow=3,ncol=3,byrow=T)
        glNuc = matCodon[1,muPos]
        matCodon[,muPos] = canMutateTo(glNuc)
        substitutionRate = substitution[glNuc,matCodon[,muPos]]
        typeOfMutations = apply(rbind(rep(codon,3),apply(matCodon,1,c2s)),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
        matMutationProb[codon,muPos] <- sum(substitutionRate[typeOfMutations==typeOfMutation])
      }
    }
  }

  return(matMutationProb)
}




#Mapping Trinucleotides to fivemers
# Each fivemer gets the sum of the three trinucleotide values covering its
# middle nucleotide (columns 1, 2, 3 for offsets 3:5, 2:4, 1:3); the result
# is normalised to sum 1. Row names come from the global triMutability_Names.
mapTriToFivemer <- function(triMutability=triMutability_Literature_Human){
  rownames(triMutability) <- triMutability_Names
  Fivemer<-rep(NA,1024)
  names(Fivemer)<-words(alphabet=NUCLEOTIDES,length=5)
  Fivemer<-sapply(names(Fivemer),function(Word)return(sum( c(triMutability[substring(Word,3,5),1],triMutability[substring(Word,2,4),2],triMutability[substring(Word,1,3),3]),na.rm=TRUE)))
  Fivemer<-Fivemer/sum(Fivemer)
  return(Fivemer)
}

# Weighted mean of the fivemer values per trinucleotide, over the fivemers
# whose middle nucleotide is NUC; the trinucleotide is characters
# (4-position):(6-position) of the fivemer name.
collapseFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
  Indices<-substring(names(Fivemer),3,3)==NUC
  Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position))
  tapply(which(Indices),Factors,function(i)weighted.mean(Fivemer[i],Weights[i],na.rm=TRUE))
}



# Same grouping as collapseFivemerToTri(), returning the sum of Weights per
# trinucleotide.
CountFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
  Indices<-substring(names(Fivemer),3,3)==NUC
  Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position))
  tapply(which(Indices),Factors,function(i)sum(Weights[i],na.rm=TRUE))
}

#Uses the real counts of the mutated fivemers
CountFivemerToTri2<-function(Fivemer,Counts=MutabilityCounts,position=1,NUC="A"){
  Indices<-substring(names(Fivemer),3,3)==NUC
  Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position))
  tapply(which(Indices),Factors,function(i)sum(Counts[i],na.rm=TRUE))
}

# Multinomial bootstrap of the proportions x/sum(x).
#   M: number of resamples; alpha is divided by 2*(length(x)-1).
# Returns a 2 x length(x) matrix of lower/upper quantiles (all 0 when
# sum(x) is 0).
bootstrap<-function(x=c(33,12,21),M=10000,alpha=0.05){
  N<-sum(x)
  if(N){
    p<-x/N
    k<-length(x)-1
    tmp<-rmultinom(M, size = N, prob=p)
    tmp_p<-apply(tmp,2,function(y)y/N)
    (apply(tmp_p,1,function(y)quantile(y,c(alpha/2/k,1-alpha/2/k))))
  }
  else return(matrix(0,2,length(x)))
}




# Like bootstrap(), but each resample draws n items without replacement from
# the pooled categories rep(1:k, x).
bootstrap2<-function(x=c(33,12,21),n=10,M=10000,alpha=0.05){

  N<-sum(x)
  k<-length(x)
  y<-rep(1:k,x)
  tmp<-sapply(1:M,function(i)sample(y,n))
  if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i)))/n
  if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i)))/n
  (apply(tmp_p,1,function(z)quantile(z,c(alpha/2/(k-1),1-alpha/2/(k-1)))))
}



# Resampling p-value per category: sum(x_obs) items are drawn M times from
# the pool rep(1:k, x) and compared with x_obs. For each category the smaller
# of P(count >= observed) and P(count <= observed) is returned, negated when
# it is the "<=" tail.
p_value<-function(x=c(33,12,21),M=100000,x_obs=c(2,5,3)){
  n=sum(x_obs)
  N<-sum(x)
  k<-length(x)
  y<-rep(1:k,x)
  tmp<-sapply(1:M,function(i)sample(y,n))
  if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i)))
  if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i)))
  # seq_len(k) rather than the former hard-coded 1:3, so any number of
  # categories works; identical for the default three.
  tmp<-rbind(sapply(seq_len(k),function(i)sum(tmp_p[i,]>=x_obs[i])/M),
             sapply(seq_len(k),function(i)sum(tmp_p[i,]<=x_obs[i])/M))
  sapply(seq_len(k),function(i){if(tmp[1,i]>=tmp[2,i])return(-tmp[2,i])else return(tmp[1,i])})
}

#"D:\\Sequences\\IMGT Germlines\\Human_SNPless_IGHJ.FASTA"
# Remove SNPs from IMGT germline segment alleles
# Reads a FASTA file, finds for every gene segment (allele name up to "*")
# the positions within 1:320 where any allele differs from the first one
# (ignoring "."), replaces those positions with "N" in every allele and
# writes the result to repertoireOutFile.
generateUnambiguousRepertoire <- function(repertoireInFile,repertoireOutFile){
  repertoireIn <- read.fasta(repertoireInFile, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F)
  # The allele name is the second "|"-separated field of the FASTA header
  alleleNames <- sapply(names(repertoireIn),function(x)strsplit(x,"|",fixed=TRUE)[[1]][2])
  SNPs <- tapply(repertoireIn,sapply(alleleNames,function(x)strsplit(x,"*",fixed=TRUE)[[1]][1]),function(x){
    Indices<-NULL
    firstSeq = s2c(x[[1]])
    for(i in 1:length(x)){
      iSeq = s2c(x[[i]])
      Indices<-c(Indices,which(firstSeq[1:320]!=iSeq[1:320] & firstSeq[1:320]!="." & iSeq[1:320]!="."  ))
    }
    return(sort(unique(Indices)))
  })
  repertoireOut <- lapply(names(repertoireIn), function(repertoireName){
    alleleName <- strsplit(repertoireName,"|",fixed=TRUE)[[1]][2]
    geneSegmentName <- strsplit(alleleName,"*",fixed=TRUE)[[1]][1]
    alleleSeq <- s2c(repertoireIn[[repertoireName]])
    alleleSeq[as.numeric(unlist(SNPs[geneSegmentName]))] <- "N"
    c2s(alleleSeq)
  })
  names(repertoireOut) <- names(repertoireIn)
  write.fasta(repertoireOut,names(repertoireOut),file.out=repertoireOutFile)

}






############
groupBayes2 = function(indexes, param_resultMat){

  BayesGDist_Focused_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[4])}))
  BayesGDist_Focused_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[2]+x[4])}))
  #BayesGDist_Local_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2])}))
  #BayesGDist_Local_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[4])}))
  #BayesGDist_Global_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[3]+x[4])}))
  #BayesGDist_Global_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[1]+x[2]+x[3]+x[4])}))
  return ( list("BayesGDist_Focused_CDR"=BayesGDist_Focused_CDR,
+ "BayesGDist_Focused_FWR"=BayesGDist_Focused_FWR) ) + #"BayesGDist_Local_CDR"=BayesGDist_Local_CDR, + #"BayesGDist_Local_FWR" = BayesGDist_Local_FWR)) +# "BayesGDist_Global_CDR" = BayesGDist_Global_CDR, +# "BayesGDist_Global_FWR" = BayesGDist_Global_FWR) ) + + +} + + +calculate_bayesG <- function( x=array(), N=array(), p=array(), max_sigma=20, length_sigma=4001){ + G <- max(length(x),length(N),length(p)) + x=array(x,dim=G) + N=array(N,dim=G) + p=array(p,dim=G) + + indexOfZero = N>0 & p>0 + N = N[indexOfZero] + x = x[indexOfZero] + p = p[indexOfZero] + G <- length(x) + + if(G){ + + cons<-array( dim=c(length_sigma,G) ) + if(G==1) { + return(calculate_bayes(x=x[G],N=N[G],p=p[G],max_sigma=max_sigma,length_sigma=length_sigma)) + } + else { + for(g in 1:G) cons[,g] <- calculate_bayes(x=x[g],N=N[g],p=p[g],max_sigma=max_sigma,length_sigma=length_sigma) + listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma) + y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma) + return( y/sum(y)/(2*max_sigma/(length_sigma-1)) ) + } + }else{ + return(NA) + } +} + + +calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){ + matG <- listMatG[[1]] + groups <- listMatG[[2]] + i = 1 + resConv <- matG[,i] + denom <- 2^groups[i] + if(length(groups)>1){ + while( i<length(groups) ){ + i = i + 1 + resConv <- weighted_conv(resConv, matG[,i], w= {{2^groups[i]}/denom} ,length_sigma=length_sigma) + #cat({{2^groups[i]}/denom},"\n") + denom <- denom + 2^groups[i] + } + } + return(resConv) +} + +weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){ +lx<-length(x) +ly<-length(y) +if({lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}){ +if(w<1){ +y1<-approx(1:ly,y,seq(1,ly,length.out=m))$y +x1<-approx(1:lx,x,seq(1,lx,length.out=m/w))$y +lx<-length(x1) +ly<-length(y1) +} +else { +y1<-approx(1:ly,y,seq(1,ly,length.out=m*w))$y +x1<-approx(1:lx,x,seq(1,lx,length.out=m))$y +lx<-length(x1) +ly<-length(y1) +} +} +else{ +x1<-x +y1<-approx(1:ly,y,seq(1,ly,length.out=floor(lx*w)))$y 
+ly<-length(y1) +} +tmp<-approx(x=1:(lx+ly-1),y=convolve(x1,rev(y1),type="open"),xout=seq(1,lx+ly-1,length.out=length_sigma))$y +tmp[tmp<=0] = 0 +return(tmp/sum(tmp)) +} + +######################## + + + + +mutabilityMatrixONE<-rep(0,4) +names(mutabilityMatrixONE)<-NUCLEOTIDES + + # triMutability Background Count + buildMutabilityModelONE <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){ + + #rowOrigMatInput = matInput[inputMatrixIndex,] + seqGL = gsub("-", "", matInput[inputMatrixIndex,2]) + seqInput = gsub("-", "", matInput[inputMatrixIndex,1]) + matInput[inputMatrixIndex,] <<- c(seqInput,seqGL) + seqLength = nchar(seqGL) + mutationCount <- analyzeMutations(inputMatrixIndex, model, multipleMutation, seqWithStops) + BackgroundMatrix = mutabilityMatrixONE + MutationMatrix = mutabilityMatrixONE + MutationCountMatrix = mutabilityMatrixONE + if(!is.na(mutationCount)){ + if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){ + +# ONEmerStartPos = 1:(seqLength) +# ONEmerLength <- length(ONEmerStartPos) + ONEmerGL <- s2c(seqGL) + ONEmerSeq <- s2c(seqInput) + + #Background + for(ONEmerIndex in 1:seqLength){ + ONEmer = ONEmerGL[ONEmerIndex] + if(ONEmer!="N"){ + ONEmerCodonPos = getCodonPos(ONEmerIndex) + ONEmerReadingFrameCodon = c2s(ONEmerGL[ONEmerCodonPos]) + ONEmerReadingFrameCodonInputSeq = c2s(ONEmerSeq[ONEmerCodonPos] ) + + # All mutations model + #if(!any(grep("N",ONEmerReadingFrameCodon))){ + if(model==0){ + if(stopMutations==0){ + if(!any(grep("N",ONEmerReadingFrameCodonInputSeq))) + BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + 1) + }else{ + if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*"){ + positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)#positionsWithinCodon[(ONEmerCodonPos[1]%%3)+1] + BackgroundMatrix[ONEmer] <- 
(BackgroundMatrix[ONEmer] + probNonStopMutations[ONEmerReadingFrameCodon,positionWithinCodon]) + } + } + }else{ # Only silent mutations + if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*" & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(ONEmerReadingFrameCodon) ){ + positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex) + BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probSMutations[ONEmerReadingFrameCodon,positionWithinCodon]) + } + } + } + } + } + + #Mutations + if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"] + if(model==1) mutationCount = mutationCount[mutationCount=="S"] + mutationPositions = as.numeric(names(mutationCount)) + mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)] + mutationPositions = mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)] + countMutations = 0 + for(mutationPosition in mutationPositions){ + ONEmerIndex = mutationPosition + ONEmer = ONEmerSeq[ONEmerIndex] + GLONEmer = ONEmerGL[ONEmerIndex] + ONEmerCodonPos = getCodonPos(ONEmerIndex) + ONEmerReadingFrameCodon = c2s(ONEmerSeq[ONEmerCodonPos]) + ONEmerReadingFrameCodonGL =c2s(ONEmerGL[ONEmerCodonPos]) + if(!any(grep("N",ONEmer)) & !any(grep("N",GLONEmer))){ + if(model==0){ + countMutations = countMutations + 1 + MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + 1) + MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1) + }else{ + if( translateCodonToAminoAcid(ONEmerReadingFrameCodonGL)!="*" ){ + countMutations = countMutations + 1 + positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex) + glNuc = substr(ONEmerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon) + inputNuc = substr(ONEmerReadingFrameCodon,positionWithinCodon,positionWithinCodon) + MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + substitution[glNuc,inputNuc]) + MutationCountMatrix[GLONEmer] 
<- (MutationCountMatrix[GLONEmer] + 1) + } + } + } + } + + seqMutability = MutationMatrix/BackgroundMatrix + seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE) + #cat(inputMatrixIndex,"\t",countMutations,"\n") + return(list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix)) +# tmp<-list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix) + } + } + +################ +# $Id: trim.R 989 2006-10-29 15:28:26Z ggorjan $ + +trim <- function(s, recode.factor=TRUE, ...) + UseMethod("trim", s) + +trim.default <- function(s, recode.factor=TRUE, ...) + s + +trim.character <- function(s, recode.factor=TRUE, ...) +{ + s <- sub(pattern="^ +", replacement="", x=s) + s <- sub(pattern=" +$", replacement="", x=s) + s +} + +trim.factor <- function(s, recode.factor=TRUE, ...) +{ + levels(s) <- trim(levels(s)) + if(recode.factor) { + dots <- list(x=s, ...) + if(is.null(dots$sort)) dots$sort <- sort + s <- do.call(what=reorder.factor, args=dots) + } + s +} + +trim.list <- function(s, recode.factor=TRUE, ...) + lapply(s, trim, recode.factor=recode.factor, ...) + +trim.data.frame <- function(s, recode.factor=TRUE, ...) +{ + s[] <- trim.list(s, recode.factor=recode.factor, ...) 
+ s +} +####################################### +# Compute the expected for each sequence-germline pair by codon +getExpectedIndividualByCodon <- function(matInput){ +if( any(grep("multicore",search())) ){ + facGL <- factor(matInput[,2]) + facLevels = levels(facGL) + LisGLs_MutabilityU = mclapply(1:length(facLevels), function(x){ + computeMutabilities(facLevels[x]) + }) + facIndex = match(facGL,facLevels) + + LisGLs_Mutability = mclapply(1:nrow(matInput), function(x){ + cInput = rep(NA,nchar(matInput[x,1])) + cInput[s2c(matInput[x,1])!="N"] = 1 + LisGLs_MutabilityU[[facIndex[x]]] * cInput + }) + + LisGLs_Targeting = mclapply(1:dim(matInput)[1], function(x){ + computeTargeting(matInput[x,2],LisGLs_Mutability[[x]]) + }) + + LisGLs_MutationTypes = mclapply(1:length(matInput[,2]),function(x){ + #print(x) + computeMutationTypes(matInput[x,2]) + }) + + LisGLs_R_Exp = mclapply(1:nrow(matInput), function(x){ + Exp_R <- rollapply(as.zoo(1:readEnd),width=3,by=3, + function(codonNucs){ + RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R") + sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T ) + } + ) + }) + + LisGLs_S_Exp = mclapply(1:nrow(matInput), function(x){ + Exp_S <- rollapply(as.zoo(1:readEnd),width=3,by=3, + function(codonNucs){ + SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S") + sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T ) + } + ) + }) + + Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) + Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) + return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) ) + }else{ + facGL <- factor(matInput[,2]) + facLevels = levels(facGL) + LisGLs_MutabilityU = lapply(1:length(facLevels), function(x){ + computeMutabilities(facLevels[x]) + }) + facIndex = match(facGL,facLevels) + + LisGLs_Mutability = lapply(1:nrow(matInput), function(x){ + cInput = rep(NA,nchar(matInput[x,1])) + cInput[s2c(matInput[x,1])!="N"] = 1 + LisGLs_MutabilityU[[facIndex[x]]] * cInput + }) + 
+ LisGLs_Targeting = lapply(1:dim(matInput)[1], function(x){ + computeTargeting(matInput[x,2],LisGLs_Mutability[[x]]) + }) + + LisGLs_MutationTypes = lapply(1:length(matInput[,2]),function(x){ + #print(x) + computeMutationTypes(matInput[x,2]) + }) + + LisGLs_R_Exp = lapply(1:nrow(matInput), function(x){ + Exp_R <- rollapply(as.zoo(1:readEnd),width=3,by=3, + function(codonNucs){ + RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R") + sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T ) + } + ) + }) + + LisGLs_S_Exp = lapply(1:nrow(matInput), function(x){ + Exp_S <- rollapply(as.zoo(1:readEnd),width=3,by=3, + function(codonNucs){ + SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S") + sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T ) + } + ) + }) + + Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) + Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) + return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) ) + } +} + +# getObservedMutationsByCodon <- function(listMutations){ +# numbSeqs <- length(listMutations) +# obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3)))) +# obsMu_S <- obsMu_R +# temp <- mclapply(1:length(listMutations), function(i){ +# arrMutations = listMutations[[i]] +# RPos = as.numeric(names(arrMutations)[arrMutations=="R"]) +# RPos <- sapply(RPos,getCodonNumb) +# if(any(RPos)){ +# tabR <- table(RPos) +# obsMu_R[i,as.numeric(names(tabR))] <<- tabR +# } +# +# SPos = as.numeric(names(arrMutations)[arrMutations=="S"]) +# SPos <- sapply(SPos,getCodonNumb) +# if(any(SPos)){ +# tabS <- table(SPos) +# obsMu_S[i,names(tabS)] <<- tabS +# } +# } +# ) +# return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) +# } + +getObservedMutationsByCodon <- function(listMutations){ + numbSeqs <- length(listMutations) + obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3)))) + obsMu_S <- obsMu_R + temp <- 
lapply(1:length(listMutations), function(i){ + arrMutations = listMutations[[i]] + RPos = as.numeric(names(arrMutations)[arrMutations=="R"]) + RPos <- sapply(RPos,getCodonNumb) + if(any(RPos)){ + tabR <- table(RPos) + obsMu_R[i,as.numeric(names(tabR))] <<- tabR + } + + SPos = as.numeric(names(arrMutations)[arrMutations=="S"]) + SPos <- sapply(SPos,getCodonNumb) + if(any(SPos)){ + tabS <- table(SPos) + obsMu_S[i,names(tabS)] <<- tabS + } + } + ) + return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) +} +
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tmp/baseline/Baseline_Main.r Wed Jun 08 03:54:49 2016 -0400 @@ -0,0 +1,388 @@
#########################################################################################
# License Agreement
#
# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE
# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER
# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE
# OR COPYRIGHT LAW IS PROHIBITED.
#
# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE
# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED
# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN
# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
#
# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
# Coded by: Mohamed Uduman & Gur Yaari
# Copyright 2012 Kleinstein Lab
# Version: 1.3 (01/23/2014)
#########################################################################################

# Keep the incoming options, then silence warnings and call traces.
op <- options()
options(showWarnCalls = FALSE, showErrorCalls = FALSE, warn = -1)
library("seqinr")
# The leading FALSE keeps multicore disabled on every platform.
if (FALSE && Sys.info()[1] == "Linux") {
  library("multicore")
}

# Load functions and initialize global variables
source("Baseline_Functions.r")

# Initialize parameters with user provided arguments
arg <- commandArgs(TRUE)
#arg = c(2,1,5,5,0,1,"1:26:38:55:65:104:116", "test.fasta","","sample")
#arg = c(1,1,5,5,0,1,"1:38:55:65:104:116:200", "test.fasta","","sample")
#arg = c(1,1,5,5,1,1,"1:26:38:55:65:104:116", "/home/mu37/Wu/Wu_Cloned_gapped_sequences_D-masked.fasta","/home/mu37/Wu/","Wu")
testID <- as.numeric(arg[1])            # 1 = Focused, 2 = Local
species <- as.numeric(arg[2])           # 1 = Human. 2 = Mouse
substitutionModel <- as.numeric(arg[3]) # 0 = Uniform substitution, 1 = Smith DS et al. 1996, 5 = FiveS
mutabilityModel <- as.numeric(arg[4])   # 0 = Uniform mutablity, 1 = Tri-nucleotide (Shapiro GS et al. 2002) , 5 = FiveS
clonal <- as.numeric(arg[5])            # 0 = Independent sequences, 1 = Clonally related, 2 = Clonally related & only non-terminal mutations
fixIndels <- as.numeric(arg[6])         # 0 = Do nothing, 1 = Try and fix Indels
region <- as.numeric(strsplit(arg[7], ":")[[1]]) # StartPos:LastNucleotideF1:C1:F2:C2:F3:C3
inputFilePath <- arg[8]                 # Full path to input file
outputPath <- arg[9]                    # Full path to location of output files
outputID <- arg[10]                     # ID for session output

if (testID == 5) {
  traitChangeModel <- 1
  # The original wrapped arg[11] in any(), which raises an error for a
  # character argument; test the value for NA directly.
  if (!is.na(arg[11])) traitChangeModel <- as.numeric(arg[11]) # 1 <- Chothia 1998
  initializeTraitChange(traitChangeModel)
}

# Initialize other parameters/variables

# Initialzie the codon table ( definitions of R/S )
computeCodonTable(testID)

# Initialize
# Test Name
testName <- "Focused"
if (testID == 2) testName <- "Local"
if (testID == 3) testName <- "Imbalanced"
if (testID == 4) testName <- "ImbalancedSilent"

# Indel placeholders initialization
indelPos <- NULL
delPos <- NULL
insPos <- NULL

# Initialize in Tranistion & Mutability matrixes
substitution <- initializeSubstitutionMatrix(substitutionModel, species)
mutability <- initializeMutabilityMatrix(mutabilityModel, species)

# FWR/CDR boundaries
# When only six boundaries are given, the last one is duplicated so the
# final CDR segment has zero length.
flagTrim <- FALSE
if (is.na(region[7])) {
  flagTrim <- TRUE
  region[7] <- region[6]
}
readStart <- min(region, na.rm = TRUE)
readEnd <- max(region, na.rm = TRUE)
# Shift the boundaries so the read starts at codon 1.
if (readStart > 1) {
  region <- region - (readStart - 1)
}
region_Nuc <- c((region[1] * 3 - 2), (region[2:7] * 3))
region_Cod <- region

# Convert the read limits from codon to nucleotide positions.
readStart <- (readStart * 3) - 2
readEnd <- (readEnd * 3)

# TRUE for framework nucleotides, FALSE for CDR nucleotides, in the order
# FWR1, CDR1, FWR2, CDR2, FWR3, CDR3.
FWR_Nuc <- c(rep(TRUE, (region_Nuc[2])),
             rep(FALSE, (region_Nuc[3] - region_Nuc[2])),
             rep(TRUE, (region_Nuc[4] - region_Nuc[3])),
             rep(FALSE, (region_Nuc[5] - region_Nuc[4])),
             rep(TRUE, (region_Nuc[6] - region_Nuc[5])),
             rep(FALSE, (region_Nuc[7] - region_Nuc[6]))
)
# CDR masks are the complement of the framework masks.
CDR_Nuc <- (1 - FWR_Nuc)
CDR_Nuc <- as.logical(CDR_Nuc)
FWR_Nuc_Mat <- matrix(rep(FWR_Nuc, 4), ncol = length(FWR_Nuc), nrow = 4, byrow = TRUE)
CDR_Nuc_Mat <- matrix(rep(CDR_Nuc, 4), ncol = length(CDR_Nuc), nrow = 4, byrow = TRUE)

FWR_Codon <- c(rep(TRUE, (region[2])),
               rep(FALSE, (region[3] - region[2])),
               rep(TRUE, (region[4] - region[3])),
               rep(FALSE, (region[5] - region[4])),
               rep(TRUE, (region[6] - region[5])),
               rep(FALSE, (region[7] - region[6]))
)
CDR_Codon <- (1 - FWR_Codon)
CDR_Codon <- as.logical(CDR_Codon)


# Read input FASTA file
tryCatch(
  inputFASTA <- baseline.read.fasta(inputFilePath, seqtype = "DNA", as.string = TRUE, set.attributes = FALSE, forceDNAtolower = FALSE)
  , error = function(ex) {
    cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n")
    q()
  }
)

if (length(inputFASTA) == 1) {
  cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n")
  q()
}

# Process sequence IDs/names
names(inputFASTA) <- sapply(names(inputFASTA), function(x) { trim(x) })

# Convert non nucleotide characters to N
inputFASTA[length(inputFASTA)] <- gsub("\t", "", inputFASTA[length(inputFASTA)])
inputFASTA <- lapply(inputFASTA, replaceNonFASTAChars)

# Process the FASTA file and conver to Matrix[inputSequence, germlineSequence]
processedInput <- processInputAdvanced(inputFASTA)
matInput <- processedInput[[1]]
germlines <- processedInput[[2]]
lenGermlines <- length(unique(germlines))
groups <- processedInput[[3]]
lenGroups <- length(unique(groups))
rm(processedInput)
rm(inputFASTA)

# # remove clones with less than 2 seqeunces
# tableGL <- table(germlines)
# singletons <- which(tableGL<8)
# rowsToRemove <- match(singletons,germlines)
# if(any(rowsToRemove)){
#   matInput <- matInput[-rowsToRemove,]
#   germlines <- germlines[-rowsToRemove]
#   groups <- groups[-rowsToRemove]
# }
#
# # remove unproductive seqs
# nonFuctionalSeqs <- sapply(rownames(matInput),function(x){any(grep("unproductive",x))})
# if(any(nonFuctionalSeqs)){
#   if(sum(nonFuctionalSeqs)==length(germlines)){
#     write.table("Unproductive",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
#     q()
#   }
#   matInput <- matInput[-which(nonFuctionalSeqs),]
#   germlines <- germlines[-which(nonFuctionalSeqs)]
#   germlines[1:length(germlines)] <- 1:length(germlines)
#   groups <- groups[-which(nonFuctionalSeqs)]
# }
#
# if(class(matInput)=="character"){
#   write.table("All unproductive seqs",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
#   q()
# }
#
# if(nrow(matInput)<10 | is.null(nrow(matInput))){
#   write.table(paste(nrow(matInput), "seqs only",sep=""),file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
#   q()
# }

# replace leading & trailing "-" with "N:
matInput <- t(apply(matInput, 1, replaceLeadingTrailingDashes, readEnd))

# Trim (nucleotide) input sequences to the last codon
#matInput[,1] <- apply(matrix(matInput[,1]),1,trimToLastCodon)

# # Check for Indels
# if(fixIndels){
#   delPos <- fixDeletions(matInput)
#   insPos <- fixInsertions(matInput)
# }else{
#   # Check for indels
#   indelPos <- checkForInDels(matInput)
#   indelPos <- apply(cbind(indelPos[[1]],indelPos[[2]]),1,function(x){(x[1]==T & x[2]==T)})
# }

# If indels are present, remove mutations in the seqeunce & throw warning at end
#matInput[indelPos,] <- apply(matrix(matInput[indelPos,],nrow=sum(indelPos),ncol=2),1,function(x){x[1]=x[2]; return(x) })

colnames(matInput) <- c("Input", "Germline")

# If seqeunces are clonal, create effective sequence for each clone & modify germline/group definitions
germlinesOriginal <- NULL
if (clonal) {
  germlinesOriginal <- germlines
  # One collapsed sequence per clone; clonal == 2 restricts the collapse to
  # non-terminal mutations (nonTerminalOnly = clonal - 1).
  collapseCloneResults <- tapply(1:nrow(matInput), germlines, function(i) {
    collapseClone(matInput[i, 1], matInput[i[1], 2], readEnd, nonTerminalOnly = (clonal - 1))
  })
  matInput <- t(sapply(collapseCloneResults, function(x) { return(x[[1]]) }))

  # Keep the first group / germline entry (and its name) of every clone.
  names_groups <- tapply(groups, germlines, function(x) { names(x[1]) })
  groups <- tapply(groups, germlines, function(x) { array(x[1], dimnames = names(x[1])) })
  names(groups) <- names_groups

  names_germlines <- tapply(germlines, germlines, function(x) { names(x[1]) })
  germlines <- tapply(germlines, germlines, function(x) { array(x[1], dimnames = names(x[1])) })
  names(germlines) <- names_germlines
  matInputErrors <- sapply(collapseCloneResults, function(x) { return(x[[2]]) })
}


# Selection Analysis


# if (length(germlines)>sequenceLimit) {
#   # Code to parallelize processing goes here
#   stop( paste("Error: Cannot process more than ", Upper_limit," sequences",sep="") )
# }

# if (length(germlines)<sequenceLimit) {}

# Compute expected mutation frequencies
matExpected <- getExpectedIndividual(matInput)

# Count observed number of mutations in the different regions
mutations <- lapply(1:nrow(matInput), function(i) {
  #cat(i,"\n")
  seqI <- s2c(matInput[i, 1])
  seqG <- s2c(matInput[i, 2])
  matIGL <- matrix(c(seqI, seqG), ncol = length(seqI), nrow = 2, byrow = TRUE)
  # A failed analysis yields NA for that sequence. (The original handler
  # assigned NA to a handler-local variable, which had no effect; the
  # pre-initialised NA was what got returned.)
  tryCatch(
    analyzeMutations2NucUri(matIGL)
    , error = function(ex) {
      NA
    }
  )
})

matObserved <- t(sapply(mutations, processNucMutations2))
numberOfSeqsWithMutations <- numberOfSeqsWithMutations(matObserved, testID)

#if(sum(numberOfSeqsWithMutations)==0){
#  write.table("No mutated sequences",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
#  q()
#}

matMutationInfo <- cbind(matObserved, matExpected)
rm(matObserved, matExpected)


#Bayesian PDFs
bayes_pdf <- computeBayesianScore(matMutationInfo, test = testName, max_sigma = 20, length_sigma = 4001)
bayesPDF_cdr <- bayes_pdf[[1]]
bayesPDF_fwr <- bayes_pdf[[2]]
rm(bayes_pdf)

bayesPDF_germlines_cdr <- tapply(bayesPDF_cdr, germlines, function(x) groupPosteriors(x, length_sigma = 4001))
bayesPDF_germlines_fwr <- tapply(bayesPDF_fwr, germlines, function(x) groupPosteriors(x, length_sigma = 4001))

bayesPDF_groups_cdr <- tapply(bayesPDF_cdr, groups, function(x) groupPosteriors(x, length_sigma = 4001))
bayesPDF_groups_fwr <- tapply(bayesPDF_fwr, groups, function(x) groupPosteriors(x, length_sigma = 4001))

# With more than one group, append a pseudo-group that pools all groups.
if (lenGroups > 1) {
  groups <- c(groups, lenGroups + 1)
  names(groups)[length(groups)] <- "All sequences combined"
  bayesPDF_groups_cdr[[lenGroups + 1]] <- groupPosteriors(bayesPDF_groups_cdr, length_sigma = 4001)
  bayesPDF_groups_fwr[[lenGroups + 1]] <- groupPosteriors(bayesPDF_groups_fwr, length_sigma = 4001)
}

#Bayesian Outputs
bayes_cdr <- t(sapply(bayesPDF_cdr, calcBayesOutputInfo))
bayes_fwr <- t(sapply(bayesPDF_fwr, calcBayesOutputInfo))
bayes_germlines_cdr <- t(sapply(bayesPDF_germlines_cdr, calcBayesOutputInfo))
bayes_germlines_fwr <- t(sapply(bayesPDF_germlines_fwr, calcBayesOutputInfo))
bayes_groups_cdr <- t(sapply(bayesPDF_groups_cdr, calcBayesOutputInfo))
bayes_groups_fwr <- t(sapply(bayesPDF_groups_fwr, calcBayesOutputInfo))

#P-values
simgaP_cdr <- sapply(bayesPDF_cdr, computeSigmaP)
simgaP_fwr <- sapply(bayesPDF_fwr, computeSigmaP)

simgaP_germlines_cdr <- sapply(bayesPDF_germlines_cdr, computeSigmaP)
simgaP_germlines_fwr <- sapply(bayesPDF_germlines_fwr, computeSigmaP)

simgaP_groups_cdr <- sapply(bayesPDF_groups_cdr, computeSigmaP)
simgaP_groups_fwr <- sapply(bayesPDF_groups_fwr, computeSigmaP)


#Format output

# Round expected mutation frequencies to 3 decimal places
matMutationInfo[germlinesOriginal[indelPos], ] <- NA
if (nrow(matMutationInfo) == 1) {
  matMutationInfo[5:8] <- round(matMutationInfo[, 5:8] / sum(matMutationInfo[, 5:8], na.rm = TRUE), 3)
} else {
  matMutationInfo[, 5:8] <- t(round(apply(matMutationInfo[, 5:8], 1, function(x) { return(x / sum(x, na.rm = TRUE)) }), 3))
}

listPDFs <- list()
nRows <- length(unique(groups)) + length(unique(germlines)) + length(groups)

# One output row per group, then per germline in that group, then per
# sequence of that germline. Columns 3-10 stay NA for group/germline rows.
matOutput <- matrix(NA, ncol = 18, nrow = nRows)
rowNumb <- 1
for (G in unique(groups)) {
  #print(G)
  matOutput[rowNumb, c(1, 2, 11:18)] <- c("Group", names(groups)[groups == G][1], bayes_groups_cdr[G, ], bayes_groups_fwr[G, ], simgaP_groups_cdr[G], simgaP_groups_fwr[G])
  listPDFs[[rowNumb]] <- list("CDR" = bayesPDF_groups_cdr[[G]], "FWR" = bayesPDF_groups_fwr[[G]])
  names(listPDFs)[rowNumb] <- names(groups[groups == paste(G)])[1]
  #if(names(groups)[which(groups==G)[1]]!="All sequences combined"){
  gs <- unique(germlines[groups == G])
  rowNumb <- rowNumb + 1
  # The original condition was `!is.na(gs)`: older R looked at the first
  # element only, and R >= 4.2 raises an error when a group holds several
  # germlines. Testing the first element keeps the historical meaning.
  if (length(gs) > 0 && !is.na(gs[1])) {
    for (g in gs) {
      matOutput[rowNumb, c(1, 2, 11:18)] <- c("Germline", names(germlines)[germlines == g][1], bayes_germlines_cdr[g, ], bayes_germlines_fwr[g, ], simgaP_germlines_cdr[g], simgaP_germlines_fwr[g])
      listPDFs[[rowNumb]] <- list("CDR" = bayesPDF_germlines_cdr[[g]], "FWR" = bayesPDF_germlines_fwr[[g]])
      names(listPDFs)[rowNumb] <- names(germlines[germlines == paste(g)])[1]
      rowNumb <- rowNumb + 1

      indexesOfInterest <- which(germlines == g)
      numbSeqsOfInterest <- length(indexesOfInterest)
      # Rows that receive the sequences of this germline.
      seqRows <- seq(rowNumb, rowNumb + (numbSeqsOfInterest - 1))
      matOutput[seqRows, ] <- matrix(c(rep("Sequence", numbSeqsOfInterest),
                                       rownames(matInput)[indexesOfInterest],
                                       c(matMutationInfo[indexesOfInterest, 1:4]),
                                       c(matMutationInfo[indexesOfInterest, 5:8]),
                                       c(bayes_cdr[indexesOfInterest, ]),
                                       c(bayes_fwr[indexesOfInterest, ]),
                                       c(simgaP_cdr[indexesOfInterest]),
                                       c(simgaP_fwr[indexesOfInterest])
      ), ncol = 18, nrow = numbSeqsOfInterest, byrow = FALSE)
      increment <- 0
      for (ioi in indexesOfInterest) {
        listPDFs[[min(seqRows) + increment]] <- list("CDR" = bayesPDF_cdr[[ioi]], "FWR" = bayesPDF_fwr[[ioi]])
        names(listPDFs)[min(seqRows) + increment] <- rownames(matInput)[ioi]
        increment <- increment + 1
      }
      rowNumb <- max(seqRows) + 1
    }
  }
}
colsToFormat <- 11:18
matOutput[, colsToFormat] <- formatC(matrix(as.numeric(matOutput[, colsToFormat]), nrow = nrow(matOutput), ncol = length(colsToFormat)), digits = 3)
matOutput[matOutput == " NaN"] <- NA



colnames(matOutput) <- c("Type", "ID", "Observed_CDR_R", "Observed_CDR_S", "Observed_FWR_R", "Observed_FWR_S",
                         "Expected_CDR_R", "Expected_CDR_S", "Expected_FWR_R", "Expected_FWR_S",
                         paste(rep(testName, 6), rep(c("Sigma", "CIlower", "CIupper"), 2), rep(c("CDR", "FWR"), each = 3), sep = "_"),
                         paste(rep(testName, 2), rep("P", 2), c("CDR", "FWR"), sep = "_")
)
fileName <- paste(outputPath, outputID, ".txt", sep = "")
write.table(matOutput, file = fileName, quote = FALSE, sep = "\t", row.names = TRUE, col.names = NA)
fileName <- paste(outputPath, outputID, ".RData", sep = "")
save(listPDFs, file = fileName)

# Build the optional HTML warnings that are appended to the status line.
indelWarning <- FALSE
if (sum(indelPos) > 0) {
  indelWarning <- "<P>Warning: The following sequences have either gaps and/or deletions, and have been ommited from the analysis."
  indelWarning <- paste(indelWarning, "<UL>", sep = "")
  for (indels in names(indelPos)[indelPos]) {
    indelWarning <- paste(indelWarning, "<LI>", indels, "</LI>", sep = "")
  }
  indelWarning <- paste(indelWarning, "</UL></P>", sep = "")
}

cloneWarning <- FALSE
if (clonal == 1) {
  if (sum(matInputErrors) > 0) {
    cloneWarning <- "<P>Warning: The following clones have sequences of unequal length."
    cloneWarning <- paste(cloneWarning, "<UL>", sep = "")
    for (clone in names(matInputErrors)[matInputErrors]) {
      cloneWarning <- paste(cloneWarning, "<LI>", names(germlines)[as.numeric(clone)], "</LI>", sep = "")
    }
    cloneWarning <- paste(cloneWarning, "</UL></P>", sep = "")
  }
}
cat(paste("Success", outputID, indelWarning, cloneWarning, sep = "|"))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa Wed Jun 08 03:54:49 2016 -0400 @@ -0,0 +1,703 @@ +>IGHV1-18*01 +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-18*02 +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc +>IGHV1-18*03 +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga +>IGHV1-18*04 +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-2*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga +>IGHV1-2*02 
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-2*03 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-2*04 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-2*05 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga +>IGHV1-24*01 +caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga +>IGHV1-3*01 +caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga 
+>IGHV1-3*02 +caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga +>IGHV1-38-4*01 +caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca +>IGHV1-45*01 +cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana +>IGHV1-45*02 +cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata +>IGHV1-45*03 +.....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga +>IGHV1-46*01 
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-46*02 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-46*03 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga +>IGHV1-58*01 +caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga +>IGHV1-58*02 +caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga +>IGHV1-68*01 
+caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata +>IGHV1-69*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*02 +caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1-69*03 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc +>IGHV1-69*04 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*05 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1-69*06 
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*07 +.....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag +>IGHV1-69*08 +caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*09 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*10 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*11 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*12 
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*13 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*14 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69-2*01 +gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga +>IGHV1-69-2*02 +.....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag +>IGHV1-69D*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-8*01 
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg +>IGHV1-8*02 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg +>IGHV1-NL1*01 +caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga +>IGHV1/OR15-1*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga +>IGHV1/OR15-1*02 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga +>IGHV1/OR15-1*03 
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga +>IGHV1/OR15-1*04 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga +>IGHV1/OR15-2*01 +caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga +>IGHV1/OR15-2*02 +caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga +>IGHV1/OR15-2*03 +caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga +>IGHV1/OR15-3*01 
+caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1/OR15-3*02 +caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1/OR15-3*03 +caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1/OR15-4*01 +caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1/OR15-5*01 +.....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga +>IGHV1/OR15-5*02 
+caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga +>IGHV1/OR15-9*01 +caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga +>IGHV1/OR21-1*01 +caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga +>IGHV2-10*01 +caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac +>IGHV2-26*01 +caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac +>IGHV2-5*01 
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-5*02 +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-5*03 +................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt +>IGHV2-5*04| +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac +>IGHV2-5*05 +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-5*06 +cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga +>IGHV2-5*08 
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-5*09 +caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-70*01 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2-70*02 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*03 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*04 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac +>IGHV2-70*05 
+..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga +>IGHV2-70*06 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*07 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*08 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*09 +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg +>IGHV2-70*10 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2-70*11 
+cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2-70*12 +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-70*13 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac +>IGHV2-70D*04 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2-70D*14 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2/OR16-5*01 
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag +>IGHV3-11*01 +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-11*03 +caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga +>IGHV3-11*04 +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-11*05 +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-11*06 
+caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-13*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga +>IGHV3-13*02 +gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga +>IGHV3-13*03 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga +>IGHV3-13*04 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga +>IGHV3-13*05 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga 
+>IGHV3-15*01 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*02 +gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*03 +gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*04 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*05 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*06 
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*07 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*08 +gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg +>IGHV3-16*01 +gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa +>IGHV3-16*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa +>IGHV3-19*01 
+acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa +>IGHV3-20*01 +gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga +>IGHV3-20*02 +gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga +>IGHV3-21*01 +gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-21*02 +gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-21*03 
+gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga +>IGHV3-21*04 +gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-22*01 +gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga +>IGHV3-22*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga +>IGHV3-23*01 +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23*02 
+gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23*03 +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23*04 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23*05 +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa +>IGHV3-23D*01 +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23D*02 
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-25*01 +gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga +>IGHV3-25*02 +gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga +>IGHV3-25*03 +gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga +>IGHV3-25*04 +gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga +>IGHV3-25*05 +gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga 
+>IGHV3-29*01 +gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt +>IGHV3-30*01 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*02 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30*03 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*04 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*05 
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga +>IGHV3-30*06 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*07 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*08 +caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga +>IGHV3-30*09 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*10 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga 
+>IGHV3-30*11 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*12 +caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*13 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*14 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*15 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*16 
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*17 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*18 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30*19 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30-2*01 +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca +>IGHV3-30-22*01 
+gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc +>IGHV3-30-3*01 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30-3*02 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30-3*03 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30-33*01 +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg +>IGHV3-30-42*01 
+gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt +>IGHV3-30-5*01 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30-5*02 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30-52*01 +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg +>IGHV3-32*01 +gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc +>AIGHV3-33*01 
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-33*02 +caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-33*03 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-33*04 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-33*05 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-33*06 
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-33-2*01 +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca +>IGHV3-35*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa +>IGHV3-38*01| +gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata +>IGHV3-38*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata +>IGHV3-38*03 
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata +>IGHV3-38-3*01 +gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga +>IGHV3-43*01 +gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata +>IGHV3-43*02 +gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata +>IGHV3-43D*01 +gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata +>IGHV3-47*01 
+gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga +>IGHV3-47*02 +gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga +>IGHV3-48*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-48*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga +>IGHV3-48*03 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga +>IGHV3-48*04 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga 
+>IGHV3-49*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-49*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-49*03 +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-49*04 +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-49*05 +gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-52*01 
+gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg +>IGHV3-52*02 +gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga +>IGHV3-52*03 +gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga +>IGHV3-53*01 +gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-53*02 +gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-53*03 +gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga 
+>IGHV3-53*04 +gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga +>IGHV3-54*01 +gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt +>IGHV3-54*02 +gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg +>IGHV3-54*04 +gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt +>IGHV3-62*01 +gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga +>IGHV3-63*01 
+gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt +>IGHV3-63*02 +gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa +>IGHV3-64*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga +>IGHV3-64*02 +gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga +>IGHV3-64*03 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga +>IGHV3-64*04 
+caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-64*05 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga +>IGHV3-64D*06 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga +>IGHV3-66*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-66*02 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga +>IGHV3-66*03 
+gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-66*04 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca +>IGHV3-69-1*01 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-69-1*02 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga +>IGHV3-7*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-7*02 
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga +>IGHV3-7*03 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-71*01 +gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-71*02 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga +>IGHV3-71*03 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-72*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga 
+>IGHV3-72*02 +....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat +>IGHV3-73*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca +>IGHV3-73*02 +gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca +>IGHV3-74*01 +gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga +>IGHV3-74*02 +gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga +>IGHV3-74*03 +gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga +>IGHV3-9*01 
+gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata +>IGHV3-9*02 +gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata +>IGHV3-9*03 +gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata +>IGHV3-NL1*01 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3/OR15-7*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga +>IGHV3/OR15-7*02 
+gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga +>IGHV3/OR15-7*03 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga +>IGHV3/OR15-7*05 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga +>IGHV3/OR16-10*01 +gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga +>IGHV3/OR16-10*02 +gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga +>IGHV3/OR16-10*03 
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga +>IGHV3/OR16-12*01 +gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga +>IGHV3/OR16-13*01 +gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga +>IGHV3/OR16-14*01 +gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga +>IGHV3/OR16-15*01 +gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa +>IGHV3/OR16-15*02 
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga +>IGHV3/OR16-16*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga +>IGHV3/OR16-6*02 +gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg +>IGHV3/OR16-8*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa +>IGHV3/OR16-8*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca +>IGHV3/OR16-9*01 
+gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa +>IGHV4-28*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-28*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-28*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga +>IGHV4-28*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga +>IGHV4-28*05 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa 
+>IGHV4-28*06 +caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa +>IGHV4-28*07 +caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-30-2*01 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga +>IGHV4-30-2*02 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg +>IGHV4-30-2*03 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca +>IGHV4-30-2*04 
+...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-30-2*05 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga +>IGHV4-30-2*06 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga +>IGHV4-30-4*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga +>IGHV4-30-4*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga +>IGHV4-30-4*03 
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg +>XIGHV4-30-4*04 +caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg +>IGHV4-30-4*05 +..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga +>IGHV4-30-4*06 +...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga +>IGHV4-30-4*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga +>IGHV4-31*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga 
+>IGHV4-31*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-31*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-31*04 +caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg +>IGHV4-31*05 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg +>IGHV4-31*06 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg +>IGHV4-31*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg 
+>IGHV4-31*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg +>IGHV4-31*09 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-31*10 +caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga +>IGHV4-34*01 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-34*02 +caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-34*03 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg 
+>IGHV4-34*04 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-34*05 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-34*06 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-34*07 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-34*08 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg +>IGHV4-34*09 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga 
+>IGHV4-34*10 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata +>IGHV4-34*11 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga +>IGHV4-34*12 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga +>IGHV4-34*13 +...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-38-2*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga +>IGHV4-38-2*02 
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-39*01 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca +>IGHV4-39*02 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga +>IGHV4-39*03 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg +>IGHV4-39*04 +..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac +>IGHV4-39*05 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg +>IGHV4-39*06 
+cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-39*07 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-4*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga +>IGHV4-4*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-4*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-4*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-4*05 
+caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-4*06 +............................................................ +...............tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-4*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-4*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-55*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata +>IGHV4-55*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata 
+>IGHV4-55*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-55*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-55*05 +caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-55*06 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg +>IGHV4-55*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg +>IGHV4-55*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-55*09 
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-59*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4-59*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4-59*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg +>IGHV4-59*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg +>IGHV4-59*05 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg 
+>IGHV4-59*06 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg +>IGHV4-59*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga +>IGHV4-59*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca +>IGHV4-59*09 +...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg +>IGHV4-59*10 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata +>IGHV4-61*01 
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4-61*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-61*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4-61*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg +>IGHV4-61*05 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga +>IGHV4-61*06 +...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga 
+>IGHV4-61*07 +...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca +>IGHV4-61*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4/OR15-8*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4/OR15-8*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4/OR15-8*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV5-10-1*01 
+gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-10-1*02 +gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca +>IGHV5-10-1*03 +gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-10-1*04 +gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-51*01 +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca +>IGHV5-51*02 
+gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca +>IGHV5-51*03 +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-51*04 +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-51*05 +.....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg +>IGHV5-78*01 +gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga +>IGHV6-1*01 +caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga +>IGHV6-1*02 
+caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga +>IGHV7-34-1*01 +...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta +>IGHV7-34-1*02 +...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta +>IGHV7-4-1*01 +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga +>IGHV7-4-1*02 +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga +>IGHV7-4-1*03 +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg +>IGHV7-4-1*04 
+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga +>IGHV7-4-1*05 +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga +>AIGHV7-40*03| +ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga +>IGHV7-81*01 +caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
<!-- Diff header: /dev/null (Thu Jan 01 00:00:00 1970 +0000) to b/tmp/baseline/baseline.xml (Wed Jun 08 03:54:49 2016 -0400), hunk -0,0 +1,73 -->
<!-- Galaxy tool definition for BASELINe. Arguments are handed to wrapper.sh positionally:
     1 ss, 2 species, 3 substitution, 4 mutability, 5 clonal, 6 fixindels, 7 boundaries,
     8 whitespace-separated input files, 9 whitespace-separated input IDs,
     10 reference, 11 PDF output, 12 selection, 13 table output. -->
<tool id="baseline_bayesian_estimation" name="Baseline" version="1.0">
	<description>Bayesian Estimation of Antigen-Driven Selection</description>
	<command interpreter="bash">
		wrapper.sh $ss $species $substitution $mutability $clonal $fixindels "$boundaries" "
		#for $i, $input in enumerate($inputs)
			${input.in_file}
		#end for
		"
		"
		#for $i, $input in enumerate($inputs)
			${input.id}
		#end for
		"
		$reference $out_file "$selection" "$out_table"
	</command>
	<inputs>
		<repeat name="inputs" title="inputs" min="1" default="1">
			<param name="in_file" type="data" label="Input excel or IMGT zip file" />
			<param name="id" type="text" label="ID (alpha-numeric, no spaces)" />
		</repeat>
		<param name="reference" type="data" format="fasta" label="Reference fasta file" />
		<param name="ss" type="select" label="Selection Statistic">
			<option value="1">Focused</option>
			<option value="2">Local</option>
		</param>
		<param name="species" type="select" label="SHM Targeting Model">
			<option value="1">Human</option>
			<option value="2">Mouse</option>
		</param>
		<param name="substitution" type="select" label="Substitution Model">
			<option value="1">Smith DS et al. 1996</option>
			<option value="0">Uniform substitution</option>
			<option value="5">FiveS</option>
		</param>
		<param name="mutability" type="select" label="Mutability Model">
			<option value="1">Tri-nucleotide (Shapiro GS et al. 2002)</option>
			<option value="0">Uniform mutability</option>
			<option value="5">FiveS</option>
		</param>
		<param name="clonal" type="select" label="Sequences are clonal">
			<option value="0">Independent sequences</option>
			<option value="1">Clonally related</option>
			<option value="2">Clonally related and only non-terminal mutations</option>
		</param>
		<param name="fixindels" type="select" label="Fix Indels">
			<option value="0">Do Nothing</option>
			<option value="1">Try and fix Indels</option>
		</param>
		<!-- Seven colon-separated fields; the fifth one is the end of CDR2 (the label previously said CDR3End twice). -->
		<param name="boundaries" type="text" value="25:26:38:55:65:104:-" size="40" label="Custom Boundaries (FWR1Start:FWR1End:CDR1End:FWR2End:CDR2End:FWR3End:CDR3End)" />
		<param name="selection" type="select" label="Unique Selection Definition">
			<option value="VGene,AA.JUNCTION">VGene, AA CDR3</option>
			<option value="VGene,JGene,AA.JUNCTION">VGene, JGene, AA CDR3</option>
			<option value="VGene,DGene,JGene,AA.JUNCTION">VGene, DGene, JGene, AA CDR3</option>
			<option value="Sequence.ID">None</option>
		</param>
	</inputs>
	<outputs>
		<data format="pdf" name="out_file" label = "Baseline PDF on ${on_string}"/>
		<data format="tabular" name="out_table" label = "Baseline Table on ${on_string}"/>
	</outputs>
	<help>
		Gur Yaari; Mohamed Uduman; Steven H. Kleinstein. Quantifying selection in high-throughput Immunoglobulin sequencing data sets. Nucleic Acids Res. 2012 May 27.

		Mohamed Uduman; Gur Yaari; Uri Hershberg; Mark J. Shlomchik; Steven H. Kleinstein. Detecting selection in immunoglobulin sequences. Nucleic Acids Res. 2011 Jul;39(Web Server issue):W499-504.

		**Boundaries**

		IMGT® No CDR3: "1:26:38:55:65:104:-"

		IMGT®: "1:26:38:55:65:104:116"

	</help>
</tool>
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/tmp/baseline/comparePDFs.r Wed Jun 08 03:54:49 2016 -0400
# @@ -0,0 +1,225 @@
#
# Draws a PDF matrix comparing the CDR/FWR selection-strength densities stored
# in `listPDFs` (loaded from an .RData file) for up to 10 selected rows.
#
# Usage: Rscript comparePDFs.r <input.RData> <output.pdf> <rowID> [<rowID> ...]
# Row IDs may carry a "chkbx" prefix, which is stripped before conversion.
options("warn"=-1)

# from http://selection.med.yale.edu/baseline/Archive/Baseline%20Version%201.3/Baseline_Functions_Version1.3.r
# Compute p-value of two distributions.
#
# dens1, dens2: densities evaluated on a common grid. Both are normalised to
#   sum to 1 here.
# sigma_S, N:   unused by this implementation; kept so existing calls still work.
# Returns sum(dens1 * (cumsum(dens2) - dens2 / 2)), shifted by -1 when it
# exceeds 0.5, or NA when either density has fewer than two points.
compareTwoDistsFaster <- function(sigma_S = seq(-20, 20, length.out = 4001), N = 10000,
                                  dens1 = runif(4001, 0, 1), dens2 = runif(4001, 0, 1)) {
  if (length(dens1) > 1 && length(dens2) > 1) {
    dens1 <- dens1 / sum(dens1)
    dens2 <- dens2 / sum(dens2)
    cum2 <- cumsum(dens2) - dens2 / 2
    # Index cum2 by dens1's positions (no recycling), as the element-wise
    # loop this replaces did.
    tmp <- sum(dens1 * cum2[seq_along(dens1)])
    # Guard against NA/NaN (e.g. an all-zero density): `if (NA)` is an error.
    if (!is.na(tmp) && tmp > 0.5) {
      tmp <- tmp - 1
    }
    return(tmp)
  }
  NA
}

# First and last grid position at which `dens` exceeds `threshold`.
# `lo` is NA and `hi` is empty when nothing exceeds the threshold.
density_support <- function(dens, xMarks, threshold = 0.001) {
  covered <- xMarks[dens > threshold]
  list(lo = covered[1], hi = covered[length(covered)])
}


library(grid)
arg <- commandArgs(TRUE)
#arg <- c("300143","4","5")
# Drop any literal "clonal" token. The original expression computed this
# subset but never assigned it, so it had no effect.
arg <- arg[arg != "clonal"]
input <- arg[1]
output <- arg[2]
# With no row IDs given, arg[3] is NA and the check below rejects the run.
rowIDs <- as.numeric(gsub("chkbx", "", arg[3:max(3, length(arg))]))

numbSeqs <- length(rowIDs)

if (is.na(rowIDs[1]) || numbSeqs > 10) {
  stop( paste("Error: Please select between one and 10 seqeunces to compare.") )
}

# Provides `listPDFs`, which every plotting function below reads as a global.
load(input)

xMarks <- seq(-20, 20, length.out = 4001)

# Draw one panel with the CDR densities of pdf1 (solid) and pdf2 (dashed) in
# the upper half and the mirrored FWR densities in the lower half.
#
# pdf1, pdf2: indices into `listPDFs`.
# Sample:     number of x positions at which the densities are drawn.
# cex:        text scaling.
# xlim:       optional c(min, max); when not of length 2 the range is derived
#             from where the densities exceed 0.001 (always including 0).
# xMarks:     grid on which the stored densities were evaluated.
plot_grid_s <- function(pdf1, pdf2, Sample = 100, cex = 1, xlim = NULL,
                        xMarks = seq(-20, 20, length.out = 4001)) {
  cdr1 <- listPDFs[pdf1][[1]][["CDR"]]
  fwr1 <- listPDFs[pdf1][[1]][["FWR"]]
  cdr2 <- listPDFs[pdf2][[1]][["CDR"]]
  fwr2 <- listPDFs[pdf2][[1]][["FWR"]]

  yMax <- max(c(abs(as.numeric(unlist(listPDFs[pdf1]))),
                abs(as.numeric(unlist(listPDFs[pdf2]))), 0), na.rm = TRUE) * 1.1

  # Was `length(xlim==2)`, which is the length of a comparison and therefore
  # non-zero for any non-empty xlim.
  if (length(xlim) == 2) {
    xMin <- xlim[1]
    xMax <- xlim[2]
  } else {
    supports <- lapply(list(cdr1, fwr1, cdr2, fwr2), density_support, xMarks = xMarks)
    xMin <- min(c(unlist(lapply(supports, `[[`, "lo")), 0), na.rm = TRUE)
    xMax <- max(c(unlist(lapply(supports, `[[`, "hi")), 0), na.rm = TRUE)
  }

  # The original took `$x` of an approx() call, which is just its `xout`.
  x <- seq(xMin, xMax, length.out = Sample)
  grid.rect(gp = gpar(col = gray(0.6), fill = "white", cex = cex))
  pushViewport(viewport(x = 0.175, y = 0.175, width = 0.825, height = 0.825,
                        just = c("left", "bottom"), default.units = "npc"))
  #pushViewport(plotViewport(c(1.8, 1.8, 0.25, 0.25)*cex))
  pushViewport(dataViewport(x, c(yMax, -yMax), gp = gpar(cex = cex), extension = c(0.05)))
  grid.polygon(c(0, 0, 1, 1), c(0, 0.5, 0.5, 0),
               gp = gpar(col = grey(0.95), fill = grey(0.95)), default.units = "npc")
  grid.polygon(c(0, 0, 1, 1), c(1, 0.5, 0.5, 1),
               gp = gpar(col = grey(0.9), fill = grey(0.9)), default.units = "npc")
  grid.rect()
  grid.xaxis(gp = gpar(cex = cex / 1.1))
  yticks <- pretty(c(-yMax, yMax), 8)
  yticks <- yticks[yticks > (-yMax) & yticks < (yMax)]
  # FWR is drawn mirrored below zero, so tick labels show absolute values.
  grid.yaxis(at = yticks, label = abs(yticks), gp = gpar(cex = cex / 1.1))

  if (length(cdr1) > 1) {
    ycdr <- approx(xMarks, cdr1, xout = x, yleft = 0, yright = 0)$y
    grid.lines(unit(x, "native"), unit(ycdr, "native"), gp = gpar(col = 2, lwd = 2))
  }
  if (length(fwr1) > 1) {
    yfwr <- approx(xMarks, fwr1, xout = x, yleft = 0, yright = 0)$y
    grid.lines(unit(x, "native"), unit(-yfwr, "native"), gp = gpar(col = 4, lwd = 2))
  }

  if (length(cdr2) > 1) {
    ycdr2 <- approx(xMarks, cdr2, xout = x, yleft = 0, yright = 0)$y
    grid.lines(unit(x, "native"), unit(ycdr2, "native"), gp = gpar(col = 2, lwd = 2, lty = 2))
  }
  if (length(fwr2) > 1) {
    yfwr2 <- approx(xMarks, fwr2, xout = x, yleft = 0, yright = 0)$y
    grid.lines(unit(x, "native"), unit(-yfwr2, "native"), gp = gpar(col = 4, lwd = 2, lty = 2))
  }

  grid.lines(unit(c(0, 1), "npc"), unit(c(0.5, 0.5), "npc"), gp = gpar(col = 1))
  grid.lines(unit(c(0, 0), "native"), unit(c(0, 1), "npc"), gp = gpar(col = 1, lwd = 1, lty = 3))

  grid.text("Density", x = unit(-2.5, "lines"), rot = 90, gp = gpar(cex = cex))
  grid.text( expression(paste("Selection Strength (", Sigma, ")", sep="")) , y = unit(-2.5, "lines"), gp = gpar(cex = cex))

  # Diagonal panels (a row compared with itself) also show the CDR-vs-FWR p-value.
  if (pdf1 == pdf2 && length(fwr2) > 1 && length(cdr2) > 1) {
    pCDRFWR <- compareTwoDistsFaster(sigma_S = xMarks, N = 10000,
                                     dens1 = listPDFs[[pdf1]][["CDR"]],
                                     dens2 = listPDFs[[pdf1]][["FWR"]])
    pval <- formatC(as.numeric(pCDRFWR), digits = 3)
    grid.text( substitute(expression(paste(P[CDR/FWR], "=", x, sep="")),list(x=pval))[[2]] , x = unit(0.02, "npc"), y = unit(0.98, "npc"), just = c("left", "top"), gp = gpar(cex = cex * 1.2))
  }
  grid.text(paste("CDR"), x = unit(0.98, "npc"), y = unit(0.98, "npc"), just = c("right", "top"), gp = gpar(cex = cex * 1.5))
  grid.text(paste("FWR"), x = unit(0.98, "npc"), y = unit(0.02, "npc"), just = c("right", "bottom"), gp = gpar(cex = cex * 1.5))
  popViewport(2)
}
#plot_grid_s(1)


# Map a p-value to a cell colour: greens for negative values, reds for
# non-negative ones, paler as |p| grows. Values outside the break range
# (or NA) yield NA.
p2col <- function(p = 0.01) {
  breaks <- c(-.51, -0.1, -.05, -0.01, -0.005, 0, 0.005, 0.01, 0.05, 0.1, 0.51)
  i <- findInterval(p, breaks)
  cols <- c( rgb(0.8,1,0.8), rgb(0.6,1,0.6), rgb(0.4,1,0.4), rgb(0.2,1,0.2) , rgb(0,1,0),
             rgb(1,0,0), rgb(1,.2,.2), rgb(1,.4,.4), rgb(1,.6,.6) , rgb(1,.8,.8) )
  cols[i]
}


# Fill the current viewport with a 2x2 table of p-values comparing the CDR/FWR
# densities of pdf1 against those of pdf2 (top-left CDR1/CDR2, top-right
# CDR1/FWR2, bottom-left FWR1/CDR2, bottom-right FWR1/FWR2).
# Nothing is drawn when `upper` is FALSE.
plot_pvals <- function(pdf1, pdf2, cex = 1, upper = TRUE) {
  if (upper) {
    pCDR1FWR2 <- compareTwoDistsFaster(sigma_S = xMarks, N = 10000, dens1 = listPDFs[[pdf1]][["CDR"]], dens2 = listPDFs[[pdf2]][["FWR"]])
    pFWR1FWR2 <- compareTwoDistsFaster(sigma_S = xMarks, N = 10000, dens1 = listPDFs[[pdf1]][["FWR"]], dens2 = listPDFs[[pdf2]][["FWR"]])
    pFWR1CDR2 <- compareTwoDistsFaster(sigma_S = xMarks, N = 10000, dens2 = listPDFs[[pdf2]][["CDR"]], dens1 = listPDFs[[pdf1]][["FWR"]])
    pCDR1CDR2 <- compareTwoDistsFaster(sigma_S = xMarks, N = 10000, dens2 = listPDFs[[pdf2]][["CDR"]], dens1 = listPDFs[[pdf1]][["CDR"]])
    grid.polygon(c(0.5, 0.5, 1, 1), c(0, 0.5, 0.5, 0), gp = gpar(col = p2col(pFWR1FWR2), fill = p2col(pFWR1FWR2)), default.units = "npc")
    grid.polygon(c(0.5, 0.5, 1, 1), c(1, 0.5, 0.5, 1), gp = gpar(col = p2col(pCDR1FWR2), fill = p2col(pCDR1FWR2)), default.units = "npc")
    grid.polygon(c(0.5, 0.5, 0, 0), c(1, 0.5, 0.5, 1), gp = gpar(col = p2col(pCDR1CDR2), fill = p2col(pCDR1CDR2)), default.units = "npc")
    grid.polygon(c(0.5, 0.5, 0, 0), c(0, 0.5, 0.5, 0), gp = gpar(col = p2col(pFWR1CDR2), fill = p2col(pFWR1CDR2)), default.units = "npc")

    grid.lines(c(0, 1), 0.5, gp = gpar(lty = 2, col = gray(0.925)))
    grid.lines(0.5, c(0, 1), gp = gpar(lty = 2, col = gray(0.925)))

    grid.text(formatC(as.numeric(pFWR1FWR2), digits = 3), x = unit(0.75, "npc"), y = unit(0.25, "npc"), just = c("center", "center"), gp = gpar(cex = cex))
    grid.text(formatC(as.numeric(pCDR1FWR2), digits = 3), x = unit(0.75, "npc"), y = unit(0.75, "npc"), just = c("center", "center"), gp = gpar(cex = cex))
    grid.text(formatC(as.numeric(pCDR1CDR2), digits = 3), x = unit(0.25, "npc"), y = unit(0.75, "npc"), just = c("center", "center"), gp = gpar(cex = cex))
    grid.text(formatC(as.numeric(pFWR1CDR2), digits = 3), x = unit(0.25, "npc"), y = unit(0.25, "npc"), just = c("center", "center"), gp = gpar(cex = cex))
  }
}


##################################################################################
################## The whole OCD's matrix ########################################
##################################################################################

#pdf(width=4*numbSeqs+1/3,height=4*numbSeqs+1/3)
pdf(output, width = 4 * numbSeqs + 1 / 3, height = 4 * numbSeqs + 1 / 3)

# (numbSeqs + 1) x (numbSeqs + 1) layout: row 1 holds the column headers, the
# last column holds the row headers, the remaining cells hold the panels.
pushViewport(viewport(x = 0.02, y = 0.02, just = c("left", "bottom"), width = 0.96, height = 0.96,
                      layout = grid.layout(numbSeqs + 1, numbSeqs + 1,
                                           widths = unit.c(unit(rep(1, numbSeqs), "null"), unit(4, "lines")),
                                           heights = unit.c(unit(4, "lines"), unit(rep(1, numbSeqs), "null")))))

# Every loop below runs over 2..(numbSeqs + 1); the original spelled this as
# `1:numbSeqs+1`, which parses as (1:numbSeqs) + 1.
cellIdx <- seq_len(numbSeqs) + 1

# Column headers (top row).
for (seqOne in cellIdx) {
  pushViewport(viewport(layout.pos.col = seqOne - 1, layout.pos.row = 1))
  if (seqOne > 2) {
    grid.polygon(c(0, 0, 0.5, 0.5), c(0, 0.5, 0.5, 0), gp = gpar(col = grey(0.5), fill = grey(0.9)), default.units = "npc")
    grid.polygon(c(1, 1, 0.5, 0.5), c(0, 0.5, 0.5, 0), gp = gpar(col = grey(0.5), fill = grey(0.95)), default.units = "npc")
    grid.polygon(c(0, 0, 1, 1), c(1, 0.5, 0.5, 1), gp = gpar(col = grey(0.5)), default.units = "npc")

    grid.text(y = .25, x = 0.75, "FWR", gp = gpar(cex = 1.5), just = "center")
    grid.text(y = .25, x = 0.25, "CDR", gp = gpar(cex = 1.5), just = "center")
  }
  grid.rect(gp = gpar(col = grey(0.9)))
  grid.text(y = .75, substr(paste(names(listPDFs)[rowIDs[seqOne - 1]]), 1, 16), gp = gpar(cex = 2), just = "center")
  popViewport(1)
}

# Row headers (last column).
for (seqOne in cellIdx) {
  pushViewport(viewport(layout.pos.row = seqOne, layout.pos.col = numbSeqs + 1))
  if (seqOne <= numbSeqs) {
    grid.polygon(c(0, 0.5, 0.5, 0), c(0, 0, 0.5, 0.5), gp = gpar(col = grey(0.5), fill = grey(0.95)), default.units = "npc")
    grid.polygon(c(0, 0.5, 0.5, 0), c(1, 1, 0.5, 0.5), gp = gpar(col = grey(0.5), fill = grey(0.9)), default.units = "npc")
    grid.polygon(c(1, 0.5, 0.5, 1), c(0, 0, 1, 1), gp = gpar(col = grey(0.5)), default.units = "npc")
    grid.text(x = .25, y = 0.75, "CDR", gp = gpar(cex = 1.5), just = "center", rot = 270)
    grid.text(x = .25, y = 0.25, "FWR", gp = gpar(cex = 1.5), just = "center", rot = 270)
  }
  grid.rect(gp = gpar(col = grey(0.9)))
  grid.text(x = 0.75, substr(paste(names(listPDFs)[rowIDs[seqOne - 1]]), 1, 16), gp = gpar(cex = 2), rot = 270, just = "center")
  popViewport(1)
}

# Upper triangle: pairwise p-value tables.
for (seqOne in cellIdx) {
  for (seqTwo in cellIdx) {
    pushViewport(viewport(layout.pos.col = seqTwo - 1, layout.pos.row = seqOne))
    if (seqTwo > seqOne) {
      plot_pvals(rowIDs[seqOne - 1], rowIDs[seqTwo - 1], cex = 2)
      grid.rect()
    }
    popViewport(1)
  }
}


# Overall x-range across all selected rows.
# NOTE(review): xMin/xMax are not used after this loop (plot_grid_s is called
# without xlim below) - confirm whether a shared x-axis was intended.
xMin <- 0
xMax <- 0.01
for (pdf1 in rowIDs) {
  cdrSupport <- density_support(listPDFs[pdf1][[1]][["CDR"]], xMarks)
  fwrSupport <- density_support(listPDFs[pdf1][[1]][["FWR"]], xMarks)
  xMin <- min(c(cdrSupport$lo, fwrSupport$lo, xMin), na.rm = TRUE)
  xMax <- max(c(cdrSupport$hi, fwrSupport$hi, xMax), na.rm = TRUE)
}


# Diagonal and lower triangle: density panels.
for (i in cellIdx) {
  for (j in (i - 1):numbSeqs) {
    pushViewport(viewport(layout.pos.col = i - 1, layout.pos.row = j + 1))
    grid.rect()
    plot_grid_s(rowIDs[i - 1], rowIDs[j], cex = 1)
    popViewport(1)
  }
}

dev.off()

cat("Success", paste(rowIDs, collapse = "_"), sep = ":")
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/tmp/baseline/filter.r Wed Jun 08 03:54:49 2016 -0400
# @@ -0,0 +1,37 @@
#
# Attaches AA.JUNCTION from the summary table to the gapped-nt table, derives
# allele-free V/D/J gene names, keeps the first row per unique `selection`
# key, drops "No results"/"unproductive" rows and writes the result as TSV.
#
# Usage: Rscript filter.r <summaryfile> <gappedfile> <selection> <output>
#   selection: comma-separated column names that define uniqueness,
#              e.g. "VGene,AA.JUNCTION"
arg <- commandArgs(TRUE)
summaryfile <- arg[1]
gappedfile <- arg[2]
selection <- arg[3]
output <- arg[4]
print(paste("selection = ", selection))


summarydat <- read.table(summaryfile, header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE)
gappeddat <- read.table(gappedfile, header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE)

#dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T))

# The two tables are combined by row position. Fail loudly instead of letting
# cbind() recycle a shorter column or (when the column is absent) append
# nothing and have the rename below clobber an unrelated column.
if (is.null(summarydat$AA.JUNCTION)) {
  stop("summary file has no AA.JUNCTION column: ", summaryfile, call. = FALSE)
}
if (nrow(summarydat) != nrow(gappeddat)) {
  stop("summary file and gapped file have different numbers of rows", call. = FALSE)
}

dat <- cbind(gappeddat, summarydat$AA.JUNCTION)

colnames(dat)[length(dat)] <- "AA.JUNCTION"

# Strip the leading "Homsap " and everything from the first "*" onwards.
strip_allele <- function(x) {
  gsub("[*].*", "", gsub("^Homsap ", "", x))
}

dat$VGene <- strip_allele(dat$V.GENE.and.allele)
dat$DGene <- strip_allele(dat$D.GENE.and.allele)
dat$JGene <- strip_allele(dat$J.GENE.and.allele)

# str() prints by itself and returns NULL; wrapping it in print() only added
# a stray "NULL" line to the log.
str(dat)

# One key per row, built from the selected columns joined with ":".
dat$past <- do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":"))

dat <- dat[!duplicated(dat$past), ]

# A plain `!=` comparison yields NA for rows with a missing Functionality, and
# subsetting by NA produces all-NA rows in the output; drop those rows instead.
# NOTE(review): only the exact values "No results" and "unproductive" are
# excluded - confirm whether other unproductive labels should be dropped too.
keep <- !is.na(dat$Functionality) &
  !(dat$Functionality %in% c("No results", "unproductive"))
dat <- dat[keep, ]

write.table(x = dat, file = output, sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/tmp/baseline/script_imgt.py Wed Jun 08 03:54:49 2016 -0400
# @@ -0,0 +1,79 @@
"""Convert a tab-separated (id, V gene, sequence) table into the grouped FASTA
layout written below: a ">>>ID" line, then per V gene a ">>gene" line with its
reference sequence, followed by ">id" / sequence pairs.

The output file is opened in append mode so several inputs can be written to
the same file by successive runs.
"""
#import xlrd #avoid dep
import argparse
import re

vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#,
#						r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)",
#						r"(IGKV[0-3]D?-[0-9]{1,2})",
#						r"(IGLV[0-9]-[0-9]{1,2})",
#						r"(TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])?)",
#						r"(TRGV[234589])",
#						r"(TRDV[1-3])"]

vPattern = re.compile("|".join(vPattern))


def filterGene(s, pattern):
    """Return the first match of `pattern` in `s`, or None.

    None is also returned when `s` is not a str.
    """
    if not isinstance(s, str):
        return None
    res = pattern.search(s)
    if res:
        return res.group(0)
    return None


def _read_reference(path):
    """Parse a FASTA file into {header without '>': concatenated sequence}."""
    refdic = dict()
    with open(path, 'r') as ref:
        currentSeq = ""
        currentId = ""
        for line in ref:
            # Compare by value; `is` on string literals only worked through
            # CPython's interning of short strings.
            if line[0] == ">":
                if currentSeq != "" and currentId != "":
                    refdic[currentId[1:]] = currentSeq
                currentId = line.rstrip()
                currentSeq = ""
            else:
                currentSeq += line.rstrip()
        # Flush the last record; skipped for a file without any header line.
        if currentId != "":
            refdic[currentId[1:]] = currentSeq
    return refdic


def main():
    """Command-line entry point."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", help="Tab-separated input file with a header row; column 1 has the sequence id, column 2 the V gene annotation and column 3 the sequence")
    parser.add_argument("--ref", help="Reference file")
    parser.add_argument("--output", help="Output file")
    parser.add_argument("--id", help="ID to be used at the '>>>' line in the output")

    args = parser.parse_args()

    refdic = _read_reference(args.ref)

    first = True
    with open(args.input, 'r') as i:
        with open(args.output, 'a') as o:
            o.write(">>>" + args.id + "\n")
            outputdic = dict()
            for line in i:
                if first:
                    # Header row.
                    first = False
                    continue
                linesplt = line.split("\t")
                if len(linesplt) < 3:
                    # Blank or truncated row: nothing usable on it.
                    continue
                ref = filterGene(linesplt[1], vPattern)
                if not ref or not linesplt[2].rstrip():
                    continue
                entry = (linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())
                outputdic.setdefault(ref, []).append(entry)

            for k in outputdic.keys():
                if k in refdic:
                    o.write(">>" + k + "\n")
                    o.write(refdic[k] + "\n")
                    for seq in outputdic[k]:
                        o.write(">" + seq[0] + "\n")
                        o.write(seq[1] + "\n")
                else:
                    # Single-argument call form prints the same text on
                    # Python 2 and Python 3.
                    print(k + " not in reference, skipping " + k)


if __name__ == "__main__":
    main()
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/tmp/baseline/script_xlsx.py Wed Jun 08 03:54:49 2016 -0400
# @@ -0,0 +1,58 @@
"""Convert the sheets of an Excel workbook into the grouped FASTA layout
written below: per sheet a ">>>sheet name" line, then per gene a ">>gene" line
with its reference sequence, followed by ">id" / sequence pairs.

The output file is opened in append mode.
"""
import xlrd
import argparse

# Zero-based column indices read from every sheet (6 = G, 7 = H, 8 = I).
# NOTE(review): the --input help text names column J for the sequence, but
# index 8 is column I - confirm which one is intended.
gene_column = 6
id_column = 7
seq_column = 8
LETTERS = [x for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]


def _read_reference(path):
    """Parse a FASTA file into {header without '>': concatenated sequence}."""
    refdic = dict()
    with open(path, 'r') as ref:
        currentSeq = ""
        currentId = ""
        for line in ref:
            # Compare by value; `is` on string literals only worked through
            # CPython's interning of short strings.
            if line[0] == ">":
                if currentSeq != "" and currentId != "":
                    refdic[currentId[1:]] = currentSeq
                currentId = line.rstrip()
                currentSeq = ""
            else:
                currentSeq += line.rstrip()
        # Flush the last record; skipped for a file without any header line.
        if currentId != "":
            refdic[currentId[1:]] = currentSeq
    return refdic


def main():
    """Command-line entry point."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence")
    parser.add_argument("--ref", help="Reference file")
    parser.add_argument("--output", help="Output file")

    args = parser.parse_args()

    refdic = _read_reference(args.ref)

    # open_workbook's second positional parameter is the log file object, not
    # a file mode, so the former 'r' argument is dropped.
    with xlrd.open_workbook(args.input) as wb:
        with open(args.output, 'a') as o:
            for sheet in wb.sheets():
                if sheet.nrows < 2 or sheet.ncols <= seq_column:
                    # No data row, or too few columns: sheet.cell() would raise.
                    print("Sheet " + sheet.name + " has no usable data, skipping")
                    continue
                if sheet.cell(1, gene_column).value.find("IGHV") < 0:
                    print("Genes not in column " + LETTERS[gene_column] + ", skipping sheet " + sheet.name)
                    continue
                o.write(">>>" + sheet.name + "\n")
                outputdic = dict()
                # Row 0 is skipped (data starts at row 1).
                for rowindex in range(1, sheet.nrows):
                    ref = sheet.cell(rowindex, gene_column).value.replace(">", "")
                    entry = (sheet.cell(rowindex, id_column).value.replace(">", ""),
                             sheet.cell(rowindex, seq_column).value)
                    outputdic.setdefault(ref, []).append(entry)

                for k in outputdic.keys():
                    if k in refdic:
                        o.write(">>" + k + "\n")
                        o.write(refdic[k] + "\n")
                        for seq in outputdic[k]:
                            o.write(">" + seq[0] + "\n")
                            o.write(seq[1] + "\n")
                    else:
                        # Single-argument call form prints the same text on
                        # Python 2 and Python 3.
                        print(k + " not in reference, skipping " + k)


if __name__ == "__main__":
    main()
#!/bin/bash
# File: tmp/baseline/wrapper.sh
#
# Builds a single "baseline.fasta" from the given inputs, runs
# Baseline_Main.r on it, then runs comparePDFs.r and copies the result table.
#
# Positional arguments:
#   $1  testID             $2  species
#   $3  substitutionModel  $4  mutabilityModel
#   $5  clonal             $6  fixIndels
#   $7  region
#   $8  inputs        whitespace-separated list of input files
#   $9  IDs           whitespace-separated list of IDs, one per input
#   $10 ref           reference file handed to the converter scripts
#   $11 output        output argument of comparePDFs.r
#   $12 selection     selection argument of filter.r
#   $13 output_table  destination of the result table
dir="$(cd "$(dirname "$0")" && pwd)"

testID=$1
species=$2
substitutionModel=$3
mutabilityModel=$4
clonal=$5
fixIndels=$6
region=$7
# The two list arguments are split on whitespace on purpose.
inputs=$8
inputs=($inputs)
IDs=$9
IDs=($IDs)
ref=${10}
output=${11}
selection=${12}
output_table=${13}
outID="result"

echo "$PWD"

echo "testID = $testID"
echo "species = $species"
echo "substitutionModel = $substitutionModel"
echo "mutabilityModel = $mutabilityModel"
echo "clonal = $clonal"
echo "fixIndels = $fixIndels"
echo "region = $region"
echo "inputs = ${inputs[@]}"
echo "IDs = ${IDs[@]}"
echo "ref = $ref"
echo "output = $output"
echo "outID = $outID"

fasta="$PWD/baseline.fasta"


count=0
for current in "${inputs[@]}"
do
    f=$(file "$current")
    if [[ "$f" == *"Zip archive"* ]] || [[ "$f" == *"XZ compressed data"* ]]
    then
        id=${IDs[$count]}
        echo "id=$id"
        # The echoed command lines mirror the commands that are actually run.
        if [[ "$f" == *"Zip archive"* ]] ; then
            echo "Zip archive"
            echo "unzip $current -d $PWD/$id/"
            unzip "$current" -d "$PWD/$id/"
        elif [[ "$f" == *"XZ compressed data"* ]] ; then
            echo "XZ archive"
            echo "tar -xJf $current -C $PWD/$id/files/"
            mkdir -p "$PWD/$id/files"
            tar -xJf "$current" -C "$PWD/$id/files/"
        fi
        summaryfile="$PWD/summary_${id}.txt"
        gappedfile="$PWD/gappednt_${id}.txt"
        filtered="$PWD/filtered_${id}.txt"
        # "ls -l" prints a "total" line first, so 2 lines means the directory
        # holds exactly one entry; the files are then looked up one level down.
        filecount=$(ls -l "$PWD/$id/" | wc -l)
        if [[ "$filecount" -eq "2" ]]
        then
            cat "$PWD/$id"/*/1_* > "$summaryfile"
            cat "$PWD/$id"/*/2_* > "$gappedfile"
        else
            cat "$PWD/$id"/1_* > "$summaryfile"
            cat "$PWD/$id"/2_* > "$gappedfile"
        fi
        Rscript "$dir/filter.r" "$summaryfile" "$gappedfile" "$selection" "$filtered" 2>&1

        final="$PWD/final_${id}.txt"
        cut -f2,4,7 "$filtered" > "$final"
        python "$dir/script_imgt.py" --input "$final" --ref "$ref" --output "$fasta" --id "$id"
    else
        python "$dir/script_xlsx.py" --input "$current" --ref "$ref" --output "$fasta"
    fi
    count=$((count+1))
done
workdir="$PWD"
cd "$dir" || exit 1
echo "file: ${inputs[0]}"
#Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region ${inputs[0]} $workdir/ $outID 2>&1
Rscript --verbose "$dir/Baseline_Main.r" "$testID" "$species" "$substitutionModel" "$mutabilityModel" "$clonal" "$fixIndels" "$region" "$fasta" "$workdir/" "$outID" 2>&1

echo "$workdir/${outID}.txt"

# Line numbers (after dropping the header line and the "All sequences
# combined" rows) of the rows that contain 'Group'.
rows=$(tail -n +2 "$workdir/${outID}.txt" | grep -v "All sequences combined" | grep -n 'Group' | grep -Eoh '^[0-9]+' | tr '\n' ' ')
rows=($rows)
#unset rows[${#rows[@]}-1]

Rscript --verbose "$dir/comparePDFs.r" "$workdir/${outID}.RData" "$output" "${rows[@]}" 2>&1
# Use outID rather than a second hard-coded "result" so the names cannot drift.
cp "$workdir/${outID}.txt" "${output_table}"
# File: tmp/igat.r
#
# Restricts every ".txt" table in a directory to the rows whose Sequence.ID
# also occurs in the merged table, blanks the FR1.IMGT column where that
# column exists, and writes each table back over its original file.
#
# Usage: Rscript igat.r <imgt.dir> <merged.file>
args <- commandArgs(trailingOnly = TRUE)

if (length(args) < 2) {
  stop("usage: Rscript igat.r <imgt.dir> <merged.file>", call. = FALSE)
}

imgt.dir <- args[1]
merged.file <- args[2]

merged <- read.table(
  merged.file,
  header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE
)

# `pattern` is a regular expression, not a glob: match a literal ".txt" suffix.
for (f in list.files(imgt.dir, pattern = "\\.txt$")) {
  print(paste("filtering", f))
  # file.path() works whether or not imgt.dir ends with a slash.
  path <- file.path(imgt.dir, f)
  dat <- read.table(
    path,
    header = TRUE, sep = "\t", fill = TRUE, quote = "",
    stringsAsFactors = FALSE
  )

  dat <- dat[dat$Sequence.ID %in% merged$Sequence.ID, ]

  if ("FR1.IMGT" %in% colnames(dat)) {
    # rep() keeps the assignment valid when the filter left zero rows.
    dat$FR1.IMGT <- rep("", nrow(dat))
  }

  write.table(
    dat, path,
    quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE
  )
}
--- a/wrapper.sh Tue Jun 07 08:39:08 2016 -0400 +++ b/wrapper.sh Wed Jun 08 03:54:49 2016 -0400 @@ -77,6 +77,37 @@ Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} 2>&1 +echo "---------------- creating new IMGT zip ----------------<br />" +echo "---------------- creating new IMGT zip ----------------<br />" >> $output + +mkdir $outdir/new_IMGT + +cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt" +cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" +cat `find $PWD/files/ -name "3_*"` > "$outdir/new_IMGT/3_Nt-sequences.txt" +cat `find $PWD/files/ -name "4_*"` > "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt" +cat `find $PWD/files/ -name "5_*"` > "$outdir/new_IMGT/5_AA-sequences.txt" +cat `find $PWD/files/ -name "6_*"` > "$outdir/new_IMGT/6_Junction.txt" +cat `find $PWD/files/ -name "7_*"` > "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt" +cat `find $PWD/files/ -name "8_*"` > "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt" +cat `find $PWD/files/ -name "9_*"` > "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt" +cat `find $PWD/files/ -name "10_*"` > "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt" + +Rscript $dir/tmp/igat.r $outdir/new_IMGT/ $outdir/merged.txt 2>&1 + + +tmp="$PWD" +cd $outdir/new_IMGT/ #tar weirdness... 
+tar -cJf ../new_IMGT.txz * + +cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT/IgAT.xlsm + +#tar -cJf ../IgAT.txz * +zip -r ../IgAT.zip * + +cd $tmp + + echo "---------------- mutation_analysis.r ----------------" echo "---------------- mutation_analysis.r ----------------<br />" >> $output @@ -100,6 +131,7 @@ funcs=(sum mean median) echo "---------------- sequence_overview.r ----------------" +echo "---------------- sequence_overview.r ----------------" >> $output mkdir $outdir/sequence_overview @@ -173,6 +205,9 @@ echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output echo "<a href='sequence_overview/index.html'>Sequence Overview</a><br />" >> $output echo "<a href='base_overview.html'>Base overview</a><br />" >> $output +echo "<a href='baseline.pdf'>Baseline PDF</a><br />" >> $output +echo "<a href='baseline.txt'>Baseline Table</a><br />" >> $output +echo "<a href='IgAT.zip'>IgAT zip</a><br />" >> $output echo "---------------- images ----------------" @@ -224,6 +259,8 @@ echo "</html>" >> $output +echo "---------------- baseline ----------------" +bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "sample name" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" #optional output for naive @@ -234,6 +271,7 @@ echo "---------------- imgt_loader.r ----------------" #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1 + echo "---------------- naive_output.r ----------------" Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1 fi