Mercurial > repos > davidvanzessen > mutation_analysis
changeset 93:53fb2948726e draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 07 Jun 2016 04:45:50 -0400 |
parents | b869a126e2c4 |
children | e39176ccddc8 |
files | gene_identification.py merge_and_filter.r |
diffstat | 2 files changed, 5 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/gene_identification.py Mon Jun 06 08:26:54 2016 -0400
+++ b/gene_identification.py Tue Jun 07 04:45:50 2016 -0400
@@ -160,7 +160,7 @@
  chunksInCM = len(compiledregex["cm"])
  requiredChunkPercentage = 0.7
  varsInCA = float(len(ca1.keys()) * 2)
- varsInCG = float(len(cg1.keys()) * 2) - 2 # -1 because the sliding window doesn't hit the first nt twice
+ varsInCG = float(len(cg1.keys()) * 2) - 2 # -2 because the sliding window doesn't hit the first and last nt twice
  varsInCM = 0
--- a/merge_and_filter.r Mon Jun 06 08:26:54 2016 -0400
+++ b/merge_and_filter.r Tue Jun 07 04:45:50 2016 -0400
@@ -140,20 +140,23 @@
  write.table(result, before.unique.file, sep="\t", quote=F,row.names=F,col.names=T)
  if(filter_unique != "no"){
- #clmns = names(result)
+ clmns = names(result)
  if(grepl("_c", filter_unique)){
  result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq, result$best_match)
  } else {
  result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
+ }
  #fltr = result$unique.def %in% result.filtered$unique.def
  if(grepl("keep", filter_unique)){
+ result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes
  result = result[!duplicated(result$unique.def),]
  } else {
  result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),]
+ result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes
  result = result[!duplicated(result$unique.def),]
  }