Mercurial > repos > davidvanzessen > mutation_analysis
changeset 73:13c3710604ef draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 04 May 2016 08:40:31 -0400 |
parents | 51d92233fb5d |
children | 1507436f6c62 |
files | mutation_analysis.py mutation_analysis.r |
diffstat | 2 files changed, 3 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/mutation_analysis.py Wed May 04 04:28:02 2016 -0400 +++ b/mutation_analysis.py Wed May 04 08:40:31 2016 -0400 @@ -7,7 +7,7 @@ parser.add_argument("--input", help="The '7_V-REGION-mutation-and-AA-change-table' and '10_V-REGION-mutation-hotspots' merged together, with an added 'best_match' annotation") parser.add_argument("--genes", help="The genes available in the 'best_match' column") -parser.add_argument("--includefr1", help="The genes available in the 'best_match' column") +parser.add_argument("--includefr1", help="Should the mutation/nucleotides in the FR1 region be included?") parser.add_argument("--output", help="Output file") args = parser.parse_args() @@ -57,6 +57,7 @@ linesplt = line.split("\t") ID = linesplt[IDIndex] genedic[ID] = linesplt[best_matchIndex] + print line try: mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else [] mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
--- a/mutation_analysis.r Wed May 04 04:28:02 2016 -0400 +++ b/mutation_analysis.r Wed May 04 08:40:31 2016 -0400 @@ -6,7 +6,6 @@ input = args[1] genes = unlist(strsplit(args[2], ",")) outputdir = args[3] -print(args[4]) include_fr1 = ifelse(args[4] == "yes", T, F) setwd(outputdir) @@ -28,8 +27,6 @@ stop("No data") } - - cleanup_columns = c("FR1.IMGT.c.a", "FR2.IMGT.g.t", "CDR1.IMGT.Nb.of.nucleotides", @@ -111,7 +108,7 @@ dat[,col] = gsub("\\(.*\\)", "", dat[,col]) #dat[dat[,col] == "",] = "0" dat[,col] = as.numeric(dat[,col]) - dat[is.na(dat[,col]),] = 0 + dat[is.na(dat[,col]),col] = 0 } regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3") @@ -171,7 +168,6 @@ setwd(outputdir) - calculate_result = function(i, gene, dat, matrx, f, fname, name){ tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] @@ -313,7 +309,6 @@ install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") } - genesForPlot = gsub("[0-9]", "", dat$best_match) genesForPlot = data.frame(table(genesForPlot)) colnames(genesForPlot) = c("Gene","Freq") @@ -330,7 +325,6 @@ pc dev.off() - #blegh genesForPlot = dat[grepl("ca", dat$best_match),]$best_match if(length(genesForPlot) > 0){