Mercurial > repos > davidvanzessen > mutation_analysis
changeset 97:6e8dfbe164c6 draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 15 Jun 2016 04:48:41 -0400 |
parents | 925efcd00c58 |
children | 5ffbf40cdd4b |
files | merge_and_filter.r tmp/igat.r |
diffstat | 2 files changed, 6 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/merge_and_filter.r	Wed Jun 08 03:58:40 2016 -0400
+++ b/merge_and_filter.r	Wed Jun 15 04:48:41 2016 -0400
@@ -116,6 +116,10 @@
 
 print(paste("Number of sequences in result after merging with sequences:", nrow(result)))
 
+result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
+
+print(paste("Number of sequences after empty CDR1, FR2, CDR2 and FR3 column filter:", nrow(result)))
+
 result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
 
 print(paste("Number of sequences in result after n filtering:", nrow(result)))
--- a/tmp/igat.r	Wed Jun 08 03:58:40 2016 -0400
+++ b/tmp/igat.r	Wed Jun 15 04:48:41 2016 -0400
@@ -5,6 +5,8 @@
 
 merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F)
 
+merged = merged[!grepl("unmatched", merged$best_match),]
+
 for(f in list.files(imgt.dir, pattern="*.txt$")){
 	print(paste("filtering", f))
 	path = paste(imgt.dir, f, sep="")