changeset 97:6e8dfbe164c6 draft

Uploaded
author davidvanzessen
date Wed, 15 Jun 2016 04:48:41 -0400
parents 925efcd00c58
children 5ffbf40cdd4b
files merge_and_filter.r tmp/igat.r
diffstat 2 files changed, 6 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/merge_and_filter.r	Wed Jun 08 03:58:40 2016 -0400
+++ b/merge_and_filter.r	Wed Jun 15 04:48:41 2016 -0400
@@ -116,6 +116,10 @@
 
 print(paste("Number of sequences in result after merging with sequences:", nrow(result)))
 
+result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
+
+print(paste("Number of sequences after empty CDR1, FR2, CDR2 and FR3 column filter:", nrow(result)))
+
 result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
 
 print(paste("Number of sequences in result after n filtering:", nrow(result)))
--- a/tmp/igat.r	Wed Jun 08 03:58:40 2016 -0400
+++ b/tmp/igat.r	Wed Jun 15 04:48:41 2016 -0400
@@ -5,6 +5,8 @@
 
 merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F)
 
+merged = merged[!grepl("unmatched", merged$best_match),]
+
 for(f in list.files(imgt.dir, pattern="*.txt$")){
 	print(paste("filtering", f))
 	path = paste(imgt.dir, f, sep="")