# HG changeset patch # User davidvanzessen # Date 1446474013 18000 # Node ID 1b45c7d7d94175f878c39eadd83fe77940ca572d # Parent e022c21f8c479df0392d4b71097cdc52ad459946 Uploaded diff -r e022c21f8c47 -r 1b45c7d7d941 merge_and_filter.r --- a/merge_and_filter.r Mon Nov 02 07:10:36 2015 -0500 +++ b/merge_and_filter.r Mon Nov 02 09:20:13 2015 -0500 @@ -2,7 +2,7 @@ summaryfile = args[1] -junctionfile = args[2] +sequencesfile = args[2] mutationanalysisfile = args[3] mutationstatsfile = args[4] hotspotsfile = args[5] @@ -14,7 +14,7 @@ unique_type=args[11] summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F) -junctions = read.table(junctionfile, header=T, sep="\t", fill=T, stringsAsFactors=F) +sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F) mutationanalysis = read.table(mutationanalysisfile, header=T, sep="\t", fill=T, stringsAsFactors=F) mutationstats = read.table(mutationstatsfile, header=T, sep="\t", fill=T, stringsAsFactors=F) hotspots = read.table(hotspotsfile, header=T, sep="\t", fill=T, stringsAsFactors=F) @@ -98,10 +98,10 @@ print(paste("Number of rows in unmatched:", nrow(unmatched))) -#remove the sequences that have an 'n' (or 'N') in the junction. -junctions = junctions[grepl("n|N", junctions$JUNCTION),] +#remove the sequences that have an 'n' (or 'N') in the FR2, FR3, CDR1 and CDR2 regions. +sequences = sequences[grepl("n|N", sequences$FR2.IMGT) | grepl("n|N", sequences$FR3.IMGT) | grepl("n|N", sequences$CDR1.IMGT) | grepl("n|N", sequences$CDR2.IMGT),] -result = result[!(result$Sequence.ID %in% junctions$Sequence.ID),] +result = result[!(result$Sequence.ID %in% sequences$Sequence.ID),] write.table(x=result, file=output, sep="\t",quote=F,row.names=F,col.names=T) write.table(x=unmatched, file=unmatchedfile, sep="\t",quote=F,row.names=F,col.names=T) diff -r e022c21f8c47 -r 1b45c7d7d941 wrapper.sh --- a/wrapper.sh Mon Nov 02 07:10:36 2015 -0500 +++ b/wrapper.sh Mon Nov 02 09:20:13 2015 -0500 @@ -24,6 +24,7 @@ fi cat $PWD/files/*/1_* > $PWD/summary.txt +cat $PWD/files/*/3_* > $PWD/sequences.txt cat $PWD/files/*/6_* > $PWD/junction.txt cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt cat $PWD/files/*/8_* > $PWD/mutationstats.txt @@ -54,7 +55,7 @@ echo "merging" -Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/junction.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique +Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm" echo "R mutation analysis"