Mercurial > repos > davidvanzessen > mutation_analysis
changeset 41:1b45c7d7d941 draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 02 Nov 2015 09:20:13 -0500 |
parents | e022c21f8c47 |
children | 9afd8430de2c |
files | merge_and_filter.r wrapper.sh |
diffstat | 2 files changed, 7 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/merge_and_filter.r Mon Nov 02 07:10:36 2015 -0500 +++ b/merge_and_filter.r Mon Nov 02 09:20:13 2015 -0500 @@ -2,7 +2,7 @@ summaryfile = args[1] -junctionfile = args[2] +sequencesfile = args[2] mutationanalysisfile = args[3] mutationstatsfile = args[4] hotspotsfile = args[5] @@ -14,7 +14,7 @@ unique_type=args[11] summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F) -junctions = read.table(junctionfile, header=T, sep="\t", fill=T, stringsAsFactors=F) +sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F) mutationanalysis = read.table(mutationanalysisfile, header=T, sep="\t", fill=T, stringsAsFactors=F) mutationstats = read.table(mutationstatsfile, header=T, sep="\t", fill=T, stringsAsFactors=F) hotspots = read.table(hotspotsfile, header=T, sep="\t", fill=T, stringsAsFactors=F) @@ -98,10 +98,10 @@ print(paste("Number of rows in unmatched:", nrow(unmatched))) -#remove the sequences that have an 'n' (or 'N') in the junction. -junctions = junctions[grepl("n|N", junctions$JUNCTION),] +#remove the sequences that have an 'n' (or 'N') in the FR2, FR3, CDR1 and CDR2 regions. +sequences = sequences[grepl("n|N", sequences$FR2.IMGT) | grepl("n|N", sequences$FR3.IMGT) | grepl("n|N", sequences$CDR1.IMGT) | grepl("n|N", sequences$CDR2.IMGT),] -result = result[!(result$Sequence.ID %in% junctions$Sequence.ID),] +result = result[!(result$Sequence.ID %in% sequences$Sequence.ID),] write.table(x=result, file=output, sep="\t",quote=F,row.names=F,col.names=T) write.table(x=unmatched, file=unmatchedfile, sep="\t",quote=F,row.names=F,col.names=T)
--- a/wrapper.sh Mon Nov 02 07:10:36 2015 -0500 +++ b/wrapper.sh Mon Nov 02 09:20:13 2015 -0500 @@ -24,6 +24,7 @@ fi cat $PWD/files/*/1_* > $PWD/summary.txt +cat $PWD/files/*/3_* > $PWD/sequences.txt cat $PWD/files/*/6_* > $PWD/junction.txt cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt cat $PWD/files/*/8_* > $PWD/mutationstats.txt @@ -54,7 +55,7 @@ echo "merging" -Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/junction.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique +Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm" echo "R mutation analysis"