# HG changeset patch # User davidvanzessen # Date 1459936453 14400 # Node ID 88e0e7665086db2a0e8b9c31b629f492169d486d # Parent ae8b721a2964104173c51e9f9afdfa778d620381 Uploaded diff -r ae8b721a2964 -r 88e0e7665086 merge_and_filter.r --- a/merge_and_filter.r Mon Apr 04 04:25:46 2016 -0400 +++ b/merge_and_filter.r Wed Apr 06 05:54:13 2016 -0400 @@ -13,6 +13,7 @@ functionality=args[10] unique_type=args[11] filter_unique=args[12] +class_filter=args[13] summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") @@ -50,7 +51,11 @@ print(paste("Number of sequences after productive filter:", nrow(summ))) -higher_than=(summ$chunk_hit_percentage >= 70 & summ$nt_hit_percentage >= 70) +splt = strsplit(class_filter, "_")[[1]] +chunk_hit_threshold = as.numeric(splt[1]) +nt_hit_threshold = as.numeric(splt[2]) + +higher_than=(summ$chunk_hit_percentage >= chunk_hit_threshold & summ$nt_hit_percentage >= nt_hit_threshold) unmatched=summ[NULL,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")] diff -r ae8b721a2964 -r 88e0e7665086 mutation_analysis.xml --- a/mutation_analysis.xml Mon Apr 04 04:25:46 2016 -0400 +++ b/mutation_analysis.xml Wed Apr 06 05:54:13 2016 -0400 @@ -1,7 +1,7 @@ - wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1} $functionality $unique $naive_output $filter_uniques + wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1} $functionality $unique $naive_output $filter_uniques $class_filter @@ -19,7 +19,7 @@ - + @@ -33,6 +33,11 @@ + + + + + @@ -47,7 +52,28 @@ - Takes an IMGT zip (http://www.imgt.org/HighV-QUEST/search.action) file and creates a summarization of the mutation analysis. + Takes an IMGT zip (http://www.imgt.org/HighV-QUEST/search.action) file and creates a summarization of the mutation analysis. + + +--------------------------+ + | unique filter | + +--------+--------+--------+ + | values | remove | keep | + +--------+--------+--------+ + | A | A | A | + +--------+--------+--------+ + | A | B | B | + +--------+--------+--------+ + | B | D | C | + +--------+--------+--------+ + | B | | D | + +--------+--------+--------+ + | C | | | + +--------+--------+--------+ + | D | | | + +--------+--------+--------+ + | D | | | + +--------+--------+--------+ + blastn diff -r ae8b721a2964 -r 88e0e7665086 wrapper.sh --- a/wrapper.sh Mon Apr 04 04:25:46 2016 -0400 +++ b/wrapper.sh Wed Apr 06 05:54:13 2016 -0400 @@ -11,6 +11,7 @@ unique=$8 naive_output=$9 filter_unique=${10} +class_filter=${11} mkdir $outdir echo "---------------- read parameters ----------------" @@ -78,7 +79,7 @@ echo "---------------- merge_and_filter.r ----------------" echo "---------------- merge_and_filter.r ----------------
" >> $output -Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} +Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} echo "---------------- mutation_analysis.r ----------------" echo "---------------- mutation_analysis.r ----------------
" >> $output