# HG changeset patch # User davidvanzessen # Date 1434611411 14400 # Node ID d436daae9d68517de3a9ab97e10e85c804b1ef1b # Parent ac9a4307861a53b688fc2149fe06384aa2146ae8 Uploaded diff -r ac9a4307861a -r d436daae9d68 merge_and_filter.r --- a/merge_and_filter.r Thu Apr 16 09:36:19 2015 -0400 +++ b/merge_and_filter.r Thu Jun 18 03:10:11 2015 -0400 @@ -9,6 +9,8 @@ output = args[6] unmatchedfile = args[7] method=args[8] +functionality=args[9] +unique_type=args[10] summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F) mutationanalysis = read.table(mutationanalysisfile, header=T, sep="\t", fill=T, stringsAsFactors=F) @@ -28,7 +30,13 @@ } summ = merge(summ, gene_identification, by="Sequence.ID") -summ = summ[summ$Functionality != "No results",] + +if(functionality == "no_results"){ + summ = summ[summ$Functionality != "No results",] +} else if (functionality == "no_result_unproductive"){ + summ = summ[summ$Functionality != "No results" & summ$Functionality != "unproductive",] +} + higher_than=(summ$chunk_hit_percentage >= 70 & summ$nt_hit_percentage >= 70) unmatched = summ[!higher_than,] unmatched = unmatched[,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")] @@ -61,7 +69,20 @@ result$JGene = gsub("[*].*", "", result$JGene) #result$past = paste(result$AA.JUNCTION, result$VGene, result$JGene, (result$FR1.IMGT.Nb.of.mutations + result$CDR1.IMGT.Nb.of.mutations + result$FR2.IMGT.Nb.of.mutations + result$CDR2.IMGT.Nb.of.mutations + result$FR3.IMGT.Nb.of.mutations), result$best_match) -result$past = paste(result$AA.JUNCTION, result$VGene, result$best_match) +if(unique_type == "AA.JUNCTION_V_subclass"){ + result$past = paste(result$AA.JUNCTION, result$VGene, result$best_match) +} else if (unique_type == "AA.JUNCTION_subclass"){ + result$past = paste(result$AA.JUNCTION, result$best_match) +} else if (unique_type == "V_subclass"){ + result$past = paste(result$VGene, result$best_match) +} else if (unique_type == 
"AA.JUNCTION_V"){ + result$past = paste(result$AA.JUNCTION, result$VGene) +} else if (unique_type == "AA.JUNCTION"){ + result$past = paste(result$AA.JUNCTION) +} else { + result$past = seq_len(nrow(result)) +} + result = result[!duplicated(result$past), ] diff -r ac9a4307861a -r d436daae9d68 mutation_analysis.xml --- a/mutation_analysis.xml Thu Apr 16 09:36:19 2015 -0400 +++ b/mutation_analysis.xml Thu Jun 18 03:10:11 2015 -0400 @@ -1,7 +1,7 @@ - wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1} + wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1} $functionality $unique @@ -13,6 +13,21 @@ + + + + + + + + + + + + + + + diff -r ac9a4307861a -r d436daae9d68 wrapper.sh --- a/wrapper.sh Thu Apr 16 09:36:19 2015 -0400 +++ b/wrapper.sh Thu Jun 18 03:10:11 2015 -0400 @@ -7,6 +7,8 @@ outdir=$4 title=$5 include_fr1=$6 +functionality=$7 +unique=$8 mkdir $outdir unzip $input -d $PWD/files/ > $PWD/unziplog.log @@ -40,7 +42,7 @@ echo "merging" -Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method +Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm" echo "R mutation analysis" @@ -74,7 +76,7 @@ fi done < $outdir/result.txt echo "" >> $output -echo "unmatched
motif per sequence
" >> $output +echo "unmatched
motif per sequence
all data
" >> $output echo "
" >> $output