Mercurial > repos > davidvanzessen > mutation_analysis
changeset 52:d3542f87a304 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 29 Jan 2016 08:11:31 -0500 |
parents | d4e72eeea640 |
children | 7290a88ea202 |
files | merge_and_filter.r mutation_analysis.xml wrapper.sh |
diffstat | 3 files changed, 27 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/merge_and_filter.r Fri Jan 29 05:42:17 2016 -0500
+++ b/merge_and_filter.r Fri Jan 29 08:11:31 2016 -0500
@@ -12,6 +12,7 @@
 method=args[9]
 functionality=args[10]
 unique_type=args[11]
+filter_unique=args[12] == "yes"
 summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
@@ -89,6 +90,25 @@
 result$past = 1:nrow(result)
 }
+print(paste("filter uniques: ", filter_unique))
+
+if(filter_unique){
+
+ clmns = names(result)
+
+ result$unique.def = paste(result$CDR1.Seq, result$CDR2.Seq, result$CDR3.Seq, result$FR1.IMGT, result$FR2.IMGT, result$FR3.IMGT)
+ result.filtered = result[duplicated(result$unique.def),]
+ fltr = result$unique.def %in% result.filtered$unique.def
+
+ result.removed = result[!fltr,]
+
+ result = result[fltr,]
+
+ result = result[,clmns]
+
+ #write.table(inputdata.removed, "unique_removed.csv", sep=",",quote=F,row.names=F,col.names=T)
+}
+
 result = result[!duplicated(result$past), ]
```
```diff
--- a/mutation_analysis.xml Fri Jan 29 05:42:17 2016 -0500
+++ b/mutation_analysis.xml Fri Jan 29 08:11:31 2016 -0500
@@ -1,7 +1,7 @@
 <tool id="mutation_analysis_shm" name="Mutation Analysis" version="1.0">
 <description></description>
 <command interpreter="bash">
- wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1} $functionality $unique $naive_output
+ wrapper.sh $in_file $method $out_file $out_file.files_path ${in_file.name} ${include_fr1} $functionality $unique $naive_output $filter_uniques
 </command>
 <inputs>
 <param name="in_file" type="data" label="IMGT zip file to be analysed" />
@@ -33,6 +33,10 @@
 <option value="no" selected="true">No</option>
 </param>
 </conditional>
+ <param name="filter_uniques" type="select" label="Filter unique sequences" help="Filter out the sequences (based on CDR1, FR2, CDR2, FR3 and CDR3) that only occur once.">
+ <option value="yes">Yes</option>
+ <option value="no" selected="true">No</option>
+ </param>
 </inputs>
 <outputs>
 <data format="html" name="out_file" label = "Mutation analysis on ${in_file.name}"/>
```
```diff
--- a/wrapper.sh Fri Jan 29 05:42:17 2016 -0500
+++ b/wrapper.sh Fri Jan 29 08:11:31 2016 -0500
@@ -10,6 +10,7 @@
 functionality=$7
 unique=$8
 naive_output=$9
+filter_unique=${10}
 mkdir $outdir
 type="`file $input`"
@@ -57,7 +58,7 @@
 echo "merging"
-Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique
+Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique}
 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
 echo "R mutation analysis"
```