Mercurial > repos > davidvanzessen > mutation_analysis
changeset 127:afb0937ec0dc draft default tip
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 13 Sep 2016 08:55:05 -0400 |
parents | ffd5462da9d1 |
children | |
files | merge_and_filter.r new_imgt.r wrapper.sh |
diffstat | 3 files changed, 48 insertions(+), 41 deletions(-) [+] |
line wrap: on
line diff
--- a/merge_and_filter.r Mon Aug 29 04:20:30 2016 -0400 +++ b/merge_and_filter.r Tue Sep 13 08:55:05 2016 -0400 @@ -195,7 +195,11 @@ print(paste("Number of rows in result:", nrow(result))) print(paste("Number of rows in unmatched:", nrow(unmatched))) -matched.sequences.count = sum(!grepl("^unmatched", result$best_match)) +matched.sequences = result[!grepl("^unmatched", result$best_match),] + +write.table(x=matched.sequences, file=gsub("merged.txt$", "filtered.txt", output), sep="\t",quote=F,row.names=F,col.names=T) + +matched.sequences.count = nrow(matched.sequences) unmatched.sequences.count = sum(grepl("^unmatched", result$best_match)) filtering.steps = rbind(filtering.steps, c("Number of matched sequences", matched.sequences.count))
--- a/new_imgt.r Mon Aug 29 04:20:30 2016 -0400 +++ b/new_imgt.r Tue Sep 13 08:55:05 2016 -0400 @@ -7,7 +7,9 @@ merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F) if(gene != "-"){ - merged = merged[grepl(gene, merged$best_match),] + merged = merged[grepl(paste("^", gene, sep=""), merged$best_match),] +} else { + merged = merged[!grepl("unmatched", merged$best_match),] } merged = merged[!grepl("unmatched", merged$best_match),]
--- a/wrapper.sh Mon Aug 29 04:20:30 2016 -0400 +++ b/wrapper.sh Tue Sep 13 08:55:05 2016 -0400 @@ -372,7 +372,7 @@ cd $outdir/change_o -bash $dir/change_o/makedb.sh $input false false false $outdir/change_o/change-o-db.txt +bash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1 @@ -453,49 +453,50 @@ echo "<table class='pure-table pure-table-striped'>" >> $output echo "<thead><tr><th>info</th><th>link</th></tr></thead>" >> $output -echo "<tr><td>The complete dataset</td><td><a href='merged.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>The SHM Overview table as a dataset</td><td><a href='data_sum.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>The data used to generate the first SHM Overview plot</td><td><a href='plot1.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>The data used to generate the sexond SHM Overview plot</td><td><a href='plot2.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>The data used to generate the third SHM Overview plot</td><td><a href='plot3.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt'>Download</a></td></tr>" >> $output +echo "<tr><td>The complete dataset</td><td><a href='merged.txt' download='merged.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The filtered dataset</td><td><a href='filtered.txt' download='filtered.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The SHM Overview table as a dataset</td><td><a href='data_sum.txt' download='data_sum.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data used to generate the first SHM Overview plot</td><td><a href='plot1.txt' download='plot1.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data used to generate the second SHM Overview plot</td><td><a href='plot2.txt' download='plot2.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data used to generate the third SHM Overview plot</td><td><a href='plot3.txt' download='plot3.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt' download='unmatched.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>Absent AA location data per sequence ID</td><td><a href='absent_aa_id.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</td><td><a href='sequence_overview/index.html'>Download</a></td></tr>" >> $output +echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt' download='motif_per_seq.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt' download='mutation_by_id.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt' download='aa_id_mutations.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Absent AA location data per sequence ID</td><td><a href='absent_aa_id.txt' download='absent_aa_id.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</td><td><a href='sequence_overview/index.html'>View</a></td></tr>" >> $output -echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>Download</a></td></tr>" >> $output +echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>View</a></td></tr>" >> $output -echo "<tr><td>Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)</td><td><a href='baseline.pdf'>Download</a></td></tr>" >> $output -echo "<tr><td>Baseline data</td><td><a href='baseline.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>Baseline ca PDF</td><td><a href='baseline_ca.pdf'>Download</a></td></tr>" >> $output -echo "<tr><td>Baseline ca data</td><td><a href='baseline_ca.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>Baseline cg PDF</td><td><a href='baseline_cg.pdf'>Download</a></td></tr>" >> $output -echo "<tr><td>Baseline cg data</td><td><a href='baseline_cg.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>Baseline cm PDF</td><td><a href='baseline_cm.pdf'>Download</a></td></tr>" >> $output -echo "<tr><td>Baseline cm data</td><td><a href='baseline_cm.txt'>Download</a></td></tr>" >> $output +echo "<tr><td>Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)</td><td><a href='baseline.pdf' download='baseline.pdf' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline data</td><td><a href='baseline.txt' download='baseline.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline ca PDF</td><td><a href='baseline_ca.pdf' download='baseline_ca.pdf' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline ca data</td><td><a href='baseline_ca.txt' download='baseline_ca.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline cg PDF</td><td><a href='baseline_cg.pdf' download='baseline_cg.pdf' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline cg data</td><td><a href='baseline_cg.txt' download='baseline_cg.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline cm PDF</td><td><a href='baseline_cm.pdf' download='baseline_cm.pdf' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline cm data</td><td><a href='baseline_cm.txt' download='baseline_cm.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz'>Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered ca sequences</td><td><a href='new_IMGT_ca.txz'>Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered ca1 sequences</td><td><a href='new_IMGT_ca1.txz'>Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered ca2 sequences</td><td><a href='new_IMGT_ca2.txz'>Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered cg sequences</td><td><a href='new_IMGT_cg.txz'>Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered cg1 sequences</td><td><a href='new_IMGT_cg1.txz'>Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered cg2 sequences</td><td><a href='new_IMGT_cg2.txz'>Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered cg3 sequences</td><td><a href='new_IMGT_cg3.txz'>Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered cg4 sequences</td><td><a href='new_IMGT_cg4.txz'>Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered cm sequences</td><td><a href='new_IMGT_cm.txz'>Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz' download='new_IMGT.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered ca sequences</td><td><a href='new_IMGT_ca.txz' download='new_IMGT_ca.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered ca1 sequences</td><td><a href='new_IMGT_ca1.txz' download='new_IMGT_ca1.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered ca2 sequences</td><td><a href='new_IMGT_ca2.txz' download='new_IMGT_ca2.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered cg sequences</td><td><a href='new_IMGT_cg.txz' download='new_IMGT_cg.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered cg1 sequences</td><td><a href='new_IMGT_cg1.txz' download='new_IMGT_cg1.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered cg2 sequences</td><td><a href='new_IMGT_cg2.txz' download='new_IMGT_cg2.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered cg3 sequences</td><td><a href='new_IMGT_cg3.txz' download='new_IMGT_cg3.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered cg4 sequences</td><td><a href='new_IMGT_cg4.txz' download='new_IMGT_cg4.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered cm sequences</td><td><a href='new_IMGT_cm.txz' download='new_IMGT_cm.txz' >Download</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href='change_o/change-o-db-defined_clones.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB defined clones summary file</td><td><a href='change_o/change-o-defined_clones-summary.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB file with defined clones of ca</td><td><a href='change_o/change-o-db-defined_clones-ca.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB defined clones summary file of ca</td><td><a href='change_o/change-o-defined_clones-summary-ca.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB file with defined clones of cg</td><td><a href='change_o/change-o-db-defined_clones-cg.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB defined clones summary file of cg</td><td><a href='change_o/change-o-defined_clones-summary-cg.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB file with defined clones of cm</td><td><a href='change_o/change-o-db-defined_clones-cm.txt'>Download</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB defined clones summary file of cm</td><td><a href='change_o/change-o-defined_clones-summary-cm.txt'>Download</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href='change_o/change-o-db-defined_clones.txt' download='change_o/change-o-db-defined_clones.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB defined clones summary file</td><td><a href='change_o/change-o-defined_clones-summary.txt' download='change_o/change-o-defined_clones-summary.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB file with defined clones of ca</td><td><a href='change_o/change-o-db-defined_clones-ca.txt' download='change_o/change-o-db-defined_clones-ca.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB defined clones summary file of ca</td><td><a href='change_o/change-o-defined_clones-summary-ca.txt' download='change_o/change-o-defined_clones-summary-ca.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB file with defined clones of cg</td><td><a href='change_o/change-o-db-defined_clones-cg.txt' download='change_o/change-o-db-defined_clones-cg.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB defined clones summary file of cg</td><td><a href='change_o/change-o-defined_clones-summary-cg.txt' download='change_o/change-o-defined_clones-summary-cg.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB file with defined clones of cm</td><td><a href='change_o/change-o-db-defined_clones-cm.txt' download='change_o/change-o-db-defined_clones-cm.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB defined clones summary file of cm</td><td><a href='change_o/change-o-defined_clones-summary-cm.txt' download='change_o/change-o-defined_clones-summary-cm.txt' >Download</a></td></tr>" >> $output echo "</table>" >> $output