changeset 125:e87dcca14bd6 draft

Uploaded
author davidvanzessen
date Mon, 29 Aug 2016 03:28:49 -0400
parents 4a93146f87aa
children ffd5462da9d1
files change_o/define_clones.sh change_o/makedb.sh merge.r wrapper.sh
diffstat 4 files changed, 77 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/change_o/define_clones.sh	Mon Aug 22 09:11:17 2016 -0400
+++ b/change_o/define_clones.sh	Mon Aug 29 03:28:49 2016 -0400
@@ -21,7 +21,8 @@
 	output=${10}
 	output2=${11}
 	
-	/data/users/david/anaconda3/bin/python $dir/DefineClones.py bygroup -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link
+	python3 $dir/DefineClones.py bygroup -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link
+	#/data/users/david/anaconda3/bin/python $dir/DefineClones.py bygroup -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link
 	#/home/galaxy/anaconda3/bin/python $dir/DefineClones.py bygroup -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link
 	
 	Rscript $dir/define_clones.r $PWD/outdir/output_clone-pass.tab $output2 2>&1
@@ -30,10 +31,13 @@
 	output=$4
 	output2=$5
 	
-	/data/users/david/anaconda3/bin/python $dir/DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method
+	python3 $dir/DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method
+	#/data/users/david/anaconda3/bin/python $dir/DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method
 	#/home/galaxy/anaconda3/bin/python $dir/DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method
 	
 	Rscript $dir/define_clones.r $PWD/outdir/output_clone-pass.tab $output2 2>&1
 fi
 
 cp $PWD/outdir/output_clone-pass.tab $output
+
+rm -rf $PWD/outdir/
--- a/change_o/makedb.sh	Mon Aug 22 09:11:17 2016 -0400
+++ b/change_o/makedb.sh	Mon Aug 29 03:28:49 2016 -0400
@@ -29,7 +29,10 @@
 
 echo "makedb: $PWD/outdir"
 
-/data/users/david/anaconda3/bin/python $dir/MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions
+python3 $dir/MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions
+#/data/users/david/anaconda3/bin/python $dir/MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions
 #/home/galaxy/anaconda3/bin/python $dir/MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions
 
 mv $PWD/outdir/output_db-pass.tab $output
+
+rm -rf $PWD/outdir/
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/merge.r	Mon Aug 29 03:28:49 2016 -0400
@@ -0,0 +1,27 @@
+# Merge two tab-separated tables on one key column from each and write the result.
+# Usage: Rscript merge.r input.1 input.2 fields.1 fields.2 field.1 field.2 output
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 7) stop("merge.r expects 7 arguments", call. = FALSE)
+input.1 <- args[1]
+input.2 <- args[2]
+# fields.N: "all", or a comma-separated list of the columns to keep from input N
+fields.1 <- args[3]
+fields.2 <- args[4]
+# field.N: name of the join-key column in input N
+field.1 <- args[5]
+field.2 <- args[6]
+output <- args[7]
+
+dat1 <- read.table(input.1, header=TRUE, sep="\t", quote="", stringsAsFactors=FALSE, fill=TRUE, row.names=NULL)
+if(fields.1 != "all"){
+	fields.1 <- unlist(strsplit(fields.1, ","))
+	dat1 <- dat1[, fields.1, drop=FALSE] # drop=FALSE keeps a data.frame even when one column is selected
+}
+dat2 <- read.table(input.2, header=TRUE, sep="\t", quote="", stringsAsFactors=FALSE, fill=TRUE, row.names=NULL)
+if(fields.2 != "all"){
+	fields.2 <- unlist(strsplit(fields.2, ","))
+	dat2 <- dat2[, fields.2, drop=FALSE] # drop=FALSE keeps a data.frame even when one column is selected
+}
+
+dat3 <- merge(dat1, dat2, by.x=field.1, by.y=field.2)
+write.table(dat3, output, sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
--- a/wrapper.sh	Mon Aug 22 09:11:17 2016 -0400
+++ b/wrapper.sh	Mon Aug 29 03:28:49 2016 -0400
@@ -374,12 +374,15 @@
 echo "<tr><td>The data used to generate the sexond SHM Overview plot</td><td><a href='plot2.txt'>Download</a></td></tr>" >> $output
 echo "<tr><td>The data used to generate the third SHM Overview plot</td><td><a href='plot3.txt'>Download</a></td></tr>" >> $output
 echo "<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt'>Download</a></td></tr>" >> $output
+
 echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt'>Download</a></td></tr>" >> $output
 echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt'>Download</a></td></tr>" >> $output
 echo "<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt'>Download</a></td></tr>" >> $output
 echo "<tr><td>Absent AA location data per sequence ID</td><td><a href='absent_aa_id.txt'>Download</a></td></tr>" >> $output
 echo "<tr><td>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</td><td><a href='sequence_overview/index.html'>Download</a></td></tr>" >> $output
+
 echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>Download</a></td></tr>" >> $output
+
 echo "<tr><td>Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)</td><td><a href='baseline.pdf'>Download</a></td></tr>" >> $output
 echo "<tr><td>Baseline data</td><td><a href='baseline.txt'>Download</a></td></tr>" >> $output
 echo "<tr><td>Baseline ca PDF</td><td><a href='baseline_ca.pdf'>Download</a></td></tr>" >> $output
@@ -388,6 +391,7 @@
 echo "<tr><td>Baseline cg data</td><td><a href='baseline_cg.txt'>Download</a></td></tr>" >> $output
 echo "<tr><td>Baseline cm PDF</td><td><a href='baseline_cm.pdf'>Download</a></td></tr>" >> $output
 echo "<tr><td>Baseline cm data</td><td><a href='baseline_cm.txt'>Download</a></td></tr>" >> $output
+
 echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz'>Download</a></td></tr>" >> $output
 echo "<tr><td>An IMGT archive with just the matched and filtered ca sequences</td><td><a href='new_IMGT_ca.txz'>Download</a></td></tr>" >> $output
 echo "<tr><td>An IMGT archive with just the matched and filtered ca1 sequences</td><td><a href='new_IMGT_ca1.txz'>Download</a></td></tr>" >> $output
@@ -398,8 +402,16 @@
 echo "<tr><td>An IMGT archive with just the matched and filtered cg3 sequences</td><td><a href='new_IMGT_cg3.txz'>Download</a></td></tr>" >> $output
 echo "<tr><td>An IMGT archive with just the matched and filtered cg4 sequences</td><td><a href='new_IMGT_cg4.txz'>Download</a></td></tr>" >> $output
 echo "<tr><td>An IMGT archive with just the matched and filtered cm sequences</td><td><a href='new_IMGT_cm.txz'>Download</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB file with defined clones</td><td><a href='change_o/change-o-db-defined_clones.txt'>Download</a></td></tr>" >> $output
+
+echo "<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href='change_o/change-o-db-defined_clones.txt'>Download</a></td></tr>" >> $output
 echo "<tr><td>The Change-O DB defined clones summary file</td><td><a href='change_o/change-o-defined_clones-summary.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB file with defined clones of ca</td><td><a href='change_o/change-o-db-defined_clones-ca.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB defined clones summary file of ca</td><td><a href='change_o/change-o-defined_clones-summary-ca.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB file with defined clones of cg</td><td><a href='change_o/change-o-db-defined_clones-cg.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB defined clones summary file of cg</td><td><a href='change_o/change-o-defined_clones-summary-cg.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB file with defined clones of cm</td><td><a href='change_o/change-o-db-defined_clones-cm.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB defined clones summary file of cm</td><td><a href='change_o/change-o-defined_clones-summary-cm.txt'>Download</a></td></tr>" >> $output
+
 echo "</table>" >> $output
 
 echo "</div>" >> $output #downloads tab end
@@ -462,7 +474,6 @@
 echo "</table>" >> $outdir/base_overview.html
 
 echo "---------------- change-o MakeDB ----------------"
-echo "---------------- change-o MakeDB ----------------<br />" >> $log
 
 mkdir $outdir/change_o
 
@@ -471,11 +482,35 @@
 cd $outdir/change_o
 
 bash $dir/change_o/makedb.sh $input false false false $outdir/change_o/change-o-db.txt
+bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt
 
-echo "---------------- change-o DefineClones ----------------"
-echo "---------------- change-o DefineClones ----------------<br />" >> $log
+Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1
+# Log the merge command exactly as it was invoked above, to aid debugging.
+echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt 'all' 'Sequence.ID,best_match' 'SEQUENCE_ID' 'Sequence.ID' $outdir/change_o/change-o-db-defined_clones.txt 2>&1"
+
+if [[ $(wc -l < $outdir/new_IMGT_ca/1_Summary.txt) -gt "1" ]]; then
+	bash $dir/change_o/makedb.sh $outdir/new_IMGT_ca.txz false false false $outdir/change_o/change-o-db-ca.txt
+	bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-ca.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-ca.txt $outdir/change_o/change-o-defined_clones-summary-ca.txt
+else
+	echo "No ca sequences" > "$outdir/change_o/change-o-db-defined_clones-ca.txt"	
+	echo "No ca sequences" > "$outdir/change_o/change-o-defined_clones-summary-ca.txt"	
+fi
 
-bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt
+if [[ $(wc -l < $outdir/new_IMGT_cg/1_Summary.txt) -gt "1" ]]; then
+	bash $dir/change_o/makedb.sh $outdir/new_IMGT_cg.txz false false false $outdir/change_o/change-o-db-cg.txt
+	bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-cg.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-cg.txt $outdir/change_o/change-o-defined_clones-summary-cg.txt
+else
+	echo "No cg sequences" > "$outdir/change_o/change-o-db-defined_clones-cg.txt"	
+	echo "No cg sequences" > "$outdir/change_o/change-o-defined_clones-summary-cg.txt"	
+fi
+
+if [[ $(wc -l < $outdir/new_IMGT_cm/1_Summary.txt) -gt "1" ]]; then
+	bash $dir/change_o/makedb.sh $outdir/new_IMGT_cm.txz false false false $outdir/change_o/change-o-db-cm.txt
+	bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-cm.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-cm.txt $outdir/change_o/change-o-defined_clones-summary-cm.txt
+else
+	echo "No cm sequences" > "$outdir/change_o/change-o-db-defined_clones-cm.txt"	
+	echo "No cm sequences" > "$outdir/change_o/change-o-defined_clones-summary-cm.txt"	
+fi
 
 PWD="$tmp"