Mercurial > repos > davidvanzessen > from_imgt_clonal_pairs

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/from_imgt.r	Fri Jul 24 04:44:39 2015 -0400
@@ -0,0 +1,56 @@
+# Convert an IMGT/HighV-QUEST summary table into clonal-pairs rows.
+# Usage: Rscript from_imgt.r <infile> <patient> <sample> <cell.count> <receptor> <output>
+# Writes one headerless, tab-separated row per unique clone to <output>.
+library(data.table)
+
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 6) {
+  stop("expected 6 arguments: infile patient sample cell.count receptor output", call. = FALSE)
+}
+infile <- args[1]
+patient <- args[2]
+sample <- args[3]
+cell.count <- args[4]
+receptor <- args[5]
+output <- args[6]
+
+# Reduce IMGT gene calls to the major gene: keep the first of several
+# ", "-separated candidates, drop the "Homsap " prefix, drop "*" and the rest.
+# NOTE(review): only the "Homsap " prefix is removed - confirm for other species.
+major.gene <- function(calls) {
+  first <- vapply(strsplit(as.character(calls), ", "), "[[", character(1), 1)
+  gsub("\\*.*", "", gsub("Homsap ", "", first))
+}
+
+# TRUE where a field holds a value; NA counts as empty so it cannot add NA rows.
+has.value <- function(x) !is.na(x) & x != ""
+
+key.cols <- c("V.GENE.and.allele", "J.GENE.and.allele", "AA.JUNCTION", "Sequence")
+dat <- read.table(infile, header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE)
+dat <- dat[, key.cols]
+# Keep only reads that have a V call, a J call and a sequence.
+dat <- dat[has.value(dat$V.GENE.and.allele) & has.value(dat$J.GENE.and.allele) & has.value(dat$Sequence), ]
+
+dat$V.GENE.and.allele <- major.gene(dat$V.GENE.and.allele)
+dat$J.GENE.and.allele <- major.gene(dat$J.GENE.and.allele)
+
+# Collapse identical reads into clones; the count is the number of reads per clone.
+dat <- data.frame(data.table(dat)[, list(Clone_Molecule_Count_From_Spikes = .N), by = key.cols])
+dat <- dat[order(-dat$Clone_Molecule_Count_From_Spikes), ]
+
+# Frequency is each clone's share of all reads. The denominator must be the summed
+# read count: after collapsing, nrow(dat) is the number of clones, not of reads.
+dat$Log10_Frequency <- log10(dat$Clone_Molecule_Count_From_Spikes / sum(dat$Clone_Molecule_Count_From_Spikes))
+dat$Patient <- patient
+dat$Sample <- sample
+dat$Receptor <- receptor
+dat$Cell_Count <- cell.count
+# NOTE(review): still the per-clone count; confirm whether a sample-wide total is meant.
+dat$Total_Read_Count <- dat$Clone_Molecule_Count_From_Spikes
+dat$Related_to_leukemia_clone <- FALSE
+
+# Fixed column order, written without a header row.
+out.cols <- c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "V.GENE.and.allele", "J.GENE.and.allele", "Sequence", "AA.JUNCTION", "Related_to_leukemia_clone")
+write.table(dat[, out.cols], output, quote = FALSE, sep = "\t", na = "", dec = ".", row.names = FALSE, col.names = FALSE)
+output
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/from_imgt.sh	Fri Jul 24 04:44:39 2015 -0400
@@ -0,0 +1,104 @@
+set -e
+
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+args=($@)
+inputs=(${args[@]:1})
+output="${args[0]}"
+echo "$PWD"
+
+function get_summary_file
+{
+	imgt_zip=$1
+	summary_file=$2
+
+	mkdir ${PWD}/tmp/
+	type="`file ${imgt_zip}`"
+	if [[ "$type" == *"Zip archive"* ]] ; then
+		unzip ${imgt_zip} -d $PWD/tmp/
+	elif [[ "$type" == *"XZ compressed data"* ]] ; then
+		mkdir "$PWD/tmp/files"
+		tar -xJf ${imgt_zip} -C $PWD/tmp/files
+	fi
+
+	cat $PWD/tmp/*/1_* > ${summary_file}
+	rm -rf $PWD/tmp
+}
+
+index=0
+
+echo -e "Patient\tReceptor\tSample\tCell_Count\tClone_Molecule_Count_From_Spikes\tLog10_Frequency\tTotal_Read_Count\tV_Segment_Major_Gene\tJ_Segment_Major_Gene\tClone_Sequence\tCDR3_Sense_Sequence\tRelated_to_leukemia_clone" > "$output"
+
+while true
+do
+	patient="${inputs[$index]}"
+	index=$((index + 1))
+	cell_count="${inputs[$index]}"
+	index=$((index + 1))
+	receptor="${inputs[$index]}"
+	index=$((index + 1))
+	sample_count="${inputs[$index]}"
+	index=$((index + 1))
+
+	sample_name="${inputs[$index]}"
+	index=$((index + 1))
+
+	sample_file="${inputs[$index]}"
+	index=$((index + 1))
+
+	echo "patient: $patient"
+	echo "cell_count: ${cell_count}"
+	echo "receptor: $receptor"
+	echo "sample_count: ${sample_count}"
+	echo "sample_name: ${sample_name}"
+	echo "sample_file: ${sample_file}"
+
+	get_summary_file ${sample_file} ${PWD}/summ.txt
+
+	Rscript --verbose $dir/from_imgt.r ${PWD}/summ.txt ${patient} ${sample_name} ${cell_count} ${receptor} ${PWD}/tmp.txt 2>&1
+	cat "${PWD}/tmp.txt" >> "$output"
+
+	if [[ "${sample_count}" -gt "1" ]]; then
+		sample_name="${inputs[$index]}"
+		index=$((index + 1))
+
+		sample_file="${inputs[$index]}"
+		index=$((index + 1))
+
+		echo "sample_name: ${sample_name}"
+		echo "sample_file: ${sample_file}"
+
+		get_summary_file ${sample_file} ${PWD}/summ.txt
+
+		Rscript --verbose $dir/from_imgt.r ${PWD}/summ.txt ${patient} ${sample_name} ${cell_count} ${receptor} ${PWD}/tmp.txt 2>&1
+		cat "${PWD}/tmp.txt" >> "$output"
+	fi
+
+	if [[ "${sample_count}" -eq "3" ]]; then
+		sample_name="${inputs[$index]}"
+		index=$((index + 1))
+
+		sample_file="${inputs[$index]}"
+		index=$((index + 1))
+
+		echo "sample_name: ${sample_name}"
+		echo "sample_file: ${sample_file}"
+
+		get_summary_file ${sample_file} ${PWD}/summ.txt
+
+		Rscript --verbose $dir/from_imgt.r ${PWD}/summ.txt ${patient} ${sample_name} ${cell_count} ${receptor} ${PWD}/tmp.txt 2>&1
+		cat "${PWD}/tmp.txt" >> "$output"
+	fi
+	if [[ "${index}" -eq "${#inputs[@]}" ]]; then
+		exit 0
+	fi
+done
+
+
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/from_imgt.xml	Fri Jul 24 04:44:39 2015 -0400
@@ -0,0 +1,29 @@
+<tool id="from_imgt_clonal_pairs" name="IMGT to Paired Clonal Sequences" version="1.0"> <!-- Tool definition: runs from_imgt.sh on the configured patients and samples. -->
+	<description>Comparison of clonal sequences in paired samples</description>
+	<command interpreter="bash">
+from_imgt.sh $out_file
+	#for $i, $f in enumerate($patients)
+"$f.name" "$f.cellcount" "$f.receptor" "$len($f.samples)"
+		#for $j, $g in enumerate($f.samples)
+"${g.sname}" "${g.file}"
+		#end for
+	#end for
+	</command> <!-- Argument order: output file, then per patient: name, cell count, receptor, sample count, and one name/file pair per sample. -->
+	<inputs> <!-- Nested repeats: patients, each holding its own list of samples. -->
+	<repeat name="patients" title="Patient" min="1" default="1"> <!-- At least one patient is required. -->
+			<param name="name" type="text" label="Patient name" />
+			<param name="cellcount" type="text" label="Cell Count" />
+			<param name="receptor" type="text" label="Receptor" />
+			<repeat name="samples" title="Sample" min="1" max="2" default="1"> <!-- One or two samples per patient. -->
+				<param name="sname" type="text" label="Sample name" />
+				<param name="file" type="data" label="Data to Process" />
+			</repeat>
+	</repeat>
+
+	</inputs>
+	<outputs> <!-- Single tabular result; its path is passed to the script as $out_file. -->
+		<data format="tabular" name="out_file" />
+	</outputs>
+	<help>
+	</help>
+</tool>