Mercurial > repos > davidvanzessen > from_imgt_clonal_pairs
changeset 0:5560672b1ca4 draft default tip
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 24 Jul 2015 04:44:39 -0400 |
parents | |
children | |
files | from_imgt.r from_imgt.sh from_imgt.xml |
diffstat | 3 files changed, 189 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# from_imgt.r
#
# Collapse an IMGT summary table into one row per clone and write it as a
# headerless, tab-separated table (the calling shell script writes the header).
#
# Usage:
#   Rscript from_imgt.r <infile> <patient> <sample> <cell.count> <receptor> <output>
#
#   infile      tab-separated IMGT summary file with a header row
#   patient     patient label copied into every output row
#   sample      sample label copied into every output row
#   cell.count  cell count copied verbatim into every output row
#   receptor    receptor label copied into every output row
#   output      path of the table to write
library(data.table)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 6) {
  stop(
    "usage: from_imgt.r <infile> <patient> <sample> <cell.count> <receptor> <output>",
    call. = FALSE
  )
}

infile <- args[1]
patient <- args[2]
sample_name <- args[3]
cell_count <- args[4]
receptor <- args[5]
output <- args[6]

# Columns that together identify one clone.
clone_cols <- c("V.GENE.and.allele", "J.GENE.and.allele", "AA.JUNCTION", "Sequence")

# TRUE where a value is neither NA nor the empty string.
is_present <- function(x) {
  !is.na(x) & x != ""
}

# Reduce an IMGT gene call to its gene name: keep the first of several
# comma-separated candidates, drop the "Homsap " prefix and drop everything
# from the allele marker "*" onwards.
# NOTE(review): only the "Homsap " prefix is stripped; any other species
# prefix is left in place.
first_gene_name <- function(calls) {
  first_call <- vapply(
    strsplit(as.character(calls), ", ", fixed = TRUE),
    function(parts) parts[1],
    character(1)
  )
  first_call <- gsub("Homsap ", "", first_call)
  gsub("\\*.*", "", first_call)
}

dat <- read.table(
  infile,
  header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE
)

missing_cols <- setdiff(clone_cols, names(dat))
if (length(missing_cols) > 0) {
  stop(
    "input file lacks required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}
dat <- dat[, clone_cols]

# Keep only rows with a V call, a J call and a sequence. NA values are
# dropped explicitly so they cannot turn into all-NA rows.
keep <- is_present(dat$V.GENE.and.allele) &
  is_present(dat$J.GENE.and.allele) &
  is_present(dat$Sequence)
dat <- dat[keep, ]

dat$V.GENE.and.allele <- first_gene_name(dat$V.GENE.and.allele)
dat$J.GENE.and.allele <- first_gene_name(dat$J.GENE.and.allele)

# One row per distinct (V, J, junction, sequence) with its read count.
dat <- data.frame(
  data.table(dat)[, list(Clone_Molecule_Count_From_Spikes = .N), by = clone_cols]
)
dat <- dat[order(-dat$Clone_Molecule_Count_From_Spikes), ]

# Frequency is the clone's share of all counted reads. The denominator is the
# sum of the counts, not the number of distinct clones, so the frequencies
# sum to 1 and Log10_Frequency is never positive.
total_reads <- sum(dat$Clone_Molecule_Count_From_Spikes)
log10_frequency <- log10(dat$Clone_Molecule_Count_From_Spikes / total_reads)

# Constant columns are replicated to the row count so that an input with no
# usable rows produces an empty output file instead of an error.
n_clones <- nrow(dat)
result <- data.frame(
  Patient = rep(patient, n_clones),
  Receptor = rep(receptor, n_clones),
  Sample = rep(sample_name, n_clones),
  Cell_Count = rep(cell_count, n_clones),
  Clone_Molecule_Count_From_Spikes = dat$Clone_Molecule_Count_From_Spikes,
  Log10_Frequency = log10_frequency,
  # NOTE(review): this is the per-clone count, as in the original script, not
  # a total over the sample; confirm that is what downstream tools expect.
  Total_Read_Count = dat$Clone_Molecule_Count_From_Spikes,
  V_Segment_Major_Gene = dat$V.GENE.and.allele,
  J_Segment_Major_Gene = dat$J.GENE.and.allele,
  Clone_Sequence = dat$Sequence,
  CDR3_Sense_Sequence = dat$AA.JUNCTION,
  Related_to_leukemia_clone = rep(FALSE, n_clones),
  stringsAsFactors = FALSE
)

write.table(
  result, output,
  quote = FALSE, sep = "\t", na = "", dec = ".",
  row.names = FALSE, col.names = FALSE
)

# Echo the output path, as the original script did via top-level auto-print.
print(output)
# from_imgt.sh
#
# Build one tab-separated clone table from several IMGT result archives.
#
# Usage:
#   from_imgt.sh <output> \
#       <patient> <cell_count> <receptor> <sample_count> \
#           <sample_name> <sample_file> [<sample_name> <sample_file> ...] \
#       [<patient> ...]
#
# Each patient group carries exactly <sample_count> name/file pairs. Every
# sample file is unpacked, its summary is passed to from_imgt.r, and the
# resulting rows are appended to <output> below a single header line.
set -e

dir="$(cd "$(dirname "$0")" && pwd)"

if [[ $# -lt 1 ]]; then
    echo "usage: $0 <output> <patient> <cell_count> <receptor> <sample_count> <sample_name> <sample_file> ..." >&2
    exit 1
fi

# Quote the expansions so arguments containing spaces stay single elements.
args=("$@")
inputs=("${args[@]:1}")
output="${args[0]}"
echo "$PWD"

# get_summary_file <archive> <summary_file>
# Unpack a Zip or XZ-compressed tar archive into a scratch directory and
# concatenate every file matching */1_* into <summary_file>.
function get_summary_file
{
    local imgt_zip="$1"
    local summary_file="$2"
    local work_dir="$PWD/tmp"
    local file_type

    # Start from a clean scratch directory; a leftover one from an aborted
    # run would otherwise make mkdir fail or leak stale files into the cat.
    rm -rf "$work_dir"
    mkdir -p "$work_dir"

    file_type="$(file "$imgt_zip")"
    if [[ "$file_type" == *"Zip archive"* ]]; then
        unzip "$imgt_zip" -d "$work_dir/"
    elif [[ "$file_type" == *"XZ compressed data"* ]]; then
        mkdir "$work_dir/files"
        tar -xJf "$imgt_zip" -C "$work_dir/files"
    else
        echo "unsupported archive type for ${imgt_zip}: ${file_type}" >&2
        rm -rf "$work_dir"
        return 1
    fi

    cat "$work_dir"/*/1_* > "$summary_file"
    rm -rf "$work_dir"
}

# process_sample <patient> <sample_name> <cell_count> <receptor> <sample_file>
# Convert one sample archive with from_imgt.r and append its rows to $output.
function process_sample
{
    local patient="$1"
    local sample_name="$2"
    local cell_count="$3"
    local receptor="$4"
    local sample_file="$5"

    get_summary_file "$sample_file" "$PWD/summ.txt"

    Rscript --verbose "$dir/from_imgt.r" "$PWD/summ.txt" "$patient" "$sample_name" "$cell_count" "$receptor" "$PWD/tmp.txt" 2>&1
    cat "$PWD/tmp.txt" >> "$output"
}

echo -e "Patient\tReceptor\tSample\tCell_Count\tClone_Molecule_Count_From_Spikes\tLog10_Frequency\tTotal_Read_Count\tV_Segment_Major_Gene\tJ_Segment_Major_Gene\tClone_Sequence\tCDR3_Sense_Sequence\tRelated_to_leukemia_clone" > "$output"

index=0
total=${#inputs[@]}

while (( index < total ))
do
    if (( total - index < 4 )); then
        echo "incomplete patient group starting at argument $((index + 2))" >&2
        exit 1
    fi

    patient="${inputs[$index]}"
    cell_count="${inputs[$((index + 1))]}"
    receptor="${inputs[$((index + 2))]}"
    sample_count="${inputs[$((index + 3))]}"
    index=$((index + 4))

    echo "patient: $patient"
    echo "cell_count: ${cell_count}"
    echo "receptor: $receptor"
    echo "sample_count: ${sample_count}"

    # A non-numeric or zero count would desynchronise the positional parsing.
    if ! [[ "$sample_count" =~ ^[1-9][0-9]*$ ]]; then
        echo "invalid sample count for patient ${patient}: ${sample_count}" >&2
        exit 1
    fi
    if (( total - index < 2 * sample_count )); then
        echo "patient ${patient} declares ${sample_count} sample(s) but too few arguments follow" >&2
        exit 1
    fi

    for (( sample_nr = 0; sample_nr < sample_count; sample_nr++ ))
    do
        sample_name="${inputs[$index]}"
        sample_file="${inputs[$((index + 1))]}"
        index=$((index + 2))

        echo "sample_name: ${sample_name}"
        echo "sample_file: ${sample_file}"

        process_sample "$patient" "$sample_name" "$cell_count" "$receptor" "$sample_file"
    done
done

exit 0
<!-- from_imgt.xml: Galaxy tool wrapper around from_imgt.sh -->
<tool id="from_imgt_clonal_pairs" name="IMGT to Paired Clonal Sequences" version="1.0">
  <description>Comparison of clonal sequences in paired samples</description>
  <!--
    Command line layout expected by from_imgt.sh:
      output file, then for every patient: name, cell count, receptor,
      number of samples, followed by one (sample name, sample file) pair
      per sample.
  -->
  <command interpreter="bash">
from_imgt.sh "$out_file"
  #for $f in $patients
"$f.name" "$f.cellcount" "$f.receptor" "$len($f.samples)"
    #for $g in $f.samples
"${g.sname}" "${g.file}"
    #end for
  #end for
  </command>
  <inputs>
    <!-- One block per patient; at least one patient is required. -->
    <repeat name="patients" title="Patient" min="1" default="1">
      <param name="name" type="text" label="Patient name" />
      <param name="cellcount" type="text" label="Cell Count" />
      <param name="receptor" type="text" label="Receptor" />
      <!-- One or two samples per patient. -->
      <repeat name="samples" title="Sample" min="1" max="2" default="1">
        <param name="sname" type="text" label="Sample name" />
        <param name="file" type="data" label="Data to Process" />
      </repeat>
    </repeat>

  </inputs>
  <outputs>
    <data format="tabular" name="out_file" />
  </outputs>
  <help>
**What it does**

Converts IMGT result archives into a single tab-separated table of clonal
sequences, with one row per distinct clone per sample.

**Inputs**

For every patient, give a patient name, a cell count, a receptor and one or
two samples. Each sample needs a name and a data file. The data file must be
a Zip archive or an XZ-compressed tar archive that contains the IMGT
``1_Summary`` file.

**Output**

A tabular file with these columns::

    Patient
    Receptor
    Sample
    Cell_Count
    Clone_Molecule_Count_From_Spikes
    Log10_Frequency
    Total_Read_Count
    V_Segment_Major_Gene
    J_Segment_Major_Gene
    Clone_Sequence
    CDR3_Sense_Sequence
    Related_to_leukemia_clone
  </help>
</tool>