Mercurial > repos > jjohnson > defuse
diff defuse.xml @ 45:aedaa66483f1 draft
Uploaded
author | jjohnson |
---|---|
date | Wed, 18 Oct 2017 16:55:57 -0400 |
parents | 225750bf3770 |
children | e500b50b72fd |
line wrap: on
line diff
--- a/defuse.xml Mon Jan 04 15:55:00 2016 -0500 +++ b/defuse.xml Wed Oct 18 16:55:57 2017 -0400 @@ -5,10 +5,33 @@ </macros> <requirements> <expand macro="defuse_requirement" /> - <expand macro="mapping_requirements" /> - <expand macro="r_requirements" /> </requirements> - <command interpreter="command"> /bin/bash $shscript </command> + <command><![CDATA[ + #if $defuse_out.__str__ != 'None': + ## ln to output_dir in from_work_dir + mkdir -p $defuse_out.dataset.extra_files_path && + ln -s $defuse_out.dataset.extra_files_path output_dir && + #else + mkdir -p output_dir && + #end if + ## Put executable paths in config file + $__tool_directory__/config_sub.sh $defuse_config output_dir/defuse.cfg && + ## copy config to output + cp defuse.cfg $config_txt && + ## make a data_dir and ln -s the input fastq + mkdir -p data_dir && + ln -s "$left_pairendreads" data_dir/reads_1.fastq && + ln -s "$right_pairendreads" data_dir/reads_2.fastq && + ## run + perl defuse_run.pl --name "$library_name" --config defuse.cfg -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir -p \$GALAXY_SLOTS && + grep -v cluster_id output_dir/results.filtered.tsv | awk '{print $1}' > cluster_id_list && + get_fusion_fastq.pl --list cluster_id_list --output output_dir --fastq1 results.fusions_1.fq --fastq2 results.fusions_2.fq && + cp output_dir/results.* . && + cp `find output_dir -name defuse.log` $defuse_log + #if $defuse_out.__str__ != 'None': + && $__tool_directory__/make_html.sh $defuse_out $defuse_out.dataset.extra_files_path + #end if + ]]></command> <inputs> <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. 
FASTQ de-interlacer will separate the result into left and right reads.)"/> <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/> @@ -61,10 +84,18 @@ <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold"> <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> </param> + <param name="multi_exon_transcripts_stats" type="select" label="Use multiple exon transcripts for stats calculations" help="should be enabled for very small libraries"> + <option value="no" selected="true">no</option> + <option value="yes">yes</option> + </param> <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density"> <help>Position density when calculating covariance</help> <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> </param> + <param name="max_paired_alignments" type="integer" value="10" optional="true" label="max_paired_alignments"> + <help>Maximum number of alignments for a read pair. Pairs with more alignments are filtered. Default is 10.</help> + <validator type="in_range" message="Choose a value between 1 and 100" min="1" max="100"/> + </param> <param name="denovo_assembly" type="select" label="denovo_assembly" help=""> <option value="">Use Default</option> <option value="no">no</option> @@ -76,29 +107,22 @@ <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" /> </when> <!-- full --> </conditional> <!-- defuse_param --> - <param name="breakpoints_bam" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Generate a Bam file for the fusions"/> <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" help="The defuse output working directory can be 
helpful for determining errors that may have occurred during the run, but they require considerable diskspace, and should be deleted and purged when no longer needed."/> + <param name="breakpoints_bam" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Generate a Bam file for the fusions"/> <param name="do_get_reads" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/> </inputs> - <stdio> - <exit_code range="1:" level="fatal" description="Error Running Defuse" /> - </stdio> <outputs> <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" /> <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)"> <filter>keep_output == True</filter> </data> - <data format="defuse.results.tsv" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" /> - <data format="defuse.results.tsv" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" /> - <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads"> - <filter>do_get_reads == True</filter> - </data> - <data format="bam" name="fusions_bam" label="${tool.name} on ${on_string}: fusions.bam"> - <filter>breakpoints_bam == True</filter> - </data> + <data format="defuse.results.tsv" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" from_work_dir="results.classify.tsv"/> + <data format="defuse.results.tsv" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" from_work_dir="results.filtered.tsv"/> + <data format="fastqsanger" name="results_fusions1_fq" label="${tool.name} on ${on_string}: fusions_1.fq" from_work_dir="results.fusions_1.fq" /> + <data format="fastqsanger" name="results_fusions2_fq" label="${tool.name} on 
${on_string}: fusions_2.fq" from_work_dir="results.fusions_2.fq" /> <!-- expression_plot circos plot @@ -107,7 +131,6 @@ <configfiles> <configfile name="defuse_config"> #import re -#set $ds = chr(36) #if $refGenomeSource.genomeSource == "history": #set config_file = $refGenomeSource.config.__str__ #else @@ -323,6 +346,12 @@ #except --phred33-quals #end try +bowtie_params = #slurp +#try +$ref_dict['bowtie_params'] +#except +--chunkmbs 200 +#end try max_insert_size = #slurp #if $defuse_param.settings == "full" and $defuse_param.max_insert_size.__str__ != "": $defuse_param.max_insert_size @@ -481,6 +510,19 @@ #end if positive_controls = \$(data_directory)/controls.txt +# Use multiple exon transcripts for stats calculations (yes/no) +# should be enabled for very small libraries +multi_exon_transcripts_stats = #slurp +#if $defuse_param.settings == "full" and $defuse_param.multi_exon_transcripts_stats.__str__ != "" +$defuse_param.multi_exon_transcripts_stats +#else +#try +$ref_dict['multi_exon_transcripts_stats'] +#except +no +#end try +#end if + # Position density when calculating covariance covariance_sampling_density = #slurp #if $defuse_param.settings == "full" and $defuse_param.covariance_sampling_density.__str__ != "" @@ -492,6 +534,20 @@ 0.01 #end try #end if + +# Maximum number of alignments for a read pair +# Pairs with more alignments are filtered +max_paired_alignments = #slurp +#if $defuse_param.settings == "full" and $defuse_param.max_paired_alignments.__str__ != "" +$defuse_param.max_paired_alignments +#else +#try +$ref_dict['max_paired_alignments'] +#except +10 +#end try +#end if + # Number of reads for each job in split reads_per_job = #slurp #if $defuse_param.settings == "full" and $defuse_param.reads_per_job.__str__ != "" @@ -512,117 +568,10 @@ remove_job_files = yes remove_job_temp_files = yes +qsub_params = "" + #end raw - - </configfile> - <configfile name="shscript"> -#!/bin/bash -## define some things for cheetah proccessing -#set $ds = chr(36) 
-#set $amp = chr(38) -#set $gt = chr(62) -#set $lt = chr(60) -#set $echo_cmd = 'echo' -## Find the defuse.pl in the galaxy tool path -#import Cheetah.FileUtils -## declare a bash function for converting a results tsv into html with links to the get_reads output files -results2html() { - rlts=${ds}1 - rslt_name=`basename ${ds}rlts` - html=${ds}2 - echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse '${ds}rslt_name'${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} ${ds}html - echo '${lt}h2${gt}Defuse '${ds}rslt_name'${lt}/h2${gt}${lt}table${gt}' ${gt}${gt} ${ds}html - if [ -z "${ds}3" ] - then - awk '${ds}1 ~ /cluster_id/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}th${gt}%s${lt}/th${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}\ - ${ds}1 ~ /[1-9][0-9]*/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}td${gt}%s${lt}/td${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}' ${ds}rlts ${gt}${gt} ${ds}html - echo '${lt}/table${gt}' ${gt}${gt} ${ds}html - echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} ${ds}html - else - export _EFP=${ds}3 - mkdir -p ${ds}_EFP - awk '${ds}1 ~ /cluster_id/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}th${gt}%s${lt}/th${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}\ - ${ds}1 ~ /[1-9][0-9]*/{fn="cluster_"${ds}1"_reads.txt"; \ - printf("${lt}tr${gt}${lt}td${gt}${lt}a href=\"%s\"${gt}%s${lt}/a${gt}${lt}/td${gt}",fn, ${ds}1);for (i = 2; i ${lt}= NF; i++) {printf("${lt}td${gt}%s${lt}/td${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}' ${ds}rlts ${gt}${gt} ${ds}html - echo '${lt}/table${gt}' ${gt}${gt} ${ds}html - echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} ${ds}html - for i in `awk '${ds}1 ~ /[1-9][0-9]*/{print ${ds}1}' ${ds}rlts`; - do fn=cluster_${ds}{i}_reads.txt; - pn=${ds}_EFP/${ds}fn; - perl \${DEFUSE_PATH}/scripts/get_reads.pl -c $defuse_config -o output_dir -i ${ds}i ${gt} ${ds}pn; - done - fi -} -## substitute pathnames into config file -if `grep __DEFUSE_PATH__ 
$defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DEFUSE_PATH__#\${DEFUSE_PATH}#" $defuse_config; fi -if `grep __SAMTOOLS_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} SAMTOOLS_BIN=`which samtools`;then sed -i'.tmp' "s#__SAMTOOLS_BIN__#\${SAMTOOLS_BIN}#" $defuse_config; fi -if `grep __BOWTIE_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BIN=`which bowtie`;then sed -i'.tmp' "s#__BOWTIE_BIN__#\${BOWTIE_BIN}#" $defuse_config; fi -if `grep __BOWTIE_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BUILD_BIN=`which bowtie-build`;then sed -i'.tmp' "s#__BOWTIE_BUILD_BIN__#\${BOWTIE_BUILD_BIN}#" $defuse_config; fi -if `grep __BLAT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BLAT_BIN=`which blat`;then sed -i'.tmp' "s#__BLAT_BIN__#\${BLAT_BIN}#" $defuse_config; fi -if `grep __FATOTWOBIT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} FATOTWOBIT_BIN=`which faToTwoBit`;then sed -i'.tmp' "s#__FATOTWOBIT_BIN__#\${FATOTWOBIT_BIN}#" $defuse_config; fi -if `grep __GMAP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_BIN=`which gmap`;then sed -i'.tmp' "s#__GMAP_BIN__#\${GMAP_BIN}#" $defuse_config; fi -if `grep __GMAP_SETUP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_SETUP_BIN=`which gmap_setup`;then sed -i'.tmp' "s#__GMAP_SETUP_BIN__#\${GMAP_SETUP_BIN}#" $defuse_config; fi -if `grep __R_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} R_BIN=`which R`;then sed -i'.tmp' "s#__R_BIN__#\${R_BIN}#" $defuse_config; fi -if `grep __RSCRIPT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} RSCRIPT_BIN=`which Rscript`;then sed -i'.tmp' "s#__RSCRIPT_BIN__#\${RSCRIPT_BIN}#" $defuse_config; fi - - -## copy config to output -cp $defuse_config $config_txt -## make a data_dir and ln -s the input fastq -mkdir -p data_dir -## ln -s "$left_pairendreads" data_dir/reads_1.fastq -## ln -s "$right_pairendreads" data_dir/reads_2.fastq -cp "$left_pairendreads" data_dir/reads_1.fastq -cp "$right_pairendreads" data_dir/reads_2.fastq -## ln to 
output_dir in from_work_dir -#if $defuse_out.__str__ != 'None': -mkdir -p $defuse_out.dataset.extra_files_path -ln -s $defuse_out.dataset.extra_files_path output_dir -#else -mkdir -p output_dir -#end if -## run defuse.pl -perl \${DEFUSE_PATH}/scripts/defuse.pl -name "$library_name" -c $defuse_config -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir -p \$GALAXY_SLOTS -## copy primary results to output datasets -if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi -## if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi -if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi -if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi -#if $breakpoints_bam: -if [ -e output_dir/results.filtered.tsv ] ${amp}${amp} [ -e output_dir/breakpoints.genome.psl ] -then - awk "\\$10 ~ /^(`awk '\\$1 ~ /[0-9]+/{print \\$1}' output_dir/results.filtered.tsv | tr '\n' '|'`)\\$/{print \\$0}" output_dir/breakpoints.genome.psl > breakpoints.genome.filtered.psl ${amp}${amp} - psl2sam.pl breakpoints.genome.filtered.psl > breakpoints.genome.filtered.sam ${amp}${amp} - samtools view -b -T /panfs/roc/rissdb/galaxy/genomes/NCBIM37/defuse/defuse.reference.fa -o breakpoints.genome.filtered.bam breakpoints.genome.filtered.sam ${amp}${amp} - samtools sort breakpoints.genome.filtered.bam breakpoints ${amp}${amp} - ## samtools index breakpoints.bam - cp breakpoints.bam $fusions_bam -fi -#end if -## create html with links for output_dir -#if $defuse_out.__str__ != 'None': -if [ -e $defuse_out ] -then - echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse Output${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} $defuse_out - echo '${lt}h2${gt}Defuse Output Files${lt}/h2${gt}${lt}ul${gt}' ${gt}${gt} $defuse_out - pushd $defuse_out.dataset.extra_files_path - for f in `find -L . 
-maxdepth 1 -type f`; - do fn=`basename ${ds}f`; echo '${lt}li${gt}${lt}a href="'${ds}fn'"${gt}'${ds}fn'${lt}/a${gt}${lt}/li${gt}' ${gt}${gt} $defuse_out; - done - popd - echo '${lt}/ul${gt}' ${gt}${gt} $defuse_out - echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} $defuse_out -fi -#end if -## run get_reads.pl on each cluster -#if $fusion_reads.__str__ != 'None': -if [ -e output_dir/results.filtered.tsv -a -e $fusion_reads ] -then - mkdir -p $fusion_reads.dataset.extra_files_path - results2html output_dir/results.filtered.tsv $fusion_reads $fusion_reads.dataset.extra_files_path -fi -#end if </configfile> </configfiles>