Mercurial > repos > jjohnson > defuse8
diff defuse.xml @ 0:63f23d5db27c draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/defuse commit 2c2fd38cb761ec57bac7a0bd376e6aa2b88265d0-dirty
author | jjohnson |
---|---|
date | Mon, 20 May 2019 15:25:03 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/defuse.xml Mon May 20 15:25:03 2019 -0400 @@ -0,0 +1,701 @@ +<tool id="defuse" name="DeFuse" version="@DEFUSE_VERSION@.1"> + <description>identify fusion transcripts</description> + <macros> + <import>macros.xml</import> + </macros> + <requirements> + <expand macro="defuse_requirement" /> + </requirements> + <command detect_errors="default"><![CDATA[ + #if $defuse_out.__str__ != 'None': + ## ln to output_dir in from_work_dir + mkdir -p $defuse_out.files_path && + ln -s $defuse_out.files_path output_dir && + #else + mkdir -p output_dir && + #end if + ## Put executable paths in config file + $__tool_directory__/config_sub.sh $defuse_config output_dir/defuse.cfg && + ## copy config to output + cp output_dir/defuse.cfg $config_txt && + ## make a data_dir and ln -s the input fastq + mkdir -p data_dir && + ln -s "$left_pairendreads" data_dir/reads_1.fastq && + ln -s "$right_pairendreads" data_dir/reads_2.fastq && + ## run + DATASET_DIRECTORY=`grep '^dataset_directory' output_dir/defuse.cfg | awk '{print \$NF}'` && + defuse_run.pl --name "$library_name" --config output_dir/defuse.cfg --dataset \$DATASET_DIRECTORY -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir -p \$GALAXY_SLOTS && + grep -v cluster_id output_dir/results.filtered.tsv | awk '{print $1}' > cluster_id_list && + get_fusion_fastq.pl --list cluster_id_list --output output_dir --fastq1 results.fusions_1.fq --fastq2 results.fusions_2.fq && + cp output_dir/results.* . && + cp `find -L output_dir -name defuse.log` $defuse_log + #if $defuse_out.__str__ != 'None': + && $__tool_directory__/make_html.sh $defuse_out $defuse_out.files_path + #end if + ]]></command> + <configfiles> + <configfile name="defuse_config"> +#import re +#if $refGenomeSource.genomeSource == "history": +#set config_file = $refGenomeSource.config.__str__ +#else +#set config_file = $refGenomeSource.index.value +#end if +#set pat = '^\s*([^#=][^=]*?)\s*=\s*(.*?)\s*$' +#set fh = open($config_file) +#set keys = ['dataset_directory','ensembl_organism','ensembl_prefix','ensembl_version','ensembl_genome_version','ucsc_genome_version','ncbi_organism','ncbi_prefix','chromosomes','mt_chromosome','gene_sources','ig_gene_sources','rrna_gene_sources'] +#set kv = [] +#for $line in $fh: + #set m = $re.match($pat,$line) + #if $m and len($m.groups()) == 2: + ## #echo $line + #if $m.groups()[0] in keys: + #set k = $m.groups()[0] + #if k == 'dataset_directory' and $refGenomeSource.genomeSource == "indexed": + ## The DataManager is conifgured to place the config file in the same directory as the defuse_data: dataset_directory + #set v = $os.path.dirname($config_file) + #else: + #set v = $m.groups()[1] + #end if + #set kv = $kv + [[$k, $v]] + #end if + #end if +#end for +## #echo $kv +#set ref_dict = dict($kv) +## #echo $ref_dict +## include raw $refGenomeSource.config.__str__ +# +# Configuration file for defuse +# +# At a minimum, change all values enclused by [] +# + +# Directory where the defuse code was unpacked +## Default location in the tool/defuse directory +# source_directory = ${__root_dir__}/tools/defuse +source_directory = __DEFUSE_PATH__ + +# Directory where you want your dataset +dataset_directory = #slurp +#try +$ref_dict['dataset_directory'] +#except +/project/db/genomes/Hsapiens/hg19/defuse +#end try + +# Organism IDs +ensembl_organism = #slurp +#try +$ref_dict['ensembl_organism'] +#except +homo_sapiens +#end try + +ensembl_prefix = #slurp +#try +$ref_dict['ensembl_prefix'] +#except +Homo_sapiens +#end try + +ensembl_version = #slurp +#try +$ref_dict['ensembl_version'] +#except +71 +#end try + +ensembl_genome_version = #slurp +#try +$ref_dict['ensembl_genome_version'] +#except +GRCh37 +#end try + +ucsc_genome_version = #slurp +#try +$ref_dict['ucsc_genome_version'] +#except +hg19 +#end try + +ncbi_organism = #slurp +#try +$ref_dict['ncbi_organism'] +#except +Homo_sapiens +#end try + +ncbi_prefix = #slurp +#try +$ref_dict['ncbi_prefix'] +#except +Hs +#end try + +# Input genome and gene models +gene_models = #slurp +#try +$ref_dict['gene_models'] +#except +\$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).gtf +#end try +genome_fasta = #slurp +#try +$ref_dict['genome_fasta'] +#except +\$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).dna.chromosomes.fa +#end try + +# Repeat table from ucsc genome browser +repeats_filename = #slurp +#try +$ref_dict['repeats_filename'] +#except +\$(dataset_directory)/rmsk.txt +#end try + +# EST info downloaded from ucsc genome browser +est_fasta = #slurp +#try +$ref_dict['est_fasta'] +#except +\$(dataset_directory)/est.fa +#end try +est_alignments = #slurp +#try +$ref_dict['est_alignments'] +#except +\$(dataset_directory)/intronEst.txt +#end try + +# Unigene clusters downloaded from ncbi +unigene_fasta = #slurp +#try +$ref_dict['unigene_fasta'] +#except +\$(dataset_directory)/\$(ncbi_prefix).seq.uniq +#end try + +# Paths to external tools +bowtie_bin = __BOWTIE_BIN__ +bowtie_build_bin = __BOWTIE_BUILD_BIN__ +blat_bin = __BLAT_BIN__ +fatotwobit_bin = __FATOTWOBIT_BIN__ +gmap_bin = __GMAP_BIN__ +gmap_bin = __GMAP_BIN__ +gmap_setup_bin = __GMAP_SETUP_BIN__ +r_bin = __R_BIN__ +rscript_bin = __RSCRIPT_BIN__ + +# Directory where you want your dataset +gmap_index_directory = #slurp +#try +$ref_dict['gmap_index_directory'] +#except +#raw +$(dataset_directory)/gmap +#end raw +#end try + +#raw +# Dataset files +dataset_prefix = $(dataset_directory)/defuse +chromosome_prefix = $(dataset_prefix).dna.chromosomes +exons_fasta = $(dataset_prefix).exons.fa +cds_fasta = $(dataset_prefix).cds.fa +cdna_regions = $(dataset_prefix).cdna.regions +cdna_fasta = $(dataset_prefix).cdna.fa +reference_fasta = $(dataset_prefix).reference.fa +rrna_fasta = $(dataset_prefix).rrna.fa +ig_gene_list = $(dataset_prefix).ig.gene.list +repeats_regions = $(dataset_directory)/repeats.regions +est_split_fasta1 = $(dataset_directory)/est.1.fa +est_split_fasta2 = $(dataset_directory)/est.2.fa +est_split_fasta3 = $(dataset_directory)/est.3.fa +est_split_fasta4 = $(dataset_directory)/est.4.fa +est_split_fasta5 = $(dataset_directory)/est.5.fa +est_split_fasta6 = $(dataset_directory)/est.6.fa +est_split_fasta7 = $(dataset_directory)/est.7.fa +est_split_fasta8 = $(dataset_directory)/est.8.fa +est_split_fasta9 = $(dataset_directory)/est.9.fa + +# Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs +prefilter1 = $(unigene_fasta) + +# deFuse scripts and tools +scripts_directory = $(source_directory)/scripts +tools_directory = $(source_directory)/tools +data_directory = $(source_directory)/data +#end raw + +# Path to samtools, 0.1.8 is compiled for you, use other versions at your own risk +samtools_bin = #slurp +#try +$ref_dict['samtools_bin'] +#except +\$(source_directory)/external/samtools-0.1.8/samtools +#end try + +# Bowtie parameters +bowtie_threads = #slurp +#try +$ref_dict['bowtie_threads'] +#except +4 +#end try +bowtie_quals = #slurp +#try +$ref_dict['bowtie_quals'] +#except +--phred33-quals +#end try +bowtie_params = #slurp +#try +$ref_dict['bowtie_params'] +#except +--chunkmbs 200 +#end try +max_insert_size = #slurp +#if $defuse_param.settings == "full" and $defuse_param.max_insert_size.__str__ != "": +$defuse_param.max_insert_size +#else +#try +$ref_dict['max_insert_size'] +#except +500 +#end try +#end if + +# Parameters for building the dataset +chromosomes = #slurp +#try +$ref_dict.chromosomes +#except +1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT +#end try +mt_chromosome = #slurp +#try +$ref_dict['mt_chromosome'] +#except +MT +#end try +gene_sources = #slurp +#try +$ref_dict['gene_sources'] +#except +IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding +#end try +ig_gene_sources = #slurp +#try +$ref_dict['ig_gene_sources'] +#except +IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene +#end try +rrna_gene_sources = #slurp +#try +$ref_dict['rrna_gene_sources'] +#except +Mt_rRNA,rRNA,rRNA_pseudogene +#end try + +# Blat sequences per job +num_blat_sequences = #slurp +#try +$ref_dict['num_blat_sequences'] +#except +10000 +#end try + +# Minimum gene fusion range +dna_concordant_length = #slurp +#if $defuse_param.settings == "full" and $defuse_param.dna_concordant_length.__str__ != "": +$defuse_param.dna_concordant_length +#else +#try +$ref_dict['dna_concordant_length'] +#except +2000 +#end try +#end if + +# Trim length for discordant reads (split reads are not trimmed) +discord_read_trim = #slurp +#if $defuse_param.settings == "full" and $defuse_param.discord_read_trim.__str__ != "": +$defuse_param.discord_read_trim +#else +#try +$ref_dict['discord_read_trim'] +#except +50 +#end try +#end if +# Calculate extra annotations, fusion splice index and interrupted index +calculate_extra_annotations = #slurp +#if $defuse_param.settings == "full" and $defuse_param.calculate_extra_annotations.__str__ != "": +$defuse_param.calculate_extra_annotations +#else +#try +$ref_dict['calculate_extra_annotations'] +#except +no +#end try +#end if +# Filtering parameters +clustering_precision = #slurp +#if $defuse_param.settings == "full" and $defuse_param.clustering_precision.__str__ != "" +$defuse_param.clustering_precision +#else +#try +$ref_dict['clustering_precision'] +#except +0.95 +#end try +#end if +span_count_threshold = #slurp +#if $defuse_param.settings == "full" and $defuse_param.span_count_threshold.__str__ != "" +$defuse_param.span_count_threshold +#else +#try +$ref_dict['span_count_threshold'] +#except +5 +#end try +#end if +percent_identity_threshold = #slurp +#if $defuse_param.settings == "full" and $defuse_param.percent_identity_threshold.__str__ != "" +$defuse_param.percent_identity_threshold +#else +#try +$ref_dict['percent_identity_threshold'] +#except +0.90 +#end try +#end if +split_min_anchor = #slurp +#if $defuse_param.settings == "full" and $defuse_param.split_min_anchor.__str__ != "" +$defuse_param.split_min_anchor +#else +#try +$ref_dict['split_min_anchor'] +#except +4 +#end try +#end if +splice_bias = #slurp +#if $defuse_param.settings == "full" and $defuse_param.splice_bias.__str__ != "" +$defuse_param.splice_bias +#else +#try +$ref_dict['splice_bias'] +#except +10 +#end try +#end if +denovo_assembly = #slurp +#if $defuse_param.settings == "full" and $defuse_param.denovo_assembly.__str__ != "" +$defuse_param.denovo_assembly +#else +#try +$ref_dict['denovo_assembly'] +#except +no +#end try +#end if +probability_threshold = #slurp +#if $defuse_param.settings == "full" and $defuse_param.probability_threshold.__str__ != "" +$defuse_param.probability_threshold +#else +#try +$ref_dict['probability_threshold'] +#except +0.50 +#end try +#end if +positive_controls = \$(data_directory)/controls.txt + +# Use multiple exon transcripts for stats calculations (yes/no) +# should be enabled for very small libraries +multi_exon_transcripts_stats = #slurp +#if $defuse_param.settings == "full" and $defuse_param.multi_exon_transcripts_stats.__str__ != "" +$defuse_param.multi_exon_transcripts_stats +#else +#try +$ref_dict['multi_exon_transcripts_stats'] +#except +no +#end try +#end if + +# Position density when calculating covariance +covariance_sampling_density = #slurp +#if $defuse_param.settings == "full" and $defuse_param.covariance_sampling_density.__str__ != "" +$defuse_param.covariance_sampling_density +#else +#try +$ref_dict['covariance_sampling_density'] +#except +0.01 +#end try +#end if + +# Maximum number of alignments for a read pair +# Pairs with more alignments are filtered +max_paired_alignments = #slurp +#if $defuse_param.settings == "full" and $defuse_param.max_paired_alignments.__str__ != "" +$defuse_param.max_paired_alignments +#else +#try +$ref_dict['max_paired_alignments'] +#except +10 +#end try +#end if + +# Number of reads for each job in split +reads_per_job = #slurp +#if $defuse_param.settings == "full" and $defuse_param.reads_per_job.__str__ != "" +$defuse_param.reads_per_job +#else +#try +$ref_dict['reads_per_job'] +#except +1000000 +#end try +#end if + +#raw +# If you have command line 'mail' and wish to be notified +# mailto = andrew.mcpherson@gmail.com + +# Remove temp files +remove_job_files = yes +remove_job_temp_files = yes + +qsub_params = "" + +#end raw + + </configfile> + </configfiles> + <inputs> + <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. FASTQ de-interlacer will separate the result into left and right reads.)"/> + <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/> + <param name="library_name" type="text" value="unknown" label="library name" help="Value to put in the results library_name column"> + <validator type="length" min="1"/> + </param> + <conditional name="refGenomeSource"> + <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help=""> + <option value="indexed">Use a built-in DeFuse Reference Dataset</option> + <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option> + </param> + <when value="indexed"> + <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team"> + <options from_file="defuse_reference.loc"> + <column name="name" index="1"/> + <column name="value" index="3"/> + <filter type="sort_by" column="0" /> + <validator type="no_options" message="No indexes are available" /> + </options> + </param> + </when> + <when value="history"> + <param name="config" type="data" format="defuse.conf" label="Defuse Config file" help=""/> + </when> <!-- history --> + </conditional> <!-- refGenomeSource --> + <conditional name="defuse_param"> + <param name="settings" type="select" label="Defuse parameter settings" help=""> + <option value="preSet">Default settings</option> + <option value="full">Full parameter list</option> + </param> + <when value="preSet" /> + <when value="full"> + <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" /> + <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" /> + <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" /> + <param name="calculate_extra_annotations" type="select" label="Calculate extra annotations, fusion splice index and interrupted index" help=""> + <option value="">Use Default</option> + <option value="no">no</option> + <option value="yes">yes</option> + </param> + <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision"> + <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> + </param> + <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" /> + <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold"> + <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> + </param> + <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" /> + <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" /> + <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold"> + <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> + </param> + <param name="multi_exon_transcripts_stats" type="select" label="Use multiple exon transcripts for stats calculations" help="should be enabled for very small libraries"> + <option value="no" selected="true">no</option> + <option value="yes">yes</option> + </param> + <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density"> + <help>Position density when calculating covariance</help> + <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> + </param> + <param name="max_paired_alignments" type="integer" value="10" optional="true" label="max_paired_alignments"> + <help>Maximum number of alignments for a read pair, Pairs with more alignments are filtered, default is 10</help> + <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="1" max="100"/> + </param> + <param name="denovo_assembly" type="select" label="denovo_assembly" help=""> + <option value="">Use Default</option> + <option value="no">no</option> + <option value="yes">yes</option> + </param> + <!-- + <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/> + --> + <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" /> + </when> <!-- full --> + </conditional> <!-- defuse_param --> + <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" + help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, + but they require considerable diskspace, and should be deleted and purged when no longer needed."/> + </inputs> + <outputs> + <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> + <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" /> + <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)"> + <filter>keep_output == True</filter> + </data> + <data format="defuse.results.tsv" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" from_work_dir="results.classify.tsv"/> + <data format="defuse.results.tsv" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" from_work_dir="results.filtered.tsv"/> + <data format="fastqsanger" name="results_fusions1_fq" label="${tool.name} on ${on_string}: fusions_1.fq" from_work_dir="results.fusions_1.fq" /> + <data format="fastqsanger" name="results_fusions2_fq" label="${tool.name} on ${on_string}: fusions_2.fq" from_work_dir="results.fusions_2.fq" /> + <!-- + expression_plot + circos plot + --> + </outputs> + + <tests> + </tests> + <help> +**DeFuse** + +DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. + +Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138 + +.. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page + +------ + +**Inputs** + +DeFuse requires 2 fastq files for paried reads, one with the left mate of the paired reads, and a second fastq with the the right mate of the paired reads (**with reads in the same order as in the first fastq dataset**). + +If your fastq files have reads in different orders or include unpaired reads, you can preprocess them with **FASTQ interlacer** to create a single interlaced fastq dataset with only the paired reads and input that to **FASTQ de-interlacer** to separate the reads into a left fastq and right fastq. + +DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.4_: + - genome_fasta from Ensembl + - gene_models from Ensembl + - repeats_filename from UCSC RepeatMasker rmsk.txt + - est_fasta from UCSC + - est_alignments from UCSC intronEst.txt + - unigene_fasta from NCBI + +.. _DeFuse_Version_0.4: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2 + +------ + +**Outputs** + +The galaxy history will contain 5 outputs: the config.txt file that provides DeFuse with its parameters, the defuse.log which details what DeFuse has done and can be useful in determining any errors, and the 3 results files that defuse generates. + +DeFuse generates 3 results files: results.txt, results.filtered.txt, and results.classify.txt. All three files have the same format, though results.classify.txt has a probability column from the application of the classifier to results.txt, and results.filtered.txt has been filtered according to the threshold probability as set in config.txt. + +The file format is tab delimited with one prediction per line, and the following fields per prediction (not necessarily in this order): + + - **Identification** + - cluster_id : random identifier assigned to each prediction + - library_name : library name given on the command line of defuse + - gene1 : ensembl id of gene 1 + - gene2 : ensembl id of gene 2 + - gene_name1 : name of gene 1 + - gene_name2 : name of gene 2 + - **Evidence** + - break_predict : breakpoint prediction method, denovo or splitr, that is considered most reliable + - concordant_ratio : proportion of spanning reads considered concordant by blat + - denovo_min_count : minimum kmer count across denovo assembled sequence + - denovo_sequence : fusion sequence predicted by debruijn based denovo sequence assembly + - denovo_span_pvalue : p-value, lower values are evidence the prediction is a false positive + - gene_align_strand1 : alignment strand for spanning read alignments to gene 1 + - gene_align_strand2 : alignment strand for spanning read alignments to gene 2 + - min_map_count : minimum of the number of genomic mappings for each spanning read + - max_map_count : maximum of the number of genomic mappings for each spanning read + - mean_map_count : average of the number of genomic mappings for each spanning read + - num_multi_map : number of spanning reads that map to more than one genomic location + - span_count : number of spanning reads supporting the fusion + - span_coverage1 : coverage of spanning reads aligned to gene 1 as a proportion of expected coverage + - span_coverage2 : coverage of spanning reads aligned to gene 2 as a proportion of expected coverage + - span_coverage_min : minimum of span_coverage1 and span_coverage2 + - span_coverage_max : maximum of span_coverage1 and span_coverage2 + - splitr_count : number of split reads supporting the prediction + - splitr_min_pvalue : p-value, lower values are evidence the prediction is a false positive + - splitr_pos_pvalue : p-value, lower values are evidence the prediction is a false positive + - splitr_sequence : fusion sequence predicted by split reads + - splitr_span_pvalue : p-value, lower values are evidence the prediction is a false positive + - **Annotation** + - adjacent : fusion between adjacent genes + - altsplice : fusion likely the product of alternative splicing between adjacent genes + - break_adj_entropy1 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 1 + - break_adj_entropy2 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 2 + - break_adj_entropy_min : minimum of break_adj_entropy1 and break_adj_entropy2 + - breakpoint_homology : number of nucleotides at the fusion splice that align equally well to gene 1 or gene 2 + - breakseqs_estislands_percident : maximum percent identity of fusion sequence alignments to est islands + - cdna_breakseqs_percident : maximum percent identity of fusion sequence alignments to cdna + - deletion : fusion produced by a genomic deletion + - est_breakseqs_percident : maximum percent identity of fusion sequence alignments to est + - eversion : fusion produced by a genomic eversion + - exonboundaries : fusion splice at exon boundaries + - expression1 : expression of gene 1 as number of concordant pairs aligned to exons + - expression2 : expression of gene 2 as number of concordant pairs aligned to exons + - gene_chromosome1 : chromosome of gene 1 + - gene_chromosome2 : chromosome of gene 2 + - gene_end1 : end position for gene 1 + - gene_end2 : end position for gene 2 + - gene_location1 : location of breakpoint in gene 1 + - gene_location2 : location of breakpoint in gene 2 + - gene_start1 : start of gene 1 + - gene_start2 : start of gene 2 + - gene_strand1 : strand of gene 1 + - gene_strand2 : strand of gene 2 + - genome_breakseqs_percident : maximum percent identity of fusion sequence alignments to genome + - genomic_break_pos1 : genomic position in gene 1 of fusion splice / breakpoint + - genomic_break_pos2 : genomic position in gene 2 of fusion splice / breakpoint + - genomic_strand1 : genomic strand in gene 1 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream + - genomic_strand2 : genomic strand in gene 2 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream + - interchromosomal : fusion produced by an interchromosomal translocation + - interrupted_index1 : ratio of coverage before and after the fusion splice / breakpoint in gene 1 + - interrupted_index2 : ratio of coverage before and after the fusion splice / breakpoint in gene 2 + - inversion : fusion produced by genomic inversion + - orf : fusion combines genes in a way that preserves a reading frame + - probability : probability produced by classification using adaboost and example positives/negatives (only given in results.classified.txt) + - read_through : fusion involving adjacent potentially resulting from co-transcription rather than genome rearrangement + - repeat_proportion1 : proportion of the spanning reads in gene 1 that span a repeat region + - repeat_proportion2 : proportion of the spanning reads in gene 2 that span a repeat region + - max_repeat_proportion : max of repeat_proportion1 and repeat_proportion2 + - splice_score : number of nucleotides similar to GTAG at fusion splice + - num_splice_variants : number of potential splice variants for this gene pair + - splicing_index1 : number of concordant pairs in gene 1 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 2 + - splicing_index2 : number of concordant pairs in gene 2 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 1 + + +**Example** + +results.tsv:: + + cluster_id splitr_sequence splitr_count splitr_span_pvalue splitr_pos_pvalue splitr_min_pvalue adjacent altsplice break_adj_entropy1 break_adj_entropy2 break_adj_entropy_min break_predict breakpoint_homology breakseqs_estislands_percident cdna_breakseqs_percident concordant_ratio deletion est_breakseqs_percident eversion exonboundaries expression1 expression2 gene1 gene2 gene_align_strand1 gene_align_strand2 gene_chromosome1 gene_chromosome2 gene_end1 gene_end2 gene_location1 gene_location2 gene_name1 gene_name2 gene_start1 gene_start2 gene_strand1 gene_strand2 genome_breakseqs_percident genomic_break_pos1 genomic_break_pos2 genomic_strand1 genomic_strand2 interchromosomal interrupted_index1 interrupted_index2 inversion library_name max_map_count max_repeat_proportion mean_map_count min_map_count num_multi_map num_splice_variants orf read_through repeat_proportion1 repeat_proportion2 span_count span_coverage1 span_coverage2 span_coverage_max span_coverage_min splice_score splicing_index1 splicing_index2 + 1169 GCTTACTGTATGCCAGGCCCCAGAGGGGCAACCACCCTCTAAAGAGAGCGGCTCCTGCCTCCCAGAAAGCTCACAGACTGTGGGAGGGAAACAGGCAGCAGGTGAAGATGCCAAATGCCAGGATATCTGCCCTGTCCTTGCTTGATGCAGCTGCTGGCTCCCACGTTCTCCCCAGAATCCCCTCACACTCCTGCTGTTTTCTCTGCAGGTTGGCAGAGCCCCATGAGGGCAGGGCAGCCACTTTGTTCTTGGGCGGCAAACCTCCCTGGGCGGCACGGAAACCACGGTGAGAAGGGGGCAGGTCGGGCACGTGCAGGGACCACGCTGCAGG|TGTACCCAACAGCTCCGAAGAGACAGCGACCATCGAGAACGGGCCATGATGACGATGGCGGTTTTGTCGAAAAGAAAAGGGGGAAATGTGGGGAAAAGCAAGAGAGATCAGATTGTTACTGTGTCTGTGTAGAAAGAAGTAGACATGGGAGACTCCATTTTGTTCTGTACTAAGAAAAATTCTTCTGCCTTGAGATTCGGTGACCCCACCCCCAACCCCGTGCTCTCTGAAACATGTGCTGTGTCCACTCAGGGTTGAATGGATTAAGGGCGGTGCGAGACGTGCTTT 2 0.000436307890680442 0.110748295953850 0.0880671602973091 N Y 3.19872427442695 3.48337348351473 3.19872427442695 splitr 0 0 0 0 Y 0 N N 0 0 ENSG00000105549 ENSG00000213753 + - 19 19 376013 59111168 intron upstream THEG AC016629.2 361750 59084870 - + 0 375099 386594 + - N 8.34107429512245 - N output_dir 82 0.677852348993289 40.6666666666667 1 11 1 N N 0.361271676300578 0.677852348993289 12 0.758602776578432 0.569678713445872 0.758602776578432 0.569678713445872 2 0.416666666666667 - + 3596 TGGGGGTTGAGGCTTCTGTTCCCAGGTTCCATGACCTCAGAGGTGGCTGGTGAGGTTATGACCTTTGCCCTCCAGCCCTGGCTTAAAACCTCAGCCCTAGGACCTGGTTAAAGGAAGGGGAGATGGAGCTTTGCCCCGACCCCCCCCCGTTCCCCTCACCTGTCAGCCCGAGCTGGGCCAGGGCCCCTAGGTGGGGAACTGGGCCGGGGGGCGGGCACAAGCGGAGGTGGTGCCCCCAAAAGGGCTCCCGGTGGGGTCTTGCTGAGAAGGTGAGGGGTTCCCGGGGCCGCAGCAGGTGGTGGTGGAGGAGCCAAGCGGCTGTAGAGCAAGGGGTGAGCAGGTTCCAGACCGTAGAGGCGGGCAGCGGCCACGGCCCCGGGTCCAGTTAGCTCCTCACCCGCCTCATAGAAGCGGGGTGGCCTTGCCAGGCGTGGGGGTGCTGCC|TTCCTTGGATGTGGTAGCCGTTTCTCAGGCTCCCTCTCCGGAATCGAACCCTGATTCCCCGTCACCCGTGGTCACCATGGTAGGCACGGCGACTACCATCGAAAGTTGATAGGGCAGACGTTCGAATGGGTCGTCGCCGCCACGGGGGGCGTGCGATCAGCCCGAGGTTATCTAGAGTCACCAAAGCCGCCGGCGCCCGCCCCCCGGCCGGGGCCGGAGAGGGGCTGACCGGGTTGGTTTTGATCTGATAAATGCACGCATCCCCCCCGCGAAGGGGGTCAGCGCCCGTCGGCATGTATTAGCTCTAGAATTACCACAGTTATCCAAGTAGGAGAGGAGCGAGCGACCAAAGGAACCATAACTGATTTAATGAGCCATTCGCAGTTTCACTGTACCGGCCGTGCGTACTTAGACATGCATGGCTTAATCTTTGAGACAAGCATATGCTACTGGCAGG 250 7.00711162298275e-72 0.00912124762512338 0.00684237452309549 N N 3.31745197152461 3.47233119514066 3.31745197152461 splitr 7 0.0157657657657656 0 0 N 0.0135135135135136 N N 0 0 ENSG00000156860 ENSG00000212932 - + 16 21 30682131 48111157 coding upstream FBRS RPL23AP4 30670289 48110676 + + 0.0157657657657656 30680678 9827473 - + Y - - N output_dir 2 1 1.11111111111111 1 1 1 N N 0 1 9 0.325530693397641 0.296465452915709 0.325530693397641 0.296465452915709 2 - - + + </help> + <expand macro="citations"/> +</tool>