Mercurial > repos > jjohnson > defuse
annotate defuse.xml @ 28:f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
| author | Jim Johnson <jj@umn.edu> |
|---|---|
| date | Tue, 03 Sep 2013 06:41:19 -0500 |
| parents | d57fcac025e2 |
| children | 3e3ebdecb0e1 |
| rev | line source |
|---|---|
|
8
06675bd664ee
Update to DeFuse verion 0.6.1 and change tool dependencies for autoinstall
Jim Johnson <jj@umn.edu>
parents:
7
diff
changeset
|
1 <tool id="defuse" name="DeFuse" version="1.6.1"> |
| 0 | 2 <description>identify fusion transcripts</description> |
| 3 <requirements> | |
|
8
06675bd664ee
Update to DeFuse verion 0.6.1 and change tool dependencies for autoinstall
Jim Johnson <jj@umn.edu>
parents:
7
diff
changeset
|
4 <requirement type="package" version="0.6.1">defuse</requirement> |
| 6 | 5 <requirement type="package" version="0.1.18">samtools</requirement> |
|
8
06675bd664ee
Update to DeFuse verion 0.6.1 and change tool dependencies for autoinstall
Jim Johnson <jj@umn.edu>
parents:
7
diff
changeset
|
6 <requirement type="package" version="1.0.0">bowtie</requirement> |
|
06675bd664ee
Update to DeFuse verion 0.6.1 and change tool dependencies for autoinstall
Jim Johnson <jj@umn.edu>
parents:
7
diff
changeset
|
7 <requirement type="package" version="2013-05-09">gmap</requirement> |
|
10
08e9244aeab8
Change dependency for kent (blat faToTwoBit) to latest since versions not available at UCSC
Jim Johnson <jj@umn.edu>
parents:
8
diff
changeset
|
8 <requirement type="package" version="latest">kent</requirement> |
| 0 | 9 </requirements> |
| 10 <command interpreter="command"> /bin/bash $shscript </command> | |
| 11 <inputs> | |
| 12 <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. FASTQ de-interlacer will separate the result into left and right reads.)"/> | |
| 13 <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/> | |
|
27
d57fcac025e2
Add more info fields to defuse_results_to_vcf.py
Jim Johnson <jj@umn.edu>
parents:
25
diff
changeset
|
14 <param name="library_name" type="text" value="unknown" label="library name" help="Value to put in the results library_name column"> |
|
d57fcac025e2
Add more info fields to defuse_results_to_vcf.py
Jim Johnson <jj@umn.edu>
parents:
25
diff
changeset
|
15 <validator type="length" min="1"/> |
|
d57fcac025e2
Add more info fields to defuse_results_to_vcf.py
Jim Johnson <jj@umn.edu>
parents:
25
diff
changeset
|
16 </param> |
| 0 | 17 <conditional name="refGenomeSource"> |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
18 <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help=""> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
19 <option value="indexed">Use a built-in DeFuse Reference Dataset</option> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
20 <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
21 </param> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
22 <when value="indexed"> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
23 <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team"> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
24 <options from_file="defuse_reference.loc"> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
25 <column name="name" index="1"/> |
|
22
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
26 <column name="value" index="3"/> |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
27 <filter type="sort_by" column="0" /> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
28 <validator type="no_options" message="No indexes are available" /> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
29 </options> |
| 0 | 30 </param> |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
31 </when> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
32 <when value="history"> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
33 <param name="config" type="data" format="defuse.conf" label="Defuse Config file" help=""/> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
34 </when> <!-- history --> |
| 0 | 35 </conditional> <!-- refGenomeSource --> |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
36 <conditional name="defuse_param"> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
37 <param name="settings" type="select" label="Defuse parameter settings" help=""> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
38 <option value="preSet">Default settings</option> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
39 <option value="full">Full parameter list</option> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
40 </param> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
41 <when value="preSet" /> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
42 <when value="full"> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
43 <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" /> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
44 <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" /> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
45 <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" /> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
46 <param name="calculate_extra_annotations" type="select" label="Calculate extra annotations, fusion splice index and interrupted index" help=""> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
47 <option value="">Use Default</option> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
48 <option value="no">no</option> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
49 <option value="yes">yes</option> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
50 </param> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
51 <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision"> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
52 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
53 </param> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
54 <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" /> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
55 <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold"> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
56 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
57 </param> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
58 <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" /> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
59 <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" /> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
60 <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold"> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
61 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
62 </param> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
63 <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density"> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
64 <help>Position density when calculating covariance</help> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
65 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
66 </param> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
67 <param name="denovo_assembly" type="select" label="denovo_assembly" help=""> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
68 <option value="">Use Default</option> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
69 <option value="no">no</option> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
70 <option value="yes">yes</option> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
71 </param> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
72 <!-- |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
73 <param name="positive_controls" type="data" format="txt" optional="true" label="Defuse positive_controls" help=""/> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
74 --> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
75 <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" /> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
76 </when> <!-- full --> |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
77 </conditional> <!-- defuse_param --> |
|
28
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
78 <param name="breakpoints_bam" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Generate a Bam file for the fusions"/> |
| 5 | 79 <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" |
| 80 help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, | |
| 81 but they require considerable diskspace, and should be deleted and purged when no longer needed."/> | |
| 3 | 82 <param name="do_get_reads" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/> |
| 0 | 83 </inputs> |
|
22
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
84 <stdio> |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
85 <exit_code range="1:" level="fatal" description="Error Running Defuse" /> |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
86 </stdio> |
| 5 | 87 <outputs> |
| 88 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> | |
| 89 <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" /> | |
| 90 <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)"> | |
| 91 <filter>keep_output == True</filter> | |
| 92 </data> | |
|
25
2ecf82136986
Define defuse.results.tsv ext as subclass of tabular, add defuse_results_to_vcf to generate vcf form DeFuse results
Jim Johnson <jj@umn.edu>
parents:
23
diff
changeset
|
93 <data format="defuse.results.tsv" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" /> |
|
2ecf82136986
Define defuse.results.tsv ext as subclass of tabular, add defuse_results_to_vcf to generate vcf form DeFuse results
Jim Johnson <jj@umn.edu>
parents:
23
diff
changeset
|
94 <data format="defuse.results.tsv" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" /> |
| 5 | 95 <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads"> |
| 96 <filter>do_get_reads == True</filter> | |
| 97 </data> | |
|
28
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
98 <data format="bam" name="fusions_bam" label="${tool.name} on ${on_string}: fusions.bam"> |
|
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
99 <filter>breakpoints_bam == True</filter> |
|
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
100 </data> |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
101 <!-- |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
102 expression_plot |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
103 circos plot |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
104 --> |
| 5 | 105 </outputs> |
| 0 | 106 <configfiles> |
| 107 <configfile name="defuse_config"> | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
108 #import re |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
109 #set $ds = chr(36) |
| 0 | 110 #if $refGenomeSource.genomeSource == "history": |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
111 #set config_file = $refGenomeSource.config.__str__ |
| 0 | 112 #else |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
113 #set config_file = $refGenomeSource.index.value |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
114 #end if |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
115 #set pat = '^\s*([^#=][^=]*?)\s*=\s*(.*?)\s*$' |
|
22
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
116 #set fh = open($config_file) |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
117 #set keys = ['dataset_directory','ensembl_organism','ensembl_prefix','ensembl_version','ensembl_genome_version','ucsc_genome_version','ncbi_organism','ncbi_prefix','chromosomes','mt_chromosome','gene_sources','ig_gene_sources','rrna_gene_sources'] |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
118 #set kv = [] |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
119 #for $line in $fh: |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
120 #set m = $re.match($pat,$line) |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
121 #if $m and len($m.groups()) == 2: |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
122 ## #echo $line |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
123 #if $m.groups()[0] in keys: |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
124 #set k = $m.groups()[0] |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
125 #if k == 'dataset_directory' and $refGenomeSource.genomeSource == "indexed": |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
126 ## The DataManager is configured to place the config file in the same directory as the defuse_data: dataset_directory |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
127 #set v = $os.path.dirname($config_file) |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
128 #else: |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
129 #set v = $m.groups()[1] |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
130 #end if |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
131 #set kv = $kv + [[$k, $v]] |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
132 #end if |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
133 #end if |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
134 #end for |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
135 ## #echo $kv |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
136 #set ref_dict = dict($kv) |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
137 ## #echo $ref_dict |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
138 ## include raw $refGenomeSource.config.__str__ |
| 0 | 139 # |
| 140 # Configuration file for defuse | |
| 141 # | |
| 142 # At a minimum, change all values enclosed by [] | |
| 143 # | |
| 144 | |
| 145 # Directory where the defuse code was unpacked | |
| 146 ## Default location in the tool/defuse directory | |
| 147 # source_directory = ${__root_dir__}/tools/defuse | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
148 source_directory = __DEFUSE_PATH__ |
| 0 | 149 |
| 150 # Directory where you want your dataset | |
| 151 dataset_directory = #slurp | |
| 152 #try | |
| 153 $ref_dict['dataset_directory'] | |
| 154 #except | |
| 155 /project/db/genomes/Hsapiens/hg19/defuse | |
| 156 #end try | |
| 157 | |
|
22
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
158 # Organism IDs |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
159 ensembl_organism = #slurp |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
160 #try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
161 $ref_dict['ensembl_organism'] |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
162 #except |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
163 homo_sapiens |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
164 #end try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
165 |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
166 ensembl_prefix = #slurp |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
167 #try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
168 $ref_dict['ensembl_prefix'] |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
169 #except |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
170 Homo_sapiens |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
171 #end try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
172 |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
173 ensembl_version = #slurp |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
174 #try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
175 $ref_dict['ensembl_version'] |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
176 #except |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
177 71 |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
178 #end try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
179 |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
180 ensembl_genome_version = #slurp |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
181 #try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
182 $ref_dict['ensembl_genome_version'] |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
183 #except |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
184 GRCh37 |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
185 #end try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
186 |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
187 ucsc_genome_version = #slurp |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
188 #try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
189 $ref_dict['ucsc_genome_version'] |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
190 #except |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
191 hg19 |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
192 #end try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
193 |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
194 ncbi_organism = #slurp |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
195 #try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
196 $ref_dict['ncbi_organism'] |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
197 #except |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
198 Homo_sapiens |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
199 #end try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
200 |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
201 ncbi_prefix = #slurp |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
202 #try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
203 $ref_dict['ncbi_prefix'] |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
204 #except |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
205 Hs |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
206 #end try |
|
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
207 |
| 0 | 208 # Input genome and gene models |
| 209 gene_models = #slurp | |
| 210 #try | |
| 211 $ref_dict['gene_models'] | |
| 212 #except | |
|
22
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
213 \$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).gtf |
| 0 | 214 #end try |
| 215 genome_fasta = #slurp | |
| 216 #try | |
| 217 $ref_dict['genome_fasta'] | |
| 218 #except | |
|
22
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
219 \$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).dna.chromosomes.fa |
| 0 | 220 #end try |
| 221 | |
| 222 # Repeat table from ucsc genome browser | |
| 223 repeats_filename = #slurp | |
| 224 #try | |
| 225 $ref_dict['repeats_filename'] | |
| 226 #except | |
| 227 \$(dataset_directory)/rmsk.txt | |
| 228 #end try | |
| 229 | |
| 230 # EST info downloaded from ucsc genome browser | |
| 231 est_fasta = #slurp | |
| 232 #try | |
| 233 $ref_dict['est_fasta'] | |
| 234 #except | |
| 235 \$(dataset_directory)/est.fa | |
| 236 #end try | |
| 237 est_alignments = #slurp | |
| 238 #try | |
| 239 $ref_dict['est_alignments'] | |
| 240 #except | |
| 241 \$(dataset_directory)/intronEst.txt | |
| 242 #end try | |
| 243 | |
| 244 # Unigene clusters downloaded from ncbi | |
| 245 unigene_fasta = #slurp | |
| 246 #try | |
| 247 $ref_dict['unigene_fasta'] | |
| 248 #except | |
|
23
e8fc5de0578b
defuse.xml fix unigene_fasta path to use ncbi_prefix
Jim Johnson <jj@umn.edu>
parents:
22
diff
changeset
|
249 \$(dataset_directory)/\$(ncbi_prefix).seq.uniq |
| 0 | 250 #end try |
| 251 | |
| 252 # Paths to external tools | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
253 bowtie_bin = __BOWTIE_BIN__ |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
254 bowtie_build_bin = __BOWTIE_BUILD_BIN__ |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
255 blat_bin = __BLAT_BIN__ |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
256 fatotwobit_bin = __FATOTWOBIT_BIN__ |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
257 gmap_bin = __GMAP_BIN__ |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
258 gmap_bin = __GMAP_BIN__ |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
259 gmap_setup_bin = __GMAP_SETUP_BIN__ |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
260 r_bin = __R_BIN__ |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
261 rscript_bin = __RSCRIPT_BIN__ |
| 0 | 262 |
|
7
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
263 # Directory where you want your dataset |
|
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
264 gmap_index_directory = #slurp |
|
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
265 #try |
|
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
266 $ref_dict['gmap_index_directory'] |
|
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
267 #except |
| 12 | 268 #raw |
| 269 $(dataset_directory)/gmap | |
| 270 #end raw | |
|
7
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
271 #end try |
|
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
272 |
| 0 | 273 #raw |
| 274 # Dataset files | |
| 275 dataset_prefix = $(dataset_directory)/defuse | |
| 276 chromosome_prefix = $(dataset_prefix).dna.chromosomes | |
| 277 exons_fasta = $(dataset_prefix).exons.fa | |
| 278 cds_fasta = $(dataset_prefix).cds.fa | |
| 279 cdna_regions = $(dataset_prefix).cdna.regions | |
| 280 cdna_fasta = $(dataset_prefix).cdna.fa | |
| 281 reference_fasta = $(dataset_prefix).reference.fa | |
| 282 rrna_fasta = $(dataset_prefix).rrna.fa | |
| 283 ig_gene_list = $(dataset_prefix).ig.gene.list | |
| 284 repeats_regions = $(dataset_directory)/repeats.regions | |
| 285 est_split_fasta1 = $(dataset_directory)/est.1.fa | |
| 286 est_split_fasta2 = $(dataset_directory)/est.2.fa | |
| 287 est_split_fasta3 = $(dataset_directory)/est.3.fa | |
| 288 est_split_fasta4 = $(dataset_directory)/est.4.fa | |
| 289 est_split_fasta5 = $(dataset_directory)/est.5.fa | |
| 290 est_split_fasta6 = $(dataset_directory)/est.6.fa | |
| 291 est_split_fasta7 = $(dataset_directory)/est.7.fa | |
| 292 est_split_fasta8 = $(dataset_directory)/est.8.fa | |
| 293 est_split_fasta9 = $(dataset_directory)/est.9.fa | |
| 294 | |
| 295 # Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs | |
| 296 prefilter1 = $(unigene_fasta) | |
| 297 | |
| 298 # deFuse scripts and tools | |
| 299 scripts_directory = $(source_directory)/scripts | |
| 300 tools_directory = $(source_directory)/tools | |
| 301 data_directory = $(source_directory)/data | |
| 302 #end raw | |
| 303 | |
| 304 # Path to samtools, 0.1.8 is compiled for you, use other versions at your own risk | |
| 305 samtools_bin = #slurp | |
| 306 #try | |
| 307 $ref_dict['samtools_bin'] | |
| 308 #except | |
| 309 \$(source_directory)/external/samtools-0.1.8/samtools | |
| 310 #end try | |
| 311 | |
| 312 # Bowtie parameters | |
| 313 bowtie_threads = #slurp | |
| 314 #try | |
| 315 $ref_dict['bowtie_threads'] | |
| 316 #except | |
| 317 4 | |
| 318 #end try | |
| 319 bowtie_quals = #slurp | |
| 320 #try | |
| 321 $ref_dict['bowtie_quals'] | |
| 322 #except | |
| 323 --phred33-quals | |
| 324 #end try | |
| 325 max_insert_size = #slurp | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
326 #if $defuse_param.settings == "full" and $defuse_param.max_insert_size.__str__ != "": |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
327 $defuse_param.max_insert_size |
| 0 | 328 #else |
| 329 #try | |
| 330 $ref_dict['max_insert_size'] | |
| 331 #except | |
| 332 500 | |
| 333 #end try | |
| 334 #end if | |
| 335 | |
| 336 # Parameters for building the dataset | |
| 337 chromosomes = #slurp | |
| 338 #try | |
| 339 $ref_dict.chromosomes | |
| 340 #except | |
| 341 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT | |
| 342 #end try | |
| 343 mt_chromosome = #slurp | |
| 344 #try | |
| 345 $ref_dict['mt_chromosome'] | |
| 346 #except | |
| 347 MT | |
| 348 #end try | |
| 349 gene_sources = #slurp | |
| 350 #try | |
| 351 $ref_dict['gene_sources'] | |
| 352 #except | |
| 353 IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding | |
| 354 #end try | |
| 355 ig_gene_sources = #slurp | |
| 356 #try | |
| 357 $ref_dict['ig_gene_sources'] | |
| 358 #except | |
| 359 IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene | |
| 360 #end try | |
| 361 rrna_gene_sources = #slurp | |
| 362 #try | |
| 363 $ref_dict['rrna_gene_sources'] | |
| 364 #except | |
| 365 Mt_rRNA,rRNA,rRNA_pseudogene | |
| 366 #end try | |
| 367 | |
| 368 # Blat sequences per job | |
| 369 num_blat_sequences = #slurp | |
| 370 #try | |
| 371 $ref_dict['num_blat_sequences'] | |
| 372 #except | |
| 373 10000 | |
| 374 #end try | |
| 375 | |
| 376 # Minimum gene fusion range | |
| 377 dna_concordant_length = #slurp | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
378 #if $defuse_param.settings == "full" and $defuse_param.dna_concordant_length.__str__ != "": |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
379 $defuse_param.dna_concordant_length |
| 0 | 380 #else |
| 381 #try | |
| 382 $ref_dict['dna_concordant_length'] | |
| 383 #except | |
| 384 2000 | |
| 385 #end try | |
| 386 #end if | |
| 387 | |
| 388 # Trim length for discordant reads (split reads are not trimmed) | |
| 389 discord_read_trim = #slurp | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
390 #if $defuse_param.settings == "full" and $defuse_param.discord_read_trim.__str__ != "": |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
391 $defuse_param.discord_read_trim |
| 0 | 392 #else |
| 393 #try | |
| 394 $ref_dict['discord_read_trim'] | |
| 395 #except | |
| 396 50 | |
| 397 #end try | |
| 398 #end if | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
399 # Calculate extra annotations, fusion splice index and interrupted index |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
400 calculate_extra_annotations = #slurp |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
401 #if $defuse_param.settings == "full" and $defuse_param.calculate_extra_annotations.__str__ != "": |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
402 $defuse_param.calculate_extra_annotations |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
403 #else |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
404 #try |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
405 $ref_dict['calculate_extra_annotations'] |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
406 #except |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
407 no |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
408 #end try |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
409 #end if |
| 0 | 410 # Filtering parameters |
| 411 clustering_precision = #slurp | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
412 #if $defuse_param.settings == "full" and $defuse_param.clustering_precision.__str__ != "" |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
413 $defuse_param.clustering_precision |
| 0 | 414 #else |
| 415 #try | |
| 416 $ref_dict['clustering_precision'] | |
| 417 #except | |
| 418 0.95 | |
| 419 #end try | |
| 420 #end if | |
| 421 span_count_threshold = #slurp | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
422 #if $defuse_param.settings == "full" and $defuse_param.span_count_threshold.__str__ != "" |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
423 $defuse_param.span_count_threshold |
| 0 | 424 #else |
| 425 #try | |
| 426 $ref_dict['span_count_threshold'] | |
| 427 #except | |
| 428 5 | |
| 429 #end try | |
| 430 #end if | |
| 431 percent_identity_threshold = #slurp | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
432 #if $defuse_param.settings == "full" and $defuse_param.percent_identity_threshold.__str__ != "" |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
433 $defuse_param.percent_identity_threshold |
| 0 | 434 #else |
| 435 #try | |
| 436 $ref_dict['percent_identity_threshold'] | |
| 437 #except | |
| 438 0.90 | |
| 439 #end try | |
| 440 #end if | |
| 441 split_min_anchor = #slurp | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
442 #if $defuse_param.settings == "full" and $defuse_param.split_min_anchor.__str__ != "" |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
443 $defuse_param.split_min_anchor |
| 0 | 444 #else |
| 445 #try | |
| 446 $ref_dict['split_min_anchor'] | |
| 447 #except | |
| 448 4 | |
| 449 #end try | |
| 450 #end if | |
| 451 splice_bias = #slurp | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
452 #if $defuse_param.settings == "full" and $defuse_param.splice_bias.__str__ != "" |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
453 $defuse_param.splice_bias |
| 0 | 454 #else |
| 455 #try | |
| 456 $ref_dict['splice_bias'] | |
| 457 #except | |
| 458 10 | |
| 459 #end try | |
| 460 #end if | |
| 461 denovo_assembly = #slurp | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
462 #if $defuse_param.settings == "full" and $defuse_param.denovo_assembly.__str__ != "" |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
463 $defuse_param.denovo_assembly |
| 0 | 464 #else |
| 465 #try | |
| 466 $ref_dict['denovo_assembly'] | |
| 467 #except | |
| 468 no | |
| 469 #end try | |
| 470 #end if | |
| 471 probability_threshold = #slurp | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
472 #if $defuse_param.settings == "full" and $defuse_param.probability_threshold.__str__ != "" |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
473 $defuse_param.probability_threshold |
| 0 | 474 #else |
| 475 #try | |
| 476 $ref_dict['probability_threshold'] | |
| 477 #except | |
| 478 0.50 | |
| 479 #end try | |
| 480 #end if | |
| 481 positive_controls = \$(data_directory)/controls.txt | |
| 482 | |
| 483 # Position density when calculating covariance | |
| 484 covariance_sampling_density = #slurp | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
485 #if $defuse_param.settings == "full" and $defuse_param.covariance_sampling_density.__str__ != "" |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
486 $defuse_param.covariance_sampling_density |
| 0 | 487 #else |
| 488 #try | |
| 489 $ref_dict['covariance_sampling_density'] | |
| 490 #except | |
| 491 0.01 | |
| 492 #end try | |
| 493 #end if | |
| 494 # Number of reads for each job in split | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
495 reads_per_job = #slurp |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
496 #if $defuse_param.settings == "full" and $defuse_param.reads_per_job.__str__ != "" |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
497 $defuse_param.reads_per_job |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
498 #else |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
499 #try |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
500 $ref_dict['reads_per_job'] |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
501 #except |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
502 1000000 |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
503 #end try |
|
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
504 #end if |
| 0 | 505 |
| 506 #raw | |
| 507 # If you have command line 'mail' and wish to be notified | |
| 508 # mailto = andrew.mcpherson@gmail.com | |
| 509 | |
| 510 # Remove temp files | |
| 511 remove_job_files = yes | |
| 512 remove_job_temp_files = yes | |
| 513 | |
| 514 #end raw | |
| 515 | |
| 516 | |
| 517 </configfile> | |
| 518 <configfile name="shscript"> | |
| 519 #!/bin/bash | |
| 520 ## define some things for cheetah processing | |
| 521 #set $ds = chr(36) | |
| 522 #set $amp = chr(38) | |
| 523 #set $gt = chr(62) | |
| 524 #set $lt = chr(60) | |
| 525 #set $echo_cmd = 'echo' | |
| 526 ## Find the defuse.pl in the galaxy tool path | |
| 527 #import Cheetah.FileUtils | |
| 528 ## declare a bash function for converting a results tsv into html with links to the get_reads output files | |
| 529 results2html() { | |
## Render a defuse results tsv as an html table.
##   argument 1: results tsv to read
##   argument 2: html file to write; the first echo truncates it, everything else appends
##   argument 3: optional directory; when it is non-empty the cluster id cell of each
##               data row links to cluster_ID_reads.txt and get_reads.pl is run once per
##               cluster id to create that file inside the directory
## Rows whose first field matches cluster_id are emitted as header cells; rows whose
## first field matches the digit pattern are emitted as data cells.
| 530 rlts=${ds}1 | |
| 531 rslt_name=`basename ${ds}rlts` | |
| 532 html=${ds}2 | |
| 533 echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse '${ds}rslt_name'${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} ${ds}html | |
| 534 echo '${lt}h2${gt}Defuse '${ds}rslt_name'${lt}/h2${gt}${lt}table${gt}' ${gt}${gt} ${ds}html | |
## no third argument: plain table, no links and no get_reads.pl calls
| 535 if [ -z "${ds}3" ] | |
| 536 then | |
| 537 awk '${ds}1 ~ /cluster_id/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}th${gt}%s${lt}/th${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}\ | |
| 538 ${ds}1 ~ /[1-9][0-9]*/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}td${gt}%s${lt}/td${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}' ${ds}rlts ${gt}${gt} ${ds}html | |
| 539 echo '${lt}/table${gt}' ${gt}${gt} ${ds}html | |
| 540 echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} ${ds}html | |
| 541 else | |
| 542 export _EFP=${ds}3 | |
| 543 mkdir -p ${ds}_EFP | |
## same table, but the first cell of each data row becomes a relative link to cluster_ID_reads.txt
| 544 awk '${ds}1 ~ /cluster_id/{printf("${lt}tr${gt}");for (i = 1; i ${lt}= NF; i++) {printf("${lt}th${gt}%s${lt}/th${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}\ | |
| 545 ${ds}1 ~ /[1-9][0-9]*/{fn="cluster_"${ds}1"_reads.txt"; \ | |
| 546 printf("${lt}tr${gt}${lt}td${gt}${lt}a href=\"%s\"${gt}%s${lt}/a${gt}${lt}/td${gt}",fn, ${ds}1);for (i = 2; i ${lt}= NF; i++) {printf("${lt}td${gt}%s${lt}/td${gt}", ${ds}i);}; printf("${lt}/tr${gt}\n");}' ${ds}rlts ${gt}${gt} ${ds}html | |
| 547 echo '${lt}/table${gt}' ${gt}${gt} ${ds}html | |
| 548 echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} ${ds}html | |
## create the link targets: one get_reads.pl run per cluster id, reading from the output_dir of this job
| 549 for i in `awk '${ds}1 ~ /[1-9][0-9]*/{print ${ds}1}' ${ds}rlts`; | |
| 550 do fn=cluster_${ds}{i}_reads.txt; | |
| 551 pn=${ds}_EFP/${ds}fn; | |
| 552 perl \${DEFUSE_PATH}/scripts/get_reads.pl -c $defuse_config -o output_dir -i ${ds}i ${gt} ${ds}pn; | |
| 553 done | |
| 554 fi | |
| 555 } | |
| 556 ## substitute pathnames into config file | |
## How each line below works: grep looks for a __NAME__ placeholder in the generated
## config with its output discarded, so the backticks expand to nothing and the if is
## driven by the exit status of grep alone. For the tool placeholders the binary is then
## located with which; if that fails the sed is skipped and the placeholder is left as is.
## sed edits the config in place and keeps a backup copy with a .tmp suffix.
## The first line is the exception: it takes its value from the DEFUSE_PATH environment
## variable instead of which.
| 1 | 557 if `grep __DEFUSE_PATH__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DEFUSE_PATH__#\${DEFUSE_PATH}#" $defuse_config; fi |
| 558 if `grep __SAMTOOLS_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} SAMTOOLS_BIN=`which samtools`;then sed -i'.tmp' "s#__SAMTOOLS_BIN__#\${SAMTOOLS_BIN}#" $defuse_config; fi | |
| 559 if `grep __BOWTIE_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BIN=`which bowtie`;then sed -i'.tmp' "s#__BOWTIE_BIN__#\${BOWTIE_BIN}#" $defuse_config; fi | |
| 560 if `grep __BOWTIE_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BUILD_BIN=`which bowtie-build`;then sed -i'.tmp' "s#__BOWTIE_BUILD_BIN__#\${BOWTIE_BUILD_BIN}#" $defuse_config; fi | |
| 561 if `grep __BLAT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BLAT_BIN=`which blat`;then sed -i'.tmp' "s#__BLAT_BIN__#\${BLAT_BIN}#" $defuse_config; fi | |
| 2 | 562 if `grep __FATOTWOBIT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} FATOTWOBIT_BIN=`which faToTwoBit`;then sed -i'.tmp' "s#__FATOTWOBIT_BIN__#\${FATOTWOBIT_BIN}#" $defuse_config; fi |
|
7
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
563 if `grep __GMAP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_BIN=`which gmap`;then sed -i'.tmp' "s#__GMAP_BIN__#\${GMAP_BIN}#" $defuse_config; fi |
|
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
564 if `grep __GMAP_SETUP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_SETUP_BIN=`which gmap_setup`;then sed -i'.tmp' "s#__GMAP_SETUP_BIN__#\${GMAP_SETUP_BIN}#" $defuse_config; fi |
| 1 | 565 if `grep __R_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} R_BIN=`which R`;then sed -i'.tmp' "s#__R_BIN__#\${R_BIN}#" $defuse_config; fi |
| 566 if `grep __RSCRIPT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} RSCRIPT_BIN=`which Rscript`;then sed -i'.tmp' "s#__RSCRIPT_BIN__#\${RSCRIPT_BIN}#" $defuse_config; fi | |
| 0 | 567 |
| 568 | |
| 569 ## copy config to output | |
## the copy is taken at this point in the script, i.e. after the placeholder substitution lines that precede it
| 570 cp $defuse_config $config_txt | |
| 571 ## make a data_dir and ln -s the input fastq | |
## the links give the two inputs the fixed names reads_1.fastq and reads_2.fastq that the defuse.pl call uses
| 572 mkdir -p data_dir | |
| 573 ln -s $left_pairendreads data_dir/reads_1.fastq | |
| 574 ln -s $right_pairendreads data_dir/reads_2.fastq | |
| 575 ## ln to output_dir in from_work_dir | |
## when the defuse_out dataset is requested, output_dir is a symlink to its extra files
## directory; otherwise output_dir is an ordinary directory in the working directory
| 576 #if $defuse_out.__str__ != 'None': | |
| 577 mkdir -p $defuse_out.extra_files_path | |
| 578 ln -s $defuse_out.extra_files_path output_dir | |
| 579 #else | |
| 580 mkdir -p output_dir | |
| 581 #end if | |
| 582 ## run defuse.pl | |
|
27
d57fcac025e2
Add more info fields to defuse_results_to_vcf.py
Jim Johnson <jj@umn.edu>
parents:
25
diff
changeset
|
## Run the deFuse pipeline on the staged reads. The -p job count follows the number of
## slots Galaxy allocated to this job (GALAXY_SLOTS) and falls back to the previous fixed
## value of 8 when that variable is not set.
583 perl \${DEFUSE_PATH}/scripts/defuse.pl -name "$library_name" -c $defuse_config -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir -p \${GALAXY_SLOTS:-8} |
| 0 | 584 ## copy primary results to output datasets |
## every copy is guarded by an existence test, so a file that defuse did not produce is
## skipped rather than causing a cp error; the copy of the unfiltered results.tsv is disabled
| 585 if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
586 ## if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi |
| 0 | 587 if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi |
| 588 if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi | |
|
28
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
589 #if $breakpoints_bam: |
## Optional BAM of breakpoint alignments. The chain below keeps the rows of
## breakpoints.genome.psl whose 10th column is one of the cluster ids found in
## results.filtered.tsv, converts psl to sam to bam, sorts the bam, and copies the
## sorted file to the output dataset. Each step only runs if the previous one succeeded.
## NOTE(review): the Cheetah guard tests breakpoints_bam while the copy target is
## fusions_bam; confirm that both names are defined by the tool.
|
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
590 if [ -e output_dir/results.filtered.tsv ] ${amp}${amp} [ -e output_dir/breakpoints.genome.psl ] |
|
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
591 then |
|
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
## the inner awk lists the cluster ids, tr joins them with a pipe character, and the result is used as the alternation of the outer awk pattern
592 awk "\\$10 ~ /^(`awk '\\$1 ~ /[0-9]+/{print \\$1}' output_dir/results.filtered.tsv | tr '\n' '|'`)\\$/{print \\$0}" output_dir/breakpoints.genome.psl > breakpoints.genome.filtered.psl ${amp}${amp}
|
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
593 psl2sam.pl breakpoints.genome.filtered.psl > breakpoints.genome.filtered.sam ${amp}${amp} |
|
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
## NOTE(review): the -T reference below is a fixed, site-specific NCBIM37 path; presumably
## it should follow the reference data selected for the job. Confirm and replace.
594 samtools view -b -T /panfs/roc/rissdb/galaxy/genomes/NCBIM37/defuse/defuse.reference.fa -o breakpoints.genome.filtered.bam breakpoints.genome.filtered.sam ${amp}${amp} |
|
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
## the second argument is an output prefix; the cp further down expects the result as breakpoints.bam
595 samtools sort breakpoints.genome.filtered.bam breakpoints ${amp}${amp} |
|
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
596 ## samtools index breakpoints.bam |
|
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
597 cp breakpoints.bam $fusions_bam |
|
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
598 fi |
|
f51a95bdc38e
Add breakpoints.bam output derived from breakpoints.genome.psl
Jim Johnson <jj@umn.edu>
parents:
27
diff
changeset
|
599 #end if |
| 0 | 600 ## create html with links for output_dir |
## Written only when the defuse_out dataset was requested and its file exists. The list
## covers the regular files found directly in the extra files directory (find is limited
## to depth 1 and follows symlinks); each entry links to the bare file name.
| 601 #if $defuse_out.__str__ != 'None': | |
| 602 if [ -e $defuse_out ] | |
| 603 then | |
| 604 echo '${lt}html${gt}${lt}head${gt}${lt}title${gt}Defuse Output${lt}/title${gt}${lt}/head${gt}${lt}body${gt}' ${gt} $defuse_out | |
| 605 echo '${lt}h2${gt}Defuse Output Files${lt}/h2${gt}${lt}ul${gt}' ${gt}${gt} $defuse_out | |
## pushd so that find runs relative to the extra files directory; popd restores the working directory afterwards
| 606 pushd $defuse_out.extra_files_path | |
| 607 for f in `find -L . -maxdepth 1 -type f`; | |
| 608 do fn=`basename ${ds}f`; echo '${lt}li${gt}${lt}a href="'${ds}fn'"${gt}'${ds}fn'${lt}/a${gt}${lt}/li${gt}' ${gt}${gt} $defuse_out; | |
| 609 done | |
| 610 popd | |
| 611 echo '${lt}/ul${gt}' ${gt}${gt} $defuse_out | |
| 612 echo '${lt}/body${gt}${lt}/html${gt}' ${gt}${gt} $defuse_out | |
| 613 fi | |
| 614 #end if | |
| 615 ## run get_reads.pl on each cluster | |
## Runs only when the fusion_reads dataset was requested and both the filtered results
## and the dataset file exist. Passing the extra files directory as third argument makes
## results2html write the per-cluster reads files there and link to them from the table.
| 616 #if $fusion_reads.__str__ != 'None': | |
| 617 if [ -e output_dir/results.filtered.tsv -a -e $fusion_reads ] | |
| 618 then | |
| 619 mkdir -p $fusion_reads.extra_files_path | |
| 620 results2html output_dir/results.filtered.tsv $fusion_reads $fusion_reads.extra_files_path | |
| 621 fi | |
| 622 #end if | |
| 623 </configfile> | |
| 624 </configfiles> | |
| 5 | 625 |
| 0 | 626 <tests> |
| 627 </tests> | |
| 628 <help> | |
| 629 **DeFuse** | |
| 630 | |
| 631 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. | |
| 632 | |
| 633 Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138 | |
| 634 | |
| 635 .. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page | |
| 636 | |
| 637 ------ | |
| 638 | |
| 639 **Inputs** | |
| 640 | |
| 641 DeFuse requires 2 fastq files for paired reads, one with the left mate of the paired reads, and a second fastq with the right mate of the paired reads (**with reads in the same order as in the first fastq dataset**). | |
| 642 | |
| 643 If your fastq files have reads in different orders or include unpaired reads, you can preprocess them with **FASTQ interlacer** to create a single interlaced fastq dataset with only the paired reads and input that to **FASTQ de-interlacer** to separate the reads into a left fastq and right fastq. | |
| 644 | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
645 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.4_: |
| 0 | 646 - genome_fasta from Ensembl |
| 647 - gene_models from Ensembl | |
| 648 - repeats_filename from UCSC RepeatMasker rmsk.txt | |
| 649 - est_fasta from UCSC | |
| 650 - est_alignments from UCSC intronEst.txt | |
| 651 - unigene_fasta from NCBI | |
| 652 | |
|
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
653 .. _DeFuse_Version_0.4: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2 |
| 0 | 654 |
| 655 ------ | |
| 656 | |
| 657 **Outputs** | |
| 658 | |
| 659 The galaxy history will contain 5 outputs: the config.txt file that provides DeFuse with its parameters, the defuse.log which details what DeFuse has done and can be useful in determining any errors, and the 3 results files that defuse generates. | |
| 660 | |
| 661 DeFuse generates 3 results files: results.tsv, results.filtered.tsv, and results.classify.tsv. All three files have the same format, though results.classify.tsv has a probability column from the application of the classifier to results.tsv, and results.filtered.tsv has been filtered according to the threshold probability as set in config.txt. | |
| 662 | |
| 663 The file format is tab delimited with one prediction per line, and the following fields per prediction (not necessarily in this order): | |
| 664 | |
| 665 - **Identification** | |
| 666 - cluster_id : random identifier assigned to each prediction | |
| 667 - library_name : library name given on the command line of defuse | |
| 668 - gene1 : ensembl id of gene 1 | |
| 669 - gene2 : ensembl id of gene 2 | |
| 670 - gene_name1 : name of gene 1 | |
| 671 - gene_name2 : name of gene 2 | |
| 672 - **Evidence** | |
| 673 - break_predict : breakpoint prediction method, denovo or splitr, that is considered most reliable | |
| 674 - concordant_ratio : proportion of spanning reads considered concordant by blat | |
| 675 - denovo_min_count : minimum kmer count across denovo assembled sequence | |
| 676 - denovo_sequence : fusion sequence predicted by debruijn based denovo sequence assembly | |
| 677 - denovo_span_pvalue : p-value, lower values are evidence the prediction is a false positive | |
| 678 - gene_align_strand1 : alignment strand for spanning read alignments to gene 1 | |
| 679 - gene_align_strand2 : alignment strand for spanning read alignments to gene 2 | |
| 680 - min_map_count : minimum of the number of genomic mappings for each spanning read | |
| 681 - max_map_count : maximum of the number of genomic mappings for each spanning read | |
| 682 - mean_map_count : average of the number of genomic mappings for each spanning read | |
| 683 - num_multi_map : number of spanning reads that map to more than one genomic location | |
| 684 - span_count : number of spanning reads supporting the fusion | |
| 685 - span_coverage1 : coverage of spanning reads aligned to gene 1 as a proportion of expected coverage | |
| 686 - span_coverage2 : coverage of spanning reads aligned to gene 2 as a proportion of expected coverage | |
| 687 - span_coverage_min : minimum of span_coverage1 and span_coverage2 | |
| 688 - span_coverage_max : maximum of span_coverage1 and span_coverage2 | |
| 689 - splitr_count : number of split reads supporting the prediction | |
| 690 - splitr_min_pvalue : p-value, lower values are evidence the prediction is a false positive | |
| 691 - splitr_pos_pvalue : p-value, lower values are evidence the prediction is a false positive | |
| 692 - splitr_sequence : fusion sequence predicted by split reads | |
| 693 - splitr_span_pvalue : p-value, lower values are evidence the prediction is a false positive | |
| 694 - **Annotation** | |
| 695 - adjacent : fusion between adjacent genes | |
| 696 - altsplice : fusion likely the product of alternative splicing between adjacent genes | |
| 697 - break_adj_entropy1 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 1 | |
| 698 - break_adj_entropy2 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 2 | |
| 699 - break_adj_entropy_min : minimum of break_adj_entropy1 and break_adj_entropy2 | |
| 700 - breakpoint_homology : number of nucleotides at the fusion splice that align equally well to gene 1 or gene 2 | |
| 701 - breakseqs_estislands_percident : maximum percent identity of fusion sequence alignments to est islands | |
| 702 - cdna_breakseqs_percident : maximum percent identity of fusion sequence alignments to cdna | |
| 703 - deletion : fusion produced by a genomic deletion | |
| 704 - est_breakseqs_percident : maximum percent identity of fusion sequence alignments to est | |
| 705 - eversion : fusion produced by a genomic eversion | |
| 706 - exonboundaries : fusion splice at exon boundaries | |
| 707 - expression1 : expression of gene 1 as number of concordant pairs aligned to exons | |
| 708 - expression2 : expression of gene 2 as number of concordant pairs aligned to exons | |
| 709 - gene_chromosome1 : chromosome of gene 1 | |
| 710 - gene_chromosome2 : chromosome of gene 2 | |
| 711 - gene_end1 : end position for gene 1 | |
| 712 - gene_end2 : end position for gene 2 | |
| 713 - gene_location1 : location of breakpoint in gene 1 | |
| 714 - gene_location2 : location of breakpoint in gene 2 | |
| 715 - gene_start1 : start of gene 1 | |
| 716 - gene_start2 : start of gene 2 | |
| 717 - gene_strand1 : strand of gene 1 | |
| 718 - gene_strand2 : strand of gene 2 | |
| 719 - genome_breakseqs_percident : maximum percent identity of fusion sequence alignments to genome | |
| 720 - genomic_break_pos1 : genomic position in gene 1 of fusion splice / breakpoint | |
| 721 - genomic_break_pos2 : genomic position in gene 2 of fusion splice / breakpoint | |
| 722 - genomic_strand1 : genomic strand in gene 1 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream | |
| 723 - genomic_strand2 : genomic strand in gene 2 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream | |
| 724 - interchromosomal : fusion produced by an interchromosomal translocation | |
| 725 - interrupted_index1 : ratio of coverage before and after the fusion splice / breakpoint in gene 1 | |
| 726 - interrupted_index2 : ratio of coverage before and after the fusion splice / breakpoint in gene 2 | |
| 727 - inversion : fusion produced by a genomic inversion | |
| 728 - orf : fusion combines genes in a way that preserves a reading frame | |
| 729 - probability : probability produced by classification using adaboost and example positives/negatives (only given in results.classified.txt) | |
| 730 - read_through : fusion involving adjacent genes potentially resulting from co-transcription rather than genome rearrangement | |
| 731 - repeat_proportion1 : proportion of the spanning reads in gene 1 that span a repeat region | |
| 732 - repeat_proportion2 : proportion of the spanning reads in gene 2 that span a repeat region | |
| 733 - max_repeat_proportion : max of repeat_proportion1 and repeat_proportion2 | |
| 734 - splice_score : number of nucleotides similar to GTAG at fusion splice | |
| 735 - num_splice_variants : number of potential splice variants for this gene pair | |
| 736 - splicing_index1 : number of concordant pairs in gene 1 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 2 | |
| 737 - splicing_index2 : number of concordant pairs in gene 2 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 1 | |
| 738 | |
| 739 | |
| 740 **Example** | |
| 741 | |
| 742 results.tsv:: | |
| 743 | |
| 744 cluster_id splitr_sequence splitr_count splitr_span_pvalue splitr_pos_pvalue splitr_min_pvalue adjacent altsplice break_adj_entropy1 break_adj_entropy2 break_adj_entropy_min break_predict breakpoint_homology breakseqs_estislands_percident cdna_breakseqs_percident concordant_ratio deletion est_breakseqs_percident eversion exonboundaries expression1 expression2 gene1 gene2 gene_align_strand1 gene_align_strand2 gene_chromosome1 gene_chromosome2 gene_end1 gene_end2 gene_location1 gene_location2 gene_name1 gene_name2 gene_start1 gene_start2 gene_strand1 gene_strand2 genome_breakseqs_percident genomic_break_pos1 genomic_break_pos2 genomic_strand1 genomic_strand2 interchromosomal interrupted_index1 interrupted_index2 inversion library_name max_map_count max_repeat_proportion mean_map_count min_map_count num_multi_map num_splice_variants orf read_through repeat_proportion1 repeat_proportion2 span_count span_coverage1 span_coverage2 span_coverage_max span_coverage_min splice_score splicing_index1 splicing_index2 | |
| 745 1169 GCTTACTGTATGCCAGGCCCCAGAGGGGCAACCACCCTCTAAAGAGAGCGGCTCCTGCCTCCCAGAAAGCTCACAGACTGTGGGAGGGAAACAGGCAGCAGGTGAAGATGCCAAATGCCAGGATATCTGCCCTGTCCTTGCTTGATGCAGCTGCTGGCTCCCACGTTCTCCCCAGAATCCCCTCACACTCCTGCTGTTTTCTCTGCAGGTTGGCAGAGCCCCATGAGGGCAGGGCAGCCACTTTGTTCTTGGGCGGCAAACCTCCCTGGGCGGCACGGAAACCACGGTGAGAAGGGGGCAGGTCGGGCACGTGCAGGGACCACGCTGCAGG|TGTACCCAACAGCTCCGAAGAGACAGCGACCATCGAGAACGGGCCATGATGACGATGGCGGTTTTGTCGAAAAGAAAAGGGGGAAATGTGGGGAAAAGCAAGAGAGATCAGATTGTTACTGTGTCTGTGTAGAAAGAAGTAGACATGGGAGACTCCATTTTGTTCTGTACTAAGAAAAATTCTTCTGCCTTGAGATTCGGTGACCCCACCCCCAACCCCGTGCTCTCTGAAACATGTGCTGTGTCCACTCAGGGTTGAATGGATTAAGGGCGGTGCGAGACGTGCTTT 2 0.000436307890680442 0.110748295953850 0.0880671602973091 N Y 3.19872427442695 3.48337348351473 3.19872427442695 splitr 0 0 0 0 Y 0 N N 0 0 ENSG00000105549 ENSG00000213753 + - 19 19 376013 59111168 intron upstream THEG AC016629.2 361750 59084870 - + 0 375099 386594 + - N 8.34107429512245 - N output_dir 82 0.677852348993289 40.6666666666667 1 11 1 N N 0.361271676300578 0.677852348993289 12 0.758602776578432 0.569678713445872 0.758602776578432 0.569678713445872 2 0.416666666666667 - | |
| 746 3596 TGGGGGTTGAGGCTTCTGTTCCCAGGTTCCATGACCTCAGAGGTGGCTGGTGAGGTTATGACCTTTGCCCTCCAGCCCTGGCTTAAAACCTCAGCCCTAGGACCTGGTTAAAGGAAGGGGAGATGGAGCTTTGCCCCGACCCCCCCCCGTTCCCCTCACCTGTCAGCCCGAGCTGGGCCAGGGCCCCTAGGTGGGGAACTGGGCCGGGGGGCGGGCACAAGCGGAGGTGGTGCCCCCAAAAGGGCTCCCGGTGGGGTCTTGCTGAGAAGGTGAGGGGTTCCCGGGGCCGCAGCAGGTGGTGGTGGAGGAGCCAAGCGGCTGTAGAGCAAGGGGTGAGCAGGTTCCAGACCGTAGAGGCGGGCAGCGGCCACGGCCCCGGGTCCAGTTAGCTCCTCACCCGCCTCATAGAAGCGGGGTGGCCTTGCCAGGCGTGGGGGTGCTGCC|TTCCTTGGATGTGGTAGCCGTTTCTCAGGCTCCCTCTCCGGAATCGAACCCTGATTCCCCGTCACCCGTGGTCACCATGGTAGGCACGGCGACTACCATCGAAAGTTGATAGGGCAGACGTTCGAATGGGTCGTCGCCGCCACGGGGGGCGTGCGATCAGCCCGAGGTTATCTAGAGTCACCAAAGCCGCCGGCGCCCGCCCCCCGGCCGGGGCCGGAGAGGGGCTGACCGGGTTGGTTTTGATCTGATAAATGCACGCATCCCCCCCGCGAAGGGGGTCAGCGCCCGTCGGCATGTATTAGCTCTAGAATTACCACAGTTATCCAAGTAGGAGAGGAGCGAGCGACCAAAGGAACCATAACTGATTTAATGAGCCATTCGCAGTTTCACTGTACCGGCCGTGCGTACTTAGACATGCATGGCTTAATCTTTGAGACAAGCATATGCTACTGGCAGG 250 7.00711162298275e-72 0.00912124762512338 0.00684237452309549 N N 3.31745197152461 3.47233119514066 3.31745197152461 splitr 7 0.0157657657657656 0 0 N 0.0135135135135136 N N 0 0 ENSG00000156860 ENSG00000212932 - + 16 21 30682131 48111157 coding upstream FBRS RPL23AP4 30670289 48110676 + + 0.0157657657657656 30680678 9827473 - + Y - - N output_dir 2 1 1.11111111111111 1 1 1 N N 0 1 9 0.325530693397641 0.296465452915709 0.325530693397641 0.296465452915709 2 - - | |
| 747 | |
| 748 </help> | |
| 749 </tool> |
