Mercurial > repos > jjohnson > defuse
annotate defuse.xml @ 46:e500b50b72fd draft default tip
Uploaded
author | jjohnson |
---|---|
date | Thu, 19 Oct 2017 10:05:54 -0400 |
parents | aedaa66483f1 |
children |
rev | line source |
---|---|
44 | 1 <tool id="defuse" name="DeFuse" version="@DEFUSE_VERSION@.1"> |
2 <description>identify fusion transcripts</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <requirements> | |
7 <expand macro="defuse_requirement" /> | |
8 </requirements> | |
45 | 9 <command><![CDATA[ |
10 #if $defuse_out.__str__ != 'None': | |
11 ## ln to output_dir in from_work_dir | |
46 | 12 mkdir -p $defuse_out.files_path && |
13 ln -s $defuse_out.files_path output_dir && | |
45 | 14 #else |
15 mkdir -p output_dir && | |
16 #end if | |
17 ## Put executable paths in config file | |
18 $__tool_directory__/config_sub.sh $defuse_config output_dir/defuse.cfg && | |
19 ## copy config to output | |
46 | 20 cp output_dir/defuse.cfg $config_txt && |
45 | 21 ## make a data_dir and ln -s the input fastq |
22 mkdir -p data_dir && | |
23 ln -s "$left_pairendreads" data_dir/reads_1.fastq && | |
24 ln -s "$right_pairendreads" data_dir/reads_2.fastq && | |
25 ## run | |
46 | 26 DATASET_DIRECTORY=`grep '^dataset_directory' output_dir/defuse.cfg | awk '{print \$NF}'` && |
27 defuse_run.pl --name "$library_name" --config output_dir/defuse.cfg --dataset \$DATASET_DIRECTORY -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir -p \$GALAXY_SLOTS && | |
45 | 28 grep -v cluster_id output_dir/results.filtered.tsv | awk '{print $1}' > cluster_id_list && |
29 get_fusion_fastq.pl --list cluster_id_list --output output_dir --fastq1 results.fusions_1.fq --fastq2 results.fusions_2.fq && | |
30 cp output_dir/results.* . && | |
46 | 31 cp `find -L output_dir -name defuse.log` $defuse_log |
45 | 32 #if $defuse_out.__str__ != 'None': |
46 | 33 && $__tool_directory__/make_html.sh $defuse_out $defuse_out.files_path |
45 | 34 #end if |
35 ]]></command> | |
0 | 36 <inputs> |
37 <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right read pairs must be in the same order and must not contain any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired ones. FASTQ de-interlacer will separate the result into left and right reads.)"/> | |
38 <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/> | |
27
d57fcac025e2
Add more info fields to defuse_results_to_vcf.py
Jim Johnson <jj@umn.edu>
parents:
25
diff
changeset
|
39 <param name="library_name" type="text" value="unknown" label="library name" help="Value to put in the results library_name column"> |
d57fcac025e2
Add more info fields to defuse_results_to_vcf.py
Jim Johnson <jj@umn.edu>
parents:
25
diff
changeset
|
40 <validator type="length" min="1"/> |
d57fcac025e2
Add more info fields to defuse_results_to_vcf.py
Jim Johnson <jj@umn.edu>
parents:
25
diff
changeset
|
41 </param> |
0 | 42 <conditional name="refGenomeSource"> |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
43 <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help=""> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
44 <option value="indexed">Use a built-in DeFuse Reference Dataset</option> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
45 <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
46 </param> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
47 <when value="indexed"> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
48 <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team"> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
49 <options from_file="defuse_reference.loc"> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
50 <column name="name" index="1"/> |
22
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
51 <column name="value" index="3"/> |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
52 <filter type="sort_by" column="0" /> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
53 <validator type="no_options" message="No indexes are available" /> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
54 </options> |
0 | 55 </param> |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
56 </when> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
57 <when value="history"> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
58 <param name="config" type="data" format="defuse.conf" label="Defuse Config file" help=""/> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
59 </when> <!-- history --> |
0 | 60 </conditional> <!-- refGenomeSource --> |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
61 <conditional name="defuse_param"> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
62 <param name="settings" type="select" label="Defuse parameter settings" help=""> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
63 <option value="preSet">Default settings</option> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
64 <option value="full">Full parameter list</option> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
65 </param> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
66 <when value="preSet" /> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
67 <when value="full"> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
68 <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" /> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
69 <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" /> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
70 <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" /> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
71 <param name="calculate_extra_annotations" type="select" label="Calculate extra annotations, fusion splice index and interrupted index" help=""> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
72 <option value="">Use Default</option> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
73 <option value="no">no</option> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
74 <option value="yes">yes</option> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
75 </param> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
76 <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision"> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
77 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
78 </param> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
79 <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" /> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
80 <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold"> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
81 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
82 </param> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
83 <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" /> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
84 <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" /> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
85 <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold"> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
86 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
87 </param> |
45 | 88 <param name="multi_exon_transcripts_stats" type="select" label="Use multiple exon transcripts for stats calculations" help="should be enabled for very small libraries"> |
89 <option value="no" selected="true">no</option> | |
90 <option value="yes">yes</option> | |
91 </param> | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
92 <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density"> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
93 <help>Position density when calculating covariance</help> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
94 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
95 </param> |
45 | 96 <param name="max_paired_alignments" type="integer" value="10" optional="true" label="max_paired_alignments"> |
97 <help>Maximum number of alignments for a read pair. Pairs with more alignments are filtered out; the default is 10.</help> | |
98 <validator type="in_range" message="Choose a value between 1 and 100" min="1" max="100"/> | |
99 </param> | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
100 <param name="denovo_assembly" type="select" label="denovo_assembly" help=""> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
101 <option value="">Use Default</option> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
102 <option value="no">no</option> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
103 <option value="yes">yes</option> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
104 </param> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
105 <!-- |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
106 <param name="positive_controls" type="data" format="txt" optional="true" label="Defuse positive_controls" help=""/> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
107 --> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
108 <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" /> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
109 </when> <!-- full --> |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
110 </conditional> <!-- defuse_param --> |
5 | 111 <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" |
112 help="The files in the defuse output working directory can be helpful for determining errors that may have occurred during the run, | |
113 but they require considerable disk space and should be deleted and purged when no longer needed."/> | |
0 | 114 </inputs> |
5 | 115 <outputs> |
116 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> | |
117 <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" /> | |
118 <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)"> | |
119 <filter>keep_output == True</filter> | |
120 </data> | |
45 | 121 <data format="defuse.results.tsv" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" from_work_dir="results.classify.tsv"/> |
122 <data format="defuse.results.tsv" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" from_work_dir="results.filtered.tsv"/> | |
123 <data format="fastqsanger" name="results_fusions1_fq" label="${tool.name} on ${on_string}: fusions_1.fq" from_work_dir="results.fusions_1.fq" /> | |
124 <data format="fastqsanger" name="results_fusions2_fq" label="${tool.name} on ${on_string}: fusions_2.fq" from_work_dir="results.fusions_2.fq" /> | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
125 <!-- |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
126 expression_plot |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
127 circos plot |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
128 --> |
5 | 129 </outputs> |
0 | 130 <configfiles> |
131 <configfile name="defuse_config"> | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
132 #import re |
0 | 133 #if $refGenomeSource.genomeSource == "history": |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
134 #set config_file = $refGenomeSource.config.__str__ |
0 | 135 #else |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
136 #set config_file = $refGenomeSource.index.value |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
137 #end if |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
138 #set pat = '^\s*([^#=][^=]*?)\s*=\s*(.*?)\s*$' |
22
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
139 #set fh = open($config_file) |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
140 #set keys = ['dataset_directory','ensembl_organism','ensembl_prefix','ensembl_version','ensembl_genome_version','ucsc_genome_version','ncbi_organism','ncbi_prefix','chromosomes','mt_chromosome','gene_sources','ig_gene_sources','rrna_gene_sources'] |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
141 #set kv = [] |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
142 #for $line in $fh: |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
143 #set m = $re.match($pat,$line) |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
144 #if $m and len($m.groups()) == 2: |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
145 ## #echo $line |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
146 #if $m.groups()[0] in keys: |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
147 #set k = $m.groups()[0] |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
148 #if k == 'dataset_directory' and $refGenomeSource.genomeSource == "indexed": |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
149 ## The DataManager is configured to place the config file in the same directory as the defuse_data: dataset_directory |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
150 #set v = $os.path.dirname($config_file) |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
151 #else: |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
152 #set v = $m.groups()[1] |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
153 #end if |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
154 #set kv = $kv + [[$k, $v]] |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
155 #end if |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
156 #end if |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
157 #end for |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
158 ## #echo $kv |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
159 #set ref_dict = dict($kv) |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
160 ## #echo $ref_dict |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
161 ## include raw $refGenomeSource.config.__str__ |
0 | 162 # |
163 # Configuration file for defuse | |
164 # | |
165 # At a minimum, change all values enclosed by [] | |
166 # | |
167 | |
168 # Directory where the defuse code was unpacked | |
169 ## Default location in the tool/defuse directory | |
170 # source_directory = ${__root_dir__}/tools/defuse | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
171 source_directory = __DEFUSE_PATH__ |
0 | 172 |
173 # Directory where you want your dataset | |
174 dataset_directory = #slurp | |
175 #try | |
176 $ref_dict['dataset_directory'] | |
177 #except | |
178 /project/db/genomes/Hsapiens/hg19/defuse | |
179 #end try | |
180 | |
22
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
181 # Organism IDs |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
182 ensembl_organism = #slurp |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
183 #try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
184 $ref_dict['ensembl_organism'] |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
185 #except |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
186 homo_sapiens |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
187 #end try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
188 |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
189 ensembl_prefix = #slurp |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
190 #try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
191 $ref_dict['ensembl_prefix'] |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
192 #except |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
193 Homo_sapiens |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
194 #end try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
195 |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
196 ensembl_version = #slurp |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
197 #try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
198 $ref_dict['ensembl_version'] |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
199 #except |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
200 71 |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
201 #end try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
202 |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
203 ensembl_genome_version = #slurp |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
204 #try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
205 $ref_dict['ensembl_genome_version'] |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
206 #except |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
207 GRCh37 |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
208 #end try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
209 |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
210 ucsc_genome_version = #slurp |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
211 #try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
212 $ref_dict['ucsc_genome_version'] |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
213 #except |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
214 hg19 |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
215 #end try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
216 |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
217 ncbi_organism = #slurp |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
218 #try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
219 $ref_dict['ncbi_organism'] |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
220 #except |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
221 Homo_sapiens |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
222 #end try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
223 |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
224 ncbi_prefix = #slurp |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
225 #try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
226 $ref_dict['ncbi_prefix'] |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
227 #except |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
228 Hs |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
229 #end try |
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
230 |
0 | 231 # Input genome and gene models |
232 gene_models = #slurp | |
233 #try | |
234 $ref_dict['gene_models'] | |
235 #except | |
22
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
236 \$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).gtf |
0 | 237 #end try |
238 genome_fasta = #slurp | |
239 #try | |
240 $ref_dict['genome_fasta'] | |
241 #except | |
22
68494d6aabeb
Update datamanager and defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
19
diff
changeset
|
242 \$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).dna.chromosomes.fa |
0 | 243 #end try |
244 | |
245 # Repeat table from ucsc genome browser | |
246 repeats_filename = #slurp | |
247 #try | |
248 $ref_dict['repeats_filename'] | |
249 #except | |
250 \$(dataset_directory)/rmsk.txt | |
251 #end try | |
252 | |
253 # EST info downloaded from ucsc genome browser | |
254 est_fasta = #slurp | |
255 #try | |
256 $ref_dict['est_fasta'] | |
257 #except | |
258 \$(dataset_directory)/est.fa | |
259 #end try | |
260 est_alignments = #slurp | |
261 #try | |
262 $ref_dict['est_alignments'] | |
263 #except | |
264 \$(dataset_directory)/intronEst.txt | |
265 #end try | |
266 | |
267 # Unigene clusters downloaded from ncbi | |
268 unigene_fasta = #slurp | |
269 #try | |
270 $ref_dict['unigene_fasta'] | |
271 #except | |
23
e8fc5de0578b
defuse.xml fix unigene_fasta path to use ncbi_prefix
Jim Johnson <jj@umn.edu>
parents:
22
diff
changeset
|
272 \$(dataset_directory)/\$(ncbi_prefix).seq.uniq |
0 | 273 #end try |
274 | |
275 # Paths to external tools | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
276 bowtie_bin = __BOWTIE_BIN__ |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
277 bowtie_build_bin = __BOWTIE_BUILD_BIN__ |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
278 blat_bin = __BLAT_BIN__ |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
279 fatotwobit_bin = __FATOTWOBIT_BIN__ |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
280 gmap_bin = __GMAP_BIN__ |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
281 gmap_bin = __GMAP_BIN__ |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
282 gmap_setup_bin = __GMAP_SETUP_BIN__ |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
283 r_bin = __R_BIN__ |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
284 rscript_bin = __RSCRIPT_BIN__ |
0 | 285 |
7
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
286 # Directory where the GMAP index is stored |
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
287 gmap_index_directory = #slurp |
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
288 #try |
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
289 $ref_dict['gmap_index_directory'] |
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
290 #except |
12 | 291 #raw |
292 $(dataset_directory)/gmap | |
293 #end raw | |
7
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
294 #end try |
f4eadbd2e7c1
Add gmap_setup_bin and gmap_index_directory to defuse.xml config file generation
Jim Johnson <jj@umn.edu>
parents:
6
diff
changeset
|
295 |
0 | 296 #raw |
297 # Dataset files | |
298 dataset_prefix = $(dataset_directory)/defuse | |
299 chromosome_prefix = $(dataset_prefix).dna.chromosomes | |
300 exons_fasta = $(dataset_prefix).exons.fa | |
301 cds_fasta = $(dataset_prefix).cds.fa | |
302 cdna_regions = $(dataset_prefix).cdna.regions | |
303 cdna_fasta = $(dataset_prefix).cdna.fa | |
304 reference_fasta = $(dataset_prefix).reference.fa | |
305 rrna_fasta = $(dataset_prefix).rrna.fa | |
306 ig_gene_list = $(dataset_prefix).ig.gene.list | |
307 repeats_regions = $(dataset_directory)/repeats.regions | |
308 est_split_fasta1 = $(dataset_directory)/est.1.fa | |
309 est_split_fasta2 = $(dataset_directory)/est.2.fa | |
310 est_split_fasta3 = $(dataset_directory)/est.3.fa | |
311 est_split_fasta4 = $(dataset_directory)/est.4.fa | |
312 est_split_fasta5 = $(dataset_directory)/est.5.fa | |
313 est_split_fasta6 = $(dataset_directory)/est.6.fa | |
314 est_split_fasta7 = $(dataset_directory)/est.7.fa | |
315 est_split_fasta8 = $(dataset_directory)/est.8.fa | |
316 est_split_fasta9 = $(dataset_directory)/est.9.fa | |
317 | |
318 # Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs | |
319 prefilter1 = $(unigene_fasta) | |
320 | |
321 # deFuse scripts and tools | |
322 scripts_directory = $(source_directory)/scripts | |
323 tools_directory = $(source_directory)/tools | |
324 data_directory = $(source_directory)/data | |
325 #end raw | |
326 | |
327 # Path to samtools, 0.1.8 is compiled for you, use other versions at your own risk | |
328 samtools_bin = #slurp | |
329 #try | |
330 $ref_dict['samtools_bin'] | |
331 #except | |
332 \$(source_directory)/external/samtools-0.1.8/samtools | |
333 #end try | |
334 | |
335 # Bowtie parameters | |
336 bowtie_threads = #slurp | |
337 #try | |
338 $ref_dict['bowtie_threads'] | |
339 #except | |
340 4 | |
341 #end try | |
342 bowtie_quals = #slurp | |
343 #try | |
344 $ref_dict['bowtie_quals'] | |
345 #except | |
346 --phred33-quals | |
347 #end try | |
45 | 348 bowtie_params = #slurp |
349 #try | |
350 $ref_dict['bowtie_params'] | |
351 #except | |
352 --chunkmbs 200 | |
353 #end try | |
0 | 354 max_insert_size = #slurp |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
355 #if $defuse_param.settings == "full" and $defuse_param.max_insert_size.__str__ != "": |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
356 $defuse_param.max_insert_size |
0 | 357 #else |
358 #try | |
359 $ref_dict['max_insert_size'] | |
360 #except | |
361 500 | |
362 #end try | |
363 #end if | |
364 | |
365 # Parameters for building the dataset | |
366 chromosomes = #slurp | |
367 #try | |
368 $ref_dict.chromosomes | |
369 #except | |
370 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT | |
371 #end try | |
372 mt_chromosome = #slurp | |
373 #try | |
374 $ref_dict['mt_chromosome'] | |
375 #except | |
376 MT | |
377 #end try | |
378 gene_sources = #slurp | |
379 #try | |
380 $ref_dict['gene_sources'] | |
381 #except | |
382 IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding | |
383 #end try | |
384 ig_gene_sources = #slurp | |
385 #try | |
386 $ref_dict['ig_gene_sources'] | |
387 #except | |
388 IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene | |
389 #end try | |
390 rrna_gene_sources = #slurp | |
391 #try | |
392 $ref_dict['rrna_gene_sources'] | |
393 #except | |
394 Mt_rRNA,rRNA,rRNA_pseudogene | |
395 #end try | |
396 | |
397 # Blat sequences per job | |
398 num_blat_sequences = #slurp | |
399 #try | |
400 $ref_dict['num_blat_sequences'] | |
401 #except | |
402 10000 | |
403 #end try | |
404 | |
405 # Minimum gene fusion range | |
406 dna_concordant_length = #slurp | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
407 #if $defuse_param.settings == "full" and $defuse_param.dna_concordant_length.__str__ != "": |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
408 $defuse_param.dna_concordant_length |
0 | 409 #else |
410 #try | |
411 $ref_dict['dna_concordant_length'] | |
412 #except | |
413 2000 | |
414 #end try | |
415 #end if | |
416 | |
417 # Trim length for discordant reads (split reads are not trimmed) | |
418 discord_read_trim = #slurp | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
419 #if $defuse_param.settings == "full" and $defuse_param.discord_read_trim.__str__ != "": |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
420 $defuse_param.discord_read_trim |
0 | 421 #else |
422 #try | |
423 $ref_dict['discord_read_trim'] | |
424 #except | |
425 50 | |
426 #end try | |
427 #end if | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
428 # Calculate extra annotations, fusion splice index and interrupted index |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
429 calculate_extra_annotations = #slurp |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
430 #if $defuse_param.settings == "full" and $defuse_param.calculate_extra_annotations.__str__ != "": |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
431 $defuse_param.calculate_extra_annotations |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
432 #else |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
433 #try |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
434 $ref_dict['calculate_extra_annotations'] |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
435 #except |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
436 no |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
437 #end try |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
438 #end if |
0 | 439 # Filtering parameters |
440 clustering_precision = #slurp | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
441 #if $defuse_param.settings == "full" and $defuse_param.clustering_precision.__str__ != "" |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
442 $defuse_param.clustering_precision |
0 | 443 #else |
444 #try | |
445 $ref_dict['clustering_precision'] | |
446 #except | |
447 0.95 | |
448 #end try | |
449 #end if | |
450 span_count_threshold = #slurp | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
451 #if $defuse_param.settings == "full" and $defuse_param.span_count_threshold.__str__ != "" |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
452 $defuse_param.span_count_threshold |
0 | 453 #else |
454 #try | |
455 $ref_dict['span_count_threshold'] | |
456 #except | |
457 5 | |
458 #end try | |
459 #end if | |
460 percent_identity_threshold = #slurp | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
461 #if $defuse_param.settings == "full" and $defuse_param.percent_identity_threshold.__str__ != "" |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
462 $defuse_param.percent_identity_threshold |
0 | 463 #else |
464 #try | |
465 $ref_dict['percent_identity_threshold'] | |
466 #except | |
467 0.90 | |
468 #end try | |
469 #end if | |
470 split_min_anchor = #slurp | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
471 #if $defuse_param.settings == "full" and $defuse_param.split_min_anchor.__str__ != "" |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
472 $defuse_param.split_min_anchor |
0 | 473 #else |
474 #try | |
475 $ref_dict['split_min_anchor'] | |
476 #except | |
477 4 | |
478 #end try | |
479 #end if | |
480 splice_bias = #slurp | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
481 #if $defuse_param.settings == "full" and $defuse_param.splice_bias.__str__ != "" |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
482 $defuse_param.splice_bias |
0 | 483 #else |
484 #try | |
485 $ref_dict['splice_bias'] | |
486 #except | |
487 10 | |
488 #end try | |
489 #end if | |
490 denovo_assembly = #slurp | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
491 #if $defuse_param.settings == "full" and $defuse_param.denovo_assembly.__str__ != "" |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
492 $defuse_param.denovo_assembly |
0 | 493 #else |
494 #try | |
495 $ref_dict['denovo_assembly'] | |
496 #except | |
497 no | |
498 #end try | |
499 #end if | |
500 probability_threshold = #slurp | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
501 #if $defuse_param.settings == "full" and $defuse_param.probability_threshold.__str__ != "" |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
502 $defuse_param.probability_threshold |
0 | 503 #else |
504 #try | |
505 $ref_dict['probability_threshold'] | |
506 #except | |
507 0.50 | |
508 #end try | |
509 #end if | |
510 positive_controls = \$(data_directory)/controls.txt | |
511 | |
45 | 512 # Use multiple exon transcripts for stats calculations (yes/no) |
513 # should be enabled for very small libraries | |
514 multi_exon_transcripts_stats = #slurp | |
515 #if $defuse_param.settings == "full" and $defuse_param.multi_exon_transcripts_stats.__str__ != "" | |
516 $defuse_param.multi_exon_transcripts_stats | |
517 #else | |
518 #try | |
519 $ref_dict['multi_exon_transcripts_stats'] | |
520 #except | |
521 no | |
522 #end try | |
523 #end if | |
524 | |
0 | 525 # Position density when calculating covariance |
526 covariance_sampling_density = #slurp | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
527 #if $defuse_param.settings == "full" and $defuse_param.covariance_sampling_density.__str__ != "" |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
528 $defuse_param.covariance_sampling_density |
0 | 529 #else |
530 #try | |
531 $ref_dict['covariance_sampling_density'] | |
532 #except | |
533 0.01 | |
534 #end try | |
535 #end if | |
45 | 536 |
537 # Maximum number of alignments for a read pair | |
538 # Pairs with more alignments are filtered | |
539 max_paired_alignments = #slurp | |
540 #if $defuse_param.settings == "full" and $defuse_param.max_paired_alignments.__str__ != "" | |
541 $defuse_param.max_paired_alignments | |
542 #else | |
543 #try | |
544 $ref_dict['max_paired_alignments'] | |
545 #except | |
546 10 | |
547 #end try | |
548 #end if | |
549 | |
0 | 550 # Number of reads for each job in split |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
551 reads_per_job = #slurp |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
552 #if $defuse_param.settings == "full" and $defuse_param.reads_per_job.__str__ != "" |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
553 $defuse_param.reads_per_job |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
554 #else |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
555 #try |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
556 $ref_dict['reads_per_job'] |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
557 #except |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
558 1000000 |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
559 #end try |
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
560 #end if |
0 | 561 |
562 #raw | |
563 # If you have command line 'mail' and wish to be notified | |
564 # mailto = andrew.mcpherson@gmail.com | |
565 | |
566 # Remove temp files | |
567 remove_job_files = yes | |
568 remove_job_temp_files = yes | |
569 | |
45 | 570 qsub_params = "" |
571 | |
0 | 572 #end raw |
573 | |
574 </configfile> | |
575 </configfiles> | |
5 | 576 |
0 | 577 <tests> |
578 </tests> | |
579 <help> | |
580 **DeFuse** | |
581 | |
582 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. | |
583 | |
584 Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138 | |
585 | |
586 .. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page | |
587 | |
588 ------ | |
589 | |
590 **Inputs** | |
591 | |
592 DeFuse requires 2 fastq files for paired reads, one with the left mate of the paired reads, and a second fastq with the right mate of the paired reads (**with reads in the same order as in the first fastq dataset**). | |
593 | |
594 If your fastq files have reads in different orders or include unpaired reads, you can preprocess them with **FASTQ interlacer** to create a single interlaced fastq dataset with only the paired reads and input that to **FASTQ de-interlacer** to separate the reads into a left fastq and right fastq. | |
595 | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
596 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.4_: |
0 | 597 - genome_fasta from Ensembl |
598 - gene_models from Ensembl | |
599 - repeats_filename from UCSC RepeatMasker rmsk.txt | |
600 - est_fasta from UCSC | |
601 - est_alignments from UCSC intronEst.txt | |
602 - unigene_fasta from NCBI | |
603 | |
19
1af6f32ff592
Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
18
diff
changeset
|
604 .. _DeFuse_Version_0.4: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2 |
0 | 605 |
606 ------ | |
607 | |
608 **Outputs** | |
609 | |
610 The Galaxy history will contain 5 outputs: the config.txt file that provides DeFuse with its parameters, the defuse.log, which details what DeFuse has done and can be useful in determining any errors, and the 3 results files that DeFuse generates. | |
611 | |
612 DeFuse generates 3 results files: results.txt, results.filtered.txt, and results.classify.txt. All three files have the same format, though results.classify.txt has a probability column from the application of the classifier to results.txt, and results.filtered.txt has been filtered according to the threshold probability as set in config.txt. | |
613 | |
614 The file format is tab delimited with one prediction per line, and the following fields per prediction (not necessarily in this order): | |
615 | |
616 - **Identification** | |
617 - cluster_id : random identifier assigned to each prediction | |
618 - library_name : library name given on the command line of defuse | |
619 - gene1 : ensembl id of gene 1 | |
620 - gene2 : ensembl id of gene 2 | |
621 - gene_name1 : name of gene 1 | |
622 - gene_name2 : name of gene 2 | |
623 - **Evidence** | |
624 - break_predict : breakpoint prediction method, denovo or splitr, that is considered most reliable | |
625 - concordant_ratio : proportion of spanning reads considered concordant by blat | |
626 - denovo_min_count : minimum kmer count across denovo assembled sequence | |
627 - denovo_sequence : fusion sequence predicted by debruijn based denovo sequence assembly | |
628 - denovo_span_pvalue : p-value, lower values are evidence the prediction is a false positive | |
629 - gene_align_strand1 : alignment strand for spanning read alignments to gene 1 | |
630 - gene_align_strand2 : alignment strand for spanning read alignments to gene 2 | |
631 - min_map_count : minimum of the number of genomic mappings for each spanning read | |
632 - max_map_count : maximum of the number of genomic mappings for each spanning read | |
633 - mean_map_count : average of the number of genomic mappings for each spanning read | |
634 - num_multi_map : number of spanning reads that map to more than one genomic location | |
635 - span_count : number of spanning reads supporting the fusion | |
636 - span_coverage1 : coverage of spanning reads aligned to gene 1 as a proportion of expected coverage | |
637 - span_coverage2 : coverage of spanning reads aligned to gene 2 as a proportion of expected coverage | |
638 - span_coverage_min : minimum of span_coverage1 and span_coverage2 | |
639 - span_coverage_max : maximum of span_coverage1 and span_coverage2 | |
640 - splitr_count : number of split reads supporting the prediction | |
641 - splitr_min_pvalue : p-value, lower values are evidence the prediction is a false positive | |
642 - splitr_pos_pvalue : p-value, lower values are evidence the prediction is a false positive | |
643 - splitr_sequence : fusion sequence predicted by split reads | |
644 - splitr_span_pvalue : p-value, lower values are evidence the prediction is a false positive | |
645 - **Annotation** | |
646 - adjacent : fusion between adjacent genes | |
647 - altsplice : fusion likely the product of alternative splicing between adjacent genes | |
648 - break_adj_entropy1 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 1 | |
649 - break_adj_entropy2 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 2 | |
650 - break_adj_entropy_min : minimum of break_adj_entropy1 and break_adj_entropy2 | |
651 - breakpoint_homology : number of nucleotides at the fusion splice that align equally well to gene 1 or gene 2 | |
652 - breakseqs_estislands_percident : maximum percent identity of fusion sequence alignments to est islands | |
653 - cdna_breakseqs_percident : maximum percent identity of fusion sequence alignments to cdna | |
654 - deletion : fusion produced by a genomic deletion | |
655 - est_breakseqs_percident : maximum percent identity of fusion sequence alignments to est | |
656 - eversion : fusion produced by a genomic eversion | |
657 - exonboundaries : fusion splice at exon boundaries | |
658 - expression1 : expression of gene 1 as number of concordant pairs aligned to exons | |
659 - expression2 : expression of gene 2 as number of concordant pairs aligned to exons | |
660 - gene_chromosome1 : chromosome of gene 1 | |
661 - gene_chromosome2 : chromosome of gene 2 | |
662 - gene_end1 : end position for gene 1 | |
663 - gene_end2 : end position for gene 2 | |
664 - gene_location1 : location of breakpoint in gene 1 | |
665 - gene_location2 : location of breakpoint in gene 2 | |
666 - gene_start1 : start of gene 1 | |
667 - gene_start2 : start of gene 2 | |
668 - gene_strand1 : strand of gene 1 | |
669 - gene_strand2 : strand of gene 2 | |
670 - genome_breakseqs_percident : maximum percent identity of fusion sequence alignments to genome | |
671 - genomic_break_pos1 : genomic position in gene 1 of fusion splice / breakpoint | |
672 - genomic_break_pos2 : genomic position in gene 2 of fusion splice / breakpoint | |
673 - genomic_strand1 : genomic strand in gene 1 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream | |
674 - genomic_strand2 : genomic strand in gene 2 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream | |
675 - interchromosomal : fusion produced by an interchromosomal translocation | |
676 - interrupted_index1 : ratio of coverage before and after the fusion splice / breakpoint in gene 1 | |
677 - interrupted_index2 : ratio of coverage before and after the fusion splice / breakpoint in gene 2 | |
678 - inversion : fusion produced by genomic inversion | |
679 - orf : fusion combines genes in a way that preserves a reading frame | |
680 - probability : probability produced by classification using adaboost and example positives/negatives (only given in results.classify.txt) | |
681 - read_through : fusion involving adjacent genes, potentially resulting from co-transcription rather than genome rearrangement | |
682 - repeat_proportion1 : proportion of the spanning reads in gene 1 that span a repeat region | |
683 - repeat_proportion2 : proportion of the spanning reads in gene 2 that span a repeat region | |
684 - max_repeat_proportion : max of repeat_proportion1 and repeat_proportion2 | |
685 - splice_score : number of nucleotides similar to GTAG at fusion splice | |
686 - num_splice_variants : number of potential splice variants for this gene pair | |
687 - splicing_index1 : number of concordant pairs in gene 1 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 2 | |
688 - splicing_index2 : number of concordant pairs in gene 2 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 1 | |
689 | |
690 | |
691 **Example** | |
692 | |
693 results.tsv:: | |
694 | |
695 cluster_id splitr_sequence splitr_count splitr_span_pvalue splitr_pos_pvalue splitr_min_pvalue adjacent altsplice break_adj_entropy1 break_adj_entropy2 break_adj_entropy_min break_predict breakpoint_homology breakseqs_estislands_percident cdna_breakseqs_percident concordant_ratio deletion est_breakseqs_percident eversion exonboundaries expression1 expression2 gene1 gene2 gene_align_strand1 gene_align_strand2 gene_chromosome1 gene_chromosome2 gene_end1 gene_end2 gene_location1 gene_location2 gene_name1 gene_name2 gene_start1 gene_start2 gene_strand1 gene_strand2 genome_breakseqs_percident genomic_break_pos1 genomic_break_pos2 genomic_strand1 genomic_strand2 interchromosomal interrupted_index1 interrupted_index2 inversion library_name max_map_count max_repeat_proportion mean_map_count min_map_count num_multi_map num_splice_variants orf read_through repeat_proportion1 repeat_proportion2 span_count span_coverage1 span_coverage2 span_coverage_max span_coverage_min splice_score splicing_index1 splicing_index2 | |
696 1169 GCTTACTGTATGCCAGGCCCCAGAGGGGCAACCACCCTCTAAAGAGAGCGGCTCCTGCCTCCCAGAAAGCTCACAGACTGTGGGAGGGAAACAGGCAGCAGGTGAAGATGCCAAATGCCAGGATATCTGCCCTGTCCTTGCTTGATGCAGCTGCTGGCTCCCACGTTCTCCCCAGAATCCCCTCACACTCCTGCTGTTTTCTCTGCAGGTTGGCAGAGCCCCATGAGGGCAGGGCAGCCACTTTGTTCTTGGGCGGCAAACCTCCCTGGGCGGCACGGAAACCACGGTGAGAAGGGGGCAGGTCGGGCACGTGCAGGGACCACGCTGCAGG|TGTACCCAACAGCTCCGAAGAGACAGCGACCATCGAGAACGGGCCATGATGACGATGGCGGTTTTGTCGAAAAGAAAAGGGGGAAATGTGGGGAAAAGCAAGAGAGATCAGATTGTTACTGTGTCTGTGTAGAAAGAAGTAGACATGGGAGACTCCATTTTGTTCTGTACTAAGAAAAATTCTTCTGCCTTGAGATTCGGTGACCCCACCCCCAACCCCGTGCTCTCTGAAACATGTGCTGTGTCCACTCAGGGTTGAATGGATTAAGGGCGGTGCGAGACGTGCTTT 2 0.000436307890680442 0.110748295953850 0.0880671602973091 N Y 3.19872427442695 3.48337348351473 3.19872427442695 splitr 0 0 0 0 Y 0 N N 0 0 ENSG00000105549 ENSG00000213753 + - 19 19 376013 59111168 intron upstream THEG AC016629.2 361750 59084870 - + 0 375099 386594 + - N 8.34107429512245 - N output_dir 82 0.677852348993289 40.6666666666667 1 11 1 N N 0.361271676300578 0.677852348993289 12 0.758602776578432 0.569678713445872 0.758602776578432 0.569678713445872 2 0.416666666666667 - | |
697 3596 TGGGGGTTGAGGCTTCTGTTCCCAGGTTCCATGACCTCAGAGGTGGCTGGTGAGGTTATGACCTTTGCCCTCCAGCCCTGGCTTAAAACCTCAGCCCTAGGACCTGGTTAAAGGAAGGGGAGATGGAGCTTTGCCCCGACCCCCCCCCGTTCCCCTCACCTGTCAGCCCGAGCTGGGCCAGGGCCCCTAGGTGGGGAACTGGGCCGGGGGGCGGGCACAAGCGGAGGTGGTGCCCCCAAAAGGGCTCCCGGTGGGGTCTTGCTGAGAAGGTGAGGGGTTCCCGGGGCCGCAGCAGGTGGTGGTGGAGGAGCCAAGCGGCTGTAGAGCAAGGGGTGAGCAGGTTCCAGACCGTAGAGGCGGGCAGCGGCCACGGCCCCGGGTCCAGTTAGCTCCTCACCCGCCTCATAGAAGCGGGGTGGCCTTGCCAGGCGTGGGGGTGCTGCC|TTCCTTGGATGTGGTAGCCGTTTCTCAGGCTCCCTCTCCGGAATCGAACCCTGATTCCCCGTCACCCGTGGTCACCATGGTAGGCACGGCGACTACCATCGAAAGTTGATAGGGCAGACGTTCGAATGGGTCGTCGCCGCCACGGGGGGCGTGCGATCAGCCCGAGGTTATCTAGAGTCACCAAAGCCGCCGGCGCCCGCCCCCCGGCCGGGGCCGGAGAGGGGCTGACCGGGTTGGTTTTGATCTGATAAATGCACGCATCCCCCCCGCGAAGGGGGTCAGCGCCCGTCGGCATGTATTAGCTCTAGAATTACCACAGTTATCCAAGTAGGAGAGGAGCGAGCGACCAAAGGAACCATAACTGATTTAATGAGCCATTCGCAGTTTCACTGTACCGGCCGTGCGTACTTAGACATGCATGGCTTAATCTTTGAGACAAGCATATGCTACTGGCAGG 250 7.00711162298275e-72 0.00912124762512338 0.00684237452309549 N N 3.31745197152461 3.47233119514066 3.31745197152461 splitr 7 0.0157657657657656 0 0 N 0.0135135135135136 N N 0 0 ENSG00000156860 ENSG00000212932 - + 16 21 30682131 48111157 coding upstream FBRS RPL23AP4 30670289 48110676 + + 0.0157657657657656 30680678 9827473 - + Y - - N output_dir 2 1 1.11111111111111 1 1 1 N N 0 1 9 0.325530693397641 0.296465452915709 0.325530693397641 0.296465452915709 2 - - | |
698 | |
699 </help> | |
44 | 700 <expand macro="citations"/> |
0 | 701 </tool> |