comparison create_reference_dataset.xml @ 19:1af6f32ff592

Add datamanager, move to defuse_reference.loc
author Jim Johnson <jj@umn.edu>
date Fri, 21 Jun 2013 14:46:11 -0500
parents 547d8db4673e
children 3099cec648e7
comparison
equal deleted inserted replaced
18:547d8db4673e 19:1af6f32ff592
5 <requirement type="package" version="0.1.18">samtools</requirement> 5 <requirement type="package" version="0.1.18">samtools</requirement>
6 <requirement type="package" version="1.0.0">bowtie</requirement> 6 <requirement type="package" version="1.0.0">bowtie</requirement>
7 <requirement type="package" version="2013-05-09">gmap</requirement> 7 <requirement type="package" version="2013-05-09">gmap</requirement>
8 <requirement type="package" version="latest">kent</requirement> 8 <requirement type="package" version="latest">kent</requirement>
9 </requirements> 9 </requirements>
10 <command interpreter="command"> /bin/bash $shscript </command> 10 <command interpreter="command"> /bin/bash $defuse_script </command>
11 <inputs> 11 <inputs>
12 <conditional name="genome"> 12 <conditional name="genome">
13 <param name="choice" type="select" label="Select a Genome Build"> 13 <param name="choice" type="select" label="Select a Genome Build">
14 <option value="GRCh37">Homo_sapiens GRCh37 hg19</option> 14 <option value="GRCh37">Homo_sapiens GRCh37 hg19</option>
15 <option value="NCBI36">Homo_sapiens NCBI36 hg18</option> 15 <option value="NCBI36">Homo_sapiens NCBI36 hg18</option>
110 <param name="rrna_gene_sources" type="text" value="Mt_rRNA,rRNA,rRNA_pseudogene" label="Ribosomal Gene sources" /> 110 <param name="rrna_gene_sources" type="text" value="Mt_rRNA,rRNA,rRNA_pseudogene" label="Ribosomal Gene sources" />
111 </when> 111 </when>
112 </conditional> 112 </conditional>
113 </inputs> 113 </inputs>
114 <outputs> 114 <outputs>
115 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> 115 <data format="defuse.conf" name="config_txt" label="${tool.name} on ${genome.ensembl_genome_version} : config.txt"/>
116 </outputs> 116 </outputs>
117 <stdio> 117 <stdio>
118 <exit_code range="1:" level="fatal" description="Error running Create DeFuse Reference" /> 118 <exit_code range="1:" level="fatal" description="Error running Create DeFuse Reference" />
119 <regex match="Error:" 119 <regex match="Error:"
120 source="both" 120 source="both"
122 description="Error running Create DeFuse Reference" /> 122 description="Error running Create DeFuse Reference" />
123 123
124 </stdio> 124 </stdio>
125 <configfiles> 125 <configfiles>
126 <configfile name="defuse_config"> 126 <configfile name="defuse_config">
127 #import ast
128 # 127 #
129 # Configuration file for defuse 128 # Configuration file for defuse
130 # 129 #
131 # At a minimum, change all values enclosed by [] 130 # Variables that designate the PATH to an application, e.g. __SAMTOOLS_BIN__
131 # will be set by the runtime script using the ENV PATH
132 # 132 #
133 133
134 # Directory where the defuse code was unpacked 134 # Directory where the defuse code was unpacked
135 ## Default location in the tool/defuse directory
136 # source_directory = ${__root_dir__}/tools/defuse
137 source_directory = __DEFUSE_PATH__ 135 source_directory = __DEFUSE_PATH__
138 136
137 # Organism IDs
139 ensembl_organism = $genome.ensembl_organism 138 ensembl_organism = $genome.ensembl_organism
140 ensembl_prefix = $genome.ensembl_prefix 139 ensembl_prefix = $genome.ensembl_prefix
141 ensembl_version = $genome.ensembl_version 140 ensembl_version = $genome.ensembl_version
142 ensembl_genome_version = $genome.ensembl_genome_version 141 ensembl_genome_version = $genome.ensembl_genome_version
143 ucsc_genome_version = $genome.ucsc_genome_version 142 ucsc_genome_version = $genome.ucsc_genome_version
208 scripts_directory = $(source_directory)/scripts 207 scripts_directory = $(source_directory)/scripts
209 tools_directory = $(source_directory)/tools 208 tools_directory = $(source_directory)/tools
210 data_directory = $(source_directory)/data 209 data_directory = $(source_directory)/data
211 #end raw 210 #end raw
212 211
213 #raw
214 # Bowtie parameters
215 bowtie_threads = 1
216 bowtie_quals = --phred33-quals
217 max_insert_size = 500
218 #end raw
219
220 # Parameters for building the dataset 212 # Parameters for building the dataset
221 chromosomes = $genome.chromosomes 213 chromosomes = $genome.chromosomes
222 mt_chromosome = $genome.mt_chromosome 214 mt_chromosome = $genome.mt_chromosome
223 gene_sources = $genome.gene_sources 215 gene_sources = $genome.gene_sources
224 ig_gene_sources = $genome.ig_gene_sources 216 ig_gene_sources = $genome.ig_gene_sources
225 rrna_gene_sources = $genome.rrna_gene_sources 217 rrna_gene_sources = $genome.rrna_gene_sources
226 218
227 #raw 219 #raw
228 # Blat sequences per job
229 num_blat_sequences = 10000
230
231 # Minimum gene fusion range
232 dna_concordant_length = 2000
233
234 # Trim length for discordant reads (split reads are not trimmed)
235 discord_read_trim = 50
236
237 # Calculate extra annotations, fusion splice index and interrupted index
238 calculate_extra_annotations = no
239
240 # Filtering parameters
241 clustering_precision = 0.95
242 span_count_threshold = 5
243 percent_identity_threshold = 0.90
244 split_min_anchor = 4
245 splice_bias = 10
246 positive_controls = $(data_directory)/controls.txt
247 probability_threshold = 0.50
248
249 # Position density when calculating covariance
250 covariance_sampling_density = 0.01
251
252 # Number of reads for each job in split
253 reads_per_job = 1000000
254
255 # If you have command line 'mail' and wish to be notified
256 mailto = andrew.mcpherson@gmail.com
257
258 # Remove temp files 220 # Remove temp files
259 remove_job_files = yes 221 remove_job_files = yes
260 remove_job_temp_files = yes 222 remove_job_temp_files = yes
261 #end raw 223 #end raw
262 </configfile> 224 </configfile>
263 <configfile name="shscript"> 225 <configfile name="defuse_script">
264 #!/bin/bash 226 #!/bin/bash
265 ## define some things for cheetah processing 227 ## define some things for cheetah processing
266 #set $amp = chr(38) 228 #set $amp = chr(38)
267 #set $gt = chr(62) 229 #set $gt = chr(62)
268 ## substitute pathnames into config file 230 ## substitute pathnames into config file
289 <tests> 251 <tests>
290 </tests> 252 </tests>
291 <help> 253 <help>
292 **DeFuse** 254 **DeFuse**
293 255
294 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. See the DeFuse_Version_0.6.1_ manual for details. 256 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. See the DeFuse_Version_0.6_ manual for details.
295 257
296 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_: 258 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_:
297 - genome_fasta from Ensembl 259 - genome_fasta from Ensembl
298 - gene_models from Ensembl 260 - gene_models from Ensembl
299 - repeats_filename from UCSC RepeatMasker rmsk.txt 261 - repeats_filename from UCSC RepeatMasker rmsk.txt