Mercurial > repos > jjohnson > defuse
comparison create_reference_dataset.xml @ 19:1af6f32ff592
Add datamanager, move to defuse_reference.loc
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Fri, 21 Jun 2013 14:46:11 -0500 |
parents | 547d8db4673e |
children | 3099cec648e7 |
comparison
equal
deleted
inserted
replaced
18:547d8db4673e | 19:1af6f32ff592 |
---|---|
5 <requirement type="package" version="0.1.18">samtools</requirement> | 5 <requirement type="package" version="0.1.18">samtools</requirement> |
6 <requirement type="package" version="1.0.0">bowtie</requirement> | 6 <requirement type="package" version="1.0.0">bowtie</requirement> |
7 <requirement type="package" version="2013-05-09">gmap</requirement> | 7 <requirement type="package" version="2013-05-09">gmap</requirement> |
8 <requirement type="package" version="latest">kent</requirement> | 8 <requirement type="package" version="latest">kent</requirement> |
9 </requirements> | 9 </requirements> |
10 <command interpreter="command"> /bin/bash $shscript </command> | 10 <command interpreter="command"> /bin/bash $defuse_script </command> |
11 <inputs> | 11 <inputs> |
12 <conditional name="genome"> | 12 <conditional name="genome"> |
13 <param name="choice" type="select" label="Select a Genome Build"> | 13 <param name="choice" type="select" label="Select a Genome Build"> |
14 <option value="GRCh37">Homo_sapiens GRCh37 hg19</option> | 14 <option value="GRCh37">Homo_sapiens GRCh37 hg19</option> |
15 <option value="NCBI36">Homo_sapiens NCBI36 hg18</option> | 15 <option value="NCBI36">Homo_sapiens NCBI36 hg18</option> |
110 <param name="rrna_gene_sources" type="text" value="Mt_rRNA,rRNA,rRNA_pseudogene" label="Ribosomal Gene sources" /> | 110 <param name="rrna_gene_sources" type="text" value="Mt_rRNA,rRNA,rRNA_pseudogene" label="Ribosomal Gene sources" /> |
111 </when> | 111 </when> |
112 </conditional> | 112 </conditional> |
113 </inputs> | 113 </inputs> |
114 <outputs> | 114 <outputs> |
115 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> | 115 <data format="defuse.conf" name="config_txt" label="${tool.name} on ${genome.ensembl_genome_version} : config.txt"/> |
116 </outputs> | 116 </outputs> |
117 <stdio> | 117 <stdio> |
118 <exit_code range="1:" level="fatal" description="Error running Create DeFuse Reference" /> | 118 <exit_code range="1:" level="fatal" description="Error running Create DeFuse Reference" /> |
119 <regex match="Error:" | 119 <regex match="Error:" |
120 source="both" | 120 source="both" |
122 description="Error running Create DeFuse Reference" /> | 122 description="Error running Create DeFuse Reference" /> |
123 | 123 |
124 </stdio> | 124 </stdio> |
125 <configfiles> | 125 <configfiles> |
126 <configfile name="defuse_config"> | 126 <configfile name="defuse_config"> |
127 #import ast | |
128 # | 127 # |
129 # Configuration file for defuse | 128 # Configuration file for defuse |
130 # | 129 # |
131 # At a minimum, change all values enclosed by [] | 130 # Variables that designate the PATH to an application, e.g. __SAMTOOLS_BIN__ |
131 # will be set by the runtime script using the ENV PATH | |
132 # | 132 # |
133 | 133 |
134 # Directory where the defuse code was unpacked | 134 # Directory where the defuse code was unpacked |
135 ## Default location in the tool/defuse directory | |
136 # source_directory = ${__root_dir__}/tools/defuse | |
137 source_directory = __DEFUSE_PATH__ | 135 source_directory = __DEFUSE_PATH__ |
138 | 136 |
137 # Organism IDs | |
139 ensembl_organism = $genome.ensembl_organism | 138 ensembl_organism = $genome.ensembl_organism |
140 ensembl_prefix = $genome.ensembl_prefix | 139 ensembl_prefix = $genome.ensembl_prefix |
141 ensembl_version = $genome.ensembl_version | 140 ensembl_version = $genome.ensembl_version |
142 ensembl_genome_version = $genome.ensembl_genome_version | 141 ensembl_genome_version = $genome.ensembl_genome_version |
143 ucsc_genome_version = $genome.ucsc_genome_version | 142 ucsc_genome_version = $genome.ucsc_genome_version |
208 scripts_directory = $(source_directory)/scripts | 207 scripts_directory = $(source_directory)/scripts |
209 tools_directory = $(source_directory)/tools | 208 tools_directory = $(source_directory)/tools |
210 data_directory = $(source_directory)/data | 209 data_directory = $(source_directory)/data |
211 #end raw | 210 #end raw |
212 | 211 |
213 #raw | |
214 # Bowtie parameters | |
215 bowtie_threads = 1 | |
216 bowtie_quals = --phred33-quals | |
217 max_insert_size = 500 | |
218 #end raw | |
219 | |
220 # Parameters for building the dataset | 212 # Parameters for building the dataset |
221 chromosomes = $genome.chromosomes | 213 chromosomes = $genome.chromosomes |
222 mt_chromosome = $genome.mt_chromosome | 214 mt_chromosome = $genome.mt_chromosome |
223 gene_sources = $genome.gene_sources | 215 gene_sources = $genome.gene_sources |
224 ig_gene_sources = $genome.ig_gene_sources | 216 ig_gene_sources = $genome.ig_gene_sources |
225 rrna_gene_sources = $genome.rrna_gene_sources | 217 rrna_gene_sources = $genome.rrna_gene_sources |
226 | 218 |
227 #raw | 219 #raw |
228 # Blat sequences per job | |
229 num_blat_sequences = 10000 | |
230 | |
231 # Minimum gene fusion range | |
232 dna_concordant_length = 2000 | |
233 | |
234 # Trim length for discordant reads (split reads are not trimmed) | |
235 discord_read_trim = 50 | |
236 | |
237 # Calculate extra annotations, fusion splice index and interrupted index | |
238 calculate_extra_annotations = no | |
239 | |
240 # Filtering parameters | |
241 clustering_precision = 0.95 | |
242 span_count_threshold = 5 | |
243 percent_identity_threshold = 0.90 | |
244 split_min_anchor = 4 | |
245 splice_bias = 10 | |
246 positive_controls = $(data_directory)/controls.txt | |
247 probability_threshold = 0.50 | |
248 | |
249 # Position density when calculating covariance | |
250 covariance_sampling_density = 0.01 | |
251 | |
252 # Number of reads for each job in split | |
253 reads_per_job = 1000000 | |
254 | |
255 # If you have command line 'mail' and wish to be notified | |
256 mailto = andrew.mcpherson@gmail.com | |
257 | |
258 # Remove temp files | 220 # Remove temp files |
259 remove_job_files = yes | 221 remove_job_files = yes |
260 remove_job_temp_files = yes | 222 remove_job_temp_files = yes |
261 #end raw | 223 #end raw |
262 </configfile> | 224 </configfile> |
263 <configfile name="shscript"> | 225 <configfile name="defuse_script"> |
264 #!/bin/bash | 226 #!/bin/bash |
265 ## define some things for cheetah processing | 227 ## define some things for cheetah processing |
266 #set $amp = chr(38) | 228 #set $amp = chr(38) |
267 #set $gt = chr(62) | 229 #set $gt = chr(62) |
268 ## substitute pathnames into config file | 230 ## substitute pathnames into config file |
289 <tests> | 251 <tests> |
290 </tests> | 252 </tests> |
291 <help> | 253 <help> |
292 **DeFuse** | 254 **DeFuse** |
293 | 255 |
294 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. See the DeFuse_Version_0.6.1_ manual for details. | 256 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. See the DeFuse_Version_0.6_ manual for details. |
295 | 257 |
296 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_: | 258 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_: |
297 - genome_fasta from Ensembl | 259 - genome_fasta from Ensembl |
298 - gene_models from Ensembl | 260 - gene_models from Ensembl |
299 - repeats_filename from UCSC RepeatMasker rmsk.txt | 261 - repeats_filename from UCSC RepeatMasker rmsk.txt |