Mercurial > repos > jjohnson > defuse
comparison create_reference_dataset.xml @ 19:1af6f32ff592
Add datamanager, move to defuse_reference.loc
| author | Jim Johnson <jj@umn.edu> |
|---|---|
| date | Fri, 21 Jun 2013 14:46:11 -0500 |
| parents | 547d8db4673e |
| children | 3099cec648e7 |
comparison
equal
deleted
inserted
replaced
| 18:547d8db4673e | 19:1af6f32ff592 |
|---|---|
| 5 <requirement type="package" version="0.1.18">samtools</requirement> | 5 <requirement type="package" version="0.1.18">samtools</requirement> |
| 6 <requirement type="package" version="1.0.0">bowtie</requirement> | 6 <requirement type="package" version="1.0.0">bowtie</requirement> |
| 7 <requirement type="package" version="2013-05-09">gmap</requirement> | 7 <requirement type="package" version="2013-05-09">gmap</requirement> |
| 8 <requirement type="package" version="latest">kent</requirement> | 8 <requirement type="package" version="latest">kent</requirement> |
| 9 </requirements> | 9 </requirements> |
| 10 <command interpreter="command"> /bin/bash $shscript </command> | 10 <command interpreter="command"> /bin/bash $defuse_script </command> |
| 11 <inputs> | 11 <inputs> |
| 12 <conditional name="genome"> | 12 <conditional name="genome"> |
| 13 <param name="choice" type="select" label="Select a Genome Build"> | 13 <param name="choice" type="select" label="Select a Genome Build"> |
| 14 <option value="GRCh37">Homo_sapiens GRCh37 hg19</option> | 14 <option value="GRCh37">Homo_sapiens GRCh37 hg19</option> |
| 15 <option value="NCBI36">Homo_sapiens NCBI36 hg18</option> | 15 <option value="NCBI36">Homo_sapiens NCBI36 hg18</option> |
| 110 <param name="rrna_gene_sources" type="text" value="Mt_rRNA,rRNA,rRNA_pseudogene" label="Ribosomal Gene sources" /> | 110 <param name="rrna_gene_sources" type="text" value="Mt_rRNA,rRNA,rRNA_pseudogene" label="Ribosomal Gene sources" /> |
| 111 </when> | 111 </when> |
| 112 </conditional> | 112 </conditional> |
| 113 </inputs> | 113 </inputs> |
| 114 <outputs> | 114 <outputs> |
| 115 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> | 115 <data format="defuse.conf" name="config_txt" label="${tool.name} on ${genome.ensembl_genome_version} : config.txt"/> |
| 116 </outputs> | 116 </outputs> |
| 117 <stdio> | 117 <stdio> |
| 118 <exit_code range="1:" level="fatal" description="Error running Create DeFuse Reference" /> | 118 <exit_code range="1:" level="fatal" description="Error running Create DeFuse Reference" /> |
| 119 <regex match="Error:" | 119 <regex match="Error:" |
| 120 source="both" | 120 source="both" |
| 122 description="Error running Create DeFuse Reference" /> | 122 description="Error running Create DeFuse Reference" /> |
| 123 | 123 |
| 124 </stdio> | 124 </stdio> |
| 125 <configfiles> | 125 <configfiles> |
| 126 <configfile name="defuse_config"> | 126 <configfile name="defuse_config"> |
| 127 #import ast | |
| 128 # | 127 # |
| 129 # Configuration file for defuse | 128 # Configuration file for defuse |
| 130 # | 129 # |
| 131 # At a minimum, change all values enclosed by [] | 130 # Variables that designate the PATH to an application, e.g. __SAMTOOLS_BIN__ |
| 131 # will be set by the runtime script using the ENV PATH | |
| 132 # | 132 # |
| 133 | 133 |
| 134 # Directory where the defuse code was unpacked | 134 # Directory where the defuse code was unpacked |
| 135 ## Default location in the tool/defuse directory | |
| 136 # source_directory = ${__root_dir__}/tools/defuse | |
| 137 source_directory = __DEFUSE_PATH__ | 135 source_directory = __DEFUSE_PATH__ |
| 138 | 136 |
| 137 # Organism IDs | |
| 139 ensembl_organism = $genome.ensembl_organism | 138 ensembl_organism = $genome.ensembl_organism |
| 140 ensembl_prefix = $genome.ensembl_prefix | 139 ensembl_prefix = $genome.ensembl_prefix |
| 141 ensembl_version = $genome.ensembl_version | 140 ensembl_version = $genome.ensembl_version |
| 142 ensembl_genome_version = $genome.ensembl_genome_version | 141 ensembl_genome_version = $genome.ensembl_genome_version |
| 143 ucsc_genome_version = $genome.ucsc_genome_version | 142 ucsc_genome_version = $genome.ucsc_genome_version |
| 208 scripts_directory = $(source_directory)/scripts | 207 scripts_directory = $(source_directory)/scripts |
| 209 tools_directory = $(source_directory)/tools | 208 tools_directory = $(source_directory)/tools |
| 210 data_directory = $(source_directory)/data | 209 data_directory = $(source_directory)/data |
| 211 #end raw | 210 #end raw |
| 212 | 211 |
| 213 #raw | |
| 214 # Bowtie parameters | |
| 215 bowtie_threads = 1 | |
| 216 bowtie_quals = --phred33-quals | |
| 217 max_insert_size = 500 | |
| 218 #end raw | |
| 219 | |
| 220 # Parameters for building the dataset | 212 # Parameters for building the dataset |
| 221 chromosomes = $genome.chromosomes | 213 chromosomes = $genome.chromosomes |
| 222 mt_chromosome = $genome.mt_chromosome | 214 mt_chromosome = $genome.mt_chromosome |
| 223 gene_sources = $genome.gene_sources | 215 gene_sources = $genome.gene_sources |
| 224 ig_gene_sources = $genome.ig_gene_sources | 216 ig_gene_sources = $genome.ig_gene_sources |
| 225 rrna_gene_sources = $genome.rrna_gene_sources | 217 rrna_gene_sources = $genome.rrna_gene_sources |
| 226 | 218 |
| 227 #raw | 219 #raw |
| 228 # Blat sequences per job | |
| 229 num_blat_sequences = 10000 | |
| 230 | |
| 231 # Minimum gene fusion range | |
| 232 dna_concordant_length = 2000 | |
| 233 | |
| 234 # Trim length for discordant reads (split reads are not trimmed) | |
| 235 discord_read_trim = 50 | |
| 236 | |
| 237 # Calculate extra annotations, fusion splice index and interrupted index | |
| 238 calculate_extra_annotations = no | |
| 239 | |
| 240 # Filtering parameters | |
| 241 clustering_precision = 0.95 | |
| 242 span_count_threshold = 5 | |
| 243 percent_identity_threshold = 0.90 | |
| 244 split_min_anchor = 4 | |
| 245 splice_bias = 10 | |
| 246 positive_controls = $(data_directory)/controls.txt | |
| 247 probability_threshold = 0.50 | |
| 248 | |
| 249 # Position density when calculating covariance | |
| 250 covariance_sampling_density = 0.01 | |
| 251 | |
| 252 # Number of reads for each job in split | |
| 253 reads_per_job = 1000000 | |
| 254 | |
| 255 # If you have command line 'mail' and wish to be notified | |
| 256 mailto = andrew.mcpherson@gmail.com | |
| 257 | |
| 258 # Remove temp files | 220 # Remove temp files |
| 259 remove_job_files = yes | 221 remove_job_files = yes |
| 260 remove_job_temp_files = yes | 222 remove_job_temp_files = yes |
| 261 #end raw | 223 #end raw |
| 262 </configfile> | 224 </configfile> |
| 263 <configfile name="shscript"> | 225 <configfile name="defuse_script"> |
| 264 #!/bin/bash | 226 #!/bin/bash |
| 265 ## define some things for cheetah processing | 227 ## define some things for cheetah processing |
| 266 #set $amp = chr(38) | 228 #set $amp = chr(38) |
| 267 #set $gt = chr(62) | 229 #set $gt = chr(62) |
| 268 ## substitute pathnames into config file | 230 ## substitute pathnames into config file |
| 289 <tests> | 251 <tests> |
| 290 </tests> | 252 </tests> |
| 291 <help> | 253 <help> |
| 292 **DeFuse** | 254 **DeFuse** |
| 293 | 255 |
| 294 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. See the DeFuse_Version_0.6.1_ manual for details. | 256 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. See the DeFuse_Version_0.6_ manual for details. |
| 295 | 257 |
| 296 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_: | 258 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_: |
| 297 - genome_fasta from Ensembl | 259 - genome_fasta from Ensembl |
| 298 - gene_models from Ensembl | 260 - gene_models from Ensembl |
| 299 - repeats_filename from UCSC RepeatMasker rmsk.txt | 261 - repeats_filename from UCSC RepeatMasker rmsk.txt |
