Mercurial > repos > jjohnson > defuse8
comparison defuse.xml @ 0:63f23d5db27c draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/defuse commit 2c2fd38cb761ec57bac7a0bd376e6aa2b88265d0-dirty
| author | jjohnson |
|---|---|
| date | Mon, 20 May 2019 15:25:03 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:63f23d5db27c |
|---|---|
| 1 <tool id="defuse" name="DeFuse" version="@DEFUSE_VERSION@.1"> | |
| 2 <description>identify fusion transcripts</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <requirements> | |
| 7 <expand macro="defuse_requirement" /> | |
| 8 </requirements> | |
| 9 <command detect_errors="default"><![CDATA[ | |
| 10 #if $defuse_out.__str__ != 'None': | |
| 11 ## keep_output selected: make output_dir a symlink to the html dataset's extra-files directory so the working files are retained | |
| 12 mkdir -p '$defuse_out.files_path' && | |
| 13 ln -s '$defuse_out.files_path' output_dir && | |
| 14 #else | |
| 15 mkdir -p output_dir && | |
| 16 #end if | |
| 17 ## Run config_sub.sh on the generated configfile to put executable paths in output_dir/defuse.cfg | |
| 18 '$__tool_directory__/config_sub.sh' '$defuse_config' output_dir/defuse.cfg && | |
| 19 ## copy the final config to the config_txt output dataset | |
| 20 cp output_dir/defuse.cfg '$config_txt' && | |
| 21 ## make a data_dir and ln -s the input fastq files under fixed names | |
| 22 mkdir -p data_dir && | |
| 23 ln -s '$left_pairendreads' data_dir/reads_1.fastq && | |
| 24 ln -s '$right_pairendreads' data_dir/reads_2.fastq && | |
| 25 ## run defuse with the dataset_directory read from the config; GALAXY_SLOTS falls back to 1 when unset | |
| 26 DATASET_DIRECTORY=`grep '^dataset_directory' output_dir/defuse.cfg | awk '{print \$NF}'` && | |
| 27 defuse_run.pl --name '$library_name' --config output_dir/defuse.cfg --dataset "\$DATASET_DIRECTORY" -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir -p \${GALAXY_SLOTS:-1} && | |
| 28 grep -v cluster_id output_dir/results.filtered.tsv | awk '{print $1}' > cluster_id_list && | |
| 29 get_fusion_fastq.pl --list cluster_id_list --output output_dir --fastq1 results.fusions_1.fq --fastq2 results.fusions_2.fq && | |
| 30 cp output_dir/results.* . && | |
| 31 cp `find -L output_dir -name defuse.log` '$defuse_log' | |
| 32 #if $defuse_out.__str__ != 'None': | |
| 33 && '$__tool_directory__/make_html.sh' '$defuse_out' '$defuse_out.files_path' | |
| 34 #end if | |
| 35 ]]></command> | |
| 36 <configfiles> | |
| 37 <configfile name="defuse_config"> | |
| 38 #import re | |
| 39 #if $refGenomeSource.genomeSource == "history": | |
| 40 #set config_file = $refGenomeSource.config.__str__ | |
| 41 #else | |
| 42 #set config_file = $refGenomeSource.index.value | |
| 43 #end if | |
| 44 #set pat = '^\s*([^#=][^=]*?)\s*=\s*(.*?)\s*$' | |
| 45 #set fh = open($config_file) | |
| 46 #set keys = ['dataset_directory','ensembl_organism','ensembl_prefix','ensembl_version','ensembl_genome_version','ucsc_genome_version','ncbi_organism','ncbi_prefix','chromosomes','mt_chromosome','gene_sources','ig_gene_sources','rrna_gene_sources'] | |
| 47 #set kv = [] | |
| 48 #for $line in $fh: | |
| 49 #set m = $re.match($pat,$line) | |
| 50 #if $m and len($m.groups()) == 2: | |
| 51 ## #echo $line | |
| 52 #if $m.groups()[0] in keys: | |
| 53 #set k = $m.groups()[0] | |
| 54 #if k == 'dataset_directory' and $refGenomeSource.genomeSource == "indexed": | |
| 55 ## The DataManager is configured to place the config file in the same directory as the defuse_data: dataset_directory | |
| 56 #set v = $os.path.dirname($config_file) | |
| 57 #else: | |
| 58 #set v = $m.groups()[1] | |
| 59 #end if | |
| 60 #set kv = $kv + [[$k, $v]] | |
| 61 #end if | |
| 62 #end if | |
| 63 #end for | |
| 64 ## #echo $kv | |
| 65 #set ref_dict = dict($kv) | |
| 66 ## #echo $ref_dict | |
| 67 ## include raw $refGenomeSource.config.__str__ | |
| 68 # | |
| 69 # Configuration file for defuse | |
| 70 # | |
| 71 # At a minimum, change all values enclosed by [] | |
| 72 # | |
| 73 | |
| 74 # Directory where the defuse code was unpacked | |
| 75 ## Default location in the tool/defuse directory | |
| 76 # source_directory = ${__root_dir__}/tools/defuse | |
| 77 source_directory = __DEFUSE_PATH__ | |
| 78 | |
| 79 # Directory where you want your dataset | |
| 80 dataset_directory = #slurp | |
| 81 #try | |
| 82 $ref_dict['dataset_directory'] | |
| 83 #except | |
| 84 /project/db/genomes/Hsapiens/hg19/defuse | |
| 85 #end try | |
| 86 | |
| 87 # Organism IDs | |
| 88 ensembl_organism = #slurp | |
| 89 #try | |
| 90 $ref_dict['ensembl_organism'] | |
| 91 #except | |
| 92 homo_sapiens | |
| 93 #end try | |
| 94 | |
| 95 ensembl_prefix = #slurp | |
| 96 #try | |
| 97 $ref_dict['ensembl_prefix'] | |
| 98 #except | |
| 99 Homo_sapiens | |
| 100 #end try | |
| 101 | |
| 102 ensembl_version = #slurp | |
| 103 #try | |
| 104 $ref_dict['ensembl_version'] | |
| 105 #except | |
| 106 71 | |
| 107 #end try | |
| 108 | |
| 109 ensembl_genome_version = #slurp | |
| 110 #try | |
| 111 $ref_dict['ensembl_genome_version'] | |
| 112 #except | |
| 113 GRCh37 | |
| 114 #end try | |
| 115 | |
| 116 ucsc_genome_version = #slurp | |
| 117 #try | |
| 118 $ref_dict['ucsc_genome_version'] | |
| 119 #except | |
| 120 hg19 | |
| 121 #end try | |
| 122 | |
| 123 ncbi_organism = #slurp | |
| 124 #try | |
| 125 $ref_dict['ncbi_organism'] | |
| 126 #except | |
| 127 Homo_sapiens | |
| 128 #end try | |
| 129 | |
| 130 ncbi_prefix = #slurp | |
| 131 #try | |
| 132 $ref_dict['ncbi_prefix'] | |
| 133 #except | |
| 134 Hs | |
| 135 #end try | |
| 136 | |
| 137 # Input genome and gene models | |
| 138 gene_models = #slurp | |
| 139 #try | |
| 140 $ref_dict['gene_models'] | |
| 141 #except | |
| 142 \$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).gtf | |
| 143 #end try | |
| 144 genome_fasta = #slurp | |
| 145 #try | |
| 146 $ref_dict['genome_fasta'] | |
| 147 #except | |
| 148 \$(dataset_directory)/\$(ensembl_prefix).\$(ensembl_genome_version).\$(ensembl_version).dna.chromosomes.fa | |
| 149 #end try | |
| 150 | |
| 151 # Repeat table from ucsc genome browser | |
| 152 repeats_filename = #slurp | |
| 153 #try | |
| 154 $ref_dict['repeats_filename'] | |
| 155 #except | |
| 156 \$(dataset_directory)/rmsk.txt | |
| 157 #end try | |
| 158 | |
| 159 # EST info downloaded from ucsc genome browser | |
| 160 est_fasta = #slurp | |
| 161 #try | |
| 162 $ref_dict['est_fasta'] | |
| 163 #except | |
| 164 \$(dataset_directory)/est.fa | |
| 165 #end try | |
| 166 est_alignments = #slurp | |
| 167 #try | |
| 168 $ref_dict['est_alignments'] | |
| 169 #except | |
| 170 \$(dataset_directory)/intronEst.txt | |
| 171 #end try | |
| 172 | |
| 173 # Unigene clusters downloaded from ncbi | |
| 174 unigene_fasta = #slurp | |
| 175 #try | |
| 176 $ref_dict['unigene_fasta'] | |
| 177 #except | |
| 178 \$(dataset_directory)/\$(ncbi_prefix).seq.uniq | |
| 179 #end try | |
| 180 | |
| 181 # Paths to external tools | |
| 182 bowtie_bin = __BOWTIE_BIN__ | |
| 183 bowtie_build_bin = __BOWTIE_BUILD_BIN__ | |
| 184 blat_bin = __BLAT_BIN__ | |
| 185 fatotwobit_bin = __FATOTWOBIT_BIN__ | |
| 186 gmap_bin = __GMAP_BIN__ | |
| 187 gmap_bin = __GMAP_BIN__ | |
| 188 gmap_setup_bin = __GMAP_SETUP_BIN__ | |
| 189 r_bin = __R_BIN__ | |
| 190 rscript_bin = __RSCRIPT_BIN__ | |
| 191 | |
| 192 # Directory where you want your dataset | |
| 193 gmap_index_directory = #slurp | |
| 194 #try | |
| 195 $ref_dict['gmap_index_directory'] | |
| 196 #except | |
| 197 #raw | |
| 198 $(dataset_directory)/gmap | |
| 199 #end raw | |
| 200 #end try | |
| 201 | |
| 202 #raw | |
| 203 # Dataset files | |
| 204 dataset_prefix = $(dataset_directory)/defuse | |
| 205 chromosome_prefix = $(dataset_prefix).dna.chromosomes | |
| 206 exons_fasta = $(dataset_prefix).exons.fa | |
| 207 cds_fasta = $(dataset_prefix).cds.fa | |
| 208 cdna_regions = $(dataset_prefix).cdna.regions | |
| 209 cdna_fasta = $(dataset_prefix).cdna.fa | |
| 210 reference_fasta = $(dataset_prefix).reference.fa | |
| 211 rrna_fasta = $(dataset_prefix).rrna.fa | |
| 212 ig_gene_list = $(dataset_prefix).ig.gene.list | |
| 213 repeats_regions = $(dataset_directory)/repeats.regions | |
| 214 est_split_fasta1 = $(dataset_directory)/est.1.fa | |
| 215 est_split_fasta2 = $(dataset_directory)/est.2.fa | |
| 216 est_split_fasta3 = $(dataset_directory)/est.3.fa | |
| 217 est_split_fasta4 = $(dataset_directory)/est.4.fa | |
| 218 est_split_fasta5 = $(dataset_directory)/est.5.fa | |
| 219 est_split_fasta6 = $(dataset_directory)/est.6.fa | |
| 220 est_split_fasta7 = $(dataset_directory)/est.7.fa | |
| 221 est_split_fasta8 = $(dataset_directory)/est.8.fa | |
| 222 est_split_fasta9 = $(dataset_directory)/est.9.fa | |
| 223 | |
| 224 # Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs | |
| 225 prefilter1 = $(unigene_fasta) | |
| 226 | |
| 227 # deFuse scripts and tools | |
| 228 scripts_directory = $(source_directory)/scripts | |
| 229 tools_directory = $(source_directory)/tools | |
| 230 data_directory = $(source_directory)/data | |
| 231 #end raw | |
| 232 | |
| 233 # Path to samtools, 0.1.8 is compiled for you, use other versions at your own risk | |
| 234 samtools_bin = #slurp | |
| 235 #try | |
| 236 $ref_dict['samtools_bin'] | |
| 237 #except | |
| 238 \$(source_directory)/external/samtools-0.1.8/samtools | |
| 239 #end try | |
| 240 | |
| 241 # Bowtie parameters | |
| 242 bowtie_threads = #slurp | |
| 243 #try | |
| 244 $ref_dict['bowtie_threads'] | |
| 245 #except | |
| 246 4 | |
| 247 #end try | |
| 248 bowtie_quals = #slurp | |
| 249 #try | |
| 250 $ref_dict['bowtie_quals'] | |
| 251 #except | |
| 252 --phred33-quals | |
| 253 #end try | |
| 254 bowtie_params = #slurp | |
| 255 #try | |
| 256 $ref_dict['bowtie_params'] | |
| 257 #except | |
| 258 --chunkmbs 200 | |
| 259 #end try | |
| 260 max_insert_size = #slurp | |
| 261 #if $defuse_param.settings == "full" and $defuse_param.max_insert_size.__str__ != "": | |
| 262 $defuse_param.max_insert_size | |
| 263 #else | |
| 264 #try | |
| 265 $ref_dict['max_insert_size'] | |
| 266 #except | |
| 267 500 | |
| 268 #end try | |
| 269 #end if | |
| 270 | |
| 271 # Parameters for building the dataset | |
| 272 chromosomes = #slurp | |
| 273 #try | |
| 274 $ref_dict.chromosomes | |
| 275 #except | |
| 276 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT | |
| 277 #end try | |
| 278 mt_chromosome = #slurp | |
| 279 #try | |
| 280 $ref_dict['mt_chromosome'] | |
| 281 #except | |
| 282 MT | |
| 283 #end try | |
| 284 gene_sources = #slurp | |
| 285 #try | |
| 286 $ref_dict['gene_sources'] | |
| 287 #except | |
| 288 IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding | |
| 289 #end try | |
| 290 ig_gene_sources = #slurp | |
| 291 #try | |
| 292 $ref_dict['ig_gene_sources'] | |
| 293 #except | |
| 294 IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene | |
| 295 #end try | |
| 296 rrna_gene_sources = #slurp | |
| 297 #try | |
| 298 $ref_dict['rrna_gene_sources'] | |
| 299 #except | |
| 300 Mt_rRNA,rRNA,rRNA_pseudogene | |
| 301 #end try | |
| 302 | |
| 303 # Blat sequences per job | |
| 304 num_blat_sequences = #slurp | |
| 305 #try | |
| 306 $ref_dict['num_blat_sequences'] | |
| 307 #except | |
| 308 10000 | |
| 309 #end try | |
| 310 | |
| 311 # Minimum gene fusion range | |
| 312 dna_concordant_length = #slurp | |
| 313 #if $defuse_param.settings == "full" and $defuse_param.dna_concordant_length.__str__ != "": | |
| 314 $defuse_param.dna_concordant_length | |
| 315 #else | |
| 316 #try | |
| 317 $ref_dict['dna_concordant_length'] | |
| 318 #except | |
| 319 2000 | |
| 320 #end try | |
| 321 #end if | |
| 322 | |
| 323 # Trim length for discordant reads (split reads are not trimmed) | |
| 324 discord_read_trim = #slurp | |
| 325 #if $defuse_param.settings == "full" and $defuse_param.discord_read_trim.__str__ != "": | |
| 326 $defuse_param.discord_read_trim | |
| 327 #else | |
| 328 #try | |
| 329 $ref_dict['discord_read_trim'] | |
| 330 #except | |
| 331 50 | |
| 332 #end try | |
| 333 #end if | |
| 334 # Calculate extra annotations, fusion splice index and interrupted index | |
| 335 calculate_extra_annotations = #slurp | |
| 336 #if $defuse_param.settings == "full" and $defuse_param.calculate_extra_annotations.__str__ != "": | |
| 337 $defuse_param.calculate_extra_annotations | |
| 338 #else | |
| 339 #try | |
| 340 $ref_dict['calculate_extra_annotations'] | |
| 341 #except | |
| 342 no | |
| 343 #end try | |
| 344 #end if | |
| 345 # Filtering parameters | |
| 346 clustering_precision = #slurp | |
| 347 #if $defuse_param.settings == "full" and $defuse_param.clustering_precision.__str__ != "" | |
| 348 $defuse_param.clustering_precision | |
| 349 #else | |
| 350 #try | |
| 351 $ref_dict['clustering_precision'] | |
| 352 #except | |
| 353 0.95 | |
| 354 #end try | |
| 355 #end if | |
| 356 span_count_threshold = #slurp | |
| 357 #if $defuse_param.settings == "full" and $defuse_param.span_count_threshold.__str__ != "" | |
| 358 $defuse_param.span_count_threshold | |
| 359 #else | |
| 360 #try | |
| 361 $ref_dict['span_count_threshold'] | |
| 362 #except | |
| 363 5 | |
| 364 #end try | |
| 365 #end if | |
| 366 percent_identity_threshold = #slurp | |
| 367 #if $defuse_param.settings == "full" and $defuse_param.percent_identity_threshold.__str__ != "" | |
| 368 $defuse_param.percent_identity_threshold | |
| 369 #else | |
| 370 #try | |
| 371 $ref_dict['percent_identity_threshold'] | |
| 372 #except | |
| 373 0.90 | |
| 374 #end try | |
| 375 #end if | |
| 376 split_min_anchor = #slurp | |
| 377 #if $defuse_param.settings == "full" and $defuse_param.split_min_anchor.__str__ != "" | |
| 378 $defuse_param.split_min_anchor | |
| 379 #else | |
| 380 #try | |
| 381 $ref_dict['split_min_anchor'] | |
| 382 #except | |
| 383 4 | |
| 384 #end try | |
| 385 #end if | |
| 386 splice_bias = #slurp | |
| 387 #if $defuse_param.settings == "full" and $defuse_param.splice_bias.__str__ != "" | |
| 388 $defuse_param.splice_bias | |
| 389 #else | |
| 390 #try | |
| 391 $ref_dict['splice_bias'] | |
| 392 #except | |
| 393 10 | |
| 394 #end try | |
| 395 #end if | |
| 396 denovo_assembly = #slurp | |
| 397 #if $defuse_param.settings == "full" and $defuse_param.denovo_assembly.__str__ != "" | |
| 398 $defuse_param.denovo_assembly | |
| 399 #else | |
| 400 #try | |
| 401 $ref_dict['denovo_assembly'] | |
| 402 #except | |
| 403 no | |
| 404 #end try | |
| 405 #end if | |
| 406 probability_threshold = #slurp | |
| 407 #if $defuse_param.settings == "full" and $defuse_param.probability_threshold.__str__ != "" | |
| 408 $defuse_param.probability_threshold | |
| 409 #else | |
| 410 #try | |
| 411 $ref_dict['probability_threshold'] | |
| 412 #except | |
| 413 0.50 | |
| 414 #end try | |
| 415 #end if | |
| 416 positive_controls = \$(data_directory)/controls.txt | |
| 417 | |
| 418 # Use multiple exon transcripts for stats calculations (yes/no) | |
| 419 # should be enabled for very small libraries | |
| 420 multi_exon_transcripts_stats = #slurp | |
| 421 #if $defuse_param.settings == "full" and $defuse_param.multi_exon_transcripts_stats.__str__ != "" | |
| 422 $defuse_param.multi_exon_transcripts_stats | |
| 423 #else | |
| 424 #try | |
| 425 $ref_dict['multi_exon_transcripts_stats'] | |
| 426 #except | |
| 427 no | |
| 428 #end try | |
| 429 #end if | |
| 430 | |
| 431 # Position density when calculating covariance | |
| 432 covariance_sampling_density = #slurp | |
| 433 #if $defuse_param.settings == "full" and $defuse_param.covariance_sampling_density.__str__ != "" | |
| 434 $defuse_param.covariance_sampling_density | |
| 435 #else | |
| 436 #try | |
| 437 $ref_dict['covariance_sampling_density'] | |
| 438 #except | |
| 439 0.01 | |
| 440 #end try | |
| 441 #end if | |
| 442 | |
| 443 # Maximum number of alignments for a read pair | |
| 444 # Pairs with more alignments are filtered | |
| 445 max_paired_alignments = #slurp | |
| 446 #if $defuse_param.settings == "full" and $defuse_param.max_paired_alignments.__str__ != "" | |
| 447 $defuse_param.max_paired_alignments | |
| 448 #else | |
| 449 #try | |
| 450 $ref_dict['max_paired_alignments'] | |
| 451 #except | |
| 452 10 | |
| 453 #end try | |
| 454 #end if | |
| 455 | |
| 456 # Number of reads for each job in split | |
| 457 reads_per_job = #slurp | |
| 458 #if $defuse_param.settings == "full" and $defuse_param.reads_per_job.__str__ != "" | |
| 459 $defuse_param.reads_per_job | |
| 460 #else | |
| 461 #try | |
| 462 $ref_dict['reads_per_job'] | |
| 463 #except | |
| 464 1000000 | |
| 465 #end try | |
| 466 #end if | |
| 467 | |
| 468 #raw | |
| 469 # If you have command line 'mail' and wish to be notified | |
| 470 # mailto = andrew.mcpherson@gmail.com | |
| 471 | |
| 472 # Remove temp files | |
| 473 remove_job_files = yes | |
| 474 remove_job_temp_files = yes | |
| 475 | |
| 476 qsub_params = "" | |
| 477 | |
| 478 #end raw | |
| 479 | |
| 480 </configfile> | |
| 481 </configfiles> | |
| 482 <inputs> | |
| 483 <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. FASTQ de-interlacer will separate the result into left and right reads.)"/> | |
| 484 <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/> | |
| 485 <param name="library_name" type="text" value="unknown" label="library name" help="Value to put in the results library_name column"> | |
| 486 <validator type="length" min="1"/> | |
| 487 </param> | |
| 488 <conditional name="refGenomeSource"> | |
| 489 <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help=""> | |
| 490 <option value="indexed">Use a built-in DeFuse Reference Dataset</option> | |
| 491 <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option> | |
| 492 </param> | |
| 493 <when value="indexed"> | |
| 494 <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team"> | |
| 495 <options from_file="defuse_reference.loc"> | |
| 496 <column name="name" index="1"/> | |
| 497 <column name="value" index="3"/> | |
| 498 <filter type="sort_by" column="0" /> | |
| 499 <validator type="no_options" message="No indexes are available" /> | |
| 500 </options> | |
| 501 </param> | |
| 502 </when> | |
| 503 <when value="history"> | |
| 504 <param name="config" type="data" format="defuse.conf" label="Defuse Config file" help=""/> | |
| 505 </when> <!-- history --> | |
| 506 </conditional> <!-- refGenomeSource --> | |
| 507 <conditional name="defuse_param"> | |
| 508 <param name="settings" type="select" label="Defuse parameter settings" help=""> | |
| 509 <option value="preSet">Default settings</option> | |
| 510 <option value="full">Full parameter list</option> | |
| 511 </param> | |
| 512 <when value="preSet" /> | |
| 513 <when value="full"> | |
| 514 <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" /> | |
| 515 <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" /> | |
| 516 <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" /> | |
| 517 <param name="calculate_extra_annotations" type="select" label="Calculate extra annotations, fusion splice index and interrupted index" help=""> | |
| 518 <option value="">Use Default</option> | |
| 519 <option value="no">no</option> | |
| 520 <option value="yes">yes</option> | |
| 521 </param> | |
| 522 <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision"> | |
| 523 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> | |
| 524 </param> | |
| 525 <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" /> | |
| 526 <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold"> | |
| 527 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> | |
| 528 </param> | |
| 529 <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" /> | |
| 530 <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" /> | |
| 531 <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold"> | |
| 532 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> | |
| 533 </param> | |
| 534 <param name="multi_exon_transcripts_stats" type="select" label="Use multiple exon transcripts for stats calculations" help="should be enabled for very small libraries"> | |
| 535 <option value="no" selected="true">no</option> | |
| 536 <option value="yes">yes</option> | |
| 537 </param> | |
| 538 <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density"> | |
| 539 <help>Position density when calculating covariance</help> | |
| 540 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> | |
| 541 </param> | |
| 542 <param name="max_paired_alignments" type="integer" value="10" optional="true" label="max_paired_alignments"> | |
| 543 <help>Maximum number of alignments for a read pair, Pairs with more alignments are filtered, default is 10</help> | |
| 544 <validator type="in_range" message="Choose a value between 1 and 100" min="1" max="100"/> | |
| 545 </param> | |
| 546 <param name="denovo_assembly" type="select" label="denovo_assembly" help=""> | |
| 547 <option value="">Use Default</option> | |
| 548 <option value="no">no</option> | |
| 549 <option value="yes">yes</option> | |
| 550 </param> | |
| 551 <!-- | |
| 552 <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/> | |
| 553 --> | |
| 554 <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" /> | |
| 555 </when> <!-- full --> | |
| 556 </conditional> <!-- defuse_param --> | |
| 557 <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" | |
| 558 help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, | |
| 559 but they require considerable diskspace, and should be deleted and purged when no longer needed."/> | |
| 560 </inputs> | |
| 561 <outputs> | |
| 562 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> | |
| 563 <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" /> | |
| 564 <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)"> | |
| 565 <filter>keep_output == True</filter> | |
| 566 </data> | |
| 567 <data format="defuse.results.tsv" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" from_work_dir="results.classify.tsv"/> | |
| 568 <data format="defuse.results.tsv" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" from_work_dir="results.filtered.tsv"/> | |
| 569 <data format="fastqsanger" name="results_fusions1_fq" label="${tool.name} on ${on_string}: fusions_1.fq" from_work_dir="results.fusions_1.fq" /> | |
| 570 <data format="fastqsanger" name="results_fusions2_fq" label="${tool.name} on ${on_string}: fusions_2.fq" from_work_dir="results.fusions_2.fq" /> | |
| 571 <!-- | |
| 572 expression_plot | |
| 573 circos plot | |
| 574 --> | |
| 575 </outputs> | |
| 576 | |
| 577 <tests> | |
| 578 </tests> | |
| 579 <help> | |
| 580 **DeFuse** | |
| 581 | |
| 582 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. | |
| 583 | |
| 584 Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138 | |
| 585 | |
| 586 .. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page | |
| 587 | |
| 588 ------ | |
| 589 | |
| 590 **Inputs** | |
| 591 | |
| 592 DeFuse requires 2 fastq files for paired reads, one with the left mate of the paired reads, and a second fastq with the right mate of the paired reads (**with reads in the same order as in the first fastq dataset**). | |
| 593 | |
| 594 If your fastq files have reads in different orders or include unpaired reads, you can preprocess them with **FASTQ interlacer** to create a single interlaced fastq dataset with only the paired reads and input that to **FASTQ de-interlacer** to separate the reads into a left fastq and right fastq. | |
| 595 | |
| 596 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.4_: | |
| 597 - genome_fasta from Ensembl | |
| 598 - gene_models from Ensembl | |
| 599 - repeats_filename from UCSC RepeatMasker rmsk.txt | |
| 600 - est_fasta from UCSC | |
| 601 - est_alignments from UCSC intronEst.txt | |
| 602 - unigene_fasta from NCBI | |
| 603 | |
| 604 .. _DeFuse_Version_0.4: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2 | |
| 605 | |
| 606 ------ | |
| 607 | |
| 608 **Outputs** | |
| 609 | |
| 610 The galaxy history will contain 5 outputs: the config.txt file that provides DeFuse with its parameters, the defuse.log which details what DeFuse has done and can be useful in determining any errors, and the 3 results files that defuse generates. | |
| 611 | |
| 612 DeFuse generates 3 results files: results.tsv, results.filtered.tsv, and results.classify.tsv. All three files have the same format, though results.classify.tsv has a probability column from the application of the classifier to results.tsv, and results.filtered.tsv has been filtered according to the threshold probability as set in config.txt. | |
| 613 | |
| 614 The file format is tab delimited with one prediction per line, and the following fields per prediction (not necessarily in this order): | |
| 615 | |
| 616 - **Identification** | |
| 617 - cluster_id : random identifier assigned to each prediction | |
| 618 - library_name : library name given on the command line of defuse | |
| 619 - gene1 : ensembl id of gene 1 | |
| 620 - gene2 : ensembl id of gene 2 | |
| 621 - gene_name1 : name of gene 1 | |
| 622 - gene_name2 : name of gene 2 | |
| 623 - **Evidence** | |
| 624 - break_predict : breakpoint prediction method, denovo or splitr, that is considered most reliable | |
| 625 - concordant_ratio : proportion of spanning reads considered concordant by blat | |
| 626 - denovo_min_count : minimum kmer count across denovo assembled sequence | |
| 627 - denovo_sequence : fusion sequence predicted by debruijn based denovo sequence assembly | |
| 628 - denovo_span_pvalue : p-value, lower values are evidence the prediction is a false positive | |
| 629 - gene_align_strand1 : alignment strand for spanning read alignments to gene 1 | |
| 630 - gene_align_strand2 : alignment strand for spanning read alignments to gene 2 | |
| 631 - min_map_count : minimum of the number of genomic mappings for each spanning read | |
| 632 - max_map_count : maximum of the number of genomic mappings for each spanning read | |
| 633 - mean_map_count : average of the number of genomic mappings for each spanning read | |
| 634 - num_multi_map : number of spanning reads that map to more than one genomic location | |
| 635 - span_count : number of spanning reads supporting the fusion | |
| 636 - span_coverage1 : coverage of spanning reads aligned to gene 1 as a proportion of expected coverage | |
| 637 - span_coverage2 : coverage of spanning reads aligned to gene 2 as a proportion of expected coverage | |
| 638 - span_coverage_min : minimum of span_coverage1 and span_coverage2 | |
| 639 - span_coverage_max : maximum of span_coverage1 and span_coverage2 | |
| 640 - splitr_count : number of split reads supporting the prediction | |
| 641 - splitr_min_pvalue : p-value, lower values are evidence the prediction is a false positive | |
| 642 - splitr_pos_pvalue : p-value, lower values are evidence the prediction is a false positive | |
| 643 - splitr_sequence : fusion sequence predicted by split reads | |
| 644 - splitr_span_pvalue : p-value, lower values are evidence the prediction is a false positive | |
| 645 - **Annotation** | |
| 646 - adjacent : fusion between adjacent genes | |
| 647 - altsplice : fusion likely the product of alternative splicing between adjacent genes | |
| 648 - break_adj_entropy1 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 1 | |
| 649 - break_adj_entropy2 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 2 | |
| 650 - break_adj_entropy_min : minimum of break_adj_entropy1 and break_adj_entropy2 | |
| 651 - breakpoint_homology : number of nucleotides at the fusion splice that align equally well to gene 1 or gene 2 | |
| 652 - breakseqs_estislands_percident : maximum percent identity of fusion sequence alignments to est islands | |
| 653 - cdna_breakseqs_percident : maximum percent identity of fusion sequence alignments to cdna | |
| 654 - deletion : fusion produced by a genomic deletion | |
| 655 - est_breakseqs_percident : maximum percent identity of fusion sequence alignments to est | |
| 656 - eversion : fusion produced by a genomic eversion | |
| 657 - exonboundaries : fusion splice at exon boundaries | |
| 658 - expression1 : expression of gene 1 as number of concordant pairs aligned to exons | |
| 659 - expression2 : expression of gene 2 as number of concordant pairs aligned to exons | |
| 660 - gene_chromosome1 : chromosome of gene 1 | |
| 661 - gene_chromosome2 : chromosome of gene 2 | |
| 662 - gene_end1 : end position for gene 1 | |
| 663 - gene_end2 : end position for gene 2 | |
| 664 - gene_location1 : location of breakpoint in gene 1 | |
| 665 - gene_location2 : location of breakpoint in gene 2 | |
| 666 - gene_start1 : start of gene 1 | |
| 667 - gene_start2 : start of gene 2 | |
| 668 - gene_strand1 : strand of gene 1 | |
| 669 - gene_strand2 : strand of gene 2 | |
| 670 - genome_breakseqs_percident : maximum percent identity of fusion sequence alignments to genome | |
| 671 - genomic_break_pos1 : genomic position in gene 1 of fusion splice / breakpoint | |
| 672 - genomic_break_pos2 : genomic position in gene 2 of fusion splice / breakpoint | |
| 673 - genomic_strand1 : genomic strand in gene 1 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream | |
| 674 - genomic_strand2 : genomic strand in gene 2 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream | |
| 675 - interchromosomal : fusion produced by an interchromosomal translocation | |
| 676 - interrupted_index1 : ratio of coverage before and after the fusion splice / breakpoint in gene 1 | |
| 677 - interrupted_index2 : ratio of coverage before and after the fusion splice / breakpoint in gene 2 | |
| 678 - inversion : fusion produced by genomic inversion | |
| 679 - orf : fusion combines genes in a way that preserves a reading frame | |
| 680 - probability : probability produced by classification using adaboost and example positives/negatives (only given in results.classify.tsv) | |
| 681 - read_through : fusion involving adjacent genes, potentially resulting from co-transcription rather than genome rearrangement | |
| 682 - repeat_proportion1 : proportion of the spanning reads in gene 1 that span a repeat region | |
| 683 - repeat_proportion2 : proportion of the spanning reads in gene 2 that span a repeat region | |
| 684 - max_repeat_proportion : max of repeat_proportion1 and repeat_proportion2 | |
| 685 - splice_score : number of nucleotides similar to GTAG at fusion splice | |
| 686 - num_splice_variants : number of potential splice variants for this gene pair | |
| 687 - splicing_index1 : number of concordant pairs in gene 1 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 2 | |
| 688 - splicing_index2 : number of concordant pairs in gene 2 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 1 | |
| 689 | |
| 690 | |
| 691 **Example** | |
| 692 | |
| 693 results.tsv:: | |
| 694 | |
| 695 cluster_id splitr_sequence splitr_count splitr_span_pvalue splitr_pos_pvalue splitr_min_pvalue adjacent altsplice break_adj_entropy1 break_adj_entropy2 break_adj_entropy_min break_predict breakpoint_homology breakseqs_estislands_percident cdna_breakseqs_percident concordant_ratio deletion est_breakseqs_percident eversion exonboundaries expression1 expression2 gene1 gene2 gene_align_strand1 gene_align_strand2 gene_chromosome1 gene_chromosome2 gene_end1 gene_end2 gene_location1 gene_location2 gene_name1 gene_name2 gene_start1 gene_start2 gene_strand1 gene_strand2 genome_breakseqs_percident genomic_break_pos1 genomic_break_pos2 genomic_strand1 genomic_strand2 interchromosomal interrupted_index1 interrupted_index2 inversion library_name max_map_count max_repeat_proportion mean_map_count min_map_count num_multi_map num_splice_variants orf read_through repeat_proportion1 repeat_proportion2 span_count span_coverage1 span_coverage2 span_coverage_max span_coverage_min splice_score splicing_index1 splicing_index2 | |
| 696 1169 GCTTACTGTATGCCAGGCCCCAGAGGGGCAACCACCCTCTAAAGAGAGCGGCTCCTGCCTCCCAGAAAGCTCACAGACTGTGGGAGGGAAACAGGCAGCAGGTGAAGATGCCAAATGCCAGGATATCTGCCCTGTCCTTGCTTGATGCAGCTGCTGGCTCCCACGTTCTCCCCAGAATCCCCTCACACTCCTGCTGTTTTCTCTGCAGGTTGGCAGAGCCCCATGAGGGCAGGGCAGCCACTTTGTTCTTGGGCGGCAAACCTCCCTGGGCGGCACGGAAACCACGGTGAGAAGGGGGCAGGTCGGGCACGTGCAGGGACCACGCTGCAGG|TGTACCCAACAGCTCCGAAGAGACAGCGACCATCGAGAACGGGCCATGATGACGATGGCGGTTTTGTCGAAAAGAAAAGGGGGAAATGTGGGGAAAAGCAAGAGAGATCAGATTGTTACTGTGTCTGTGTAGAAAGAAGTAGACATGGGAGACTCCATTTTGTTCTGTACTAAGAAAAATTCTTCTGCCTTGAGATTCGGTGACCCCACCCCCAACCCCGTGCTCTCTGAAACATGTGCTGTGTCCACTCAGGGTTGAATGGATTAAGGGCGGTGCGAGACGTGCTTT 2 0.000436307890680442 0.110748295953850 0.0880671602973091 N Y 3.19872427442695 3.48337348351473 3.19872427442695 splitr 0 0 0 0 Y 0 N N 0 0 ENSG00000105549 ENSG00000213753 + - 19 19 376013 59111168 intron upstream THEG AC016629.2 361750 59084870 - + 0 375099 386594 + - N 8.34107429512245 - N output_dir 82 0.677852348993289 40.6666666666667 1 11 1 N N 0.361271676300578 0.677852348993289 12 0.758602776578432 0.569678713445872 0.758602776578432 0.569678713445872 2 0.416666666666667 - | |
| 697 3596 TGGGGGTTGAGGCTTCTGTTCCCAGGTTCCATGACCTCAGAGGTGGCTGGTGAGGTTATGACCTTTGCCCTCCAGCCCTGGCTTAAAACCTCAGCCCTAGGACCTGGTTAAAGGAAGGGGAGATGGAGCTTTGCCCCGACCCCCCCCCGTTCCCCTCACCTGTCAGCCCGAGCTGGGCCAGGGCCCCTAGGTGGGGAACTGGGCCGGGGGGCGGGCACAAGCGGAGGTGGTGCCCCCAAAAGGGCTCCCGGTGGGGTCTTGCTGAGAAGGTGAGGGGTTCCCGGGGCCGCAGCAGGTGGTGGTGGAGGAGCCAAGCGGCTGTAGAGCAAGGGGTGAGCAGGTTCCAGACCGTAGAGGCGGGCAGCGGCCACGGCCCCGGGTCCAGTTAGCTCCTCACCCGCCTCATAGAAGCGGGGTGGCCTTGCCAGGCGTGGGGGTGCTGCC|TTCCTTGGATGTGGTAGCCGTTTCTCAGGCTCCCTCTCCGGAATCGAACCCTGATTCCCCGTCACCCGTGGTCACCATGGTAGGCACGGCGACTACCATCGAAAGTTGATAGGGCAGACGTTCGAATGGGTCGTCGCCGCCACGGGGGGCGTGCGATCAGCCCGAGGTTATCTAGAGTCACCAAAGCCGCCGGCGCCCGCCCCCCGGCCGGGGCCGGAGAGGGGCTGACCGGGTTGGTTTTGATCTGATAAATGCACGCATCCCCCCCGCGAAGGGGGTCAGCGCCCGTCGGCATGTATTAGCTCTAGAATTACCACAGTTATCCAAGTAGGAGAGGAGCGAGCGACCAAAGGAACCATAACTGATTTAATGAGCCATTCGCAGTTTCACTGTACCGGCCGTGCGTACTTAGACATGCATGGCTTAATCTTTGAGACAAGCATATGCTACTGGCAGG 250 7.00711162298275e-72 0.00912124762512338 0.00684237452309549 N N 3.31745197152461 3.47233119514066 3.31745197152461 splitr 7 0.0157657657657656 0 0 N 0.0135135135135136 N N 0 0 ENSG00000156860 ENSG00000212932 - + 16 21 30682131 48111157 coding upstream FBRS RPL23AP4 30670289 48110676 + + 0.0157657657657656 30680678 9827473 - + Y - - N output_dir 2 1 1.11111111111111 1 1 1 N N 0 1 9 0.325530693397641 0.296465452915709 0.325530693397641 0.296465452915709 2 - - | |
| 698 | |
| 699 </help> | |
| 700 <expand macro="citations"/> | |
| 701 </tool> |
