comparison maker.xml @ 1:c98b3fa910bf draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/maker commit a1535cdf1f6fe06a9b11110c0c9627eef732d398
author iuc
date Sun, 01 Jul 2018 16:11:34 -0400
parents 0cb75547ecc2
children be967733bf81
comparison
equal deleted inserted replaced
0:0cb75547ecc2 1:c98b3fa910bf
1 <?xml version="1.0"?> 1 <?xml version="1.0"?>
2 <tool id="maker" name="Maker" profile="16.04" version="@VERSION@"> 2 <tool id="maker" name="Maker" profile="16.04" version="@VERSION@.1">
3 <description>genome annotation pipeline</description> 3 <description>genome annotation pipeline</description>
4 <macros> 4 <macros>
5 <import>macros.xml</import> 5 <import>macros.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
8 <command><![CDATA[ 8 <command><![CDATA[
9 RM_LIB_PATH=\$(dirname \$(which RepeatMasker))/../share/RepeatMasker/Libraries &&
10 mkdir lib &&
11 export REPEATMASKER_LIB_DIR=\$(pwd)/lib &&
12 for file in \$(ls \$RM_LIB_PATH) ; do ln -s \$RM_LIB_PATH/\$file lib/\$file ; done &&
13 #if $repeat_masking.repeat_source.source_type == "repbase":
14 cp '${repeat_masking.repeat_source.repbase_file}' 'lib/${repeat_masking.repeat_source.repbase_file_name}' &&
15 #end if
16
9 maker -CTL 17 maker -CTL
10 18
11 && 19 &&
12 20
13 sed "s/cpus=/cpus=\${GALAXY_SLOTS:-4}/g" '$ctl' > maker_opts.ctl 21 sed "s/cpus=/cpus=\${GALAXY_SLOTS:-4}/g" '$ctl' > maker_opts.ctl
102 #else 110 #else
103 protein_gff= # aligned protein homology evidence from an external GFF3 file 111 protein_gff= # aligned protein homology evidence from an external GFF3 file
104 #end if 112 #end if
105 113
106 #-----Repeat Masking (leave values blank to skip repeat masking) 114 #-----Repeat Masking (leave values blank to skip repeat masking)
107 #if $repeat_masking.repeatmasker.do_rm == 'simple' 115 #if $repeat_masking.repeat_source.source_type == 'repbase'
108 model_org=simple # select a model organism for RepBase masking in RepeatMasker 116
117 #if $repeat_masking.repeat_source.species_source.species_from_list == 'yes'
118 model_org=${repeat_masking.repeat_source.species_source.species_list}
119 #else
120 model_org=${repeat_masking.repeat_source.species_source.species_name}
121 #end if
122
109 rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker 123 rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker
110 #else if $repeat_masking.repeatmasker.do_rm == 'lib' 124 softmask=${repeat_masking.repeat_source.softmask} # use soft-masking rather than hard-masking in BLAST (i.e. seg and dust filtering)
125
126 #else if $repeat_masking.repeat_source.source_type == 'library'
111 model_org= # select a model organism for RepBase masking in RepeatMasker 127 model_org= # select a model organism for RepBase masking in RepeatMasker
112 rmlib=${repeat_masking.repeatmasker.rmlib} # provide an organism specific repeat library in fasta format for RepeatMasker 128 rmlib=${repeat_masking.repeat_source.repeatmasker.rmlib} # provide an organism specific repeat library in fasta format for RepeatMasker
129
130 #if $repeat_masking.repeat_source.repeat_protein
131 repeat_protein=${repeat_masking.repeat_source.repeat_protein} # provide a fasta file of transposable element proteins for RepeatRunner
132 #else
133 repeat_protein= # provide a fasta file of transposable element proteins for RepeatRunner
134 #end if
135 #if $repeat_masking.repeat_source.rm_gff
136 rm_gff=${repeat_masking.repeat_source.rm_gff} # pre-identified repeat elements from an external GFF3 file
137 #else
138 rm_gff= # pre-identified repeat elements from an external GFF3 file
139 #end if
140
141 softmask=${repeat_masking.repeat_source.softmask} # use soft-masking rather than hard-masking in BLAST (i.e. seg and dust filtering)
142
113 #else 143 #else
114 model_org= # select a model organism for RepBase masking in RepeatMasker 144 model_org= # select a model organism for RepBase masking in RepeatMasker
115 rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker 145 rmlib= # provide an organism specific repeat library in fasta format for RepeatMasker
116 #end if 146 softmask=0 # use soft-masking rather than hard-masking in BLAST (i.e. seg and dust filtering)
117 #if $repeat_masking.repeat_protein 147 #end if
118 repeat_protein=${repeat_masking.repeat_protein} # provide a fasta file of transposable element proteins for RepeatRunner 148
119 #else
120 repeat_protein= # provide a fasta file of transposable element proteins for RepeatRunner
121 #end if
122 #if $repeat_masking.rm_gff
123 rm_gff=${repeat_masking.rm_gff} # pre-identified repeat elements from an external GFF3 file
124 #else
125 rm_gff= # pre-identified repeat elements from an external GFF3 file
126 #end if
127 prok_rm=0 # forces MAKER to repeatmask prokaryotes (no reason to change this), 1 = yes, 0 = no 149 prok_rm=0 # forces MAKER to repeatmask prokaryotes (no reason to change this), 1 = yes, 0 = no
128 softmask=${repeat_masking.softmask} # use soft-masking rather than hard-masking in BLAST (i.e. seg and dust filtering)
129 150
130 #-----Gene Prediction 151 #-----Gene Prediction
131 #if $abinitio_gene_prediction.snaphmm 152 #if $abinitio_gene_prediction.snaphmm
132 snaphmm=${abinitio_gene_prediction.snaphmm} # SNAP HMM file 153 snaphmm=${abinitio_gene_prediction.snaphmm} # SNAP HMM file
133 #else 154 #else
357 </conditional> 378 </conditional>
358 <param name="unmask" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Run ab-initio prediction programs on unmasked sequence" help="Predictors will look for genes in repeated elements (like transposons). Only useful when you believe that transposons might have been integrated into a real gene structure (rare)."/> 379 <param name="unmask" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Run ab-initio prediction programs on unmasked sequence" help="Predictors will look for genes in repeated elements (like transposons). Only useful when you believe that transposons might have been integrated into a real gene structure (rare)."/>
359 </section> 380 </section>
360 381
361 <section name="repeat_masking" title="Repeat masking" expanded="True"> 382 <section name="repeat_masking" title="Repeat masking" expanded="True">
362 <conditional name="repeatmasker"> 383 <conditional name="repeat_source">
363 <param name="do_rm" type="select" label="Enable repeat masking with RepeatMasker"> 384 <param label="Repeat library source" name="source_type" type="select">
385 <option selected="true" value="repbase">RepBase</option>
386 <option value="library">Custom library of repeats</option>
387 <option value="no">Disable repeat masking (not recommended)</option>
388 </param>
389 <when value="repbase">
390 <param name="repbase_file" type="data" format="embl" label="RepBase (RMRBSeqs.embl) file" />
391 <param name="repbase_file_name" type="hidden" value="RMRBSeqs.embl"/> <!-- This is an ugly hack to allow testing with a fake repbase -->
392 <conditional name="species_source">
393 <param label="Select species name from a list?" name="species_from_list" type="select">
394 <option value="yes" selected="true">Yes</option>
364 <option value="no">No</option> 395 <option value="no">No</option>
365 <option value="simple" selected="true">Yes, run RepeatMasker with default simple models</option> 396 </param>
366 <option value="lib">Yes, use an organism specific repeat library (fasta)</option> 397 <when value="yes">
367 </param> 398 <param name="species_list" type="select" label="Species">
368 <!-- full repbase cannot be redistributed (for licensing reasons, see https://hpc.nih.gov/apps/repbase_license.html), 399 <option value="anopheles" selected="true">anopheles</option>
369 using only the default maker base 400 <option value="arabidopsis">arabidopsis</option>
370 model_org is ignored and replaced by 'simple' if the full RepBase is not available. 401 <option value="artiodactyl">artiodactyl</option>
371 model_org=simple means maker will search using the simple models shipped by default 402 <option value="aspergillus">aspergillus</option>
372 Installing RepBase requires to replace files in the RepeatMasker installation dir 403 <option value="carnivore">carnivore</option>
373 --> 404 <option value="cat">cat</option>
374 <when value="no"/> 405 <option value="chicken">chicken</option>
375 <when value="simple"/> 406 <option value="ciona intestinalis">ciona intestinalis</option>
376 <when value="lib"> 407 <option value="ciona savignyi">ciona savignyi</option>
377 <param name="rmlib" type="data" format="fasta" label="Organism specific repeat library for RepeatMasker (fasta)"/> 408 <option value="cow">cow</option>
378 </when> 409 <option value="danio">danio</option>
410 <option value="diatoaea">diatomea</option>
411 <option value="dog">dog</option>
412 <option value="drosophila">drosophila</option>
413 <option value="elegans">elegans</option>
414 <option value="fugu">fugu</option>
415 <option value="fungi" selected="true">fungi</option>
416 <option value="human">human</option>
417 <option value="maize">maize</option>
418 <option value="mammal">mammal</option>
419 <option value="mouse">mouse</option>
420 <option value="pig">pig</option>
421 <option value="rat">rat</option>
422 <option value="rice">rice</option>
423 <option value="rodentia">rodentia</option>
424 <option value="ruminantia">ruminantia</option>
425 <option value="wheat">wheat</option>
426 </param>
427 </when>
428 <when value="no">
429 <param name="species_name" type="text" value="homo sapiens" label="Repeat source species" help="Source species (or clade name) used to select repeats from RepBase" />
430 </when>
431 </conditional>
432 <param name="softmask" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use soft-masking rather than hard-masking in BLAST" help="i.e. seg and dust filtering"/>
433 </when>
434 <when value="library">
435 <param name="repeat_protein" type="data" format="fasta" label="Transposable element protein sequences for RepeatRunner" optional="True" />
436 <param name="rm_gff" type="data" format="fasta" label="Pre-identified repeat elements from an external GFF file" optional="True" />
437 <param name="softmask" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use soft-masking rather than hard-masking in BLAST" help="i.e. seg and dust filtering"/>
438 </when>
439 <when value="no"/>
379 </conditional> 440 </conditional>
380 <param name="repeat_protein" type="data" format="fasta" label="Transposable element protein sequences for RepeatRunner (fasta)" help="Leave empty to skip" optional="True"/>
381 <param name="rm_gff" type="data" format="gff" label="Pre-identified repeat elements from an external GFF file" help="Leave empty to skip" optional="True"/>
382 <param name="softmask" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Use soft-masking rather than hard-masking in BLAST" help="i.e. seg and dust filtering"/>
383 </section> 441 </section>
384 442
385 <section name="gene_prediction" title="Other predictions" expanded="True"> 443 <section name="gene_prediction" title="Other predictions" expanded="True">
386 <param name="pred_gff" type="data" format="gff" label="Predictions from an external GFF3 file" optional="True"/> 444 <param name="pred_gff" type="data" format="gff" label="Predictions from an external GFF3 file" optional="True"/>
387 <param name="model_gff" type="data" format="gff" label="Annotated gene models an external GFF3 file" help="annotation pass-through" optional="True"/> 445 <param name="model_gff" type="data" format="gff" label="Annotated gene models from an external GFF3 file" help="annotation pass-through" optional="True"/>
388 <param name="trna" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Find tRNAs with tRNAscan"/> 446 <param name="trna" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Find tRNAs with tRNAscan"/>
389 <param name="snoscan_rrna" type="data" format="fasta" label="rRNA file to have Snoscan find snoRNAs" optional="True"/> 447 <param name="snoscan_rrna" type="data" format="fasta" label="rRNA file to have Snoscan find snoRNAs" optional="True"/>
390 </section> 448 </section>
391 449
392 <section name="advanced" title="Advanced settings" expanded="False"> 450 <section name="advanced" title="Advanced settings" expanded="False">
426 <tests> 484 <tests>
427 <test> 485 <test>
428 <param name="genome" value="genome.fasta"/> 486 <param name="genome" value="genome.fasta"/>
429 <param name="est_evidences|est" value="est.fasta"/> 487 <param name="est_evidences|est" value="est.fasta"/>
430 <param name="est_evidences|est2genome" value="1"/> 488 <param name="est_evidences|est2genome" value="1"/>
489 <param name="repeat_masking|repeat_source|source_type" value="no"/>
431 <output name="output_gff" file="annot.gff3"/> 490 <output name="output_gff" file="annot.gff3"/>
432 <output name="output_evidences" file="evidences.gff3" compare="sim_size"/> 491 <output name="output_evidences" file="evidences.gff3" compare="sim_size"/>
433 </test> 492 </test>
434 <test> 493 <test>
435 <param name="genome" value="genome.fasta"/> 494 <param name="genome" value="genome.fasta"/>
436 <param name="organism_type" value="prokaryotic"/> 495 <param name="organism_type" value="prokaryotic"/>
437 <param name="est_evidences|est" value="est.fasta"/> 496 <param name="est_evidences|est" value="est.fasta"/>
438 <param name="est_evidences|est2genome" value="1"/> 497 <param name="est_evidences|est2genome" value="1"/>
498 <param name="repeat_masking|repeat_source|source_type" value="no"/>
439 <output name="output_gff" file="annot_proc.gff3"/> 499 <output name="output_gff" file="annot_proc.gff3"/>
440 <output name="output_evidences" file="evidences_proc.gff3" compare="sim_size"/> 500 <output name="output_evidences" file="evidences_proc.gff3" compare="sim_size"/>
441 </test> 501 </test>
442 <test> 502 <test>
443 <param name="genome" value="genome.fasta"/> 503 <param name="genome" value="genome.fasta"/>
444 <param name="reannotation|reannotate" value="yes"/> 504 <param name="reannotation|reannotate" value="yes"/>
445 <param name="reannotation|maker_gff" value="evidences.gff3"/> 505 <param name="reannotation|maker_gff" value="evidences.gff3"/>
446 <param name="reannotation|est_pass" value="true"/> 506 <param name="reannotation|est_pass" value="true"/>
447 <param name="est_evidences|est2genome" value="1"/> 507 <param name="est_evidences|est2genome" value="1"/>
508 <param name="repeat_masking|repeat_source|source_type" value="no"/>
448 <output name="output_gff" file="annot_reuse.gff3"/> 509 <output name="output_gff" file="annot_reuse.gff3"/>
449 <output name="output_evidences" file="evidences_reuse.gff3" compare="sim_size"/> 510 <output name="output_evidences" file="evidences_reuse.gff3" compare="sim_size"/>
450 </test> 511 </test>
451 <test> 512 <test>
452 <param name="genome" value="genome.fasta"/> 513 <param name="genome" value="genome.fasta"/>
453 <param name="abinitio_gene_prediction|snaphmm" value="snap.hmm"/> 514 <param name="abinitio_gene_prediction|snaphmm" value="snap.hmm"/>
454 <param name="abinitio_gene_prediction|aug_prediction|augustus_mode" value="builtin"/> 515 <param name="abinitio_gene_prediction|aug_prediction|augustus_mode" value="builtin"/>
455 <param name="abinitio_gene_prediction|aug_prediction|augustus_species" value="human"/> 516 <param name="abinitio_gene_prediction|aug_prediction|augustus_species" value="human"/>
456 <param name="est_evidences|est" value="est.fasta"/> 517 <param name="est_evidences|est" value="est.fasta"/>
457 <param name="est_evidences|est2genome" value="1"/> 518 <param name="est_evidences|est2genome" value="1"/>
519 <param name="repeat_masking|repeat_source|source_type" value="no"/>
458 <output name="output_gff" file="annot_human.gff3" compare="sim_size"/> 520 <output name="output_gff" file="annot_human.gff3" compare="sim_size"/>
459 <output name="output_evidences" file="evidences_human.gff3" compare="sim_size"/> 521 <output name="output_evidences" file="evidences_human.gff3" compare="sim_size"/>
460 </test> 522 </test>
461 <test> 523 <test>
462 <param name="genome" value="genome.fasta"/> 524 <param name="genome" value="genome.fasta"/>
463 <param name="abinitio_gene_prediction|snaphmm" value="snap.hmm"/> 525 <param name="abinitio_gene_prediction|snaphmm" value="snap.hmm"/>
464 <param name="abinitio_gene_prediction|aug_prediction|augustus_mode" value="history"/> 526 <param name="abinitio_gene_prediction|aug_prediction|augustus_mode" value="history"/>
465 <param name="abinitio_gene_prediction|aug_prediction|augustus_model" value="local.tar.gz" ftype="augustus"/> 527 <param name="abinitio_gene_prediction|aug_prediction|augustus_model" value="local.tar.gz" ftype="augustus"/>
466 <param name="est_evidences|est" value="est.fasta"/> 528 <param name="est_evidences|est" value="est.fasta"/>
467 <param name="est_evidences|est2genome" value="1"/> 529 <param name="est_evidences|est2genome" value="1"/>
530 <param name="repeat_masking|repeat_source|source_type" value="no"/>
468 <output name="output_gff" file="annot_model.gff3" compare="sim_size"/> 531 <output name="output_gff" file="annot_model.gff3" compare="sim_size"/>
469 <output name="output_evidences" file="evidences_model.gff3" compare="sim_size"/> 532 <output name="output_evidences" file="evidences_model.gff3" compare="sim_size"/>
470 </test> 533 </test>
471 <test> 534 <test>
472 <param name="genome" value="genome.fasta"/> 535 <param name="genome" value="genome.fasta"/>
473 <param name="est_evidences|est" value="est.fasta"/> 536 <param name="est_evidences|est" value="est.fasta"/>
474 <param name="est_evidences|est2genome" value="1"/> 537 <param name="est_evidences|est2genome" value="1"/>
475 <param name="repeat_masking|repeatmasker|do_rm" value="no"/> 538 <param name="repeat_masking|repeat_source|source_type" value="no"/>
476 <output name="output_gff" file="annot_norm.gff3"/> 539 <output name="output_gff" file="annot_norm.gff3"/>
540 <output name="output_evidences" file="evidences_norm.gff3" compare="sim_size"/>
541 </test>
542 <test>
543 <param name="genome" value="genome.fasta"/>
544 <param name="est_evidences|est" value="est.fasta"/>
545 <param name="est_evidences|est2genome" value="1"/>
546 <param name="repeat_masking|repeat_source|source_type" value="repbase"/>
547 <param name="repeat_masking|repeat_source|repbase_file" value="fake_repbase.embl" />
548 <param name="repeat_masking|repeat_source|repbase_file_name" value="fake.embl" />
549 <param name="repeat_masking|repeat_source|species_list" value="anopheles" />
550 <output name="output_gff" file="annot_repbase.gff3"/>
477 <output name="output_evidences" file="evidences_norm.gff3" compare="sim_size"/> 551 <output name="output_evidences" file="evidences_norm.gff3" compare="sim_size"/>
478 </test> 552 </test>
479 </tests> 553 </tests>
480 <help><![CDATA[ 554 <help><![CDATA[
481 MAKER is a portable and easily configurable genome annotation pipeline. Its purpose is to allow smaller eukaryotic and prokaryotic genome projects to independently annotate their genomes and to create genome databases. MAKER identifies repeats, aligns ESTs and proteins to a genome, produces ab-initio gene predictions and automatically synthesizes these data into gene annotations having evidence-based quality values. MAKER is also easily trainable: outputs of preliminary runs can be used to automatically retrain its gene prediction algorithm, producing higher quality gene-models on subsequent runs. MAKER's inputs are minimal and its outputs can be directly loaded into a GMOD database. They can also be viewed in the Apollo genome browser; this feature of MAKER provides an easy means to annotate, view and edit individual contigs and BACs without the overhead of a database. MAKER should prove especially useful for emerging model organism projects with minimal bioinformatics expertise and computer resources. 555 MAKER is a portable and easily configurable genome annotation pipeline. Its purpose is to allow smaller eukaryotic and prokaryotic genome projects to independently annotate their genomes and to create genome databases. MAKER identifies repeats, aligns ESTs and proteins to a genome, produces ab-initio gene predictions and automatically synthesizes these data into gene annotations having evidence-based quality values. MAKER is also easily trainable: outputs of preliminary runs can be used to automatically retrain its gene prediction algorithm, producing higher quality gene-models on subsequent runs. MAKER's inputs are minimal and its outputs can be directly loaded into a GMOD database. They can also be viewed in the Apollo genome browser; this feature of MAKER provides an easy means to annotate, view and edit individual contigs and BACs without the overhead of a database. 
MAKER should prove especially useful for emerging model organism projects with minimal bioinformatics expertise and computer resources.