Mercurial > repos > csbl > repeatmodeler

diff repeatmodeler.xml @ 0:25c221ef3104 draft default tip
"planemo upload commit 5c6a5c0f9aacbc7def652b33cc35ee37aa543d05-dirty"
author: csbl
date: Tue, 24 Nov 2020 03:55:18 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repeatmodeler.xml	Tue Nov 24 03:55:18 2020 +0000
@@ -0,0 +1,135 @@
+<tool id="repeatmodeler" name="RepeatModeler - Model repetitive DNA" version="0.1.0" python_template_version="3.5">
+    <!-- Software Galaxy has to resolve before the command section runs. The version is
+         pinned to 2.0.1, the release named at the top of the help text. Presumably this
+         package also supplies the BuildDatabase helper the command invokes; verify
+         against the package contents. -->
+    <requirements>
+        <requirement type="package" version="2.0.1">repeatmodeler</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Step 1: index the genome FASTA under the user-supplied database name.
+        BuildDatabase -name '$name' '$input_file'
+        ## Step 2: model repeat families with the requested number of parallel jobs.
+        && RepeatModeler -database '$name' -pa '$pa'
+        ## Step 3: copy the resulting families FASTA to the Galaxy output dataset.
+        && cp '${name}-families.fa' '$output'
+    ]]></command>
+    <inputs>
+        <!-- Genome assembly that BuildDatabase indexes. -->
+        <param type="data" name="input_file" format="fasta" label="Input genome fasta"/>
+        <!-- Database name. The command also uses it as the prefix of the families file it
+             copies to the output, so an empty value is rejected in the form. -->
+        <param argument="-name" type="text" value="" label="Title for building database" help="Name given to the sequence database; the same name is passed to RepeatModeler.">
+            <validator type="empty_field" message="Please enter a name for the database."/>
+        </param>
+        <!-- Declared as an integer so that empty, non-numeric or non-positive values are
+             refused by the form instead of failing inside RepeatModeler. -->
+        <param argument="-pa" type="integer" min="1" value="1" label="Number of parallel search jobs" help="RMBlast jobs use 4 cores each, so on a machine with 12 cores use 3."/>
+    </inputs>
+    <outputs>
+      <!-- NOTE(review): the two declarations below are disabled. They tried to collect
+           results through a wildcard from_work_dir; presumably that did not resolve,
+           which is why the command section copies the families file to $output itself.
+           Confirm before re-enabling either of them. -->
+      <!-- <data format="fasta" name="RepeatModels" from_work_dir="*-families.fa" label="${tool.name} on ${on_string}: RepeatModels::FASTA" /> -->
+      <!-- <data format="txt" name="StockholmFormat" from_work_dir="*-families.stk" label="${tool.name} on ${on_string}: RepeatModels::StockholmFormat" /> -->
+      <!-- The only active output: the families FASTA that the command section copies to $output. -->
+      <data format="fasta" name="output" label="${tool.name} on ${on_string}: RepeatModels::FASTA" />
+    </outputs>
+    <tests>
+        <!-- Runs the whole pipeline on eco.fasta with the database name "eco" and 4 parallel
+             jobs. The result is compared with the expected file by size only (within 10%);
+             presumably because the output is not byte-reproducible between runs (the help
+             text notes that batches are selected randomly unless -srand is given). -->
+        <test>
+            <param name="input_file" value="eco.fasta" ftype="fasta"/>
+            <param name="name" value="eco" />
+            <param name="pa" value="4" />
+            <output name="output" file="consensi.fa.classified" compare="sim_size" delta_frac="0.1" />
+        </test>
+    </tests>
+    <help><![CDATA[
+      RepeatModeler - 2.0.1
+
+      NAME
+          RepeatModeler - Model repetitive DNA
+
+      SYNOPSIS
+            RepeatModeler [-options] -database <XDF Database>
+
+      DESCRIPTION
+          The options are:
+
+          -h(elp)
+              Detailed help
+
+          -database <DBNAME>
+              The name of the sequence database to run an analysis on. This is the
+              name that was provided to the BuildDatabase script using the "-name"
+              option.
+
+          -pa #
+              Specify the number of parallel search jobs to run. RMBlast jobs will
+              use 4 cores each and ABBlast jobs will use a single core each. For
+              example, on a machine with 12 cores and running with RMBlast you
+              would use -pa 3 to fully utilize the machine.
+
+          -recoverDir <Previous Output Directory>
+              If a run fails in the middle of processing, it may be possible to
+              recover some results and continue where the previous run left off.
+              Simply supply the output directory where the results of the failed
+              run were saved and the program will attempt to recover and continue
+              the run.
+
+          -srand #
+              Optionally set the seed of the random number generator to a known
+              value before the batches are randomly selected ( using Fisher Yates
+              Shuffling ). This is only useful if you need to reproduce the sample
+              choice between runs. This should be an integer number.
+
+          -LTRStruct [optional]
+              Run the LTR structural discovery pipeline ( LTR_Harvest and
+              LTR_retriever ) and combine results with the RepeatScout/RECON
+              pipeline. [optional]
+
+          -genomeSampleSizeMax #
+              Optionally change the maximum bp of the genome to sample in all
+              rounds of RECON (default=243000000).
+
+      CONFIGURATION OVERRIDES
+          -ltr_retriever_dir <string>
+              The path to the installation of the LTR_Retriever structural LTR
+              analysis package.
+
+          -rmblast_dir <string>
+              The path to the installation of the RMBLAST sequence alignment
+              program.
+
+          -repeatmasker_dir <string>
+              The path to the installation of RepeatMasker.
+
+          -trf_prgm <string>
+              The full path including the name for the TRF program ( 4.0.9 or
+              higher )
+
+          -ninja_dir <string>
+              The path to the installation of the Ninja phylogenetic analysis
+              package.
+
+          -recon_dir <string>
+              The path to the installation of the RECON de-novo repeatfinding
+              program.
+
+          -genometools_dir <string>
+              The path to the installation of the GenomeTools package.
+
+          -abblast_dir <string>
+              The path to the installation of the ABBLAST sequence alignment
+              program.
+
+          -rscout_dir <string>
+              The path to the installation of the RepeatScout ( 1.0.6 or higher )
+              de-novo repeatfinding program.
+
+          -mafft_dir <string>
+              The path to the installation of the MAFFT multiple alignment
+              program.
+
+          -cdhit_dir <string>
+              The path to the installation of the CD-Hit sequence clustering
+              package.
+
+      SEE ALSO
+              RepeatMasker, RMBlast
+
+      COPYRIGHT
+           Copyright 2005-2019 Institute for Systems Biology
+
+      AUTHOR
+           RepeatModeler:
+             Robert Hubley <rhubley@systemsbiology.org>
+             Arian Smit <asmit@systemsbiology.org>
+
+           LTR Pipeline Extensions:
+             Jullien Michelle Flynn <jmf422@cornell.edu>
+    ]]></help>
+    <!-- DOIs of the publications users are asked to cite when they use this tool. -->
+    <citations>
+      <citation type="doi">10.1073/pnas.1921046117</citation>
+      <citation type="doi">10.1186/s13059-018-1577-z</citation>
+    </citations>
+</tool>
author	csbl
date	Tue, 24 Nov 2020 03:55:18 +0000
parents
children