gottcha: gottcha.xml comparison

comparison gottcha.xml @ 0:2569a83977f5 draft

planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/gottcha commit 5d24210279e26623ae6c98f7551e3565fdc9bc48

author	jdv
date	Mon, 30 Jan 2017 19:07:21 -0500
parents
children	87efdde6105f

comparison

equal deleted inserted replaced

--1:000000000000
+:2569a83977f5
+<tool id="gottcha" name="GOTTCHA" version="0.0.1">
+<description>Read-based metagenome characterization</description>
+<!-- Runtime dependency: the gottcha package, pinned to one exact version -->
+<requirements>
+  <requirement type="package" version="1.0b-564cf3b">gottcha</requirement>
+</requirements>
+<!-- Reported tool version: every token that follows "VERSION:" in the
+     output of "gottcha.pl -h", printed one per line -->
+<version_command>gottcha.pl -h | perl -wnE'print "$1\n" for /VERSION: (\S+)/g'</version_command>
+<!-- ***************************************************************** -->
+<command detect_errors="aggressive">
+<![CDATA[
+## Runs gottcha.pl on the selected reads and database and writes every
+## result file into the job working directory under the prefix "results"
+## (the outputs section collects them from there).
+gottcha.pl
+--input    '${fn_in}'
+--database '${db.fields.path}'
+## GALAXY_SLOTS is a shell variable expanded when the job runs (falling back
+## to 1 when unset), so it has to sit inside double quotes: single quotes
+## would hand the unexpanded text to gottcha.pl instead of a thread count.
+--threads  "\${GALAXY_SLOTS:-1}"
+--outdir   './'
+--prefix   results
+##--General Options------------------------------
+--relAbu   ${general.rel_abund}
+## output_full renders as "full" or "summary" (its truevalue / falsevalue)
+--mode ${general.output_full}
+## filt_plasmid renders as the plasmid-filter flag, or as nothing when unchecked
+${general.filt_plasmid}
+##--Split-trim Options---------------------------
+--minQ     ${split.min_qual}
+--fixL     ${split.fixed_len}
+--ascii    ${split.qual_offset}
+##--Filtering Options----------------------------
+--minCov   ${filter.min_cov}
+--minMLHL  ${filter.min_mlhl}
+--cCov     ${filter.c_cov}
+--minLen   ${filter.min_len}
+--minHits  ${filter.min_hits}
+]]>
+</command>
+<!-- ***************************************************************** -->
+<inputs>
+  <!-- Reads to be profiled -->
+  <param name="fn_in" type="data" format="fastq" label="Input reads" help="--input" />
+  <!-- Reference database, picked from the gottcha_indices data table and
+       sorted on column 2; the validator rejects an empty option list -->
+  <param name="db" type="select" label="Select a reference database" help="--database">
+    <options from_data_table="gottcha_indices">
+      <filter type="sort_by" column="2"/>
+      <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+    </options>
+  </param>
+  <!-- General options: shown expanded -->
+  <section name="general" title="General Options" expanded="True">
+    <param name="rel_abund" type="select" label="Abundance field" help="--relAbu">
+      <option value="LINEAR_DOC" selected="true">Linear DOC</option>
+      <option value="LINEAR_LENGTH">Linear length</option>
+      <option value="TOTAL_BP_MAPPED">Total bp mapped</option>
+      <option value="HIT_COUNT">Hit count</option>
+    </param>
+    <!-- Renders as the report mode value: "full" when checked, "summary" otherwise -->
+    <param name="output_full" type="boolean" truevalue="full" falsevalue="summary" checked="no" label="Output full report" help="--mode full" />
+    <!-- Renders as the plasmid-filter flag when checked, as an empty string otherwise -->
+    <param name="filt_plasmid" type="boolean" truevalue="--noPlasmidHit" falsevalue="" checked="no" label="Filter plasmid hits" help="If true, ignore alignments to plasmids" />
+  </section>
+  <!-- Read split-trimming options: shown collapsed -->
+  <section name="split" title="Split-trim Options" expanded="False">
+    <param name="min_qual" size="4" type="integer" value="20" min="0" max="41" label="Minimum quality" help="Minimum quality for a read to be considered valid (0-41)" />
+    <param name="fixed_len" size="4" type="integer" value="30" min="1" label="Trim length" help="Fixed length to which each read will be trimmed" />
+    <param name="qual_offset" type="select" label="Quality offset" help="Base call quality offset for ASCII encoding">
+      <option value="33" selected="true">33</option>
+      <option value="64">64</option>
+    </param>
+  </section>
+  <!-- Result filtering thresholds: shown collapsed -->
+  <section name="filter" title="Filtering Options" expanded="False">
+    <param name="min_cov" size="5" type="float" value="0.005" min="0" label="Minimum coverage" help="Minimum linear coverage to be considered valid in abundance calculation" />
+    <param name="min_mlhl" size="4" type="integer" value="5" min="0" label="Minimum MLHL" help="Minimum mean-linear-hit-length to be considered valid in abundance calculations" />
+    <param name="c_cov" size="5" type="float" value="0.006" min="0" label="Critical coverage for MLHL" help="Critical coverage below which Minimum MLHL will cause an organism to fail" />
+    <param name="min_len" size="4" type="integer" value="100" min="0" label="Minimum length" help="Minimum unique length to be considered valid in abundance calculation" />
+    <param name="min_hits" size="4" type="integer" value="10" min="0" label="Minimum hits" help="Minimum number of hits to be considered valid in abundance calculation" />
+  </section>
+</inputs>
+<!-- ***************************************************************** -->
+<outputs>
+  <!-- Log and summary table: always collected from the job working directory -->
+  <data name="out_log" format="txt" label="GOTTCHA on ${on_string}: Log" from_work_dir="results.gottcha.log" />
+  <data name="out_tsv" format="txt" label="GOTTCHA on ${on_string}: Summary" from_work_dir="results.gottcha.tsv" />
+  <!-- Full report: only created when "Output full report" is checked.
+       output_full is declared inside the "general" section, so the filter
+       has to reach it through that section. A bare name is undefined when
+       the filter is evaluated, which leaves this output enabled for every
+       run, including summary-only runs that never write the file. -->
+  <data name="out_full" format="txt" label="GOTTCHA on ${on_string}: Full Report" from_work_dir="results.gottcha_full.tsv">
+    <filter>general['output_full']</filter>
+  </data>
+</outputs>
+<!-- ***************************************************************** -->
+<tests>
+  <!-- Summary-mode run against the test database with min_hits lowered to 1 -->
+  <test>
+    <param name="db" value="test_db" />
+    <param name="fn_in" ftype="fastq" value="test.fq" />
+    <param name="output_full" value="no" />
+    <param name="min_hits" value="1" />
+    <!-- Summary table is compared against the reference file -->
+    <output name="out_tsv" file="test_02.tsv" />
+    <!-- Log is only compared by size (sim_size), with a tolerance of 2000 -->
+    <output name="out_log" file="test_02.log" compare="sim_size" delta="2000"/>
+  </test>
+</tests>
+<!-- ***************************************************************** -->
+<help>
+<![CDATA[
+.. class:: infomark
+Description
+--------------------
+Genomic Origin Through Taxonomic CHAllenge (GOTTCHA) is an
+annotation-independent and signature-based metagenomic taxonomic profiling
+tool that has significantly smaller FDR than other profiling tools. This Perl
+script is a wrapper to run the GOTTCHA profiling tool with pre-computed
+signature databases. The procedure includes 3 major steps: split-trimming the
+input data, mapping reads to a GOTTCHA database using BWA, profiling/filtering
+the result.
+Options
+--------------------
+::
+--relAbu|r   <STRING>  The field that will be used to calculate relative
+abundance. You can specify one of the following
+fields: "LINEAR_LENGTH", "TOTAL_BP_MAPPED",
+"HIT_COUNT", "LINEAR_DOC".
+[default: LINEAR_DOC]
+--mode|m     <STRING>  You can specify one of the following output modes:
+"summary" : this mode will report a summary of
+profiling result to *.gottcha.tsv file.
+"full"    : other than a summary, this mode will
+report unfiltered result to
+*.gottcha_full.tsv with more detail.
+"all"     : other than two tables, this mode will
+keep all output files that were
+generated by each profiling step.
+[default: summary]
+--noPlasmidHit|n       Ignore alignments that hit to plasmids
+[default: null]
+*** OPTIONS FOR SPLIT-TRIMMING READS ***
+--minQ       <INT>     Minimum quality for a read to be considered valid
+(0-41) [default: 20]
+--fixL       <INT>     Fixed length to which each trimmed read will be cut
+down to [default: 30]
+--ascii      <INT>     ASCII encoding of quality score (33 or 64) [default:
+33]
+*** OPTIONS FOR FILTERING PROFILING RESULT ***
+--minCov     <FLOAT>   Minimum linear coverage to be considered valid in
+abundance calculation [default: 0.005]
+--minMLHL    <INT>     Minimum Mean-Linear-Hit-Length to be considered valid
+in abundance calculation [default: 5]
+--cCov       <FLOAT>   Critical coverage below which --minMLHL will cause an
+organism to fail [default: 0.006]
+--minLen     <INT>     Minimum unique length to be considered valid in
+abundance calculation [default: 100]
+--minHits    <INT>     Minimum number of hits to be considered valid in
+abundance calculation [default: 10]
+Interpreting Results
+--------------------
+GOTTCHA reports profiling results in a neat summary table
+by default. The tsv file will list the organism(s) at all taxonomic
+levels from STRAIN to PHYLUM, their linear length, total bases mapped,
+linear depth of coverage, and the normalized linear depth of coverage. The
+linear depth of coverage (LINEAR_DOC) is used to calculate relative
+abundance of each organism or taxonomic name in the sample.
+Summary table:
+=================  ==============================
+Column             Description
+=================  ==============================
+LEVEL              taxonomic rank
+NAME               taxonomic name
+REL_ABUNDANCE      relative abundance (equivalent to NORM_COV by default)
+LINEAR_LENGTH      number of non-overlapping bases covering the signatures
+TOTAL_BP_MAPPED    sum total of all hit lengths recruited to signatures
+HIT_COUNT          number of hits recruited to signatures
+HIT_COUNT_PLASMID  number of hits recruited to plasmid signatures
+READ_COUNT         number of reads recruited to signatures
+LINEAR_DOC         linear depth-of-coverage (TOTAL_BP_MAPPED / LINEAR_LENGTH)
+NORM_COV           normalized linear depth-of-coverage (LINEAR_DOC / SUM(LINEAR_DOC in certain level))
+=================  ==============================
+In addition to the summary table, "full" report mode produces a table with more
+detailed information from the unfiltered results.  The explanation of each column in
+the full report is as follows:
+==================================  ==========================
+Column                              Description
+==================================  ==========================
+RANKNAME                            (REPLICON)  = replicon name (source + plasmid/chr)<br>(STRAIN)    = strain name<br>(SPECIES)   = species name<br>(GENUS)     = genus name<br>...
+NUM_SUBRANKS                        no. of distinct subranks for the current rank<br>(E.g.  the no. of SPECIES under the current GENUS)
+GPROJ_ENTRIES                       no. of genome projects (i.e. STRAINS) under this RANK NAME
+LINEAR_LENGTH                       N/O_LENGTH<br>= non-overlapping length <br>= no. of non-overlapping bases covering the unique DB
+UNIQUE_DB_LENGTH                    no. of unique bases for this organism
+FULL_REFDB_LENGTH                   no. of bases in full reference
+LINEAR_COV                          LINEAR_LENGTH / UNIQUE_DB_LENGTH
+HIT_COUNT                           no. of hits recruited to genome
+HIT_COUNT_PLASMID                   no. of hits recruited to plasmid
+READ_COUNT                          no. of reads recruited to genome
+FULL_HIT_COUNT                      no. of full-length read hits recruited to genome
+TOTAL_BP_MAPPED                     sum total of all hit lengths recruited to genome<br>= hit1.length + hit2.length + ... hitX.length<br>[formerly FOLD_COV_UNIQUE_SAMPLE]
+LINEAR_DOC                          linear depth-of-coverage<br>= fold coverage of sample's LINEAR_LENGTH <br>= TOTAL_BP_MAPPED / LINEAR_LENGTH<br>[formerly FOLD_COV_UNIQUE_REFDB]
+UREF_DOC                            unique reference's depth-of-coverage<br>= fold coverage of reference's UNIQUE_DB_LENGTH<br>= TOTAL_BP_MAPPED / UNIQUE_DB_LENGTH
+UREF_CMAX                           MAX COVERAGE OF REFDB POSSIBLE, GIVEN SAMPLE INPUT BASES<br>= Cmax = L0/l0 <br>= TOTAL_INPUT_BASES / UNIQUE_DB_LENGTH
+FRAC_HITS_POSSIBLE                  HIT_COUNT / TOTAL_INPUT_READS
+FRAC_BASES_POSSIBLE                 TOTAL_BP_MAPPED / TOTAL_INPUT_BASES
+MEAN_HIT_LENGTH                     TOTAL_BP_MAPPED / HIT_COUNT
+MEAN_LINEAR_HIT_LENGTH              LINEAR_LENGTH / HIT_COUNT
+best_SUBRANK                        name of the best subrank (determined by the highest LINEAR_COV)
+best_NUM_SUBRANKS                   no. of subranks supporting current "SUBRANK"<br>{SS} = no. of GI entries supporting this strain<br>{S}  = no. of strains supporting this species<br>{G}  = no. of species supporting this genus<br>{F}  = no. of genera supporting this family<br>{O}  = no. of families supporting this order <br>{C}  = no. of orders supporting this class<br>{P}  = no. of classes supporting this phylum
+best_GPROJ_ENTRIES                  no. of genome projects (i.e. STRAINS) under this best_SUBRANK<br>{SS} = no. of genome projects supporting this strain = 1<br>{S}  = no. of genome projects supporting this species<br>{G}  = no. of genome projects supporting this genus<br>{F}  = no. of genome projects supporting this family<br>{O}  = no. of genome projects supporting this order<br>{C}  = no. of genome projects supporting this class<br>{P}  = no. of genome projects supporting this phylum
+best_LINEAR_LENGTH
+best_UNIQUE_DB_LENGTH
+best_FULL_REFDB_LENGTH
+best_LINEAR_COV
+best_HIT_COUNT
+best_FULL_HIT_COUNT
+best_TOTAL_BP_MAPPED
+best_LINEAR_DOC (a.k.a. Abundance)
+best_UREF_DOC
+best_UREF_CMAX
+best_FRAC_HITS_POSSIBLE
+best_FRAC_BASES_POSSIBLE
+best_MEAN_HIT_LENGTH
+best_MEAN_LINEAR_HIT_LENGTH
+CONTIG_COUNT                        no. of contiguous fragments<br> (after mapping & generating non-overlapping fragments)
+CONTIG_MEAN_LEN                     mean length of contigs (bp)
+CONTIG_STDEV_LEN                    standard deviation of contig lengths (bp)
+CONTIG_MINLEN                       length of smallest contig
+CONTIG_MAXLEN                       length of largest contig
+CONTIG_HISTOGRAM(LEN:FREQ)          contig Length Histogram<br> (in the format contigLength:frequency)
+==================================  ==========================
+]]>
+</help>
+<!-- ***************************************************************** -->
+<citations>
+  <!-- Publication to cite for this tool, referenced by DOI -->
+  <citation type="doi">10.1093/nar/gkv180</citation>
+</citations>
+</tool>

Mercurial > repos > jdv > gottcha

comparison gottcha.xml @ 0:2569a83977f5 draft