Mercurial > repos > jdv > gottcha
comparison gottcha.xml @ 0:2569a83977f5 draft
planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/gottcha commit 5d24210279e26623ae6c98f7551e3565fdc9bc48
author | jdv |
---|---|
date | Mon, 30 Jan 2017 19:07:21 -0500 |
parents | |
children | 87efdde6105f |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2569a83977f5 |
---|---|
1 <tool id="gottcha" name="GOTTCHA" version="0.0.1"> | |
2 | |
3 <description>Read-based metagenome characterization</description> | |
4 | |
5 <!-- ***************************************************************** --> | |
6 | |
7 <requirements> <!-- Package dependency declared for this tool: gottcha, pinned to version 1.0b-564cf3b --> | |
8 <requirement type="package" version="1.0b-564cf3b">gottcha</requirement> | |
9 </requirements> | |
10 | |
11 <!-- ***************************************************************** --> | |
12 | |
13 <version_command>gottcha.pl -h | perl -wnE'print "$1\n" for /VERSION: (\S+)/g'</version_command> <!-- Prints the non-whitespace token that follows "VERSION: " in the gottcha.pl -h output, one per line --> | |
14 | |
15 <!-- ***************************************************************** --> | |
16 | |
17 <command detect_errors="aggressive"> | |
18 <![CDATA[ | |
19 ## Runs gottcha.pl on the selected reads and database; outputs are written to the current directory with the prefix "results". | |
20 gottcha.pl | |
21 ## GALAXY_SLOTS is double-quoted below so the shell expands it at run time; single quotes would pass the literal text to gottcha.pl. | |
22 --input '${fn_in}' | |
23 --database '${db.fields.path}' | |
24 --threads "\${GALAXY_SLOTS:-1}" | |
25 --outdir './' | |
26 --prefix results | |
27 | |
28 ##--General Options------------------------------ | |
29 | |
30 --relAbu ${general.rel_abund} | |
31 --mode ${general.output_full} | |
32 ${general.filt_plasmid} | |
33 | |
34 ##--Split-trim Options--------------------------- | |
35 | |
36 --minQ ${split.min_qual} | |
37 --fixL ${split.fixed_len} | |
38 --ascii ${split.qual_offset} | |
39 | |
40 ##--Filtering Options---------------------------- | |
41 | |
42 --minCov ${filter.min_cov} | |
43 --minMLHL ${filter.min_mlhl} | |
44 --cCov ${filter.c_cov} | |
45 --minLen ${filter.min_len} | |
46 --minHits ${filter.min_hits} | |
47 | |
48 ]]> | |
49 </command> | |
50 | |
51 <!-- ***************************************************************** --> | |
52 | |
53 <inputs> | |
54 <!-- Required inputs: the reads to profile and the reference database chosen from the gottcha_indices data table --> | |
55 <param type="data" name="fn_in" format="fastq" help="--input" label="Input reads"/> | |
56 <param type="select" name="db" help="--database" label="Select a reference database"> | |
57 <options from_data_table="gottcha_indices"> | |
58 <filter column="2" type="sort_by"/> | |
59 <validator message="No indexes are available for the selected input dataset" type="no_options"/> | |
60 </options> | |
61 </param> | |
62 <!-- General options: abundance field, report mode and plasmid filtering --> | |
63 <section title="General Options" name="general" expanded="True"> | |
64 <param type="select" name="rel_abund" help="--relAbu" label="Abundance field"> | |
65 <option selected="true" value="LINEAR_DOC">Linear DOC</option> | |
66 <option value="LINEAR_LENGTH">Linear length</option> | |
67 <option value="TOTAL_BP_MAPPED">Total bp mapped</option> | |
68 <option value="HIT_COUNT">Hit count</option> | |
69 </param> | |
70 <param type="boolean" name="output_full" checked="no" truevalue="full" falsevalue="summary" help="--mode full" label="Output full report"/> | |
71 <param type="boolean" name="filt_plasmid" checked="no" truevalue="--noPlasmidHit" falsevalue="" help="If true, ignore alignments to plasmids" label="Filter plasmid hits"/> | |
72 </section> | |
73 <!-- Split-trim options: quality threshold, trim length and quality encoding offset --> | |
74 <section title="Split-trim Options" name="split" expanded="False"> | |
75 <param type="integer" name="min_qual" value="20" min="0" max="41" size="4" help="Minimum quality for a read to be considered valid (0-41)" label="Minimum quality"/> | |
76 <param type="integer" name="fixed_len" value="30" min="1" size="4" help="Fixed length to which each read will be trimmed" label="Trim length"/> | |
77 <param type="select" name="qual_offset" help="Base call quality offset for ASCII encoding" label="Quality offset"> | |
78 <option selected="true" value="33">33</option> | |
79 <option value="64">64</option> | |
80 </param> | |
81 </section> | |
82 <!-- Filtering options: thresholds applied when computing abundance --> | |
83 <section title="Filtering Options" name="filter" expanded="False"> | |
84 <param type="float" name="min_cov" value="0.005" min="0" size="5" help="Minimum linear coverage to be considered valid in abundance calculation" label="Minimum coverage"/> | |
85 <param type="integer" name="min_mlhl" value="5" min="0" size="4" help="Minimum mean-linear-hit-length to be considered valid in abundance calculations" label="Minimum MLHL"/> | |
86 <param type="float" name="c_cov" value="0.006" min="0" size="5" help="Critical coverage below which Minimum MLHL will cause an organism to fail" label="Critical coverage for MLHL"/> | |
87 <param type="integer" name="min_len" value="100" min="0" size="4" help="Minimum unique length to be considered valid in abundance calculation" label="Minimum length"/> | |
88 <param type="integer" name="min_hits" value="10" min="0" size="4" help="Minimum number of hits to be considered valid in abundance calculation" label="Minimum hits"/> | |
89 </section> | |
90 | |
91 </inputs> | |
92 | |
93 <!-- ***************************************************************** --> | |
94 | |
95 <outputs> | |
96 <!-- Log and summary are always collected. The full report is kept only when output_full is enabled; that parameter lives in the "general" section, so the filter must address it with dictionary syntax. --> | |
97 <data name="out_log" format="txt" label="GOTTCHA on ${on_string}: Log" from_work_dir="results.gottcha.log" /> | |
98 <data name="out_tsv" format="txt" label="GOTTCHA on ${on_string}: Summary" from_work_dir="results.gottcha.tsv" /> | |
99 <data name="out_full" format="txt" label="GOTTCHA on ${on_string}: Full Report" from_work_dir="results.gottcha_full.tsv"> | |
100 <filter>general['output_full']</filter> | |
101 </data> | |
102 | |
103 </outputs> | |
104 | |
105 <!-- ***************************************************************** --> | |
106 | |
107 <tests> | |
108 <test> <!-- Run with database "test_db", full report disabled and min_hits lowered to 1; the log is compared by size only --> | |
109 <param value="test_db" name="db"/> | |
110 <param value="test.fq" name="fn_in" ftype="fastq"/> | |
111 <param value="no" name="output_full"/> | |
112 <param value="1" name="min_hits"/> | |
113 <output file="test_02.tsv" name="out_tsv"/> | |
114 <output file="test_02.log" name="out_log" delta="2000" compare="sim_size"/> | |
115 </test> | |
116 </tests> | |
117 | |
118 <!-- ***************************************************************** --> | |
119 | |
120 <help> | |
121 <![CDATA[ | |
122 | |
123 .. class:: infomark | |
124 | |
125 Description | |
126 -------------------- | |
127 | |
128 Genomic Origin Through Taxonomic CHAllenge (GOTTCHA) is an | |
129 annotation-independent and signature-based metagenomic taxonomic profiling | |
130 tool that has significantly smaller FDR than other profiling tools. This Perl | |
131 script is a wrapper to run the GOTTCHA profiling tool with pre-computed | |
132 signature databases. The procedure includes 3 major steps: split-trimming the | |
133 input data, mapping reads to a GOTTCHA database using BWA, and profiling/filtering | |
134 the result. | |
135 | |
136 Options | |
137 -------------------- | |
138 :: | |
139 | |
140 --relAbu|r <STRING> The field that will be used to calculate relative | |
141 abundance. You can specify one of the following | |
142 fields: "LINEAR_LENGTH", "TOTAL_BP_MAPPED", | |
143 "HIT_COUNT", "LINEAR_DOC". | |
144 [default: LINEAR_DOC] | |
145 --mode|m <STRING> You can specify one of the output modes: | |
146 "summary" : this mode will report a summary of | |
147 profiling result to *.gottcha.tsv file. | |
148 "full" : other than a summary, this mode will | |
149 report unfiltered result to | |
150 *.gottcha_full.tsv with more detail. | |
151 "all" : other than two tables, this mode will | |
152 keep all output files that were | |
153 generated by each profiling step. | |
154 [default: summary] | |
155 --noPlasmidHit|n Ignore alignments that hit to plasmids | |
156 [default: null] | |
157 | |
158 *** OPTIONS FOR SPLIT-TRIMMING READS *** | |
159 | |
160 --minQ <INT> Minimum quality for a read to be considered valid | |
161 (0-41) [default: 20] | |
162 --fixL <INT> Fixed length to which each trimmed read will be cut | |
163 down to [default: 30] | |
164 --ascii <INT> ASCII encoding of quality score (33 or 64) [default: | |
165 33] | |
166 | |
167 *** OPTIONS FOR FILTERING PROFILING RESULT *** | |
168 | |
169 --minCov <FLOAT> Minimum linear coverage to be considered valid in | |
170 abundance calculation [default: 0.005] | |
171 --minMLHL <INT> Minimum Mean-Linear-Hit-Length to be considered valid | |
172 in abundance calculation [default: 5] | |
173 --cCov <FLOAT> Critical coverage below which --minMLHL will cause an | |
174 organism to fail [default: 0.006] | |
175 --minLen <INT> Minimum unique length to be considered valid in | |
176 abundance calculation [default: 100] | |
177 --minHits <INT> Minimum number of hits to be considered valid in | |
178 abundance calculation [default: 10] | |
179 | |
180 Interpreting Results | |
181 -------------------- | |
182 | |
183 GOTTCHA reports profiling results in a neat summary table | |
184 by default. The tsv file will list the organism(s) at all taxonomic | |
185 levels from STRAIN to PHYLUM, their linear length, total bases mapped, | |
186 linear depth of coverage, and the normalized linear depth of coverage. The | |
187 linear depth of coverage (LINEAR_DOC) is used to calculate relative | |
188 abundance of each organism or taxonomic name in the sample. | |
189 | |
190 Summary table: | |
191 | |
192 ================= ============================== | |
193 Column Description | |
194 ================= ============================== | |
195 LEVEL taxonomic rank | |
196 NAME taxonomic name | |
197 REL_ABUNDANCE relative abundance (equivalent to NORM_COV by default) | |
198 LINEAR_LENGTH number of non-overlapping bases covering the signatures | |
199 TOTAL_BP_MAPPED sum total of all hit lengths recruited to signatures | |
200 HIT_COUNT number of hits recruited to signatures | |
201 HIT_COUNT_PLASMID number of hits recruited to plasmids | |
202 READ_COUNT number of reads recruited to signatures | |
203 LINEAR_DOC linear depth-of-coverage (TOTAL_BP_MAPPED / LINEAR_LENGTH) | |
204 NORM_COV normalized linear depth-of-coverage (LINEAR_DOC / SUM(LINEAR_DOC in certain level)) | |
205 ================= ============================== | |
206 | |
207 Other than a summary table, "full" report mode will report a table with more | |
208 detailed information from unfiltered results. The explanation of each column in | |
209 the full report is as follows: | |
210 | |
211 ================================== ========================== | |
212 Column Description | |
213 ================================== ========================== | |
214 RANKNAME (REPLICON) = replicon name (source + plasmid/chr)<br>(STRAIN) = strain name<br>(SPECIES) = species name<br>(GENUS) = genus name<br>... | |
215 NUM_SUBRANKS no. of distinct subranks for the current rank<br>(E.g. the no. of SPECIES under the current GENUS) | |
216 GPROJ_ENTRIES no. of genome projects (i.e. STRAINS) under this RANK NAME | |
217 LINEAR_LENGTH N/O_LENGTH<br>= non-overlapping length <br>= no. of non-overlapping bases covering the unique DB | |
218 UNIQUE_DB_LENGTH no. of unique bases for this organism | |
219 FULL_REFDB_LENGTH no. of bases in full reference | |
220 LINEAR_COV LINEAR_LENGTH / UNIQUE_DB_LENGTH | |
221 HIT_COUNT no. of hits recruited to genome | |
222 HIT_COUNT_PLASMID no. of hits recruited to plasmid | |
223 READ_COUNT no. of reads recruited to genome | |
224 FULL_HIT_COUNT no. of full-length read hits recruited to genome | |
225 TOTAL_BP_MAPPED sum total of all hit lengths recruited to genome<br>= hit1.length + hit2.length + ... hitX.length<br>[formerly FOLD_COV_UNIQUE_SAMPLE] | |
226 LINEAR_DOC linear depth-of-coverage<br>= fold coverage of sample's LINEAR_LENGTH <br>= TOTAL_BP_MAPPED / LINEAR_LENGTH<br>[formerly FOLD_COV_UNIQUE_REFDB] | |
227 UREF_DOC unique reference's depth-of-coverage<br>= fold coverage of reference's UNIQUE_DB_LENGTH<br>= TOTAL_BP_MAPPED / UNIQUE_DB_LENGTH | |
228 UREF_CMAX MAX COVERAGE OF REFDB POSSIBLE, GIVEN SAMPLE INPUT BASES<br>= Cmax = L0/l0 <br>= TOTAL_INPUT_BASES / UNIQUE_DB_LENGTH | |
229 FRAC_HITS_POSSIBLE HIT_COUNT / TOTAL_INPUT_READS | |
230 FRAC_BASES_POSSIBLE TOTAL_BP_MAPPED / TOTAL_INPUT_BASES | |
231 MEAN_HIT_LENGTH TOTAL_BP_MAPPED / HIT_COUNT | |
232 MEAN_LINEAR_HIT_LENGTH LINEAR_LENGTH / HIT_COUNT | |
233 best_SUBRANK name of the best subrank (determined by the highest LINEAR_COV) | |
234 best_NUM_SUBRANKS no. of subranks supporting current "SUBRANK"<br>{SS} = no. of GI entries supporting this strain<br>{S} = no. of strains supporting this species<br>{G} = no. of species supporting this genus<br>{F} = no. of genera supporting this family<br>{O} = no. of families supporting this order <br>{C} = no. of orders supporting this class<br>{P} = no. of classes supporting this phylum | |
235 best_GPROJ_ENTRIES no. of genome projects (i.e. STRAINS) under this best_SUBRANK<br>{SS} = no. of genome projects supporting this strain = 1<br>{S} = no. of genome projects supporting this species<br>{G} = no. of genome projects supporting this genus<br>{F} = no. of genome projects supporting this family<br>{O} = no. of genome projects supporting this order<br>{C} = no. of genome projects supporting this class<br>{P} = no. of genome projects supporting this phylum | |
236 best_LINEAR_LENGTH | |
237 best_UNIQUE_DB_LENGTH | |
238 best_FULL_REFDB_LENGTH | |
239 best_LINEAR_COV | |
240 best_HIT_COUNT | |
241 best_FULL_HIT_COUNT | |
242 best_TOTAL_BP_MAPPED | |
243 best_LINEAR_DOC (a.k.a. Abundance) | |
244 best_UREF_DOC | |
245 best_UREF_CMAX | |
246 best_FRAC_HITS_POSSIBLE | |
247 best_FRAC_BASES_POSSIBLE | |
248 best_MEAN_HIT_LENGTH | |
249 best_MEAN_LINEAR_HIT_LENGTH | |
250 CONTIG_COUNT no. of contiguous fragments<br> (after mapping & generating non-overlapping fragments) | |
251 CONTIG_MEAN_LEN mean length of contigs (bp) | |
252 CONTIG_STDEV_LEN standard deviation of contig lengths (bp) | |
253 CONTIG_MINLEN length of smallest contig | |
254 CONTIG_MAXLEN length of largest contig | |
255 CONTIG_HISTOGRAM(LEN:FREQ) contig Length Histogram<br> (in the format contigLength:frequency) | |
256 ================================== ========================== | |
257 | |
258 ]]> | |
259 </help> | |
260 | |
261 <!-- ***************************************************************** --> | |
262 | |
263 <citations> <!-- Publication to cite for this tool, referenced by DOI --> | |
264 <citation type="doi">10.1093/nar/gkv180</citation> | |
265 </citations> | |
266 | |
267 </tool> |