41
|
1 <tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.3.5.0">
|
|
2 <description>analyse 16S rRNA data from Illumina Miseq paired-end reads</description>
|
|
<requirements>
  <!-- Package the tool depends on, pinned to version 1.3.5 -->
  <requirement version="1.3.5" type="package">amplicon_analysis_pipeline</requirement>
</requirements>
|
|
<stdio>
  <!-- Matches every exit status of 1 or above -->
  <exit_code range="1:"/>
</stdio>
|
|
9 <command><![CDATA[
|
|
10
|
|
## Convenience variable for the selected pipeline ("Vsearch" or "DADA2")
#set $pipeline_name = $pipeline.pipeline_name

## Set the reference database name.
## This short name is used later to build the RESULTS/<pipeline>_<database>
## paths that outputs are copied from.
##  - DADA2 has no database selector on the form and always uses "silva".
##  - For Vsearch the selector value is the command-line flag itself:
##    "-S" (Silva), "-H" (HOMD) or empty (GreenGenes).
#if str( $pipeline_name ) == "DADA2"
  #set reference_database_name = "silva"
#else
  #set reference_database = $pipeline.reference_database
  ## Compare the rendered string rather than the parameter wrapper object,
  ## in line with every other comparison in this template
  #if str( $reference_database ) == "-S"
    #set reference_database_name = "silva"
  #else if str( $reference_database ) == "-H"
    #set reference_database_name = "homd"
  #else
    #set reference_database_name = "gg"
  #end if
#end if
|
|
27
|
|
## Run the amplicon analysis pipeline wrapper
python "$__tool_directory__/amplicon_analysis_pipeline.py"
## Set options (each one is only passed when its form field is non-empty)
#if str( $forward_pcr_primer ) != ""
  -g "$forward_pcr_primer"
#end if
#if str( $reverse_pcr_primer ) != ""
  -G "$reverse_pcr_primer"
#end if
#if str( $trimming_threshold ) != ""
  -q $trimming_threshold
#end if
#if str( $sliding_window_length ) != ""
  -l $sliding_window_length
#end if
#if str( $minimum_overlap ) != ""
  -O $minimum_overlap
#end if
#if str( $minimum_length ) != ""
  -L $minimum_length
#end if
-P $pipeline_name
## The dollar sign is escaped so that the shell, not Cheetah, expands the
## variable at run time; "ReferenceData" is used when it is unset
-r \${AMPLICON_ANALYSIS_REF_DATA_PATH-ReferenceData}
## Vsearch only: the selector value ("-S", "-H" or empty) is the flag itself
#if str( $pipeline_name ) != "DADA2"
  ${reference_database}
#end if
#if str($categories_file_in) != 'None'
  -c "${categories_file_in}"
#end if
## Input files
"${metatable_file_in}"
## FASTQ pairs: one "name R1 R2" triple per sample
#if str($input_type.pairs_or_collection) == "collection"
  ## Elements of the list:paired collection are read via .forward/.reverse
  #for $fq_pair in $input_type.fastq_collection
    "${fq_pair.name}" "${fq_pair.forward}" "${fq_pair.reverse}"
  #end for
#else
  ## Repeat entries only carry the parameters declared in <inputs>
  ## (name, fastq_r1, fastq_r2); they have no .forward/.reverse
  #for $fq_pair in $input_type.fastq_pairs
    "${fq_pair.name}" "${fq_pair.fastq_r1}" "${fq_pair.fastq_r2}"
  #end for
#end if
&&
|
|
69
|
|
## Collect outputs
## Copy the pipeline's result files onto the Galaxy output datasets.
## Every command is chained with && so the first failed copy stops the job.
cp Metatable_log/Metatable_mod.txt "${metatable_mod}" &&
#if str( $pipeline_name ) == "Vsearch"
  ## Vsearch-specific
  ## (must be a double-hash Cheetah comment: a single "#" is emitted into
  ## the command line, where the shell treats it as the start of a comment)
  cp ${pipeline_name}_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
  cp Multiplexed_files/${pipeline_name}_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta "${dereplicated_nonchimera_otus_fasta}" &&
  cp QUALITY_CONTROL/Reads_count.txt "$read_counts_out" &&
#else
  ## DADA2-specific
  cp ${pipeline_name}_OTU_tables/DADA2_tax_OTU_table.biom "${tax_otu_table_biom_file}" &&
  cp ${pipeline_name}_OTU_tables/seqs.fa "${dereplicated_nonchimera_otus_fasta}" &&
#end if
cp ${pipeline_name}_OTU_tables/otus.tre "${otus_tre_file}" &&
cp RESULTS/${pipeline_name}_${reference_database_name}/OTUs_count.txt "${otus_count_file}" &&
cp RESULTS/${pipeline_name}_${reference_database_name}/table_summary.txt "${table_summary_file}" &&
cp fastqc_quality_boxplots.html "${fastqc_quality_boxplots_html}" &&
|
|
86
|
|
## OTU table heatmap
## (destination is wrapped in exactly one pair of double quotes)
cp RESULTS/${pipeline_name}_${reference_database_name}/Heatmap.pdf "${heatmap_otu_table_pdf}" &&
|
|
89
|
|
## HTML outputs
## For each HTML dataset: create its files_path directory, copy the
## supporting files into it, and copy the main page onto the dataset.
## All destination paths are double-quoted so they survive word splitting.

## Phylum genus barcharts
mkdir "${phylum_genus_dist_barcharts_html.files_path}" &&
cp -r RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/charts "${phylum_genus_dist_barcharts_html.files_path}" &&
cp -r RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/raw_data "${phylum_genus_dist_barcharts_html.files_path}" &&
cp RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/bar_charts.html "${phylum_genus_dist_barcharts_html}" &&

## Beta diversity weighted 2d plots
mkdir "${beta_div_even_weighted_2d_plots.files_path}" &&
cp -r RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/weighted_2d_plot/* "${beta_div_even_weighted_2d_plots.files_path}" &&
cp RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_weighted_2d_plots}" &&

## Beta diversity unweighted 2d plots
mkdir "${beta_div_even_unweighted_2d_plots.files_path}" &&
cp -r RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/unweighted_2d_plot/* "${beta_div_even_unweighted_2d_plots.files_path}" &&
cp RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_unweighted_2d_plots}" &&

## Alpha diversity rarefaction plots
mkdir "${alpha_div_rarefaction_plots.files_path}" &&
cp RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/rarefaction_curves/rarefaction_plots.html "${alpha_div_rarefaction_plots}" &&
cp -r RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/rarefaction_curves/average_plots "${alpha_div_rarefaction_plots.files_path}" &&
|
|
112
|
|
## DADA2 error rate plots
## Only collected when the DADA2 pipeline was selected; the matching output
## dataset is filtered on the same condition in <outputs>.
#if str($pipeline_name) == "DADA2"
  mkdir "${dada2_error_rate_plots.files_path}" &&
  cp DADA2_OTU_tables/Error_rate_plots/error_rate_plots.html "${dada2_error_rate_plots}" &&
  cp -r DADA2_OTU_tables/Error_rate_plots/*.pdf "${dada2_error_rate_plots.files_path}" &&
#end if

## Categories data
## Only collected when a Categories file was supplied (an unset optional
## data parameter renders as the string 'None').
#if str($categories_file_in) != 'None'
  ## Alpha diversity boxplots
  mkdir "${alpha_div_boxplots.files_path}" &&
  cp alpha_diversity_boxplots.html "$alpha_div_boxplots" &&
  cp RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf "${alpha_div_boxplots.files_path}" &&
#end if
|
|
127
|
|
## Pipeline outputs (log files etc)
## The log files go into the dataset's files_path directory and the HTML
## index page is copied onto the dataset itself. This is the last command
## in the chain, so it has no trailing &&.
mkdir ${log_files.files_path} &&
cp Amplicon_analysis_pipeline.log ${log_files.files_path} &&
cp pipeline.log ${log_files.files_path} &&
cp Pipeline_outputs.txt ${log_files.files_path} &&
cp Metatable_log/Metatable.html ${log_files.files_path} &&
cp pipeline_outputs.html "${log_files}"
|
|
135 ]]></command>
|
|
<inputs>
  <!-- Free text that is interpolated into every output dataset label -->
  <param name="title" type="text" value="test" size="25"
         label="Title"
         help="Optional text that will be added to the output dataset names" />

  <!-- Sample metadata table (required) and categories list (optional) -->
  <param name="metatable_file_in" type="data" format="tabular"
         label="Input Metatable.txt file" />
  <param name="categories_file_in" type="data" format="txt" optional="true"
         label="Input Categories.txt file"
         help="(optional)" />

  <!-- FASTQ input: a list:paired collection or manually entered pairs -->
  <conditional name="input_type">
    <param name="pairs_or_collection" type="select"
           label="Input FASTQ type">
      <option value="pairs_of_files">Pairs of datasets</option>
      <option value="collection" selected="true">Dataset pairs in a collection</option>
    </param>
    <when value="collection">
      <param name="fastq_collection" type="data_collection"
             format="fastqsanger,fastq" collection_type="list:paired"
             label="Collection of FASTQ forward and reverse (R1/R2) pairs"
             help="Each FASTQ pair will be treated as one sample; the name of each sample will be taken from the first column of the Metatable file " />
    </when>
    <when value="pairs_of_files">
      <!-- At least one pair is required (min="1") -->
      <repeat name="fastq_pairs" title="Input fastq pairs" min="1">
        <param name="name" type="text" value=""
               label="Final name for FASTQ pair" />
        <param name="fastq_r1" type="data" format="fastqsanger,fastq"
               label="FASTQ with forward reads (R1)" />
        <param name="fastq_r2" type="data" format="fastqsanger,fastq"
               label="FASTQ with reverse reads (R2)" />
      </repeat>
    </when>
  </conditional>

  <!-- Optional primer sequences; left empty they are not passed on -->
  <param name="forward_pcr_primer" type="text" value=""
         label="Forward PCR primer sequence"
         help="Optional; must not include barcode or adapter sequence (-g)" />
  <param name="reverse_pcr_primer" type="text" value=""
         label="Reverse PCR primer sequence"
         help="Optional; must not include barcode or adapter sequence (-G)" />

  <!-- Numeric thresholds; the flag each one maps to is given in its help -->
  <param name="trimming_threshold" type="integer" value="20"
         label="Threshold quality below which read will be trimmed"
         help="Phred score; default is 20 (-q)" />
  <param name="minimum_overlap" type="integer" value="10"
         label="Minimum overlap in bp between forward and reverse reads"
         help="Default is 10 (-O)" />
  <param name="minimum_length" type="integer" value="200"
         label="Minimum length in bp to keep sequence after overlapping"
         help="Default is 200 (-L)" />
  <param name="sliding_window_length" type="integer" value="10"
         label="Minimum length in bp to retain a read after trimming"
         help="Supplied to Sickle; default is 10 (-l)" />

  <!-- Pipeline choice; only Vsearch offers a reference database selector -->
  <conditional name="pipeline">
    <param name="pipeline_name" type="select"
           label="Pipeline to use for analysis">
      <option value="Vsearch" selected="true" >Vsearch</option>
      <option value="DADA2">DADA2</option>
    </param>
    <when value="Vsearch">
      <!-- Option values are the command-line flags; GreenGenes is empty -->
      <param name="reference_database" type="select"
             label="Reference database">
        <option value="" selected="true">GreenGenes</option>
        <option value="-S">Silva</option>
        <option value="-H">Human Oral Microbiome Database (HOMD)</option>
      </param>
    </when>
    <when value="DADA2">
    </when>
  </conditional>
</inputs>
|
|
<outputs>
  <!-- Outputs produced for every run -->
  <data name="metatable_mod" format="tabular"
        label="${tool.name}:${title} Metatable_mod.txt" />

  <!-- Only created when the Vsearch pipeline is selected -->
  <data name="read_counts_out" format="tabular"
        label="${tool.name} (${pipeline.pipeline_name}):${title} read counts">
    <filter>pipeline['pipeline_name'] == 'Vsearch'</filter>
  </data>

  <data name="tax_otu_table_biom_file" format="biom"
        label="${tool.name} (${pipeline.pipeline_name}):${title} tax OTU table (biom format)" />
  <data name="otus_tre_file" format="tabular"
        label="${tool.name} (${pipeline.pipeline_name}):${title} otus.tre" />
  <data name="phylum_genus_dist_barcharts_html" format="html"
        label="${tool.name} (${pipeline.pipeline_name}):${title} phylum genus dist barcharts HTML" />
  <data name="otus_count_file" format="tabular"
        label="${tool.name} (${pipeline.pipeline_name}):${title} OTUs count file" />
  <data name="table_summary_file" format="tabular"
        label="${tool.name} (${pipeline.pipeline_name}):${title} table summary file" />
  <data name="dereplicated_nonchimera_otus_fasta" format="fasta"
        label="${tool.name} (${pipeline.pipeline_name}):${title} multiplexed linearized dereplicated mc2 repset nonchimeras OTUs FASTA" />
  <data name="fastqc_quality_boxplots_html" format="html"
        label="${tool.name} (${pipeline.pipeline_name}):${title} FastQC per-base quality boxplots HTML" />
  <data name="heatmap_otu_table_pdf" format="pdf"
        label="${tool.name} (${pipeline.pipeline_name}):${title} heatmap OTU table PDF" />
  <data name="beta_div_even_weighted_2d_plots" format="html"
        label="${tool.name} (${pipeline.pipeline_name}):${title} beta diversity weighted 2D plots HTML" />
  <data name="beta_div_even_unweighted_2d_plots" format="html"
        label="${tool.name} (${pipeline.pipeline_name}):${title} beta diversity unweighted 2D plots HTML" />
  <data name="alpha_div_rarefaction_plots" format="html"
        label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity rarefaction plots HTML" />

  <!-- Only created when the DADA2 pipeline is selected -->
  <data name="dada2_error_rate_plots" format="html"
        label="${tool.name} (${pipeline.pipeline_name}):${title} DADA2 error rate plots">
    <filter>pipeline['pipeline_name'] == 'DADA2'</filter>
  </data>

  <!-- Only created when a Categories file was supplied -->
  <data name="alpha_div_boxplots" format="html"
        label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity boxplots">
    <filter>categories_file_in is not None</filter>
  </data>

  <data name="log_files" format="html"
        label="${tool.name} (${pipeline.pipeline_name}):${title} log files" />
</outputs>
|
|
243 <tests>
|
|
244 </tests>
|
|
245 <help><![CDATA[
|
|
246
|
|
247 What it does
|
|
248 ------------
|
|
249
|
|
250 This pipeline has been designed for the analysis of 16S rRNA data from
|
|
251 Illumina Miseq (Casava >= 1.8) paired-end reads.
|
|
252
|
|
253 Usage
|
|
254 -----
|
|
255
|
|
256 1. Preparation of the mapping file and format of unique sample id
|
|
257 *****************************************************************
|
|
258
|
|
259 Before using the amplicon analysis pipeline it would be necessary to
|
|
260 follow the steps as below to avoid analysis failures and ensure samples
|
|
261 are labelled appropriately. Sample names for the labelling are derived
|
|
262 from the fastq files names that are generated from the sequencing. The
|
|
263 labels will include everything between the beginning of the name and
|
|
264 the sample number (from C11 to S19 in Fig. 1)
|
|
265
|
|
.. image:: Pipeline_description_Fig1.png
   :height: 46
   :width: 382
|
|
269
|
|
270 **Figure 1**
|
|
271
|
|
272 If analysing 16S data from multiple runs:
|
|
273
|
|
274 The samples from different runs may have identical IDs. For example,
|
|
275 when sequencing the same samples twice, by chance, these could be at
|
|
276 the same position in both the runs. This would cause the fastq files
|
|
277 to have exactly the same IDs (Fig. 2).
|
|
278
|
|
.. image:: Pipeline_description_Fig2.png
   :height: 100
   :width: 463
|
|
282
|
|
283 **Figure 2**
|
|
284
|
|
285 In case of identical sample IDs the pipeline will fail to run and
|
|
286 generate an error at the beginning of the analysis.
|
|
287
|
|
288 To avoid having to change the file names, before uploading the files,
|
|
289 ensure that the samples IDs are not repeated.
|
|
290
|
|
291 2. To upload the file
|
|
292 *********************
|
|
293
|
|
294 Click on **Get Data/Upload File** from the Galaxy tool panel on the
|
|
295 left hand side.
|
|
296
|
|
297 From the pop-up window, choose how to upload the file. The
|
|
298 **Choose local file** option can be used for files up to 4Gb. Fastq files
|
|
299 from Illumina MiSeq will rarely be bigger than 4Gb and this option is
|
|
300 recommended.
|
|
301
|
|
302 After choosing the files click **Start** to begin the upload. The window can
|
|
303 now be closed and the files will be uploaded onto the Galaxy server. You
|
|
304 will see the progress on the ``HISTORY`` panel on the right
|
|
305 side of the screen. The colour will change from grey (queuing), to yellow
|
|
306 (uploading) and finally green (uploaded).
|
|
307
|
|
308 Once all the files are uploaded, click on the operations on multiple
|
|
309 datasets icon and select the fastq files that need to be analysed.
|
|
310 Click on the tab **For all selected...** and on the option
|
|
311 **Build List of Dataset pairs** (Fig. 3).
|
|
312
|
|
.. image:: Pipeline_description_Fig3.png
   :height: 247
   :width: 586
|
|
316
|
|
317 **Figure 3**
|
|
318
|
|
319 Change the filter parameter ``_1`` and ``_2`` to be ``_R1`` and ``_R2``.
|
|
320 The fastq files forward R1 and reverse R2 should now appear in the
|
|
321 corresponding columns.
|
|
322
|
|
323 Select **Autopair**. This creates a collection of paired fastq files for
|
|
324 the forward and reverse reads for each sample. The name of the pairs will
|
|
325 be the ones used by the pipeline. You are free to change the names at this
|
|
326 point as long as they are the same used in the Metatable file
|
|
327 (see section 3).
|
|
328
|
|
329 Name the collection and click on **create list**. This reduces the time
|
|
330 required to input the forward and reverse reads for each individual sample.
|
|
331
|
|
332 3. Create the Metatable files
|
|
333 *****************************
|
|
334
|
|
335 Metatable.txt
|
|
336 ~~~~~~~~~~~~~
|
|
337
|
|
338 Click on the list of pairs you just created to see the name of the single
|
|
339 pairs. The name of the pairs will be the ones used by the pipeline,
|
|
340 therefore, these are the names that need to be used in the Metatable file.
|
|
341
|
|
342 The Metatable file has to be in QIIME format. You can find a description
|
|
343 of it on QIIME website http://qiime.org/documentation/file_formats.html
|
|
344
|
|
EXAMPLE::

    #SampleID    BarcodeSequence     LinkerPrimerSequence    Disease    Gender    Description
    Mock-RUN1    TAAGGCGAGCGTAAGA                            PsA        Male      Control
    Mock-RUN2    CGTACTAGGCGTAAGA                            PsA        Male      Control
    Mock-RUN3    AGGCAGAAGCGTAAGA                            PsC        Female    Control
|
|
351
|
|
352 Briefly: the column ``LinkerPrimerSequence`` is empty but it cannot be
|
|
deleted. The header is very important. ``#SampleID``, ``BarcodeSequence``,
|
|
354 ``LinkerPrimerSequence`` and ``Description`` are mandatory. Between
|
|
355 ``LinkerPrimerSequence`` and ``Description`` you can add as many columns
|
|
356 as you want. For every column a PCoA plot will be created (see
|
|
357 **Results** section). You can create this file in Excel and it will have
|
|
358 to be saved as ``Text(Tab delimited)``.
|
|
359
|
|
360 During the analysis the Metatable.txt will be checked to ensure that the
|
|
361 file has the correct format. If necessary, this will be modified and will
|
|
be available as ``Metatable_mod.txt`` in the history panel. If you are
|
|
363 going to use the metatable file for any other statistical analyses,
|
|
364 remember to use the ``Metatable_mod.txt`` one, otherwise the sample
|
|
365 names might not match!
|
|
366
|
|
367 Categories.txt (optional)
|
|
368 ~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
369
|
|
370 This file is required if you want to get box plots for comparison of
|
|
371 alpha diversity indices (see **Results** section). The file is a list
|
|
372 (without header and IN ONE COLUMN) of categories present in the
|
|
373 Metatable.txt file. THE NAMES YOU ARE USING HAVE TO BE THE SAME AS THE
|
|
374 ONES USED IN THE METATABLE.TXT. You can create this file in Excel and
|
|
it will have to be saved as ``Text(Tab delimited)``.
|
|
376
|
|
EXAMPLE::

    Disease
    Gender
|
|
381
|
|
382 Metatable and categories files can be uploaded using Get Data as done
|
|
with the fastq files.
|
|
384
|
|
385 4. Analysis
|
|
386 ***********
|
|
387
|
|
388 Under **Amplicon_Analysis_Pipeline**
|
|
389
|
|
390 * **Title** Name to distinguish between the runs. It will be shown at
|
|
391 the beginning of each output file name.
|
|
392
|
|
393 * **Input Metatable.txt file** Select the Metatable.txt file related to
|
|
394 this analysis
|
|
395
|
|
396 * **Input Categories.txt file (Optional)** Select the Categories.txt file
|
|
397 related to this analysis
|
|
398
|
|
399 * **Input FASTQ type** select *Dataset pairs in a collection* and, then,
|
|
400 the collection of pairs you created earlier.
|
|
401
|
|
402 * **Forward/Reverse PCR primer sequence** if the PCR primer sequences
|
|
403 have not been removed from the MiSeq during the fastq creation, they
|
|
404 have to be removed before the analysis. Insert the PCR primer sequence
|
|
405 in the corresponding field. DO NOT include any barcode or adapter
|
|
406 sequence. If the PCR primers have been already trimmed by the MiSeq,
|
|
407 and you include the sequence in this field, this would lead to an error.
|
|
408 Only include the sequences if still present in the fastq files.
|
|
409
|
|
410 * **Threshold quality below which reads will be trimmed** Choose the
|
|
411 Phred score used by Sickle to trim the reads at the 3’ end.
|
|
412
|
|
413 * **Minimum length to retain a read after trimming** If the read length
|
|
414 after trimming is shorter than a user defined length, the read, along
|
|
415 with the corresponding read pair, will be discarded.
|
|
416
|
|
417 * **Minimum overlap in bp between forward and reverse reads** Choose the
|
|
418 minimum basepair overlap used by Pandaseq to assemble the reads.
|
|
419 Default is 10.
|
|
420
|
|
421 * **Minimum length in bp to keep a sequence after overlapping** Choose the
|
|
422 minimum sequence length used by Pandaseq to keep a sequence after the
|
|
overlapping. This depends on the expected amplicon length. The default
in this form is 200; 380 is used for V3-V4 16S sequencing (expected length ~440bp)
|
|
425
|
|
426 * **Pipeline to use for analysis** Choose the pipeline to use for OTU
|
|
427 clustering and chimera removal. The Galaxy tool supports the ``Vsearch``
|
|
428 and ``DADA2`` pipelines.
|
|
429
|
|
430 * **Reference database** Choose between ``GreenGenes``, ``Silva`` or
|
|
431 ``HOMD`` (Human Oral Microbiome Database) for taxa assignment.
|
|
432
|
|
433 Click on **Execute** to start the analysis.
|
|
434
|
|
435 5. Results
|
|
436 **********
|
|
437
|
|
438 Results are entirely generated using QIIME scripts. The results will
|
|
439 appear in the History panel when the analysis is completed.
|
|
440
|
|
441 The following outputs are captured:
|
|
442
|
|
443 * **Vsearch_tax_OTU_table.biom|DADA2_tax_OTU_table.biom (biom format)**
|
|
444 The OTU table in BIOM format (http://biom-format.org/)
|
|
445
|
|
446 * **otus.tre** Phylogenetic tree constructed using ``make_phylogeny.py``
|
|
447 (fasttree) QIIME script (http://qiime.org/scripts/make_phylogeny.html)
|
|
448
|
|
449 * **Phylum_genus_dist_barcharts_HTML** HTML file with bar charts at
|
|
450 Phylum, Genus and Species level
|
|
451 (http://qiime.org/scripts/summarize_taxa.html and
|
|
452 http://qiime.org/scripts/plot_taxa_summary.html)
|
|
453
|
|
454 * **OTUs_count_file** Summary of OTU counts per sample
|
|
455 (http://biom-format.org/documentation/summarizing_biom_tables.html)
|
|
456
|
|
457 * **Table_summary_file** Summary of sequences counts per sample
|
|
458 (http://biom-format.org/documentation/summarizing_biom_tables.html)
|
|
459
|
|
460 * **multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta|seqs.fa**
|
|
461 Fasta file with OTU sequences (Vsearch|DADA2)
|
|
462
|
|
463 * **Heatmap_PDF** OTU heatmap in PDF format
|
|
464 (http://qiime.org/1.8.0/scripts/make_otu_heatmap_html.html )
|
|
465
|
|
466 * **Vsearch_beta_diversity_weighted_2D_plots_HTML** PCoA plots in HTML
|
|
467 format using weighted Unifrac distance measure. Samples are grouped
|
|
468 by the column names present in the Metatable file. The samples are
|
|
469 firstly rarefied to the minimum sequencing depth
|
|
470 (http://qiime.org/scripts/beta_diversity_through_plots.html )
|
|
471
|
|
472 * **Vsearch_beta_diversity_unweighted_2D_plots_HTML** PCoA plots in HTML
|
|
473 format using Unweighted Unifrac distance measure. Samples are grouped
|
|
474 by the column names present in the Metatable file. The samples are
|
|
475 firstly rarefied to the minimum sequencing depth
|
|
476 (http://qiime.org/scripts/beta_diversity_through_plots.html )
|
|
477
|
|
478 Code availability
|
|
479 -----------------
|
|
480
|
|
481 **Code is available at** https://github.com/MTutino/Amplicon_analysis
|
|
482
|
|
483 Credits
|
|
484 -------
|
|
485
|
|
486 Pipeline author: Mauro Tutino
|
|
487
|
|
488 Galaxy tool: Peter Briggs
|
|
489
|
|
490 ]]></help>
|
|
<citations>
  <!-- BibTeX entry pointing at the pipeline's GitHub repository -->
  <citation type="bibtex">
@misc{githubAmplicon_analysis,
  author    = {Tutino, Mauro},
  year      = {2017},
  title     = {Amplicon Analysis Pipeline},
  publisher = {GitHub},
  journal   = {GitHub repository},
  url       = {https://github.com/MTutino/Amplicon_analysis},
}</citation>
</citations>
|
|
502 </tool>
|