Mercurial > repos > pjbriggs > amplicon_analysis_pipeline
comparison amplicon_analysis_pipeline.xml @ 42:098ad1dd7760 draft
planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 10be6f00106e853a6720e4052871d9d84e027137
| author | pjbriggs |
|---|---|
| date | Thu, 05 Dec 2019 11:48:01 +0000 |
| parents | |
| children | 4bfa62618f7c |
comparison
equal
deleted
inserted
replaced
| 41:7b9786a43a16 | 42:098ad1dd7760 |
|---|---|
| 1 <tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.3.5.0"> | |
| 2 <description>analyse 16S rRNA data from Illumina Miseq paired-end reads</description> | |
| 3 <requirements> | |
| 4 <requirement type="package" version="1.3.5">amplicon_analysis_pipeline</requirement> | |
| 5 </requirements> | |
| 6 <stdio> | |
| 7 <exit_code range="1:" /> | |
| 8 </stdio> | |
| 9 <command><![CDATA[ | |
| 10 | |
| 11 ## Convenience variable for pipeline name | |
| 12 #set $pipeline_name = $pipeline.pipeline_name | |
| 13 | |
| 14 ## Set the reference database name | |
| 15 #if str( $pipeline_name ) == "DADA2" | |
| 16 #set reference_database_name = "silva" | |
| 17 #else | |
| 18 #set reference_database = $pipeline.reference_database | |
| 19 #if $reference_database == "-S" | |
| 20 #set reference_database_name = "silva" | |
| 21 #else if $reference_database == "-H" | |
| 22 #set reference_database_name = "homd" | |
| 23 #else | |
| 24 #set reference_database_name = "gg" | |
| 25 #end if | |
| 26 #end if | |
| 27 | |
| 28 ## Run the amplicon analysis pipeline wrapper | |
| 29 python $__tool_directory__/amplicon_analysis_pipeline.py | |
| 30 ## Set options | |
| 31 #if str( $forward_pcr_primer ) != "" | |
| 32 -g "$forward_pcr_primer" | |
| 33 #end if | |
| 34 #if str( $reverse_pcr_primer ) != "" | |
| 35 -G "$reverse_pcr_primer" | |
| 36 #end if | |
| 37 #if str( $trimming_threshold ) != "" | |
| 38 -q $trimming_threshold | |
| 39 #end if | |
| 40 #if str( $sliding_window_length ) != "" | |
| 41 -l $sliding_window_length | |
| 42 #end if | |
| 43 #if str( $minimum_overlap ) != "" | |
| 44 -O $minimum_overlap | |
| 45 #end if | |
| 46 #if str( $minimum_length ) != "" | |
| 47 -L $minimum_length | |
| 48 #end if | |
| 49 -P $pipeline_name | |
| 50 -r \${AMPLICON_ANALYSIS_REF_DATA_PATH-ReferenceData} | |
| 51 #if str( $pipeline_name ) != "DADA2" | |
| 52 ${reference_database} | |
| 53 #end if | |
| 54 #if str($categories_file_in) != 'None' | |
| 55 -c "${categories_file_in}" | |
| 56 #end if | |
| 57 ## Input files | |
| 58 "${metatable_file_in}" | |
| 59 ## FASTQ pairs: collection elements expose .forward/.reverse, repeat entries expose the fastq_r1/fastq_r2 params | |
| 60 #if str($input_type.pairs_or_collection) == "collection" | |
| 61 #for $fq_pair in $input_type.fastq_collection | |
| 62 "${fq_pair.name}" "${fq_pair.forward}" "${fq_pair.reverse}" | |
| 63 #end for | |
| 64 #else | |
| 65 #for $fq_pair in $input_type.fastq_pairs | |
| 66 "${fq_pair.name}" "${fq_pair.fastq_r1}" "${fq_pair.fastq_r2}" | |
| 67 #end for | |
| 68 #end if | |
| 69 && | |
| 70 ## Collect outputs | |
| 71 cp Metatable_log/Metatable_mod.txt "${metatable_mod}" && | |
| 72 #if str( $pipeline_name ) == "Vsearch" | |
| 73 ## Vsearch-specific (double '#': a single '#' is emitted into the shell command as text) | |
| 74 cp ${pipeline_name}_OTU_tables/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_tax_OTU_table.biom "${tax_otu_table_biom_file}" && | |
| 75 cp Multiplexed_files/${pipeline_name}_pipeline/multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta "${dereplicated_nonchimera_otus_fasta}" && | |
| 76 cp QUALITY_CONTROL/Reads_count.txt "$read_counts_out" && | |
| 77 #else | |
| 78 ## DADA2-specific | |
| 79 cp ${pipeline_name}_OTU_tables/DADA2_tax_OTU_table.biom "${tax_otu_table_biom_file}" && | |
| 80 cp ${pipeline_name}_OTU_tables/seqs.fa "${dereplicated_nonchimera_otus_fasta}" && | |
| 81 #end if | |
| 82 cp ${pipeline_name}_OTU_tables/otus.tre "${otus_tre_file}" && | |
| 83 cp RESULTS/${pipeline_name}_${reference_database_name}/OTUs_count.txt "${otus_count_file}" && | |
| 84 cp RESULTS/${pipeline_name}_${reference_database_name}/table_summary.txt "${table_summary_file}" && | |
| 85 cp fastqc_quality_boxplots.html "${fastqc_quality_boxplots_html}" && | |
| 86 | |
| 87 ## OTU table heatmap (destination path quoted exactly once) | |
| 88 cp RESULTS/${pipeline_name}_${reference_database_name}/Heatmap.pdf "${heatmap_otu_table_pdf}" && | |
| 89 | |
| 90 ## HTML outputs | |
| 91 | |
| 92 ## Phylum genus barcharts | |
| 93 mkdir $phylum_genus_dist_barcharts_html.files_path && | |
| 94 cp -r RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/charts $phylum_genus_dist_barcharts_html.files_path && | |
| 95 cp -r RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/raw_data $phylum_genus_dist_barcharts_html.files_path && | |
| 96 cp RESULTS/${pipeline_name}_${reference_database_name}/phylum_genus_charts/bar_charts.html "${phylum_genus_dist_barcharts_html}" && | |
| 97 | |
| 98 ## Beta diversity weighted 2d plots | |
| 99 mkdir $beta_div_even_weighted_2d_plots.files_path && | |
| 100 cp -r RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/weighted_2d_plot/* $beta_div_even_weighted_2d_plots.files_path && | |
| 101 cp RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/weighted_2d_plot/weighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_weighted_2d_plots}" && | |
| 102 | |
| 103 ## Beta diversity unweighted 2d plots | |
| 104 mkdir $beta_div_even_unweighted_2d_plots.files_path && | |
| 105 cp -r RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/unweighted_2d_plot/* $beta_div_even_unweighted_2d_plots.files_path && | |
| 106 cp RESULTS/${pipeline_name}_${reference_database_name}/beta_div_even/unweighted_2d_plot/unweighted_unifrac_pc_2D_PCoA_plots.html "${beta_div_even_unweighted_2d_plots}" && | |
| 107 | |
| 108 ## Alpha diversity rarefaction plots | |
| 109 mkdir $alpha_div_rarefaction_plots.files_path && | |
| 110 cp RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/rarefaction_curves/rarefaction_plots.html $alpha_div_rarefaction_plots && | |
| 111 cp -r RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/rarefaction_curves/average_plots $alpha_div_rarefaction_plots.files_path && | |
| 112 | |
| 113 ## DADA2 error rate plots | |
| 114 #if str($pipeline_name) == "DADA2" | |
| 115 mkdir $dada2_error_rate_plots.files_path && | |
| 116 cp DADA2_OTU_tables/Error_rate_plots/error_rate_plots.html $dada2_error_rate_plots && | |
| 117 cp -r DADA2_OTU_tables/Error_rate_plots/*.pdf $dada2_error_rate_plots.files_path && | |
| 118 #end if | |
| 119 | |
| 120 ## Categories data | |
| 121 #if str($categories_file_in) != 'None' | |
| 122 ## Alpha diversity boxplots | |
| 123 mkdir $alpha_div_boxplots.files_path && | |
| 124 cp alpha_diversity_boxplots.html "$alpha_div_boxplots" && | |
| 125 cp RESULTS/${pipeline_name}_${reference_database_name}/Alpha_diversity/Alpha_diversity_boxplot/Categories_shannon/*.pdf $alpha_div_boxplots.files_path && | |
| 126 #end if | |
| 127 | |
| 128 ## Pipeline outputs (log files etc) | |
| 129 mkdir $log_files.files_path && | |
| 130 cp Amplicon_analysis_pipeline.log $log_files.files_path && | |
| 131 cp pipeline.log $log_files.files_path && | |
| 132 cp Pipeline_outputs.txt $log_files.files_path && | |
| 133 cp Metatable_log/Metatable.html $log_files.files_path && | |
| 134 cp pipeline_outputs.html "$log_files" | |
| 135 ]]></command> | |
| 136 <inputs> | |
| 137 <param name="title" type="text" value="test" size="25" | |
| 138 label="Title" help="Optional text that will be added to the output dataset names" /> | |
| 139 <param type="data" name="metatable_file_in" format="tabular" | |
| 140 label="Input Metatable.txt file" /> | |
| 141 <param type="data" name="categories_file_in" format="txt" | |
| 142 label="Input Categories.txt file" optional="true" | |
| 143 help="(optional)" /> | |
| 144 <conditional name="input_type"> | |
| 145 <param name="pairs_or_collection" type="select" | |
| 146 label="Input FASTQ type"> | |
| 147 <option value="pairs_of_files">Pairs of datasets</option> | |
| 148 <option value="collection" selected="true">Dataset pairs in a collection</option> | |
| 149 </param> | |
| 150 <when value="collection"> | |
| 151 <param name="fastq_collection" type="data_collection" | |
| 152 format="fastqsanger,fastq" collection_type="list:paired" | |
| 153 label="Collection of FASTQ forward and reverse (R1/R2) pairs" | |
| 154 help="Each FASTQ pair will be treated as one sample; the name of each sample will be taken from the first column of the Metatable file " /> | |
| 155 </when> | |
| 156 <when value="pairs_of_files"> | |
| 157 <repeat name="fastq_pairs" title="Input fastq pairs" min="1"> | |
| 158 <param type="text" name="name" value="" | |
| 159 label="Final name for FASTQ pair" /> | |
| 160 <param type="data" name="fastq_r1" format="fastqsanger,fastq" | |
| 161 label="FASTQ with forward reads (R1)" /> | |
| 162 <param type="data" name="fastq_r2" format="fastqsanger,fastq" | |
| 163 label="FASTQ with reverse reads (R2)" /> | |
| 164 </repeat> | |
| 165 </when> | |
| 166 </conditional> | |
| 167 <param type="text" name="forward_pcr_primer" value="" | |
| 168 label="Forward PCR primer sequence" | |
| 169 help="Optional; must not include barcode or adapter sequence (-g)" /> | |
| 170 <param type="text" name="reverse_pcr_primer" value="" | |
| 171 label="Reverse PCR primer sequence" | |
| 172 help="Optional; must not include barcode or adapter sequence (-G)" /> | |
| 173 <param type="integer" name="trimming_threshold" value="20" | |
| 174 label="Threshold quality below which read will be trimmed" | |
| 175 help="Phred score; default is 20 (-q)" /> | |
| 176 <param type="integer" name="minimum_overlap" value="10" | |
| 177 label="Minimum overlap in bp between forward and reverse reads" | |
| 178 help="Default is 10 (-O)" /> | |
| 179 <param type="integer" name="minimum_length" value="200" | |
| 180 label="Minimum length in bp to keep sequence after overlapping" | |
| 181 help="Default is 200 (-L)" /> | |
| 182 <param type="integer" name="sliding_window_length" value="10" | |
| 183 label="Minimum length in bp to retain a read after trimming" | |
| 184 help="Supplied to Sickle; default is 10 (-l)" /> | |
| 185 <conditional name="pipeline"> | |
| 186 <param type="select" name="pipeline_name" | |
| 187 label="Pipeline to use for analysis"> | |
| 188 <option value="Vsearch" selected="true" >Vsearch</option> | |
| 189 <option value="DADA2">DADA2</option> | |
| 190 </param> | |
| 191 <when value="Vsearch"> | |
| 192 <param type="select" name="reference_database" | |
| 193 label="Reference database"> | |
| 194 <option value="" selected="true">GreenGenes</option> | |
| 195 <option value="-S">Silva</option> | |
| 196 <option value="-H">Human Oral Microbiome Database (HOMD)</option> | |
| 197 </param> | |
| 198 </when> | |
| 199 <when value="DADA2"> | |
| 200 </when> | |
| 201 </conditional> | |
| 202 </inputs> | |
| 203 <outputs> | |
| 204 <data format="tabular" name="metatable_mod" | |
| 205 label="${tool.name}:${title} Metatable_mod.txt" /> | |
| 206 <data format="tabular" name="read_counts_out" | |
| 207 label="${tool.name} (${pipeline.pipeline_name}):${title} read counts"> | |
| 208 <filter>pipeline['pipeline_name'] == 'Vsearch'</filter> | |
| 209 </data> | |
| 210 <data format="biom" name="tax_otu_table_biom_file" | |
| 211 label="${tool.name} (${pipeline.pipeline_name}):${title} tax OTU table (biom format)" /> | |
| 212 <data format="tabular" name="otus_tre_file" | |
| 213 label="${tool.name} (${pipeline.pipeline_name}):${title} otus.tre" /> | |
| 214 <data format="html" name="phylum_genus_dist_barcharts_html" | |
| 215 label="${tool.name} (${pipeline.pipeline_name}):${title} phylum genus dist barcharts HTML" /> | |
| 216 <data format="tabular" name="otus_count_file" | |
| 217 label="${tool.name} (${pipeline.pipeline_name}):${title} OTUs count file" /> | |
| 218 <data format="tabular" name="table_summary_file" | |
| 219 label="${tool.name} (${pipeline.pipeline_name}):${title} table summary file" /> | |
| 220 <data format="fasta" name="dereplicated_nonchimera_otus_fasta" | |
| 221 label="${tool.name} (${pipeline.pipeline_name}):${title} multiplexed linearized dereplicated mc2 repset nonchimeras OTUs FASTA" /> | |
| 222 <data format="html" name="fastqc_quality_boxplots_html" | |
| 223 label="${tool.name} (${pipeline.pipeline_name}):${title} FastQC per-base quality boxplots HTML" /> | |
| 224 <data format="pdf" name="heatmap_otu_table_pdf" | |
| 225 label="${tool.name} (${pipeline.pipeline_name}):${title} heatmap OTU table PDF" /> | |
| 226 <data format="html" name="beta_div_even_weighted_2d_plots" | |
| 227 label="${tool.name} (${pipeline.pipeline_name}):${title} beta diversity weighted 2D plots HTML" /> | |
| 228 <data format="html" name="beta_div_even_unweighted_2d_plots" | |
| 229 label="${tool.name} (${pipeline.pipeline_name}):${title} beta diversity unweighted 2D plots HTML" /> | |
| 230 <data format="html" name="alpha_div_rarefaction_plots" | |
| 231 label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity rarefaction plots HTML" /> | |
| 232 <data format="html" name="dada2_error_rate_plots" | |
| 233 label="${tool.name} (${pipeline.pipeline_name}):${title} DADA2 error rate plots"> | |
| 234 <filter>pipeline['pipeline_name'] == 'DADA2'</filter> | |
| 235 </data> | |
| 236 <data format="html" name="alpha_div_boxplots" | |
| 237 label="${tool.name} (${pipeline.pipeline_name}):${title} alpha diversity boxplots"> | |
| 238 <filter>categories_file_in is not None</filter> | |
| 239 </data> | |
| 240 <data format="html" name="log_files" | |
| 241 label="${tool.name} (${pipeline.pipeline_name}):${title} log files" /> | |
| 242 </outputs> | |
| 243 <tests> | |
| 244 </tests> | |
| 245 <help><![CDATA[ | |
| 246 | |
| 247 What it does | |
| 248 ------------ | |
| 249 | |
| 250 This pipeline has been designed for the analysis of 16S rRNA data from | |
| 251 Illumina Miseq (Casava >= 1.8) paired-end reads. | |
| 252 | |
| 253 Usage | |
| 254 ----- | |
| 255 | |
| 256 1. Preparation of the mapping file and format of unique sample id | |
| 257 ***************************************************************** | |
| 258 | |
| 259 Before using the amplicon analysis pipeline it is necessary to | |
| 260 follow the steps below to avoid analysis failures and ensure samples | |
| 261 are labelled appropriately. Sample names for the labelling are derived | |
| 262 from the fastq files names that are generated from the sequencing. The | |
| 263 labels will include everything between the beginning of the name and | |
| 264 the sample number (from C11 to S19 in Fig. 1) | |
| 265 | |
| 266 .. image:: Pipeline_description_Fig1.png | |
| 267 :height: 46 | |
| 268 :width: 382 | |
| 269 | |
| 270 **Figure 1** | |
| 271 | |
| 272 If analysing 16S data from multiple runs: | |
| 273 | |
| 274 The samples from different runs may have identical IDs. For example, | |
| 275 when sequencing the same samples twice, by chance, these could be at | |
| 276 the same position in both the runs. This would cause the fastq files | |
| 277 to have exactly the same IDs (Fig. 2). | |
| 278 | |
| 279 .. image:: Pipeline_description_Fig2.png | |
| 280 :height: 100 | |
| 281 :width: 463 | |
| 282 | |
| 283 **Figure 2** | |
| 284 | |
| 285 In case of identical sample IDs the pipeline will fail to run and | |
| 286 generate an error at the beginning of the analysis. | |
| 287 | |
| 288 To avoid having to change the file names, before uploading the files, | |
| 289 ensure that the samples IDs are not repeated. | |
| 290 | |
| 291 2. To upload the file | |
| 292 ********************* | |
| 293 | |
| 294 Click on **Get Data/Upload File** from the Galaxy tool panel on the | |
| 295 left hand side. | |
| 296 | |
| 297 From the pop-up window, choose how to upload the file. The | |
| 298 **Choose local file** option can be used for files up to 4Gb. Fastq files | |
| 299 from Illumina MiSeq will rarely be bigger than 4Gb and this option is | |
| 300 recommended. | |
| 301 | |
| 302 After choosing the files click **Start** to begin the upload. The window can | |
| 303 now be closed and the files will be uploaded onto the Galaxy server. You | |
| 304 will see the progress on the ``HISTORY`` panel on the right | |
| 305 side of the screen. The colour will change from grey (queuing), to yellow | |
| 306 (uploading) and finally green (uploaded). | |
| 307 | |
| 308 Once all the files are uploaded, click on the operations on multiple | |
| 309 datasets icon and select the fastq files that need to be analysed. | |
| 310 Click on the tab **For all selected...** and on the option | |
| 311 **Build List of Dataset pairs** (Fig. 3). | |
| 312 | |
| 313 .. image:: Pipeline_description_Fig3.png | |
| 314 :height: 247 | |
| 315 :width: 586 | |
| 316 | |
| 317 **Figure 3** | |
| 318 | |
| 319 Change the filter parameter ``_1`` and ``_2`` to be ``_R1`` and ``_R2``. | |
| 320 The fastq files forward R1 and reverse R2 should now appear in the | |
| 321 corresponding columns. | |
| 322 | |
| 323 Select **Autopair**. This creates a collection of paired fastq files for | |
| 324 the forward and reverse reads for each sample. The name of the pairs will | |
| 325 be the ones used by the pipeline. You are free to change the names at this | |
| 326 point as long as they are the same used in the Metatable file | |
| 327 (see section 3). | |
| 328 | |
| 329 Name the collection and click on **create list**. This reduces the time | |
| 330 required to input the forward and reverse reads for each individual sample. | |
| 331 | |
| 332 3. Create the Metatable files | |
| 333 ***************************** | |
| 334 | |
| 335 Metatable.txt | |
| 336 ~~~~~~~~~~~~~ | |
| 337 | |
| 338 Click on the list of pairs you just created to see the name of the single | |
| 339 pairs. The name of the pairs will be the ones used by the pipeline, | |
| 340 therefore, these are the names that need to be used in the Metatable file. | |
| 341 | |
| 342 The Metatable file has to be in QIIME format. You can find a description | |
| 343 of it on QIIME website http://qiime.org/documentation/file_formats.html | |
| 344 | |
| 345 EXAMPLE:: | |
| 346 | |
| 347 #SampleID BarcodeSequence LinkerPrimerSequence Disease Gender Description | |
| 348 Mock-RUN1 TAAGGCGAGCGTAAGA PsA Male Control | |
| 349 Mock-RUN2 CGTACTAGGCGTAAGA PsA Male Control | |
| 350 Mock-RUN3 AGGCAGAAGCGTAAGA PsC Female Control | |
| 351 | |
| 352 Briefly: the column ``LinkerPrimerSequence`` is empty but it cannot be | |
| 353 deleted. The header is very important. ``#SampleID``, ``BarcodeSequence``, | |
| 354 ``LinkerPrimerSequence`` and ``Description`` are mandatory. Between | |
| 355 ``LinkerPrimerSequence`` and ``Description`` you can add as many columns | |
| 356 as you want. For every column a PCoA plot will be created (see | |
| 357 **Results** section). You can create this file in Excel and it will have | |
| 358 to be saved as ``Text(Tab delimited)``. | |
| 359 | |
| 360 During the analysis the Metatable.txt will be checked to ensure that the | |
| 361 file has the correct format. If necessary, this will be modified and will | |
| 362 be available as ``Metatable_mod.txt`` in the history panel. If you are | |
| 363 going to use the metatable file for any other statistical analyses, | |
| 364 remember to use the ``Metatable_mod.txt`` one, otherwise the sample | |
| 365 names might not match! | |
| 366 | |
| 367 Categories.txt (optional) | |
| 368 ~~~~~~~~~~~~~~~~~~~~~~~~~ | |
| 369 | |
| 370 This file is required if you want to get box plots for comparison of | |
| 371 alpha diversity indices (see **Results** section). The file is a list | |
| 372 (without header and IN ONE COLUMN) of categories present in the | |
| 373 Metatable.txt file. THE NAMES YOU ARE USING HAVE TO BE THE SAME AS THE | |
| 374 ONES USED IN THE METATABLE.TXT. You can create this file in Excel and | |
| 375 it will have to be saved as ``Text(Tab delimited)``. | |
| 376 | |
| 377 EXAMPLE:: | |
| 378 | |
| 379 Disease | |
| 380 Gender | |
| 381 | |
| 382 Metatable and categories files can be uploaded using Get Data as done | |
| 383 with the fastq files. | |
| 384 | |
| 385 4. Analysis | |
| 386 *********** | |
| 387 | |
| 388 Under **Amplicon_Analysis_Pipeline** | |
| 389 | |
| 390 * **Title** Name to distinguish between the runs. It will be shown at | |
| 391 the beginning of each output file name. | |
| 392 | |
| 393 * **Input Metatable.txt file** Select the Metatable.txt file related to | |
| 394 this analysis | |
| 395 | |
| 396 * **Input Categories.txt file (Optional)** Select the Categories.txt file | |
| 397 related to this analysis | |
| 398 | |
| 399 * **Input FASTQ type** select *Dataset pairs in a collection* and, then, | |
| 400 the collection of pairs you created earlier. | |
| 401 | |
| 402 * **Forward/Reverse PCR primer sequence** if the PCR primer sequences | |
| 403 have not been removed from the MiSeq during the fastq creation, they | |
| 404 have to be removed before the analysis. Insert the PCR primer sequence | |
| 405 in the corresponding field. DO NOT include any barcode or adapter | |
| 406 sequence. If the PCR primers have been already trimmed by the MiSeq, | |
| 407 and you include the sequence in this field, this would lead to an error. | |
| 408 Only include the sequences if still present in the fastq files. | |
| 409 | |
| 410 * **Threshold quality below which reads will be trimmed** Choose the | |
| 411 Phred score used by Sickle to trim the reads at the 3’ end. | |
| 412 | |
| 413 * **Minimum length to retain a read after trimming** If the read length | |
| 414 after trimming is shorter than a user defined length, the read, along | |
| 415 with the corresponding read pair, will be discarded. | |
| 416 | |
| 417 * **Minimum overlap in bp between forward and reverse reads** Choose the | |
| 418 minimum basepair overlap used by Pandaseq to assemble the reads. | |
| 419 Default is 10. | |
| 420 | |
| 421 * **Minimum length in bp to keep a sequence after overlapping** Choose the | |
| 422 minimum sequence length used by Pandaseq to keep a sequence after the | |
| 423 overlapping. This depends on the expected amplicon length. The tool | |
| 424 default is 200 (380 is used for V3-V4 16S sequencing; expected length ~440bp) | |
| 425 | |
| 426 * **Pipeline to use for analysis** Choose the pipeline to use for OTU | |
| 427 clustering and chimera removal. The Galaxy tool supports the ``Vsearch`` | |
| 428 and ``DADA2`` pipelines. | |
| 429 | |
| 430 * **Reference database** Choose between ``GreenGenes``, ``Silva`` or | |
| 431 ``HOMD`` (Human Oral Microbiome Database) for taxa assignment. | |
| 432 | |
| 433 Click on **Execute** to start the analysis. | |
| 434 | |
| 435 5. Results | |
| 436 ********** | |
| 437 | |
| 438 Results are entirely generated using QIIME scripts. The results will | |
| 439 appear in the History panel when the analysis is completed. | |
| 440 | |
| 441 The following outputs are captured: | |
| 442 | |
| 443 * **Vsearch_tax_OTU_table.biom|DADA2_tax_OTU_table.biom (biom format)** | |
| 444 The OTU table in BIOM format (http://biom-format.org/) | |
| 445 | |
| 446 * **otus.tre** Phylogenetic tree constructed using ``make_phylogeny.py`` | |
| 447 (fasttree) QIIME script (http://qiime.org/scripts/make_phylogeny.html) | |
| 448 | |
| 449 * **Phylum_genus_dist_barcharts_HTML** HTML file with bar charts at | |
| 450 Phylum, Genus and Species level | |
| 451 (http://qiime.org/scripts/summarize_taxa.html and | |
| 452 http://qiime.org/scripts/plot_taxa_summary.html) | |
| 453 | |
| 454 * **OTUs_count_file** Summary of OTU counts per sample | |
| 455 (http://biom-format.org/documentation/summarizing_biom_tables.html) | |
| 456 | |
| 457 * **Table_summary_file** Summary of sequences counts per sample | |
| 458 (http://biom-format.org/documentation/summarizing_biom_tables.html) | |
| 459 | |
| 460 * **multiplexed_linearized_dereplicated_mc2_repset_nonchimeras_OTUs.fasta|seqs.fa** | |
| 461 Fasta file with OTU sequences (Vsearch|DADA2) | |
| 462 | |
| 463 * **Heatmap_PDF** OTU heatmap in PDF format | |
| 464 (http://qiime.org/1.8.0/scripts/make_otu_heatmap_html.html ) | |
| 465 | |
| 466 * **Vsearch_beta_diversity_weighted_2D_plots_HTML** PCoA plots in HTML | |
| 467 format using weighted Unifrac distance measure. Samples are grouped | |
| 468 by the column names present in the Metatable file. The samples are | |
| 469 firstly rarefied to the minimum sequencing depth | |
| 470 (http://qiime.org/scripts/beta_diversity_through_plots.html ) | |
| 471 | |
| 472 * **Vsearch_beta_diversity_unweighted_2D_plots_HTML** PCoA plots in HTML | |
| 473 format using Unweighted Unifrac distance measure. Samples are grouped | |
| 474 by the column names present in the Metatable file. The samples are | |
| 475 firstly rarefied to the minimum sequencing depth | |
| 476 (http://qiime.org/scripts/beta_diversity_through_plots.html ) | |
| 477 | |
| 478 Code availability | |
| 479 ----------------- | |
| 480 | |
| 481 **Code is available at** https://github.com/MTutino/Amplicon_analysis | |
| 482 | |
| 483 Credits | |
| 484 ------- | |
| 485 | |
| 486 Pipeline author: Mauro Tutino | |
| 487 | |
| 488 Galaxy tool: Peter Briggs | |
| 489 | |
| 490 ]]></help> | |
| 491 <citations> | |
| 492 <citation type="bibtex"> | |
| 493 @misc{githubAmplicon_analysis, | |
| 494 author = {Tutino, Mauro}, | |
| 495 year = {2017}, | |
| 496 title = {Amplicon Analysis Pipeline}, | |
| 497 publisher = {GitHub}, | |
| 498 journal = {GitHub repository}, | |
| 499 url = {https://github.com/MTutino/Amplicon_analysis}, | |
| 500 }</citation> | |
| 501 </citations> | |
| 502 </tool> |
