Mercurial > repos > matthias > data_manager_dada2
comparison data_manager/dada2_fetcher.xml @ 3:3a4ee8bf012a draft
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/data_managers/data_manager_dada2 commit 5b1603bbcd3f139cad5c876be83fcb39697b5613-dirty
| author | matthias |
|---|---|
| date | Tue, 09 Apr 2019 07:18:57 -0400 |
| parents | b4c303665291 |
| children | 51b90d282704 |
comparison
equal
deleted
inserted
replaced
| 2:b4c303665291 | 3:3a4ee8bf012a |
|---|---|
| 1 <?xml version="1.0"?> | 1 <?xml version="1.0"?> |
| 2 <tool id="dada_fetcher" name="dada2 dada manager" tool_type="manage_data" version="0.0.5"> | 2 <tool id="dada_fetcher" name="dada2 data manager" version="0.0.5"> |
| 3 <description>Download reference data sets</description> | 3 <description>Download reference databases</description> |
| 4 <command detect_errors="exit_code"> | 4 <command detect_errors="exit_code"><![CDATA[ |
| 5 <![CDATA[ | 5 python '$__tool_directory__/data_manager.py' |
| 6 python '$__tool_directory__/data_manager.py' | |
| 7 --out '${out_file}' | 6 --out '${out_file}' |
| 8 --dataset '$database_name' | 7 --dataset '$db_cond.db_select'_'$db_cond.version_select' |
| 9 ]]> | 8 ]]> |
| 10 </command> | 9 </command> |
| 11 <inputs> | 10 <inputs> |
| 12 <param name="database_name" type="select" label="mapping data"> | 11 <conditional name="db_cond"> |
| 13 <option value="silva132">Silva version 132</option> | 12 <param name="db_select" type="select" label="Taxonomic database"> |
| 14 <option value="silva128">Silva version 128</option> | 13 <option value="silva">Silva</option> |
| 15 <option value="rdp16">RDP trainset 16</option> | 14 <option value="rdp">RDP</option> |
| 16 <option value="rdp14">RDP trainset 14</option> | 15 <option value="greengenes">GreenGenes</option> |
| 17 <option value="gg13.84">GreenGenes version 13.84</option> | 16 <option value="unite">UNITE: General Fasta</option> |
| 18 <option value="unite8.0_fungi">UNITE: General Fasta release 8.0 for Fungi</option> | 17 <option value="RefSeq_RDP">NCBI RefSeq 16S rRNA database supplemented by RDP</option> |
| 19 <option value="unite8.0_fungi_singletons">UNITE: General Fasta release 8.0 for Fungi including global and 97% singletons</option> | 18 <option value="gtdb">GTDB: Genome Taxonomy Database (Bacteria & Archaea)</option> |
| 20 <option value="unite8.0_euka">UNITE: General Fasta release 8.0 for all Eukaryotes</option> | 19 <option value="hitdb">HitDB (Human InTestinal 16S)</option> |
| 21 <option value="unite8.0_euka_singletons">UNITE: General Fasta release 8.0 for all Eukaryotes including global and 97% singletons</option> | 20 <option value="silva_euk_18S">Silva Eukaryotic 18S</option> |
| 22 <option value="RefSeq_RDP_2018_05">NCBI RefSeq 16S rRNA database supplemented by RDP (05/2018)</option> | 21 <option value="PR2">Protist Ribosomal Reference database (PR2)</option> |
| 23 <option value="gtdb_2018_11_20">GTDB: Genome Taxonomy Database (Bacteria & Archaea) (11/2018)</option> | 22 </param> |
| 24 <option value="hitdb1">HitDB version 1 (Human InTestinal 16S)</option> | 23 <when value="silva"> |
| 25 <option value="silva132_euk_18S">Silva version 132 Eukaryotic 18S</option> | 24 <param name="version_select" type="select" label="Database version"> |
| 26 <option value="PR2v4.11.1">Protist Ribosomal Reference database (PR2) 4.11.1</option> | 25 <option value="132">132</option> |
| 27 </param> | 26 <option value="128">128</option> |
| 27 </param> | |
| 28 </when> | |
| 29 <when value="rdp"> | |
| 30 <param name="version_select" type="select" label="Database version"> | |
| 31 <option value="16">16</option> | |
| 32 <option value="14">14</option> | |
| 33 </param> | |
| 34 </when> | |
| 35 <when value="greengenes"> | |
| 36 <param name="version_select" type="select" label="Database version"> | |
| 37 <option value="13.84">13.84</option> | |
| 38 </param> | |
| 39 </when> | |
| 40 <when value="unite"> | |
| 41 <param name="version_select" type="select" label="Database version"> | |
| 42 <option value="8.0_fungi">release 8.0 for Fungi</option> | |
| 43 <option value="8.0_fungi_singleton">release 8.0 for Fungi including global and 97% singletons</option> | |
| 44 </param> | |
| 45 </when> | |
| 46 <when value="RefSeq_RDP"> | |
| 47 <param name="version_select" type="select" label="Database version"> | |
| 48 <option value="2018_05">05/2018</option> | |
| 49 </param> | |
| 50 </when> | |
| 51 <when value="gtdb"> | |
| 52 <param name="version_select" type="select" label="Database version"> | |
| 53 <option value="2018_11">11/2018</option> | |
| 54 </param> | |
| 55 </when> | |
| 56 <when value="hitdb"> | |
| 57 <param name="version_select" type="select" label="Database version"> | |
| 58 <option value="1">1</option> | |
| 59 </param> | |
| 60 </when> | |
| 61 <when value="silva_euk_18S"> | |
| 62 <param name="version_select" type="select" label="Database version"> | |
| 63 <option value="132">132</option> | |
| 64 </param> | |
| 65 </when> | |
| 66 <when value="PR2"> | |
| 67 <param name="version_select" type="select" label="Database version"> | |
| 68 <option value="4.11.1">4.11.1</option> | |
| 69 </param> | |
| 70 </when> | |
| 71 </conditional> | |
| 28 </inputs> | 72 </inputs> |
| 29 <outputs> | 73 <outputs> |
| 30 <data name="out_file" format="data_manager_json" /> | 74 <data name="out_file" format="data_manager_json" /> |
| 31 </outputs> | 75 </outputs> |
| 32 <tests> | 76 <tests> |
| 33 <test> | 77 <test> |
| 34 <param name="database_name" value="silva132"/> | 78 <param name="db_cond|db_select" value="silva"/> |
| 79 <param name="db_cond|db_version" value="132"/> | |
| 35 <output name="out_file" file="silva132_json"/> | 80 <output name="out_file" file="silva132_json"/> |
| 81 </test> | |
| 82 <test> | |
| 83 <param name="db_cond|db_select" value="rdp"/> | |
| 84 <param name="db_cond|db_version" value="16"/> | |
| 85 <output name="out_file" file="rdp16_json"/> | |
| 86 </test> | |
| 87 <test> | |
| 88 <param name="db_cond|db_select" value="greengenes"/> | |
| 89 <param name="db_cond|db_version" value="13.84"/> | |
| 90 <output name="out_file" file="greengenes13.84_json"/> | |
| 91 </test> | |
| 92 <test> | |
| 93 <param name="db_cond|db_select" value="unite"/> | |
| 94 <param name="db_cond|db_version" value="8.0_fungi"/> | |
| 95 <output name="out_file" file="unite8fungi_json"/> | |
| 96 </test> | |
| 97 <test> | |
| 98 <param name="db_cond|db_select" value="RefSeq_RDP"/> | |
| 99 <param name="db_cond|db_version" value="2018_05"/> | |
| 100 <output name="out_file" file="RefSeq_RDP2018_json"/> | |
| 101 </test> | |
| 102 <test> | |
| 103 <param name="db_cond|db_select" value="gtdb"/> | |
| 104 <param name="db_cond|db_version" value="2018_11"/> | |
| 105 <output name="out_file" file="gtdb2018_json"/> | |
| 106 </test> | |
| 107 <test> | |
| 108 <param name="db_cond|db_select" value="hitdb"/> | |
| 109 <param name="db_cond|db_version" value="1"/> | |
| 110 <output name="out_file" file="hitdb1_json"/> | |
| 111 </test> | |
| 112 <test> | |
| 113 <param name="db_cond|db_select" value="silva_euk_18S"/> | |
| 114 <param name="db_cond|db_version" value="132"/> | |
| 115 <output name="out_file" file="silvaeuk132_json"/> | |
| 116 </test> | |
| 117 <test> | |
| 118 <param name="db_cond|db_select" value="PR2"/> | |
| 119 <param name="db_cond|db_version" value="4.11.1"/> | |
| 120 <output name="out_file" file="PR24.11.1_json"/> | |
| 36 </test> | 121 </test> |
| 37 </tests> | 122 </tests> |
| 38 <help><![CDATA[ | 123 <help><![CDATA[ |
| 39 Public Reference data sets | 124 Public Reference databases maintained by the DADA2 project |
| 40 -------------------------- | 125 .......................................................... |
| 41 | 126 |
| 42 The following 16S data sets are taken from the list of data sets maintained by the DADA2 project (https://benjjneb.github.io/dada2/training.html) | 127 The following reference databases, which are described as maintained by the DADA2 project (https://benjjneb.github.io/dada2/training.html), are available: |
| 43 | 128 |
| 44 - Silva version 132 | 129 - Silva (https://www.arb-silva.de/) |
| 45 - Silva version 128 | 130 - RDP (http://rdp.cme.msu.edu/) |
| 46 - RDP trainset 16 + RDP database release 11.5</option> | 131 - GreenGenes (http://greengenes.secondgenome.com/) |
| 47 - RDP trainset 14 | 132 - UNITE general FASTA (https://unite.ut.ee/repository.php) |
| 48 - GreenGenes version 13.8 | |
| 49 | 133 |
| 50 While the Silva and RDP data sets contain reference data bases for taxonomy and species assignment, the greengenes data set only contains a reference data base for taxonomy assignment. | 134 While Silva and RDP contain reference databases for taxonomy and species assignment, the greengenes and UNITE databases only contain a reference database for taxonomy assignment. |
| 51 | 135 |
| 52 For the Silva data sets consider to check the license information: http://www.arb-silva.de/silva-license-information. | 136 For the Silva databases check the license information: http://www.arb-silva.de/silva-license-information. |
| 53 | 137 |
| 138 Except for UNITE, all reference databases are downloaded from the corresponding Zenodo links that are listed on the DADA2 website. The UNITE databases are taken from the links provided on the UNITE website. | |
| 54 | 139 |
| 140 More detailed information on the reference databases can be found on the DADA2 website and the links it contains: https://benjjneb.github.io/dada2/training.html. | |
| 55 | 141 |
| 56 (More info: http://gtdb.ecogenomic.org/) | 142 Further public Reference databases listed by the DADA2 project |
| 143 .............................................................. | |
| 57 | 144 |
| 58 https://github.com/pr2database/pr2database | 145 Several contributed reference databases are listed on the DADA2 project website (https://benjjneb.github.io/dada2/training.html): |
| 59 | 146 |
| 147 - RefSeq + RDP (NCBI RefSeq 16S rRNA database supplemented by RDP) | |
| 148 - GTDB: Genome Taxonomy Database (More info: http://gtdb.ecogenomic.org/) | |
| 149 - HitDB version 1 (Human InTestinal 16S rRNA) (https://github.com/microbiome/HITdb) | |
| 150 - RDP fungi LSU | |
| 151 - Silva Eukaryotic 18S | |
| 152 - PR2 (https://github.com/pr2database/pr2database) | |
| 60 | 153 |
| 61 Custom Reference data sets | 154 Except for PR2, all reference databases are downloaded from the corresponding zenodo links that are listed on the DADA2 website. The PR2 database is taken from their github page. |
| 62 -------------------------- | |
| 63 | 155 |
| 64 For ** taxonomy assignment ** the following is needed: | 156 More detailed information on the reference databases can be found on the DADA2 website and the links it contains: https://benjjneb.github.io/dada2/training.html. |
| 65 | |
| 66 - a reference fasta data base | |
| 67 - a comma separated list of taxonomic ranks present in the reference data base | |
| 68 | |
| 69 The reference fasta data base for taxonomic assignment (fasta or compressed fasta) needs to encode the taxonomy corresponding to each sequence in the fasta header lines in the following fashion (note, the second sequence is not assigned down to level 6): | |
| 70 | |
| 71 :: | |
| 72 | |
| 73 >Level1;Level2;Level3;Level4;Level5;Level6; | |
| 74 ACCTAGAAAGTCGTAGATCGAAGTTGAAGCATCGCCCGATGATCGTCTGAAGCTGTAGCATGAGTCGATTTTCACATTCAGGGATACCATAGGATAC | |
| 75 >Level1;Level2;Level3;Level4;Level5; | |
| 76 CGCTAGAAAGTCGTAGAAGGCTCGGAGGTTTGAAGCATCGCCCGATGGGATCTCGTTGCTGTAGCATGAGTACGGACATTCAGGGATCATAGGATAC | |
| 77 | |
| 78 The list of required taxonomic ranks could be for instance: "Kingdom,Phylum,Class,Order,Family,Genus" | |
| 79 | |
| 80 The reference data base for ** species assignment ** is a fasta file (or compressed fasta file), with the id line formatted as follows: | |
| 81 | |
| 82 :: | |
| 83 | |
| 84 >ID Genus species | |
| 85 ACCTAGAAAGTCGTAGATCGAAGTTGAAGCATCGCCCGATGATCGTCTGAAGCTGTAGCATGAGTCGATTTTCACATTCAGGGATACCATAGGATAC | |
| 86 >ID Genus species | |
| 87 CGCTAGAAAGTCGTAGAAGGCTCGGAGGTTTGAAGCATCGCCCGATGGGATCTCGTTGCTGTAGCATGAGTACGGACATTCAGGGATCATAGGATAC | |
| 88 ]]></help> | 157 ]]></help> |
| 158 <citations> | |
| 159 <!-- silva --> | |
| 160 <citation type="doi">10.1093/nar/gks1219</citation> | |
| 161 <!-- rdp --> | |
| 162 <citation type="doi">10.1093/nar/gkt1244</citation> | |
| 163 <!-- greengenes --> | |
| 164 <citation type="doi">10.1128/AEM.03006-05</citation> | |
| 165 <!-- unite --> | |
| 166 <citation type="doi">10.15156/BIO/786343</citation> | |
| 167 <!-- TODO gtdb ??? --> | |
| 168 <!-- hitdb --> | |
| 169 <citation type="doi">10.1186/s12864-015-2265-y</citation> | |
| 170 <!-- PR2 --> | |
| 171 <citation type="doi">10.1093/nar/gks1160</citation> | |
| 172 </citations> | |
| 89 </tool> | 173 </tool> |
| 90 | 174 |
