annotate test-data/reproduce_test_dataset.sh @ 10:92f9975f08e2 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
author dfornika
date Thu, 04 Nov 2021 21:33:16 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
1 #!/bin/bash
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
2
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
3 # This script produces a small kraken2 database containing only a ~1 kb portion of each of a Salmonella and an E. coli genome
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
4 # It requires kraken2 and entrez-direct (both available on bioconda), plus a node_patterns.txt file in the working directory
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
5 kraken2-build --db test_db --download-taxonomy  # populates test_db/taxonomy/
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
6 mv test_db/taxonomy/nucl_gb.accession2taxid test_db/taxonomy/nucl_gb.accession2taxid_full
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
7 grep -F -e 'NC_003198.1' -e 'NC_011750.1' test_db/taxonomy/nucl_gb.accession2taxid_full > test_db/taxonomy/nucl_gb.accession2taxid  # -F: accessions are literal strings, not regexes
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
8 mv test_db/taxonomy/nodes.dmp test_db/taxonomy/nodes.dmp_full
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
9 grep -f node_patterns.txt test_db/taxonomy/nodes.dmp_full > test_db/taxonomy/nodes.dmp
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
10 mv test_db/taxonomy/names.dmp test_db/taxonomy/names.dmp_full
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
11 grep -e '^220341\s' -e '^585057\s' test_db/taxonomy/names.dmp_full > test_db/taxonomy/names.dmp
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
12 esearch -db nucleotide -query "NC_003198.1" | efetch -format fasta > NC_003198.1.fasta
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
13 esearch -db nucleotide -query "NC_011750.1" | efetch -format fasta > NC_011750.1.fasta
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
14 head -n 14 NC_003198.1.fasta > NC_003198.1_1kb.fasta
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
15 head -n 14 NC_011750.1.fasta > NC_011750.1_1kb.fasta
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
16 kraken2-build --db test_db --add-to-library NC_003198.1_1kb.fasta
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
17 kraken2-build --db test_db --add-to-library NC_011750.1_1kb.fasta
92f9975f08e2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit 57b36cfbdd1663aef43d03b76e37364cc5bdeef3-dirty"
dfornika
parents:
diff changeset
18 kraken2-build --db test_db --build