Mercurial > repos > devteam > blast_datatypes
annotate blast.py @ 19:db2480de852e draft default tip
"planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit f726ee4fbfef077d2ffe7e1a67eee254babb7841-dirty"
| author | peterjc |
|---|---|
| date | Fri, 21 Aug 2020 12:39:02 +0000 |
| parents | 1250aab8b97a |
| children |
| rev | line source |
|---|---|
|
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
1 """NCBI BLAST datatypes. |
|
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
2 |
|
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
3 Covers the ``blastxml`` format and the BLAST databases. |
| 3 | 4 """ |
|
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
5 |
|
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
6 import logging |
|
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
7 import os |
|
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
8 from time import sleep |
| 3 | 9 |
|
15
310ec0f47485
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 96d587fc6f6cab23c597e88a83daf7eecd0d4162-dirty
peterjc
parents:
14
diff
changeset
|
10 from galaxy.datatypes.data import Data, Text, get_file_peek |
| 3 | 11 from galaxy.datatypes.xml import GenericXml |
|
9
e36c60d13c94
Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents:
7
diff
changeset
|
12 |
|
e36c60d13c94
Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents:
7
diff
changeset
|
13 log = logging.getLogger(__name__) |
|
5
b3a3ba0c1d47
Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents:
4
diff
changeset
|
14 |
|
15
310ec0f47485
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 96d587fc6f6cab23c597e88a83daf7eecd0d4162-dirty
peterjc
parents:
14
diff
changeset
|
# The accepted DOCTYPE declarations share the same public identifier and
# differ only in the system identifier (absolute NCBI URL vs. bare filename),
# so the common prefix is factored out to avoid excessively long lines:
_DOCTYPE_PUBLIC_ID = '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" '
_DOCTYPES = [
    _DOCTYPE_PUBLIC_ID + '"http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
    _DOCTYPE_PUBLIC_ID + '"NCBI_BlastOutput.dtd">',
]
|
15
310ec0f47485
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 96d587fc6f6cab23c597e88a83daf7eecd0d4162-dirty
peterjc
parents:
14
diff
changeset
|
22 |
|
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
23 |
|
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
class BlastXml(GenericXml):
    """NCBI Blast XML Output data."""

    file_ext = "blastxml"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text.

        For a purged dataset fixed placeholder strings are stored; otherwise
        the peek comes from ``get_file_peek`` on the dataset's file.
        """
        if not dataset.dataset.purged:
            dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = "NCBI Blast XML data"
        else:
            dataset.peek = "file does not exist"
            dataset.blurb = "file purged from disk"

    def sniff(self, filename):
        """Determine from the contents if the file is blastxml.

        Only the first three lines are inspected: the XML declaration, one of
        the known DOCTYPE lines, and the ``<BlastOutput>`` root tag.

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('megablast_xml_parser_test1.blastxml')
        >>> BlastXml().sniff(fname)
        True
        >>> fname = get_test_fname('tblastn_four_human_vs_rhodopsin.xml')
        >>> BlastXml().sniff(fname)
        True
        >>> fname = get_test_fname('interval.interval')
        >>> BlastXml().sniff(fname)
        False
        """
        # The context manager closes the handle on every path, including
        # when a read raises part way through.
        with open(filename) as handle:
            if handle.readline().strip() != '<?xml version="1.0"?>':
                return False
            if handle.readline().strip() not in _DOCTYPES:
                return False
            return handle.readline().strip() == "<BlastOutput>"

    @staticmethod
    def merge(split_files, output_file):
        """Merge multiple BLAST XML files into one.

        Merging multiple XML files is non-trivial and must be done in
        subclasses, rather than the generic parent base XML class.

        The header of the first file (everything up to and including its
        first ``<Iteration>`` line) is written once. For each later file the
        first 300 characters of its header must match the first file's, and
        only its iterations are appended. The closing tags are written once
        at the end.

        A single input file is delegated to ``Text.merge``.

        Raises ``ValueError`` if no files are given, if an input file is
        missing or empty (after one retry a second later), if an input does
        not look like BLAST XML, or if the headers do not match. When a
        header fails validation, the header text read so far is written to
        ``output_file`` to help diagnosis.
        """
        if len(split_files) == 1:
            # For one file only, use base class method (move/copy)
            return Text.merge(split_files, output_file)
        if not split_files:
            raise ValueError(
                "Given no BLAST XML files, %r, to merge into %s"
                % (split_files, output_file)
            )
        old_header = None
        # ``with`` guarantees the output is flushed and closed on every
        # error path below, so no branch has to close it by hand.
        with open(output_file, "w") as out:
            for index, f in enumerate(split_files):
                if not os.path.isfile(f):
                    log.warning("BLAST XML file %s missing, retry in 1s...", f)
                    sleep(1)
                if not os.path.isfile(f):
                    log.error("BLAST XML file %s missing", f)
                    raise ValueError("BLAST XML file %s missing" % f)
                h = open(f)
                try:
                    header = h.readline()
                    if not header:
                        # Retry, could be transient error with networked file system...
                        # Only the input handle is reopened; the output must
                        # stay open because it is still written to below.
                        h.close()
                        log.warning("BLAST XML file %s empty, retry in 1s...", f)
                        sleep(1)
                        h = open(f)
                        header = h.readline()
                        if not header:
                            log.error("BLAST XML file %s was empty", f)
                            raise ValueError("BLAST XML file %s was empty" % f)
                    if header.strip() != '<?xml version="1.0"?>':
                        out.write(header)  # for diagnosis
                        raise ValueError("%s is not an XML file!" % f)
                    line = h.readline()
                    header += line
                    if line.strip() not in _DOCTYPES:
                        out.write(header)  # for diagnosis
                        raise ValueError("%s is not a BLAST XML file!" % f)
                    # Accumulate the header up to the first <Iteration> line.
                    while True:
                        line = h.readline()
                        if not line:
                            out.write(header)  # for diagnosis
                            raise ValueError("BLAST XML file %s ended prematurely" % f)
                        header += line
                        if "<Iteration>" in line:
                            break
                        if len(header) > 10000:
                            # Something has gone wrong, don't load too much into memory!
                            # Write what we have to the merged file for diagnostics
                            out.write(header)
                            raise ValueError("The header in BLAST XML file %s is too long" % f)
                    if "<BlastOutput>" not in header:
                        raise ValueError("%s is not a BLAST XML file:\n%s\n..." % (f, header))
                    # Decide by position, not by path equality, so a path
                    # that appears twice does not re-emit the header.
                    if index == 0:
                        out.write(header)
                        old_header = header
                    elif old_header[:300] != header[:300]:
                        # Enough to check <BlastOutput_program> and <BlastOutput_version> match
                        raise ValueError(
                            "BLAST XML headers don't match for %s and %s - have:\n"
                            "%s\n...\n\nAnd:\n%s\n...\n"
                            % (split_files[0], f, old_header[:300], header[:300])
                        )
                    else:
                        # The opening tag was consumed as part of this
                        # file's header, so re-emit it for the merged output.
                        out.write(" <Iteration>\n")
                    for line in h:
                        if "</BlastOutput_iterations>" in line:
                            break
                        # TODO - Increment <Iteration_iter-num> and if required automatic query
                        # names like <Iteration_query-ID>Query_3</Iteration_query-ID> to be
                        # increasing?
                        out.write(line)
                finally:
                    # Closing an already closed handle is a no-op, so this
                    # is safe even if the retry's open() failed.
                    h.close()
            out.write(" </BlastOutput_iterations>\n")
            out.write("</BlastOutput>\n")
| 165 | |
| 4 | 166 |
| 167 class _BlastDb(object): | |
| 168 """Base class for BLAST database datatype.""" | |
| 169 | |
|
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
def set_peek(self, dataset, is_multi_byte=False):
    """Store the peek and blurb strings on ``dataset``.

    A purged dataset gets the "file does not exist" / "file purged from disk"
    placeholders; any other dataset gets the fixed multi-file database
    description for both fields. ``is_multi_byte`` is accepted but not used.
    """
    if dataset.dataset.purged:
        dataset.peek = "file does not exist"
        dataset.blurb = "file purged from disk"
        return
    description = "BLAST database (multiple files)"
    dataset.peek = description
    dataset.blurb = description
| 4 | 178 |
|
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
def display_peek(self, dataset):
    """Return the content used for displaying the dataset's peek.

    This is ``dataset.peek`` when it can be read; if reading it raises any
    ``Exception``, a fixed description of the database is returned instead.
    """
    try:
        peek_text = dataset.peek
    except Exception:
        peek_text = "BLAST database (multiple files)"
    return peek_text
| 185 | |
|
18
1250aab8b97a
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
peterjc
parents:
17
diff
changeset
|
def display_data(
    self,
    trans,
    data,
    preview=False,
    filename=None,
    to_ext=None,
    size=None,
    offset=None,
    **kwd
):
    """Documented as an old display method, but still gets called via tests etc.

    This allows us to format the data shown in the central pane via the "eye" icon.

    A request for any ``filename`` other than ``"index"`` is passed unchanged
    to ``Data.display_data``. Otherwise a small HTML page is returned: its
    title depends on ``self.file_ext``, and its body is the stripped text of
    ``data.file_name``, or the title when that file is empty or unreadable.
    """
    if filename is not None and filename != "index":
        # Change nothing - important for the unit tests to access child files:
        return Data.display_data(
            self, trans, data, preview, filename, to_ext, size, offset, **kwd
        )
    if self.file_ext == "blastdbn":
        title = "This is a nucleotide BLAST database"
    elif self.file_ext == "blastdbp":
        title = "This is a protein BLAST database"
    elif self.file_ext == "blastdbd":
        title = "This is a domain BLAST database"
    else:
        # Error?
        title = "This is a BLAST database."
    msg = ""
    try:
        # Try to use any text recorded in the dummy index file.
        # Plain text mode is used because the "U" flag is rejected by newer
        # Python 3 releases, and the resulting error would be swallowed
        # below so the index text would never be shown.
        with open(data.file_name) as handle:
            msg = handle.read().strip()
    except Exception:
        # Reading the index is best-effort: fall back to the title, but
        # leave a trace instead of failing silently.
        log.debug("Could not read BLAST database index file", exc_info=True)
    if not msg:
        msg = title
    # Galaxy assumes HTML for the display of composite datatypes,
    # NOTE(review): msg is interpolated without HTML escaping; confirm the
    # index file content is trusted, or escape it before display.
    return (
        "<html><head><title>%s</title></head><body><pre>%s</pre></body></html>"
        % (title, msg)
    )
| 4 | 230 |
@staticmethod
def merge(split_files, output_file):
    """Merge BLAST databases (not implemented for now).

    Declared as a static method because the signature has no ``self``:
    without the decorator, calling this on a datatype *instance* would bind
    the instance to ``split_files`` and fail with a ``TypeError`` instead of
    the intended ``NotImplementedError``.

    Args:
        split_files: The partial results that would be combined (unused).
        output_file: Where the merged result would be written (unused).

    Raises:
        NotImplementedError: Always; merging is not supported.
    """
    raise NotImplementedError(
        "Merging BLAST databases is non-trivial (do this via makeblastdb?)"
    )
| 4 | 236 |
|
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
@classmethod
def split(cls, input_datasets, subdir_generator_function, split_params):
    """Split a BLAST database (not implemented for now).

    Declared as a class method to match the ``cls`` first parameter, so the
    call works identically on the class and on an instance.

    Args:
        input_datasets: The datasets that would be split (unused).
        subdir_generator_function: Callable that would provide the output
            sub-directories (unused).
        split_params: The requested split settings, or ``None`` when no
            splitting is requested.

    Returns:
        ``None`` when ``split_params`` is ``None``.

    Raises:
        NotImplementedError: If any split settings are supplied.
    """
    if split_params is None:
        # Nothing was requested, so there is nothing to do.
        return None
    raise NotImplementedError("Can't split BLAST databases")
| 242 | |
| 243 | |
|
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
class BlastNucDb(_BlastDb, Data):
    """Composite datatype for a nucleotide BLAST database."""

    file_ext = "blastdbn"
    allow_datatype_change = False
    composite_type = "basic"

    def __init__(self, **kwd):
        """Initialize the datatype and register its component files."""
        Data.__init__(self, **kwd)

        # Mandatory components, in registration order:
        # sequence headers, index file, nucleotide sequences.
        for required_name in ("blastdb.nhr", "blastdb.nin", "blastdb.nsq"):
            self.add_composite_file(required_name, is_binary=True)

        # alias ( -gi_mask option of makeblastdb); the only non-binary file
        self.add_composite_file("blastdb.nal", is_binary=False, optional=True)

        optional_binary_names = (
            # sorted sequence hash values ( -hash_index option of makeblastdb)
            "blastdb.nhd",
            # index of sequence hash values ( -hash_index option of makeblastdb)
            "blastdb.nhi",
            # sorted GI values ( -parse_seqids option of makeblastdb and gi
            # present in the description lines)
            "blastdb.nnd",
            # index of GI values ( -parse_seqids option of makeblastdb and gi
            # present in the description lines)
            "blastdb.nni",
            # OID->GI lookup file ( -hash_index or -parse_seqids option of
            # makeblastdb)
            "blastdb.nog",
            # sorted sequence accession values ( -hash_index or -parse_seqids
            # option of makeblastdb)
            "blastdb.nsd",
            # index of sequence accession values ( -hash_index or
            # -parse_seqids option of makeblastdb)
            "blastdb.nsi",
        )
        for optional_name in optional_binary_names:
            self.add_composite_file(optional_name, is_binary=True, optional=True)

        # Components that are deliberately NOT registered here:
        #
        # - 'blastdb.00.idx': first volume of the MegaBLAST index generated
        #   by makembindex. One entry would be needed per index volume, with
        #   filename extensions like '.01.idx', '.02.idx', etc.
        # - 'blastdb.shd': MegaBLAST index superheader
        #   (-old_style_index false option of makembindex).
        # - 'blastdb.naa': index of a WriteDB column for e.g. mask data.
        # - 'blastdb.nab': data of a WriteDB column.
        # - 'blastdb.nac': multiple byte order for a WriteDB column.
        #   These three would be repeated for each WriteDB column, with
        #   filename extensions like ('.nba', '.nbb', '.nbc'),
        #   ('.nca', '.ncb', '.ncc'), etc.
| 4 | 305 |
|
5
b3a3ba0c1d47
Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents:
4
diff
changeset
|
306 |
|
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
class BlastProtDb(_BlastDb, Data):
    """Composite datatype for a protein BLAST database."""

    file_ext = "blastdbp"
    allow_datatype_change = False
    composite_type = "basic"

    def __init__(self, **kwd):
        """Initialize the datatype and register its component files."""
        Data.__init__(self, **kwd)

        # The component files mirror those of BlastNucDb, with a 'p' prefix
        # on the extension; 'blastdb.psq' holds the protein sequences.
        for required_name in ("blastdb.phr", "blastdb.pin", "blastdb.psq"):
            self.add_composite_file(required_name, is_binary=True)

        optional_binary_names = (
            "blastdb.phd",
            "blastdb.phi",
            "blastdb.pnd",
            "blastdb.pni",
            "blastdb.pog",
            "blastdb.psd",
            "blastdb.psi",
        )
        for optional_name in optional_binary_names:
            self.add_composite_file(optional_name, is_binary=True, optional=True)

        # Components that are deliberately NOT registered here:
        # 'blastdb.paa', 'blastdb.pab' and 'blastdb.pac'. These three would
        # be repeated for each WriteDB column, with filename extensions like
        # ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc.
| 10 | 333 |
| 334 | |
|
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
335 class BlastDomainDb(_BlastDb, Data): |
| 10 | 336 """Class for domain BLAST database files.""" |
|
17
3eada762af11
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit ed6325d44993c65dd9fbab02902ede0a9c0eeb80-dirty
peterjc
parents:
15
diff
changeset
|
337 |
|
18
1250aab8b97a
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
peterjc
parents:
17
diff
changeset
|
338 file_ext = "blastdbd" |
| 10 | 339 allow_datatype_change = False |
|
18
1250aab8b97a
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
peterjc
parents:
17
diff
changeset
|
340 composite_type = "basic" |
| 10 | 341 |
| 342 def __init__(self, **kwd): | |
|
17
3eada762af11
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit ed6325d44993c65dd9fbab02902ede0a9c0eeb80-dirty
peterjc
parents:
15
diff
changeset
|
343 """Initialize the class.""" |
| 10 | 344 Data.__init__(self, **kwd) |
|
18
1250aab8b97a
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
peterjc
parents:
17
diff
changeset
|
345 self.add_composite_file("blastdb.phr", is_binary=True) |
|
1250aab8b97a
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
peterjc
parents:
17
diff
changeset
|
346 self.add_composite_file("blastdb.pin", is_binary=True) |
|
1250aab8b97a
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
peterjc
parents:
17
diff
changeset
|
347 self.add_composite_file("blastdb.psq", is_binary=True) |
|
1250aab8b97a
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
peterjc
parents:
17
diff
changeset
|
348 self.add_composite_file("blastdb.freq", is_binary=True, optional=True) |
|
1250aab8b97a
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
peterjc
parents:
17
diff
changeset
|
349 self.add_composite_file("blastdb.loo", is_binary=True, optional=True) |
|
1250aab8b97a
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
peterjc
parents:
17
diff
changeset
|
350 self.add_composite_file("blastdb.psd", is_binary=True, optional=True) |
|
1250aab8b97a
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
peterjc
parents:
17
diff
changeset
|
351 self.add_composite_file("blastdb.psi", is_binary=True, optional=True) |
|
1250aab8b97a
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
peterjc
parents:
17
diff
changeset
|
352 self.add_composite_file("blastdb.rps", is_binary=True, optional=True) |
|
1250aab8b97a
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
peterjc
parents:
17
diff
changeset
|
353 self.add_composite_file("blastdb.aux", is_binary=True, optional=True) |
