comparison blast.py @ 15:310ec0f47485 draft

planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 96d587fc6f6cab23c597e88a83daf7eecd0d4162-dirty
author peterjc
date Thu, 09 Feb 2017 11:16:00 -0500
parents 623a3fbe5340
children 3eada762af11
comparison
equal deleted inserted replaced
14:623a3fbe5340 15:310ec0f47485
5 5
6 import logging 6 import logging
7 import os 7 import os
8 from time import sleep 8 from time import sleep
9 9
10 from galaxy.datatypes.data import get_file_peek 10 from galaxy.datatypes.data import Data, Text, get_file_peek
11 from galaxy.datatypes.data import Data, Text
12 from galaxy.datatypes.xml import GenericXml 11 from galaxy.datatypes.xml import GenericXml
13 12
14 log = logging.getLogger(__name__) 13 log = logging.getLogger(__name__)
14
15 # Note implicit string concatenation here to avoid excessively long lines:
16 _DOCTYPES = ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" '
17 '"http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
18 '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" '
19 '"NCBI_BlastOutput.dtd">']
15 20
16 21
17 class BlastXml(GenericXml): 22 class BlastXml(GenericXml):
18 """NCBI Blast XML Output data""" 23 """NCBI Blast XML Output data"""
19 file_ext = "blastxml" 24 file_ext = "blastxml"
46 line = handle.readline() 51 line = handle.readline()
47 if line.strip() != '<?xml version="1.0"?>': 52 if line.strip() != '<?xml version="1.0"?>':
48 handle.close() 53 handle.close()
49 return False 54 return False
50 line = handle.readline() 55 line = handle.readline()
51 if line.strip() not in ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">', 56 if line.strip() not in _DOCTYPES:
52 '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">']:
53 handle.close() 57 handle.close()
54 return False 58 return False
55 line = handle.readline() 59 line = handle.readline()
56 if line.strip() != '<BlastOutput>': 60 if line.strip() != '<BlastOutput>':
57 handle.close() 61 handle.close()
94 out.close() 98 out.close()
95 h.close() 99 h.close()
96 raise ValueError("%s is not an XML file!" % f) 100 raise ValueError("%s is not an XML file!" % f)
97 line = h.readline() 101 line = h.readline()
98 header += line 102 header += line
99 if line.strip() not in ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">', 103 if line.strip() not in _DOCTYPES:
100 '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">']:
101 out.write(header) # for diagnosis 104 out.write(header) # for diagnosis
102 out.close() 105 out.close()
103 h.close() 106 h.close()
104 raise ValueError("%s is not a BLAST XML file!" % f) 107 raise ValueError("%s is not a BLAST XML file!" % f)
105 while True: 108 while True:
218 def __init__(self, **kwd): 221 def __init__(self, **kwd):
219 Data.__init__(self, **kwd) 222 Data.__init__(self, **kwd)
220 self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers 223 self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers
221 self.add_composite_file('blastdb.nin', is_binary=True) # index file 224 self.add_composite_file('blastdb.nin', is_binary=True) # index file
222 self.add_composite_file('blastdb.nsq', is_binary=True) # nucleotide sequences 225 self.add_composite_file('blastdb.nsq', is_binary=True) # nucleotide sequences
223 self.add_composite_file('blastdb.nal', is_binary=False, optional=True) # alias ( -gi_mask option of makeblastdb) 226
224 self.add_composite_file('blastdb.nhd', is_binary=True, optional=True) # sorted sequence hash values ( -hash_index option of makeblastdb) 227 # alias ( -gi_mask option of makeblastdb)
225 self.add_composite_file('blastdb.nhi', is_binary=True, optional=True) # index of sequence hash values ( -hash_index option of makeblastdb) 228 self.add_composite_file('blastdb.nal', is_binary=False, optional=True)
226 self.add_composite_file('blastdb.nnd', is_binary=True, optional=True) # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines) 229
227 self.add_composite_file('blastdb.nni', is_binary=True, optional=True) # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines) 230 # sorted sequence hash values ( -hash_index option of makeblastdb)
228 self.add_composite_file('blastdb.nog', is_binary=True, optional=True) # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb) 231 self.add_composite_file('blastdb.nhd', is_binary=True, optional=True)
229 self.add_composite_file('blastdb.nsd', is_binary=True, optional=True) # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb) 232
230 self.add_composite_file('blastdb.nsi', is_binary=True, optional=True) # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb) 233 # index of sequence hash values ( -hash_index option of makeblastdb)
231 # self.add_composite_file('blastdb.00.idx', is_binary=True, optional=True) # first volume of the MegaBLAST index generated by makembindex 234 self.add_composite_file('blastdb.nhi', is_binary=True, optional=True)
232 # The previous line should be repeated for each index volume, with filename extensions like '.01.idx', '.02.idx', etc. 235
233 self.add_composite_file('blastdb.shd', is_binary=True, optional=True) # MegaBLAST index superheader (-old_style_index false option of makembindex) 236 # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
234 # self.add_composite_file('blastdb.naa', is_binary=True, optional=True) # index of a WriteDB column for e.g. mask data 237 self.add_composite_file('blastdb.nnd', is_binary=True, optional=True)
235 # self.add_composite_file('blastdb.nab', is_binary=True, optional=True) # data of a WriteDB column 238
236 # self.add_composite_file('blastdb.nac', is_binary=True, optional=True) # multiple byte order for a WriteDB column 239 # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
237 # The previous 3 lines should be repeated for each WriteDB column, with filename extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc. 240 self.add_composite_file('blastdb.nni', is_binary=True, optional=True)
241
242 # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb)
243 self.add_composite_file('blastdb.nog', is_binary=True, optional=True)
244
245 # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
246 self.add_composite_file('blastdb.nsd', is_binary=True, optional=True)
247
248 # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
249 self.add_composite_file('blastdb.nsi', is_binary=True, optional=True)
250
251 # first volume of the MegaBLAST index generated by makembindex
252 # self.add_composite_file('blastdb.00.idx', is_binary=True, optional=True)
253 # The previous line should be repeated for each index volume, with filename
254 # extensions like '.01.idx', '.02.idx', etc.
255
256 # MegaBLAST index superheader (-old_style_index false option of makembindex)
257 # self.add_composite_file('blastdb.shd', is_binary=True, optional=True)
258
259 # index of a WriteDB column for e.g. mask data
260 # self.add_composite_file('blastdb.naa', is_binary=True, optional=True)
261
262 # data of a WriteDB column
263 # self.add_composite_file('blastdb.nab', is_binary=True, optional=True)
264
265 # multiple byte order for a WriteDB column
266 # self.add_composite_file('blastdb.nac', is_binary=True, optional=True)
267
268 # The previous 3 lines should be repeated for each WriteDB column, with filename
269 # extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc.
238 270
239 271
240 class BlastProtDb(_BlastDb, Data): 272 class BlastProtDb(_BlastDb, Data):
241 """Class for protein BLAST database files.""" 273 """Class for protein BLAST database files."""
242 file_ext = 'blastdbp' 274 file_ext = 'blastdbp'
243 allow_datatype_change = False 275 allow_datatype_change = False
244 composite_type = 'basic' 276 composite_type = 'basic'
245 277
246 def __init__(self, **kwd): 278 def __init__(self, **kwd):
247 Data.__init__(self, **kwd) 279 Data.__init__(self, **kwd)
248 # Component file comments are as in BlastNucDb except where noted 280 # Component file comments are as in BlastNucDb except where noted
249 self.add_composite_file('blastdb.phr', is_binary=True) 281 self.add_composite_file('blastdb.phr', is_binary=True)
250 self.add_composite_file('blastdb.pin', is_binary=True) 282 self.add_composite_file('blastdb.pin', is_binary=True)
251 self.add_composite_file('blastdb.psq', is_binary=True) # protein sequences 283 self.add_composite_file('blastdb.psq', is_binary=True) # protein sequences
252 self.add_composite_file('blastdb.phd', is_binary=True, optional=True) 284 self.add_composite_file('blastdb.phd', is_binary=True, optional=True)
253 self.add_composite_file('blastdb.phi', is_binary=True, optional=True) 285 self.add_composite_file('blastdb.phi', is_binary=True, optional=True)
254 self.add_composite_file('blastdb.pnd', is_binary=True, optional=True) 286 self.add_composite_file('blastdb.pnd', is_binary=True, optional=True)
255 self.add_composite_file('blastdb.pni', is_binary=True, optional=True) 287 self.add_composite_file('blastdb.pni', is_binary=True, optional=True)
256 self.add_composite_file('blastdb.pog', is_binary=True, optional=True) 288 self.add_composite_file('blastdb.pog', is_binary=True, optional=True)
257 self.add_composite_file('blastdb.psd', is_binary=True, optional=True) 289 self.add_composite_file('blastdb.psd', is_binary=True, optional=True)
258 self.add_composite_file('blastdb.psi', is_binary=True, optional=True) 290 self.add_composite_file('blastdb.psi', is_binary=True, optional=True)
259 # self.add_composite_file('blastdb.paa', is_binary=True, optional=True) 291 # self.add_composite_file('blastdb.paa', is_binary=True, optional=True)
260 # self.add_composite_file('blastdb.pab', is_binary=True, optional=True) 292 # self.add_composite_file('blastdb.pab', is_binary=True, optional=True)
261 # self.add_composite_file('blastdb.pac', is_binary=True, optional=True) 293 # self.add_composite_file('blastdb.pac', is_binary=True, optional=True)
262 # The last 3 lines should be repeated for each WriteDB column, with filename extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc. 294 # The last 3 lines should be repeated for each WriteDB column, with filename
295 # extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc.
263 296
264 297
265 class BlastDomainDb(_BlastDb, Data): 298 class BlastDomainDb(_BlastDb, Data):
266 """Class for domain BLAST database files.""" 299 """Class for domain BLAST database files."""
267 file_ext = 'blastdbd' 300 file_ext = 'blastdbd'