comparison blast.py @ 17:3eada762af11 draft

planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit ed6325d44993c65dd9fbab02902ede0a9c0eeb80-dirty
author peterjc
date Tue, 23 Oct 2018 06:24:33 -0400
parents 310ec0f47485
children 1250aab8b97a
comparison
equal deleted inserted replaced
16:63befb860c3e 17:3eada762af11
18 '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" ' 18 '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" '
19 '"NCBI_BlastOutput.dtd">'] 19 '"NCBI_BlastOutput.dtd">']
20 20
21 21
22 class BlastXml(GenericXml): 22 class BlastXml(GenericXml):
23 """NCBI Blast XML Output data""" 23 """NCBI Blast XML Output data."""
24
24 file_ext = "blastxml" 25 file_ext = "blastxml"
25 26
26 def set_peek(self, dataset, is_multi_byte=False): 27 def set_peek(self, dataset, is_multi_byte=False):
27 """Set the peek and blurb text""" 28 """Set the peek and blurb text."""
28 if not dataset.dataset.purged: 29 if not dataset.dataset.purged:
29 dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte) 30 dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
30 dataset.blurb = 'NCBI Blast XML data' 31 dataset.blurb = 'NCBI Blast XML data'
31 else: 32 else:
32 dataset.peek = 'file does not exist' 33 dataset.peek = 'file does not exist'
33 dataset.blurb = 'file purged from disk' 34 dataset.blurb = 'file purged from disk'
34 35
35 def sniff(self, filename): 36 def sniff(self, filename):
36 """Determines whether the file is blastxml 37 """Determine from the contents if the file is blastxml.
37 38
38 >>> from galaxy.datatypes.sniff import get_test_fname 39 >>> from galaxy.datatypes.sniff import get_test_fname
39 >>> fname = get_test_fname('megablast_xml_parser_test1.blastxml') 40 >>> fname = get_test_fname('megablast_xml_parser_test1.blastxml')
40 >>> BlastXml().sniff(fname) 41 >>> BlastXml().sniff(fname)
41 True 42 True
62 return False 63 return False
63 handle.close() 64 handle.close()
64 return True 65 return True
65 66
66 def merge(split_files, output_file): 67 def merge(split_files, output_file):
67 """Merging multiple XML files is non-trivial and must be done in subclasses.""" 68 """Merge multiple BLAST XML files into one.
69
70 Merging multiple XML files is non-trivial and must be done in
71 subclasses, rather than the generic parent base XML class.
72 """
68 if len(split_files) == 1: 73 if len(split_files) == 1:
69 # For one file only, use base class method (move/copy) 74 # For one file only, use base class method (move/copy)
70 return Text.merge(split_files, output_file) 75 return Text.merge(split_files, output_file)
71 if not split_files: 76 if not split_files:
72 raise ValueError("Given no BLAST XML files, %r, to merge into %s" 77 raise ValueError("Given no BLAST XML files, %r, to merge into %s"
169 except Exception: 174 except Exception:
170 return "BLAST database (multiple files)" 175 return "BLAST database (multiple files)"
171 176
172 def display_data(self, trans, data, preview=False, filename=None, 177 def display_data(self, trans, data, preview=False, filename=None,
173 to_ext=None, size=None, offset=None, **kwd): 178 to_ext=None, size=None, offset=None, **kwd):
174 """Documented as an old display method, but still gets called via tests etc 179 """Documented as an old display method, but still gets called via tests etc.
175 180
176 This allows us to format the data shown in the central pane via the "eye" icon. 181 This allows us to format the data shown in the central pane via the "eye" icon.
177 """ 182 """
178 if filename is not None and filename != "index": 183 if filename is not None and filename != "index":
179 # Change nothing - important for the unit tests to access child files: 184 # Change nothing - important for the unit tests to access child files:
212 raise NotImplementedError("Can't split BLAST databases") 217 raise NotImplementedError("Can't split BLAST databases")
213 218
214 219
215 class BlastNucDb(_BlastDb, Data): 220 class BlastNucDb(_BlastDb, Data):
216 """Class for nucleotide BLAST database files.""" 221 """Class for nucleotide BLAST database files."""
222
217 file_ext = 'blastdbn' 223 file_ext = 'blastdbn'
218 allow_datatype_change = False 224 allow_datatype_change = False
219 composite_type = 'basic' 225 composite_type = 'basic'
220 226
221 def __init__(self, **kwd): 227 def __init__(self, **kwd):
228 """Initialize the class."""
222 Data.__init__(self, **kwd) 229 Data.__init__(self, **kwd)
223 self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers 230 self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers
224 self.add_composite_file('blastdb.nin', is_binary=True) # index file 231 self.add_composite_file('blastdb.nin', is_binary=True) # index file
225 self.add_composite_file('blastdb.nsq', is_binary=True) # nucleotide sequences 232 self.add_composite_file('blastdb.nsq', is_binary=True) # nucleotide sequences
226 233
269 # extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc. 276 # extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc.
270 277
271 278
272 class BlastProtDb(_BlastDb, Data): 279 class BlastProtDb(_BlastDb, Data):
273 """Class for protein BLAST database files.""" 280 """Class for protein BLAST database files."""
281
274 file_ext = 'blastdbp' 282 file_ext = 'blastdbp'
275 allow_datatype_change = False 283 allow_datatype_change = False
276 composite_type = 'basic' 284 composite_type = 'basic'
277 285
278 def __init__(self, **kwd): 286 def __init__(self, **kwd):
287 """Initialize the class."""
279 Data.__init__(self, **kwd) 288 Data.__init__(self, **kwd)
280 # Component file comments are as in BlastNucDb except where noted 289 # Component file comments are as in BlastNucDb except where noted
281 self.add_composite_file('blastdb.phr', is_binary=True) 290 self.add_composite_file('blastdb.phr', is_binary=True)
282 self.add_composite_file('blastdb.pin', is_binary=True) 291 self.add_composite_file('blastdb.pin', is_binary=True)
283 self.add_composite_file('blastdb.psq', is_binary=True) # protein sequences 292 self.add_composite_file('blastdb.psq', is_binary=True) # protein sequences
295 # extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc. 304 # extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc.
296 305
297 306
298 class BlastDomainDb(_BlastDb, Data): 307 class BlastDomainDb(_BlastDb, Data):
299 """Class for domain BLAST database files.""" 308 """Class for domain BLAST database files."""
309
300 file_ext = 'blastdbd' 310 file_ext = 'blastdbd'
301 allow_datatype_change = False 311 allow_datatype_change = False
302 composite_type = 'basic' 312 composite_type = 'basic'
303 313
304 def __init__(self, **kwd): 314 def __init__(self, **kwd):
315 """Initialize the class."""
305 Data.__init__(self, **kwd) 316 Data.__init__(self, **kwd)
306 self.add_composite_file('blastdb.phr', is_binary=True) 317 self.add_composite_file('blastdb.phr', is_binary=True)
307 self.add_composite_file('blastdb.pin', is_binary=True) 318 self.add_composite_file('blastdb.pin', is_binary=True)
308 self.add_composite_file('blastdb.psq', is_binary=True) 319 self.add_composite_file('blastdb.psq', is_binary=True)
309 self.add_composite_file('blastdb.freq', is_binary=True, optional=True) 320 self.add_composite_file('blastdb.freq', is_binary=True, optional=True)