Mercurial > repos > devteam > blast_datatypes
comparison blast.py @ 17:3eada762af11 draft
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit ed6325d44993c65dd9fbab02902ede0a9c0eeb80-dirty
| author | peterjc |
|---|---|
| date | Tue, 23 Oct 2018 06:24:33 -0400 |
| parents | 310ec0f47485 |
| children | 1250aab8b97a |
comparison
equal
deleted
inserted
replaced
| 16:63befb860c3e | 17:3eada762af11 |
|---|---|
| 18 '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" ' | 18 '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" ' |
| 19 '"NCBI_BlastOutput.dtd">'] | 19 '"NCBI_BlastOutput.dtd">'] |
| 20 | 20 |
| 21 | 21 |
| 22 class BlastXml(GenericXml): | 22 class BlastXml(GenericXml): |
| 23 """NCBI Blast XML Output data""" | 23 """NCBI Blast XML Output data.""" |
| 24 | |
| 24 file_ext = "blastxml" | 25 file_ext = "blastxml" |
| 25 | 26 |
| 26 def set_peek(self, dataset, is_multi_byte=False): | 27 def set_peek(self, dataset, is_multi_byte=False): |
| 27 """Set the peek and blurb text""" | 28 """Set the peek and blurb text.""" |
| 28 if not dataset.dataset.purged: | 29 if not dataset.dataset.purged: |
| 29 dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte) | 30 dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte) |
| 30 dataset.blurb = 'NCBI Blast XML data' | 31 dataset.blurb = 'NCBI Blast XML data' |
| 31 else: | 32 else: |
| 32 dataset.peek = 'file does not exist' | 33 dataset.peek = 'file does not exist' |
| 33 dataset.blurb = 'file purged from disk' | 34 dataset.blurb = 'file purged from disk' |
| 34 | 35 |
| 35 def sniff(self, filename): | 36 def sniff(self, filename): |
| 36 """Determines whether the file is blastxml | 37 """Determine from the contents if the file is blastxml. |
| 37 | 38 |
| 38 >>> from galaxy.datatypes.sniff import get_test_fname | 39 >>> from galaxy.datatypes.sniff import get_test_fname |
| 39 >>> fname = get_test_fname('megablast_xml_parser_test1.blastxml') | 40 >>> fname = get_test_fname('megablast_xml_parser_test1.blastxml') |
| 40 >>> BlastXml().sniff(fname) | 41 >>> BlastXml().sniff(fname) |
| 41 True | 42 True |
| 62 return False | 63 return False |
| 63 handle.close() | 64 handle.close() |
| 64 return True | 65 return True |
| 65 | 66 |
| 66 def merge(split_files, output_file): | 67 def merge(split_files, output_file): |
| 67 """Merging multiple XML files is non-trivial and must be done in subclasses.""" | 68 """Merge multiple BLAST XML files into one. |
| 69 | |
| 70 Merging multiple XML files is non-trivial and must be done in | |
| 71 subclasses, rather than the generic parent base XML class. | |
| 72 """ | |
| 68 if len(split_files) == 1: | 73 if len(split_files) == 1: |
| 69 # For one file only, use base class method (move/copy) | 74 # For one file only, use base class method (move/copy) |
| 70 return Text.merge(split_files, output_file) | 75 return Text.merge(split_files, output_file) |
| 71 if not split_files: | 76 if not split_files: |
| 72 raise ValueError("Given no BLAST XML files, %r, to merge into %s" | 77 raise ValueError("Given no BLAST XML files, %r, to merge into %s" |
| 169 except Exception: | 174 except Exception: |
| 170 return "BLAST database (multiple files)" | 175 return "BLAST database (multiple files)" |
| 171 | 176 |
| 172 def display_data(self, trans, data, preview=False, filename=None, | 177 def display_data(self, trans, data, preview=False, filename=None, |
| 173 to_ext=None, size=None, offset=None, **kwd): | 178 to_ext=None, size=None, offset=None, **kwd): |
| 174 """Documented as an old display method, but still gets called via tests etc | 179 """Documented as an old display method, but still gets called via tests etc. |
| 175 | 180 |
| 176 This allows us to format the data shown in the central pane via the "eye" icon. | 181 This allows us to format the data shown in the central pane via the "eye" icon. |
| 177 """ | 182 """ |
| 178 if filename is not None and filename != "index": | 183 if filename is not None and filename != "index": |
| 179 # Change nothing - important for the unit tests to access child files: | 184 # Change nothing - important for the unit tests to access child files: |
| 212 raise NotImplementedError("Can't split BLAST databases") | 217 raise NotImplementedError("Can't split BLAST databases") |
| 213 | 218 |
| 214 | 219 |
| 215 class BlastNucDb(_BlastDb, Data): | 220 class BlastNucDb(_BlastDb, Data): |
| 216 """Class for nucleotide BLAST database files.""" | 221 """Class for nucleotide BLAST database files.""" |
| 222 | |
| 217 file_ext = 'blastdbn' | 223 file_ext = 'blastdbn' |
| 218 allow_datatype_change = False | 224 allow_datatype_change = False |
| 219 composite_type = 'basic' | 225 composite_type = 'basic' |
| 220 | 226 |
| 221 def __init__(self, **kwd): | 227 def __init__(self, **kwd): |
| 228 """Initialize the class.""" | |
| 222 Data.__init__(self, **kwd) | 229 Data.__init__(self, **kwd) |
| 223 self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers | 230 self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers |
| 224 self.add_composite_file('blastdb.nin', is_binary=True) # index file | 231 self.add_composite_file('blastdb.nin', is_binary=True) # index file |
| 225 self.add_composite_file('blastdb.nsq', is_binary=True) # nucleotide sequences | 232 self.add_composite_file('blastdb.nsq', is_binary=True) # nucleotide sequences |
| 226 | 233 |
| 269 # extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc. | 276 # extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc. |
| 270 | 277 |
| 271 | 278 |
| 272 class BlastProtDb(_BlastDb, Data): | 279 class BlastProtDb(_BlastDb, Data): |
| 273 """Class for protein BLAST database files.""" | 280 """Class for protein BLAST database files.""" |
| 281 | |
| 274 file_ext = 'blastdbp' | 282 file_ext = 'blastdbp' |
| 275 allow_datatype_change = False | 283 allow_datatype_change = False |
| 276 composite_type = 'basic' | 284 composite_type = 'basic' |
| 277 | 285 |
| 278 def __init__(self, **kwd): | 286 def __init__(self, **kwd): |
| 287 """Initialize the class.""" | |
| 279 Data.__init__(self, **kwd) | 288 Data.__init__(self, **kwd) |
| 280 # Component file comments are as in BlastNucDb except where noted | 289 # Component file comments are as in BlastNucDb except where noted |
| 281 self.add_composite_file('blastdb.phr', is_binary=True) | 290 self.add_composite_file('blastdb.phr', is_binary=True) |
| 282 self.add_composite_file('blastdb.pin', is_binary=True) | 291 self.add_composite_file('blastdb.pin', is_binary=True) |
| 283 self.add_composite_file('blastdb.psq', is_binary=True) # protein sequences | 292 self.add_composite_file('blastdb.psq', is_binary=True) # protein sequences |
| 295 # extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc. | 304 # extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc. |
| 296 | 305 |
| 297 | 306 |
| 298 class BlastDomainDb(_BlastDb, Data): | 307 class BlastDomainDb(_BlastDb, Data): |
| 299 """Class for domain BLAST database files.""" | 308 """Class for domain BLAST database files.""" |
| 309 | |
| 300 file_ext = 'blastdbd' | 310 file_ext = 'blastdbd' |
| 301 allow_datatype_change = False | 311 allow_datatype_change = False |
| 302 composite_type = 'basic' | 312 composite_type = 'basic' |
| 303 | 313 |
| 304 def __init__(self, **kwd): | 314 def __init__(self, **kwd): |
| 315 """Initialize the class.""" | |
| 305 Data.__init__(self, **kwd) | 316 Data.__init__(self, **kwd) |
| 306 self.add_composite_file('blastdb.phr', is_binary=True) | 317 self.add_composite_file('blastdb.phr', is_binary=True) |
| 307 self.add_composite_file('blastdb.pin', is_binary=True) | 318 self.add_composite_file('blastdb.pin', is_binary=True) |
| 308 self.add_composite_file('blastdb.psq', is_binary=True) | 319 self.add_composite_file('blastdb.psq', is_binary=True) |
| 309 self.add_composite_file('blastdb.freq', is_binary=True, optional=True) | 320 self.add_composite_file('blastdb.freq', is_binary=True, optional=True) |
