Mercurial > repos > devteam > blast_datatypes
comparison blast.py @ 15:310ec0f47485 draft
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 96d587fc6f6cab23c597e88a83daf7eecd0d4162-dirty
| author | peterjc |
|---|---|
| date | Thu, 09 Feb 2017 11:16:00 -0500 |
| parents | 623a3fbe5340 |
| children | 3eada762af11 |
comparison
equal
deleted
inserted
replaced
| 14:623a3fbe5340 | 15:310ec0f47485 |
|---|---|
| 5 | 5 |
| 6 import logging | 6 import logging |
| 7 import os | 7 import os |
| 8 from time import sleep | 8 from time import sleep |
| 9 | 9 |
| 10 from galaxy.datatypes.data import get_file_peek | 10 from galaxy.datatypes.data import Data, Text, get_file_peek |
| 11 from galaxy.datatypes.data import Data, Text | |
| 12 from galaxy.datatypes.xml import GenericXml | 11 from galaxy.datatypes.xml import GenericXml |
| 13 | 12 |
| 14 log = logging.getLogger(__name__) | 13 log = logging.getLogger(__name__) |
| 14 | |
| 15 # Note implicit string concatenation here to avoid excessively long lines: | |
| 16 _DOCTYPES = ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" ' | |
| 17 '"http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">', | |
| 18 '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" ' | |
| 19 '"NCBI_BlastOutput.dtd">'] | |
| 15 | 20 |
| 16 | 21 |
| 17 class BlastXml(GenericXml): | 22 class BlastXml(GenericXml): |
| 18 """NCBI Blast XML Output data""" | 23 """NCBI Blast XML Output data""" |
| 19 file_ext = "blastxml" | 24 file_ext = "blastxml" |
| 46 line = handle.readline() | 51 line = handle.readline() |
| 47 if line.strip() != '<?xml version="1.0"?>': | 52 if line.strip() != '<?xml version="1.0"?>': |
| 48 handle.close() | 53 handle.close() |
| 49 return False | 54 return False |
| 50 line = handle.readline() | 55 line = handle.readline() |
| 51 if line.strip() not in ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">', | 56 if line.strip() not in _DOCTYPES: |
| 52 '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">']: | |
| 53 handle.close() | 57 handle.close() |
| 54 return False | 58 return False |
| 55 line = handle.readline() | 59 line = handle.readline() |
| 56 if line.strip() != '<BlastOutput>': | 60 if line.strip() != '<BlastOutput>': |
| 57 handle.close() | 61 handle.close() |
| 94 out.close() | 98 out.close() |
| 95 h.close() | 99 h.close() |
| 96 raise ValueError("%s is not an XML file!" % f) | 100 raise ValueError("%s is not an XML file!" % f) |
| 97 line = h.readline() | 101 line = h.readline() |
| 98 header += line | 102 header += line |
| 99 if line.strip() not in ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">', | 103 if line.strip() not in _DOCTYPES: |
| 100 '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">']: | |
| 101 out.write(header) # for diagnosis | 104 out.write(header) # for diagnosis |
| 102 out.close() | 105 out.close() |
| 103 h.close() | 106 h.close() |
| 104 raise ValueError("%s is not a BLAST XML file!" % f) | 107 raise ValueError("%s is not a BLAST XML file!" % f) |
| 105 while True: | 108 while True: |
| 218 def __init__(self, **kwd): | 221 def __init__(self, **kwd): |
| 219 Data.__init__(self, **kwd) | 222 Data.__init__(self, **kwd) |
| 220 self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers | 223 self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers |
| 221 self.add_composite_file('blastdb.nin', is_binary=True) # index file | 224 self.add_composite_file('blastdb.nin', is_binary=True) # index file |
| 222 self.add_composite_file('blastdb.nsq', is_binary=True) # nucleotide sequences | 225 self.add_composite_file('blastdb.nsq', is_binary=True) # nucleotide sequences |
| 223 self.add_composite_file('blastdb.nal', is_binary=False, optional=True) # alias ( -gi_mask option of makeblastdb) | 226 |
| 224 self.add_composite_file('blastdb.nhd', is_binary=True, optional=True) # sorted sequence hash values ( -hash_index option of makeblastdb) | 227 # alias ( -gi_mask option of makeblastdb) |
| 225 self.add_composite_file('blastdb.nhi', is_binary=True, optional=True) # index of sequence hash values ( -hash_index option of makeblastdb) | 228 self.add_composite_file('blastdb.nal', is_binary=False, optional=True) |
| 226 self.add_composite_file('blastdb.nnd', is_binary=True, optional=True) # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines) | 229 |
| 227 self.add_composite_file('blastdb.nni', is_binary=True, optional=True) # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines) | 230 # sorted sequence hash values ( -hash_index option of makeblastdb) |
| 228 self.add_composite_file('blastdb.nog', is_binary=True, optional=True) # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb) | 231 self.add_composite_file('blastdb.nhd', is_binary=True, optional=True) |
| 229 self.add_composite_file('blastdb.nsd', is_binary=True, optional=True) # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb) | 232 |
| 230 self.add_composite_file('blastdb.nsi', is_binary=True, optional=True) # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb) | 233 # index of sequence hash values ( -hash_index option of makeblastdb) |
| 231 # self.add_composite_file('blastdb.00.idx', is_binary=True, optional=True) # first volume of the MegaBLAST index generated by makembindex | 234 self.add_composite_file('blastdb.nhi', is_binary=True, optional=True) |
| 232 # The previous line should be repeated for each index volume, with filename extensions like '.01.idx', '.02.idx', etc. | 235 |
| 233 self.add_composite_file('blastdb.shd', is_binary=True, optional=True) # MegaBLAST index superheader (-old_style_index false option of makembindex) | 236 # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines) |
| 234 # self.add_composite_file('blastdb.naa', is_binary=True, optional=True) # index of a WriteDB column for e.g. mask data | 237 self.add_composite_file('blastdb.nnd', is_binary=True, optional=True) |
| 235 # self.add_composite_file('blastdb.nab', is_binary=True, optional=True) # data of a WriteDB column | 238 |
| 236 # self.add_composite_file('blastdb.nac', is_binary=True, optional=True) # multiple byte order for a WriteDB column | 239 # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines) |
| 237 # The previous 3 lines should be repeated for each WriteDB column, with filename extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc. | 240 self.add_composite_file('blastdb.nni', is_binary=True, optional=True) |
| 241 | |
| 242 # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb) | |
| 243 self.add_composite_file('blastdb.nog', is_binary=True, optional=True) | |
| 244 | |
| 245 # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb) | |
| 246 self.add_composite_file('blastdb.nsd', is_binary=True, optional=True) | |
| 247 | |
| 248 # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb) | |
| 249 self.add_composite_file('blastdb.nsi', is_binary=True, optional=True) | |
| 250 | |
| 251 # first volume of the MegaBLAST index generated by makembindex | |
| 252 # self.add_composite_file('blastdb.00.idx', is_binary=True, optional=True) | |
| 253 # The previous line should be repeated for each index volume, with filename | |
| 254 # extensions like '.01.idx', '.02.idx', etc. | |
| 255 | |
| 256 # MegaBLAST index superheader (-old_style_index false option of makembindex) | |
| 257 # self.add_composite_file('blastdb.shd', is_binary=True, optional=True) | |
| 258 | |
| 259 # index of a WriteDB column for e.g. mask data | |
| 260 # self.add_composite_file('blastdb.naa', is_binary=True, optional=True) | |
| 261 | |
| 262 # data of a WriteDB column | |
| 263 # self.add_composite_file('blastdb.nab', is_binary=True, optional=True) | |
| 264 | |
| 265 # multiple byte order for a WriteDB column | |
| 266 # self.add_composite_file('blastdb.nac', is_binary=True, optional=True) | |
| 267 | |
| 268 # The previous 3 lines should be repeated for each WriteDB column, with filename | |
| 269 # extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc. | |
| 238 | 270 |
| 239 | 271 |
| 240 class BlastProtDb(_BlastDb, Data): | 272 class BlastProtDb(_BlastDb, Data): |
| 241 """Class for protein BLAST database files.""" | 273 """Class for protein BLAST database files.""" |
| 242 file_ext = 'blastdbp' | 274 file_ext = 'blastdbp' |
| 243 allow_datatype_change = False | 275 allow_datatype_change = False |
| 244 composite_type = 'basic' | 276 composite_type = 'basic' |
| 245 | 277 |
| 246 def __init__(self, **kwd): | 278 def __init__(self, **kwd): |
| 247 Data.__init__(self, **kwd) | 279 Data.__init__(self, **kwd) |
| 248 # Component file comments are as in BlastNucDb except where noted | 280 # Component file comments are as in BlastNucDb except where noted |
| 249 self.add_composite_file('blastdb.phr', is_binary=True) | 281 self.add_composite_file('blastdb.phr', is_binary=True) |
| 250 self.add_composite_file('blastdb.pin', is_binary=True) | 282 self.add_composite_file('blastdb.pin', is_binary=True) |
| 251 self.add_composite_file('blastdb.psq', is_binary=True) # protein sequences | 283 self.add_composite_file('blastdb.psq', is_binary=True) # protein sequences |
| 252 self.add_composite_file('blastdb.phd', is_binary=True, optional=True) | 284 self.add_composite_file('blastdb.phd', is_binary=True, optional=True) |
| 253 self.add_composite_file('blastdb.phi', is_binary=True, optional=True) | 285 self.add_composite_file('blastdb.phi', is_binary=True, optional=True) |
| 254 self.add_composite_file('blastdb.pnd', is_binary=True, optional=True) | 286 self.add_composite_file('blastdb.pnd', is_binary=True, optional=True) |
| 255 self.add_composite_file('blastdb.pni', is_binary=True, optional=True) | 287 self.add_composite_file('blastdb.pni', is_binary=True, optional=True) |
| 256 self.add_composite_file('blastdb.pog', is_binary=True, optional=True) | 288 self.add_composite_file('blastdb.pog', is_binary=True, optional=True) |
| 257 self.add_composite_file('blastdb.psd', is_binary=True, optional=True) | 289 self.add_composite_file('blastdb.psd', is_binary=True, optional=True) |
| 258 self.add_composite_file('blastdb.psi', is_binary=True, optional=True) | 290 self.add_composite_file('blastdb.psi', is_binary=True, optional=True) |
| 259 # self.add_composite_file('blastdb.paa', is_binary=True, optional=True) | 291 # self.add_composite_file('blastdb.paa', is_binary=True, optional=True) |
| 260 # self.add_composite_file('blastdb.pab', is_binary=True, optional=True) | 292 # self.add_composite_file('blastdb.pab', is_binary=True, optional=True) |
| 261 # self.add_composite_file('blastdb.pac', is_binary=True, optional=True) | 293 # self.add_composite_file('blastdb.pac', is_binary=True, optional=True) |
| 262 # The last 3 lines should be repeated for each WriteDB column, with filename extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc. | 294 # The last 3 lines should be repeated for each WriteDB column, with filename |
| 295 # extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc. | |
| 263 | 296 |
| 264 | 297 |
| 265 class BlastDomainDb(_BlastDb, Data): | 298 class BlastDomainDb(_BlastDb, Data): |
| 266 """Class for domain BLAST database files.""" | 299 """Class for domain BLAST database files.""" |
| 267 file_ext = 'blastdbd' | 300 file_ext = 'blastdbd' |
