Mercurial > repos > devteam > blast_datatypes
comparison blast.py @ 4:f9a7783ed7b6
Uploaded v0.0.14 adding BLAST database support.
See also the matching update for the NCBI BLAST+ wrappers which use these new definitions. This update included work by Edward Kirton.
| author | peterjc |
|---|---|
| date | Fri, 09 Nov 2012 06:50:05 -0500 |
| parents | 6ef523b390e0 |
| children | b3a3ba0c1d47 |
comparison
equal
deleted
inserted
replaced
| 3:6ef523b390e0 | 4:f9a7783ed7b6 |
|---|---|
| 1 """ | 1 """ |
| 2 BlastXml class | 2 BlastXml class |
| 3 """ | 3 """ |
| 4 | 4 |
| 5 from galaxy.datatypes.data import get_file_peek | 5 from galaxy.datatypes.data import get_file_peek |
| 6 from galaxy.datatypes.data import Text | 6 from galaxy.datatypes.data import Text, Data |
| 7 from galaxy.datatypes.xml import GenericXml | 7 from galaxy.datatypes.xml import GenericXml |
| 8 from galaxy.datatypes.metadata import MetadataElement | |
| 8 | 9 |
| 9 class BlastXml( GenericXml ): | 10 class BlastXml( GenericXml ): |
| 10 """NCBI Blast XML Output data""" | 11 """NCBI Blast XML Output data""" |
| 11 file_ext = "blastxml" | 12 file_ext = "blastxml" |
| 12 | 13 |
| 16 dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) | 17 dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) |
| 17 dataset.blurb = 'NCBI Blast XML data' | 18 dataset.blurb = 'NCBI Blast XML data' |
| 18 else: | 19 else: |
| 19 dataset.peek = 'file does not exist' | 20 dataset.peek = 'file does not exist' |
| 20 dataset.blurb = 'file purged from disk' | 21 dataset.blurb = 'file purged from disk' |
| 22 | |
| 21 def sniff( self, filename ): | 23 def sniff( self, filename ): |
| 22 """ | 24 """ |
| 23 Determines whether the file is blastxml | 25 Determines whether the file is blastxml |
| 24 | 26 |
| 25 >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' ) | 27 >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' ) |
| 53 def merge(split_files, output_file): | 55 def merge(split_files, output_file): |
| 54 """Merging multiple XML files is non-trivial and must be done in subclasses.""" | 56 """Merging multiple XML files is non-trivial and must be done in subclasses.""" |
| 55 if len(split_files) == 1: | 57 if len(split_files) == 1: |
| 56 #For one file only, use base class method (move/copy) | 58 #For one file only, use base class method (move/copy) |
| 57 return Text.merge(split_files, output_file) | 59 return Text.merge(split_files, output_file) |
| 60 if not split_files: | |
| 61 raise ValueError("Given no BLAST XML files, %r, to merge into %s" \ | |
| 62 % (split_files, output_file)) | |
| 58 out = open(output_file, "w") | 63 out = open(output_file, "w") |
| 59 h = None | 64 h = None |
| 60 for f in split_files: | 65 for f in split_files: |
| 61 h = open(f) | 66 h = open(f) |
| 62 body = False | 67 body = False |
| 120 out.write(" </BlastOutput_iterations>\n") | 125 out.write(" </BlastOutput_iterations>\n") |
| 121 out.write("</BlastOutput>\n") | 126 out.write("</BlastOutput>\n") |
| 122 out.close() | 127 out.close() |
| 123 merge = staticmethod(merge) | 128 merge = staticmethod(merge) |
| 124 | 129 |
| 130 | |
| 131 class _BlastDb(object): | |
| 132 """Base class for BLAST database datatype.""" | |
| 133 | |
| 134 def set_peek( self, dataset, is_multi_byte=False ): | |
| 135 """Set the peek and blurb text.""" | |
| 136 if not dataset.dataset.purged: | |
| 137 dataset.peek = "BLAST database (multiple files)" | |
| 138 dataset.blurb = "BLAST database (multiple files)" | |
| 139 else: | |
| 140 dataset.peek = 'file does not exist' | |
| 141 dataset.blurb = 'file purged from disk' | |
| 142 | |
| 143 def display_peek( self, dataset ): | |
| 144 """Create HTML content, used for displaying peek.""" | |
| 145 try: | |
| 146 return dataset.peek | |
| 147 except: | |
| 148 return "BLAST database (multiple files)" | |
| 149 | |
| 150 def display_data(self, trans, data, preview=False, filename=None, | |
| 151 to_ext=None, size=None, offset=None, **kwd): | |
| 152 """Apparently an old display method, but still gets called. | |
| 153 | |
| 154 This allows us to format the data shown in the central pane via the "eye" icon. | |
| 155 """ | |
| 156 return "This is a BLAST database." | |
| 157 | |
| 158 def get_mime(self): | |
| 159 """Returns the mime type of the datatype (pretend it is text for peek)""" | |
| 160 return 'text/plain' | |
| 161 | |
| 162 def merge(split_files, output_file): | |
| 163 """Merge BLAST databases (not implemented for now).""" | |
| 164 raise NotImplementedError("Merging BLAST databases is non-trivial (do this via makeblastdb?)") | |
| 165 | |
| 166 def split( cls, input_datasets, subdir_generator_function, split_params): | |
| 167 """Split a BLAST database (not implemented for now).""" | |
| 168 if split_params is None: | |
| 169 return None | |
| 170 raise NotImplementedError("Can't split BLAST databases") | |
| 171 | |
| 172 | |
class BlastNucDb( _BlastDb, Data ):
    """Nucleotide BLAST database, registered as a 'basic' composite datatype."""
    file_ext = 'blastdbn'
    composite_type = 'basic'
    MetadataElement( readonly=True, optional=True, visible=False, no_value=0 )

    def __init__(self, **kwd):
        """Initialise the datatype and register its component files."""
        Data.__init__(self, **kwd)
        # Files registered without the optional flag
        for required_name in ('blastdb.nhr', 'blastdb.nin', 'blastdb.nsq'):
            self.add_composite_file(required_name)
        # Files registered as optional, in the original registration order
        optional_names = (
            'blastdb.nhd',
            'blastdb.nsi',
            'blastdb.nhi',
            'blastdb.nog',
            'blastdb.nsd',
        )
        for optional_name in optional_names:
            self.add_composite_file(optional_name, optional=True)

    def display_data(self, trans, data, preview=False, filename=None,
                     to_ext=None, size=None, offset=None, **kwd):
        """Return the fixed text shown in the central pane via the "eye" icon.

        Apparently an old display method, but it still gets called.
        """
        return "This is a BLAST nucleotide database."
| 197 | |
class BlastProtDb( _BlastDb, Data ):
    """Class for protein BLAST database files.

    Registered as a 'basic' composite datatype made up of the component
    files listed in ``__init__``.
    """
    file_ext = 'blastdbp'
    composite_type = 'basic'
    MetadataElement( readonly=True, optional=True, visible=False, no_value=0 )

    def __init__(self, **kwd):
        """Initialise the datatype and register its component files."""
        Data.__init__(self, **kwd)
        # Files registered without the optional flag
        self.add_composite_file('blastdb.phr')
        self.add_composite_file('blastdb.pin')
        self.add_composite_file('blastdb.psq')
        # Files registered as optional.
        # NOTE: 'blastdb.psq' used to be registered a second time here with
        # optional=True, contradicting the required registration above (and
        # presumably overriding it - confirm against add_composite_file).
        # The duplicate has been dropped so the file stays required.
        self.add_composite_file('blastdb.pnd', optional=True)
        self.add_composite_file('blastdb.pni', optional=True)
        self.add_composite_file('blastdb.psd', optional=True)
        self.add_composite_file('blastdb.psi', optional=True)
        self.add_composite_file('blastdb.phd', optional=True)
        self.add_composite_file('blastdb.phi', optional=True)
        self.add_composite_file('blastdb.pog', optional=True)

    def display_data(self, trans, data, preview=False, filename=None,
                     to_ext=None, size=None, offset=None, **kwd):
        """Apparently an old display method, but still gets called.

        This allows us to format the data shown in the central pane via the "eye" icon.
        """
        return "This is a BLAST protein database."
