Mercurial > repos > devteam > blast_datatypes
comparison blast.py @ 4:f9a7783ed7b6
Uploaded v0.0.14 adding BLAST database support.
See also the matching update for the NCBI BLAST+ wrappers which use these new definitions. This update included work by Edward Kirton.
author | peterjc |
---|---|
date | Fri, 09 Nov 2012 06:50:05 -0500 |
parents | 6ef523b390e0 |
children | b3a3ba0c1d47 |
comparison
equal
deleted
inserted
replaced
3:6ef523b390e0 | 4:f9a7783ed7b6 |
---|---|
1 """ | 1 """ |
2 BlastXml class | 2 BlastXml class |
3 """ | 3 """ |
4 | 4 |
5 from galaxy.datatypes.data import get_file_peek | 5 from galaxy.datatypes.data import get_file_peek |
6 from galaxy.datatypes.data import Text | 6 from galaxy.datatypes.data import Text, Data |
7 from galaxy.datatypes.xml import GenericXml | 7 from galaxy.datatypes.xml import GenericXml |
8 from galaxy.datatypes.metadata import MetadataElement | |
8 | 9 |
9 class BlastXml( GenericXml ): | 10 class BlastXml( GenericXml ): |
10 """NCBI Blast XML Output data""" | 11 """NCBI Blast XML Output data""" |
11 file_ext = "blastxml" | 12 file_ext = "blastxml" |
12 | 13 |
16 dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) | 17 dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) |
17 dataset.blurb = 'NCBI Blast XML data' | 18 dataset.blurb = 'NCBI Blast XML data' |
18 else: | 19 else: |
19 dataset.peek = 'file does not exist' | 20 dataset.peek = 'file does not exist' |
20 dataset.blurb = 'file purged from disk' | 21 dataset.blurb = 'file purged from disk' |
22 | |
21 def sniff( self, filename ): | 23 def sniff( self, filename ): |
22 """ | 24 """ |
23 Determines whether the file is blastxml | 25 Determines whether the file is blastxml |
24 | 26 |
25 >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' ) | 27 >>> fname = get_test_fname( 'megablast_xml_parser_test1.blastxml' ) |
53 def merge(split_files, output_file): | 55 def merge(split_files, output_file): |
54 """Merging multiple XML files is non-trivial and must be done in subclasses.""" | 56 """Merging multiple XML files is non-trivial and must be done in subclasses.""" |
55 if len(split_files) == 1: | 57 if len(split_files) == 1: |
56 #For one file only, use base class method (move/copy) | 58 #For one file only, use base class method (move/copy) |
57 return Text.merge(split_files, output_file) | 59 return Text.merge(split_files, output_file) |
60 if not split_files: | |
61 raise ValueError("Given no BLAST XML files, %r, to merge into %s" \ | |
62 % (split_files, output_file)) | |
58 out = open(output_file, "w") | 63 out = open(output_file, "w") |
59 h = None | 64 h = None |
60 for f in split_files: | 65 for f in split_files: |
61 h = open(f) | 66 h = open(f) |
62 body = False | 67 body = False |
120 out.write(" </BlastOutput_iterations>\n") | 125 out.write(" </BlastOutput_iterations>\n") |
121 out.write("</BlastOutput>\n") | 126 out.write("</BlastOutput>\n") |
122 out.close() | 127 out.close() |
123 merge = staticmethod(merge) | 128 merge = staticmethod(merge) |
124 | 129 |
130 | |
131 class _BlastDb(object): | |
132 """Base class for BLAST database datatype.""" | |
133 | |
134 def set_peek( self, dataset, is_multi_byte=False ): | |
135 """Set the peek and blurb text.""" | |
136 if not dataset.dataset.purged: | |
137 dataset.peek = "BLAST database (multiple files)" | |
138 dataset.blurb = "BLAST database (multiple files)" | |
139 else: | |
140 dataset.peek = 'file does not exist' | |
141 dataset.blurb = 'file purged from disk' | |
142 | |
143 def display_peek( self, dataset ): | |
144 """Create HTML content, used for displaying peek.""" | |
145 try: | |
146 return dataset.peek | |
147 except: | |
148 return "BLAST database (multiple files)" | |
149 | |
150 def display_data(self, trans, data, preview=False, filename=None, | |
151 to_ext=None, size=None, offset=None, **kwd): | |
152 """Apparently an old display method, but still gets called. | |
153 | |
154 This allows us to format the data shown in the central pane via the "eye" icon. | |
155 """ | |
156 return "This is a BLAST database." | |
157 | |
158 def get_mime(self): | |
159 """Returns the mime type of the datatype (pretend it is text for peek)""" | |
160 return 'text/plain' | |
161 | |
162 def merge(split_files, output_file): | |
163 """Merge BLAST databases (not implemented for now).""" | |
164 raise NotImplementedError("Merging BLAST databases is non-trivial (do this via makeblastdb?)") | |
165 | |
166 def split( cls, input_datasets, subdir_generator_function, split_params): | |
167 """Split a BLAST database (not implemented for now).""" | |
168 if split_params is None: | |
169 return None | |
170 raise NotImplementedError("Can't split BLAST databases") | |
171 | |
172 | |
class BlastNucDb(_BlastDb, Data):
    """Datatype for a nucleotide BLAST database stored as several files."""
    file_ext = 'blastdbn'
    composite_type = 'basic'
    # NOTE(review): no element name is passed here - confirm this is intended.
    MetadataElement(readonly=True, optional=True, visible=False, no_value=0)

    def __init__(self, **kwd):
        """Initialise via Data, then register the component files in order."""
        Data.__init__(self, **kwd)
        # Registered without optional=True.
        for required_name in ('blastdb.nhr', 'blastdb.nin', 'blastdb.nsq'):
            self.add_composite_file(required_name)
        # Registered with optional=True.
        for optional_name in ('blastdb.nhd', 'blastdb.nsi', 'blastdb.nhi',
                              'blastdb.nog', 'blastdb.nsd'):
            self.add_composite_file(optional_name, optional=True)

    def display_data(self, trans, data, preview=False, filename=None,
                     to_ext=None, size=None, offset=None, **kwd):
        """Return the fixed text shown in the central pane via the "eye" icon.

        Apparently an old display method, but it still gets called. Every
        argument is ignored.
        """
        message = "This is a BLAST nucleotide database."
        return message
197 | |
class BlastProtDb(_BlastDb, Data):
    """Class for protein BLAST database files."""
    file_ext = 'blastdbp'
    composite_type = 'basic'
    # NOTE(review): no element name is passed here - confirm this is intended.
    MetadataElement(readonly=True, optional=True, visible=False, no_value=0)

    def __init__(self, **kwd):
        """Initialise via Data, then register the component files in order."""
        Data.__init__(self, **kwd)
        # Registered without optional=True.
        self.add_composite_file('blastdb.phr')
        self.add_composite_file('blastdb.pin')
        self.add_composite_file('blastdb.psq')
        # Registered with optional=True. 'blastdb.psq' is deliberately not
        # repeated here: a second registration with optional=True is
        # redundant at best and presumably replaces the required entry
        # above (assumes add_composite_file keys entries by name - TODO
        # confirm against Data).
        self.add_composite_file('blastdb.pnd', optional=True)
        self.add_composite_file('blastdb.pni', optional=True)
        self.add_composite_file('blastdb.psd', optional=True)
        self.add_composite_file('blastdb.psi', optional=True)
        self.add_composite_file('blastdb.phd', optional=True)
        self.add_composite_file('blastdb.phi', optional=True)
        self.add_composite_file('blastdb.pog', optional=True)

    def display_data(self, trans, data, preview=False, filename=None,
                     to_ext=None, size=None, offset=None, **kwd):
        """Apparently an old display method, but still gets called.

        This allows us to format the data shown in the central pane via the "eye" icon.
        All arguments are ignored; a fixed message is returned.
        """
        return "This is a BLAST protein database."