comparison blast.py @ 9:e36c60d13c94 draft

Uploaded v0.0.18, tweak display_data for running tests
author peterjc
date Mon, 25 Nov 2013 10:55:07 -0500
parents a44a7a5456e1
children 939a600f45e9
comparison
equal deleted inserted replaced
8:7ceb2ae30ff4 9:e36c60d13c94
5 from galaxy.datatypes.data import get_file_peek 5 from galaxy.datatypes.data import get_file_peek
6 from galaxy.datatypes.data import Text, Data 6 from galaxy.datatypes.data import Text, Data
7 from galaxy.datatypes.xml import GenericXml 7 from galaxy.datatypes.xml import GenericXml
8 from galaxy.datatypes.metadata import MetadataElement 8 from galaxy.datatypes.metadata import MetadataElement
9 9
10 from time import sleep
11 import os
12 import logging
13
14 log = logging.getLogger(__name__)
10 15
11 class BlastXml( GenericXml ): 16 class BlastXml( GenericXml ):
12 """NCBI Blast XML Output data""" 17 """NCBI Blast XML Output data"""
13 file_ext = "blastxml" 18 file_ext = "blastxml"
14 19
62 raise ValueError("Given no BLAST XML files, %r, to merge into %s" \ 67 raise ValueError("Given no BLAST XML files, %r, to merge into %s" \
63 % (split_files, output_file)) 68 % (split_files, output_file))
64 out = open(output_file, "w") 69 out = open(output_file, "w")
65 h = None 70 h = None
66 for f in split_files: 71 for f in split_files:
72 if not os.path.isfile(f):
73 log.warning("BLAST XML file %s missing, retry in 1s..." % f)
74 sleep(1)
75 if not os.path.isfile(f):
76 log.error("BLAST XML file %s missing" % f)
77 raise ValueError("BLAST XML file %s missing" % f)
67 h = open(f) 78 h = open(f)
68 body = False 79 body = False
69 header = h.readline() 80 header = h.readline()
70 if not header: 81 if not header:
71 out.close() 82 out.close()
72 h.close() 83 h.close()
73 raise ValueError("BLAST XML file %s was empty" % f) 84 #Retry, could be transient error with networked file system...
85 log.warning("BLAST XML file %s empty, retry in 1s..." % f)
86 sleep(1)
87 h = open(f)
88 header = h.readline()
89 if not header:
90 log.error("BLAST XML file %s was empty" % f)
91 raise ValueError("BLAST XML file %s was empty" % f)
74 if header.strip() != '<?xml version="1.0"?>': 92 if header.strip() != '<?xml version="1.0"?>':
75 out.write(header) #for diagnosis 93 out.write(header) #for diagnosis
76 out.close() 94 out.close()
77 h.close() 95 h.close()
78 raise ValueError("%s is not an XML file!" % f) 96 raise ValueError("%s is not an XML file!" % f)
148 except: 166 except:
149 return "BLAST database (multiple files)" 167 return "BLAST database (multiple files)"
150 168
151 def display_data(self, trans, data, preview=False, filename=None, 169 def display_data(self, trans, data, preview=False, filename=None,
152 to_ext=None, size=None, offset=None, **kwd): 170 to_ext=None, size=None, offset=None, **kwd):
153 """Apparently an old display method, but still gets called. 171 """Documented as an old display method, but still gets called via tests etc
154 172
155 This allows us to format the data shown in the central pane via the "eye" icon. 173 This allows us to format the data shown in the central pane via the "eye" icon.
156 """ 174 """
157 return "This is a BLAST database." 175 if filename is not None and filename != "index":
158 176 #Change nothing - important for the unit tests to access child files:
159 def get_mime(self): 177 return Data.display_data(self, trans, data, preview, filename,
160 """Returns the mime type of the datatype (pretend it is text for peek)""" 178 to_ext, size, offset, **kwd)
161 return 'text/plain' 179 if self.file_ext == "blastdbn":
180 title = "This is a nucleotide BLAST database"
181 elif self.file_ext =="blastdbp":
182 title = "This is a protein BLAST database"
183 else:
184 #Error?
185 title = "This is a BLAST database."
186 msg = ""
187 try:
188 #Try to use any text recorded in the dummy index file:
189 handle = open(data.file_name, "rU")
190 msg = handle.read().strip()
191 handle.close()
192 except Exception, err:
193 #msg = str(err)
194 pass
195 if not msg:
196 msg = title
197 #Galaxy assumes HTML for the display of composite datatypes,
198 return "<html><head><title>%s</title></head><body><pre>%s</pre></body></html>" % (title, msg)
162 199
163 def merge(split_files, output_file): 200 def merge(split_files, output_file):
164 """Merge BLAST databases (not implemented for now).""" 201 """Merge BLAST databases (not implemented for now)."""
165 raise NotImplementedError("Merging BLAST databases is non-trivial (do this via makeblastdb?)") 202 raise NotImplementedError("Merging BLAST databases is non-trivial (do this via makeblastdb?)")
166 203
172 209
173 210
174 class BlastNucDb( _BlastDb, Data ): 211 class BlastNucDb( _BlastDb, Data ):
175 """Class for nucleotide BLAST database files.""" 212 """Class for nucleotide BLAST database files."""
176 file_ext = 'blastdbn' 213 file_ext = 'blastdbn'
177 composite_type ='basic' 214 allow_datatype_change = False
215 composite_type = 'basic'
178 216
179 def __init__(self, **kwd): 217 def __init__(self, **kwd):
180 Data.__init__(self, **kwd) 218 Data.__init__(self, **kwd)
181 self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers 219 self.add_composite_file('blastdb.nhr', is_binary=True) # sequence headers
182 self.add_composite_file('blastdb.nin', is_binary=True) # index file 220 self.add_composite_file('blastdb.nin', is_binary=True) # index file
195 # self.add_composite_file('blastdb.naa', is_binary=True, optional=True) # index of a WriteDB column for e.g. mask data 233 # self.add_composite_file('blastdb.naa', is_binary=True, optional=True) # index of a WriteDB column for e.g. mask data
196 # self.add_composite_file('blastdb.nab', is_binary=True, optional=True) # data of a WriteDB column 234 # self.add_composite_file('blastdb.nab', is_binary=True, optional=True) # data of a WriteDB column
197 # self.add_composite_file('blastdb.nac', is_binary=True, optional=True) # multiple byte order for a WriteDB column 235 # self.add_composite_file('blastdb.nac', is_binary=True, optional=True) # multiple byte order for a WriteDB column
198 # The previous 3 lines should be repeated for each WriteDB column, with filename extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc. 236 # The previous 3 lines should be repeated for each WriteDB column, with filename extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc.
199 237
200 def display_data(self, trans, data, preview=False, filename=None,
201 to_ext=None, size=None, offset=None, **kwd):
202 """Apparently an old display method, but still gets called.
203
204 This allows us to format the data shown in the central pane via the "eye" icon.
205 """
206 return "This is a BLAST nucleotide database."
207
208 238
209 class BlastProtDb( _BlastDb, Data ): 239 class BlastProtDb( _BlastDb, Data ):
210 """Class for protein BLAST database files.""" 240 """Class for protein BLAST database files."""
211 file_ext = 'blastdbp' 241 file_ext = 'blastdbp'
212 composite_type ='basic' 242 allow_datatype_change = False
243 composite_type = 'basic'
213 244
214 def __init__(self, **kwd): 245 def __init__(self, **kwd):
215 Data.__init__(self, **kwd) 246 Data.__init__(self, **kwd)
216 # Component file comments are as in BlastNucDb except where noted 247 # Component file comments are as in BlastNucDb except where noted
217 self.add_composite_file('blastdb.phr', is_binary=True) 248 self.add_composite_file('blastdb.phr', is_binary=True)
226 self.add_composite_file('blastdb.psi', is_binary=True, optional=True) 257 self.add_composite_file('blastdb.psi', is_binary=True, optional=True)
227 # self.add_composite_file('blastdb.paa', is_binary=True, optional=True) 258 # self.add_composite_file('blastdb.paa', is_binary=True, optional=True)
228 # self.add_composite_file('blastdb.pab', is_binary=True, optional=True) 259 # self.add_composite_file('blastdb.pab', is_binary=True, optional=True)
229 # self.add_composite_file('blastdb.pac', is_binary=True, optional=True) 260 # self.add_composite_file('blastdb.pac', is_binary=True, optional=True)
230 # The last 3 lines should be repeated for each WriteDB column, with filename extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc. 261 # The last 3 lines should be repeated for each WriteDB column, with filename extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc.
231
232 def display_data(self, trans, data, preview=False, filename=None,
233 to_ext=None, size=None, offset=None, **kwd):
234 """Apparently an old display method, but still gets called.
235
236 This allows us to format the data shown in the central pane via the "eye" icon.
237 """
238 return "This is a BLAST protein database."