annotate datatypes/molFiles.py @ 0:af7b6c6ee439 draft

initial commit
author bgruening
date Tue, 25 Dec 2012 05:16:25 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
1 # -*- coding: utf-8 -*-
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
2
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
3 from galaxy.datatypes import data
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
4 import logging
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
5 from galaxy.datatypes.sniff import *
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
6 import commands
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
7 import pybel
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
8 import openbabel
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
9 openbabel.obErrorLog.StopLogging()
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
10
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
11 from galaxy.datatypes.metadata import MetadataElement
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
12 from galaxy.datatypes import metadata
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
13
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
14 log = logging.getLogger(__name__)
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
15
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
class GenericMolFile( data.Text ):
    """
    Text datatype for molecule files whose concrete format (sdf, mol2, drf
    or pdb) is detected from the file content by check_filetype().
    """

    MetadataElement( name="molecules", default=0, desc="Number of molecules", readonly=True, visible=False, optional=True, no_value=0 )

    file_ext = "mol2/sdf/drf"

    def check_filetype( self,filename ):
        """
        Probe ``filename`` for the marker of each supported format, in order.

        Every probe stores its ``(exit status, output)`` pair in
        ``self.no_mols``. On the first probe that reports at least one
        matching line, ``self.file_ext`` is set to that format's extension
        and True is returned. If no probe matches, False is returned and
        ``self.no_mols`` holds the result of the last probe.
        """
        # NOTE(review): filename is appended unquoted to a shell command
        # line, so paths containing spaces or shell metacharacters will
        # break these probes.
        probes = (
            ("grep -c \\$\\$\\$\\$ ", "sdf"),
            ("grep -c @\\<TRIPOS\\>MOLECULE ", "mol2"),
            ("grep -c \"ligand id\" ", "drf"),
            ("grep -c HEADER ", "pdb"),
        )
        for command, extension in probes:
            self.no_mols = commands.getstatusoutput(command + filename)
            status, count = self.no_mols
            # The count arrives as text; convert it before comparing instead
            # of comparing a string with an int. The digit check keeps an
            # unexpected message in the output from raising.
            if status == 0 and count.isdigit() and int(count) > 0:
                self.file_ext = extension
                return True
        return False

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Fill in ``dataset.peek`` and ``dataset.blurb``.

        The blurb (molecule count) is only written when check_filetype()
        recognises the file; the peek is taken from the file content either
        way. Purged datasets get fixed placeholder texts.
        """
        if not dataset.dataset.purged:
            if self.check_filetype(dataset.file_name):
                # no_mols[1] is the textual count left by the matching probe.
                if self.no_mols[1] == '1':
                    dataset.blurb = "1 molecule"
                else:
                    dataset.blurb = "%s molecules" % self.no_mols[1]
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def get_mime(self):
        """Return the MIME type served for this datatype."""
        return 'text/plain'
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
54
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
55
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
class GenericMultiMolFile( GenericMolFile ):
    """
    Base class for formats whose molecule count is obtained by calling
    sniff(); sniff() itself is not defined here.
    """

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Fill in ``dataset.peek`` and ``dataset.blurb`` (molecule count), or
        fixed placeholder texts when the dataset has been purged.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return

        # Called for its side effect: the blurb is built from self.no_mols,
        # which is read right after this call.
        self.sniff(dataset.file_name)
        count = self.no_mols[1]
        dataset.blurb = "1 molecule" if count == '1' else "%s molecules" % count
        dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
68
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
class SDF( GenericMultiMolFile ):
    """SDF datatype, recognised by lines matching the ``$$$$`` pattern."""
    file_ext = "sdf"

    def sniff( self, filename ):
        """
        Return True when grep counts at least one line matching ``$$$$`` in
        ``filename``.

        The raw ``(exit status, output)`` pair is kept in ``self.no_mols``,
        which the inherited set_peek() reads to build the blurb.
        """
        self.no_mols = commands.getstatusoutput("grep -c \\$\\$\\$\\$ "+filename)
        status, count = self.no_mols
        # The count arrives as text; convert it before comparing instead of
        # comparing a string with an int.
        return status == 0 and count.isdigit() and int(count) > 0
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
77
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
class MOL2( GenericMultiMolFile ):
    """MOL2 datatype, recognised by its ``@<TRIPOS>MOLECULE`` lines."""
    file_ext = "mol2"

    def sniff( self, filename ):
        """
        Return True when grep counts at least one ``@<TRIPOS>MOLECULE`` line
        in ``filename``.

        The raw ``(exit status, output)`` pair is kept in ``self.no_mols``,
        which the inherited set_peek() reads to build the blurb.
        """
        # The backslashes are for the shell, which would otherwise treat
        # < and > as redirections.
        self.no_mols = commands.getstatusoutput("grep -c @\\<TRIPOS\\>MOLECULE "+filename)
        status, count = self.no_mols
        # The count arrives as text; convert it before comparing instead of
        # comparing a string with an int.
        return status == 0 and count.isdigit() and int(count) > 0
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
86
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
class FPS( GenericMultiMolFile ):
    """FPS datatype, recognised by a ``#FPS1`` line near the top of the file."""
    file_ext = "fps"

    def sniff( self, filename ):
        """
        Return True when one of the leading lines of ``filename`` starts
        with ``#FPS1``, False otherwise.

        ``self.no_mols`` is set to the ``(exit status, output)`` pair of a
        grep that counts the lines not starting with ``#``; the inherited
        set_peek() reads it to build the blurb.
        """
        self.no_mols = commands.getstatusoutput("grep -c -v '^#' "+filename)
        with open(filename) as in_handle:
            for line_counter, line in enumerate(in_handle):
                line = line.strip()
                if line.startswith('#FPS1'):
                    return True
                # Stop scanning once the header region has been passed.
                if line_counter > 10:
                    return False
        # Short files without the marker used to fall through and return
        # None; report an explicit False instead.
        return False
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
98
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
class DRF( GenericMultiMolFile ):
    """DRF datatype, recognised by lines containing ``ligand id``."""
    file_ext = "drf"

    def sniff( self, filename ):
        """
        Return True when grep counts at least one line containing
        ``ligand id`` in ``filename``.

        The raw ``(exit status, output)`` pair is kept in ``self.no_mols``,
        which the inherited set_peek() reads to build the blurb.
        """
        self.no_mols = commands.getstatusoutput("grep -c \"ligand id\" "+filename)
        status, count = self.no_mols
        # The count arrives as text; convert it before comparing instead of
        # comparing a string with an int.
        return status == 0 and count.isdigit() and int(count) > 0
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
107
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
108
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
class PHAR( GenericMultiMolFile ):
    """Datatype with extension ``phar``; its sniff() never claims a file."""
    MetadataElement( name="base_name", desc="base name", default='Phar',
                     readonly=True, set_in_upload=True )
    file_ext = "phar"

    def sniff( self, filename ):
        """
        Record the number of lines not starting with ``#`` in
        ``self.no_mols`` and always return False.
        """
        count_command = "grep -c -v '^#' " + filename
        self.no_mols = commands.getstatusoutput(count_command)
        return False
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
116
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
class PDB( GenericMolFile ):
    """PDB datatype, recognised by lines containing ``HEADER``."""
    file_ext = "pdb"

    def sniff( self, filename ):
        """
        Return True when grep counts at least one line containing ``HEADER``
        in ``filename``.

        The raw ``(exit status, output)`` pair is kept in ``self.no_mols``
        for set_peek().
        """
        self.no_mols = commands.getstatusoutput("grep -c HEADER "+filename)
        status, count = self.no_mols
        # The count arrives as text; convert it before comparing instead of
        # comparing a string with an int.
        return status == 0 and count.isdigit() and int(count) > 0

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set ``dataset.peek`` to the output of the countResidues.sh helper and
        ``dataset.blurb`` to the structure count found by sniff(), or fixed
        placeholder texts when the dataset has been purged.
        """
        if not dataset.dataset.purged:
            # NOTE(review): the script path is relative, so this depends on
            # the working directory of the running process.
            res = commands.getstatusoutput("lib/galaxy/datatypes/countResidues.sh "+dataset.file_name)
            dataset.peek = res[1]
            # Called for its side effect of refreshing self.no_mols.
            self.sniff(dataset.file_name)
            if self.no_mols[1] == '1':
                dataset.blurb = "1 protein structure"
            else:
                dataset.blurb = "%s protein structures"%self.no_mols[1]
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
138
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
class grd ( data.Text ) :
    """Datatype with extension ``grd`` (score-grids for docking)."""
    file_ext = "grd"

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set a fixed blurb for existing datasets; the peek is left untouched.
        Purged datasets get placeholder peek and blurb texts.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.blurb = "score-grids for docking"
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
148
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
class grdtgz ( data.Text ) :
    """Datatype with extension ``grd.tgz`` (compressed score-grids for docking)."""
    file_ext = "grd.tgz"

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set a fixed blurb for existing datasets; the peek is left untouched.
        Purged datasets get placeholder peek and blurb texts.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.blurb = "compressed score-grids for docking"
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
158
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
159
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
class InChI( GenericMultiMolFile ):
    """InChI datatype: every word in the file must be a line starting with ``InChI=``."""
    file_ext = "inchi"

    def sniff( self, filename ):
        """
        Return True when ``filename`` has at least one line starting with
        ``InChI=`` and the number of such lines equals the file's word count.

        The ``(exit status, output)`` pair of the grep count is kept in
        ``self.no_mols``, which the inherited set_peek() reads.
        """
        self.no_mols = commands.getstatusoutput("grep -c '^InChI=' "+filename)
        word_count = commands.getoutput("wc -w "+filename).split()[0]
        status, count = self.no_mols

        # Both values are still text here; a mismatch means the file holds
        # words that are not InChI lines.
        if count != word_count:
            return False

        # Convert the textual count before comparing instead of comparing a
        # string with an int.
        return status == 0 and count.isdigit() and int(count) > 0

    def set_meta( self, dataset, **kwd ):
        """
        Reset ``data_lines`` and ``sequences`` metadata to None for datasets
        larger than ``max_optional_metadata_filesize``. Nothing is computed
        for smaller datasets yet.
        """
        if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize:
            dataset.metadata.data_lines = None
            dataset.metadata.sequences = None
            return
        # TODO: populate dataset.metadata.data_lines and
        # dataset.metadata.molecules. For a valid InChI file the word count
        # (`wc -w`) equals both the molecule count and the number of
        # non-empty lines, so that single number could serve for both.
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
190
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
191
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
class SMILES( GenericMultiMolFile ):
    """SMILES datatype: one whitespace-free record per non-empty line."""
    file_ext = "smi"

    def sniff( self, filename ):
        """
        It is hard or impossible to sniff a SMILES file reliably. What can be
        checked is that the word count equals the non-empty line count, and
        that the first non-empty line can be read as a molecule with atoms.

        The ``(exit status, output)`` pair of the non-empty line count is
        kept in ``self.no_mols``, which the inherited set_peek() reads.
        """
        # This corresponds to the non-empty line count.
        self.no_mols = commands.getstatusoutput("grep -cve '^\\s*$' "+filename)
        status, line_count = self.no_mols
        # A failed grep leaves a message rather than a number in the output;
        # report "not SMILES" instead of raising from int().
        if status != 0 or not line_count.isdigit():
            return False

        word_count = int(commands.getoutput("wc -w "+filename).split()[0])
        if int(line_count) != word_count or int(line_count) <= 0:
            return False

        # Close the file deterministically instead of leaving it to the
        # garbage collector.
        with open(filename) as in_handle:
            for line in in_handle:
                line = line.strip()
                if line:
                    # If we have atoms, we have a molecule.
                    try:
                        return len(pybel.readstring('smi', line).atoms) > 0
                    except Exception:
                        # If the conversion fails it is not a SMILES string.
                        return False
        return True

    def set_meta( self, dataset, **kwd ):
        """
        Reset ``data_lines`` and ``sequences`` metadata to None for datasets
        larger than ``max_optional_metadata_filesize``. Nothing is computed
        for smaller datasets yet.
        """
        if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize:
            dataset.metadata.data_lines = None
            dataset.metadata.sequences = None
            return
        # TODO: populate dataset.metadata.data_lines and
        # dataset.metadata.molecules. For a file accepted by sniff() the word
        # count (`wc -w`) equals the number of non-empty lines, so that
        # single number could serve for both.
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
241
af7b6c6ee439 initial commit
bgruening
parents:
diff changeset
242