Mercurial > repos > kls286 > chap_test_20230328
comparison MLaaS/README.md @ 0:cbbe42422d56 draft
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
author | kls286 |
---|---|
date | Tue, 28 Mar 2023 15:07:30 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cbbe42422d56 |
---|---|
1 ## MLaaS end-to-end example using MNIST dataset | |
2 MLaaS stands for Machine Learning as a Service, and here we provide an | |
3 end-to-end example based on the MNIST dataset, using the | |
4 Python [Keras](https://keras.io/) ML framework for the training | |
5 part, and the [TFaaS](https://github.com/vkuznet/TFaaS) ML framework | |
6 for the inference part. | |
7 | |
8 ### Requirements (environment) | |
9 To proceed with ML training we need to acquire the MNIST dataset. | |
10 We assume that you have a machine where a recent version of Python is installed; | |
11 please note that these instructions were tested with `Python 3.10.10`. | |
12 | |
13 ``` | |
14 # create mnist_env, here python refers to python 3.10.10 | |
15 python -m venv mnist_env | |
16 | |
17 # download mnist dataset for training purposes in numpy gzipped arrays | |
18 curl -ksLO https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz | |
19 | |
20 # download MNIST dataset for training purposes in pkl.gz data-format | |
21 curl -ksLO https://s3.amazonaws.com/img-datasets/mnist.pkl.gz | |
22 | |
23 # download MNIST images | |
24 # download MNIST actual images which we will use within inference | |
25 curl -O http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz | |
26 ``` | |
27 | |
28 ### Train ML model | |
29 Below you can see a fully tested Keras-based ML codebase to train | |
30 a simple convolutional neural network over the MNIST dataset (save | |
31 this code as `ktrain.py`): | |
32 ``` | |
33 #!/usr/bin/env python | |
34 #-*- coding: utf-8 -*- | |
35 #pylint: disable= | |
36 """ | |
37 File : ktrain.py | |
38 Author : Valentin Kuznetsov <vkuznet AT gmail dot com> | |
39 Description: Keras based ML network to train over MNIST dataset | |
40 """ | |
41 | |
42 # system modules | |
43 import os | |
44 import sys | |
45 import json | |
46 import gzip | |
47 import pickle | |
48 import argparse | |
49 | |
50 # third-party modules | |
51 import numpy as np | |
52 import tensorflow as tf | |
53 from tensorflow import keras | |
54 from tensorflow.keras import layers | |
55 from tensorflow.keras import backend as K | |
56 from tensorflow.python.tools import saved_model_utils | |
57 | |
58 | |
59 def modelGraph(model_dir): | |
60 """ | |
61 Provide input/output names used by TF Graph along with graph itself | |
62 The code is based on TF saved_model_cli.py script. | |
63 """ | |
64 input_names = [] | |
65 output_names = [] | |
66 tag_sets = saved_model_utils.get_saved_model_tag_sets(model_dir) | |
67 for tag_set in sorted(tag_sets): | |
68 print('%r' % ', '.join(sorted(tag_set))) | |
69 meta_graph_def = saved_model_utils.get_meta_graph_def(model_dir, tag_set[0]) | |
70 for key in meta_graph_def.signature_def.keys(): | |
71 meta = meta_graph_def.signature_def[key] | |
72 if hasattr(meta, 'inputs') and hasattr(meta, 'outputs'): | |
73 inputs = meta.inputs | |
74 outputs = meta.outputs | |
75 input_signatures = list(meta.inputs.values()) | |
76 input_names = [signature.name for signature in input_signatures] | |
77 if len(input_names) > 0: | |
78 output_signatures = list(meta.outputs.values()) | |
79 output_names = [signature.name for signature in output_signatures] | |
80 return input_names, output_names, meta_graph_def | |
81 | |
82 def readData(fin, num_classes): | |
83 """ | |
84 Helper function to read MNIST data and provide it to | |
85 upstream code, e.g. to the training layer | |
86 """ | |
87 # Load the data and split it between train and test sets | |
88 # (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() | |
89 f = gzip.open(fin, 'rb') | |
90 if sys.version_info < (3,): | |
91 mnist_data = pickle.load(f) | |
92 else: | |
93 mnist_data = pickle.load(f, encoding='bytes') | |
94 f.close() | |
95 (x_train, y_train), (x_test, y_test) = mnist_data | |
96 | |
97 # Scale images to the [0, 1] range | |
98 x_train = x_train.astype("float32") / 255 | |
99 x_test = x_test.astype("float32") / 255 | |
100 # Make sure images have shape (28, 28, 1) | |
101 x_train = np.expand_dims(x_train, -1) | |
102 x_test = np.expand_dims(x_test, -1) | |
103 print("x_train shape:", x_train.shape) | |
104 print(x_train.shape[0], "train samples") | |
105 print(x_test.shape[0], "test samples") | |
106 | |
107 | |
108 # convert class vectors to binary class matrices | |
109 y_train = keras.utils.to_categorical(y_train, num_classes) | |
110 y_test = keras.utils.to_categorical(y_test, num_classes) | |
111 return x_train, y_train, x_test, y_test | |
112 | |
113 | |
114 def train(fin, fout=None, model_name=None, epochs=1, batch_size=128, h5=False): | |
115 """ | |
116 train function for MNIST | |
117 """ | |
118 # Model / data parameters | |
119 num_classes = 10 | |
120 input_shape = (28, 28, 1) | |
121 | |
122 # create ML model | |
123 model = keras.Sequential( | |
124 [ | |
125 keras.Input(shape=input_shape), | |
126 layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), | |
127 layers.MaxPooling2D(pool_size=(2, 2)), | |
128 layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), | |
129 layers.MaxPooling2D(pool_size=(2, 2)), | |
130 layers.Flatten(), | |
131 layers.Dropout(0.5), | |
132 layers.Dense(num_classes, activation="softmax"), | |
133 ] | |
134 ) | |
135 | |
136 model.summary() | |
137 print("model input", model.input, type(model.input), model.input.__dict__) | |
138 print("model output", model.output, type(model.output), model.output.__dict__) | |
139 model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) | |
140 | |
141 # train model | |
142 x_train, y_train, x_test, y_test = readData(fin, num_classes) | |
143 model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1) | |
144 | |
145 # evaluate trained model | |
146 score = model.evaluate(x_test, y_test, verbose=0) | |
147 print("Test loss:", score[0]) | |
148 print("Test accuracy:", score[1]) | |
149 print("save model to", fout) | |
150 writer(fout, model_name, model, input_shape, h5) | |
151 | |
152 def writer(fout, model_name, model, input_shape, h5=False): | |
153 """ | |
154 Writer provide write function for given model | |
155 """ | |
156 if not fout: | |
157 return | |
158 model.save(fout) | |
159 if h5: | |
160 model.save('{}/{}'.format(fout, h5), save_format='h5') | |
161 pbModel = '{}/saved_model.pb'.format(fout) | |
162 pbtxtModel = '{}/saved_model.pbtxt'.format(fout) | |
163 convert(pbModel, pbtxtModel) | |
164 | |
165 # get meta-data information about our ML model | |
166 input_names, output_names, model_graph = modelGraph(model_name) | |
167 print("### input", input_names) | |
168 print("### output", output_names) | |
169 # ML uses (28,28,1) shape, i.e. 28x28 black-white images | |
170 # if we'll use color images we'll use shape (28, 28, 3) | |
171 img_channels = input_shape[2] # last item represent number of colors | |
172 meta = {'name': model_name, | |
173 'model': 'saved_model.pb', | |
174 'labels': 'labels.txt', | |
175 'img_channels': img_channels, | |
176 'input_name': input_names[0].split(':')[0], | |
177 'output_name': output_names[0].split(':')[0], | |
178 'input_node': model.input.name, | |
179 'output_node': model.output.name | |
180 } | |
181 with open(fout+'/params.json', 'w') as ostream: | |
182 ostream.write(json.dumps(meta)) | |
183 with open(fout+'/labels.txt', 'w') as ostream: | |
184 for i in range(0, 10): | |
185 ostream.write(str(i)+'\n') | |
186 with open(fout + '/model.graph', 'wb') as ostream: | |
187 ostream.write(model_graph.SerializeToString()) | |
188 | |
189 def convert(fin, fout): | |
190 """ | |
191 convert input model.pb into output model.pbtxt | |
192 Based on internet search: | |
193 - https://www.tensorflow.org/guide/saved_model | |
194 - https://www.programcreek.com/python/example/123317/tensorflow.core.protobuf.saved_model_pb2.SavedModel | |
195 """ | |
196 import google.protobuf | |
197 from tensorflow.core.protobuf import saved_model_pb2 | |
198 import tensorflow as tf | |
199 | |
200 saved_model = saved_model_pb2.SavedModel() | |
201 | |
202 with open(fin, 'rb') as f: | |
203 saved_model.ParseFromString(f.read()) | |
204 | |
205 with open(fout, 'w') as f: | |
206 f.write(google.protobuf.text_format.MessageToString(saved_model)) | |
207 | |
208 | |
209 class OptionParser(): | |
210 def __init__(self): | |
211 "User based option parser" | |
212 self.parser = argparse.ArgumentParser(prog='PROG') | |
213 self.parser.add_argument("--fin", action="store", | |
214 dest="fin", default="", help="Input MNIST file") | |
215 self.parser.add_argument("--fout", action="store", | |
216 dest="fout", default="", help="Output models area") | |
217 self.parser.add_argument("--model", action="store", | |
218 dest="model", default="mnist", help="model name") | |
219 self.parser.add_argument("--epochs", action="store", | |
220 dest="epochs", default=1, help="number of epochs to use in ML training") | |
221 self.parser.add_argument("--batch_size", action="store", | |
222 dest="batch_size", default=128, help="batch size to use in training") | |
223 self.parser.add_argument("--h5", action="store", | |
224 dest="h5", default="mnist", help="h5 model file name") | |
225 | |
226 def main(): | |
227 "Main function" | |
228 optmgr = OptionParser() | |
229 opts = optmgr.parser.parse_args() | |
230 train(opts.fin, opts.fout, | |
231 model_name=opts.model, | |
232 epochs=opts.epochs, | |
233 batch_size=opts.batch_size, | |
234 h5=opts.h5) | |
235 | |
236 if __name__ == '__main__': | |
237 main() | |
238 ``` | |
239 | |
240 ### Training process | |
241 We will train our model using the following command (for simplicity we skip | |
242 warning messages from TF and irrelevant printouts): | |
243 ``` | |
244 # here fout=mnist represents the mnist directory where we'll store our trained model | |
245 # and model=mnist is the name of the model we'll use later in inference | |
246 ./ktrain.py --fin=./mnist.pkl.gz --fout=mnist --model=mnist | |
247 ... | |
248 x_train shape: (60000, 28, 28, 1) | |
249 60000 train samples | |
250 10000 test samples | |
251 Model: "sequential" | |
252 _________________________________________________________________ | |
253 Layer (type) Output Shape Param # | |
254 ================================================================= | |
255 conv2d (Conv2D) (None, 26, 26, 32) 320 | |
256 | |
257 max_pooling2d (MaxPooling2D (None, 13, 13, 32) 0 | |
258 ) | |
259 | |
260 conv2d_1 (Conv2D) (None, 11, 11, 64) 18496 | |
261 | |
262 max_pooling2d_1 (MaxPooling (None, 5, 5, 64) 0 | |
263 2D) | |
264 | |
265 flatten (Flatten) (None, 1600) 0 | |
266 | |
267 dropout (Dropout) (None, 1600) 0 | |
268 | |
269 dense (Dense) (None, 10) 16010 | |
270 | |
271 ================================================================= | |
272 Total params: 34,826 | |
273 Trainable params: 34,826 | |
274 Non-trainable params: 0 | |
275 _________________________________________________________________ | |
276 | |
277 422/422 [==============================] - 37s 84ms/step - loss: 0.3645 - accuracy: 0.8898 - val_loss: 0.0825 - val_accuracy: 0.9772 | |
278 Test loss: 0.09409885853528976 | |
279 Test accuracy: 0.9703999757766724 | |
280 save model to mnist | |
281 | |
282 ### input ['serving_default_input_1:0'] | |
283 ### output ['StatefulPartitionedCall:0'] | |
284 ``` | |
285 When this process is over you'll find a `mnist` directory with the following | |
286 content: | |
287 ``` | |
288 shell# ls mnist | |
289 | |
290 assets keras_metadata.pb model.graph saved_model.pb variables | |
291 fingerprint.pb labels.txt params.json saved_model.pbtxt | |
292 ``` | |
293 - `saved_model.pb` represents trained ML model in protobuffer data-format | |
294 - `saved_model.pbtxt` represents trained ML model in text protobuffer representation | |
295 - `labels.txt` contains our image labels | |
296 - `params.json` contains meta-data used by TFaaS and it has the following content: | |
297 ``` | |
298 cat mnist/params.json | jq | |
299 { | |
300 "name": "mnist", | |
301 "model": "saved_model.pb", | |
302 "labels": "labels.txt", | |
303 "img_channels": 1, | |
304 "input_name": "serving_default_input_1", | |
305 "output_name": "StatefulPartitionedCall", | |
306 "input_node": "input_1", | |
307 "output_node": "dense/Softmax:0" | |
308 } | |
309 ``` | |
310 Here you can see that our ML model is called `mnist` and that the model is stored in the | |
311 `saved_model.pb` file. More importantly, `params.json` contains the input and | |
312 output tensor names and nodes which we need to provide for TFaaS to serve | |
313 our predictions. | |
314 | |
315 ### Inference server | |
316 Now it is time to start our inference server. You can find its code in the `src/go` area. | |
317 To build the code you need to: | |
318 ``` | |
319 # download TF library and includes for your OS, e.g. macOS build | |
320 curl -ksLO https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-darwin-x86_64-2.11.0.tar.gz | |
321 # or linux build | |
322 curl -ksLO https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz | |
323 # or linux GPU build | |
324 curl -ksLO https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-2.11.0.tar.gz | |
325 | |
326 # provide TF include area location to go build command | |
327 # /opt/tensorflow/include is the area where the TF includes are | |
328 export CGO_CPPFLAGS="-I/opt/tensorflow/include" | |
329 | |
330 # compile the code | |
331 make | |
332 | |
333 # it will produce tfaas executable | |
334 | |
335 # to run the code we need to set up `DYLD_LIBRARY_PATH` (on Linux, use `LD_LIBRARY_PATH` instead) | |
336 export DYLD_LIBRARY_PATH=/opt/tensorflow/lib | |
337 ./tfaas -config config.json | |
338 ``` | |
339 where `config.json` has the following form (please refer to the TFaaS documentation for more details): | |
340 ``` | |
341 { | |
342 "port": 8083, | |
343 "modelDir": "models", | |
344 "staticDir": "static", | |
345 "configProto": "", | |
346 "base": "", | |
347 "serverKey": "", | |
348 "serverCrt": "", | |
349 "verbose": 1 | |
350 } | |
351 ``` | |
352 | |
353 ### Serving predictions with TFaaS inference server | |
354 Finally, we are ready for the inference part. | |
355 - upload your ML model to TFaaS server | |
356 ``` | |
357 # create tarball of your mnist ML trained model | |
358 tar cfz mnist.tar.gz mnist | |
359 | |
360 # upload tarball to TFaaS server | |
361 curl -v -X POST -H "Content-Encoding: gzip" \ | |
362 -H "Content-Type: application/octet-stream" \ | |
363 --data-binary @./mnist.tar.gz \ | |
364 http://localhost:8083/upload | |
365 | |
366 # check your model presence | |
367 curl http://localhost:8083/models | |
368 | |
369 # generate image from MNIST dataset you want to use for prediction | |
370 # img1.png will contain number 1, img4.png will contain number 4 | |
371 ./mnist_img.py --fout img1.png --imgid=3 | |
372 ./mnist_img.py --fout img4.png --imgid=2 | |
373 | |
374 # ask for prediction of your image | |
375 curl http://localhost:8083/predict/image -F 'image=@./img1.png' -F 'model=mnist' | |
376 [0,1,0,0,0,0,0,0,0,0] | |
377 | |
378 curl http://localhost:8083/predict/image -F 'image=@./img4.png' -F 'model=mnist' | |
379 [0,0,0,0,1,0,0,0,0,0] | |
380 ``` |