## MLaaS end-to-end example using MNIST dataset
MLaaS stands for Machine Learning as a Service. Here we provide an
end-to-end example based on the MNIST dataset: we use the
Python [Keras](https://keras.io/) ML framework for the training
part, and the [TFaaS](https://github.com/vkuznet/TFaaS) framework
for the inference part.

### Requirements (environment)
To proceed with the ML training we need to acquire the MNIST dataset.
We will assume that you have a box with a recent version of Python installed;
please note that these instructions were tested with `Python 3.10.10`.

```
# create the mnist_env virtual environment, here python refers to python 3.10.10
python -m venv mnist_env

# activate the environment and install the required packages
source mnist_env/bin/activate
pip install tensorflow

# download MNIST dataset for training purposes in numpy gzipped arrays
curl -ksLO https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz

# download MNIST dataset for training purposes in pkl.gz data-format
curl -ksLO https://s3.amazonaws.com/img-datasets/mnist.pkl.gz

# download actual MNIST images which we will use within inference
curl -O http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
```
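
Before training, you can quickly verify that the downloaded `mnist.pkl.gz`
archive is intact. This is a minimal sketch, mirroring the `readData` helper
used in `ktrain.py` below:
```
#!/usr/bin/env python
# sanity check: load the downloaded MNIST archive and report its shapes
import gzip
import pickle

with gzip.open('mnist.pkl.gz', 'rb') as f:
    (x_train, y_train), (x_test, y_test) = pickle.load(f, encoding='bytes')

# we expect 60000 train and 10000 test images of shape (28, 28)
print('train:', x_train.shape, 'test:', x_test.shape)
print('first train labels:', y_train[:10])
```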

### Train ML model
Below you can see a fully tested, Keras based ML codebase that trains a
simple convolutional neural network over the MNIST dataset (save
this code as `ktrain.py`):
```
#!/usr/bin/env python
#-*- coding: utf-8 -*-
#pylint: disable=
"""
File       : ktrain.py
Author     : Valentin Kuznetsov <vkuznet AT gmail dot com>
Description: Keras based ML network to train over MNIST dataset
"""

# system modules
import os
import sys
import json
import gzip
import pickle
import argparse

# third-party modules
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.python.tools import saved_model_utils


def modelGraph(model_dir):
    """
    Provide input/output names used by TF Graph along with graph itself
    The code is based on TF saved_model_cli.py script.
    """
    input_names = []
    output_names = []
    tag_sets = saved_model_utils.get_saved_model_tag_sets(model_dir)
    for tag_set in sorted(tag_sets):
        print('%r' % ', '.join(sorted(tag_set)))
        meta_graph_def = saved_model_utils.get_meta_graph_def(model_dir, tag_set[0])
        for key in meta_graph_def.signature_def.keys():
            meta = meta_graph_def.signature_def[key]
            if hasattr(meta, 'inputs') and hasattr(meta, 'outputs'):
                input_signatures = list(meta.inputs.values())
                input_names = [signature.name for signature in input_signatures]
                if len(input_names) > 0:
                    output_signatures = list(meta.outputs.values())
                    output_names = [signature.name for signature in output_signatures]
    return input_names, output_names, meta_graph_def

def readData(fin, num_classes):
    """
    Helper function to read MNIST data and provide it to
    upstream code, e.g. to the training layer
    """
    # Load the data and split it between train and test sets
    # (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
    f = gzip.open(fin, 'rb')
    if sys.version_info < (3,):
        mnist_data = pickle.load(f)
    else:
        mnist_data = pickle.load(f, encoding='bytes')
    f.close()
    (x_train, y_train), (x_test, y_test) = mnist_data

    # Scale images to the [0, 1] range
    x_train = x_train.astype("float32") / 255
    x_test = x_test.astype("float32") / 255
    # Make sure images have shape (28, 28, 1)
    x_train = np.expand_dims(x_train, -1)
    x_test = np.expand_dims(x_test, -1)
    print("x_train shape:", x_train.shape)
    print(x_train.shape[0], "train samples")
    print(x_test.shape[0], "test samples")

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    return x_train, y_train, x_test, y_test


def train(fin, fout=None, model_name=None, epochs=1, batch_size=128, h5=False):
    """
    train function for MNIST
    """
    # Model / data parameters
    num_classes = 10
    input_shape = (28, 28, 1)

    # create ML model
    model = keras.Sequential(
        [
            keras.Input(shape=input_shape),
            layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Flatten(),
            layers.Dropout(0.5),
            layers.Dense(num_classes, activation="softmax"),
        ]
    )

    model.summary()
    print("model input", model.input, type(model.input), model.input.__dict__)
    print("model output", model.output, type(model.output), model.output.__dict__)
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

    # train model
    x_train, y_train, x_test, y_test = readData(fin, num_classes)
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

    # evaluate trained model
    score = model.evaluate(x_test, y_test, verbose=0)
    print("Test loss:", score[0])
    print("Test accuracy:", score[1])
    print("save model to", fout)
    writer(fout, model_name, model, input_shape, h5)

def writer(fout, model_name, model, input_shape, h5=False):
    """
    Writer provides write function for given model
    """
    if not fout:
        return
    model.save(fout)
    if h5:
        model.save('{}/{}'.format(fout, h5), save_format='h5')
    pbModel = '{}/saved_model.pb'.format(fout)
    pbtxtModel = '{}/saved_model.pbtxt'.format(fout)
    convert(pbModel, pbtxtModel)

    # get meta-data information about our ML model
    input_names, output_names, model_graph = modelGraph(model_name)
    print("### input", input_names)
    print("### output", output_names)
    # our ML model uses (28, 28, 1) shape, i.e. 28x28 black-white images;
    # for color images the shape would be (28, 28, 3)
    img_channels = input_shape[2]  # last item represents the number of colors
    meta = {'name': model_name,
            'model': 'saved_model.pb',
            'labels': 'labels.txt',
            'img_channels': img_channels,
            'input_name': input_names[0].split(':')[0],
            'output_name': output_names[0].split(':')[0],
            'input_node': model.input.name,
            'output_node': model.output.name
           }
    with open(fout+'/params.json', 'w') as ostream:
        ostream.write(json.dumps(meta))
    with open(fout+'/labels.txt', 'w') as ostream:
        for i in range(0, 10):
            ostream.write(str(i)+'\n')
    with open(fout + '/model.graph', 'wb') as ostream:
        ostream.write(model_graph.SerializeToString())

def convert(fin, fout):
    """
    convert input model.pb into output model.pbtxt
    Based on internet search:
    - https://www.tensorflow.org/guide/saved_model
    - https://www.programcreek.com/python/example/123317/tensorflow.core.protobuf.saved_model_pb2.SavedModel
    """
    # the text_format submodule must be imported explicitly
    import google.protobuf.text_format
    from tensorflow.core.protobuf import saved_model_pb2

    saved_model = saved_model_pb2.SavedModel()

    with open(fin, 'rb') as f:
        saved_model.ParseFromString(f.read())

    with open(fout, 'w') as f:
        f.write(google.protobuf.text_format.MessageToString(saved_model))


class OptionParser():
    def __init__(self):
        "User based option parser"
        self.parser = argparse.ArgumentParser(prog='PROG')
        self.parser.add_argument("--fin", action="store",
            dest="fin", default="", help="Input MNIST file")
        self.parser.add_argument("--fout", action="store",
            dest="fout", default="", help="Output models area")
        self.parser.add_argument("--model", action="store",
            dest="model", default="mnist", help="model name")
        # cast numeric options to int so Keras receives proper types
        self.parser.add_argument("--epochs", action="store", type=int,
            dest="epochs", default=1, help="number of epochs to use in ML training")
        self.parser.add_argument("--batch_size", action="store", type=int,
            dest="batch_size", default=128, help="batch size to use in training")
        self.parser.add_argument("--h5", action="store",
            dest="h5", default="mnist", help="h5 model file name")

def main():
    "Main function"
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    train(opts.fin, opts.fout,
          model_name=opts.model,
          epochs=opts.epochs,
          batch_size=opts.batch_size,
          h5=opts.h5)

if __name__ == '__main__':
    main()
```

### Training process
We will train our model using the following command (for simplicity we skip
warning messages from TF and irrelevant printouts):
```
# here fout=mnist represents the mnist directory where we'll store our trained model
# and model=mnist is the name of the model we'll use later in inference
./ktrain.py --fin=./mnist.pkl.gz --fout=mnist --model=mnist
...
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Model: "sequential"
_________________________________________________________________
 Layer (type)                    Output Shape              Param #
=================================================================
 conv2d (Conv2D)                 (None, 26, 26, 32)        320

 max_pooling2d (MaxPooling2D)    (None, 13, 13, 32)        0

 conv2d_1 (Conv2D)               (None, 11, 11, 64)        18496

 max_pooling2d_1 (MaxPooling2D)  (None, 5, 5, 64)          0

 flatten (Flatten)               (None, 1600)              0

 dropout (Dropout)               (None, 1600)              0

 dense (Dense)                   (None, 10)                16010

=================================================================
Total params: 34,826
Trainable params: 34,826
Non-trainable params: 0
_________________________________________________________________

422/422 [==============================] - 37s 84ms/step - loss: 0.3645 - accuracy: 0.8898 - val_loss: 0.0825 - val_accuracy: 0.9772
Test loss: 0.09409885853528976
Test accuracy: 0.9703999757766724
save model to mnist

### input ['serving_default_input_1:0']
### output ['StatefulPartitionedCall:0']
```
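
Before wiring the model into an inference server, it can be useful to
sanity-check the saved model directly in Keras. Below is a minimal sketch,
assuming the `mnist` model directory and `mnist.pkl.gz` dataset produced above:
```
#!/usr/bin/env python
# load the trained model from the mnist directory and classify one test image
import gzip
import pickle
import numpy as np
from tensorflow import keras

model = keras.models.load_model('mnist')
with gzip.open('mnist.pkl.gz', 'rb') as f:
    (_, _), (x_test, y_test) = pickle.load(f, encoding='bytes')

# same preprocessing as ktrain.py: scale to [0, 1], add batch and channel axes
img = np.expand_dims(x_test[0].astype('float32') / 255, (0, -1))
probs = model.predict(img)
print('predicted:', np.argmax(probs), 'actual:', y_test[0])
```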

When this process is over you'll find a `mnist` directory with the following
content:
```
shell# ls mnist

assets  keras_metadata.pb  model.graph  saved_model.pb  variables
fingerprint.pb  labels.txt  params.json  saved_model.pbtxt
```
- `saved_model.pb` represents the trained ML model in binary protobuf data-format
- `saved_model.pbtxt` represents the trained ML model in text protobuf representation
- `labels.txt` contains our image labels
- `params.json` contains meta-data used by TFaaS and it has the following content:
```
cat mnist/params.json | jq
{
  "name": "mnist",
  "model": "saved_model.pb",
  "labels": "labels.txt",
  "img_channels": 1,
  "input_name": "serving_default_input_1",
  "output_name": "StatefulPartitionedCall",
  "input_node": "input_1",
  "output_node": "dense/Softmax:0"
}
```
Here you can see that our ML model is called `mnist`, that the model itself is
stored in the `saved_model.pb` file, and, more importantly, that `params.json`
records the input and output tensor names and nodes which we need to provide
for TFaaS to serve our predictions.
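
If you ever need to double-check these names, they can be read back from the
SavedModel itself; here is a minimal sketch using the standard TF 2 API:
```
#!/usr/bin/env python
# inspect the serving signature of the saved model in the mnist directory
import tensorflow as tf

loaded = tf.saved_model.load('mnist')
infer = loaded.signatures['serving_default']
print('inputs :', infer.structured_input_signature)
print('outputs:', infer.structured_outputs)
```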

### Inference server
Now it is time to start our inference server. You can find its code in the `src/go` area.
To build the code you need:
```
# download TF library and includes for your OS, e.g. macOS build
curl -ksLO https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-darwin-x86_64-2.11.0.tar.gz
# or linux build
curl -ksLO https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz
# or linux GPU build
curl -ksLO https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-2.11.0.tar.gz

# provide the TF include area location to the go build command,
# here /opt/tensorflow/include is where the TF includes are
export CGO_CPPFLAGS="-I/opt/tensorflow/include"

# compile the code
make

# it will produce the tfaas executable

# to run the code we need to set up DYLD_LIBRARY_PATH (on macOS;
# use LD_LIBRARY_PATH on Linux)
export DYLD_LIBRARY_PATH=/opt/tensorflow/lib
./tfaas -config config.json
```
where `config.json` has the following form:
```
{
    "port": 8083,
    "modelDir": "models",
    "staticDir": "static",
    "configProto": "",
    "base": "",
    "serverKey": "",
    "serverCrt": "",
    "verbose": 1
}
```

### Serving predictions with TFaaS inference server
Finally, we are ready for the inference part:
```
# create a tarball of your trained mnist ML model
tar cfz mnist.tar.gz mnist

# upload the tarball to the TFaaS server
curl -v -X POST -H "Content-Encoding: gzip" \
     -H "Content-Type: application/octet-stream" \
     --data-binary @./mnist.tar.gz \
     http://localhost:8083/upload

# check your model presence
curl http://localhost:8083/models

# generate images from the MNIST dataset you want to use for prediction,
# img1.png will contain the number 1, img4.png will contain the number 4
./mnist_img.py --fout img1.png --imgid=3
./mnist_img.py --fout img4.png --imgid=2

# ask for a prediction of your image
curl http://localhost:8083/predict/image -F 'image=@./img1.png' -F 'model=mnist'
[0,1,0,0,0,0,0,0,0,0]

curl http://localhost:8083/predict/image -F 'image=@./img4.png' -F 'model=mnist'
[0,0,0,0,1,0,0,0,0,0]
```
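
The same upload and prediction calls can also be scripted from Python, e.g.
with the third-party `requests` package. This is a sketch assuming the
endpoints and payloads shown in the curl examples above:
```
#!/usr/bin/env python
# talk to the TFaaS server from Python instead of curl
import requests

base = 'http://localhost:8083'

# upload the model tarball (same payload as the curl --data-binary call)
with open('mnist.tar.gz', 'rb') as f:
    resp = requests.post(base + '/upload', data=f.read(),
                         headers={'Content-Encoding': 'gzip',
                                  'Content-Type': 'application/octet-stream'})
print('upload status:', resp.status_code)

# ask for a prediction (same multipart form as the curl -F call)
with open('img1.png', 'rb') as f:
    resp = requests.post(base + '/predict/image',
                         files={'image': f}, data={'model': 'mnist'})
print('prediction:', resp.text)
```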