Mercurial > repos > kls286 > chap_test_20230328
annotate MLaaS/mnist_img.py @ 5:72b3a448b067 draft
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
| author | kls286 | 
|---|---|
| date | Tue, 28 Mar 2023 15:41:51 +0000 | 
| parents | cbbe42422d56 | 
| children | 
| rev | line source | 
|---|---|
| 0 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 2 #-*- coding: utf-8 -*- | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 3 #pylint: disable= | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 4 """ | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 5 File : mnist_img.py | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 6 Author : Valentin Kuznetsov <vkuznet AT gmail dot com> | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 7 Description: | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 8 """ | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 9 | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 10 import json | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 11 import gzip | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 12 import argparse | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 13 # from itertools import chain | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 14 | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 15 import numpy as np | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 16 import matplotlib.pyplot as plt | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 17 | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 18 | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 19 def readImage(fname, fout, num_images=5, imgId=2): | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 20 """ | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 21 Helper function to read MNIST image | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 22 """ | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 23 image_size = 28 | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 24 with gzip.open(fname, 'r') as fstream: | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 25 fstream.read(16) | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 26 buf = fstream.read(image_size * image_size * num_images) | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 27 data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32) | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 28 data = data.reshape(num_images, image_size, image_size, 1) | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 29 image = np.asarray(data[imgId]).squeeze() | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 30 plt.imsave(fout, image) | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 31 print("read:", fname, "wrote:", fout, "image:", type(image), "shape:", image.shape) | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 32 | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 33 def img2json(image): | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 34 """ | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 35 Convert given image to JSON data format used by TFaaS | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 36 """ | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 37 # values = [int(i) for i in list(chain.from_iterable(image))] | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 38 # values = image.tolist() | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 39 values = [] | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 40 for row in image.tolist(): | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 41 row = [int(i) for i in row] | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 42 vals = [[i] for i in row] | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 43 values.append(vals) | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 44 # final values should be an array of elements, e.g. single image representation | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 45 values = [values] | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 46 keys = [str(i) for i in range(0, 10)] | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 47 meta = { | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 48 'keys': keys, | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 49 'values': values, | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 50 'model': 'mnist' | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 51 } | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 52 with open('img.json', 'w') as ostream: | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 53 ostream.write(json.dumps(meta)) | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 54 | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 55 | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 56 class OptionParser(): | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 57 def __init__(self): | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 58 "User based option parser" | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 59 fname = "train-images-idx3-ubyte.gz" | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 60 self.parser = argparse.ArgumentParser(prog='PROG') | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 61 self.parser.add_argument("--fin", action="store", | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 62 dest="fin", default=fname, help=f"Input MNIST file, default {fname}") | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 63 self.parser.add_argument("--fout", action="store", | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 64 dest="fout", default="img.png", help="Output image fila name, default img.png") | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 65 self.parser.add_argument("--nimages", action="store", | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 66 dest="nimages", default=5, help="number of images to read, default 5") | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 67 self.parser.add_argument("--imgid", action="store", | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 68 dest="imgid", default=2, help="image index to use from nimages, default 2 (number 4)") | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 69 | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 70 def main(): | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 71 """ | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 72 main function to produce image file from mnist dataset. | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 73 MNIST dataset can be downloaded from | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 74 curl -O http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 75 """ | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 76 optmgr = OptionParser() | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 77 opts = optmgr.parser.parse_args() | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 78 num_images = int(opts.nimages) | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 79 imgId = int(opts.imgid) | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 80 img = readImage(opts.fin, opts.fout, num_images, imgId) | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 81 | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 82 if __name__ == '__main__': | 
| 
cbbe42422d56
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
 kls286 parents: diff
changeset | 83 main() | 
