Mercurial > repos > rnateam > rnacommender
comparison model.py @ 0:d04fa5201f51 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
| author | rnateam |
|---|---|
| date | Thu, 28 Jul 2016 05:56:54 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d04fa5201f51 |
|---|---|
| 1 """Recommender model.""" | |
| 2 from __future__ import print_function | |
| 3 | |
| 4 import sys | |
| 5 | |
| 6 import numpy as np | |
| 7 | |
| 8 from theano import function, shared | |
| 9 import theano.tensor as T | |
| 10 | |
| 11 __author__ = "Gianluca Corrado" | |
| 12 __copyright__ = "Copyright 2016, Gianluca Corrado" | |
| 13 __license__ = "MIT" | |
| 14 __maintainer__ = "Gianluca Corrado" | |
| 15 __email__ = "gianluca.corrado@unitn.it" | |
| 16 __status__ = "Production" | |
| 17 | |
| 18 | |
class Model(object):
    """Factorization model.

    Projects explicit protein and RNA feature vectors into two latent spaces
    (sigmoid of an affine map each), combines them through a bilinear form
    and squashes the result with a sigmoid to obtain one score per
    (protein, RNA) row pair.

    After construction the instance exposes three compiled Theano callables:

    train(fp, fr, y) -> [y_hat, cost]
        Evaluate the batch and apply one gradient-descent update to the
        weights.
    test(fp, fr, y) -> [y_hat, cost]
        Evaluate the batch without touching the weights.
    predict(fp, fr) -> y_hat
        Return the predictions only.

    ``fp`` and ``fr`` are float32 matrices with one example per row (row i of
    ``fp`` is paired with row i of ``fr``); ``y`` is a vector of target
    values.
    """

    def __init__(self, sp, sr, kp, kr, irange=0.01, learning_rate=0.01,
                 lambda_reg=0.01, verbose=True, seed=1234):
        """
        Constructor.

        Parameters
        ----------
        sp : int
            Number of protein features.

        sr : int
            Number of RNA features.

        kp : int
            Size of the protein latent space.

        kr : int
            Size of the RNA latent space.

        irange : float (default : 0.01)
            Initialization range for the model weights. Every weight is
            drawn as ``(0.5 - u) * irange`` with ``u`` uniform in [0, 1).

        learning_rate : float (default : 0.01)
            Learning rate for the weights update.

        lambda_reg : float (default : 0.01)
            Lambda parameter for the regularization.

        verbose : bool (default : True)
            Print information at STDOUT.

        seed : int (default : 1234)
            Seed for random number generator. Note that this seeds NumPy's
            global generator (``np.random.seed``).

        Notes
        -----
        ``learning_rate`` and ``lambda_reg`` are baked into the compiled
        functions as constants. The attributes of the same name are kept for
        reference only; changing them afterwards does not affect training.
        """
        if verbose:
            print("Compiling model...", end=' ')
            sys.stdout.flush()

        self.learning_rate = learning_rate
        self.lambda_reg = lambda_reg
        np.random.seed(seed)

        # Symbolic inputs: explicit features for proteins and RNAs, and the
        # correct labels.
        fp = T.matrix("Fp", dtype='float32')
        fr = T.matrix("Fr", dtype='float32')
        y = T.vector("y")

        def _init_weights(name, *shape):
            """Create a float32 shared variable with small random values."""
            values = (.5 - np.random.rand(*shape)) * irange
            return shared(values.astype('float32'), name=name)

        # The creation order below fixes the order in which random numbers
        # are consumed, hence the initial weights obtained for a given seed.
        # projection matrix (and bias) for proteins
        self.Ap = _init_weights("Ap", kp, sp)
        self.bp = _init_weights("bp", kp)
        # projection matrix (and bias) for RNAs
        self.Ar = _init_weights("Ar", kr, sr)
        self.br = _init_weights("br", kr)
        # generalization matrix coupling the two latent spaces
        self.B = _init_weights("B", kp, kr)

        # Latent space for proteins
        p = T.nnet.sigmoid(T.dot(fp, self.Ap.T) + self.bp)
        # Latent space for RNAs
        r = T.nnet.sigmoid(T.dot(fr, self.Ar.T) + self.br)
        # Predicted output: row-wise bilinear form p_i . B . r_i, squashed
        # to (0, 1).
        y_hat = T.nnet.sigmoid(T.sum(T.dot(p, self.B) * r, axis=1))

        def _regularization():
            """Normalized Frobenius norm.

            Each parameter group (protein, RNA, generalization matrix)
            contributes the 2-norm of its weights divided by its number of
            weights; the three contributions are averaged.
            """
            norm_proteins = self.Ap.norm(2) + self.bp.norm(2)
            norm_rnas = self.Ar.norm(2) + self.br.norm(2)
            norm_b = self.B.norm(2)

            num_proteins = self.Ap.flatten().shape[0] + self.bp.shape[0]
            num_rnas = self.Ar.flatten().shape[0] + self.br.shape[0]
            num_b = self.B.flatten().shape[0]

            return (norm_proteins / num_proteins + norm_rnas / num_rnas +
                    norm_b / num_b) / 3

        # mean squared error plus weighted regularization term
        cost_ = (T.sqr(y - y_hat)).mean()
        reg = lambda_reg * _regularization()
        cost = cost_ + reg

        # compute sgd updates: one plain gradient-descent step per parameter
        params = [self.Ap, self.bp, self.Ar, self.br, self.B]
        grads = T.grad(cost, params)
        updates = [(param, param - learning_rate * grad)
                   for param, grad in zip(params, grads)]

        # training step
        self.train = function(
            inputs=[fp, fr, y],
            outputs=[y_hat, cost],
            updates=updates)
        # test
        self.test = function(
            inputs=[fp, fr, y],
            outputs=[y_hat, cost])

        # predict
        self.predict = function(
            inputs=[fp, fr],
            outputs=y_hat)

        if verbose:
            print("Done.")
            sys.stdout.flush()

    def get_params(self):
        """Return the parameters of the model.

        Returns
        -------
        dict
            Maps the names 'Ap', 'bp', 'Ar', 'br' and 'B' to the value
            returned by ``get_value()`` of the corresponding shared
            variable.
        """
        return {'Ap': self.Ap.get_value(), 'bp': self.bp.get_value(),
                'Ar': self.Ar.get_value(), 'br': self.br.get_value(),
                'B': self.B.get_value()}
