Source code for HybridQRBM.optimizers

"""Optimizer to update the weights of the RBM."""

__copyright__ = "Dynex Developers, 2023"

from collections import namedtuple
import numpy as np

Parameters = namedtuple("Parameters", ["weights", "biases_visible", "biases_hidden"])

[docs]class RBMOptimizer: """Optimizer to update the weights of the RBM.""" def __init__(self, learning_rate: float = 0.05, momentum: float = 0.9, decay_factor: float = 1.0005, regularizers: tuple = ()) -> None: """Initialize the optimizer. Parameters ---------- learning_rate : float, optional The initial learning rate of the optimizer. Defaults to 0.05. momentum : float, optional The momentum of the optimizer. Defaults to 0.9. decay_factor : float, optional The decay factor of the learning rate. The learning rate is divided by the decay factor after each update (batch). Defaults to 1.0005. regularizers : list, optional The regularizers to apply to the weights. With the current implementation only weights and biases are passed to the regularizers. """ self.learning_rate = learning_rate self.initial_learning_rate = learning_rate self.momentum = momentum self.decay_factor = decay_factor self.regularizers = regularizers self.rbm = None self.delta_weights = None self.delta_biases_visible = None self.delta_biases_hidden = None
[docs] def calculate_update(self, positive_sample, negative_sample): """ Calculate the update for the weights and biases. Parameters ---------- positive_sample : tuple The values and probabilities of the visible and hidden layers produced in the positive phase of the training. The tuple should contain four elements: (visible_values, visible_probabilities, hidden_values, hidden_probabilities). negative_sample : tuple The values and probabilities of the visible and hidden layers produced in the negative phase of the training. The tuple should contain four elements: (visible_values, visible_probabilities, hidden_values, hidden_probabilities). Returns ------- namedtuple The delta values for the update of the weights and biases. """ # The shape of the weights is not known at the optimizer's # initialization, therefore we set the values now. if self.delta_weights is None: self.delta_weights = np.zeros(self.rbm.weights.shape) self.delta_biases_visible = np.zeros(self.rbm.biases_visible.shape) self.delta_biases_hidden = np.zeros(self.rbm.biases_hidden.shape) visible, _, _, prob_hidden = positive_sample batch_size = visible.shape[0] #print('optimizer debug visible=',len(visible),'prob_hidden:',len(prob_hidden),'batch_size:',batch_size); pos_weights = visible.T @ prob_hidden / batch_size pos_biases_visible = visible.sum(axis=0) / batch_size pos_biases_hidden = prob_hidden.sum(axis=0) / batch_size visible, _, _, prob_hidden = negative_sample batch_size = visible.shape[0] neg_weights = visible.T @ prob_hidden / batch_size neg_biases_visible = visible.sum(axis=0) / batch_size neg_biases_hidden = prob_hidden.sum(axis=0) / batch_size batch_delta_weights = pos_weights - neg_weights batch_delta_biases_visible = pos_biases_visible - neg_biases_visible batch_delta_biases_hidden = pos_biases_hidden - neg_biases_hidden self.delta_weights *= self.momentum self.delta_biases_visible *= self.momentum self.delta_biases_hidden *= self.momentum self.delta_weights += self.learning_rate * batch_delta_weights self.delta_biases_visible += self.learning_rate * batch_delta_biases_visible self.delta_biases_hidden += self.learning_rate * batch_delta_biases_hidden for regularizer in self.regularizers: self.delta_weights -= self.learning_rate * regularizer(self.rbm.weights) self.delta_biases_visible -= self.learning_rate * regularizer(self.rbm.biases_visible) self.delta_biases_hidden -= self.learning_rate * regularizer(self.rbm.biases_hidden) self.learning_rate /= self.decay_factor return Parameters(self.delta_weights, self.delta_biases_visible, self.delta_biases_hidden)