Source code for bgd.nn

""" This module contains the :class:`bgd.nn.NeuralStack`
class that represents a linear neural network (LNN).

Any other model of neural nets shall be written down here. """

# nn.py
# author : Antoine Passemiers, Robin Petit

__all__ = [
    'split_train_val', 'binarize_labels', 'NeuralStack'
]

import numpy as np

from bgd.batch import Batching
from bgd.cost import ClassificationCost, Cost
from bgd.errors import RequiredComponentError, WrongComponentTypeError
from bgd.layers import Layer, Dropout
from bgd.optimizers import Optimizer
from bgd.utils import log

def split_train_val(X, y, validation_fraction):
    """ Randomly splits the dataset (X, y) into a training set and
    a validation set.

    Args:
        X (:obj:`np.ndarray`):
            Matrix of samples. shape == (n_instances, n_features).
        y (:obj:`np.ndarray`):
            Vector of expected outputs. shape == (n_instances,)
            or (n_instances, n_classes) if binarized.
        validation_fraction (float):
            Proportion of samples to be kept for validation.

    Returns:
        X_train (:obj:`np.ndarray`):
            Matrix of training samples.
            len(X_train) == len(X) * (1 - validation_fraction)
        y_train (:obj:`np.ndarray`):
            Vector of training expected outputs.
            len(y_train) == len(y) * (1 - validation_fraction)
        X_test (:obj:`np.ndarray`):
            Matrix of test samples.
            len(X_test) == len(X) * validation_fraction
        y_test (:obj:`np.ndarray`):
            Vector of test expected outputs.
            len(y_test) == len(y) * validation_fraction

    Raises:
        ValueError:
            If validation_fraction is not in [0, 1].
    """
    if not 0 <= validation_fraction <= 1:
        raise ValueError('{:g} is not a valid fraction'.format(validation_fraction))
    split = int(len(X) * (1. - validation_fraction))
    indices = np.arange(len(X))
    np.random.shuffle(indices)
    X, y = X[indices], np.squeeze(y[indices])
    return X[:split], y[:split], X[split:], y[split:]
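
# A minimal usage sketch for split_train_val (the data below is synthetic
# and not part of this module):
#
#   >>> X = np.arange(20).reshape(10, 2)
#   >>> y = np.arange(10) % 2
#   >>> X_train, y_train, X_val, y_val = split_train_val(X, y, 0.2)
#   >>> len(X_train), len(X_val)
#   (8, 2)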

def binarize_labels(y):
    """ Transforms an N-valued vector of labels into a binary matrix.
    If N classes are present, they *must* be encoded 0 to N-1.

    Args:
        y (:obj:`np.ndarray`):
            Vector of classes.

    Returns:
        :obj:`np.ndarray`:
            Binary matrix binary_y of shape (n_instances, n_classes)
            s.t. `binary_y[i, j] == 1` iff `y[i] == j`.

    Raises:
        ValueError:
            If the classes are not encoded 0 to N-1.

    Example:
        >>> y = np.array([0, 1, 0, 0, 1, 1, 0])
        >>> binarize_labels(y)
        array([[1, 0],
               [0, 1],
               [1, 0],
               [1, 0],
               [0, 1],
               [0, 1],
               [1, 0]])
    """
    unique_values = np.unique(y)
    n_classes = len(unique_values)
    if set(unique_values) != set(np.arange(n_classes)):
        raise ValueError('The {} classes must be encoded 0 to {}'
                         .format(n_classes, n_classes - 1))
    # Note: `np.int` is deprecated in recent NumPy; the built-in `int` is used instead
    binary_y = np.zeros((len(y), n_classes), dtype=int)
    for c in range(n_classes):
        binary_y[:, c] = (y == c)
    return binary_y

class NeuralStack:
    """ Sequential model that can be viewed as a stack of layers.
    Backpropagation is simplified because the error gradient with
    respect to the parameters of any layer is the gradient of a
    composition of functions (defined by all successor layers) and
    is decomposed using the chain rule.

    Attributes:
        layers (list):
            List of layers, where layers[0] is the input layer
            and layers[-1] is the output layer.
        batch_op (:class:`bgd.batch.Batching`):
            Batching method to use in order to perform one step
            of the backpropagation algorithm.
        cost_op (:class:`bgd.cost.Cost`):
            Error metric to use in order to evaluate model performance.
        optimizer (:class:`bgd.optimizers.Optimizer`):
            Optimizer to use in order to update the parameters of
            the model during backpropagation.
    """

    def __init__(self):
        self.layers = list()
        self.batch_op = None
        self.cost_op = None
        self.optimizer = None
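
    # Chain-rule sketch (a reading of the class docstring, not extra
    # machinery): for a stack f = f_L o ... o f_1 trained with cost C,
    # the gradient of C w.r.t. the parameters theta_i of layer i factors as
    #
    #   dC/dtheta_i = dC/da_L * (da_L/da_{L-1}) * ... * (da_{i+1}/da_i) * (da_i/dtheta_i),
    #
    # where a_j is the output of layer j. In train() below, the running
    # product is the `signal` passed backward from layer to layer, and each
    # layer multiplies in its own local factors inside Layer.backward().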

    def add(self, component):
        """ Adds a component to the model: either a layer or a special
        component like an optimizer. Some components are mandatory for
        training the model.

        The architecture of the model is defined by the layers passed
        to this method, so the order in which layers are added matters.
        The order is irrelevant for special components like optimizers,
        error metrics, etc.

        Args:
            component (object):
                Component to add to the model (layer or special
                component).

        Raises:
            WrongComponentTypeError:
                If the type of the component is not recognized.
        """
        if isinstance(component, Layer):
            # Add a layer to the neural stack
            self.layers.append(component)
        elif isinstance(component, Optimizer):
            # Set the optimizer
            self.optimizer = component
        elif isinstance(component, Cost):
            # Set the error metric
            self.cost_op = component
        elif isinstance(component, Batching):
            # Set the batching algorithm
            self.batch_op = component
        else:
            raise WrongComponentTypeError("Unknown component type")
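
    # A short usage sketch for add(), using the component classes that
    # appear in the train() docstring example below:
    #
    #   >>> nn = NeuralStack()
    #   >>> nn.add(FullyConnected(64, 10))   # layers: order defines the stack
    #   >>> nn.add(Activation('softmax'))
    #   >>> nn.add(CrossEntropy())           # special components: order-free
    #   >>> nn.add(AdamOptimizer())
    #   >>> nn.add(SGDBatching(128))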

    def eval_loss(self, batch_y, predictions, alpha):
        """ Returns the loss value of the output computed by the model
        with respect to the actual output.

        Args:
            batch_y (:obj:`np.ndarray`):
                True labels of the samples.
            predictions (:obj:`np.ndarray`):
                Labels predicted by the model.
            alpha (float):
                L2 regularization alpha term (0 if no L2).

        Returns:
            float:
                The value of the loss function.
        """
        loss = self.cost_op.eval(batch_y, predictions)
        # Apply L2 regularization
        if alpha > 0:
            for layer in self.layers:
                params = layer.get_parameters()
                if params:
                    squared_params = params[0] ** 2
                    if not np.isnan(squared_params).any():
                        loss += 0.5 * alpha * np.sum(squared_params)
        return loss
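
    # L2 sketch: with weight arrays W_1, ..., W_k (the first parameter array
    # of each layer, as used above), eval_loss() computes
    #
    #   L' = L + (alpha / 2) * sum_k ||W_k||^2,
    #
    # matching the `0.5 * alpha * np.sum(squared_params)` term. The matching
    # gradient contribution alpha * W_k is presumably handled by the optimizer
    # through add_gradient_fragments(), which receives l2_alpha in train().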

    def train(self, X, y, epochs=1000, l2_alpha=.1, print_every=1,
              validation_fraction=0.1, dataset_normalization=False):
        """ Trains the model on samples X and labels y: optimizes the
        model parameters so that the loss is minimized on this dataset.
        The validation fraction *must* be in [0, 1] (0 to skip
        evaluation on a validation set).

        Args:
            X (:obj:`np.ndarray`):
                Array of samples.
                `shape == (n_instances, n_features)`, or
                `shape == (n_instances, n_pixels, n_channels)` for images.
            y (:obj:`np.ndarray`):
                Array of labels. `shape == (n_instances,)`
            epochs (int):
                Number of times the dataset (X, y) is entirely fed
                to the model.
            l2_alpha (float):
                L2 regularization alpha parameter (0 if no L2).
            print_every (int):
                Number of batches between two prints of the model state
                and evaluations on the intermediate validation set
                (use a negative number to disable). Defaults to 1.
            validation_fraction (float):
                Proportion of samples to be kept for validation.
            dataset_normalization (bool):
                Whether to standardize X (zero mean, unit standard
                deviation) before training. Defaults to False.

        Returns:
            :obj:`np.ndarray`:
                Array containing the loss of each batch.

        Raises:
            ValueError:
                See :func:`split_train_val`.
            RequiredComponentError:
                See :meth:`check_components`.

        Example:
            >>> X, y = load_digits(return_X_y=True)
            >>> n_in, n_hidden, n_out = 64, 32, 10
            >>> nn = NeuralStack()
            >>> nn.add(FullyConnected(n_in, n_hidden))
            >>> nn.add(Activation())
            >>> nn.add(FullyConnected(n_hidden, n_out))
            >>> nn.add(Activation('softmax'))
            >>> nn.add(CrossEntropy())
            >>> nn.add(AdamOptimizer())
            >>> nn.add(SGDBatching(512))
            >>> losses = nn.train(X, y)
            Loss at epoch 49 (batch 2): 1.2941039726880612 - Validation accuracy: 83.333%
            Loss at epoch 99 (batch 2): 0.764482839362229 - Validation accuracy: 96.111%
            Loss at epoch 149 (batch 2): 0.5154527368075816 - Validation accuracy: 97.222%
            Loss at epoch 199 (batch 2): 0.3979498104086121 - Validation accuracy: 97.778%
            Loss at epoch 249 (batch 2): 0.32569535096131924 - Validation accuracy: 97.778%
            Loss at epoch 299 (batch 2): 0.2748402661346476 - Validation accuracy: 97.778%
            Loss at epoch 349 (batch 2): 0.2555267910622358 - Validation accuracy: 97.778%
            Loss at epoch 399 (batch 2): 0.22863001357754167 - Validation accuracy: 97.778%
            Loss at epoch 449 (batch 2): 0.22746067257186584 - Validation accuracy: 97.778%
            Loss at epoch 499 (batch 2): 0.22220948073377536 - Validation accuracy: 97.778%
            Loss at epoch 549 (batch 2): 0.2089401746378744 - Validation accuracy: 98.333%
            Loss at epoch 599 (batch 2): 0.20035077161054704 - Validation accuracy: 98.333%
            Loss at epoch 649 (batch 2): 0.1867468456143161 - Validation accuracy: 98.333%
            Loss at epoch 699 (batch 2): 0.17347271604108705 - Validation accuracy: 98.333%
            Loss at epoch 749 (batch 2): 0.15376433332896908 - Validation accuracy: 98.333%
            Loss at epoch 799 (batch 2): 0.15436015140582668 - Validation accuracy: 98.333%
            Loss at epoch 849 (batch 2): 0.13860411664942396 - Validation accuracy: 98.889%
            Loss at epoch 899 (batch 2): 0.133165375570591 - Validation accuracy: 98.333%
            Loss at epoch 949 (batch 2): 0.12890010110436428 - Validation accuracy: 98.333%
            Loss at epoch 999 (batch 2): 0.12433454132628034 - Validation accuracy: 98.333%
            >>> print('Loss over time:', losses)
            Loss over time: [ 2.50539121  2.28391007  2.40779468 ...,  0.11655055  0.12436938
              0.09155006]
        """
        dtype = np.float32  # Force the NN to work only with np.float32

        # Check arrays (`is not` compares object identity,
        # so the dtypes are compared with `!=` instead)
        X, y = np.asarray(X), np.asarray(y)
        if X.dtype != dtype:
            X = X.astype(dtype)
        if dataset_normalization:
            X = (X - X.mean()) / X.std()

        # Check components
        self.check_components()

        # Split data into training data and validation data for early stopping
        if len(X) < 50:
            validation_fraction = 0
        if validation_fraction > 0:
            X_train, y_train, X_val, y_val = split_train_val(X, y, validation_fraction)
        else:
            X_train, y_train = X, y

        # Binarize labels if classification task
        if isinstance(self.cost_op, ClassificationCost):
            y_train = binarize_labels(y_train)

        # Deactivate signal propagation through first layer
        self.layers[0].deactivate_propagation()

        losses = list()
        nb_batches = 0
        if l2_alpha < 0:
            l2_alpha = 0
        else:
            # TODO: batch_size is not an attribute of Batching
            l2_alpha /= self.batch_op.batch_size

        for epoch in range(epochs):
            batch_id = 0
            self.batch_op.start(X_train, y_train)
            next_batch = self.batch_op.next()
            while next_batch:
                batch_x, batch_y = next_batch
                batch_id += 1
                nb_batches += 1

                # Forward pass
                predictions = self.eval(batch_x)

                # Compute loss function
                loss = self.eval_loss(batch_y, predictions, l2_alpha)
                losses.append(loss)

                # Compute gradient of the loss function
                signal = self.cost_op.grad(batch_y, predictions).astype(dtype)

                # Propagate error through each layer
                for layer in reversed(self.layers):
                    signal, gradient = layer.backward(signal)
                    if gradient is not None:
                        self.optimizer.add_gradient_fragments(layer, gradient, l2_alpha)
                F = lambda: self.eval_loss(batch_y, self.eval(batch_x), l2_alpha)  # pylint: disable=cell-var-from-loop
                self.optimizer.update(F)

                if print_every > 0 and nb_batches % print_every == 0:
                    log('Loss at epoch {} (batch {}): {: <20}'
                        .format(epoch, batch_id, loss),
                        end=' - Validation ' if validation_fraction > 0 else '\n')
                    if validation_fraction > 0:
                        self.cost_op.print_fitness(y_val, self.eval(X_val))

                # Retrieve batch for next iteration
                next_batch = self.batch_op.next()
        return np.array(losses)

    def eval(self, X, start=0, stop=-1):
        """ Feeds a sample (or batch) to the model and propagates it
        linearly from any layer to any layer. By default, X is
        propagated through the entire stack.

        Args:
            X (:obj:`np.ndarray`):
                The sample (or batch of samples) to feed to the model.
            start (int):
                Index of the layer where X is to be fed.
            stop (int):
                Index of the last layer of propagation (-1 for the
                last layer).

        Returns:
            :obj:`np.ndarray`:
                Output of the propagation of X through each layer
                of the model.
        """
        X = np.asarray(X, dtype=np.float32)
        if stop == -1 or stop > len(self.layers):
            stop = len(self.layers)
        for i in range(start, stop):
            X = self.layers[i].forward(X)
        return X
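
    # Partial-propagation sketch (assumes a built `nn` as in the train()
    # docstring, with dropout deactivated so the forward pass is
    # deterministic):
    #
    #   >>> hidden = nn.eval(X, start=0, stop=2)  # activations after layer 1
    #   >>> out = nn.eval(hidden, start=2)        # resume from layer 2
    #   >>> np.allclose(out, nn.eval(X))
    #   True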

    def get_accuracy(self, X, y):
        """ Returns the accuracy of the model on the provided dataset.

        Raises:
            AttributeError:
                If the model is not a classifier.
                See :meth:`bgd.cost.ClassificationCost.accuracy`.
        """
        if isinstance(self.cost_op, ClassificationCost):
            # if y.ndim == 1:
            #     y = binarize_labels(y)
            return ClassificationCost.accuracy(y, self.eval(X))
        raise AttributeError('Model is not classifying. Accuracy unavailable.')

    def activate_dropout(self):
        """ Activates the Dropout layers (for training phase). """
        for layer in self.layers:
            if isinstance(layer, Dropout):
                layer.activate()

    def deactivate_dropout(self):
        """ Deactivates the Dropout layers (after training phase). """
        for layer in self.layers:
            if isinstance(layer, Dropout):
                layer.deactivate()

    def check_components(self):
        """ Verifies that all the required components are set so that
        training is possible. If the batching method, the optimizer or
        the error function is missing, an exception is raised.

        Raises:
            RequiredComponentError:
                If any required component of the model has not been set.
        """
        # Check batch optimization method
        if self.batch_op is None:
            raise RequiredComponentError(Batching.__name__)
        # Check optimizer
        if self.optimizer is None:
            raise RequiredComponentError(Optimizer.__name__)
        # Check loss function
        if self.cost_op is None:
            raise RequiredComponentError(Cost.__name__)