# Source code for dasf.ml.cluster.som

#!/usr/bin/env python3

""" Kohonen's Self-Organized Map (SOM) algorithm module. """

import numpy as np
from xpysom_dask import XPySom

from dasf.ml.cluster.classifier import ClusterClassifier
from dasf.utils.decorators import task_handler
from dasf.utils.funcs import is_gpu_supported

try:
    import cupy as cp
except ImportError:
    pass


class SOM(ClusterClassifier):
    """
    Kohonen Self-Organizing Map (SOM) clustering estimator.

    A rule of thumb to set the size of the grid for a dimensionality
    reduction task is that it should contain 5*sqrt(N) neurons
    where N is the number of samples in the dataset to analyze.

    E.g. if your dataset has 150 samples, 5*sqrt(150) = 61.23
    hence a map 8-by-8 should perform well.

    Parameters
    ----------
    x : int
        x dimension of the SOM.
    y : int
        y dimension of the SOM.
    input_len : int
        Number of the elements of the vectors in input.
    num_epochs : int, default=100
        Number of epochs handed to ``XPySom.train`` by every fit method.
    sigma : float, default=0
        Spread of the neighborhood function, needs to be adequate to the
        dimensions of the map. The value is forwarded unchanged to XPySom;
        earlier documentation lists ``min(x, y) / 2`` as the default, which
        is presumably what XPySom substitutes for 0 (confirm upstream).
    sigmaN : float, default=1
        Spread of the neighborhood function at last iteration.
    learning_rate : float, default=0.5
        Initial learning rate.
    learning_rateN : float, default=0.01
        Final learning rate.
    decay_function : string, default='exponential'
        Function that reduces learning_rate and sigma at each iteration.
        Possible values: 'exponential', 'linear', 'asymptotic'
    neighborhood_function : string, default='gaussian'
        Function that weights the neighborhood of a position in the map.
        Possible values: 'gaussian', 'mexican_hat', 'bubble', 'triangle'
    std_coeff : float, default=0.5
        Used to calculate gaussian exponent denominator:
        d = 2*std_coeff**2*sigma**2
    topology : string, default='rectangular'
        Topology of the map. Possible values: 'rectangular', 'hexagonal'
    activation_distance : string, default='euclidean'
        Distance used to activate the map.
        Possible values: 'euclidean', 'cosine', 'manhattan'
    random_seed : int, default=None
        Random seed to use.
    n_parallel : uint, default=0
        Number of samples to be processed at a time. Setting a too low
        value may drastically lower performance due to under-utilization,
        setting a too high value increases memory usage without granting
        any significant performance benefit. The value is forwarded
        unchanged to XPySom; earlier documentation lists
        ``#max_CUDA_threads or 500*#CPUcores`` as the default, presumably
        what XPySom picks when given 0 (confirm upstream).
    compact_support : bool, default=False
        Cut the neighbor function to 0 beyond neighbor radius sigma.
    **kwargs
        Forwarded to the ``ClusterClassifier`` constructor.

    Notes
    -----
    There is no ``xp`` argument. The constructor always creates two
    NumPy-backed XPySom instances (plain and ``use_dask=True``) and, when
    ``is_gpu_supported()`` is true, two CuPy-backed ones as well.

    Examples
    --------
    >>> from dasf.ml.cluster import SOM
    >>> import numpy as np
    >>> X = np.array([[1, 1], [2, 1], [1, 0],
    ...               [4, 7], [3, 5], [3, 6]])
    >>> som = SOM(x=3, y=2, input_len=2,
    ...           num_epochs=100).fit(X)
    >>> som
    SOM(x=3, y=2, input_len=2, num_epochs=100)
    """

    def __init__(
        self,
        x,
        y,
        input_len,
        num_epochs=100,
        sigma=0,
        sigmaN=1,
        learning_rate=0.5,
        learning_rateN=0.01,
        decay_function="exponential",
        neighborhood_function="gaussian",
        std_coeff=0.5,
        topology="rectangular",
        activation_distance="euclidean",
        random_seed=None,
        n_parallel=0,
        compact_support=False,
        **kwargs
    ):
        """
        Constructor of the class SOM.

        Stores every hyperparameter as a public attribute and builds one
        XPySom instance per execution backend.
        """
        super().__init__(**kwargs)

        self.x = x
        self.y = y
        self.input_len = input_len
        self.num_epochs = num_epochs
        self.sigma = sigma
        self.sigmaN = sigmaN
        self.learning_rate = learning_rate
        self.learning_rateN = learning_rateN
        self.decay_function = decay_function
        self.neighborhood_function = neighborhood_function
        self.std_coeff = std_coeff
        self.topology = topology
        self.activation_distance = activation_distance
        self.random_seed = random_seed
        self.n_parallel = n_parallel
        self.compact_support = compact_support

        # Every backend gets exactly the same hyperparameters; only the
        # array module (``xp``) and the ``use_dask`` flag differ.
        som_params = {
            "x": self.x,
            "y": self.y,
            "input_len": self.input_len,
            "sigma": self.sigma,
            "sigmaN": self.sigmaN,
            "learning_rate": self.learning_rate,
            "learning_rateN": self.learning_rateN,
            "decay_function": self.decay_function,
            "neighborhood_function": self.neighborhood_function,
            "std_coeff": self.std_coeff,
            "topology": self.topology,
            "activation_distance": self.activation_distance,
            "random_seed": self.random_seed,
            "n_parallel": self.n_parallel,
            "compact_support": self.compact_support,
        }

        self.__som_cpu = XPySom(xp=np, **som_params)
        self.__som_mcpu = XPySom(xp=np, use_dask=True, **som_params)

        # The GPU-backed instances only exist when a GPU is usable, so the
        # ``*_gpu`` methods must not be called otherwise.
        if is_gpu_supported():
            self.__som_gpu = XPySom(xp=cp, **som_params)
            self.__som_mgpu = XPySom(xp=cp, use_dask=True, **som_params)

    def _lazy_fit_cpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM method using Dask with CPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data passed to ``XPySom.train``.
        y : None
            Placeholder kept for compatibility with other fit methods.
            This is not used by SOM.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with
            other fit methods. This is not used by SOM.

        Returns
        -------
        object
            The result of ``XPySom.train``; earlier documentation describes
            it as the fitted instance.
        """
        # Record the backend trained last (not read elsewhere in this class).
        self.__som = self.__som_mcpu
        return self.__som_mcpu.train(X, self.num_epochs)

    def _lazy_fit_gpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM method using Dask with GPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data passed to ``XPySom.train``.
        y : None
            Placeholder kept for compatibility with other fit methods.
            This is not used by SOM.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with
            other fit methods. This is not used by SOM.

        Returns
        -------
        object
            The result of ``XPySom.train``; earlier documentation describes
            it as the fitted instance.
        """
        # Record the backend trained last (not read elsewhere in this class).
        self.__som = self.__som_mgpu
        return self.__som_mgpu.train(X, self.num_epochs)

    def _fit_cpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM method using CPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data passed to ``XPySom.train``.
        y : None
            Placeholder kept for compatibility with other fit methods.
            This is not used by SOM.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with
            other fit methods. This is not used by SOM.

        Returns
        -------
        object
            The result of ``XPySom.train``; earlier documentation describes
            it as the fitted instance.
        """
        # Record the backend trained last (not read elsewhere in this class).
        self.__som = self.__som_cpu
        return self.__som_cpu.train(X, self.num_epochs)

    def _fit_gpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM method using GPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data passed to ``XPySom.train``.
        y : None
            Placeholder kept for compatibility with other fit methods.
            This is not used by SOM.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with
            other fit methods. This is not used by SOM.

        Returns
        -------
        object
            The result of ``XPySom.train``; earlier documentation describes
            it as the fitted instance.
        """
        # Record the backend trained last (not read elsewhere in this class).
        self.__som = self.__som_gpu
        return self.__som_gpu.train(X, self.num_epochs)

    def _lazy_fit_predict_cpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM and select the winner neurons for the input using Dask with
        CPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data used both for training and for prediction.
        y : {array-like, sparse matrix} of shape (n_samples).
            This is just a placeholder to keep the compatibility with
            other fit_predict methods. SOM does not use labels to verify
            the input.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with
            other fit_predict methods. This is not used by SOM.

        Returns
        -------
        labels
            The output of ``predict(X)`` called on the result of
            ``XPySom.train`` (not ``self``).
        """
        # Record the backend trained last (not read elsewhere in this class).
        self.__som = self.__som_mcpu
        return self.__som_mcpu.train(X, self.num_epochs).predict(X)

    def _lazy_fit_predict_gpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM and select the winner neurons for the input using Dask with
        GPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data used both for training and for prediction.
        y : {array-like, sparse matrix} of shape (n_samples).
            This is just a placeholder to keep the compatibility with
            other fit_predict methods. SOM does not use labels to verify
            the input.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with
            other fit_predict methods. This is not used by SOM.

        Returns
        -------
        labels
            The output of ``predict(X)`` called on the result of
            ``XPySom.train`` (not ``self``).
        """
        # Record the backend trained last (not read elsewhere in this class).
        self.__som = self.__som_mgpu
        return self.__som_mgpu.train(X, self.num_epochs).predict(X)

    def _fit_predict_cpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM and select the winner neurons for the input using CPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data used both for training and for prediction.
        y : {array-like, sparse matrix} of shape (n_samples).
            This is just a placeholder to keep the compatibility with
            other fit_predict methods. SOM does not use labels to verify
            the input.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with
            other fit_predict methods. This is not used by SOM.

        Returns
        -------
        labels
            The output of ``predict(X)`` called on the result of
            ``XPySom.train`` (not ``self``).
        """
        # Record the backend trained last (not read elsewhere in this class).
        self.__som = self.__som_cpu
        return self.__som_cpu.train(X, self.num_epochs).predict(X)

    def _fit_predict_gpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM and select the winner neurons for the input using GPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data used both for training and for prediction.
        y : {array-like, sparse matrix} of shape (n_samples).
            This is just a placeholder to keep the compatibility with
            other fit_predict methods. SOM does not use labels to verify
            the input.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with
            other fit_predict methods. This is not used by SOM.

        Returns
        -------
        labels
            The output of ``predict(X)`` called on the result of
            ``XPySom.train`` (not ``self``).
        """
        # Record the backend trained last (not read elsewhere in this class).
        self.__som = self.__som_gpu
        return self.__som_gpu.train(X, self.num_epochs).predict(X)

    def _lazy_predict_cpu(self, X, sample_weight=None):
        """
        Predict the input using a fitted SOM using Dask with CPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data passed to ``XPySom.predict``.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with
            other fit methods. This is not used by SOM.

        Returns
        -------
        labels
            The output of ``XPySom.predict`` for ``X``; earlier
            documentation describes it as cluster labels of shape
            (n_samples,).
        """
        return self.__som_mcpu.predict(X)

    def _lazy_predict_gpu(self, X, sample_weight=None):
        """
        Predict the input using a fitted SOM using Dask with GPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data passed to ``XPySom.predict``.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with
            other fit methods. This is not used by SOM.

        Returns
        -------
        labels
            The output of ``XPySom.predict`` for ``X``; earlier
            documentation describes it as cluster labels of shape
            (n_samples,).
        """
        return self.__som_mgpu.predict(X)

    def _predict_cpu(self, X, sample_weight=None):
        """
        Predict the input using a fitted SOM using CPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data passed to ``XPySom.predict``.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with
            other fit methods. This is not used by SOM.

        Returns
        -------
        labels
            The output of ``XPySom.predict`` for ``X``; earlier
            documentation describes it as cluster labels of shape
            (n_samples,).
        """
        return self.__som_cpu.predict(X)

    def _predict_gpu(self, X, sample_weight=None):
        """
        Predict the input using a fitted SOM using GPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data passed to ``XPySom.predict``.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with
            other fit methods. This is not used by SOM.

        Returns
        -------
        labels
            The output of ``XPySom.predict`` for ``X``; earlier
            documentation describes it as cluster labels of shape
            (n_samples,).
        """
        return self.__som_gpu.predict(X)

    def _lazy_quantization_error_cpu(self, X):
        """
        Returns the quantization error computed as the average distance
        between each input sample and its best matching unit using Dask
        with CPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data passed to ``XPySom.quantization_error``.

        Returns
        -------
        error : float
            The quantization error of the trained SOM.
        """
        return self.__som_mcpu.quantization_error(X)

    def _lazy_quantization_error_gpu(self, X):
        """
        Returns the quantization error computed as the average distance
        between each input sample and its best matching unit using Dask
        with GPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data passed to ``XPySom.quantization_error``.

        Returns
        -------
        error : float
            The quantization error of the trained SOM.
        """
        return self.__som_mgpu.quantization_error(X)

    def _quantization_error_cpu(self, X):
        """
        Returns the quantization error computed as the average distance
        between each input sample and its best matching unit using CPU
        only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data passed to ``XPySom.quantization_error``.

        Returns
        -------
        error : float
            The quantization error of the trained SOM.
        """
        return self.__som_cpu.quantization_error(X)

    def _quantization_error_gpu(self, X):
        """
        Returns the quantization error computed as the average distance
        between each input sample and its best matching unit using GPU
        only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
            Data passed to ``XPySom.quantization_error``.

        Returns
        -------
        error : float
            The quantization error of the trained SOM.
        """
        return self.__som_gpu.quantization_error(X)

    @task_handler
    def quantization_error(self, X):
        """
        Generic quantization_error function according executor (for SOM
        method only).

        The body is intentionally empty: the ``task_handler`` decorator is
        presumably what routes the call to the matching
        ``_quantization_error_*`` / ``_lazy_quantization_error_*`` variant
        (confirm against ``dasf.utils.decorators``).
        """
        ...