#!/usr/bin/env python3
""" Kohonen's Self-Organized Map (SOM) algorithm module. """
import numpy as np
from xpysom_dask import XPySom
from dasf.ml.cluster.classifier import ClusterClassifier
from dasf.utils.decorators import task_handler
from dasf.utils.funcs import is_gpu_supported
try:
import cupy as cp
except ImportError:
pass
class SOM(ClusterClassifier):
    """
    Kohonen's Self-Organizing Map (SOM) clustering estimator.

    Thin wrapper around ``xpysom_dask.XPySom`` that keeps one backend per
    execution target: NumPy (single CPU), NumPy with Dask (multi CPU) and,
    only when ``is_gpu_supported()`` is true, CuPy (single GPU) and CuPy
    with Dask (multi GPU).

    A rule of thumb to set the size of the grid for a dimensionality
    reduction task is that it should contain 5*sqrt(N) neurons
    where N is the number of samples in the dataset to analyze.

    E.g. if your dataset has 150 samples, 5*sqrt(150) = 61.23
    hence a map 8-by-8 should perform well.

    Parameters
    ----------
    x : int
        x dimension of the SOM.
    y : int
        y dimension of the SOM.
    input_len : int
        Number of the elements of the vectors in input.
    num_epochs : int, default=100
        Number of epochs handed to ``XPySom.train`` by every fit method.
    sigma : float, default=0
        Spread of the neighborhood function, needs to be adequate
        to the dimensions of the map.
        NOTE(review): the upstream documentation lists min(x, y)/2 as the
        default; presumably 0 is the sentinel selecting it - confirm
        against XPySom.
    sigmaN : float, default=1
        Spread of the neighborhood function at last iteration.
        NOTE(review): the upstream documentation lists 0.01 as the default
        while this signature uses 1 - confirm which one is intended.
    learning_rate : float, default=0.5
        Initial learning rate.
    learning_rateN : float, default=0.01
        Final learning rate.
    decay_function : string, default='exponential'
        Function that reduces learning_rate and sigma at each iteration.
        Possible values: 'exponential', 'linear', 'asymptotic'
    neighborhood_function : string, default='gaussian'
        Function that weights the neighborhood of a position in the map.
        Possible values: 'gaussian', 'mexican_hat', 'bubble', 'triangle'
    std_coeff : float, default=0.5
        Used to calculate gaussian exponent denominator:
        d = 2*std_coeff**2*sigma**2
    topology : string, default='rectangular'
        Topology of the map.
        Possible values: 'rectangular', 'hexagonal'
    activation_distance : string, default='euclidean'
        Distance used to activate the map.
        Possible values: 'euclidean', 'cosine', 'manhattan'
    random_seed : int, default=None
        Random seed to use.
    n_parallel : uint, default=0
        Number of samples to be processed at a time. Setting a too low
        value may drastically lower performance due to under-utilization,
        setting a too high value increases memory usage without granting
        any significant performance benefit.
        NOTE(review): the upstream documentation lists #max_CUDA_threads or
        500*#CPUcores as the default; presumably 0 lets the backend pick
        it - confirm against XPySom.
    compact_support : bool, default=False
        Cut the neighbor function to 0 beyond neighbor radius sigma.
    **kwargs
        Forwarded unchanged to the ``ClusterClassifier`` constructor.

    Examples
    --------
    >>> from dasf.ml.cluster import SOM
    >>> import numpy as np
    >>> X = np.array([[1, 1], [2, 1], [1, 0],
    ...               [4, 7], [3, 5], [3, 6]])
    >>> som = SOM(x=3, y=2, input_len=2,
    ...           num_epochs=100).fit(X)
    >>> som
    SOM(x=3, y=2, input_len=2, num_epochs=100)
    """

    def __init__(
        self,
        x,
        y,
        input_len,
        num_epochs=100,
        sigma=0,
        sigmaN=1,
        learning_rate=0.5,
        learning_rateN=0.01,
        decay_function="exponential",
        neighborhood_function="gaussian",
        std_coeff=0.5,
        topology="rectangular",
        activation_distance="euclidean",
        random_seed=None,
        n_parallel=0,
        compact_support=False,
        **kwargs
    ):
        """ Constructor of the class SOM. """
        super().__init__(**kwargs)

        self.x = x
        self.y = y
        self.input_len = input_len
        self.num_epochs = num_epochs
        self.sigma = sigma
        self.sigmaN = sigmaN
        self.learning_rate = learning_rate
        self.learning_rateN = learning_rateN
        self.decay_function = decay_function
        self.neighborhood_function = neighborhood_function
        self.std_coeff = std_coeff
        self.topology = topology
        self.activation_distance = activation_distance
        self.random_seed = random_seed
        self.n_parallel = n_parallel
        self.compact_support = compact_support

        # Backend selected by the most recent fit call; stays None until
        # one of the fit methods runs.
        self.__som = None

        # CPU backends are always available.
        self.__som_cpu = self.__build_backend(np)
        self.__som_mcpu = self.__build_backend(np, use_dask=True)

        # GPU backends exist only when a GPU is usable; `cp` is bound only
        # if the optional cupy import at module level succeeded.
        if is_gpu_supported():
            self.__som_gpu = self.__build_backend(cp)
            self.__som_mgpu = self.__build_backend(cp, use_dask=True)

    def __build_backend(self, xp, use_dask=False):
        """
        Create one ``XPySom`` backend from the hyper-parameters on ``self``.

        Parameters
        ----------
        xp : module
            Array module handed to ``XPySom`` as ``xp`` (``numpy`` or
            ``cupy``).
        use_dask : bool, default=False
            When true, ``use_dask=True`` is forwarded to ``XPySom``. When
            false the argument is omitted entirely so that the backend's
            own default applies.

        Returns
        -------
        XPySom
            A new, untrained backend instance.
        """
        # Only forward `use_dask` when requested, so the non-Dask call is
        # exactly the one made without that keyword.
        dask_option = {"use_dask": True} if use_dask else {}

        return XPySom(
            x=self.x,
            y=self.y,
            input_len=self.input_len,
            sigma=self.sigma,
            sigmaN=self.sigmaN,
            learning_rate=self.learning_rate,
            learning_rateN=self.learning_rateN,
            decay_function=self.decay_function,
            neighborhood_function=self.neighborhood_function,
            std_coeff=self.std_coeff,
            topology=self.topology,
            activation_distance=self.activation_distance,
            random_seed=self.random_seed,
            n_parallel=self.n_parallel,
            compact_support=self.compact_support,
            xp=xp,
            **dask_option
        )

    def _lazy_fit_cpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM method using Dask with CPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
        y : Ignored
            This is just a placeholder to keep the compatibility with other
            fit methods. This is not used by SOM.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with other
            fit methods. This is not used by SOM.

        Returns
        -------
        object
            The value returned by ``XPySom.train`` on the Dask/CPU backend
            (presumably the trained backend itself, since the fit_predict
            methods chain ``.predict`` on it).
        """
        self.__som = self.__som_mcpu
        return self.__som_mcpu.train(X, self.num_epochs)

    def _lazy_fit_gpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM method using Dask with GPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
        y : Ignored
            This is just a placeholder to keep the compatibility with other
            fit methods. This is not used by SOM.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with other
            fit methods. This is not used by SOM.

        Returns
        -------
        object
            The value returned by ``XPySom.train`` on the Dask/GPU backend
            (presumably the trained backend itself, since the fit_predict
            methods chain ``.predict`` on it).
        """
        self.__som = self.__som_mgpu
        return self.__som_mgpu.train(X, self.num_epochs)

    def _fit_cpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM method using CPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
        y : Ignored
            This is just a placeholder to keep the compatibility with other
            fit methods. This is not used by SOM.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with other
            fit methods. This is not used by SOM.

        Returns
        -------
        object
            The value returned by ``XPySom.train`` on the CPU backend
            (presumably the trained backend itself, since the fit_predict
            methods chain ``.predict`` on it).
        """
        self.__som = self.__som_cpu
        return self.__som_cpu.train(X, self.num_epochs)

    def _fit_gpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM method using GPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
        y : Ignored
            This is just a placeholder to keep the compatibility with other
            fit methods. This is not used by SOM.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with other
            fit methods. This is not used by SOM.

        Returns
        -------
        object
            The value returned by ``XPySom.train`` on the GPU backend
            (presumably the trained backend itself, since the fit_predict
            methods chain ``.predict`` on it).
        """
        self.__som = self.__som_gpu
        return self.__som_gpu.train(X, self.num_epochs)

    def _lazy_fit_predict_cpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM and select the winner neurons for the input using Dask with
        CPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
        y : {array-like, sparse matrix} of shape (n_samples).
            This is just a placeholder to keep the compatibility with other
            fit_predict methods. SOM does not use labels to verify the input.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with other
            fit_predict methods. This is not used by SOM.

        Returns
        -------
        labels
            Result of ``predict(X)`` on the freshly trained Dask/CPU
            backend, i.e. the winner neurons selected for ``X``.
        """
        self.__som = self.__som_mcpu
        return self.__som_mcpu.train(X, self.num_epochs).predict(X)

    def _lazy_fit_predict_gpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM and select the winner neurons for the input using Dask with
        GPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
        y : {array-like, sparse matrix} of shape (n_samples).
            This is just a placeholder to keep the compatibility with other
            fit_predict methods. SOM does not use labels to verify the input.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with other
            fit_predict methods. This is not used by SOM.

        Returns
        -------
        labels
            Result of ``predict(X)`` on the freshly trained Dask/GPU
            backend, i.e. the winner neurons selected for ``X``.
        """
        self.__som = self.__som_mgpu
        return self.__som_mgpu.train(X, self.num_epochs).predict(X)

    def _fit_predict_cpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM and select the winner neurons for the input using CPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
        y : {array-like, sparse matrix} of shape (n_samples).
            This is just a placeholder to keep the compatibility with other
            fit_predict methods. SOM does not use labels to verify the input.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with other
            fit_predict methods. This is not used by SOM.

        Returns
        -------
        labels
            Result of ``predict(X)`` on the freshly trained CPU backend,
            i.e. the winner neurons selected for ``X``.
        """
        self.__som = self.__som_cpu
        return self.__som_cpu.train(X, self.num_epochs).predict(X)

    def _fit_predict_gpu(self, X, y=None, sample_weight=None):
        """
        Fit SOM and select the winner neurons for the input using GPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
        y : {array-like, sparse matrix} of shape (n_samples).
            This is just a placeholder to keep the compatibility with other
            fit_predict methods. SOM does not use labels to verify the input.
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with other
            fit_predict methods. This is not used by SOM.

        Returns
        -------
        labels
            Result of ``predict(X)`` on the freshly trained GPU backend,
            i.e. the winner neurons selected for ``X``.
        """
        self.__som = self.__som_gpu
        return self.__som_gpu.train(X, self.num_epochs).predict(X)

    def _lazy_predict_cpu(self, X, sample_weight=None):
        """
        Predict the input using a fitted SOM using Dask with CPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with other
            predict methods. This is not used by SOM.

        Returns
        -------
        labels : ndarray of shape (n_samples,)
            Cluster labels as returned by ``XPySom.predict`` on the
            Dask/CPU backend.
        """
        return self.__som_mcpu.predict(X)

    def _lazy_predict_gpu(self, X, sample_weight=None):
        """
        Predict the input using a fitted SOM using Dask with GPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with other
            predict methods. This is not used by SOM.

        Returns
        -------
        labels : ndarray of shape (n_samples,)
            Cluster labels as returned by ``XPySom.predict`` on the
            Dask/GPU backend.
        """
        return self.__som_mgpu.predict(X)

    def _predict_cpu(self, X, sample_weight=None):
        """
        Predict the input using a fitted SOM using CPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with other
            predict methods. This is not used by SOM.

        Returns
        -------
        labels : ndarray of shape (n_samples,)
            Cluster labels as returned by ``XPySom.predict`` on the CPU
            backend.
        """
        return self.__som_cpu.predict(X)

    def _predict_gpu(self, X, sample_weight=None):
        """
        Predict the input using a fitted SOM using GPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
        sample_weight : array-like of shape (n_samples,), default=None
            This is just a placeholder to keep the compatibility with other
            predict methods. This is not used by SOM.

        Returns
        -------
        labels : ndarray of shape (n_samples,)
            Cluster labels as returned by ``XPySom.predict`` on the GPU
            backend.
        """
        return self.__som_gpu.predict(X)

    def _lazy_quantization_error_cpu(self, X):
        """
        Returns the quantization error computed as the average distance
        between each input sample and its best matching unit using Dask with
        CPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).

        Returns
        -------
        error : float
            The quantization error of the trained SOM.
        """
        return self.__som_mcpu.quantization_error(X)

    def _lazy_quantization_error_gpu(self, X):
        """
        Returns the quantization error computed as the average distance
        between each input sample and its best matching unit using Dask with
        GPUs only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).

        Returns
        -------
        error : float
            The quantization error of the trained SOM.
        """
        return self.__som_mgpu.quantization_error(X)

    def _quantization_error_cpu(self, X):
        """
        Returns the quantization error computed as the average distance
        between each input sample and its best matching unit using CPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).

        Returns
        -------
        error : float
            The quantization error of the trained SOM.
        """
        return self.__som_cpu.quantization_error(X)

    def _quantization_error_gpu(self, X):
        """
        Returns the quantization error computed as the average distance
        between each input sample and its best matching unit using GPU only.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).

        Returns
        -------
        error : float
            The quantization error of the trained SOM.
        """
        return self.__som_gpu.quantization_error(X)

    @task_handler
    def quantization_error(self, X):
        """
        Generic quantization_error function according to the executor (for
        SOM method only).

        The body is intentionally empty: the ``task_handler`` decorator is
        expected to route the call to one of the
        ``_[lazy_]quantization_error_{cpu,gpu}`` implementations
        (NOTE(review): dispatch rules live in ``dasf.utils.decorators`` -
        confirm there).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).
        """
        ...