Source code for dasf.feature_extraction.histogram
#!/usr/bin/env python3
""" Histogram module. """
import dask.array as da
import numpy as np
try:
import cupy as cp
except ImportError: # pragma: no cover
pass
from dasf.transforms.base import TargeteredTransform, Transform
[docs]
class Histogram(TargeteredTransform, Transform):
"""Operator to extract the histogram of a data.
Parameters
----------
bins : Optional[int]
Number of bins (the default is None).
range : tuple
2-element tuple with the lower and upper range of the bins. If not
provided, range is simply (X.min(), X.max()) (the default is None).
normed : bool
If the historgram must be normalized (the default is False).
weights : type
An array of weights, of the same shape as X. Each value in a only
contributes its associated weight towards the bin count
(the default is None).
density : type
If False, the result will contain the number of samples in each bin.
If True, the result is the value of the probability density function
at the bin, normalized such that the integral over the range is 1
(the default is None).
Attributes
----------
bins
range
normed
weights
density
"""
def __init__(self,
bins: int = None,
range: tuple = None,
normed: bool = False,
weights=None,
density=None,
*args,
**kwargs):
""" Generic constructor of the class Histogram. """
TargeteredTransform.__init__(self, *args, **kwargs)
self._bins = bins
self._range = range
self._normed = normed
self._weights = weights
self._density = density
[docs]
def _lazy_transform_generic(self, X):
"""
Compute the histogram of a dataset using Dask.
Parameters
----------
X : array_like
Input data. The histogram is computed over the flattened array.
Returns
-------
hist : array
The values of the histogram. See `density` and `weights` for a
description of the possible semantics. If `weights` are given,
``hist.dtype`` will be taken from `weights`.
bin_edges : array of dtype float
Return the bin edges ``(length(hist)+1)``.
"""
if self._range is None:
raise ValueError("Argument `range` cannot be None for Dask based methods.")
return da.histogram(
X,
bins=self._bins,
range=self._range,
normed=self._normed,
weights=self._weights,
density=self._density,
)
[docs]
def _transform_generic(self, X, xp):
"""
Compute the histogram of a dataset using local libraries.
Parameters
----------
X : array_like
Input data. The histogram is computed over the flattened array.
Returns
-------
hist : array
The values of the histogram. See `density` and `weights` for a
description of the possible semantics. If `weights` are given,
``hist.dtype`` will be taken from `weights`.
bin_edges : array of dtype float
Return the bin edges ``(length(hist)+1)``.
"""
kwargs = {}
if xp == np:
kwargs['normed'] = self._normed
return xp.histogram(
X,
bins=self._bins,
range=self._range,
weights=self._weights,
density=self._density,
**kwargs,
)
[docs]
def _lazy_transform_cpu(self, X):
"""
Compute the histogram of a dataset using Dask with CPUs only.
Parameters
----------
X : array_like
Input data. The histogram is computed over the flattened array.
Returns
-------
hist : array
The values of the histogram. See `density` and `weights` for a
description of the possible semantics. If `weights` are given,
``hist.dtype`` will be taken from `weights`.
bin_edges : array of dtype float
Return the bin edges ``(length(hist)+1)``.
"""
return self._lazy_transform_generic(X)
[docs]
def _lazy_transform_gpu(self, X, **kwargs):
"""
Compute the histogram of a dataset using Dask with GPUs only.
Parameters
----------
X : array_like
Input data. The histogram is computed over the flattened array.
Returns
-------
hist : array
The values of the histogram. See `density` and `weights` for a
description of the possible semantics. If `weights` are given,
``hist.dtype`` will be taken from `weights`.
bin_edges : array of dtype float
Return the bin edges ``(length(hist)+1)``.
"""
return self._lazy_transform_generic(X)
[docs]
def _transform_cpu(self, X, **kwargs):
"""
Compute the histogram of a dataset using CPU only.
Parameters
----------
X : array_like
Input data. The histogram is computed over the flattened array.
Returns
-------
hist : array
The values of the histogram. See `density` and `weights` for a
description of the possible semantics. If `weights` are given,
``hist.dtype`` will be taken from `weights`.
bin_edges : array of dtype float
Return the bin edges ``(length(hist)+1)``.
"""
return self._transform_generic(X, np)
[docs]
def _transform_gpu(self, X, **kwargs):
"""
Compute the histogram of a dataset using GPU only.
Parameters
----------
X : array_like
Input data. The histogram is computed over the flattened array.
Returns
-------
hist : array
The values of the histogram. See `density` and `weights` for a
description of the possible semantics. If `weights` are given,
``hist.dtype`` will be taken from `weights`.
bin_edges : array of dtype float
Return the bin edges ``(length(hist)+1)``.
"""
return self._transform_generic(X, cp)