Source code for dasf.ml.model_selection.split

#!/usr/bin/env python3

from dask_ml.model_selection import train_test_split as train_test_split_mcpu
from sklearn.model_selection import train_test_split as train_test_split_cpu

from dasf.transforms import TargeteredTransform, Transform

try:
    from cuml.model_selection import train_test_split as train_test_split_gpu
except ImportError:
    pass


class train_test_split(TargeteredTransform, Transform):
    def __init__(
        self,
        output="train",
        test_size=None,
        train_size=None,
        random_state=None,
        shuffle=None,
        blockwise=True,
        convert_mixed_types=False,
        **kwargs
    ):
        TargeteredTransform.__init__(self, **kwargs)

        self.output = output
        self.test_size = test_size
        self.train_size = train_size
        self.random_state = random_state
        self.shuffle = shuffle

        # Exclusive for Dask operations
        self.blockwise = blockwise

        self.convert_mixed_types = convert_mixed_types

[docs] def _lazy_transform_cpu(self, X): X, y = X X_train, X_test, y_train, y_test = train_test_split_mcpu( X, y, train_size=self.train_size, shuffle=self.shuffle, random_state=self.random_state, blockwise=self.blockwise, ) if self.output == "train": return X_train, y_train elif self.output == "test": return X_test, y_test
[docs] def _lazy_transform_gpu(self, X): raise NotImplementedError( "Function train_test_split() is not implemented for Dask and CuML" )
[docs] def _transform_cpu(self, X): X, y = X X_train, X_test, y_train, y_test = train_test_split_cpu( X, y, train_size=self.train_size, shuffle=self.shuffle, random_state=self.random_state, ) if self.output == "train": return X_train, y_train elif self.output == "test": return X_test, y_test
[docs] def _transform_gpu(self, X): X, y = X X_train, X_test, y_train, y_test = train_test_split_gpu( X, y, train_size=self.train_size, shuffle=self.shuffle, random_state=self.random_state, ) if self.output == "train": return X_train, y_train elif self.output == "test": return X_test, y_test