Source code for minerva.data.data_modules.har_rodrigues_24

from typing import List, Optional, Union

from lightning import LightningDataModule
from torch.utils.data import DataLoader

from minerva.data.datasets.har_rodrigues_24 import HARDatasetCPC
from minerva.utils.typing import PathLike


# Lightning data module that serves the HARDatasetCPC train/val/test splits

[docs]
class HARDataModuleCPC(LightningDataModule):
    """Lightning data module serving HARDatasetCPC train/val/test splits."""

    def __init__(
        self,
        data_path: Union[PathLike, List[PathLike]],
        input_size: int = 6,
        window: int = 60,
        overlap: int = 30,
        batch_size: int = 64,
        use_train_as_val: bool = False,
        use_val_with_train: bool = True,
        columns: Optional[List[str]] = None,
        num_workers: int = 8,
        drop_last: bool = True,
        label: Optional[str] = "standard activity code",
        transpose_data: bool = True,
    ):
        """Build the three HARDatasetCPC splits used for HAR with CPC.

        One ``HARDatasetCPC`` is created eagerly for each phase ("train",
        "val" and "test"), all sharing the same configuration. The
        dataloader methods wrap these datasets in ``DataLoader`` objects.

        Parameters
        ----------
        data_path : Union[PathLike, List[PathLike]]
            The root directory where the dataset is stored. If a list is
            provided, the datasets will be concatenated, in their respective
            order, to each partition (train, val, test).
        input_size : int, optional
            The number of input features (default is 6).
        window : int, optional
            The size of the sliding window (default is 60).
        overlap : int, optional
            The overlap size for the sliding window (default is 30).
        batch_size : int, optional
            The batch size used by every dataloader (default is 64).
        use_train_as_val : bool, optional
            Forwarded unchanged to ``HARDatasetCPC`` for every split
            (default is False). Presumably makes the validation phase reuse
            the training data -- TODO confirm against ``HARDatasetCPC``.
        use_val_with_train : bool, optional
            Whether to use the training set together with the validation
            set (default is True).
        columns : Optional[List[str]], optional
            Forwarded unchanged to ``HARDatasetCPC`` (default is None).
            Presumably the names of the data columns to read -- TODO confirm
            against ``HARDatasetCPC``.
        num_workers : int, optional
            Value passed as ``num_workers`` to every ``DataLoader``
            (default is 8).
        drop_last : bool, optional
            Value passed as ``drop_last`` to every ``DataLoader``, including
            the validation and test ones (default is True).
        label : Optional[str], optional
            The column to be used as the label. If None, no labels will be
            used. If 'return_index_as_label', the index of the data will be
            used as the label (default is "standard activity code").
        transpose_data : bool, optional
            If True, the data will be returned with shape (C, T); otherwise
            it will be returned with shape (T, C) (default is True).
        """
        super().__init__()

        # Settings kept on the instance for the dataloaders and __repr__.
        self.data_path = data_path
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.drop_last = drop_last
        self.label = label
        self.transpose_data = transpose_data

        # Every split receives the same arguments; only ``phase`` differs.
        positional_args = (data_path, input_size, window, overlap)
        shared_options = {
            "use_train_as_val": use_train_as_val,
            "use_val_with_train": use_val_with_train,
            "columns": columns,
            "label": label,
            "transpose_data": transpose_data,
        }

        self.train_dataset = HARDatasetCPC(
            *positional_args, phase="train", **shared_options
        )
        self.val_dataset = HARDatasetCPC(
            *positional_args, phase="val", **shared_options
        )
        self.test_dataset = HARDatasetCPC(
            *positional_args, phase="test", **shared_options
        )

    def train_dataloader(self):
        """Return a shuffling ``DataLoader`` over the training dataset."""
        loader_options = {
            "batch_size": self.batch_size,
            "shuffle": True,
            "drop_last": self.drop_last,
            "num_workers": self.num_workers,
        }
        return DataLoader(self.train_dataset, **loader_options)

    def val_dataloader(self):
        """Return a non-shuffling ``DataLoader`` over the validation dataset."""
        loader_options = {
            "batch_size": self.batch_size,
            "shuffle": False,
            "drop_last": self.drop_last,
            "num_workers": self.num_workers,
        }
        return DataLoader(self.val_dataset, **loader_options)

    def test_dataloader(self):
        """Return a non-shuffling ``DataLoader`` over the test dataset."""
        loader_options = {
            "batch_size": self.batch_size,
            "shuffle": False,
            "drop_last": self.drop_last,
            "num_workers": self.num_workers,
        }
        return DataLoader(self.test_dataset, **loader_options)

    def __repr__(self):
        """Return a short summary with the batch size and data path(s)."""
        summary = f"HARDataModuleCPC(batch_size={self.batch_size}, datasets={self.data_path})"
        return summary