Source code for minerva.data.data_modules.har_rodrigues_24

from typing import List, Optional, Union

from lightning import LightningDataModule
from torch.utils.data import DataLoader

from minerva.data.datasets.har_rodrigues_24 import HARDatasetCPC
from minerva.utils.typing import PathLike


# Lightning data module that provides the train/val/test dataloaders for HAR with CPC
class HARDataModuleCPC(LightningDataModule):
    """Lightning data module serving the HAR dataset for CPC."""

    def __init__(
        self,
        data_path: Union[PathLike, List[PathLike]],
        input_size: int = 6,
        window: int = 60,
        overlap: int = 30,
        batch_size: int = 64,
        use_train_as_val: bool = False,
        use_val_with_train: bool = True,
        columns: Optional[List[str]] = None,
        num_workers: int = 8,
        drop_last: bool = True,
        label: Optional[str] = "standard activity code",
        transpose_data: bool = True,
    ):
        """Data module for Human Activity Recognition (HAR) using CPC.

        This class handles the creation of training, validation, and test
        dataloaders for the HAR dataset. It uses the HARDatasetCPC class to
        load the data. The three datasets (train, val, test) are built
        eagerly, in that order, when the module is constructed.

        Parameters
        ----------
        data_path : Union[PathLike, List[PathLike]]
            The root directory where the dataset is stored. If a list is
            given, the datasets will be concatenated, in their respective
            order, to each partition (train, val, test).
        input_size : int, optional
            The number of input features (default is 6).
        window : int, optional
            The size of the sliding window (default is 60).
        overlap : int, optional
            The overlap size for the sliding window (default is 30).
        batch_size : int, optional
            The batch size for the dataloaders (default is 64).
        use_train_as_val : bool, optional
            Forwarded unchanged to HARDatasetCPC for every partition
            (default is False). Presumably makes the validation partition
            reuse the training data -- confirm against HARDatasetCPC.
        use_val_with_train : bool, optional
            Whether to use the training set together with the validation
            set (default is True). Forwarded unchanged to HARDatasetCPC.
        columns : Optional[List[str]], optional
            Forwarded unchanged to HARDatasetCPC (default is None).
            Presumably the names of the feature columns to read; see
            HARDatasetCPC for the behaviour when None.
        num_workers : int, optional
            Number of worker processes used by every dataloader
            (default is 8).
        drop_last : bool, optional
            Passed as ``drop_last`` to every dataloader, including the
            validation and test ones (default is True). When True, a
            trailing batch smaller than ``batch_size`` is discarded.
        label : Optional[str], optional
            The column to be used as the label (default is
            "standard activity code"). If None, no labels will be used. If
            'return_index_as_label', the index of the data will be used as
            the label.
        transpose_data : bool, optional
            If True, the data will be returned as a vector of shape (C, T),
            else the data will be returned as a vector of shape (T, C)
            (default is True).
        """
        super().__init__()
        self.data_path = data_path
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.drop_last = drop_last
        self.label = label
        self.transpose_data = transpose_data

        def build_dataset(phase: str) -> HARDatasetCPC:
            # Every partition shares the exact same configuration; only the
            # phase differs, so build them all through one code path.
            return HARDatasetCPC(
                data_path,
                input_size,
                window,
                overlap,
                phase=phase,
                use_train_as_val=use_train_as_val,
                use_val_with_train=use_val_with_train,
                columns=columns,
                label=label,
                transpose_data=transpose_data,
            )

        self.train_dataset = build_dataset("train")
        self.val_dataset = build_dataset("val")
        self.test_dataset = build_dataset("test")

    def _make_dataloader(self, dataset, shuffle: bool) -> DataLoader:
        """Wrap ``dataset`` in a DataLoader using this module's settings."""
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            drop_last=self.drop_last,
            num_workers=self.num_workers,
        )

    def train_dataloader(self) -> DataLoader:
        """Return the dataloader over the training dataset (shuffled)."""
        return self._make_dataloader(self.train_dataset, shuffle=True)

    def val_dataloader(self) -> DataLoader:
        """Return the dataloader over the validation dataset (not shuffled)."""
        return self._make_dataloader(self.val_dataset, shuffle=False)

    def test_dataloader(self) -> DataLoader:
        """Return the dataloader over the test dataset (not shuffled)."""
        return self._make_dataloader(self.test_dataset, shuffle=False)

    def __repr__(self) -> str:
        """Return a short description with the batch size and data path(s)."""
        # type(self).__name__ keeps the output correct for subclasses while
        # producing the same text as before for this class.
        return (
            f"{type(self).__name__}"
            f"(batch_size={self.batch_size}, datasets={self.data_path})"
        )