Skip to content

dataset

Dataset(data)

Bases: OfflineEnvironment

A dataset environment.

This environment represents static tabular datasets.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| data | DataFrame | The data to represent. |

Initialize the dataset environment.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| data | DataFrame | The data to represent. | required |
Source code in src/flowcean/environments/dataset.py
24
25
26
27
28
29
30
31
def __init__(self, data: pl.DataFrame) -> None:
    """Initialize the dataset environment.

    Stores the given data frame on the instance and then runs the base
    class initializer.

    Args:
        data: The data to represent. The frame is stored as-is (by
            reference, no copy) in the ``data`` attribute.
    """
    # Assign the attribute before delegating to the base initializer so
    # that ``self.data`` already exists while the base class sets up.
    self.data = data
    super().__init__()

__len__()

Return the number of samples in the dataset.

Source code in src/flowcean/environments/dataset.py
37
38
39
def __len__(self) -> int:
    """Report the dataset's sample count.

    Returns:
        The length of the wrapped ``data`` object, as given by ``len()``.
    """
    sample_count = len(self.data)
    return sample_count

collect(environment, n=None, *, progress_bar=True)

Collect data from an environment.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| environment | `Iterable[DataFrame] \| Collection[DataFrame]` | The environment to collect data from. | required |
| n | `int \| None` | Number of samples to collect. If None, all samples are collected. | None |
| progress_bar | `bool \| dict[str, Any]` | Whether to show a progress bar. If a dictionary is provided, it will be passed to the progress bar. | True |

Returns:

| Type | Description |
| --- | --- |
| Dataset | The collected dataset. |

Source code in src/flowcean/environments/dataset.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def collect(
    environment: Iterable[pl.DataFrame] | Collection[pl.DataFrame],
    n: int | None = None,
    *,
    progress_bar: bool | dict[str, Any] = True,
) -> Dataset:
    """Collect data from an environment.

    Draws up to ``n`` data frames from ``environment`` and concatenates
    them vertically into a single dataset.

    Args:
        environment: The environment to collect data from.
        n: Number of samples to collect. If None, all samples are collected.
        progress_bar: Whether to show a progress bar. If a dictionary is
            provided, it will be passed to the progress bar as keyword
            arguments; ``"desc"`` and ``"total"`` are filled in only when
            the dictionary does not already contain them. The dictionary
            passed in is never modified.

    Returns:
        The collected dataset.
    """
    from flowcean.environments.dataset import Dataset

    samples = islice(environment, n)

    # Work out how many samples the progress bar should expect. A sized
    # environment may hold fewer than ``n`` items, in which case ``islice``
    # stops early, so the total is clamped to the real size.
    if isinstance(environment, Collection):
        known_size = len(environment)
    else:
        known_size = None

    if n is None:
        total = known_size
    elif known_size is None:
        total = n
    else:
        total = min(n, known_size)

    if isinstance(progress_bar, dict):
        # Merge into a fresh dict instead of calling ``setdefault`` on the
        # caller's object: mutating it would leak this call's "total" into
        # any later call that reuses the same options dictionary.
        options = {
            "desc": "Collecting samples",
            "total": total,
            **progress_bar,
        }
        samples = tqdm(samples, **options)
    elif progress_bar:
        samples = tqdm(samples, desc="Collecting samples", total=total)

    data = pl.concat(samples, how="vertical")
    return Dataset(data)