Skip to content

sklearn

Accuracy(features=None)

Bases: SelectMixin, LazyMixin, Metric

Accuracy classification score.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name Type Description Default
features list[str] | None

The features to calculate the metric for. If None, the metric uses all features in the data.

None
Source code in src/flowcean/sklearn/metrics/classification.py
16
17
18
19
20
21
22
23
24
25
26
def __init__(self, features: list[str] | None = None) -> None:
    """Create the accuracy metric.

    Args:
        features: Names of the features the metric is calculated for.
            When None, all features in the data are used.
    """
    super().__init__(features=features)

ClassificationReport(features=None)

Bases: SelectMixin, LazyMixin, Metric

Build a text report showing the main classification metrics.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name Type Description Default
features list[str] | None

The features to calculate the metric for. If None, the metric uses all features in the data.

None
Source code in src/flowcean/sklearn/metrics/classification.py
39
40
41
42
43
44
45
46
47
48
49
def __init__(self, features: list[str] | None = None) -> None:
    """Create the classification report metric.

    Args:
        features: Names of the features the metric is calculated for.
            When None, all features in the data are used.
    """
    super().__init__(features=features)

FBetaScore(*, beta=1.0, features=None)

Bases: SelectMixin, LazyMixin, Metric

F-beta score.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name Type Description Default
beta float

The beta parameter.

1.0
features list[str] | None

The features to calculate the metric for. If None, the metric uses all features in the data.

None
Source code in src/flowcean/sklearn/metrics/classification.py
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def __init__(
    self,
    *,
    beta: float = 1.0,
    features: list[str] | None = None,
) -> None:
    """Create the F-beta metric.

    Both arguments are keyword-only.

    Args:
        beta: The beta parameter of the F-beta score; stored on the
            instance as ``self.beta``.
        features: Names of the features the metric is calculated for.
            When None, all features in the data are used.
    """
    # Feature selection is handled by the base classes; only ``beta`` is
    # kept on this instance.
    super().__init__(features=features)
    self.beta = beta

PrecisionScore(features=None)

Bases: SelectMixin, LazyMixin, Metric

Precision classification score.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name Type Description Default
features list[str] | None

The features to calculate the metric for. If None, the metric uses all features in the data.

None
Source code in src/flowcean/sklearn/metrics/classification.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def __init__(self, features: list[str] | None = None) -> None:
    """Create the precision metric.

    Args:
        features: Names of the features the metric is calculated for.
            When None, all features in the data are used.
    """
    super().__init__(features=features)

Recall(features=None)

Bases: SelectMixin, LazyMixin, Metric

Recall classification score.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name Type Description Default
features list[str] | None

The features to calculate the metric for. If None, the metric uses all features in the data.

None
Source code in src/flowcean/sklearn/metrics/classification.py
114
115
116
117
118
119
120
121
122
123
124
def __init__(self, features: list[str] | None = None) -> None:
    """Create the recall metric.

    Args:
        features: Names of the features the metric is calculated for.
            When None, all features in the data are used.
    """
    super().__init__(features=features)

MaxError(feature=None)

Bases: SelectMixin, LazyMixin, Metric

Max error regression loss.

As defined by scikit-learn.

Initialize MaxError metric.

Parameters:

Name Type Description Default
feature str | None

The feature to calculate the metric for. If None, the metric expects a single feature in the data.

None
Source code in src/flowcean/sklearn/metrics/regression.py
19
20
21
22
23
24
25
26
27
def __init__(self, feature: str | None = None) -> None:
    """Create the MaxError metric for at most one named feature.

    Args:
        feature: Name of the feature the metric is calculated for. When
            None, the metric expects a single feature in the data.
    """
    # The base initializer takes a list of feature names (or None), so a
    # single name is wrapped in a one-element list.
    selected: list[str] | None
    if feature is None:
        selected = None
    else:
        selected = [feature]
    super().__init__(features=selected)

MeanAbsoluteError(features=None, multioutput='raw_values')

Bases: SelectMixin, LazyMixin, MultiOutputMixin, Metric

Mean absolute error (MAE) regression loss.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name Type Description Default
features list[str] | None

The features to calculate the metric for. If None, the metric uses all features in the data.

None
multioutput Literal['raw_values', 'uniform_average']

Defines how to aggregate multiple output values. See scikit-learn documentation for details.

'raw_values'
Source code in src/flowcean/sklearn/metrics/regression.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def __init__(
    self,
    features: list[str] | None = None,
    multioutput: Literal["raw_values", "uniform_average"] = "raw_values",
) -> None:
    """Create the mean absolute error metric.

    Args:
        features: Names of the features the metric is calculated for.
            When None, all features in the data are used.
        multioutput: How multiple output values are aggregated.
            See the [scikit-learn documentation](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html)
            for details.
    """
    super().__init__(features=features, multioutput=multioutput)

MeanAbsolutePercentageError(features=None, multioutput='raw_values')

Bases: SelectMixin, LazyMixin, MultiOutputMixin, Metric

Mean absolute percentage error (MAPE) regression loss.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name Type Description Default
features list[str] | None

The features to calculate the metric for. If None, the metric uses all features in the data.

None
multioutput Literal['raw_values', 'uniform_average']

Defines how to aggregate multiple output values. See scikit-learn documentation for details.

'raw_values'
Source code in src/flowcean/sklearn/metrics/regression.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def __init__(
    self,
    features: list[str] | None = None,
    multioutput: Literal[
        "raw_values",
        "uniform_average",
    ] = "raw_values",
) -> None:
    """Initialize the mean absolute percentage error metric.

    Args:
        features: The features to calculate the metric for. If None, the
            metric uses all features in the data.
        multioutput: Defines how to aggregate multiple output values.
            See [scikit-learn documentation](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_percentage_error.html)
            for details.
    """
    super().__init__(features=features, multioutput=multioutput)

MeanSquaredError(features=None, multioutput='raw_values')

Bases: SelectMixin, LazyMixin, MultiOutputMixin, Metric

Mean squared error (MSE) regression loss.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name Type Description Default
features list[str] | None

The features to calculate the metric for. If None, the metric uses all features in the data.

None
multioutput Literal['raw_values', 'uniform_average']

Defines how to aggregate multiple output values. See scikit-learn documentation for details.

'raw_values'
Source code in src/flowcean/sklearn/metrics/regression.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
def __init__(
    self,
    features: list[str] | None = None,
    multioutput: Literal[
        "raw_values",
        "uniform_average",
    ] = "raw_values",
) -> None:
    """Initialize the mean squared error metric.

    Args:
        features: The features to calculate the metric for. If None, the
            metric uses all features in the data.
        multioutput: Defines how to aggregate multiple output values.
            See [scikit-learn documentation](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html)
            for details.
    """
    super().__init__(features=features, multioutput=multioutput)

R2Score(features=None, multioutput='raw_values')

Bases: SelectMixin, LazyMixin, MultiOutputMixin, Metric

R^2 (coefficient of determination) regression score.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name Type Description Default
features list[str] | None

The features to calculate the metric for. If None, the metric uses all features in the data.

None
multioutput Literal['raw_values', 'uniform_average']

Defines how to aggregate multiple output values. See scikit-learn documentation for details.

'raw_values'
Source code in src/flowcean/sklearn/metrics/regression.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
def __init__(
    self,
    features: list[str] | None = None,
    multioutput: Literal[
        "raw_values",
        "uniform_average",
    ] = "raw_values",
) -> None:
    """Initialize the R^2 score metric.

    Args:
        features: The features to calculate the metric for. If None, the
            metric uses all features in the data.
        multioutput: Defines how to aggregate multiple output values.
            See [scikit-learn documentation](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html)
            for details.
    """
    super().__init__(features=features, multioutput=multioutput)

SciKitModel(estimator, *, output_names, name=None)

Bases: Model

A model that wraps a scikit-learn model.

Initialize the model.

Parameters:

Name Type Description Default
estimator SupportsPredict

The scikit-learn estimator.

required
output_names Iterable[str]

The names of the output columns.

required
name str | None

The name of the model.

None
Source code in src/flowcean/sklearn/model.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    estimator: SupportsPredict,
    *,
    output_names: Iterable[str],
    name: str | None = None,
) -> None:
    """Wrap a scikit-learn estimator as a model.

    Args:
        estimator: The scikit-learn estimator to wrap.
        output_names: Names of the output columns; materialized into a
            list, so any iterable (including a one-shot iterator) works.
        name: Name of the model. When None, the estimator's class name
            is used.
    """
    # Fall back to the estimator's class name when no name is given.
    self._name = estimator.__class__.__name__ if name is None else name
    self.estimator = estimator
    self.output_names = list(output_names)

RandomForestRegressorLearner(n_estimators=100, *, criterion='squared_error', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=1.0, max_leaf_nodes=None, min_impurity_decrease=0.0, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

Bases: SupervisedLearner

Wrapper class for sklearn's RandomForestRegressor.

Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html

Initialize the random forest learner.

Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html

Source code in src/flowcean/sklearn/random_forest.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def __init__(
    self,
    n_estimators: int = 100,
    *,
    criterion: str = "squared_error",
    max_depth: int | None = None,
    min_samples_split: int = 2,
    min_samples_leaf: int = 1,
    min_weight_fraction_leaf: float = 0.0,
    max_features: float = 1.0,
    max_leaf_nodes: int | None = None,
    min_impurity_decrease: float = 0.0,
    bootstrap: bool = True,
    oob_score: bool = False,
    n_jobs: int | None = None,
    random_state: int | None = None,
    verbose: int = 0,
    warm_start: bool = False,
    ccp_alpha: float = 0.0,
    max_samples: int | float | None = None,  # noqa: PYI041
    monotonic_cst: NDArray | None = None,
) -> None:
    """Initialize the random forest learner.

    Every argument is forwarded unchanged to sklearn's
    ``RandomForestRegressor``, with one exception: when ``random_state``
    is None, the value returned by ``get_seed()`` is used instead.

    Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html
    """
    # Compare against None explicitly: ``random_state or get_seed()`` would
    # silently discard an explicit seed of 0, because 0 is falsy.
    if random_state is None:
        random_state = get_seed()
    self.regressor = RandomForestRegressor(
        n_estimators=n_estimators,
        criterion=criterion,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        min_weight_fraction_leaf=min_weight_fraction_leaf,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        min_impurity_decrease=min_impurity_decrease,
        bootstrap=bootstrap,
        oob_score=oob_score,
        n_jobs=n_jobs,
        random_state=random_state,
        verbose=verbose,
        warm_start=warm_start,
        ccp_alpha=ccp_alpha,
        max_samples=max_samples,
        monotonic_cst=monotonic_cst,
    )

learn(inputs, outputs)

Fit the random forest regressor on the given inputs and outputs.

Source code in src/flowcean/sklearn/random_forest.py
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
@override
def learn(
    self,
    inputs: pl.LazyFrame,
    outputs: pl.LazyFrame,
) -> Model:
    """Fit the random forest regressor on the given inputs and outputs.

    Both lazy frames are collected together in a single ``collect_all``
    call before fitting.

    Args:
        inputs: Lazy frame holding the input features.
        outputs: Lazy frame holding the target values; its column names
            become the output names of the returned model.

    Returns:
        A ``SciKitModel`` wrapping the fitted regressor.
    """
    collected_inputs, collected_outputs = pl.collect_all([inputs, outputs])
    self.regressor.fit(collected_inputs, collected_outputs)
    logger.info("Using Random Forest Regressor")
    return SciKitModel(
        self.regressor,
        # Read the names from the already-collected frame: asking the
        # LazyFrame for ``columns`` would resolve its schema a second time.
        output_names=collected_outputs.columns,
    )

RegressionTree(*, dot_graph_export_path=None, criterion='squared_error', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0.0, ccp_alpha=0.0, monotonic_cst=None)

Bases: SupervisedLearner

Wrapper class for sklearn's DecisionTreeRegressor.

Reference: https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html

Initialize the regression tree learner.

Reference: https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html

Source code in src/flowcean/sklearn/regression_tree.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def __init__(
    self,
    *,
    dot_graph_export_path: None | str = None,
    criterion: str = "squared_error",
    splitter: str = "best",
    max_depth: int | None = None,
    min_samples_split: int = 2,
    min_samples_leaf: int = 1,
    min_weight_fraction_leaf: float = 0.0,
    max_features: float | None = None,
    random_state: int | None = None,
    max_leaf_nodes: int | None = None,
    min_impurity_decrease: float = 0.0,
    ccp_alpha: float = 0.0,
    monotonic_cst: NDArray | None = None,
) -> None:
    """Initialize the regression tree learner.

    All arguments except ``dot_graph_export_path`` are forwarded to
    sklearn's ``DecisionTreeRegressor``. When ``random_state`` is None,
    the value returned by ``get_seed()`` is used instead.

    Args:
        dot_graph_export_path: Stored on the instance as
            ``self.dot_graph_export_path``; not passed to the regressor.
            Presumably the target path for a DOT export of the fitted
            tree -- confirm against the ``learn`` implementation.

    Reference: https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html
    """
    # Compare against None explicitly: ``random_state or get_seed()`` would
    # silently discard an explicit seed of 0, because 0 is falsy.
    if random_state is None:
        random_state = get_seed()
    self.regressor = DecisionTreeRegressor(
        criterion=criterion,
        splitter=splitter,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        min_weight_fraction_leaf=min_weight_fraction_leaf,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        min_impurity_decrease=min_impurity_decrease,
        random_state=random_state,
        ccp_alpha=ccp_alpha,
        monotonic_cst=monotonic_cst,
    )
    self.dot_graph_export_path = dot_graph_export_path