sklearn

`Accuracy(features=None)`

Bases: SelectMixin, LazyMixin, Metric

Accuracy classification score.

Initialize metric.

Parameters:

Name	Type	Description	Default
`features`	`list[str] \| None`	The features to calculate the metric for. If None, the metric uses all features in the data.	`None`

Source code in src/flowcean/sklearn/metrics/classification.py

def __init__(self, features: list[str] | None = None) -> None:
    """Set up the metric, optionally restricted to selected features.

    Args:
        features: Names of the features the metric is calculated for.
            When None, all features in the data are used.
    """
    super().__init__(features=features)

`ClassificationReport(features=None)`

Bases: SelectMixin, LazyMixin, Metric

Build a text report showing the main classification metrics.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name	Type	Description	Default
`features`	`list[str] \| None`	The features to calculate the metric for. If None, the metric uses all features in the data.	`None`

Source code in src/flowcean/sklearn/metrics/classification.py

def __init__(self, features: list[str] | None = None) -> None:
    """Set up the report, optionally restricted to selected features.

    Args:
        features: Names of the features the metric is calculated for.
            When None, all features in the data are used.
    """
    super().__init__(features=features)

`FBetaScore(*, beta=1.0, features=None)`

Bases: SelectMixin, LazyMixin, Metric

F-beta score.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name	Type	Description	Default
`beta`	`float`	The beta parameter.	`1.0`
`features`	`list[str] \| None`	The features to calculate the metric for. If None, the metric uses all features in the data.	`None`

Source code in src/flowcean/sklearn/metrics/classification.py

def __init__(
    self,
    *,
    beta: float = 1.0,
    features: list[str] | None = None,
) -> None:
    """Set up the metric with its beta value and feature selection.

    Both arguments are keyword-only.

    Args:
        beta: The beta parameter; stored on the instance as ``beta``.
        features: Names of the features the metric is calculated for.
            When None, all features in the data are used.
    """
    # The feature selection is handled by the base classes.
    super().__init__(features=features)
    self.beta = beta

`PrecisionScore(features=None)`

Bases: SelectMixin, LazyMixin, Metric

Precision classification score.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name	Type	Description	Default
`features`	`list[str] \| None`	The features to calculate the metric for. If None, the metric uses all features in the data.	`None`

Source code in src/flowcean/sklearn/metrics/classification.py

def __init__(self, features: list[str] | None = None) -> None:
    """Set up the precision metric for the selected features.

    Args:
        features: Names of the features the metric is calculated for.
            When None, all features in the data are used.
    """
    super().__init__(features=features)

`Recall(features=None)`

Bases: SelectMixin, LazyMixin, Metric

Recall classification score.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name	Type	Description	Default
`features`	`list[str] \| None`	The features to calculate the metric for. If None, the metric uses all features in the data.	`None`

Source code in src/flowcean/sklearn/metrics/classification.py

def __init__(self, features: list[str] | None = None) -> None:
    """Set up the recall metric for the selected features.

    Args:
        features: Names of the features the metric is calculated for.
            When None, all features in the data are used.
    """
    super().__init__(features=features)

`MaxError(feature=None)`

Bases: SelectMixin, LazyMixin, Metric

Max error regression loss.

As defined by scikit-learn.

Initialize MaxError metric.

Parameters:

Name	Type	Description	Default
`feature`	`str \| None`	The feature to calculate the metric for. If None, the metric expects a single feature in the data.	`None`

Source code in src/flowcean/sklearn/metrics/regression.py

def __init__(self, feature: str | None = None) -> None:
    """Set up the MaxError metric for at most one feature.

    Args:
        feature: Name of the feature the metric is calculated for. When
            None, the data is expected to contain a single feature.
    """
    # The base initializer takes a list of names (or None), so a single
    # name is wrapped in a one-element list.
    selected = None
    if feature is not None:
        selected = [feature]
    super().__init__(features=selected)

`MeanAbsoluteError(features=None, multioutput='raw_values')`

Bases: SelectMixin, LazyMixin, MultiOutputMixin, Metric

Mean absolute error (MAE) regression loss.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name	Type	Description	Default
`features`	`list[str] \| None`	The features to calculate the metric for. If None, the metric uses all features in the data.	`None`
`multioutput`	`Literal['raw_values', 'uniform_average']`	Defines how to aggregate multiple output values. See scikit-learn documentation for details.	`'raw_values'`

Source code in src/flowcean/sklearn/metrics/regression.py

def __init__(
    self,
    features: list[str] | None = None,
    multioutput: Literal["raw_values", "uniform_average"] = "raw_values",
) -> None:
    """Set up the metric with a feature selection and aggregation mode.

    Args:
        features: Names of the features the metric is calculated for.
            When None, all features in the data are used.
        multioutput: How multiple output values are aggregated. See the
            [scikit-learn documentation](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html)
            for details.
    """
    super().__init__(features=features, multioutput=multioutput)

`MeanAbsolutePercentageError(features=None, multioutput='raw_values')`

Bases: SelectMixin, LazyMixin, MultiOutputMixin, Metric

Mean absolute percentage error (MAPE) regression loss.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name	Type	Description	Default
`features`	`list[str] \| None`	The features to calculate the metric for. If None, the metric uses all features in the data.	`None`
`multioutput`	`Literal['raw_values', 'uniform_average']`	Defines how to aggregate multiple output values. See scikit-learn documentation for details.	`'raw_values'`

Source code in src/flowcean/sklearn/metrics/regression.py

def __init__(
    self,
    features: list[str] | None = None,
    multioutput: Literal[
        "raw_values",
        "uniform_average",
    ] = "raw_values",
) -> None:
    """Initialize metric.

    Args:
        features: The features to calculate the metric for. If None, the
            metric uses all features in the data.
        multioutput: Defines how to aggregate multiple output values.
            See [scikit-learn documentation](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_percentage_error.html)
            for details.
    """
    super().__init__(features=features, multioutput=multioutput)

`MeanSquaredError(features=None, multioutput='raw_values')`

Bases: SelectMixin, LazyMixin, MultiOutputMixin, Metric

Mean squared error (MSE) regression loss.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name	Type	Description	Default
`features`	`list[str] \| None`	The features to calculate the metric for. If None, the metric uses all features in the data.	`None`
`multioutput`	`Literal['raw_values', 'uniform_average']`	Defines how to aggregate multiple output values. See scikit-learn documentation for details.	`'raw_values'`

Source code in src/flowcean/sklearn/metrics/regression.py

def __init__(
    self,
    features: list[str] | None = None,
    multioutput: Literal[
        "raw_values",
        "uniform_average",
    ] = "raw_values",
) -> None:
    """Initialize metric.

    Args:
        features: The features to calculate the metric for. If None, the
            metric uses all features in the data.
        multioutput: Defines how to aggregate multiple output values.
            See [scikit-learn documentation](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html)
            for details.
    """
    super().__init__(features=features, multioutput=multioutput)

`R2Score(features=None, multioutput='raw_values')`

Bases: SelectMixin, LazyMixin, MultiOutputMixin, Metric

R^2 (coefficient of determination) regression score.

As defined by scikit-learn.

Initialize metric.

Parameters:

Name	Type	Description	Default
`features`	`list[str] \| None`	The features to calculate the metric for. If None, the metric uses all features in the data.	`None`
`multioutput`	`Literal['raw_values', 'uniform_average']`	Defines how to aggregate multiple output values. See scikit-learn documentation for details.	`'raw_values'`

Source code in src/flowcean/sklearn/metrics/regression.py

def __init__(
    self,
    features: list[str] | None = None,
    multioutput: Literal[
        "raw_values",
        "uniform_average",
    ] = "raw_values",
) -> None:
    """Initialize metric.

    Args:
        features: The features to calculate the metric for. If None, the
            metric uses all features in the data.
        multioutput: Defines how to aggregate multiple output values.
            See [scikit-learn documentation](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html)
            for details.
    """
    super().__init__(features=features, multioutput=multioutput)

`SciKitModel(estimator, *, output_names, name=None)`

Bases: Model

A model that wraps a scikit-learn model.

Initialize the model.

Parameters:

Name	Type	Description	Default
`estimator`	`SupportsPredict`	The scikit-learn estimator.	required
`output_names`	`Iterable[str]`	The names of the output columns.	required
`name`	`str \| None`	The name of the model.	`None`

Source code in src/flowcean/sklearn/model.py

def __init__(
    self,
    estimator: SupportsPredict,
    *,
    output_names: Iterable[str],
    name: str | None = None,
) -> None:
    """Wrap a scikit-learn estimator as a model.

    Args:
        estimator: The scikit-learn estimator to wrap.
        output_names: Names of the output columns; materialized into a
            list, so any iterable of strings is accepted.
        name: Name of the model. When None, the estimator's class name
            is used instead.
    """
    self._name = estimator.__class__.__name__ if name is None else name
    self.estimator = estimator
    self.output_names = list(output_names)

`RandomForestRegressorLearner(n_estimators=100, *, criterion='squared_error', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=1.0, max_leaf_nodes=None, min_impurity_decrease=0.0, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)`

Bases: SupervisedLearner

Wrapper class for sklearn's RandomForestRegressor.

Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html

Initialize the random forest learner.

Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html

Source code in src/flowcean/sklearn/random_forest.py

def __init__(
    self,
    n_estimators: int = 100,
    *,
    criterion: str = "squared_error",
    max_depth: int | None = None,
    min_samples_split: int = 2,
    min_samples_leaf: int = 1,
    min_weight_fraction_leaf: float = 0.0,
    max_features: float = 1.0,
    max_leaf_nodes: int | None = None,
    min_impurity_decrease: float = 0.0,
    bootstrap: bool = True,
    oob_score: bool = False,
    n_jobs: int | None = None,
    random_state: int | None = None,
    verbose: int = 0,
    warm_start: bool = False,
    ccp_alpha: float = 0.0,
    max_samples: int | float | None = None,  # noqa: PYI041
    monotonic_cst: NDArray | None = None,
) -> None:
    """Initialize the random forest learner.

    Every argument except ``random_state`` is forwarded unchanged to
    scikit-learn's ``RandomForestRegressor``.

    Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html

    Args:
        random_state: Seed passed to the regressor. If None, the value
            returned by ``get_seed()`` is used instead.
    """
    # Compare against None instead of relying on truthiness: an explicit
    # seed of 0 is a valid choice and must not be replaced by get_seed().
    seed = get_seed() if random_state is None else random_state
    self.regressor = RandomForestRegressor(
        n_estimators=n_estimators,
        criterion=criterion,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        min_weight_fraction_leaf=min_weight_fraction_leaf,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        min_impurity_decrease=min_impurity_decrease,
        bootstrap=bootstrap,
        oob_score=oob_score,
        n_jobs=n_jobs,
        random_state=seed,
        verbose=verbose,
        warm_start=warm_start,
        ccp_alpha=ccp_alpha,
        max_samples=max_samples,
        monotonic_cst=monotonic_cst,
    )

`learn(inputs, outputs)`

Fit the random forest regressor on the given inputs and outputs.

Source code in src/flowcean/sklearn/random_forest.py

@override
def learn(
    self,
    inputs: pl.LazyFrame,
    outputs: pl.LazyFrame,
) -> Model:
    """Fit the random forest regressor on the given inputs and outputs.

    Args:
        inputs: Lazy frame holding the input data; collected before fitting.
        outputs: Lazy frame holding the output data; collected before
            fitting.

    Returns:
        A SciKitModel wrapping the fitted regressor, with the column names
        of ``outputs`` as its output names.
    """
    # Materialize both lazy frames with a single collect_all call.
    collected_inputs, collected_outputs = pl.collect_all([inputs, outputs])
    self.regressor.fit(collected_inputs, collected_outputs)
    logger.info("Using Random Forest Regressor")
    return SciKitModel(
        self.regressor,
        # Take the names from the collected frame: asking the LazyFrame for
        # its columns would resolve the lazy schema a second time.
        output_names=collected_outputs.columns,
    )

`RegressionTree(*, dot_graph_export_path=None, criterion='squared_error', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0.0, ccp_alpha=0.0, monotonic_cst=None)`

Bases: SupervisedLearner

Wrapper class for sklearn's DecisionTreeRegressor.

Reference: https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html

Initialize the regression tree learner.

Reference: https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html

Source code in src/flowcean/sklearn/regression_tree.py

def __init__(
    self,
    *,
    dot_graph_export_path: None | str = None,
    criterion: str = "squared_error",
    splitter: str = "best",
    max_depth: int | None = None,
    min_samples_split: int = 2,
    min_samples_leaf: int = 1,
    min_weight_fraction_leaf: float = 0.0,
    max_features: float | None = None,
    random_state: int | None = None,
    max_leaf_nodes: int | None = None,
    min_impurity_decrease: float = 0.0,
    ccp_alpha: float = 0.0,
    monotonic_cst: NDArray | None = None,
) -> None:
    """Initialize the regression tree learner.

    Every argument except ``dot_graph_export_path`` and ``random_state`` is
    forwarded unchanged to scikit-learn's ``DecisionTreeRegressor``.

    Reference: https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html

    Args:
        dot_graph_export_path: Stored on the instance as
            ``dot_graph_export_path``; not passed to the regressor.
        random_state: Seed passed to the regressor. If None, the value
            returned by ``get_seed()`` is used instead.
    """
    # Compare against None instead of relying on truthiness: an explicit
    # seed of 0 is a valid choice and must not be replaced by get_seed().
    seed = get_seed() if random_state is None else random_state
    self.regressor = DecisionTreeRegressor(
        criterion=criterion,
        splitter=splitter,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        min_weight_fraction_leaf=min_weight_fraction_leaf,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        min_impurity_decrease=min_impurity_decrease,
        random_state=seed,
        ccp_alpha=ccp_alpha,
        monotonic_cst=monotonic_cst,
    )
    self.dot_graph_export_path = dot_graph_export_path