Skip to content

random_forest

RandomForestRegressorLearner(n_estimators=100, *, criterion='squared_error', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=1.0, max_leaf_nodes=None, min_impurity_decrease=0.0, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

Bases: SupervisedLearner

Wrapper class for sklearn's RandomForestRegressor.

Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html

Initialize the random forest learner.

Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html

Source code in src/flowcean/sklearn/random_forest.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def __init__(
    self,
    n_estimators: int = 100,
    *,
    criterion: str = "squared_error",
    max_depth: int | None = None,
    min_samples_split: int = 2,
    min_samples_leaf: int = 1,
    min_weight_fraction_leaf: float = 0.0,
    max_features: float = 1.0,
    max_leaf_nodes: int | None = None,
    min_impurity_decrease: float = 0.0,
    bootstrap: bool = True,
    oob_score: bool = False,
    n_jobs: int | None = None,
    random_state: int | None = None,
    verbose: int = 0,
    warm_start: bool = False,
    ccp_alpha: float = 0.0,
    max_samples: int | float | None = None,  # noqa: PYI041
    monotonic_cst: NDArray | None = None,
) -> None:
    """Initialize the random forest learner.

    Every argument is forwarded unchanged to sklearn's
    ``RandomForestRegressor``, with one exception: when ``random_state`` is
    ``None`` the value returned by ``get_seed()`` is used instead.

    Args:
        n_estimators: Forwarded to ``RandomForestRegressor``.
        criterion: Forwarded to ``RandomForestRegressor``.
        max_depth: Forwarded to ``RandomForestRegressor``.
        min_samples_split: Forwarded to ``RandomForestRegressor``.
        min_samples_leaf: Forwarded to ``RandomForestRegressor``.
        min_weight_fraction_leaf: Forwarded to ``RandomForestRegressor``.
        max_features: Forwarded to ``RandomForestRegressor``.
        max_leaf_nodes: Forwarded to ``RandomForestRegressor``.
        min_impurity_decrease: Forwarded to ``RandomForestRegressor``.
        bootstrap: Forwarded to ``RandomForestRegressor``.
        oob_score: Forwarded to ``RandomForestRegressor``.
        n_jobs: Forwarded to ``RandomForestRegressor``.
        random_state: Seed for the regressor. ``None`` falls back to
            ``get_seed()``; any integer, including ``0``, is used as given.
        verbose: Forwarded to ``RandomForestRegressor``.
        warm_start: Forwarded to ``RandomForestRegressor``.
        ccp_alpha: Forwarded to ``RandomForestRegressor``.
        max_samples: Forwarded to ``RandomForestRegressor``.
        monotonic_cst: Forwarded to ``RandomForestRegressor``.

    Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html
    """
    # Compare against None explicitly: `random_state or get_seed()` would
    # discard a caller-supplied seed of 0 because 0 is falsy.
    seed = random_state if random_state is not None else get_seed()
    self.regressor = RandomForestRegressor(
        n_estimators=n_estimators,
        criterion=criterion,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        min_weight_fraction_leaf=min_weight_fraction_leaf,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        min_impurity_decrease=min_impurity_decrease,
        bootstrap=bootstrap,
        oob_score=oob_score,
        n_jobs=n_jobs,
        random_state=seed,
        verbose=verbose,
        warm_start=warm_start,
        ccp_alpha=ccp_alpha,
        max_samples=max_samples,
        monotonic_cst=monotonic_cst,
    )

learn(inputs, outputs)

Fit the random forest regressor on the given inputs and outputs.

Source code in src/flowcean/sklearn/random_forest.py
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
@override
def learn(
    self,
    inputs: pl.LazyFrame,
    outputs: pl.LazyFrame,
) -> Model:
    """Fit the random forest regressor on the given inputs and outputs.

    Both lazy frames are collected together with ``pl.collect_all`` and the
    resulting data frames are passed to the wrapped regressor's ``fit``.

    Args:
        inputs: Lazy frame holding the input features.
        outputs: Lazy frame holding the target values.

    Returns:
        A ``SciKitModel`` wrapping the fitted regressor, with
        ``output_names`` set to the column names of ``outputs``.
    """
    collected_inputs, collected_outputs = pl.collect_all([inputs, outputs])
    self.regressor.fit(collected_inputs, collected_outputs)
    logger.info("Using Random Forest Regressor")
    return SciKitModel(
        self.regressor,
        # Take the names from the already-collected frame: reading
        # `.columns` on the LazyFrame would resolve its schema a second time.
        output_names=collected_outputs.columns,
    )