Skip to content

tree

Node(child_left=-1, child_right=-1, split_feature=-2, split_feature_idx=-2, split_threshold=-2.0, samples=0) dataclass

Represents a node in a TestTree.

Initializes a node.

Parameters:

Name Type Description Default
child_left int

Index of the left child node (-1 if leaf).

-1
child_right int

Index of the right child node (-1 if leaf).

-1
split_feature str | int

Feature used for splitting (-2 if leaf).

-2
split_feature_idx int

Index of the split feature (-2 if leaf).

-2
split_threshold float

Threshold value for splitting (-2.0 if leaf).

-2.0
samples int

Number of samples reaching this node (0 if not a leaf).

0
Source code in src/flowcean/testing/generator/ddtig/domain/model_analyser/base/tree.py
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def __init__(
    self,
    child_left: int = -1,
    child_right: int = -1,
    split_feature: str | int = -2,
    split_feature_idx: int = -2,
    split_threshold: float = -2.0,
    samples: int = 0,
) -> None:
    """Initializes a node.

    Args:
        child_left: Index of the left child node (-1 if leaf).
        child_right: Index of the right child node (-1 if leaf).
        split_feature: Feature used for splitting (-2 if leaf).
        split_feature_idx: Index of the split feature (-2 if leaf).
        split_threshold: Threshold value for splitting (-2.0 if leaf).
        samples: Number of samples reaching this node (0 if not a leaf).
    """
    self.child_left = child_left
    self.child_right = child_right
    self.split_feature = split_feature
    self.split_feature_idx = split_feature_idx
    self.split_threshold = split_threshold
    self.samples = samples

TestTree(model_tree, specs_handler) dataclass

Represents a tree structure used for generating test inputs.

Attributes:

test_tree: dict Dictionary representing the structure of a River or scikit-learn tree.

Methods:

get_n_samples() Returns the total number of samples used to train the tree.

Initializes the TestTree from a model tree.

Parameters:

Name Type Description Default
model_tree HoeffdingTreeRegressor | HoeffdingTreeClassifier | Tree

A River or scikit-learn decision tree.

required
specs_handler SystemSpecsHandler

Object for accessing feature specifications.

required
Source code in src/flowcean/testing/generator/ddtig/domain/model_analyser/base/tree.py
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
def __init__(
    self,
    model_tree: HoeffdingTreeRegressor
    | HoeffdingTreeClassifier
    | sklearnTree,
    specs_handler: SystemSpecsHandler,
) -> None:
    """Build the TestTree representation of a trained model tree.

    The converter is chosen by the type of ``model_tree``; the result is
    stored in ``self.test_tree``.

    Args:
        model_tree: A River or scikit-learn decision tree.
        specs_handler: Object for accessing feature specifications.
    """
    # Everything that is not a scikit-learn tree is handled as a River
    # tree, whose converter takes a mapping from feature names to indices.
    if not isinstance(model_tree, sklearnTree):
        name_to_idx = specs_handler.extract_feature_names_with_idx()
        self.test_tree = convert_river_tree(model_tree, name_to_idx)
        logger.info("Converted a River tree to TestTree successfully.")
        return

    # The scikit-learn converter takes the reversed mapping
    # (feature indices to feature names).
    idx_to_name = specs_handler.extract_feature_names_with_idx_reversed()
    self.test_tree = convert_sklearn_tree(model_tree, idx_to_name)
    logger.info("Converted a scikit-learn tree to TestTree successfully.")

get_n_samples()

Returns the total number of samples used to train the tree.

Returns:

Type Description
int

Total number of samples.

Source code in src/flowcean/testing/generator/ddtig/domain/model_analyser/base/tree.py
213
214
215
216
217
218
219
220
221
222
223
def get_n_samples(self) -> int:
    """Sum up the sample counts stored in the nodes of the tree.

    Nodes with a sample count of zero do not contribute to the total.

    Returns:
        Total number of samples.
    """
    return sum(
        node.samples for node in self.test_tree.values() if node.samples != 0
    )

convert_river_tree(river_tree, feature_dict)

Extract the structure of a River Hoeffding tree.

Store the extracted structure in a dictionary.

Parameters:

Name Type Description Default
river_tree HoeffdingTreeRegressor | HoeffdingTreeClassifier

A River Hoeffding tree.

required
feature_dict dict

Dictionary mapping feature names to their indices.

required

Returns:

Type Description
dict

A dictionary representing the tree structure.

Source code in src/flowcean/testing/generator/ddtig/domain/model_analyser/base/tree.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def convert_river_tree(
    river_tree: HoeffdingTreeRegressor | HoeffdingTreeClassifier,
    feature_dict: dict,
) -> dict:
    """Extract the structure of a River Hoeffding tree.

    The tree is traversed depth-first. Nodes are numbered in visiting
    order, starting at 0 for the root, and stored in a dictionary that
    maps each number to a ``Node``.

    Args:
        river_tree: A River Hoeffding tree that already has a root node.
        feature_dict: Dictionary mapping feature names to their indices.

    Returns:
        A dictionary representing the tree structure.

    Raises:
        ValueError: If the tree has no root node (its ``_root`` attribute
            is missing or ``None``), or if the threshold text of a branch
            cannot be parsed as a float.
        KeyError: If a split feature is not contained in ``feature_dict``.
    """
    root = vars(river_tree).get("_root")
    if root is None:
        # Without this guard the traversal would hand ``None`` to the loop
        # below as if it were a leaf and fail with an unrelated
        # AttributeError on ``total_weight``.
        msg = "Cannot convert a River tree that has no root node."
        raise ValueError(msg)

    counter = 0

    # Traverse the tree using depth-first search.
    # Yields: parent index, parent node, child node, child index, branch
    def iterate(node: object | None = None) -> Iterator[tuple]:
        nonlocal counter

        if node is None:
            # Initial call: emit the root itself (it has no parent and
            # gets index 0), then descend into it.
            yield None, None, root, 0, None
            yield from iterate(root)
            return

        # At this point ``counter`` holds the index assigned to ``node``.
        parent_no = counter

        if isinstance(node, DTBranch):
            for branch_index, child in enumerate(node.children):
                counter += 1
                yield parent_no, node, child, counter, branch_index
                if isinstance(child, DTBranch):
                    yield from iterate(child)

    tree = {}

    # Build tree structure from traversal
    for parent_no, parent, child, child_no, branch_index in iterate():
        # Store new node in dict
        tree.setdefault(child_no, Node())

        # Store feature of node if node is not a leaf
        if isinstance(child, DTBranch):
            # River's runtime node API exposes `feature`,
            # but type stubs do not.
            child_feature = cast("Any", child).feature
            tree[child_no].split_feature = child_feature
            tree[child_no].split_feature_idx = feature_dict[child_feature]
        else:
            # Get #samples for leaf node
            tree[child_no].samples = int(child.total_weight)

        # If node has a parent, store node as either child left
        # or child right of parent node. Branch 0 becomes the left child,
        # every other branch the right child.
        if parent_no is not None:
            if branch_index == 0:
                tree[parent_no].child_left = child_no
            else:
                tree[parent_no].child_right = child_no
            # The threshold is the last space-separated token of the
            # branch's shortened text representation.
            # NOTE(review): confirm that the shortened representation
            # carries the threshold at full precision.
            tree[parent_no].split_threshold = float(
                parent.repr_branch(branch_index, shorten=True).split(" ")[-1],
            )
    return tree

convert_sklearn_tree(sklearn_tree, feature_dict)

Extract the structure of a scikit-learn decision tree.

Store the extracted structure in a dictionary.

Parameters:

Name Type Description Default
sklearn_tree Tree

A scikit-learn decision tree.

required
feature_dict dict

Dictionary mapping feature indices to feature names.

required

Returns:

Type Description
dict

A dictionary representing the tree structure.

Source code in src/flowcean/testing/generator/ddtig/domain/model_analyser/base/tree.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
def convert_sklearn_tree(
    sklearn_tree: sklearnTree,
    feature_dict: dict,
) -> dict:
    """Extract the structure of a scikit-learn decision tree.

    Every node of the scikit-learn tree is copied into a ``Node`` and
    stored in a dictionary under its node index. Values read from the
    tree are converted to built-in ``int``/``float`` so that the nodes
    hold the types declared by ``Node``, as ``convert_river_tree`` does.

    Args:
        sklearn_tree: A scikit-learn decision tree.
        feature_dict: Dictionary mapping feature indices to feature names.

    Returns:
        A dictionary representing the tree structure.
    """
    # Scikit-learn's runtime Tree object exposes these attributes, but the
    # available type information may be incomplete for static checkers.
    sk_tree = cast("Any", sklearn_tree)
    n_nodes = int(sk_tree.node_count)
    tree = {}

    # Traverse each node in the scikit-learn tree
    for node in range(n_nodes):
        # Convert once at the boundary; the converted index is used both
        # for the name lookup and for the stored attribute.
        feature_idx = int(sk_tree.feature[node])

        # Feature indices without an entry in feature_dict get the
        # placeholder -2 as feature name.
        split_feature = feature_dict.get(feature_idx)
        if split_feature is None:
            split_feature = -2

        # Store info of sklearn node in our own Node object
        new_node = Node(
            child_left=int(sk_tree.children_left[node]),
            child_right=int(sk_tree.children_right[node]),
            split_feature=split_feature,
            split_feature_idx=feature_idx,
            split_threshold=float(sk_tree.threshold[node]),
        )

        # Get #samples for leaf node (a node without any children)
        if new_node.child_left == -1 and new_node.child_right == -1:
            new_node.samples = int(sk_tree.n_node_samples[node])
        tree[node] = new_node
    return tree