Skip to content

match_sampling_rate

MatchSamplingRate(reference_feature_name, feature_interpolation_map)

Bases: Transform

Matches the sampling rate of all time series in the DataFrame.

Interpolates the time series to match the sampling rate of the reference time series. The below example shows the usage of a MatchSamplingRate transform in a run.py file. Assuming the loaded data is represented by the table:

feature_a feature_b const
list[struct[time,struct[]]] list[struct[time,struct[]]] int
--------------------------- --------------------------- -----
[{12:26:01.0, {1.2}}, [{12:26:00.0, {1.0}}, 1
{12:26:02.0, {2.4}}, {12:26:05.0, {2.0}}]
{12:26:03.0, {3.6}},
{12:26:04.0, {4.8}}]

The following transform can be used to match the sampling rate of the time series feature_b to the sampling rate of the time series feature_a.

    ...
    environment.load()
    data = environment.get_data()
    transform = MatchSamplingRate(
        reference_feature_name="feature_a",
        feature_interpolation_map={
            "feature_b": "linear",
        },
    )
    transformed_data = transform.transform(data)
    ...

The resulting DataFrame after the transform is:

feature_a feature_b const
list[struct[time,struct[]]] list[struct[time,struct[]]] int
--------------------------- --------------------------- -----
[{12:26:01.0, {1.2}}, [{12:26:01.0, {1.2}}, 1
{12:26:02.0, {2.4}}, {12:26:02.0, {1.4}},
{12:26:03.0, {3.6}}, {12:26:03.0, {1.6}},
{12:26:04.0, {4.8}}] {12:26:04.0, {1.8}}]

Initialize the transform.

Parameters:

Name Type Description Default
reference_feature_name str

Reference timeseries feature.

required
feature_interpolation_map dict[str, MatchSamplingRateMethod]

Mapping from the name of each time series feature that should be interpolated to the interpolation method to use for it. At the moment, the only supported interpolation method is 'linear'.

required
Source code in src/flowcean/polars/transforms/match_sampling_rate.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def __init__(
    self,
    reference_feature_name: str,
    feature_interpolation_map: dict[str, MatchSamplingRateMethod],
) -> None:
    """Create the transform and store its configuration.

    Args:
        reference_feature_name: Name of the time series feature that
            serves as the reference for the sampling rate.
        feature_interpolation_map: Mapping from the name of each time
            series feature to interpolate to the interpolation method
            used for it. At the moment, the interpolation method can
            only be 'linear'.
    """
    # Both arguments are stored as given; nothing is validated or copied.
    self.feature_interpolation_map = feature_interpolation_map
    self.reference_feature_name = reference_feature_name

apply(data)

Transform the input DataFrame.

Parameters:

Name Type Description Default
data LazyFrame

Input DataFrame.

required

Returns:

Type Description
LazyFrame

Transformed DataFrame.

Source code in src/flowcean/polars/transforms/match_sampling_rate.py
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def apply(self, data: pl.LazyFrame) -> pl.LazyFrame:
    """Transform the input DataFrame.

    The lazy frame is collected, every row is handed on its own (as a
    one-row slice) to ``_transform_row``, and the per-row results are
    stacked vertically in the original row order.

    Args:
        data: Input DataFrame.

    Returns:
        Transformed DataFrame. An input without rows yields an empty
        frame without columns.

    """
    collected_data = data.collect()
    transformed_data = pl.DataFrame()
    # Ask the frame for its height instead of ``len(rows())``: the latter
    # builds a Python tuple for every row only to count them.
    for row_index in range(collected_data.height):
        transformed_data_slice = self._transform_row(
            collected_data.slice(row_index, 1),
        )
        transformed_data = transformed_data.vstack(transformed_data_slice)
    return transformed_data.lazy()

FeatureNotFoundError(feature)

Bases: Exception

Feature not found in the DataFrame.

This exception is raised when a feature is not found in the DataFrame.

Source code in src/flowcean/polars/transforms/match_sampling_rate.py
213
214
def __init__(self, feature: str) -> None:
    """Create the error for a feature that is missing from the DataFrame.

    Args:
        feature: Name of the feature that was not found.
    """
    message = f"{feature} not found"
    super().__init__(message)

UnknownInterpolationError(interpolation_method)

Bases: Exception

Interpolation method is not implemented yet.

This exception is raised when an interpolation method is requested that is not implemented.

Source code in src/flowcean/polars/transforms/match_sampling_rate.py
223
224
def __init__(self, interpolation_method: str) -> None:
    """Create the error for an interpolation method that is not implemented.

    Args:
        interpolation_method: Name of the requested interpolation method.
    """
    # NOTE(review): the "not found" wording matches the message used by
    # FeatureNotFoundError; consider a message that names the unsupported
    # interpolation method explicitly.
    message = f"{interpolation_method} not found"
    super().__init__(message)

interpolate_feature(target_feature_name, data, reference_feature, interpolation_method)

Interpolate a single time series feature.

Parameters:

Name Type Description Default
target_feature_name str

Timeseries feature to interpolate.

required
data DataFrame

Input DataFrame.

required
reference_feature DataFrame

Reference timeseries feature.

required
interpolation_method str

Interpolation method to use.

required

Returns:

Type Description
DataFrame

Interpolated timeseries feature.

Source code in src/flowcean/polars/transforms/match_sampling_rate.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
def interpolate_feature(
    target_feature_name: str,
    data: pl.DataFrame,
    reference_feature: pl.DataFrame,
    interpolation_method: str,
) -> pl.DataFrame:
    """Interpolate one time series feature onto the reference samples.

    The target feature is flattened into a table with a ``time`` column
    and one ``<target_feature_name>_<field>`` column per value field,
    merged with the reference feature, sorted by ``time``, interpolated
    and finally packed back into a single list-of-structs cell.

    Args:
        target_feature_name: Timeseries feature to interpolate.
        data: Input DataFrame.
        reference_feature: Reference timeseries feature.
        interpolation_method: Interpolation method to use.

    Returns:
        Interpolated timeseries feature.

    Raises:
        UnknownInterpolationError: If ``interpolation_method`` is anything
            other than ``"linear"``.

    """
    logger.debug("Interpolating feature %s", target_feature_name)
    if interpolation_method != "linear":
        raise UnknownInterpolationError(
            interpolation_method=interpolation_method,
        )

    def _prefix_value_column(name: str) -> str:
        # ``time`` is the join axis shared with the reference feature and
        # keeps its name; every other column gets the feature prefix.
        if name == "time":
            return name
        return target_feature_name + "_" + name

    # One row per sample: explode the list, then flatten the outer struct
    # and the nested ``value`` struct into plain columns.
    exploded = data.select(pl.col(target_feature_name).explode())
    feature_df = (
        exploded.unnest(cs.all())
        .unnest("value")
        .rename(_prefix_value_column)
    )
    feature_columns = feature_df.columns
    value_columns = feature_df.drop("time").columns

    # A diagonal concat leaves nulls where a frame lacks a column, so the
    # target columns are null on the rows that came from the reference.
    merged = pl.concat(
        [reference_feature, feature_df],
        how="diagonal",
    ).sort("time")

    # NOTE(review): ``Expr.interpolate()`` appears to fill nulls by row
    # position rather than by the ``time`` values - confirm this is the
    # intended behavior for unevenly spaced samples.
    interpolated_features = (
        merged.with_columns(pl.col(value_columns).interpolate())
        # Discards every row that still holds a null in any column.
        .drop_nulls()
        .select(feature_columns)
    )

    # Rebuild the struct{time, value{...}} layout and collapse all rows
    # into a single list cell named after the target feature.
    value_struct = pl.struct(
        {col: pl.col(col) for col in value_columns},
    ).alias("value")
    time_series_entry = pl.struct(pl.col("time"), value_struct)
    return interpolated_features.select(
        time_series_entry.alias(target_feature_name),
    ).select(pl.all().implode())