diff --git a/darts/tests/utils/tabularization/test_create_lagged_training_data.py b/darts/tests/utils/tabularization/test_create_lagged_training_data.py
index cd4f32f1e9..b1efa3ea0e 100644
--- a/darts/tests/utils/tabularization/test_create_lagged_training_data.py
+++ b/darts/tests/utils/tabularization/test_create_lagged_training_data.py
@@ -72,7 +72,8 @@ def get_feature_times(
         output_chunk_length: Optional[int],
         max_samples_per_ts: Optional[int],
         output_chunk_shift: int,
-    ):
+        stride: int,
+    ) -> pd.Index:
         """
         Helper function that returns the times shared by all specified series that can be used
         to create features and labels. This is performed by using the helper functions
@@ -101,6 +102,9 @@ def get_feature_times(
             future, lags_future
         )
         times = times.intersection(future_times)
+        # Apply stride
+        if stride > 1:
+            times = times[::stride]
         # Take most recent `max_samples_per_ts` samples if requested:
         if (max_samples_per_ts is not None) and (len(times) > max_samples_per_ts):
             times = times[-max_samples_per_ts:]
@@ -433,6 +437,7 @@ def helper_create_expected_lagged_data(
         output_chunk_shift: int,
         multi_models: bool,
         max_samples_per_ts: Optional[int],
+        stride: int,
     ) -> tuple[np.ndarray, np.ndarray, Any]:
         """Helper function to create the X and y arrays by building them block by block (one per covariates)."""
         feats_times = self.get_feature_times(
@@ -445,6 +450,7 @@ def helper_create_expected_lagged_data(
             output_chunk_length,
             max_samples_per_ts,
             output_chunk_shift,
+            stride,
         )
         # Construct `X` by constructing each block, then concatenate these
         # blocks together along component axis:
@@ -487,6 +493,7 @@ def helper_check_lagged_data(
         max_samples_per_ts: Optional[int],
         use_moving_windows: bool,
         concatenate: bool,
+        stride: int,
         **kwargs,
     ):
         """Helper function to call the `create_lagged_training_data()` method with lags argument either in the list
@@ -537,6 +544,7 @@ def helper_check_lagged_data(
             use_moving_windows=use_moving_windows,
             output_chunk_shift=output_chunk_shift,
             concatenate=concatenate,
+            stride=stride,
         )
         # should have the exact same number of indexes
         assert len(times) == len(expected_times_x) == len(expected_times_y)
@@ -642,10 +650,13 @@ def helper_check_lagged_data(
     min_n_ts = 8 + max(output_chunk_shift_combos)
 
     @pytest.mark.parametrize(
-        "series_type",
-        ["datetime", "integer"],
+        "params",
+        product(
+            ["datetime", "integer"],  # series_type
+            [1, 3],  # stride
+        ),
     )
-    def test_lagged_training_data_equal_freq(self, series_type: str):
+    def test_lagged_training_data_equal_freq(self, params):
         """
         Tests that `create_lagged_training_data` produces `X`, `y`, and `times`
         outputs that are consistent with those generated by using the helper
@@ -659,6 +670,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str):
         are of the same frequency, the implementation of the 'moving window'
         method is being tested here.
         """
+        series_type, stride = params
         # Define datetime index timeseries - each has different number of components,
         # different start times, different lengths, and different values, but
         # they're all of the same frequency:
@@ -749,6 +761,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str):
                         output_chunk_shift,
                         multi_models,
                         max_samples_per_ts,
+                        stride,
                     )
                 )
 
@@ -770,6 +783,7 @@ def test_lagged_training_data_equal_freq(self, series_type: str):
                 "max_samples_per_ts": max_samples_per_ts,
                 "use_moving_windows": True,
                 "concatenate": True,
+                "stride": stride,
             }
 
             self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs)
@@ -777,10 +791,13 @@ def test_lagged_training_data_equal_freq(self, series_type: str):
             self.helper_check_lagged_data(convert_lags_to_dict=True, **kwargs)
 
     @pytest.mark.parametrize(
-        "series_type",
-        ["datetime", "integer"],
+        "params",
+        product(
+            ["datetime", "integer"],  # series_type
+            [1, 3],  # stride
+        ),
     )
-    def test_lagged_training_data_unequal_freq(self, series_type):
+    def test_lagged_training_data_unequal_freq(self, params):
         """
         Tests that `create_lagged_training_data` produces `X`, `y`, and `times`
         outputs that are consistent with those generated by using the helper
@@ -794,6 +811,7 @@ def test_lagged_training_data_unequal_freq(self, series_type):
         are *not* of the same frequency, the implementation of the 'time intersection'
         method is being tested here.
         """
+        series_type, stride = params
         # Define range index timeseries - each has different number of components,
         # different start times, different lengths, different values, and different
         # frequencies:
@@ -869,6 +887,7 @@ def test_lagged_training_data_unequal_freq(self, series_type):
                         output_chunk_shift,
                         multi_models,
                         max_samples_per_ts,
+                        stride,
                     )
                 )
 
@@ -890,6 +909,7 @@ def test_lagged_training_data_unequal_freq(self, series_type):
                 "max_samples_per_ts": max_samples_per_ts,
                 "use_moving_windows": False,
                 "concatenate": True,
+                "stride": stride,
             }
 
             self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs)
@@ -901,10 +921,13 @@ def test_lagged_training_data_unequal_freq(self, series_type):
             )
 
     @pytest.mark.parametrize(
-        "series_type",
-        ["datetime", "integer"],
+        "params",
+        product(
+            ["datetime", "integer"],  # series_type
+            [1, 3],  # stride
+        ),
     )
-    def test_lagged_training_data_method_consistency(self, series_type):
+    def test_lagged_training_data_method_consistency(self, params):
         """
         Tests that `create_lagged_training_data` produces the same result
         when `use_moving_windows = False` and when `use_moving_windows = True`
@@ -918,6 +941,7 @@ def test_lagged_training_data_method_consistency(self, series_type):
         # Define datetime index timeseries - each has different number of components,
         # different start times, different lengths, different values, and of
         # different frequencies:
+        series_type, stride = params
         if series_type == "integer":
             target = helper_create_multivariate_linear_timeseries(
                 n_components=2, start_value=0, end_value=10, start=2, length=20, freq=1
@@ -991,6 +1015,7 @@ def test_lagged_training_data_method_consistency(self, series_type):
                 multi_models=multi_models,
                 use_moving_windows=True,
                 output_chunk_shift=output_chunk_shift,
+                stride=stride,
             )
             # Using time intersection method:
             X_ti, y_ti, times_ti, _, _ = create_lagged_training_data(
@@ -1006,6 +1031,7 @@ def test_lagged_training_data_method_consistency(self, series_type):
                 multi_models=multi_models,
                 use_moving_windows=False,
                 output_chunk_shift=output_chunk_shift,
+                stride=stride,
             )
             assert np.allclose(X_mw, X_ti)
             assert np.allclose(y_mw, y_ti)
@@ -1021,6 +1047,7 @@ def test_lagged_training_data_method_consistency(self, series_type):
             [0, 1, 3],
             [False, True],
             ["datetime", "integer"],
+            [1, 3],  # stride
         ),
     )
     def test_lagged_training_data_single_lag_single_component_same_series(self, config):
@@ -1032,7 +1059,7 @@ def test_lagged_training_data_single_lag_single_component_same_series(self, conf
         same time series, and the expected `y` can be formed by taking a single
         slice from the `target`.
         """
-        output_chunk_shift, use_moving_windows, series_type = config
+        output_chunk_shift, use_moving_windows, series_type, stride = config
         if series_type == "integer":
             series = linear_timeseries(start=0, length=15)
         else:
@@ -1069,6 +1096,12 @@ def test_lagged_training_data_single_lag_single_component_same_series(self, conf
         )
         expected_X = np.expand_dims(expected_X, axis=-1)
 
+        if stride > 1:
+            expected_X = expected_X[::stride]
+            expected_y = expected_y[::stride]
+            expected_times_x = expected_times_x[::stride]
+            expected_times_y = expected_times_y[::stride]
+
         kwargs = {
             "expected_X": expected_X,
             "expected_y": expected_y,
@@ -1087,6 +1120,7 @@ def test_lagged_training_data_single_lag_single_component_same_series(self, conf
             "max_samples_per_ts": None,
             "use_moving_windows": use_moving_windows,
             "concatenate": True,
+            "stride": stride,
         }
 
         self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs)
@@ -1196,6 +1230,7 @@ def test_lagged_training_data_extend_past_and_future_covariates(self, config):
             "max_samples_per_ts": max_samples_per_ts,
             "use_moving_windows": use_moving_windows,
             "concatenate": True,
+            "stride": 1,
         }
 
         self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs)
@@ -1211,8 +1246,8 @@ def test_lagged_training_data_extend_past_and_future_covariates(self, config):
 
     @pytest.mark.parametrize(
         "config",
-        itertools.product(
-            [0, 1, 3], [False, True], ["datetime", "integer"], [False, True]
+        product(
+            [0, 1, 3], [False, True], ["datetime", "integer"], [False, True], [1, 3]
         ),
     )
     def test_lagged_training_data_single_point(self, config):
@@ -1220,7 +1255,9 @@ def test_lagged_training_data_single_point(self, config):
         Tests that `create_lagged_training_data` correctly handles case
         where only one possible training point can be generated.
         """
-        output_chunk_shift, use_moving_windows, series_type, multi_models = config
+        output_chunk_shift, use_moving_windows, series_type, multi_models, stride = (
+            config
+        )
         # Can only create feature using first value of series (i.e. `0`)
         # and can only create label using last value of series (i.e. `1`)
         if series_type == "integer":
@@ -1244,6 +1281,11 @@ def test_lagged_training_data_single_point(self, config):
             length=1,
             freq=target.freq,
         )
+        if stride > 1:
+            expected_X = expected_X[::stride]
+            expected_y = expected_y[::stride]
+            expected_times = expected_times[::stride]
+
         # Test correctness for 'moving window' and for 'time intersection' methods, as well
         # as for different `multi_models` values:
         kwargs = {
@@ -1264,6 +1306,7 @@ def test_lagged_training_data_single_point(self, config):
             "max_samples_per_ts": None,
             "use_moving_windows": use_moving_windows,
             "concatenate": True,
+            "stride": stride,
         }
 
         self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs)
@@ -1280,7 +1323,7 @@ def test_lagged_training_data_single_point(self, config):
     @pytest.mark.parametrize(
         "config",
         itertools.product(
-            [0, 1, 3], [False, True], ["datetime", "integer"], [False, True]
+            [0, 1, 3], [False, True], ["datetime", "integer"], [False, True], [1, 3]
         ),
     )
     def test_lagged_training_data_zero_lags(self, config):
@@ -1295,7 +1338,9 @@ def test_lagged_training_data_zero_lags(self, config):
         # only possible feature that can be created using these series utilises
         # the value of `future` at the same time as the label (i.e. a lag
         # of `0` away from the only feature time):
-        output_chunk_shift, use_moving_windows, series_type, multi_models = config
+        output_chunk_shift, use_moving_windows, series_type, multi_models, stride = (
+            config
+        )
 
         if series_type == "integer":
             target = linear_timeseries(
@@ -1329,6 +1374,11 @@ def test_lagged_training_data_zero_lags(self, config):
             length=1,
             freq=target.freq,
         )
+        if stride > 1:
+            expected_X = expected_X[::stride]
+            expected_y = expected_y[::stride]
+            expected_times = expected_times[::stride]
+
         # Check correctness for 'moving windows' and 'time intersection' methods, as
         # well as for different `multi_models` values:
         kwargs = {
@@ -1349,6 +1399,7 @@ def test_lagged_training_data_zero_lags(self, config):
             "max_samples_per_ts": None,
             "use_moving_windows": use_moving_windows,
             "concatenate": True,
+            "stride": stride,
         }
 
         self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs)
@@ -1364,13 +1415,14 @@ def test_lagged_training_data_zero_lags(self, config):
 
     @pytest.mark.parametrize(
         "config",
-        itertools.product(
+        product(
             [0, 1, 3],
             [False, True],
             ["datetime", "integer"],
             [False, True],
             [-1, 0, 1],
             [-2, 0, 2],
+            [1, 3],
         ),
     )
     def test_lagged_training_data_no_target_lags_future_covariates(self, config):
@@ -1390,6 +1442,7 @@ def test_lagged_training_data_no_target_lags_future_covariates(self, config):
             multi_models,
             cov_start_shift,
             cov_lag,
+            stride,
         ) = config
 
         # adapt covariate start, length, and target length so that only 1 sample can be extracted
@@ -1429,6 +1482,11 @@ def test_lagged_training_data_no_target_lags_future_covariates(self, config):
             length=1,
             freq=target.freq,
         )
+        if stride > 1:
+            expected_X = expected_X[::stride]
+            expected_y = expected_y[::stride]
+            expected_times = expected_times[::stride]
+
         # Check correctness for 'moving windows' and 'time intersection' methods, as
         # well as for different `multi_models` values:
         kwargs = {
@@ -1449,6 +1507,7 @@ def test_lagged_training_data_no_target_lags_future_covariates(self, config):
             "max_samples_per_ts": None,
             "use_moving_windows": use_moving_windows,
             "concatenate": True,
+            "stride": stride,
         }
 
         self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs)
@@ -1471,6 +1530,7 @@ def test_lagged_training_data_no_target_lags_future_covariates(self, config):
             [False, True],
             [-1, 0],
             [-2, -1],
+            [1, 3],
         ),
     )
     def test_lagged_training_data_no_target_lags_past_covariates(self, config):
@@ -1489,6 +1549,7 @@ def test_lagged_training_data_no_target_lags_past_covariates(self, config):
             multi_models,
             cov_start_shift,
             cov_lag,
+            stride,
         ) = config
 
         # adapt covariate start, length, and target length so that only 1 sample can be extracted
@@ -1528,6 +1589,11 @@ def test_lagged_training_data_no_target_lags_past_covariates(self, config):
             length=1,
             freq=target.freq,
         )
+        if stride > 1:
+            expected_X = expected_X[::stride]
+            expected_y = expected_y[::stride]
+            expected_times = expected_times[::stride]
+
         # Check correctness for 'moving windows' and 'time intersection' methods, as
         # well as for different `multi_models` values:
         kwargs = {
@@ -1548,6 +1614,7 @@ def test_lagged_training_data_no_target_lags_past_covariates(self, config):
             "max_samples_per_ts": None,
             "use_moving_windows": use_moving_windows,
             "concatenate": True,
+            "stride": stride,
         }
 
         self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs)
@@ -1564,7 +1631,7 @@ def test_lagged_training_data_no_target_lags_past_covariates(self, config):
     @pytest.mark.parametrize(
         "config",
         itertools.product(
-            [0, 1, 3], [False, True], ["datetime", "integer"], [False, True]
+            [0, 1, 3], [False, True], ["datetime", "integer"], [False, True], [1, 3]
         ),
     )
     def test_lagged_training_data_positive_lags(self, config):
@@ -1580,7 +1647,9 @@ def test_lagged_training_data_positive_lags(self, config):
         # only possible feature that can be created using these series utilises
         # the value of `future` one timestep after the time of the label (i.e. a lag
         # of `1` away from the only feature time):
-        output_chunk_shift, use_moving_windows, series_type, multi_models = config
+        output_chunk_shift, use_moving_windows, series_type, multi_models, stride = (
+            config
+        )
 
         if series_type == "integer":
             target = linear_timeseries(
@@ -1613,6 +1682,11 @@ def test_lagged_training_data_positive_lags(self, config):
             length=1,
             freq=target.freq,
         )
+        if stride > 1:
+            expected_X = expected_X[::stride]
+            expected_y = expected_y[::stride]
+            expected_times = expected_times[::stride]
+
         # Check correctness for 'moving windows' and 'time intersection' methods, as
         # well as for different `multi_models` values:
         kwargs = {
@@ -1633,6 +1707,7 @@ def test_lagged_training_data_positive_lags(self, config):
             "max_samples_per_ts": None,
             "use_moving_windows": use_moving_windows,
             "concatenate": True,
+            "stride": stride,
         }
 
         self.helper_check_lagged_data(convert_lags_to_dict=False, **kwargs)
@@ -1653,6 +1728,7 @@ def test_lagged_training_data_positive_lags(self, config):
             [1, 2],
             [True, False],
             ["datetime", "integer"],
+            [1, 3],
         ),
     )
     def test_lagged_training_data_comp_wise_lags(self, config):
@@ -1662,7 +1738,9 @@ def test_lagged_training_data_comp_wise_lags(self, config):
 
         Note that this is supported only when use_moving_window=True.
         """
-        output_chunk_shift, output_chunk_length, multi_models, series_type = config
+        output_chunk_shift, output_chunk_length, multi_models, series_type, stride = (
+            config
+        )
 
         lags_tg = {"target_0": [-4, -1], "target_1": [-4, -1]}
         lags_pc = [-3]
@@ -1716,6 +1794,7 @@ def test_lagged_training_data_comp_wise_lags(self, config):
             output_chunk_length,
             None,
             output_chunk_shift,
+            stride=stride,
         )
 
         # reorder the features to obtain target_0_lag-4, target_1_lag-4, target_0_lag-1, target_1_lag-1
@@ -1762,6 +1841,10 @@ def test_lagged_training_data_comp_wise_lags(self, config):
             multi_models,
             output_chunk_shift,
         )[:, :, np.newaxis]
+        # no further striding is needed here: `stride` was already passed to
+        # `get_feature_times` above, so `feats_times` (and the expected arrays
+        # built from it) are already subsampled to every `stride`-th sample;
+        # re-slicing with `[::stride]` would apply the stride twice
 
         # lags are already in dict format
         self.helper_check_lagged_data(
@@ -1783,9 +1866,14 @@ def test_lagged_training_data_comp_wise_lags(self, config):
             max_samples_per_ts=None,
             use_moving_windows=True,
             concatenate=True,
+            stride=stride,
         )
 
-    def test_lagged_training_data_sequence_inputs(self):
+    @pytest.mark.parametrize(
+        "stride",
+        [1, 3],
+    )
+    def test_lagged_training_data_sequence_inputs(self, stride):
         """
         Tests that `create_lagged_training_data` correctly handles being
         passed a sequence of `TimeSeries` inputs, as opposed to individual
@@ -1806,12 +1894,14 @@ def test_lagged_training_data_sequence_inputs(self):
         expected_X_2 = np.concatenate(
             3 * [target_2.all_values(copy=False)[:-1, :, :]], axis=1
         )
-        expected_X = np.concatenate([expected_X_1, expected_X_2], axis=0)
-        expected_y_1 = target_1.all_values(copy=False)[1:, :, :]
-        expected_y_2 = target_2.all_values(copy=False)[1:, :, :]
+        expected_X = np.concatenate(
+            [expected_X_1[::stride], expected_X_2[::stride]], axis=0
+        )
+        expected_y_1 = target_1.all_values(copy=False)[1::stride, :, :]
+        expected_y_2 = target_2.all_values(copy=False)[1::stride, :, :]
         expected_y = np.concatenate([expected_y_1, expected_y_2], axis=0)
-        expected_times_1 = target_1.time_index[1:]
-        expected_times_2 = target_2.time_index[1:]
+        expected_times_1 = target_1.time_index[1::stride]
+        expected_times_2 = target_2.time_index[1::stride]
 
         kwargs = {
             "expected_X": expected_X,
@@ -1830,6 +1920,7 @@ def test_lagged_training_data_sequence_inputs(self):
             "multi_models": True,
             "max_samples_per_ts": None,
             "use_moving_windows": True,
+            "stride": stride,
         }
 
         # concatenate=True
@@ -1848,7 +1939,11 @@ def test_lagged_training_data_sequence_inputs(self):
             convert_lags_to_dict=True, concatenate=False, **kwargs
         )
 
-    def test_lagged_training_data_stochastic_series(self):
+    @pytest.mark.parametrize(
+        "stride",
+        [1, 3],
+    )
+    def test_lagged_training_data_stochastic_series(self, stride):
         """
         Tests that `create_lagged_training_data` is correctly vectorised
         over the sample axes of the input `TimeSeries`.
@@ -1863,10 +1958,10 @@ def test_lagged_training_data_stochastic_series(self):
         output_chunk_length = 1
         # Expected solution:
         expected_X = np.concatenate(
-            3 * [target.all_values(copy=False)[:-1, :, :]], axis=1
+            3 * [target.all_values(copy=False)[:-1:stride, :, :]], axis=1
         )
-        expected_y = target.all_values(copy=False)[1:, :, :]
-        expected_times = target.time_index[1:]
+        expected_y = target.all_values(copy=False)[1::stride, :, :]
+        expected_times = target.time_index[1::stride]
 
         kwargs = {
             "expected_X": expected_X,
@@ -1885,6 +1980,7 @@ def test_lagged_training_data_stochastic_series(self):
             "multi_models": True,
             "max_samples_per_ts": None,
             "use_moving_windows": True,
+            "stride": stride,
         }
 
         self.helper_check_lagged_data(
@@ -2729,6 +2825,7 @@ def test_correct_generated_weights_exponential(self, config):
             ["D", "2D", 2],
             [True, False],
             [True, False],
+            [1, 3],
         ),
     )
     def test_correct_user_weights(self, config):
@@ -2751,14 +2848,18 @@ def test_correct_user_weights(self, config):
             freq,
             single_series,
             univar_series,
+            stride,
         ) = config
+        lags = [-4, -1]
         if not isinstance(freq, int):
             freq = pd.tseries.frequencies.to_offset(freq)
             start = pd.Timestamp("2000-01-01")
         else:
             start = 1
-        train_y = linear_timeseries(start=start, length=training_size, freq=freq)
+        train_y = linear_timeseries(
+            start=start, end_value=training_size - 1, length=training_size, freq=freq
+        )
         if not univar_series:
             train_y.stack(train_y)
 
@@ -2776,13 +2877,14 @@ def test_correct_user_weights(self, config):
             ts_weights.stack(ts_weights + 1.0)
 
         _, y, _, _, weights = create_lagged_training_data(
-            lags=[-4, -1],
+            lags=lags,
             target_series=train_y if single_series else [train_y] * 2,
             output_chunk_length=ocl,
             uses_static_covariates=False,
             sample_weight=ts_weights if single_series else [ts_weights] * 2,
             output_chunk_shift=ocs,
             use_moving_windows=use_moving_windows,
+            stride=stride,
         )
 
         # weights shape must match label shape, since we have one
@@ -2796,11 +2898,15 @@ def test_correct_user_weights(self, config):
 
         # the weights correspond to the same sample and time index as the `y` labels
         expected_weights = []
-        len_y_single = len(y) if single_series else int(len(y) / 2)
+        len_y_single = len(y) if single_series else len(y) // 2
         for i in range(ocl):
-            mask = slice(-(i + len_y_single), -i if i else None)
+            # shifted by the steps required to create the first set of features
+            first_label_idx = -min(lags) + ocs + i
+            # make enough room for all the strided labels
+            last_label_idx = first_label_idx + len_y_single * stride
+            mask = slice(first_label_idx, last_label_idx, stride)
             expected_weights.append(weights_exact[mask])
-        expected_weights = np.concatenate(expected_weights, axis=1)[:, ::-1]
+        expected_weights = np.concatenate(expected_weights, axis=1)
         if not single_series:
             expected_weights = np.concatenate([expected_weights] * 2, axis=0)
         np.testing.assert_array_almost_equal(weights[:, :, 0], expected_weights)
diff --git a/darts/utils/data/tabularization.py b/darts/utils/data/tabularization.py
index d29f3ac299..4785617c84 100644
--- a/darts/utils/data/tabularization.py
+++ b/darts/utils/data/tabularization.py
@@ -1155,8 +1155,7 @@ def _create_lagged_data_by_moving_window(
     # must take `(num_samples - 1)` values ahead of `first_window_end_idx`
     vals = vals[
         first_window_start_idx : first_window_end_idx
-        + num_samples * stride
-        - 1,
+        + (num_samples - 1) * stride,
         :,
         :,
     ]
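
Reviewer note: the `tabularization.py` hunk above is the substantive fix — the old slice bound requested `stride - 1` more values than any window uses. A minimal, self-contained sketch of the window arithmetic (illustrative names only, not darts internals; assumes NumPy >= 1.20 for `sliding_window_view`):

    import numpy as np

    # `num_samples` windows of length `window_len`, spaced `stride` apart,
    # need exactly `(num_samples - 1) * stride` values beyond the end of
    # the first window - the bound used by the fix above.
    vals = np.arange(100)
    window_len, stride, num_samples = 4, 3, 6

    first_window_start_idx = 0
    first_window_end_idx = first_window_start_idx + window_len
    trimmed = vals[
        first_window_start_idx : first_window_end_idx + (num_samples - 1) * stride
    ]

    # Every `stride`-th window over the trimmed block:
    windows = np.lib.stride_tricks.sliding_window_view(trimmed, window_len)[::stride]
    assert windows.shape[0] == num_samples
    # The last window ends flush with the trimmed block - nothing is spare:
    assert windows[-1][-1] == trimmed[-1]
    # The old bound, `+ num_samples * stride - 1`, over-reads by `stride - 1`
    # trailing values that belong to no window whenever `stride > 1`.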