Add extrapolate_season.predict() function

- the function implements a forecasting "method" similar to the
  seasonal naive method
  => instead of simply taking the last observation given a seasonal lag,
     it linearly extrapolates all observations of the same seasonal lag
     from the past into the future; conceptually, it is like the
     seasonal naive method with built-in smoothing
- the function is tested just like the `arima.predict()` and
  `ets.predict()` functions
+ rename the `tests.forecasts.methods.test_ts_methods` module
  into `tests.forecasts.methods.test_predictions`
- re-organize some constants in the `tests` package
- streamline some docstrings
This commit is contained in:
parent
1d63623dfc
commit
b8952213d8
9 changed files with 170 additions and 43 deletions
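For orientation, a minimal usage sketch of the new function. The series and dates below are made up (a toy daily series with a weekly pattern rather than the project's vertical time series); only the call signature is taken from the new module in the diff further down.

import pandas as pd

from urban_meal_delivery.forecasts.methods import extrapolate_season

# Hypothetical training data: four weeks of daily observations with a
# weekly pattern, so the seasonal `frequency` is 7.
past = pd.date_range('2021-01-04', periods=4 * 7, freq='D')
training_ts = pd.Series([float(day.dayofweek) for day in past], index=past)

# Forecast the following week, one time step per day.
forecast_interval = pd.date_range('2021-02-01', periods=7, freq='D')

predictions = extrapolate_season.predict(
    training_ts, forecast_interval, frequency=7,
)
print(predictions['prediction'])  # 7 point forecasts; the CI columns stay NaN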
@@ -150,6 +150,9 @@ per-file-ignores =
     src/urban_meal_delivery/forecasts/methods/decomposition.py:
         # The module is not too complex.
         WPS232,
+    src/urban_meal_delivery/forecasts/methods/extrapolate_season.py:
+        # The module is not too complex.
+        WPS232,
     src/urban_meal_delivery/forecasts/timify.py:
         # No SQL injection as the inputs come from a safe source.
         S608,

@@ -3,3 +3,4 @@
 from urban_meal_delivery.forecasts.methods import arima
 from urban_meal_delivery.forecasts.methods import decomposition
 from urban_meal_delivery.forecasts.methods import ets
+from urban_meal_delivery.forecasts.methods import extrapolate_season

@@ -0,0 +1,72 @@
+"""Forecast by linear extrapolation of a seasonal component."""
+
+import pandas as pd
+from statsmodels.tsa import api as ts_stats
+
+
+def predict(
+    training_ts: pd.Series, forecast_interval: pd.DatetimeIndex, *, frequency: int,
+) -> pd.DataFrame:
+    """Extrapolate a seasonal component with a linear model.
+
+    A naive forecast for each time unit of the day is calculated by linear
+    extrapolation from all observations of the same time of day and on the same
+    day of the week (i.e., same seasonal lag).
+
+    Note: The function does not check if the `forecast_interval`
+    extends the `training_ts`'s interval without a gap!
+
+    Args:
+        training_ts: past observations to be fitted;
+            assumed to be a seasonal component after time series decomposition
+        forecast_interval: interval into which the `training_ts` is forecast;
+            its length becomes the numbers of time steps to be forecast
+        frequency: frequency of the observations in the `training_ts`
+
+    Returns:
+        predictions: point forecasts (i.e., the "prediction" column);
+            includes the four "low/high80/95" columns for the confidence intervals
+            that only contain `NaN` values as this method does not make
+            any statistical assumptions about the time series process
+
+    Raises:
+        ValueError: if `training_ts` contains `NaN` values or some predictions
+            could not be made for time steps in the `forecast_interval`
+    """
+    if training_ts.isnull().any():
+        raise ValueError('`training_ts` must not contain `NaN` values')
+
+    extrapolated_ts = pd.Series(index=forecast_interval, dtype=float)
+    seasonal_lag = frequency * (training_ts.index[1] - training_ts.index[0])
+
+    for lag in range(frequency):
+        # Obtain all `observations` of the same seasonal lag and
+        # fit a straight line through them (= `trend`).
+        observations = training_ts[slice(lag, 999_999_999, frequency)]
+        trend = observations - ts_stats.detrend(observations)
+
+        # Create a point forecast by linear extrapolation
+        # for one or even more time steps ahead.
+        slope = trend[-1] - trend[-2]
+        prediction = trend[-1] + slope
+        idx = observations.index.max() + seasonal_lag
+        while idx <= forecast_interval.max():
+            if idx in forecast_interval:
+                extrapolated_ts.loc[idx] = prediction
+            prediction += slope
+            idx += seasonal_lag
+
+    # Sanity check.
+    if extrapolated_ts.isnull().any():  # pragma: no cover
+        raise ValueError('missing predictions in the `forecast_interval`')
+
+    return pd.DataFrame(
+        data={
+            'prediction': extrapolated_ts.round(5),
+            'low80': float('NaN'),
+            'high80': float('NaN'),
+            'low95': float('NaN'),
+            'high95': float('NaN'),
+        },
+        index=forecast_interval,
+    )

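The per-lag arithmetic in the loop above amounts to fitting a straight line through the observations of one seasonal lag and extending it into the future. A standalone sketch of that idea with made-up numbers, using `numpy.polyfit` instead of the module's `statsmodels` detrending helper:

import numpy as np

# Observations of a single seasonal lag (e.g., Mondays at noon), oldest first.
observations = np.array([2.0, 2.5, 3.0, 3.5])

# Fit a straight line through them ...
x = np.arange(len(observations))
slope, intercept = np.polyfit(x, observations, deg=1)

# ... and extrapolate it one and two seasons ahead.
print(slope * len(observations) + intercept)        # -> 4.0
print(slope * (len(observations) + 1) + intercept)  # -> 4.5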
@@ -16,10 +16,15 @@ NOON = 12
 START = datetime.datetime(YEAR, MONTH, DAY, config.SERVICE_START, 0)
 END = datetime.datetime(YEAR, MONTH, 15, config.SERVICE_END, 0)
 
-# Default time steps, for example, for `OrderHistory` objects.
+# Default time steps (in minutes), for example, for `OrderHistory` objects.
 LONG_TIME_STEP = 60
 SHORT_TIME_STEP = 30
 TIME_STEPS = (SHORT_TIME_STEP, LONG_TIME_STEP)
+# The `frequency` of vertical time series is the number of days in a week, 7,
+# times the number of time steps per day. With 12 operating hours (11 am - 11 pm)
+# the `frequency`s are 84 and 168 for the `LONG/SHORT_TIME_STEP`s.
+VERTICAL_FREQUENCY_LONG = 7 * 12
+VERTICAL_FREQUENCY_SHORT = 7 * 24
 
 # Default training horizons, for example, for
 # `OrderHistory.make_horizontal_time_series()`.

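A quick cross-check of the numbers in the comment above (the 12 service hours are taken from that comment; the rest follows from the constants):

SERVICE_HOURS = 12  # 11 am - 11 pm, per the comment above
LONG_TIME_STEP, SHORT_TIME_STEP = 60, 30  # minutes

assert 7 * SERVICE_HOURS * 60 // LONG_TIME_STEP == 84     # VERTICAL_FREQUENCY_LONG
assert 7 * SERVICE_HOURS * 60 // SHORT_TIME_STEP == 168   # VERTICAL_FREQUENCY_SHORT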
@@ -1 +1 @@
-"""Test the forecasting-related functionality."""
+"""Tests for the `urban_meal_delivery.forecasts` sub-package."""

@@ -9,13 +9,6 @@ from tests import config as test_config
 from urban_meal_delivery import config
 
 
-# See remarks in `vertical_datetime_index` fixture.
-VERTICAL_FREQUENCY = 7 * 12
-
-# The default `ns` suggested for the STL method.
-NS = 7
-
-
 @pytest.fixture
 def horizontal_datetime_index():
     """A `pd.Index` with `DateTime` values.

@@ -5,11 +5,14 @@ import math
 import pandas as pd
 import pytest
 
-from tests.forecasts.conftest import NS
-from tests.forecasts.conftest import VERTICAL_FREQUENCY
+from tests import config as test_config
 from urban_meal_delivery.forecasts.methods import decomposition
 
 
+# The "periodic" `ns` suggested for the STL method.
+NS = 999
+
+
 class TestInvalidArguments:
     """Test `stl()` with invalid arguments."""
 

@@ -18,85 +21,118 @@ class TestInvalidArguments:
         time_series = pd.Series(dtype=float, index=vertical_datetime_index)
 
         with pytest.raises(ValueError, match='`NaN` values'):
-            decomposition.stl(time_series, frequency=VERTICAL_FREQUENCY, ns=99)
+            decomposition.stl(
+                time_series, frequency=test_config.VERTICAL_FREQUENCY_LONG, ns=NS,
+            )
 
     def test_ns_not_odd(self, vertical_no_demand):
         """`ns` must be odd and `>= 7`."""
         with pytest.raises(ValueError, match='`ns`'):
-            decomposition.stl(vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=8)
+            decomposition.stl(
+                vertical_no_demand, frequency=test_config.VERTICAL_FREQUENCY_LONG, ns=8,
+            )
 
     @pytest.mark.parametrize('ns', [-99, -1, 1, 5])
     def test_ns_smaller_than_seven(self, vertical_no_demand, ns):
         """`ns` must be odd and `>= 7`."""
         with pytest.raises(ValueError, match='`ns`'):
-            decomposition.stl(vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=ns)
+            decomposition.stl(
+                vertical_no_demand,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
+                ns=ns,
+            )
 
     def test_nt_not_odd(self, vertical_no_demand):
         """`nt` must be odd and `>= default_nt`."""
         nt = 200
-        default_nt = math.ceil((1.5 * VERTICAL_FREQUENCY) / (1 - (1.5 / NS)))
+        default_nt = math.ceil(
+            (1.5 * test_config.VERTICAL_FREQUENCY_LONG) / (1 - (1.5 / NS)),
+        )
 
         assert nt > default_nt  # sanity check
 
         with pytest.raises(ValueError, match='`nt`'):
             decomposition.stl(
-                vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, nt=nt,
+                vertical_no_demand,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
+                ns=NS,
+                nt=nt,
             )
 
-    @pytest.mark.parametrize('nt', [-99, -1, 0, 1, 99, 159])
+    @pytest.mark.parametrize('nt', [-99, -1, 0, 1, 99, 125])
     def test_nt_not_at_least_the_default(self, vertical_no_demand, nt):
         """`nt` must be odd and `>= default_nt`."""
         # `default_nt` becomes 161.
-        default_nt = math.ceil((1.5 * VERTICAL_FREQUENCY) / (1 - (1.5 / NS)))
+        default_nt = math.ceil(
+            (1.5 * test_config.VERTICAL_FREQUENCY_LONG) / (1 - (1.5 / NS)),
+        )
 
         assert nt < default_nt  # sanity check
 
         with pytest.raises(ValueError, match='`nt`'):
             decomposition.stl(
-                vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, nt=nt,
+                vertical_no_demand,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
+                ns=NS,
+                nt=nt,
             )
 
     def test_nl_not_odd(self, vertical_no_demand):
         """`nl` must be odd and `>= frequency`."""
         nl = 200
 
-        assert nl > VERTICAL_FREQUENCY  # sanity check
+        assert nl > test_config.VERTICAL_FREQUENCY_LONG  # sanity check
 
         with pytest.raises(ValueError, match='`nl`'):
             decomposition.stl(
-                vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, nl=nl,
+                vertical_no_demand,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
+                ns=NS,
+                nl=nl,
             )
 
     def test_nl_at_least_the_frequency(self, vertical_no_demand):
         """`nl` must be odd and `>= frequency`."""
         nl = 77
 
-        assert nl < VERTICAL_FREQUENCY  # sanity check
+        assert nl < test_config.VERTICAL_FREQUENCY_LONG  # sanity check
 
         with pytest.raises(ValueError, match='`nl`'):
             decomposition.stl(
-                vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, nl=nl,
+                vertical_no_demand,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
+                ns=NS,
+                nl=nl,
             )
 
     def test_ds_not_zero_or_one(self, vertical_no_demand):
         """`ds` must be `0` or `1`."""
         with pytest.raises(ValueError, match='`ds`'):
             decomposition.stl(
-                vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, ds=2,
+                vertical_no_demand,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
+                ns=NS,
+                ds=2,
             )
 
     def test_dt_not_zero_or_one(self, vertical_no_demand):
         """`dt` must be `0` or `1`."""
         with pytest.raises(ValueError, match='`dt`'):
             decomposition.stl(
-                vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, dt=2,
+                vertical_no_demand,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
+                ns=NS,
+                dt=2,
            )
 
     def test_dl_not_zero_or_one(self, vertical_no_demand):
         """`dl` must be `0` or `1`."""
         with pytest.raises(ValueError, match='`dl`'):
             decomposition.stl(
-                vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, dl=2,
+                vertical_no_demand,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
+                ns=NS,
+                dl=2,
             )
 
     @pytest.mark.parametrize('js', [-1, 0])

@@ -104,7 +140,10 @@ class TestInvalidArguments:
         """`js` must be positive."""
         with pytest.raises(ValueError, match='`js`'):
             decomposition.stl(
-                vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, js=js,
+                vertical_no_demand,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
+                ns=NS,
+                js=js,
             )
 
     @pytest.mark.parametrize('jt', [-1, 0])

@@ -112,7 +151,10 @@ class TestInvalidArguments:
         """`jt` must be positive."""
         with pytest.raises(ValueError, match='`jt`'):
             decomposition.stl(
-                vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, jt=jt,
+                vertical_no_demand,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
+                ns=NS,
+                jt=jt,
             )
 
     @pytest.mark.parametrize('jl', [-1, 0])

@@ -120,7 +162,10 @@ class TestInvalidArguments:
         """`jl` must be positive."""
         with pytest.raises(ValueError, match='`jl`'):
             decomposition.stl(
-                vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, jl=jl,
+                vertical_no_demand,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
+                ns=NS,
+                jl=jl,
             )
 
     @pytest.mark.parametrize('ni', [-1, 0])

@@ -128,14 +173,20 @@ class TestInvalidArguments:
         """`ni` must be positive."""
         with pytest.raises(ValueError, match='`ni`'):
             decomposition.stl(
-                vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, ni=ni,
+                vertical_no_demand,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
+                ns=NS,
+                ni=ni,
             )
 
     def test_no_not_non_negative(self, vertical_no_demand):
         """`no` must be non-negative."""
         with pytest.raises(ValueError, match='`no`'):
             decomposition.stl(
-                vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, no=-1,
+                vertical_no_demand,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
+                ns=NS,
+                no=-1,
             )
 
 

@@ -146,7 +197,7 @@ class TestValidArguments:
     def test_structure_of_returned_dataframe(self, vertical_no_demand):
         """`stl()` returns a `pd.DataFrame` with three columns."""
         result = decomposition.stl(
-            vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS,
+            vertical_no_demand, frequency=test_config.VERTICAL_FREQUENCY_LONG, ns=NS,
         )
 
         assert isinstance(result, pd.DataFrame)

@@ -173,7 +224,7 @@ class TestValidArguments:
         """
         decomposed = decomposition.stl(
             vertical_no_demand,
-            frequency=VERTICAL_FREQUENCY,
+            frequency=test_config.VERTICAL_FREQUENCY_LONG,
             ns=NS,
             nt=nt,
             nl=nl,

@@ -1,7 +1,4 @@
-"""Test the `arima.predict()` and `ets.predict()` functions.
-
-We consider both "classical" time series prediction models.
-"""
+"""Test all the `*.predict()` functions in the `methods` sub-package."""
 
 import datetime as dt
 

@@ -9,10 +6,10 @@ import pandas as pd
 import pytest
 
 from tests import config as test_config
-from tests.forecasts.conftest import VERTICAL_FREQUENCY
 from urban_meal_delivery import config
 from urban_meal_delivery.forecasts.methods import arima
 from urban_meal_delivery.forecasts.methods import ets
+from urban_meal_delivery.forecasts.methods import extrapolate_season
 
 
 @pytest.fixture

@@ -60,7 +57,9 @@ def forecast_time_step():
 
 
 @pytest.mark.r
-@pytest.mark.parametrize('func', [arima.predict, ets.predict])
+@pytest.mark.parametrize(
+    'func', [arima.predict, ets.predict, extrapolate_season.predict],
+)
 class TestMakePredictions:
     """Make predictions with `arima.predict()` and `ets.predict()`."""
 

@@ -74,7 +73,7 @@ class TestMakePredictions:
             func(
                 training_ts=vertical_no_demand,
                 forecast_interval=forecast_interval,
-                frequency=VERTICAL_FREQUENCY,
+                frequency=test_config.VERTICAL_FREQUENCY_LONG,
             )
 
     def test_structure_of_returned_dataframe(

@@ -84,7 +83,7 @@ class TestMakePredictions:
         result = func(
             training_ts=vertical_no_demand,
             forecast_interval=forecast_interval,
-            frequency=VERTICAL_FREQUENCY,
+            frequency=test_config.VERTICAL_FREQUENCY_LONG,
         )
 
         assert isinstance(result, pd.DataFrame)

@@ -123,7 +122,7 @@ class TestMakePredictions:
         predictions = func(
             training_ts=vertical_no_demand,
             forecast_interval=forecast_interval,
-            frequency=VERTICAL_FREQUENCY,
+            frequency=test_config.VERTICAL_FREQUENCY_LONG,
         )
 
         result = predictions.sum().sum()

@@ -47,7 +47,10 @@ def order_totals(good_pixel_id):
 
 @pytest.fixture
 def order_history(order_totals, grid):
-    """An `OrderHistory` object that does not need the database."""
+    """An `OrderHistory` object that does not need the database.
+
+    Uses the LONG_TIME_STEP as the length of a time step.
+    """
     oh = timify.OrderHistory(grid=grid, time_step=test_config.LONG_TIME_STEP)
     oh._data = order_totals
 
