diff --git a/setup.cfg b/setup.cfg index 83d92d5..86a0f8d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -150,6 +150,9 @@ per-file-ignores = src/urban_meal_delivery/forecasts/methods/decomposition.py: # The module is not too complex. WPS232, + src/urban_meal_delivery/forecasts/methods/extrapolate_season.py: + # The module is not too complex. + WPS232, src/urban_meal_delivery/forecasts/timify.py: # No SQL injection as the inputs come from a safe source. S608, diff --git a/src/urban_meal_delivery/forecasts/methods/__init__.py b/src/urban_meal_delivery/forecasts/methods/__init__.py index 9b88926..5690e79 100644 --- a/src/urban_meal_delivery/forecasts/methods/__init__.py +++ b/src/urban_meal_delivery/forecasts/methods/__init__.py @@ -3,3 +3,4 @@ from urban_meal_delivery.forecasts.methods import arima from urban_meal_delivery.forecasts.methods import decomposition from urban_meal_delivery.forecasts.methods import ets +from urban_meal_delivery.forecasts.methods import extrapolate_season diff --git a/src/urban_meal_delivery/forecasts/methods/extrapolate_season.py b/src/urban_meal_delivery/forecasts/methods/extrapolate_season.py new file mode 100644 index 0000000..dfbc9c4 --- /dev/null +++ b/src/urban_meal_delivery/forecasts/methods/extrapolate_season.py @@ -0,0 +1,72 @@ +"""Forecast by linear extrapolation of a seasonal component.""" + +import pandas as pd +from statsmodels.tsa import api as ts_stats + + +def predict( + training_ts: pd.Series, forecast_interval: pd.DatetimeIndex, *, frequency: int, +) -> pd.DataFrame: + """Extrapolate a seasonal component with a linear model. + + A naive forecast for each time unit of the day is calculated by linear + extrapolation from all observations of the same time of day and on the same + day of the week (i.e., same seasonal lag). + + Note: The function does not check if the `forecast_interval` + extends the `training_ts`'s interval without a gap! 
+ + Args: + training_ts: past observations to be fitted; + assumed to be a seasonal component after time series decomposition + forecast_interval: interval into which the `training_ts` is forecast; + its length becomes the number of time steps to be forecast + frequency: frequency of the observations in the `training_ts` + + Returns: + predictions: point forecasts (i.e., the "prediction" column); + includes the four "low/high80/95" columns for the confidence intervals + that only contain `NaN` values as this method does not make + any statistical assumptions about the time series process + + Raises: + ValueError: if `training_ts` contains `NaN` values or some predictions + could not be made for time steps in the `forecast_interval` + """ + if training_ts.isnull().any(): + raise ValueError('`training_ts` must not contain `NaN` values') + + extrapolated_ts = pd.Series(index=forecast_interval, dtype=float) + seasonal_lag = frequency * (training_ts.index[1] - training_ts.index[0]) + + for lag in range(frequency): + # Obtain all `observations` of the same seasonal lag and + # fit a straight line through them (= `trend`). + observations = training_ts[slice(lag, 999_999_999, frequency)] + trend = observations - ts_stats.detrend(observations) + + # Create a point forecast by linear extrapolation + # for one or even more time steps ahead. + slope = trend[-1] - trend[-2] + prediction = trend[-1] + slope + idx = observations.index.max() + seasonal_lag + while idx <= forecast_interval.max(): + if idx in forecast_interval: + extrapolated_ts.loc[idx] = prediction + prediction += slope + idx += seasonal_lag + + # Sanity check. 
+ if extrapolated_ts.isnull().any(): # pragma: no cover + raise ValueError('missing predictions in the `forecast_interval`') + + return pd.DataFrame( + data={ + 'prediction': extrapolated_ts.round(5), + 'low80': float('NaN'), + 'high80': float('NaN'), + 'low95': float('NaN'), + 'high95': float('NaN'), + }, + index=forecast_interval, + ) diff --git a/tests/config.py b/tests/config.py index fd3e115..13fec36 100644 --- a/tests/config.py +++ b/tests/config.py @@ -16,10 +16,15 @@ NOON = 12 START = datetime.datetime(YEAR, MONTH, DAY, config.SERVICE_START, 0) END = datetime.datetime(YEAR, MONTH, 15, config.SERVICE_END, 0) -# Default time steps, for example, for `OrderHistory` objects. +# Default time steps (in minutes), for example, for `OrderHistory` objects. LONG_TIME_STEP = 60 SHORT_TIME_STEP = 30 TIME_STEPS = (SHORT_TIME_STEP, LONG_TIME_STEP) +# The `frequency` of vertical time series is the number of days in a week, 7, +# times the number of time steps per day. With 12 operating hours (11 am - 11 pm) +# the `frequency`s are 84 and 168 for the `LONG/SHORT_TIME_STEP`s. +VERTICAL_FREQUENCY_LONG = 7 * 12 +VERTICAL_FREQUENCY_SHORT = 7 * 24 # Default training horizons, for example, for # `OrderHistory.make_horizontal_time_series()`. diff --git a/tests/forecasts/__init__.py b/tests/forecasts/__init__.py index 50eaeb3..5d46e8b 100644 --- a/tests/forecasts/__init__.py +++ b/tests/forecasts/__init__.py @@ -1 +1 @@ -"""Test the forecasting-related functionality.""" +"""Tests for the `urban_meal_delivery.forecasts` sub-package.""" diff --git a/tests/forecasts/conftest.py b/tests/forecasts/conftest.py index c7fcf3b..ede73ba 100644 --- a/tests/forecasts/conftest.py +++ b/tests/forecasts/conftest.py @@ -9,13 +9,6 @@ from tests import config as test_config from urban_meal_delivery import config -# See remarks in `vertical_datetime_index` fixture. -VERTICAL_FREQUENCY = 7 * 12 - -# The default `ns` suggested for the STL method. 
-NS = 7 - - @pytest.fixture def horizontal_datetime_index(): """A `pd.Index` with `DateTime` values. diff --git a/tests/forecasts/methods/test_decomposition.py b/tests/forecasts/methods/test_decomposition.py index 0687d9c..c103c3f 100644 --- a/tests/forecasts/methods/test_decomposition.py +++ b/tests/forecasts/methods/test_decomposition.py @@ -5,11 +5,14 @@ import math import pandas as pd import pytest -from tests.forecasts.conftest import NS -from tests.forecasts.conftest import VERTICAL_FREQUENCY +from tests import config as test_config from urban_meal_delivery.forecasts.methods import decomposition +# The "periodic" `ns` suggested for the STL method. +NS = 999 + + class TestInvalidArguments: """Test `stl()` with invalid arguments.""" @@ -18,85 +21,118 @@ class TestInvalidArguments: time_series = pd.Series(dtype=float, index=vertical_datetime_index) with pytest.raises(ValueError, match='`NaN` values'): - decomposition.stl(time_series, frequency=VERTICAL_FREQUENCY, ns=99) + decomposition.stl( + time_series, frequency=test_config.VERTICAL_FREQUENCY_LONG, ns=NS, + ) def test_ns_not_odd(self, vertical_no_demand): """`ns` must be odd and `>= 7`.""" with pytest.raises(ValueError, match='`ns`'): - decomposition.stl(vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=8) + decomposition.stl( + vertical_no_demand, frequency=test_config.VERTICAL_FREQUENCY_LONG, ns=8, + ) @pytest.mark.parametrize('ns', [-99, -1, 1, 5]) def test_ns_smaller_than_seven(self, vertical_no_demand, ns): """`ns` must be odd and `>= 7`.""" with pytest.raises(ValueError, match='`ns`'): - decomposition.stl(vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=ns) + decomposition.stl( + vertical_no_demand, + frequency=test_config.VERTICAL_FREQUENCY_LONG, + ns=ns, + ) def test_nt_not_odd(self, vertical_no_demand): """`nt` must be odd and `>= default_nt`.""" nt = 200 - default_nt = math.ceil((1.5 * VERTICAL_FREQUENCY) / (1 - (1.5 / NS))) + default_nt = math.ceil( + (1.5 * 
test_config.VERTICAL_FREQUENCY_LONG) / (1 - (1.5 / NS)), + ) assert nt > default_nt # sanity check with pytest.raises(ValueError, match='`nt`'): decomposition.stl( - vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, nt=nt, + vertical_no_demand, + frequency=test_config.VERTICAL_FREQUENCY_LONG, + ns=NS, + nt=nt, ) - @pytest.mark.parametrize('nt', [-99, -1, 0, 1, 99, 159]) + @pytest.mark.parametrize('nt', [-99, -1, 0, 1, 99, 125]) def test_nt_not_at_least_the_default(self, vertical_no_demand, nt): """`nt` must be odd and `>= default_nt`.""" # `default_nt` becomes 161. - default_nt = math.ceil((1.5 * VERTICAL_FREQUENCY) / (1 - (1.5 / NS))) + default_nt = math.ceil( + (1.5 * test_config.VERTICAL_FREQUENCY_LONG) / (1 - (1.5 / NS)), + ) assert nt < default_nt # sanity check with pytest.raises(ValueError, match='`nt`'): decomposition.stl( - vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, nt=nt, + vertical_no_demand, + frequency=test_config.VERTICAL_FREQUENCY_LONG, + ns=NS, + nt=nt, ) def test_nl_not_odd(self, vertical_no_demand): """`nl` must be odd and `>= frequency`.""" nl = 200 - assert nl > VERTICAL_FREQUENCY # sanity check + assert nl > test_config.VERTICAL_FREQUENCY_LONG # sanity check with pytest.raises(ValueError, match='`nl`'): decomposition.stl( - vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, nl=nl, + vertical_no_demand, + frequency=test_config.VERTICAL_FREQUENCY_LONG, + ns=NS, + nl=nl, ) def test_nl_at_least_the_frequency(self, vertical_no_demand): """`nl` must be odd and `>= frequency`.""" nl = 77 - assert nl < VERTICAL_FREQUENCY # sanity check + assert nl < test_config.VERTICAL_FREQUENCY_LONG # sanity check with pytest.raises(ValueError, match='`nl`'): decomposition.stl( - vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, nl=nl, + vertical_no_demand, + frequency=test_config.VERTICAL_FREQUENCY_LONG, + ns=NS, + nl=nl, ) def test_ds_not_zero_or_one(self, vertical_no_demand): """`ds` must be `0` or `1`.""" with 
pytest.raises(ValueError, match='`ds`'): decomposition.stl( - vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, ds=2, + vertical_no_demand, + frequency=test_config.VERTICAL_FREQUENCY_LONG, + ns=NS, + ds=2, ) def test_dt_not_zero_or_one(self, vertical_no_demand): """`dt` must be `0` or `1`.""" with pytest.raises(ValueError, match='`dt`'): decomposition.stl( - vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, dt=2, + vertical_no_demand, + frequency=test_config.VERTICAL_FREQUENCY_LONG, + ns=NS, + dt=2, ) def test_dl_not_zero_or_one(self, vertical_no_demand): """`dl` must be `0` or `1`.""" with pytest.raises(ValueError, match='`dl`'): decomposition.stl( - vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, dl=2, + vertical_no_demand, + frequency=test_config.VERTICAL_FREQUENCY_LONG, + ns=NS, + dl=2, ) @pytest.mark.parametrize('js', [-1, 0]) @@ -104,7 +140,10 @@ class TestInvalidArguments: """`js` must be positive.""" with pytest.raises(ValueError, match='`js`'): decomposition.stl( - vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, js=js, + vertical_no_demand, + frequency=test_config.VERTICAL_FREQUENCY_LONG, + ns=NS, + js=js, ) @pytest.mark.parametrize('jt', [-1, 0]) @@ -112,7 +151,10 @@ class TestInvalidArguments: """`jt` must be positive.""" with pytest.raises(ValueError, match='`jt`'): decomposition.stl( - vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, jt=jt, + vertical_no_demand, + frequency=test_config.VERTICAL_FREQUENCY_LONG, + ns=NS, + jt=jt, ) @pytest.mark.parametrize('jl', [-1, 0]) @@ -120,7 +162,10 @@ class TestInvalidArguments: """`jl` must be positive.""" with pytest.raises(ValueError, match='`jl`'): decomposition.stl( - vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, jl=jl, + vertical_no_demand, + frequency=test_config.VERTICAL_FREQUENCY_LONG, + ns=NS, + jl=jl, ) @pytest.mark.parametrize('ni', [-1, 0]) @@ -128,14 +173,20 @@ class TestInvalidArguments: """`ni` must be positive.""" with pytest.raises(ValueError, 
match='`ni`'): decomposition.stl( - vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, ni=ni, + vertical_no_demand, + frequency=test_config.VERTICAL_FREQUENCY_LONG, + ns=NS, + ni=ni, ) def test_no_not_non_negative(self, vertical_no_demand): """`no` must be non-negative.""" with pytest.raises(ValueError, match='`no`'): decomposition.stl( - vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, no=-1, + vertical_no_demand, + frequency=test_config.VERTICAL_FREQUENCY_LONG, + ns=NS, + no=-1, ) @@ -146,7 +197,7 @@ class TestValidArguments: def test_structure_of_returned_dataframe(self, vertical_no_demand): """`stl()` returns a `pd.DataFrame` with three columns.""" result = decomposition.stl( - vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, + vertical_no_demand, frequency=test_config.VERTICAL_FREQUENCY_LONG, ns=NS, ) assert isinstance(result, pd.DataFrame) @@ -173,7 +224,7 @@ class TestValidArguments: """ decomposed = decomposition.stl( vertical_no_demand, - frequency=VERTICAL_FREQUENCY, + frequency=test_config.VERTICAL_FREQUENCY_LONG, ns=NS, nt=nt, nl=nl, diff --git a/tests/forecasts/methods/test_ts_methods.py b/tests/forecasts/methods/test_predictions.py similarity index 89% rename from tests/forecasts/methods/test_ts_methods.py rename to tests/forecasts/methods/test_predictions.py index 11691c7..8155d62 100644 --- a/tests/forecasts/methods/test_ts_methods.py +++ b/tests/forecasts/methods/test_predictions.py @@ -1,7 +1,4 @@ -"""Test the `arima.predict()` and `ets.predict()` functions. - -We consider both "classical" time series prediction models. 
-""" +"""Test all the `*.predict()` functions in the `methods` sub-package.""" import datetime as dt @@ -9,10 +6,10 @@ import pandas as pd import pytest from tests import config as test_config -from tests.forecasts.conftest import VERTICAL_FREQUENCY from urban_meal_delivery import config from urban_meal_delivery.forecasts.methods import arima from urban_meal_delivery.forecasts.methods import ets +from urban_meal_delivery.forecasts.methods import extrapolate_season @pytest.fixture @@ -60,7 +57,9 @@ def forecast_time_step(): @pytest.mark.r -@pytest.mark.parametrize('func', [arima.predict, ets.predict]) +@pytest.mark.parametrize( + 'func', [arima.predict, ets.predict, extrapolate_season.predict], +) class TestMakePredictions: """Make predictions with `arima.predict()` and `ets.predict()`.""" @@ -74,7 +73,7 @@ class TestMakePredictions: func( training_ts=vertical_no_demand, forecast_interval=forecast_interval, - frequency=VERTICAL_FREQUENCY, + frequency=test_config.VERTICAL_FREQUENCY_LONG, ) def test_structure_of_returned_dataframe( @@ -84,7 +83,7 @@ class TestMakePredictions: result = func( training_ts=vertical_no_demand, forecast_interval=forecast_interval, - frequency=VERTICAL_FREQUENCY, + frequency=test_config.VERTICAL_FREQUENCY_LONG, ) assert isinstance(result, pd.DataFrame) @@ -123,7 +122,7 @@ class TestMakePredictions: predictions = func( training_ts=vertical_no_demand, forecast_interval=forecast_interval, - frequency=VERTICAL_FREQUENCY, + frequency=test_config.VERTICAL_FREQUENCY_LONG, ) result = predictions.sum().sum() diff --git a/tests/forecasts/timify/conftest.py b/tests/forecasts/timify/conftest.py index 6143cfe..cfb5fc7 100644 --- a/tests/forecasts/timify/conftest.py +++ b/tests/forecasts/timify/conftest.py @@ -47,7 +47,10 @@ def order_totals(good_pixel_id): @pytest.fixture def order_history(order_totals, grid): - """An `OrderHistory` object that does not need the database.""" + """An `OrderHistory` object that does not need the database. 
+ + Uses the LONG_TIME_STEP as the length of a time step. + """ oh = timify.OrderHistory(grid=grid, time_step=test_config.LONG_TIME_STEP) oh._data = order_totals