Add wrappers for R's "arima" and "ets" functions

Alexander Hess 2021-01-11 20:17:00 +01:00
commit 64482f48d0
Signed by: alexander
GPG key ID: 344EA5AB10D868E0
10 changed files with 441 additions and 88 deletions


@@ -14,10 +14,7 @@ NOON = 12
# `START` and `END` constitute a 15-day time span.
# That implies a maximum `train_horizon` of `2` as that needs full 7-day weeks.
START = datetime.datetime(YEAR, MONTH, DAY, config.SERVICE_START, 0)
_end_day = (START + datetime.timedelta(weeks=2)).date()
END = datetime.datetime(
_end_day.year, _end_day.month, _end_day.day, config.SERVICE_END, 0,
)
END = datetime.datetime(YEAR, MONTH, 15, config.SERVICE_END, 0)
# Default time steps, for example, for `OrderHistory` objects.
LONG_TIME_STEP = 60
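
As a quick sanity check of the comment above: a span that starts on the first of the month and ends on the 15th covers 15 calendar days, which hold at most two full 7-day weeks, hence the cap of `2` on `train_horizon`. A minimal sketch (the concrete date is assumed for illustration only):

import datetime

start = datetime.date(2016, 2, 1)  # assumed date; only the 15-day length matters
end = start + datetime.timedelta(weeks=2)  # the 15th of the month, as hard-coded in `END` above
assert end.day == 15
assert (end - start).days + 1 == 15  # 15 calendar days ...
assert ((end - start).days + 1) // 7 == 2  # ... contain at most two full 7-day weeks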


@@ -0,0 +1,76 @@
"""Fixtures and globals for testing `urban_meal_delivery.forecasts`."""
import datetime as dt
import pandas as pd
import pytest
from tests import config as test_config
from urban_meal_delivery import config
# See remarks in `vertical_datetime_index` fixture.
VERTICAL_FREQUENCY = 7 * 12
# The default `ns` suggested for the STL method.
NS = 7
@pytest.fixture
def horizontal_datetime_index():
"""A `pd.Index` with `DateTime` values.
The times resemble a horizontal time series with a `frequency` of `7`.
All observations take place at `NOON`.
"""
first_start_at = dt.datetime(
test_config.YEAR, test_config.MONTH, test_config.DAY, test_config.NOON, 0,
)
gen = (
start_at
for start_at in pd.date_range(first_start_at, test_config.END, freq='D')
)
index = pd.Index(gen)
index.name = 'start_at'
assert len(index) == 15 # sanity check
return index
@pytest.fixture
def horizontal_no_demand(horizontal_datetime_index):
"""A horizontal time series of order totals when there was no demand."""
return pd.Series(0, index=horizontal_datetime_index, name='order_totals')
@pytest.fixture
def vertical_datetime_index():
"""A `pd.Index` with `DateTime` values.
The times resemble a vertical time series with a
`frequency` of `7` times the number of daily time steps,
which is `12` for `LONG_TIME_STEP` values.
"""
gen = (
start_at
for start_at in pd.date_range(
test_config.START, test_config.END, freq=f'{test_config.LONG_TIME_STEP}T',
)
if config.SERVICE_START <= start_at.hour < config.SERVICE_END
)
index = pd.Index(gen)
index.name = 'start_at'
assert len(index) == 15 * 12 # sanity check
return index
@pytest.fixture
def vertical_no_demand(vertical_datetime_index):
"""A vertical time series of order totals when there was no demand."""
return pd.Series(0, index=vertical_datetime_index, name='order_totals')
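
To see why `vertical_datetime_index` ends up with `15 * 12` entries: with `LONG_TIME_STEP = 60`, the filter keeps one "start_at" per hour inside the service window, i.e. `12` per day over the 15-day horizon. A self-contained sketch with assumed service hours (the real values come from `urban_meal_delivery.config`):

import pandas as pd

service_start, service_end = 11, 23  # assumed 12-hour service window

index = pd.Index(
    start_at
    for start_at in pd.date_range('2016-02-01 11:00', '2016-02-15 23:00', freq='60T')
    if service_start <= start_at.hour < service_end
)

assert len(index) == 15 * 12  # 12 hourly time steps per day over 15 days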


@@ -5,157 +5,149 @@ import math
import pandas as pd
import pytest
from tests import config as test_config
from urban_meal_delivery import config
from tests.forecasts.conftest import NS
from tests.forecasts.conftest import VERTICAL_FREQUENCY
from urban_meal_delivery.forecasts import decomposition
# See remarks in `datetime_index` fixture.
FREQUENCY = 7 * 12
# The default `ns` suggested for the STL method.
NS = 7
@pytest.fixture
def datetime_index():
"""A `pd.Index` with `DateTime` values.
The times resemble a vertical time series with a
`frequency` of `7` times the number of daily time steps,
which is `12` for `LONG_TIME_STEP` values.
"""
gen = (
start_at
for start_at in pd.date_range(
test_config.START, test_config.END, freq=f'{test_config.LONG_TIME_STEP}T',
)
if config.SERVICE_START <= start_at.hour < config.SERVICE_END
)
index = pd.Index(gen)
index.name = 'start_at'
return index
@pytest.fixture
def no_demand(datetime_index):
"""A time series of order totals when there was no demand."""
return pd.Series(0, index=datetime_index, name='order_totals')
class TestInvalidArguments:
"""Test `stl()` with invalid arguments."""
def test_no_nans_in_time_series(self, datetime_index):
def test_no_nans_in_time_series(self, vertical_datetime_index):
"""`stl()` requires a `time_series` without `NaN` values."""
time_series = pd.Series(dtype=float, index=datetime_index)
time_series = pd.Series(dtype=float, index=vertical_datetime_index)
with pytest.raises(ValueError, match='`NaN` values'):
decomposition.stl(time_series, frequency=FREQUENCY, ns=99)
decomposition.stl(time_series, frequency=VERTICAL_FREQUENCY, ns=99)
def test_ns_not_odd(self, no_demand):
def test_ns_not_odd(self, vertical_no_demand):
"""`ns` must be odd and `>= 7`."""
with pytest.raises(ValueError, match='`ns`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=8)
decomposition.stl(vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=8)
@pytest.mark.parametrize('ns', [-99, -1, 1, 5])
def test_ns_smaller_than_seven(self, no_demand, ns):
def test_ns_smaller_than_seven(self, vertical_no_demand, ns):
"""`ns` must be odd and `>= 7`."""
with pytest.raises(ValueError, match='`ns`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=ns)
decomposition.stl(vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=ns)
def test_nt_not_odd(self, no_demand):
def test_nt_not_odd(self, vertical_no_demand):
"""`nt` must be odd and `>= default_nt`."""
nt = 200
default_nt = math.ceil((1.5 * FREQUENCY) / (1 - (1.5 / NS)))
default_nt = math.ceil((1.5 * VERTICAL_FREQUENCY) / (1 - (1.5 / NS)))
assert nt > default_nt # sanity check
with pytest.raises(ValueError, match='`nt`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=NS, nt=nt)
decomposition.stl(
vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, nt=nt,
)
@pytest.mark.parametrize('nt', [-99, -1, 0, 1, 99, 159])
def test_nt_not_at_least_the_default(self, no_demand, nt):
def test_nt_not_at_least_the_default(self, vertical_no_demand, nt):
"""`nt` must be odd and `>= default_nt`."""
# `default_nt` becomes 161.
default_nt = math.ceil((1.5 * FREQUENCY) / (1 - (1.5 / NS)))
default_nt = math.ceil((1.5 * VERTICAL_FREQUENCY) / (1 - (1.5 / NS)))
assert nt < default_nt # sanity check
with pytest.raises(ValueError, match='`nt`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=NS, nt=nt)
decomposition.stl(
vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, nt=nt,
)
def test_nl_not_odd(self, no_demand):
def test_nl_not_odd(self, vertical_no_demand):
"""`nl` must be odd and `>= frequency`."""
nl = 200
assert nl > FREQUENCY # sanity check
assert nl > VERTICAL_FREQUENCY # sanity check
with pytest.raises(ValueError, match='`nl`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=NS, nl=nl)
decomposition.stl(
vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, nl=nl,
)
def test_nl_at_least_the_frequency(self, no_demand):
def test_nl_at_least_the_frequency(self, vertical_no_demand):
"""`nl` must be odd and `>= frequency`."""
nl = 77
assert nl < FREQUENCY # sanity check
assert nl < VERTICAL_FREQUENCY # sanity check
with pytest.raises(ValueError, match='`nl`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=NS, nl=nl)
decomposition.stl(
vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, nl=nl,
)
def test_ds_not_zero_or_one(self, no_demand):
def test_ds_not_zero_or_one(self, vertical_no_demand):
"""`ds` must be `0` or `1`."""
with pytest.raises(ValueError, match='`ds`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=NS, ds=2)
decomposition.stl(
vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, ds=2,
)
def test_dt_not_zero_or_one(self, no_demand):
def test_dt_not_zero_or_one(self, vertical_no_demand):
"""`dt` must be `0` or `1`."""
with pytest.raises(ValueError, match='`dt`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=NS, dt=2)
decomposition.stl(
vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, dt=2,
)
def test_dl_not_zero_or_one(self, no_demand):
def test_dl_not_zero_or_one(self, vertical_no_demand):
"""`dl` must be `0` or `1`."""
with pytest.raises(ValueError, match='`dl`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=NS, dl=2)
decomposition.stl(
vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, dl=2,
)
@pytest.mark.parametrize('js', [-1, 0])
def test_js_not_positive(self, no_demand, js):
def test_js_not_positive(self, vertical_no_demand, js):
"""`js` must be positive."""
with pytest.raises(ValueError, match='`js`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=NS, js=js)
decomposition.stl(
vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, js=js,
)
@pytest.mark.parametrize('jt', [-1, 0])
def test_jt_not_positive(self, no_demand, jt):
def test_jt_not_positive(self, vertical_no_demand, jt):
"""`jt` must be positive."""
with pytest.raises(ValueError, match='`jt`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=NS, jt=jt)
decomposition.stl(
vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, jt=jt,
)
@pytest.mark.parametrize('jl', [-1, 0])
def test_jl_not_positive(self, no_demand, jl):
def test_jl_not_positive(self, vertical_no_demand, jl):
"""`jl` must be positive."""
with pytest.raises(ValueError, match='`jl`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=NS, jl=jl)
decomposition.stl(
vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, jl=jl,
)
@pytest.mark.parametrize('ni', [-1, 0])
def test_ni_not_positive(self, no_demand, ni):
def test_ni_not_positive(self, vertical_no_demand, ni):
"""`ni` must be positive."""
with pytest.raises(ValueError, match='`ni`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=NS, ni=ni)
decomposition.stl(
vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, ni=ni,
)
def test_no_not_non_negative(self, no_demand):
def test_no_not_non_negative(self, vertical_no_demand):
"""`no` must be non-negative."""
with pytest.raises(ValueError, match='`no`'):
decomposition.stl(no_demand, frequency=FREQUENCY, ns=NS, no=-1)
decomposition.stl(
vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS, no=-1,
)
@pytest.mark.r
class TestValidArguments:
"""Test `stl()` with valid arguments."""
def test_structure_of_returned_dataframe(self, no_demand):
def test_structure_of_returned_dataframe(self, vertical_no_demand):
"""`stl()` returns a `pd.DataFrame` with three columns."""
result = decomposition.stl(no_demand, frequency=FREQUENCY, ns=NS)
result = decomposition.stl(
vertical_no_demand, frequency=VERTICAL_FREQUENCY, ns=NS,
)
assert isinstance(result, pd.DataFrame)
assert list(result.columns) == ['seasonal', 'trend', 'residual']
@@ -173,15 +165,15 @@ class TestValidArguments:
@pytest.mark.parametrize('ni', [2, 3])
@pytest.mark.parametrize('no', [0, 1])
def test_decompose_time_series_with_no_demand( # noqa:WPS211,WPS216
self, no_demand, nt, nl, ds, dt, dl, js, jt, jl, ni, no, # noqa:WPS110
self, vertical_no_demand, nt, nl, ds, dt, dl, js, jt, jl, ni, no, # noqa:WPS110
):
"""Decomposing a time series with no demand ...
... returns a `pd.DataFrame` with three columns holding only `0.0` values.
"""
decomposed = decomposition.stl(
no_demand,
frequency=FREQUENCY,
vertical_no_demand,
frequency=VERTICAL_FREQUENCY,
ns=NS,
nt=nt,
nl=nl,

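The `TestInvalidArguments` cases above pin down the argument rules for the `stl()` wrapper: `ns` odd and `>= 7`, `nt` odd and at least `ceil(1.5 * frequency / (1 - 1.5 / ns))`, `nl` odd and at least the `frequency`, `ds`/`dt`/`dl` in `{0, 1}`, `js`/`jt`/`jl`/`ni` positive, `no` non-negative, and no `NaN` values in the time series. A rough sketch of how such validation could look (hypothetical helper; the real checks live in `urban_meal_delivery.forecasts.decomposition.stl()` and may differ in detail):

import math

def _validate_stl_args(  # hypothetical helper mirroring the tests above
    time_series, *, frequency, ns, nt=None, nl=None,
    ds=0, dt=0, dl=0, js=1, jt=1, jl=1, ni=2, no=0,
):
    """Raise a `ValueError` for any argument the tests above reject."""
    if time_series.isnull().any():  # `time_series` is a `pd.Series`
        raise ValueError('`time_series` must not contain `NaN` values')
    if ns < 7 or ns % 2 == 0:
        raise ValueError('`ns` must be odd and `>= 7`')
    # The tests derive the default as `ceil(1.5 * frequency / (1 - 1.5 / ns))`.
    default_nt = math.ceil((1.5 * frequency) / (1 - (1.5 / ns)))
    if nt is not None and (nt < default_nt or nt % 2 == 0):
        raise ValueError('`nt` must be odd and at least the default')
    if nl is not None and (nl < frequency or nl % 2 == 0):
        raise ValueError('`nl` must be odd and at least the `frequency`')
    for name, value in (('ds', ds), ('dt', dt), ('dl', dl)):
        if value not in {0, 1}:
            raise ValueError(f'`{name}` must be `0` or `1`')
    for name, value in (('js', js), ('jt', jt), ('jl', jl), ('ni', ni)):
        if value <= 0:
            raise ValueError(f'`{name}` must be positive')
    if no < 0:
        raise ValueError('`no` must be non-negative')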

@@ -0,0 +1,128 @@
"""Test the `arima.predict()` and `ets.predict()` functions."""
import datetime as dt
import pandas as pd
import pytest
from tests import config as test_config
from tests.forecasts.conftest import VERTICAL_FREQUENCY
from urban_meal_delivery import config
from urban_meal_delivery.forecasts.methods import arima
from urban_meal_delivery.forecasts.methods import ets
@pytest.fixture
def forecast_interval():
"""A `pd.Index` with `DateTime` values ...
... that takes place one day after the `START`-`END` horizon and
resembles an entire day (`12` "start_at" values as we use `LONG_TIME_STEP`).
"""
future_day = test_config.END.date() + dt.timedelta(days=1)
first_start_at = dt.datetime(
future_day.year, future_day.month, future_day.day, config.SERVICE_START, 0,
)
end_of_day = dt.datetime(
future_day.year, future_day.month, future_day.day, config.SERVICE_END, 0,
)
gen = (
start_at
for start_at in pd.date_range(
first_start_at, end_of_day, freq=f'{test_config.LONG_TIME_STEP}T',
)
if config.SERVICE_START <= start_at.hour < config.SERVICE_END
)
index = pd.Index(gen)
index.name = 'start_at'
return index
@pytest.fixture
def forecast_time_step():
"""A `pd.Index` with one `DateTime` value, resembling `NOON`."""
future_day = test_config.END.date() + dt.timedelta(days=1)
start_at = dt.datetime(
future_day.year, future_day.month, future_day.day, test_config.NOON, 0,
)
index = pd.Index([start_at])
index.name = 'start_at'
return index
@pytest.mark.r
@pytest.mark.parametrize('func', [arima.predict, ets.predict])
class TestMakePredictions:
"""Make predictions with `arima.predict()` and `ets.predict()`."""
def test_training_data_contains_nan_values(
self, func, vertical_no_demand, forecast_interval,
):
"""`training_ts` must not contain `NaN` values."""
vertical_no_demand.iloc[0] = pd.NA
with pytest.raises(ValueError, match='must not contain `NaN`'):
func(
training_ts=vertical_no_demand,
forecast_interval=forecast_interval,
frequency=VERTICAL_FREQUENCY,
)
def test_structure_of_returned_dataframe(
self, func, vertical_no_demand, forecast_interval,
):
"""Both `.predict()` return a `pd.DataFrame` with five columns."""
result = func(
training_ts=vertical_no_demand,
forecast_interval=forecast_interval,
frequency=VERTICAL_FREQUENCY,
)
assert isinstance(result, pd.DataFrame)
assert list(result.columns) == [
'predictions',
'low_80',
'high_80',
'low_95',
'high_95',
]
def test_predict_horizontal_time_series_with_no_demand(
self, func, horizontal_no_demand, forecast_time_step,
):
"""Predicting a horizontal time series with no demand ...
... returns a `pd.DataFrame` with five columns holding only `0.0` values.
"""
predictions = func(
training_ts=horizontal_no_demand,
forecast_interval=forecast_time_step,
frequency=7,
)
result = predictions.sum().sum()
assert result == 0
def test_predict_vertical_time_series_with_no_demand(
self, func, vertical_no_demand, forecast_interval,
):
"""Predicting a vertical time series with no demand ...
... returns a `pd.DataFrame` with five columns holding only `0.0` values.
"""
predictions = func(
training_ts=vertical_no_demand,
forecast_interval=forecast_interval,
frequency=VERTICAL_FREQUENCY,
)
result = predictions.sum().sum()
assert result == 0
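
The interface exercised above is the same for both wrappers: a `training_ts`, a `forecast_interval`, a `frequency`, and a `pd.DataFrame` holding point forecasts plus 80% and 95% prediction intervals. A hypothetical sketch of the `arima` flavor built on rpy2 and R's `forecast` package (not the commit's actual code; `ets` would swap `auto.arima(y)` for `ets(y)`):

import pandas as pd
from rpy2 import robjects

def predict(training_ts: pd.Series, forecast_interval: pd.Index, *, frequency: int) -> pd.DataFrame:
    """Sketch of an ARIMA wrapper that delegates to R via rpy2."""
    if training_ts.isnull().any():
        raise ValueError('`training_ts` must not contain `NaN` values')
    # Hand the observations to R as a `ts` object with the given frequency.
    robjects.globalenv['y'] = robjects.r['ts'](
        robjects.FloatVector(training_ts.to_list()), frequency=frequency,
    )
    robjects.r('library(forecast)')
    # `forecast()` returns point forecasts plus 80% and 95% prediction intervals.
    raw = robjects.r(
        f'as.data.frame(forecast(auto.arima(y), h={len(forecast_interval)}, level=c(80, 95)))',
    )
    return pd.DataFrame(
        data={
            'predictions': list(raw.rx2('Point Forecast')),
            'low_80': list(raw.rx2('Lo 80')),
            'high_80': list(raw.rx2('Hi 80')),
            'low_95': list(raw.rx2('Lo 95')),
            'high_95': list(raw.rx2('Hi 95')),
        },
        index=forecast_interval,
    )

With a constant-zero `training_ts`, the fitted model is flat, so all five columns come back as zeros, which is what the two no-demand tests above assert.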