diff --git a/src/urban_meal_delivery/forecasts/__init__.py b/src/urban_meal_delivery/forecasts/__init__.py index 5ecdd1e..2dcd196 100644 --- a/src/urban_meal_delivery/forecasts/__init__.py +++ b/src/urban_meal_delivery/forecasts/__init__.py @@ -1,4 +1,29 @@ -"""Demand forecasting utilities.""" +"""Demand forecasting utilities. + +This sub-package is divided into further sub-packages and modules as follows: + +`methods` contains various time series related statistical methods, implemented +as plain `function` objects that are used to predict into the future given a +time series of historic order counts. The methods are context-agnostic, meaning +that they only take and return `pd.Series/DataFrame`s holding numbers and +are not concerned with how these numbers were generated or what they mean. +Some functions, like `arima.predict()` or `ets.predict()` wrap functions called +in R using the `rpy2` library. Others, like `extrapolate_season.predict()`, are +written in plain Python. + +`timify` defines an `OrderHistory` class that abstracts away the communication +with the database and provides `pd.Series` objects with the order counts that +are fed into the `methods`. In particular, it uses SQL statements behind the +scenes to calculate the historic order counts on a per-`Pixel` level. Once the +data is loaded from the database, an `OrderHistory` instance provides various +ways to slice out, or generate, different kinds of order time series (e.g., +"horizontal" vs. "vertical" time series). + +`models` defines various forecasting `*Model`s that combine a given kind of +time series with one of the forecasting `methods`. For example, the ETS method +applied to a horizontal time series is implemented in the `HorizontalETSModel`. 
+""" from urban_meal_delivery.forecasts import methods +from urban_meal_delivery.forecasts import models from urban_meal_delivery.forecasts import timify diff --git a/src/urban_meal_delivery/forecasts/models/__init__.py b/src/urban_meal_delivery/forecasts/models/__init__.py new file mode 100644 index 0000000..9d33f71 --- /dev/null +++ b/src/urban_meal_delivery/forecasts/models/__init__.py @@ -0,0 +1,34 @@ +"""Define the forecasting `*Model`s used in this project. + +`*Model`s are different from plain forecasting `methods` in that they are tied +to a given kind of historic order time series, as provided by the `OrderHistory` +class in the `timify` module. For example, the ARIMA model applied to a vertical +time series becomes the `VerticalARIMAModel`. + +An overview of the `*Model`s used for tactical forecasting can be found in section +"3.6 Forecasting Models" in the paper "Real-time Demand Forecasting for an Urban +Delivery Platform" that is part of the `urban-meal-delivery` research project. + +For the paper check: + https://github.com/webartifex/urban-meal-delivery-demand-forecasting/blob/main/paper.pdf + https://www.sciencedirect.com/science/article/pii/S1366554520307936 + +This sub-package is organized as follows. The `base` module defines an abstract +`ForecastingModelABC` class that unifies how the concrete `*Model`s work. +While the abstract `.predict()` method returns a `pd.DataFrame` (= basically, +the result of one of the forecasting `methods`), the concrete `.make_forecast()` +method converts the results into `Forecast` (=ORM) objects. +Also, `.make_forecast()` implements a caching strategy where already made +`Forecast`s are loaded from the database instead of calculating them again, +which could be a heavier computation. + +The `tactical` sub-package contains all the `*Model`s used to implement the +UDP's predictive routing strategy. + +A future `planning` sub-package will contain the `*Model`s used to plan the +`Courier`'s shifts a week ahead. 
+""" # noqa:RST215 + +from urban_meal_delivery.forecasts.models.tactical.horizontal import HorizontalETSModel +from urban_meal_delivery.forecasts.models.tactical.realtime import RealtimeARIMAModel +from urban_meal_delivery.forecasts.models.tactical.vertical import VerticalARIMAModel diff --git a/src/urban_meal_delivery/forecasts/models/base.py b/src/urban_meal_delivery/forecasts/models/base.py new file mode 100644 index 0000000..8be733a --- /dev/null +++ b/src/urban_meal_delivery/forecasts/models/base.py @@ -0,0 +1,116 @@ +"""The abstract blueprint for a forecasting `*Model`.""" + +import abc +import datetime as dt + +import pandas as pd + +from urban_meal_delivery import db +from urban_meal_delivery.forecasts import timify + + +class ForecastingModelABC(abc.ABC): + """An abstract interface of a forecasting `*Model`.""" + + def __init__(self, order_history: timify.OrderHistory) -> None: + """Initialize a new forecasting model. + + Args: + order_history: an abstraction providing the time series data + """ + self._order_history = order_history + + @property + @abc.abstractmethod + def name(self) -> str: + """The name of the model. + + Used to identify `Forecast`s of the same `*Model` in the database. + So, these must be chosen carefully and must not be changed later on! + + Example: "hets" or "varima" for tactical demand forecasting + """ + + @abc.abstractmethod + def predict( + self, pixel: db.Pixel, predict_at: dt.datetime, train_horizon: int, + ) -> pd.DataFrame: + """Concrete implementation of how a `*Model` makes a prediction. + + This method is called by the unified `*Model.make_forecast()` method, + which implements the caching logic with the database. 
+ + Args: + pixel: pixel in which the prediction is made + predict_at: time step (i.e., "start_at") to make the prediction for + train_horizon: weeks of historic data used to predict `predict_at` + + Returns: + actuals, predictions, and possibly 80%/95% confidence intervals; + includes a row for the time step starting at `predict_at` and + may contain further rows for other time steps on the same day + """ # noqa:DAR202 + + def make_forecast( + self, pixel: db.Pixel, predict_at: dt.datetime, train_horizon: int, + ) -> db.Forecast: + """Make a forecast for the time step starting at `predict_at`. + + Important: This method uses a unified `predict_at` argument. + Some `*Model`s, in particular vertical ones, are only trained once per + day and then make a prediction for all time steps on that day, and + therefore, work with a `predict_day` argument instead of `predict_at` + behind the scenes. Then, all `Forecast`s are stored into the database + and only the one starting at `predict_at` is returned. + + Args: + pixel: pixel in which the `Forecast` is made + predict_at: time step (i.e., "start_at") to make the `Forecast` for + train_horizon: weeks of historic data used to forecast `predict_at` + + Returns: + actual, prediction, and possibly 80%/95% confidence intervals + for the time step starting at `predict_at` + + # noqa:DAR401 RuntimeError + """ + if ( # noqa:WPS337 + cached_forecast := db.session.query(db.Forecast) # noqa:ECE001,WPS221 + .filter_by(pixel=pixel) + .filter_by(start_at=predict_at) + .filter_by(time_step=self._order_history.time_step) + .filter_by(training_horizon=train_horizon) + .filter_by(model=self.name) + .first() + ) : + return cached_forecast + + # Horizontal and real-time `*Model`s return a `pd.DataFrame` with one + # row corresponding to the time step starting at `predict_at` whereas + # vertical models return several rows, covering all time steps of a day. 
+ predictions = self.predict(pixel, predict_at, train_horizon) + + # Convert the `predictions` into a `list` of `Forecast` objects. + forecasts = db.Forecast.from_dataframe( + pixel=pixel, + time_step=self._order_history.time_step, + training_horizon=train_horizon, + model=self.name, + data=predictions, + ) + + # We persist all `Forecast`s into the database to + # not have to run the same model training again. + db.session.add_all(forecasts) + db.session.commit() + + # The one `Forecast` object asked for must be in `forecasts` + # if the concrete `*Model.predict()` method works correctly; ... + for forecast in forecasts: + if forecast.start_at == predict_at: + return forecast + + # ..., however, we put in a loud error, just in case. + raise RuntimeError( # pragma: no cover + '`Forecast` for `predict_at` was not returned by `*Model.predict()`', + ) diff --git a/src/urban_meal_delivery/forecasts/models/tactical/__init__.py b/src/urban_meal_delivery/forecasts/models/tactical/__init__.py new file mode 100644 index 0000000..df70622 --- /dev/null +++ b/src/urban_meal_delivery/forecasts/models/tactical/__init__.py @@ -0,0 +1,16 @@ +"""Forecasting `*Model`s to predict demand for tactical purposes. + +The `*Model`s in this module predict only a small number (e.g., one) +of time steps into the near future and are used to implement the UDP's +predictive routing strategies. + +They are classified into "horizontal", "vertical", and "real-time" models +with respect to what historic data they are trained on and how often they +are re-trained on the day to be predicted. For the details, check section +"3.6 Forecasting Models" in the paper "Real-time Demand Forecasting for an +Urban Delivery Platform". 
+ +For the paper check: + https://github.com/webartifex/urban-meal-delivery-demand-forecasting/blob/main/paper.pdf + https://www.sciencedirect.com/science/article/pii/S1366554520307936 +""" # noqa:RST215 diff --git a/src/urban_meal_delivery/forecasts/models/tactical/horizontal.py b/src/urban_meal_delivery/forecasts/models/tactical/horizontal.py new file mode 100644 index 0000000..53e85be --- /dev/null +++ b/src/urban_meal_delivery/forecasts/models/tactical/horizontal.py @@ -0,0 +1,67 @@ +"""Horizontal forecasting `*Model`s to predict demand for tactical purposes. + +Horizontal `*Model`s take the historic order counts only from time steps +corresponding to the same time of day as the one to be predicted (i.e., the +one starting at `predict_at`). Then, they make a prediction for only one day +into the future. Thus, the training time series have a `frequency` of `7`, the +number of days in a week. +""" # noqa:RST215 + +import datetime as dt + +import pandas as pd + +from urban_meal_delivery import db +from urban_meal_delivery.forecasts import methods +from urban_meal_delivery.forecasts.models import base + + +class HorizontalETSModel(base.ForecastingModelABC): + """The ETS model applied on a horizontal time series.""" + + name = 'hets' + + def predict( + self, pixel: db.Pixel, predict_at: dt.datetime, train_horizon: int, + ) -> pd.DataFrame: + """Predict demand for a time step. + + Args: + pixel: pixel in which the prediction is made + predict_at: time step (i.e., "start_at") to make the prediction for + train_horizon: weeks of historic data used to predict `predict_at` + + Returns: + actual order counts (i.e., the "actual" column), + point forecasts (i.e., the "prediction" column), and + confidence intervals (i.e., the four "low/high/80/95" columns); + contains one row for the `predict_at` time step + + # noqa:DAR401 RuntimeError + """ + # Generate the historic (and horizontal) order time series. 
+ training_ts, frequency, actuals_ts = self._order_history.make_horizontal_ts( + pixel_id=pixel.id, predict_at=predict_at, train_horizon=train_horizon, + ) + + # Sanity check. + if frequency != 7: # pragma: no cover + raise RuntimeError('`frequency` should be `7`') + + # Make `predictions` with the seasonal ETS method ("ZZZ" model). + predictions = methods.ets.predict( + training_ts=training_ts, + forecast_interval=actuals_ts.index, + frequency=frequency, # `== 7`, the number of weekdays + seasonal_fit=True, # because there was no decomposition before + ) + + predictions.insert(loc=0, column='actual', value=actuals_ts) + + # Sanity checks. + if predictions.isnull().any().any(): # pragma: no cover + raise RuntimeError('missing predictions in hets model') + if predict_at not in predictions.index: # pragma: no cover + raise RuntimeError('missing prediction for `predict_at`') + + return predictions diff --git a/src/urban_meal_delivery/forecasts/models/tactical/realtime.py b/src/urban_meal_delivery/forecasts/models/tactical/realtime.py new file mode 100644 index 0000000..bf30ee5 --- /dev/null +++ b/src/urban_meal_delivery/forecasts/models/tactical/realtime.py @@ -0,0 +1,117 @@ +"""Real-time forecasting `*Model`s to predict demand for tactical purposes. + +Real-time `*Model`s take order counts of all time steps in the training data +and make a prediction for only one time step on the day to be predicted (i.e., +the one starting at `predict_at`). Thus, the training time series have a +`frequency` of the number of weekdays, `7`, times the number of time steps on a +day. For example, for 60-minute time steps, the `frequency` becomes `7 * 12` +(= operating hours from 11 am to 11 pm), which is `84`. Real-time `*Model`s +train the forecasting `methods` on a seasonally decomposed time series internally. 
+""" # noqa:RST215 + +import datetime as dt + +import pandas as pd + +from urban_meal_delivery import db +from urban_meal_delivery.forecasts import methods +from urban_meal_delivery.forecasts.models import base + + +class RealtimeARIMAModel(base.ForecastingModelABC): + """The ARIMA model applied on a real-time time series.""" + + name = 'rtarima' + + def predict( + self, pixel: db.Pixel, predict_at: dt.datetime, train_horizon: int, + ) -> pd.DataFrame: + """Predict demand for a time step. + + Args: + pixel: pixel in which the prediction is made + predict_at: time step (i.e., "start_at") to make the prediction for + train_horizon: weeks of historic data used to predict `predict_at` + + Returns: + actual order counts (i.e., the "actual" column), + point forecasts (i.e., the "prediction" column), and + confidence intervals (i.e., the four "low/high/80/95" columns); + contains one row for the `predict_at` time step + + # noqa:DAR401 RuntimeError + """ + # Generate the historic (and real-time) order time series. + training_ts, frequency, actuals_ts = self._order_history.make_realtime_ts( + pixel_id=pixel.id, predict_at=predict_at, train_horizon=train_horizon, + ) + + # Decompose the `training_ts` to make predictions for the seasonal + # component and the seasonally adjusted observations separately. + decomposed_training_ts = methods.decomposition.stl( + time_series=training_ts, + frequency=frequency, + # "Periodic" `ns` parameter => same seasonal component value + # for observations of the same lag. + ns=999, + ) + + # Make predictions for the seasonal component by linear extrapolation. + seasonal_predictions = methods.extrapolate_season.predict( + training_ts=decomposed_training_ts['seasonal'], + forecast_interval=actuals_ts.index, + frequency=frequency, + ) + + # Make predictions with the ARIMA model on the seasonally adjusted time series. 
+ seasonally_adjusted_predictions = methods.arima.predict( + training_ts=( + decomposed_training_ts['trend'] + decomposed_training_ts['residual'] + ), + forecast_interval=actuals_ts.index, + # Because the seasonality was taken out before, + # the `training_ts` has, by definition, a `frequency` of `1`. + frequency=1, + seasonal_fit=False, + ) + + # The overall `predictions` are the sum of the separate predictions above. + # As the linear extrapolation of the seasonal component has no + # confidence interval, we put the one from the ARIMA model around + # the extrapolated seasonal component. + predictions = pd.DataFrame( + data={ + 'actual': actuals_ts, + 'prediction': ( + seasonal_predictions['prediction'] # noqa:WPS204 + + seasonally_adjusted_predictions['prediction'] + ), + 'low80': ( + seasonal_predictions['prediction'] + + seasonally_adjusted_predictions['low80'] + ), + 'high80': ( + seasonal_predictions['prediction'] + + seasonally_adjusted_predictions['high80'] + ), + 'low95': ( + seasonal_predictions['prediction'] + + seasonally_adjusted_predictions['low95'] + ), + 'high95': ( + seasonal_predictions['prediction'] + + seasonally_adjusted_predictions['high95'] + ), + }, + index=actuals_ts.index, + ) + + # Sanity checks. + if len(predictions) != 1: # pragma: no cover + raise RuntimeError('real-time models should predict exactly one time step') + if predictions.isnull().any().any(): # pragma: no cover + raise RuntimeError('missing predictions in rtarima model') + if predict_at not in predictions.index: # pragma: no cover + raise RuntimeError('missing prediction for `predict_at`') + + return predictions diff --git a/src/urban_meal_delivery/forecasts/models/tactical/vertical.py b/src/urban_meal_delivery/forecasts/models/tactical/vertical.py new file mode 100644 index 0000000..caf4317 --- /dev/null +++ b/src/urban_meal_delivery/forecasts/models/tactical/vertical.py @@ -0,0 +1,119 @@ +"""Vertical forecasting `*Model`s to predict demand for tactical purposes. 
+ +Vertical `*Model`s take order counts of all time steps in the training data +and make a prediction for all time steps on the day to be predicted at once. +Thus, the training time series have a `frequency` of the number of weekdays, +`7`, times the number of time steps on a day. For example, with 60-minute time +steps, the `frequency` becomes `7 * 12` (= operating hours from 11 am to 11 pm), +which is `84`. Vertical `*Model`s train the forecasting `methods` on a seasonally +decomposed time series internally. +""" # noqa:RST215 + +import datetime as dt + +import pandas as pd + +from urban_meal_delivery import db +from urban_meal_delivery.forecasts import methods +from urban_meal_delivery.forecasts.models import base + + +class VerticalARIMAModel(base.ForecastingModelABC): + """The ARIMA model applied on a vertical time series.""" + + name = 'varima' + + def predict( + self, pixel: db.Pixel, predict_at: dt.datetime, train_horizon: int, + ) -> pd.DataFrame: + """Predict demand for all time steps on the day of `predict_at`. + + Args: + pixel: pixel in which the prediction is made + predict_at: time step (i.e., "start_at") to make the prediction for + train_horizon: weeks of historic data used to predict `predict_at` + + Returns: + actual order counts (i.e., the "actual" column), + point forecasts (i.e., the "prediction" column), and + confidence intervals (i.e., the four "low/high/80/95" columns); + contains several rows, including one for the `predict_at` time step + + # noqa:DAR401 RuntimeError + """ + # Generate the historic (and vertical) order time series. + training_ts, frequency, actuals_ts = self._order_history.make_vertical_ts( + pixel_id=pixel.id, + predict_day=predict_at.date(), + train_horizon=train_horizon, + ) + + # Decompose the `training_ts` to make predictions for the seasonal + # component and the seasonally adjusted observations separately. 
+ decomposed_training_ts = methods.decomposition.stl( + time_series=training_ts, + frequency=frequency, + # "Periodic" `ns` parameter => same seasonal component value + # for observations of the same lag. + ns=999, + ) + + # Make predictions for the seasonal component by linear extrapolation. + seasonal_predictions = methods.extrapolate_season.predict( + training_ts=decomposed_training_ts['seasonal'], + forecast_interval=actuals_ts.index, + frequency=frequency, + ) + + # Make predictions with the ARIMA model on the seasonally adjusted time series. + seasonally_adjusted_predictions = methods.arima.predict( + training_ts=( + decomposed_training_ts['trend'] + decomposed_training_ts['residual'] + ), + forecast_interval=actuals_ts.index, + # Because the seasonality was taken out before, + # the `training_ts` has, by definition, a `frequency` of `1`. + frequency=1, + seasonal_fit=False, + ) + + # The overall `predictions` are the sum of the separate predictions above. + # As the linear extrapolation of the seasonal component has no + # confidence interval, we put the one from the ARIMA model around + # the extrapolated seasonal component. + predictions = pd.DataFrame( + data={ + 'actual': actuals_ts, + 'prediction': ( + seasonal_predictions['prediction'] # noqa:WPS204 + + seasonally_adjusted_predictions['prediction'] + ), + 'low80': ( + seasonal_predictions['prediction'] + + seasonally_adjusted_predictions['low80'] + ), + 'high80': ( + seasonal_predictions['prediction'] + + seasonally_adjusted_predictions['high80'] + ), + 'low95': ( + seasonal_predictions['prediction'] + + seasonally_adjusted_predictions['low95'] + ), + 'high95': ( + seasonal_predictions['prediction'] + + seasonally_adjusted_predictions['high95'] + ), + }, + index=actuals_ts.index, + ) + + # Sanity checks. 
+ if len(predictions) <= 1: # pragma: no cover + raise RuntimeError('vertical models should predict several time steps') + if predictions.isnull().any().any(): # pragma: no cover + raise RuntimeError('missing predictions in varima model') + if predict_at not in predictions.index: # pragma: no cover + raise RuntimeError('missing prediction for `predict_at`') + + return predictions diff --git a/tests/config.py b/tests/config.py index 13fec36..7b1ec29 100644 --- a/tests/config.py +++ b/tests/config.py @@ -11,10 +11,10 @@ YEAR, MONTH, DAY = 2016, 7, 1 # The hour when most test cases take place. NOON = 12 -# `START` and `END` constitute a 15-day time span. -# That implies a maximum `train_horizon` of `2` as that needs full 7-day weeks. +# `START` and `END` constitute a 22-day time span. +# That implies a maximum `train_horizon` of `3` as that needs full 7-day weeks. START = datetime.datetime(YEAR, MONTH, DAY, config.SERVICE_START, 0) -END = datetime.datetime(YEAR, MONTH, 15, config.SERVICE_END, 0) +END = datetime.datetime(YEAR, MONTH, DAY + 21, config.SERVICE_END, 0) # Default time steps (in minutes), for example, for `OrderHistory` objects. LONG_TIME_STEP = 60 @@ -28,6 +28,6 @@ VERTICAL_FREQUENCY_SHORT = 7 * 24 # Default training horizons, for example, for # `OrderHistory.make_horizontal_time_series()`. 
-LONG_TRAIN_HORIZON = 2 -SHORT_TRAIN_HORIZON = 1 +LONG_TRAIN_HORIZON = 3 +SHORT_TRAIN_HORIZON = 2 TRAIN_HORIZONS = (SHORT_TRAIN_HORIZON, LONG_TRAIN_HORIZON) diff --git a/tests/forecasts/conftest.py b/tests/forecasts/conftest.py index ede73ba..527b5b9 100644 --- a/tests/forecasts/conftest.py +++ b/tests/forecasts/conftest.py @@ -1,4 +1,4 @@ -"""Fixtures and globals for testing `urban_meal_delivery.forecasts`.""" +"""Fixtures for testing the `urban_meal_delivery.forecasts` sub-package.""" import datetime as dt @@ -7,6 +7,7 @@ import pytest from tests import config as test_config from urban_meal_delivery import config +from urban_meal_delivery.forecasts import timify @pytest.fixture @@ -28,7 +29,10 @@ def horizontal_datetime_index(): index = pd.Index(gen) index.name = 'start_at' - assert len(index) == 15 # sanity check + # Sanity check. + # `+1` as both the `START` and `END` day are included. + n_days = (test_config.END - test_config.START).days + 1 + assert len(index) == n_days return index @@ -58,7 +62,10 @@ def vertical_datetime_index(): index = pd.Index(gen) index.name = 'start_at' - assert len(index) == 15 * 12 # sanity check + # Sanity check: n_days * n_number_of_opening_hours. + # `+1` as both the `START` and `END` day are included. + n_days = (test_config.END - test_config.START).days + 1 + assert len(index) == n_days * 12 return index @@ -67,3 +74,54 @@ def vertical_datetime_index(): def vertical_no_demand(vertical_datetime_index): """A vertical time series with order totals: no demand.""" return pd.Series(0, index=vertical_datetime_index, name='n_orders') + + +@pytest.fixture +def good_pixel_id(pixel): + """A `pixel_id` that is on the `grid`.""" + return pixel.id # `== 1` + + +@pytest.fixture +def order_totals(good_pixel_id): + """A mock for `OrderHistory.totals`. + + To be a bit more realistic, we sample two pixels on the `grid`. + + Uses the LONG_TIME_STEP as the length of a time step. 
+ """ + pixel_ids = [good_pixel_id, good_pixel_id + 1] + + gen = ( + (pixel_id, start_at) + for pixel_id in pixel_ids + for start_at in pd.date_range( + test_config.START, test_config.END, freq=f'{test_config.LONG_TIME_STEP}T', + ) + if config.SERVICE_START <= start_at.hour < config.SERVICE_END + ) + + # Index every (pixel_id, start_at) pair; each row below gets `n_orders == 1`. + index = pd.MultiIndex.from_tuples(gen) + index.names = ['pixel_id', 'start_at'] + + df = pd.DataFrame(data={'n_orders': 1}, index=index) + + # Sanity check: n_pixels * n_time_steps_per_day * n_days. + # `+1` as both the `START` and `END` day are included. + n_days = (test_config.END - test_config.START).days + 1 + assert len(df) == 2 * 12 * n_days + + return df + + +@pytest.fixture +def order_history(order_totals, grid): + """An `OrderHistory` object that does not need the database. + + Uses the LONG_TIME_STEP as the length of a time step. + """ + oh = timify.OrderHistory(grid=grid, time_step=test_config.LONG_TIME_STEP) + oh._data = order_totals + + return oh diff --git a/tests/forecasts/test_models.py b/tests/forecasts/test_models.py new file mode 100644 index 0000000..c4b8a91 --- /dev/null +++ b/tests/forecasts/test_models.py @@ -0,0 +1,181 @@ +"""Tests for the `urban_meal_delivery.forecasts.models` sub-package.""" + +import datetime as dt + +import pandas as pd +import pytest + +from tests import config as test_config +from urban_meal_delivery import db +from urban_meal_delivery.forecasts import models + + +MODELS = ( + models.HorizontalETSModel, + models.RealtimeARIMAModel, + models.VerticalARIMAModel, +) + + +@pytest.mark.parametrize('model_cls', MODELS) +class TestGenericForecastingModelProperties: + """Test everything all concrete `*Model`s have in common. + + The test cases here replace testing the `ForecastingModelABC` class on its own. + + As uncertainty is in the nature of forecasting, we do not test the individual + point forecasts or confidence intervals themselves. 
Instead, we confirm + that all the `*Model`s adhere to the `ForecastingModelABC` generically. + So, these test cases are more like integration tests conceptually. + + Also, note that some `methods.*.predict()` functions use R behind the scenes. + """ # noqa:RST215 + + def test_create_model(self, model_cls, order_history): + """Test instantiation of a new and concrete `*Model` object.""" + model = model_cls(order_history=order_history) + + assert model is not None + + def test_model_has_a_name(self, model_cls, order_history): + """Access the `*Model.name` property.""" + model = model_cls(order_history=order_history) + + result = model.name + + assert isinstance(result, str) + + unique_model_names = set() + + def test_each_model_has_a_unique_name(self, model_cls, order_history): + """The `*Model.name` values must be unique across all `*Model`s. + + Important: this test case has a side effect that is visible + across the different parametrized versions of this case! + """ # noqa:RST215 + model = model_cls(order_history=order_history) + + assert model.name not in self.unique_model_names + + self.unique_model_names.add(model.name) + + @pytest.fixture + def predict_at(self) -> dt.datetime: + """`NOON` on the day to be predicted.""" + return dt.datetime( + test_config.END.year, + test_config.END.month, + test_config.END.day, + test_config.NOON, + ) + + @pytest.mark.r + def test_make_prediction_structure( + self, model_cls, order_history, pixel, predict_at, + ): + """`*Model.predict()` returns a `pd.DataFrame` ... + + ... with known columns. 
+ """ # noqa:RST215 + model = model_cls(order_history=order_history) + + result = model.predict( + pixel=pixel, + predict_at=predict_at, + train_horizon=test_config.LONG_TRAIN_HORIZON, + ) + + assert isinstance(result, pd.DataFrame) + assert list(result.columns) == [ + 'actual', + 'prediction', + 'low80', + 'high80', + 'low95', + 'high95', + ] + + @pytest.mark.r + def test_make_prediction_for_given_time_step( + self, model_cls, order_history, pixel, predict_at, + ): + """`*Model.predict()` returns a row for ... + + ... the time step starting at `predict_at`. + """ # noqa:RST215 + model = model_cls(order_history=order_history) + + result = model.predict( + pixel=pixel, + predict_at=predict_at, + train_horizon=test_config.LONG_TRAIN_HORIZON, + ) + + assert predict_at in result.index + + @pytest.mark.r + def test_make_prediction_contains_actual_values( + self, model_cls, order_history, pixel, predict_at, + ): + """`*Model.predict()` returns a `pd.DataFrame` ... + + ... where the "actual" and "prediction" columns must not be empty. 
+ """ # noqa:RST215 + model = model_cls(order_history=order_history) + + result = model.predict( + pixel=pixel, + predict_at=predict_at, + train_horizon=test_config.LONG_TRAIN_HORIZON, + ) + + assert not result['actual'].isnull().any() + assert not result['prediction'].isnull().any() + + @pytest.mark.db + @pytest.mark.r + def test_make_forecast( # noqa:WPS211 + self, db_session, model_cls, order_history, pixel, predict_at, + ): + """`*Model.make_forecast()` returns a `Forecast` object.""" # noqa:RST215 + model = model_cls(order_history=order_history) + + result = model.make_forecast( + pixel=pixel, + predict_at=predict_at, + train_horizon=test_config.LONG_TRAIN_HORIZON, + ) + + assert isinstance(result, db.Forecast) + assert result.pixel == pixel + assert result.start_at == predict_at + assert result.training_horizon == test_config.LONG_TRAIN_HORIZON + + @pytest.mark.db + @pytest.mark.r + def test_make_forecast_is_cached( # noqa:WPS211 + self, db_session, model_cls, order_history, pixel, predict_at, + ): + """`*Model.make_forecast()` caches the `Forecast` object.""" # noqa:RST215 + model = model_cls(order_history=order_history) + + assert db_session.query(db.Forecast).count() == 0 + + result1 = model.make_forecast( + pixel=pixel, + predict_at=predict_at, + train_horizon=test_config.LONG_TRAIN_HORIZON, + ) + + n_cached_forecasts = db_session.query(db.Forecast).count() + assert n_cached_forecasts >= 1 + + result2 = model.make_forecast( + pixel=pixel, + predict_at=predict_at, + train_horizon=test_config.LONG_TRAIN_HORIZON, + ) + + assert n_cached_forecasts == db_session.query(db.Forecast).count() + + assert result1 == result2 diff --git a/tests/forecasts/timify/conftest.py b/tests/forecasts/timify/conftest.py deleted file mode 100644 index cfb5fc7..0000000 --- a/tests/forecasts/timify/conftest.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Fixture for testing the `urban_meal_delivery.forecast.timify` module.""" - -import pandas as pd -import pytest - -from tests import config 
as test_config -from urban_meal_delivery import config -from urban_meal_delivery.forecasts import timify - - -@pytest.fixture -def good_pixel_id(pixel): - """A `pixel_id` that is on the `grid`.""" - return pixel.id # `== 1` - - -@pytest.fixture -def order_totals(good_pixel_id): - """A mock for `OrderHistory.totals`. - - To be a bit more realistic, we sample two pixels on the `grid`. - - Uses the LONG_TIME_STEP as the length of a time step. - """ - pixel_ids = [good_pixel_id, good_pixel_id + 1] - - gen = ( - (pixel_id, start_at) - for pixel_id in pixel_ids - for start_at in pd.date_range( - test_config.START, test_config.END, freq=f'{test_config.LONG_TIME_STEP}T', - ) - if config.SERVICE_START <= start_at.hour < config.SERVICE_END - ) - - # Re-index `data` filling in `0`s where there is no demand. - index = pd.MultiIndex.from_tuples(gen) - index.names = ['pixel_id', 'start_at'] - - df = pd.DataFrame(data={'n_orders': 1}, index=index) - - # Sanity check: n_pixels * n_time_steps_per_day * n_weekdays * n_weeks. - assert len(df) == 2 * 12 * (7 * 2 + 1) - - return df - - -@pytest.fixture -def order_history(order_totals, grid): - """An `OrderHistory` object that does not need the database. - - Uses the LONG_TIME_STEP as the length of a time step. - """ - oh = timify.OrderHistory(grid=grid, time_step=test_config.LONG_TIME_STEP) - oh._data = order_totals - - return oh diff --git a/tests/forecasts/timify/test_make_time_series.py b/tests/forecasts/timify/test_make_time_series.py index 78189c7..c47c14a 100644 --- a/tests/forecasts/timify/test_make_time_series.py +++ b/tests/forecasts/timify/test_make_time_series.py @@ -17,8 +17,8 @@ from urban_meal_delivery import config def good_predict_at(): """A `predict_at` within `START`-`END` and ... - ... a long enough history so that either `train_horizon=1` - or `train_horizon=2` works. + ... a long enough history so that either `SHORT_TRAIN_HORIZON` + or `LONG_TRAIN_HORIZON` works. 
""" return datetime.datetime( test_config.END.year, @@ -33,10 +33,10 @@ def good_predict_at(): def bad_predict_at(): """A `predict_at` within `START`-`END` but ... - ... not a long enough history so that both `train_horizon=1` - and `train_horizon=2` do not work. + ... not a long enough history so that both `SHORT_TRAIN_HORIZON` + and `LONG_TRAIN_HORIZON` do not work. """ - predict_day = test_config.END - datetime.timedelta(weeks=1, days=1) + predict_day = test_config.END - datetime.timedelta(weeks=2, days=1) return datetime.datetime( predict_day.year, predict_day.month, predict_day.day, test_config.NOON, 0, )