Add OrderHistory.make_vertical_time_series()
- the method slices out a vertical time series from the data within an `OrderHistory` object
This commit is contained in:
parent
b61db734b6
commit
5330ceb771
2 changed files with 194 additions and 1 deletions
|
@ -212,3 +212,85 @@ class OrderHistory:
|
|||
actual_df = intra_pixel.loc[[predict_at]]
|
||||
|
||||
return training_df, frequency, actual_df
|
||||
|
||||
def make_vertical_time_series(  # noqa:WPS210
    self, pixel_id: int, predict_day: dt.date, train_horizon: int,
) -> Tuple[pd.DataFrame, int, pd.DataFrame]:
    """Slice a vertical time series out of the `.totals`.

    Create a time series covering `train_horizon` weeks that can be used
    for training a forecasting model to predict the demand on the `predict_day`.

    For explanation of the terms "horizontal", "vertical", and "real-time"
    in the context of time series, see section 3.2 in the following paper:
    https://github.com/webartifex/urban-meal-delivery-demand-forecasting/blob/main/paper.pdf

    Args:
        pixel_id: pixel in which the time series is aggregated
        predict_day: day for which predictions are made
        train_horizon: weeks of historic data used to predict `predict_day`

    Returns:
        training time series, frequency, actual order counts on `predict_day`

    Raises:
        LookupError: `pixel_id` is not in the `grid`
        RuntimeError: desired time series slice is not entirely in `.totals`;
            this covers both the training data and the actual order counts
    """
    try:
        intra_pixel = self.totals.loc[pixel_id]
    except KeyError:
        raise LookupError('The `pixel_id` is not in the `grid`') from None

    if predict_day >= config.CUTOFF_DAY.date():  # pragma: no cover
        raise RuntimeError('Internal error: cannot predict beyond the given data')

    # Every day's first time step starts at `SERVICE_START`; the last one
    # starts one `time_step` before `SERVICE_END`.
    opening = dt.time(config.SERVICE_START)
    closing = dt.time(config.SERVICE_END)
    one_step = dt.timedelta(minutes=self._time_step)

    # The first and last training day are just before the `predict_day`
    # and span exactly `train_horizon` weeks covering all times of the day.
    first_train_day = predict_day - dt.timedelta(weeks=train_horizon)
    last_train_day = predict_day - dt.timedelta(days=1)
    first_start_at = dt.datetime.combine(first_train_day, opening)
    last_start_at = dt.datetime.combine(last_train_day, closing) - one_step

    # The frequency is the number of weekdays times the number of daily time steps.
    frequency = 7 * self._n_daily_time_steps

    # Take all the counts between `first_train_day` and `last_train_day`.
    training_df = intra_pixel.loc[
        first_start_at:last_start_at  # type: ignore
    ]
    if len(training_df) != frequency * train_horizon:
        raise RuntimeError('Not enough historic data for `predict_day`')

    first_prediction_at = dt.datetime.combine(predict_day, opening)
    last_prediction_at = dt.datetime.combine(predict_day, closing) - one_step

    actuals_df = intra_pixel.loc[
        first_prediction_at:last_prediction_at  # type: ignore
    ]
    # A label-based slice silently returns fewer rows if the `predict_day`
    # is only partially (or not at all) covered by `.totals`. Fail loudly
    # instead of handing incomplete actuals to the caller.
    if len(actuals_df) != self._n_daily_time_steps:
        raise RuntimeError('Incomplete order totals for `predict_day`')

    return training_df, frequency, actuals_df
|
||||
|
|
|
@ -117,7 +117,7 @@ class TestMakeHorizontalTimeSeries:
|
|||
):
|
||||
"""The length of a training time series must be a multiple of `7` ...
|
||||
|
||||
whereas the time series with the actual order counts always holds `1` value.
|
||||
... whereas the time series with the actual order counts has only `1` value.
|
||||
"""
|
||||
result = order_history.make_horizontal_time_series(
|
||||
pixel_id=good_pixel_id,
|
||||
|
@ -171,3 +171,114 @@ class TestMakeHorizontalTimeSeries:
|
|||
order_history.make_horizontal_time_series(
|
||||
pixel_id=good_pixel_id, predict_at=good_predict_at, train_horizon=999,
|
||||
)
|
||||
|
||||
|
||||
class TestMakeVerticalTimeSeries:
    """Test the `OrderHistory.make_vertical_time_series()` method."""

    @staticmethod
    def _n_daily_time_steps():
        """Number of time steps per day, derived from the service hours.

        The minutes between `SERVICE_START` and `SERVICE_END` are divided
        by `test_config.LONG_TIME_STEP`.
        """
        return (
            60
            * (config.SERVICE_END - config.SERVICE_START)
            // test_config.LONG_TIME_STEP
        )

    @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
    def test_wrong_pixel(self, order_history, good_predict_at, train_horizon):
        """A `pixel_id` that is not in the `grid`."""
        with pytest.raises(LookupError):
            order_history.make_vertical_time_series(
                pixel_id=999_999,
                predict_day=good_predict_at.date(),
                train_horizon=train_horizon,
            )

    @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
    def test_time_series_are_dataframes(
        self, order_history, good_pixel_id, good_predict_at, train_horizon,
    ):
        """The time series come in a one-column `pd.DataFrame`."""
        result = order_history.make_vertical_time_series(
            pixel_id=good_pixel_id,
            predict_day=good_predict_at.date(),
            train_horizon=train_horizon,
        )

        training_df, _, actual_df = result

        # Compare plain lists: `Index == list` is element-wise and raises
        # a `ValueError` (instead of failing the assertion) as soon as
        # the number of columns differs from one.
        assert isinstance(training_df, pd.DataFrame)
        assert list(training_df.columns) == ['total_orders']
        assert isinstance(actual_df, pd.DataFrame)
        assert list(actual_df.columns) == ['total_orders']

    @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
    def test_time_series_have_correct_length(
        self, order_history, good_pixel_id, good_predict_at, train_horizon,
    ):
        """The length of a training time series is the product of the ...

        ... weekly time steps (i.e., product of `7` and the number of daily time steps)
        and the `train_horizon` in weeks.

        The time series with the actual order counts always holds one observation
        per time step of a day.
        """
        result = order_history.make_vertical_time_series(
            pixel_id=good_pixel_id,
            predict_day=good_predict_at.date(),
            train_horizon=train_horizon,
        )

        training_df, _, actual_df = result

        n_daily_time_steps = self._n_daily_time_steps()

        assert len(training_df) == 7 * n_daily_time_steps * train_horizon
        assert len(actual_df) == n_daily_time_steps

    @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
    def test_frequency_is_number_number_of_weekly_time_steps(
        self, order_history, good_pixel_id, good_predict_at, train_horizon,
    ):
        """The `frequency` is the number of weekly time steps."""
        result = order_history.make_vertical_time_series(
            pixel_id=good_pixel_id,
            predict_day=good_predict_at.date(),
            train_horizon=train_horizon,
        )

        _, frequency, _ = result  # noqa:WPS434

        assert frequency == 7 * self._n_daily_time_steps()

    @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
    def test_no_long_enough_history1(
        self, order_history, good_pixel_id, bad_predict_at, train_horizon,
    ):
        """If the `predict_day` is too early in the `START`-`END` horizon ...

        ... the history of order totals is not long enough.
        """
        with pytest.raises(RuntimeError):
            order_history.make_vertical_time_series(
                pixel_id=good_pixel_id,
                predict_day=bad_predict_at.date(),
                train_horizon=train_horizon,
            )

    def test_no_long_enough_history2(
        self, order_history, good_pixel_id, good_predict_at,
    ):
        """If the `train_horizon` is longer than the `START`-`END` horizon ...

        ... the history of order totals can never be long enough.
        """
        with pytest.raises(RuntimeError):
            order_history.make_vertical_time_series(
                pixel_id=good_pixel_id,
                predict_day=good_predict_at.date(),
                train_horizon=999,
            )
|
||||
|
|
Loading…
Reference in a new issue