Add OrderHistory.make_real_time_time_series()
- the method slices out a real-time time series from the data within an `OrderHistory` object
This commit is contained in:
parent
5330ceb771
commit
100fac659a
3 changed files with 257 additions and 3 deletions
|
|
@ -1,4 +1,8 @@
|
|||
"""Test the time series related code."""
|
||||
"""Test the code generating time series with the order totals.
|
||||
|
||||
Unless otherwise noted, each `time_step` is 60 minutes long implying
|
||||
12 time steps per day (i.e., we use `LONG_TIME_STEP` by default).
|
||||
"""
|
||||
# pylint:disable=no-self-use,unused-argument
|
||||
|
||||
import datetime
|
||||
|
|
@ -63,7 +67,11 @@ def good_predict_at():
|
|||
or `train_horizon=2` works.
|
||||
"""
|
||||
return datetime.datetime(
|
||||
test_config.END.year, test_config.END.month, test_config.END.day, 12, 0,
|
||||
test_config.END.year,
|
||||
test_config.END.month,
|
||||
test_config.END.day,
|
||||
test_config.NOON,
|
||||
0,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -76,7 +84,7 @@ def bad_predict_at():
|
|||
"""
|
||||
predict_day = test_config.END - datetime.timedelta(weeks=1, days=1)
|
||||
return datetime.datetime(
|
||||
predict_day.year, predict_day.month, predict_day.day, 12, 0,
|
||||
predict_day.year, predict_day.month, predict_day.day, test_config.NOON, 0,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -282,3 +290,156 @@ class TestMakeVerticalTimeSeries:
|
|||
predict_day=good_predict_at.date(),
|
||||
train_horizon=999,
|
||||
)
|
||||
|
||||
|
||||
class TestMakeRealTimeTimeSeries:
    """Test the `OrderHistory.make_real_time_time_series()` method.

    The method is called with a `pixel_id`, a `predict_at` time stamp, and
    a `train_horizon`; the tests unpack its result into three parts:
    a training time series, a `frequency`, and a time series with the
    actual order count.
    """

    @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
    def test_wrong_pixel(self, order_history, good_predict_at, train_horizon):
        """A `pixel_id` that is not in the `grid`."""
        with pytest.raises(LookupError):
            order_history.make_real_time_time_series(
                pixel_id=999_999,
                predict_at=good_predict_at,
                train_horizon=train_horizon,
            )

    @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
    def test_time_series_are_dataframes(
        self, order_history, good_pixel_id, good_predict_at, train_horizon,
    ):
        """The time series come in a one-column `pd.DataFrame`."""
        result = order_history.make_real_time_time_series(
            pixel_id=good_pixel_id,
            predict_at=good_predict_at,
            train_horizon=train_horizon,
        )

        training_df, _, actual_df = result

        # Compare the column labels as plain lists: `Index == list` is an
        # element-wise comparison that raises a `ValueError` (instead of
        # failing the assertion) whenever the number of columns differs.
        assert isinstance(training_df, pd.DataFrame)
        assert list(training_df.columns) == ['total_orders']
        assert isinstance(actual_df, pd.DataFrame)
        assert list(actual_df.columns) == ['total_orders']

    @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
    def test_time_series_have_correct_length1(
        self, order_history, good_pixel_id, good_predict_at, train_horizon,
    ):
        """The length of a training time series is the product of the ...

        ... weekly time steps (i.e., product of `7` and the number of daily time steps)
        and the `train_horizon` in weeks; however, this assertion only holds if
        we predict the first `time_step` of the day.

        The time series with the actual order counts always holds `1` value.
        """
        # Move the prediction to the first `time_step` of the day so that
        # no time steps of the predicted day end up in the training data.
        predict_at = datetime.datetime(
            good_predict_at.year,
            good_predict_at.month,
            good_predict_at.day,
            config.SERVICE_START,
            0,
        )
        result = order_history.make_real_time_time_series(
            pixel_id=good_pixel_id, predict_at=predict_at, train_horizon=train_horizon,
        )

        training_df, _, actual_df = result

        # `60` converts the service hours into minutes before dividing by the
        # length of a time step (presumably `LONG_TIME_STEP` is in minutes).
        n_daily_time_steps = (
            60
            * (config.SERVICE_END - config.SERVICE_START)
            // test_config.LONG_TIME_STEP
        )

        assert len(training_df) == 7 * n_daily_time_steps * train_horizon
        assert len(actual_df) == 1

    @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
    def test_time_series_have_correct_length2(
        self, order_history, good_pixel_id, good_predict_at, train_horizon,
    ):
        """The length of a training time series is the product of the ...

        ... weekly time steps (i.e., product of `7` and the number of daily time steps)
        and the `train_horizon` in weeks; however, this assertion only holds if
        we predict the first `time_step` of the day. Predicting any other `time_step`
        means that the training time series becomes longer by the number of time steps
        before the one being predicted.

        The time series with the actual order counts always holds `1` value.
        """
        # Guard the premise of the calculation below: the fixture must
        # predict at noon for `n_time_steps_before` to be correct.
        assert good_predict_at.hour == test_config.NOON

        result = order_history.make_real_time_time_series(
            pixel_id=good_pixel_id,
            predict_at=good_predict_at,
            train_horizon=train_horizon,
        )

        training_df, _, actual_df = result

        # `60` converts hours into minutes before dividing by the length
        # of a time step (presumably `LONG_TIME_STEP` is in minutes).
        n_daily_time_steps = (
            60
            * (config.SERVICE_END - config.SERVICE_START)
            // test_config.LONG_TIME_STEP
        )
        # Time steps on the predicted day that lie before `predict_at`.
        n_time_steps_before = (
            60 * (test_config.NOON - config.SERVICE_START) // test_config.LONG_TIME_STEP
        )

        assert (
            len(training_df)
            == 7 * n_daily_time_steps * train_horizon + n_time_steps_before
        )
        assert len(actual_df) == 1

    @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
    def test_frequency_is_number_number_of_weekly_time_steps(
        self, order_history, good_pixel_id, good_predict_at, train_horizon,
    ):
        """The `frequency` is the number of weekly time steps."""
        result = order_history.make_real_time_time_series(
            pixel_id=good_pixel_id,
            predict_at=good_predict_at,
            train_horizon=train_horizon,
        )

        _, frequency, _ = result  # noqa:WPS434

        # `60` converts the service hours into minutes before dividing by the
        # length of a time step (presumably `LONG_TIME_STEP` is in minutes).
        n_daily_time_steps = (
            60
            * (config.SERVICE_END - config.SERVICE_START)
            // test_config.LONG_TIME_STEP
        )

        assert frequency == 7 * n_daily_time_steps

    @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
    def test_no_long_enough_history1(
        self, order_history, good_pixel_id, bad_predict_at, train_horizon,
    ):
        """If the `predict_at` day is too early in the `START`-`END` horizon ...

        ... the history of order totals is not long enough.
        """
        with pytest.raises(RuntimeError):
            order_history.make_real_time_time_series(
                pixel_id=good_pixel_id,
                predict_at=bad_predict_at,
                train_horizon=train_horizon,
            )

    def test_no_long_enough_history2(
        self, order_history, good_pixel_id, good_predict_at,
    ):
        """If the `train_horizon` is longer than the `START`-`END` horizon ...

        ... the history of order totals can never be long enough.
        """
        with pytest.raises(RuntimeError):
            order_history.make_real_time_time_series(
                pixel_id=good_pixel_id, predict_at=good_predict_at, train_horizon=999,
            )
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue