Return the resulting time series as pd.Series
This commit is contained in:
parent 9196c88ed4
commit 84876047c1
2 changed files with 56 additions and 52 deletions
|
@ -149,7 +149,7 @@ class OrderHistory:
|
||||||
|
|
||||||
def make_horizontal_time_series( # noqa:WPS210
|
def make_horizontal_time_series( # noqa:WPS210
|
||||||
self, pixel_id: int, predict_at: dt.datetime, train_horizon: int,
|
self, pixel_id: int, predict_at: dt.datetime, train_horizon: int,
|
||||||
) -> Tuple[pd.DataFrame, int, int]:
|
) -> Tuple[pd.Series, int, pd.Series]:
|
||||||
"""Slice a horizontal time series out of the `.totals`.
|
"""Slice a horizontal time series out of the `.totals`.
|
||||||
|
|
||||||
Create a time series covering `train_horizon` weeks that can be used
|
Create a time series covering `train_horizon` weeks that can be used
|
||||||
|
@ -203,19 +203,20 @@ class OrderHistory:
|
||||||
frequency = 7
|
frequency = 7
|
||||||
|
|
||||||
# Take only the counts at the `predict_at` time.
|
# Take only the counts at the `predict_at` time.
|
||||||
training_df = intra_pixel.loc[
|
training_ts = intra_pixel.loc[
|
||||||
first_start_at : last_start_at : self._n_daily_time_steps # type: ignore
|
first_start_at : last_start_at : self._n_daily_time_steps, # type: ignore
|
||||||
|
'total_orders',
|
||||||
]
|
]
|
||||||
if len(training_df) != frequency * train_horizon:
|
if len(training_ts) != frequency * train_horizon:
|
||||||
raise RuntimeError('Not enough historic data for `predict_at`')
|
raise RuntimeError('Not enough historic data for `predict_at`')
|
||||||
|
|
||||||
actual_df = intra_pixel.loc[[predict_at]]
|
actuals_ts = intra_pixel.loc[[predict_at], 'total_orders']
|
||||||
|
|
||||||
return training_df, frequency, actual_df
|
return training_ts, frequency, actuals_ts
|
||||||
|
|
||||||
def make_vertical_time_series( # noqa:WPS210
|
def make_vertical_time_series( # noqa:WPS210
|
||||||
self, pixel_id: int, predict_day: dt.date, train_horizon: int,
|
self, pixel_id: int, predict_day: dt.date, train_horizon: int,
|
||||||
) -> Tuple[pd.DataFrame, int, pd.DataFrame]:
|
) -> Tuple[pd.Series, int, pd.Series]:
|
||||||
"""Slice a vertical time series out of the `.totals`.
|
"""Slice a vertical time series out of the `.totals`.
|
||||||
|
|
||||||
Create a time series covering `train_horizon` weeks that can be used
|
Create a time series covering `train_horizon` weeks that can be used
|
||||||
|
@ -268,10 +269,11 @@ class OrderHistory:
|
||||||
frequency = 7 * self._n_daily_time_steps
|
frequency = 7 * self._n_daily_time_steps
|
||||||
|
|
||||||
# Take all the counts between `first_train_day` and `last_train_day`.
|
# Take all the counts between `first_train_day` and `last_train_day`.
|
||||||
training_df = intra_pixel.loc[
|
training_ts = intra_pixel.loc[
|
||||||
first_start_at:last_start_at # type: ignore
|
first_start_at:last_start_at, # type: ignore
|
||||||
|
'total_orders',
|
||||||
]
|
]
|
||||||
if len(training_df) != frequency * train_horizon:
|
if len(training_ts) != frequency * train_horizon:
|
||||||
raise RuntimeError('Not enough historic data for `predict_day`')
|
raise RuntimeError('Not enough historic data for `predict_day`')
|
||||||
|
|
||||||
first_prediction_at = dt.datetime(
|
first_prediction_at = dt.datetime(
|
||||||
|
@ -289,15 +291,16 @@ class OrderHistory:
|
||||||
0,
|
0,
|
||||||
) - dt.timedelta(minutes=self._time_step)
|
) - dt.timedelta(minutes=self._time_step)
|
||||||
|
|
||||||
actuals_df = intra_pixel.loc[
|
actuals_ts = intra_pixel.loc[
|
||||||
first_prediction_at:last_prediction_at # type: ignore
|
first_prediction_at:last_prediction_at, # type: ignore
|
||||||
|
'total_orders',
|
||||||
]
|
]
|
||||||
|
|
||||||
return training_df, frequency, actuals_df
|
return training_ts, frequency, actuals_ts
|
||||||
|
|
||||||
def make_real_time_time_series( # noqa:WPS210
|
def make_real_time_time_series( # noqa:WPS210
|
||||||
self, pixel_id: int, predict_at: dt.datetime, train_horizon: int,
|
self, pixel_id: int, predict_at: dt.datetime, train_horizon: int,
|
||||||
) -> Tuple[pd.DataFrame, int, int]:
|
) -> Tuple[pd.Series, int, pd.Series]:
|
||||||
"""Slice a vertical real-time time series out of the `.totals`.
|
"""Slice a vertical real-time time series out of the `.totals`.
|
||||||
|
|
||||||
Create a time series covering `train_horizon` weeks that can be used
|
Create a time series covering `train_horizon` weeks that can be used
|
||||||
|
@ -361,8 +364,9 @@ class OrderHistory:
|
||||||
|
|
||||||
# Take all the counts between `first_train_day` and `last_train_day`,
|
# Take all the counts between `first_train_day` and `last_train_day`,
|
||||||
# including the ones on the `predict_at` day prior to `predict_at`.
|
# including the ones on the `predict_at` day prior to `predict_at`.
|
||||||
training_df = intra_pixel.loc[
|
training_ts = intra_pixel.loc[
|
||||||
first_start_at:last_start_at # type: ignore
|
first_start_at:last_start_at, # type: ignore
|
||||||
|
'total_orders',
|
||||||
]
|
]
|
||||||
n_time_steps_on_predict_day = (
|
n_time_steps_on_predict_day = (
|
||||||
(
|
(
|
||||||
|
@ -378,9 +382,9 @@ class OrderHistory:
|
||||||
// 60 # -> minutes
|
// 60 # -> minutes
|
||||||
// self._time_step
|
// self._time_step
|
||||||
)
|
)
|
||||||
if len(training_df) != frequency * train_horizon + n_time_steps_on_predict_day:
|
if len(training_ts) != frequency * train_horizon + n_time_steps_on_predict_day:
|
||||||
raise RuntimeError('Not enough historic data for `predict_day`')
|
raise RuntimeError('Not enough historic data for `predict_day`')
|
||||||
|
|
||||||
actual_df = intra_pixel.loc[[predict_at]]
|
actuals_ts = intra_pixel.loc[[predict_at], 'total_orders']
|
||||||
|
|
||||||
return training_df, frequency, actual_df
|
return training_ts, frequency, actuals_ts
|
||||||
|
|
|
@ -101,22 +101,22 @@ class TestMakeHorizontalTimeSeries:
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||||
def test_time_series_are_dataframes(
|
def test_time_series_are_series(
|
||||||
self, order_history, good_pixel_id, good_predict_at, train_horizon,
|
self, order_history, good_pixel_id, good_predict_at, train_horizon,
|
||||||
):
|
):
|
||||||
"""The time series come in a one-column `pd.DataFrame`."""
|
"""The time series come as a `pd.Series`."""
|
||||||
result = order_history.make_horizontal_time_series(
|
result = order_history.make_horizontal_time_series(
|
||||||
pixel_id=good_pixel_id,
|
pixel_id=good_pixel_id,
|
||||||
predict_at=good_predict_at,
|
predict_at=good_predict_at,
|
||||||
train_horizon=train_horizon,
|
train_horizon=train_horizon,
|
||||||
)
|
)
|
||||||
|
|
||||||
training_df, _, actual_df = result
|
training_ts, _, actuals_ts = result
|
||||||
|
|
||||||
assert isinstance(training_df, pd.DataFrame)
|
assert isinstance(training_ts, pd.Series)
|
||||||
assert training_df.columns == ['total_orders']
|
assert training_ts.name == 'total_orders'
|
||||||
assert isinstance(actual_df, pd.DataFrame)
|
assert isinstance(actuals_ts, pd.Series)
|
||||||
assert actual_df.columns == ['total_orders']
|
assert actuals_ts.name == 'total_orders'
|
||||||
|
|
||||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||||
def test_time_series_have_correct_length(
|
def test_time_series_have_correct_length(
|
||||||
|
@ -132,10 +132,10 @@ class TestMakeHorizontalTimeSeries:
|
||||||
train_horizon=train_horizon,
|
train_horizon=train_horizon,
|
||||||
)
|
)
|
||||||
|
|
||||||
training_df, _, actual_df = result
|
training_ts, _, actuals_ts = result
|
||||||
|
|
||||||
assert len(training_df) == 7 * train_horizon
|
assert len(training_ts) == 7 * train_horizon
|
||||||
assert len(actual_df) == 1
|
assert len(actuals_ts) == 1
|
||||||
|
|
||||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||||
def test_frequency_is_number_of_weekdays(
|
def test_frequency_is_number_of_weekdays(
|
||||||
|
@ -194,22 +194,22 @@ class TestMakeVerticalTimeSeries:
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||||
def test_time_series_are_dataframes(
|
def test_time_series_are_series(
|
||||||
self, order_history, good_pixel_id, good_predict_at, train_horizon,
|
self, order_history, good_pixel_id, good_predict_at, train_horizon,
|
||||||
):
|
):
|
||||||
"""The time series come in a one-column `pd.DataFrame`."""
|
"""The time series come as `pd.Series`."""
|
||||||
result = order_history.make_vertical_time_series(
|
result = order_history.make_vertical_time_series(
|
||||||
pixel_id=good_pixel_id,
|
pixel_id=good_pixel_id,
|
||||||
predict_day=good_predict_at.date(),
|
predict_day=good_predict_at.date(),
|
||||||
train_horizon=train_horizon,
|
train_horizon=train_horizon,
|
||||||
)
|
)
|
||||||
|
|
||||||
training_df, _, actual_df = result
|
training_ts, _, actuals_ts = result
|
||||||
|
|
||||||
assert isinstance(training_df, pd.DataFrame)
|
assert isinstance(training_ts, pd.Series)
|
||||||
assert training_df.columns == ['total_orders']
|
assert training_ts.name == 'total_orders'
|
||||||
assert isinstance(actual_df, pd.DataFrame)
|
assert isinstance(actuals_ts, pd.Series)
|
||||||
assert actual_df.columns == ['total_orders']
|
assert actuals_ts.name == 'total_orders'
|
||||||
|
|
||||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||||
def test_time_series_have_correct_length(
|
def test_time_series_have_correct_length(
|
||||||
|
@ -229,7 +229,7 @@ class TestMakeVerticalTimeSeries:
|
||||||
train_horizon=train_horizon,
|
train_horizon=train_horizon,
|
||||||
)
|
)
|
||||||
|
|
||||||
training_df, _, actual_df = result
|
training_ts, _, actuals_ts = result
|
||||||
|
|
||||||
n_daily_time_steps = (
|
n_daily_time_steps = (
|
||||||
60
|
60
|
||||||
|
@ -237,8 +237,8 @@ class TestMakeVerticalTimeSeries:
|
||||||
// test_config.LONG_TIME_STEP
|
// test_config.LONG_TIME_STEP
|
||||||
)
|
)
|
||||||
|
|
||||||
assert len(training_df) == 7 * n_daily_time_steps * train_horizon
|
assert len(training_ts) == 7 * n_daily_time_steps * train_horizon
|
||||||
assert len(actual_df) == n_daily_time_steps
|
assert len(actuals_ts) == n_daily_time_steps
|
||||||
|
|
||||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||||
def test_frequency_is_number_number_of_weekly_time_steps(
|
def test_frequency_is_number_number_of_weekly_time_steps(
|
||||||
|
@ -305,22 +305,22 @@ class TestMakeRealTimeTimeSeries:
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||||
def test_time_series_are_dataframes(
|
def test_time_series_are_series(
|
||||||
self, order_history, good_pixel_id, good_predict_at, train_horizon,
|
self, order_history, good_pixel_id, good_predict_at, train_horizon,
|
||||||
):
|
):
|
||||||
"""The time series come in a one-column `pd.DataFrame`."""
|
"""The time series come as `pd.Series`."""
|
||||||
result = order_history.make_real_time_time_series(
|
result = order_history.make_real_time_time_series(
|
||||||
pixel_id=good_pixel_id,
|
pixel_id=good_pixel_id,
|
||||||
predict_at=good_predict_at,
|
predict_at=good_predict_at,
|
||||||
train_horizon=train_horizon,
|
train_horizon=train_horizon,
|
||||||
)
|
)
|
||||||
|
|
||||||
training_df, _, actual_df = result
|
training_ts, _, actuals_ts = result
|
||||||
|
|
||||||
assert isinstance(training_df, pd.DataFrame)
|
assert isinstance(training_ts, pd.Series)
|
||||||
assert training_df.columns == ['total_orders']
|
assert training_ts.name == 'total_orders'
|
||||||
assert isinstance(actual_df, pd.DataFrame)
|
assert isinstance(actuals_ts, pd.Series)
|
||||||
assert actual_df.columns == ['total_orders']
|
assert actuals_ts.name == 'total_orders'
|
||||||
|
|
||||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||||
def test_time_series_have_correct_length1(
|
def test_time_series_have_correct_length1(
|
||||||
|
@ -345,7 +345,7 @@ class TestMakeRealTimeTimeSeries:
|
||||||
pixel_id=good_pixel_id, predict_at=predict_at, train_horizon=train_horizon,
|
pixel_id=good_pixel_id, predict_at=predict_at, train_horizon=train_horizon,
|
||||||
)
|
)
|
||||||
|
|
||||||
training_df, _, actual_df = result
|
training_ts, _, actuals_ts = result
|
||||||
|
|
||||||
n_daily_time_steps = (
|
n_daily_time_steps = (
|
||||||
60
|
60
|
||||||
|
@ -353,8 +353,8 @@ class TestMakeRealTimeTimeSeries:
|
||||||
// test_config.LONG_TIME_STEP
|
// test_config.LONG_TIME_STEP
|
||||||
)
|
)
|
||||||
|
|
||||||
assert len(training_df) == 7 * n_daily_time_steps * train_horizon
|
assert len(training_ts) == 7 * n_daily_time_steps * train_horizon
|
||||||
assert len(actual_df) == 1
|
assert len(actuals_ts) == 1
|
||||||
|
|
||||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||||
def test_time_series_have_correct_length2(
|
def test_time_series_have_correct_length2(
|
||||||
|
@ -378,7 +378,7 @@ class TestMakeRealTimeTimeSeries:
|
||||||
train_horizon=train_horizon,
|
train_horizon=train_horizon,
|
||||||
)
|
)
|
||||||
|
|
||||||
training_df, _, actual_df = result
|
training_ts, _, actuals_ts = result
|
||||||
|
|
||||||
n_daily_time_steps = (
|
n_daily_time_steps = (
|
||||||
60
|
60
|
||||||
|
@ -390,10 +390,10 @@ class TestMakeRealTimeTimeSeries:
|
||||||
)
|
)
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
len(training_df)
|
len(training_ts)
|
||||||
== 7 * n_daily_time_steps * train_horizon + n_time_steps_before
|
== 7 * n_daily_time_steps * train_horizon + n_time_steps_before
|
||||||
)
|
)
|
||||||
assert len(actual_df) == 1
|
assert len(actuals_ts) == 1
|
||||||
|
|
||||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||||
def test_frequency_is_number_number_of_weekly_time_steps(
|
def test_frequency_is_number_number_of_weekly_time_steps(
|
||||||
|
|
Loading…
Reference in a new issue