Return the resulting time series as pd.Series

This commit is contained in:
Alexander Hess 2021-01-10 16:11:40 +01:00
parent 9196c88ed4
commit 84876047c1
Signed by: alexander
GPG key ID: 344EA5AB10D868E0
2 changed files with 56 additions and 52 deletions

View file

@ -149,7 +149,7 @@ class OrderHistory:
def make_horizontal_time_series( # noqa:WPS210
self, pixel_id: int, predict_at: dt.datetime, train_horizon: int,
) -> Tuple[pd.DataFrame, int, int]:
) -> Tuple[pd.Series, int, pd.Series]:
"""Slice a horizontal time series out of the `.totals`.
Create a time series covering `train_horizon` weeks that can be used
@ -203,19 +203,20 @@ class OrderHistory:
frequency = 7
# Take only the counts at the `predict_at` time.
training_df = intra_pixel.loc[
first_start_at : last_start_at : self._n_daily_time_steps # type: ignore
training_ts = intra_pixel.loc[
first_start_at : last_start_at : self._n_daily_time_steps, # type: ignore
'total_orders',
]
if len(training_df) != frequency * train_horizon:
if len(training_ts) != frequency * train_horizon:
raise RuntimeError('Not enough historic data for `predict_at`')
actual_df = intra_pixel.loc[[predict_at]]
actuals_ts = intra_pixel.loc[[predict_at], 'total_orders']
return training_df, frequency, actual_df
return training_ts, frequency, actuals_ts
def make_vertical_time_series( # noqa:WPS210
self, pixel_id: int, predict_day: dt.date, train_horizon: int,
) -> Tuple[pd.DataFrame, int, pd.DataFrame]:
) -> Tuple[pd.Series, int, pd.Series]:
"""Slice a vertical time series out of the `.totals`.
Create a time series covering `train_horizon` weeks that can be used
@ -268,10 +269,11 @@ class OrderHistory:
frequency = 7 * self._n_daily_time_steps
# Take all the counts between `first_train_day` and `last_train_day`.
training_df = intra_pixel.loc[
first_start_at:last_start_at # type: ignore
training_ts = intra_pixel.loc[
first_start_at:last_start_at, # type: ignore
'total_orders',
]
if len(training_df) != frequency * train_horizon:
if len(training_ts) != frequency * train_horizon:
raise RuntimeError('Not enough historic data for `predict_day`')
first_prediction_at = dt.datetime(
@ -289,15 +291,16 @@ class OrderHistory:
0,
) - dt.timedelta(minutes=self._time_step)
actuals_df = intra_pixel.loc[
first_prediction_at:last_prediction_at # type: ignore
actuals_ts = intra_pixel.loc[
first_prediction_at:last_prediction_at, # type: ignore
'total_orders',
]
return training_df, frequency, actuals_df
return training_ts, frequency, actuals_ts
def make_real_time_time_series( # noqa:WPS210
self, pixel_id: int, predict_at: dt.datetime, train_horizon: int,
) -> Tuple[pd.DataFrame, int, int]:
) -> Tuple[pd.Series, int, pd.Series]:
"""Slice a vertical real-time time series out of the `.totals`.
Create a time series covering `train_horizon` weeks that can be used
@ -361,8 +364,9 @@ class OrderHistory:
# Take all the counts between `first_train_day` and `last_train_day`,
# including the ones on the `predict_at` day prior to `predict_at`.
training_df = intra_pixel.loc[
first_start_at:last_start_at # type: ignore
training_ts = intra_pixel.loc[
first_start_at:last_start_at, # type: ignore
'total_orders',
]
n_time_steps_on_predict_day = (
(
@ -378,9 +382,9 @@ class OrderHistory:
// 60 # -> minutes
// self._time_step
)
if len(training_df) != frequency * train_horizon + n_time_steps_on_predict_day:
if len(training_ts) != frequency * train_horizon + n_time_steps_on_predict_day:
raise RuntimeError('Not enough historic data for `predict_day`')
actual_df = intra_pixel.loc[[predict_at]]
actuals_ts = intra_pixel.loc[[predict_at], 'total_orders']
return training_df, frequency, actual_df
return training_ts, frequency, actuals_ts

View file

@ -101,22 +101,22 @@ class TestMakeHorizontalTimeSeries:
)
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
def test_time_series_are_dataframes(
def test_time_series_are_series(
self, order_history, good_pixel_id, good_predict_at, train_horizon,
):
"""The time series come in a one-column `pd.DataFrame`."""
"""The time series come as `pd.Series`."""
result = order_history.make_horizontal_time_series(
pixel_id=good_pixel_id,
predict_at=good_predict_at,
train_horizon=train_horizon,
)
training_df, _, actual_df = result
training_ts, _, actuals_ts = result
assert isinstance(training_df, pd.DataFrame)
assert training_df.columns == ['total_orders']
assert isinstance(actual_df, pd.DataFrame)
assert actual_df.columns == ['total_orders']
assert isinstance(training_ts, pd.Series)
assert training_ts.name == 'total_orders'
assert isinstance(actuals_ts, pd.Series)
assert actuals_ts.name == 'total_orders'
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
def test_time_series_have_correct_length(
@ -132,10 +132,10 @@ class TestMakeHorizontalTimeSeries:
train_horizon=train_horizon,
)
training_df, _, actual_df = result
training_ts, _, actuals_ts = result
assert len(training_df) == 7 * train_horizon
assert len(actual_df) == 1
assert len(training_ts) == 7 * train_horizon
assert len(actuals_ts) == 1
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
def test_frequency_is_number_of_weekdays(
@ -194,22 +194,22 @@ class TestMakeVerticalTimeSeries:
)
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
def test_time_series_are_dataframes(
def test_time_series_are_series(
self, order_history, good_pixel_id, good_predict_at, train_horizon,
):
"""The time series come in a one-column `pd.DataFrame`."""
"""The time series come as `pd.Series`."""
result = order_history.make_vertical_time_series(
pixel_id=good_pixel_id,
predict_day=good_predict_at.date(),
train_horizon=train_horizon,
)
training_df, _, actual_df = result
training_ts, _, actuals_ts = result
assert isinstance(training_df, pd.DataFrame)
assert training_df.columns == ['total_orders']
assert isinstance(actual_df, pd.DataFrame)
assert actual_df.columns == ['total_orders']
assert isinstance(training_ts, pd.Series)
assert training_ts.name == 'total_orders'
assert isinstance(actuals_ts, pd.Series)
assert actuals_ts.name == 'total_orders'
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
def test_time_series_have_correct_length(
@ -229,7 +229,7 @@ class TestMakeVerticalTimeSeries:
train_horizon=train_horizon,
)
training_df, _, actual_df = result
training_ts, _, actuals_ts = result
n_daily_time_steps = (
60
@ -237,8 +237,8 @@ class TestMakeVerticalTimeSeries:
// test_config.LONG_TIME_STEP
)
assert len(training_df) == 7 * n_daily_time_steps * train_horizon
assert len(actual_df) == n_daily_time_steps
assert len(training_ts) == 7 * n_daily_time_steps * train_horizon
assert len(actuals_ts) == n_daily_time_steps
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
def test_frequency_is_number_number_of_weekly_time_steps(
@ -305,22 +305,22 @@ class TestMakeRealTimeTimeSeries:
)
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
def test_time_series_are_dataframes(
def test_time_series_are_series(
self, order_history, good_pixel_id, good_predict_at, train_horizon,
):
"""The time series come in a one-column `pd.DataFrame`."""
"""The time series come as `pd.Series`."""
result = order_history.make_real_time_time_series(
pixel_id=good_pixel_id,
predict_at=good_predict_at,
train_horizon=train_horizon,
)
training_df, _, actual_df = result
training_ts, _, actuals_ts = result
assert isinstance(training_df, pd.DataFrame)
assert training_df.columns == ['total_orders']
assert isinstance(actual_df, pd.DataFrame)
assert actual_df.columns == ['total_orders']
assert isinstance(training_ts, pd.Series)
assert training_ts.name == 'total_orders'
assert isinstance(actuals_ts, pd.Series)
assert actuals_ts.name == 'total_orders'
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
def test_time_series_have_correct_length1(
@ -345,7 +345,7 @@ class TestMakeRealTimeTimeSeries:
pixel_id=good_pixel_id, predict_at=predict_at, train_horizon=train_horizon,
)
training_df, _, actual_df = result
training_ts, _, actuals_ts = result
n_daily_time_steps = (
60
@ -353,8 +353,8 @@ class TestMakeRealTimeTimeSeries:
// test_config.LONG_TIME_STEP
)
assert len(training_df) == 7 * n_daily_time_steps * train_horizon
assert len(actual_df) == 1
assert len(training_ts) == 7 * n_daily_time_steps * train_horizon
assert len(actuals_ts) == 1
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
def test_time_series_have_correct_length2(
@ -378,7 +378,7 @@ class TestMakeRealTimeTimeSeries:
train_horizon=train_horizon,
)
training_df, _, actual_df = result
training_ts, _, actuals_ts = result
n_daily_time_steps = (
60
@ -390,10 +390,10 @@ class TestMakeRealTimeTimeSeries:
)
assert (
len(training_df)
len(training_ts)
== 7 * n_daily_time_steps * train_horizon + n_time_steps_before
)
assert len(actual_df) == 1
assert len(actuals_ts) == 1
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
def test_frequency_is_number_number_of_weekly_time_steps(