Return the resulting time series as pd.Series
This commit is contained in:
parent
9196c88ed4
commit
84876047c1
2 changed files with 56 additions and 52 deletions
|
@ -149,7 +149,7 @@ class OrderHistory:
|
|||
|
||||
def make_horizontal_time_series( # noqa:WPS210
|
||||
self, pixel_id: int, predict_at: dt.datetime, train_horizon: int,
|
||||
) -> Tuple[pd.DataFrame, int, int]:
|
||||
) -> Tuple[pd.Series, int, pd.Series]:
|
||||
"""Slice a horizontal time series out of the `.totals`.
|
||||
|
||||
Create a time series covering `train_horizon` weeks that can be used
|
||||
|
@ -203,19 +203,20 @@ class OrderHistory:
|
|||
frequency = 7
|
||||
|
||||
# Take only the counts at the `predict_at` time.
|
||||
training_df = intra_pixel.loc[
|
||||
first_start_at : last_start_at : self._n_daily_time_steps # type: ignore
|
||||
training_ts = intra_pixel.loc[
|
||||
first_start_at : last_start_at : self._n_daily_time_steps, # type: ignore
|
||||
'total_orders',
|
||||
]
|
||||
if len(training_df) != frequency * train_horizon:
|
||||
if len(training_ts) != frequency * train_horizon:
|
||||
raise RuntimeError('Not enough historic data for `predict_at`')
|
||||
|
||||
actual_df = intra_pixel.loc[[predict_at]]
|
||||
actuals_ts = intra_pixel.loc[[predict_at], 'total_orders']
|
||||
|
||||
return training_df, frequency, actual_df
|
||||
return training_ts, frequency, actuals_ts
|
||||
|
||||
def make_vertical_time_series( # noqa:WPS210
|
||||
self, pixel_id: int, predict_day: dt.date, train_horizon: int,
|
||||
) -> Tuple[pd.DataFrame, int, pd.DataFrame]:
|
||||
) -> Tuple[pd.Series, int, pd.Series]:
|
||||
"""Slice a vertical time series out of the `.totals`.
|
||||
|
||||
Create a time series covering `train_horizon` weeks that can be used
|
||||
|
@ -268,10 +269,11 @@ class OrderHistory:
|
|||
frequency = 7 * self._n_daily_time_steps
|
||||
|
||||
# Take all the counts between `first_train_day` and `last_train_day`.
|
||||
training_df = intra_pixel.loc[
|
||||
first_start_at:last_start_at # type: ignore
|
||||
training_ts = intra_pixel.loc[
|
||||
first_start_at:last_start_at, # type: ignore
|
||||
'total_orders',
|
||||
]
|
||||
if len(training_df) != frequency * train_horizon:
|
||||
if len(training_ts) != frequency * train_horizon:
|
||||
raise RuntimeError('Not enough historic data for `predict_day`')
|
||||
|
||||
first_prediction_at = dt.datetime(
|
||||
|
@ -289,15 +291,16 @@ class OrderHistory:
|
|||
0,
|
||||
) - dt.timedelta(minutes=self._time_step)
|
||||
|
||||
actuals_df = intra_pixel.loc[
|
||||
first_prediction_at:last_prediction_at # type: ignore
|
||||
actuals_ts = intra_pixel.loc[
|
||||
first_prediction_at:last_prediction_at, # type: ignore
|
||||
'total_orders',
|
||||
]
|
||||
|
||||
return training_df, frequency, actuals_df
|
||||
return training_ts, frequency, actuals_ts
|
||||
|
||||
def make_real_time_time_series( # noqa:WPS210
|
||||
self, pixel_id: int, predict_at: dt.datetime, train_horizon: int,
|
||||
) -> Tuple[pd.DataFrame, int, int]:
|
||||
) -> Tuple[pd.Series, int, pd.Series]:
|
||||
"""Slice a vertical real-time time series out of the `.totals`.
|
||||
|
||||
Create a time series covering `train_horizon` weeks that can be used
|
||||
|
@ -361,8 +364,9 @@ class OrderHistory:
|
|||
|
||||
# Take all the counts between `first_train_day` and `last_train_day`,
|
||||
# including the ones on the `predict_at` day prior to `predict_at`.
|
||||
training_df = intra_pixel.loc[
|
||||
first_start_at:last_start_at # type: ignore
|
||||
training_ts = intra_pixel.loc[
|
||||
first_start_at:last_start_at, # type: ignore
|
||||
'total_orders',
|
||||
]
|
||||
n_time_steps_on_predict_day = (
|
||||
(
|
||||
|
@ -378,9 +382,9 @@ class OrderHistory:
|
|||
// 60 # -> minutes
|
||||
// self._time_step
|
||||
)
|
||||
if len(training_df) != frequency * train_horizon + n_time_steps_on_predict_day:
|
||||
if len(training_ts) != frequency * train_horizon + n_time_steps_on_predict_day:
|
||||
raise RuntimeError('Not enough historic data for `predict_day`')
|
||||
|
||||
actual_df = intra_pixel.loc[[predict_at]]
|
||||
actuals_ts = intra_pixel.loc[[predict_at], 'total_orders']
|
||||
|
||||
return training_df, frequency, actual_df
|
||||
return training_ts, frequency, actuals_ts
|
||||
|
|
|
@ -101,22 +101,22 @@ class TestMakeHorizontalTimeSeries:
|
|||
)
|
||||
|
||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||
def test_time_series_are_dataframes(
|
||||
def test_time_series_are_series(
|
||||
self, order_history, good_pixel_id, good_predict_at, train_horizon,
|
||||
):
|
||||
"""The time series come in a one-column `pd.DataFrame`."""
|
||||
"""The time series come as `pd.Series`."""
|
||||
result = order_history.make_horizontal_time_series(
|
||||
pixel_id=good_pixel_id,
|
||||
predict_at=good_predict_at,
|
||||
train_horizon=train_horizon,
|
||||
)
|
||||
|
||||
training_df, _, actual_df = result
|
||||
training_ts, _, actuals_ts = result
|
||||
|
||||
assert isinstance(training_df, pd.DataFrame)
|
||||
assert training_df.columns == ['total_orders']
|
||||
assert isinstance(actual_df, pd.DataFrame)
|
||||
assert actual_df.columns == ['total_orders']
|
||||
assert isinstance(training_ts, pd.Series)
|
||||
assert training_ts.name == 'total_orders'
|
||||
assert isinstance(actuals_ts, pd.Series)
|
||||
assert actuals_ts.name == 'total_orders'
|
||||
|
||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||
def test_time_series_have_correct_length(
|
||||
|
@ -132,10 +132,10 @@ class TestMakeHorizontalTimeSeries:
|
|||
train_horizon=train_horizon,
|
||||
)
|
||||
|
||||
training_df, _, actual_df = result
|
||||
training_ts, _, actuals_ts = result
|
||||
|
||||
assert len(training_df) == 7 * train_horizon
|
||||
assert len(actual_df) == 1
|
||||
assert len(training_ts) == 7 * train_horizon
|
||||
assert len(actuals_ts) == 1
|
||||
|
||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||
def test_frequency_is_number_of_weekdays(
|
||||
|
@ -194,22 +194,22 @@ class TestMakeVerticalTimeSeries:
|
|||
)
|
||||
|
||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||
def test_time_series_are_dataframes(
|
||||
def test_time_series_are_series(
|
||||
self, order_history, good_pixel_id, good_predict_at, train_horizon,
|
||||
):
|
||||
"""The time series come in a one-column `pd.DataFrame`."""
|
||||
"""The time series come as `pd.Series`."""
|
||||
result = order_history.make_vertical_time_series(
|
||||
pixel_id=good_pixel_id,
|
||||
predict_day=good_predict_at.date(),
|
||||
train_horizon=train_horizon,
|
||||
)
|
||||
|
||||
training_df, _, actual_df = result
|
||||
training_ts, _, actuals_ts = result
|
||||
|
||||
assert isinstance(training_df, pd.DataFrame)
|
||||
assert training_df.columns == ['total_orders']
|
||||
assert isinstance(actual_df, pd.DataFrame)
|
||||
assert actual_df.columns == ['total_orders']
|
||||
assert isinstance(training_ts, pd.Series)
|
||||
assert training_ts.name == 'total_orders'
|
||||
assert isinstance(actuals_ts, pd.Series)
|
||||
assert actuals_ts.name == 'total_orders'
|
||||
|
||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||
def test_time_series_have_correct_length(
|
||||
|
@ -229,7 +229,7 @@ class TestMakeVerticalTimeSeries:
|
|||
train_horizon=train_horizon,
|
||||
)
|
||||
|
||||
training_df, _, actual_df = result
|
||||
training_ts, _, actuals_ts = result
|
||||
|
||||
n_daily_time_steps = (
|
||||
60
|
||||
|
@ -237,8 +237,8 @@ class TestMakeVerticalTimeSeries:
|
|||
// test_config.LONG_TIME_STEP
|
||||
)
|
||||
|
||||
assert len(training_df) == 7 * n_daily_time_steps * train_horizon
|
||||
assert len(actual_df) == n_daily_time_steps
|
||||
assert len(training_ts) == 7 * n_daily_time_steps * train_horizon
|
||||
assert len(actuals_ts) == n_daily_time_steps
|
||||
|
||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||
def test_frequency_is_number_number_of_weekly_time_steps(
|
||||
|
@ -305,22 +305,22 @@ class TestMakeRealTimeTimeSeries:
|
|||
)
|
||||
|
||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||
def test_time_series_are_dataframes(
|
||||
def test_time_series_are_series(
|
||||
self, order_history, good_pixel_id, good_predict_at, train_horizon,
|
||||
):
|
||||
"""The time series come in a one-column `pd.DataFrame`."""
|
||||
"""The time series come as `pd.Series`."""
|
||||
result = order_history.make_real_time_time_series(
|
||||
pixel_id=good_pixel_id,
|
||||
predict_at=good_predict_at,
|
||||
train_horizon=train_horizon,
|
||||
)
|
||||
|
||||
training_df, _, actual_df = result
|
||||
training_ts, _, actuals_ts = result
|
||||
|
||||
assert isinstance(training_df, pd.DataFrame)
|
||||
assert training_df.columns == ['total_orders']
|
||||
assert isinstance(actual_df, pd.DataFrame)
|
||||
assert actual_df.columns == ['total_orders']
|
||||
assert isinstance(training_ts, pd.Series)
|
||||
assert training_ts.name == 'total_orders'
|
||||
assert isinstance(actuals_ts, pd.Series)
|
||||
assert actuals_ts.name == 'total_orders'
|
||||
|
||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||
def test_time_series_have_correct_length1(
|
||||
|
@ -345,7 +345,7 @@ class TestMakeRealTimeTimeSeries:
|
|||
pixel_id=good_pixel_id, predict_at=predict_at, train_horizon=train_horizon,
|
||||
)
|
||||
|
||||
training_df, _, actual_df = result
|
||||
training_ts, _, actuals_ts = result
|
||||
|
||||
n_daily_time_steps = (
|
||||
60
|
||||
|
@ -353,8 +353,8 @@ class TestMakeRealTimeTimeSeries:
|
|||
// test_config.LONG_TIME_STEP
|
||||
)
|
||||
|
||||
assert len(training_df) == 7 * n_daily_time_steps * train_horizon
|
||||
assert len(actual_df) == 1
|
||||
assert len(training_ts) == 7 * n_daily_time_steps * train_horizon
|
||||
assert len(actuals_ts) == 1
|
||||
|
||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||
def test_time_series_have_correct_length2(
|
||||
|
@ -378,7 +378,7 @@ class TestMakeRealTimeTimeSeries:
|
|||
train_horizon=train_horizon,
|
||||
)
|
||||
|
||||
training_df, _, actual_df = result
|
||||
training_ts, _, actuals_ts = result
|
||||
|
||||
n_daily_time_steps = (
|
||||
60
|
||||
|
@ -390,10 +390,10 @@ class TestMakeRealTimeTimeSeries:
|
|||
)
|
||||
|
||||
assert (
|
||||
len(training_df)
|
||||
len(training_ts)
|
||||
== 7 * n_daily_time_steps * train_horizon + n_time_steps_before
|
||||
)
|
||||
assert len(actual_df) == 1
|
||||
assert len(actuals_ts) == 1
|
||||
|
||||
@pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS)
|
||||
def test_frequency_is_number_number_of_weekly_time_steps(
|
||||
|
|
Loading…
Reference in a new issue