diff --git a/src/urban_meal_delivery/forecasts/timify.py b/src/urban_meal_delivery/forecasts/timify.py index 6906d24..0220a58 100644 --- a/src/urban_meal_delivery/forecasts/timify.py +++ b/src/urban_meal_delivery/forecasts/timify.py @@ -149,7 +149,7 @@ class OrderHistory: def make_horizontal_time_series( # noqa:WPS210 self, pixel_id: int, predict_at: dt.datetime, train_horizon: int, - ) -> Tuple[pd.DataFrame, int, int]: + ) -> Tuple[pd.Series, int, pd.Series]: """Slice a horizontal time series out of the `.totals`. Create a time series covering `train_horizon` weeks that can be used @@ -203,19 +203,20 @@ class OrderHistory: frequency = 7 # Take only the counts at the `predict_at` time. - training_df = intra_pixel.loc[ - first_start_at : last_start_at : self._n_daily_time_steps # type: ignore + training_ts = intra_pixel.loc[ + first_start_at : last_start_at : self._n_daily_time_steps, # type: ignore + 'total_orders', ] - if len(training_df) != frequency * train_horizon: + if len(training_ts) != frequency * train_horizon: raise RuntimeError('Not enough historic data for `predict_at`') - actual_df = intra_pixel.loc[[predict_at]] + actuals_ts = intra_pixel.loc[[predict_at], 'total_orders'] - return training_df, frequency, actual_df + return training_ts, frequency, actuals_ts def make_vertical_time_series( # noqa:WPS210 self, pixel_id: int, predict_day: dt.date, train_horizon: int, - ) -> Tuple[pd.DataFrame, int, pd.DataFrame]: + ) -> Tuple[pd.Series, int, pd.Series]: """Slice a vertical time series out of the `.totals`. Create a time series covering `train_horizon` weeks that can be used @@ -268,10 +269,11 @@ class OrderHistory: frequency = 7 * self._n_daily_time_steps # Take all the counts between `first_train_day` and `last_train_day`. - training_df = intra_pixel.loc[ - first_start_at:last_start_at # type: ignore + training_ts = intra_pixel.loc[ + first_start_at:last_start_at, # type: ignore + 'total_orders', ] - if len(training_df) != frequency * train_horizon: + if len(training_ts) != frequency * train_horizon: raise RuntimeError('Not enough historic data for `predict_day`') first_prediction_at = dt.datetime( @@ -289,15 +291,16 @@ class OrderHistory: 0, ) - dt.timedelta(minutes=self._time_step) - actuals_df = intra_pixel.loc[ - first_prediction_at:last_prediction_at # type: ignore + actuals_ts = intra_pixel.loc[ + first_prediction_at:last_prediction_at, # type: ignore + 'total_orders', ] - return training_df, frequency, actuals_df + return training_ts, frequency, actuals_ts def make_real_time_time_series( # noqa:WPS210 self, pixel_id: int, predict_at: dt.datetime, train_horizon: int, - ) -> Tuple[pd.DataFrame, int, int]: + ) -> Tuple[pd.Series, int, pd.Series]: """Slice a vertical real-time time series out of the `.totals`. Create a time series covering `train_horizon` weeks that can be used @@ -361,8 +364,9 @@ class OrderHistory: # Take all the counts between `first_train_day` and `last_train_day`, # including the ones on the `predict_at` day prior to `predict_at`. - training_df = intra_pixel.loc[ - first_start_at:last_start_at # type: ignore + training_ts = intra_pixel.loc[ + first_start_at:last_start_at, # type: ignore + 'total_orders', ] n_time_steps_on_predict_day = ( ( @@ -378,9 +382,9 @@ class OrderHistory: // 60 # -> minutes // self._time_step ) - if len(training_df) != frequency * train_horizon + n_time_steps_on_predict_day: + if len(training_ts) != frequency * train_horizon + n_time_steps_on_predict_day: raise RuntimeError('Not enough historic data for `predict_day`') - actual_df = intra_pixel.loc[[predict_at]] + actuals_ts = intra_pixel.loc[[predict_at], 'total_orders'] - return training_df, frequency, actual_df + return training_ts, frequency, actuals_ts diff --git a/tests/forecasts/timify/test_make_time_series.py b/tests/forecasts/timify/test_make_time_series.py index 98e2c34..4dc187d 100644 --- a/tests/forecasts/timify/test_make_time_series.py +++ b/tests/forecasts/timify/test_make_time_series.py @@ -101,22 +101,22 @@ class TestMakeHorizontalTimeSeries: ) @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS) - def test_time_series_are_dataframes( + def test_time_series_are_series( self, order_history, good_pixel_id, good_predict_at, train_horizon, ): - """The time series come in a one-column `pd.DataFrame`.""" + """The time series come as a `pd.Series`.""" result = order_history.make_horizontal_time_series( pixel_id=good_pixel_id, predict_at=good_predict_at, train_horizon=train_horizon, ) - training_df, _, actual_df = result + training_ts, _, actuals_ts = result - assert isinstance(training_df, pd.DataFrame) - assert training_df.columns == ['total_orders'] - assert isinstance(actual_df, pd.DataFrame) - assert actual_df.columns == ['total_orders'] + assert isinstance(training_ts, pd.Series) + assert training_ts.name == 'total_orders' + assert isinstance(actuals_ts, pd.Series) + assert actuals_ts.name == 'total_orders' @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS) def test_time_series_have_correct_length( @@ -132,10 +132,10 @@ class TestMakeHorizontalTimeSeries: train_horizon=train_horizon, ) - training_df, _, actual_df = result + training_ts, _, actuals_ts = result - assert len(training_df) == 7 * train_horizon - assert len(actual_df) == 1 + assert len(training_ts) == 7 * train_horizon + assert len(actuals_ts) == 1 @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS) def test_frequency_is_number_of_weekdays( @@ -194,22 +194,22 @@ class TestMakeVerticalTimeSeries: ) @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS) - def test_time_series_are_dataframes( + def test_time_series_are_series( self, order_history, good_pixel_id, good_predict_at, train_horizon, ): - """The time series come in a one-column `pd.DataFrame`.""" + """The time series come as `pd.Series`.""" result = order_history.make_vertical_time_series( pixel_id=good_pixel_id, predict_day=good_predict_at.date(), train_horizon=train_horizon, ) - training_df, _, actual_df = result + training_ts, _, actuals_ts = result - assert isinstance(training_df, pd.DataFrame) - assert training_df.columns == ['total_orders'] - assert isinstance(actual_df, pd.DataFrame) - assert actual_df.columns == ['total_orders'] + assert isinstance(training_ts, pd.Series) + assert training_ts.name == 'total_orders' + assert isinstance(actuals_ts, pd.Series) + assert actuals_ts.name == 'total_orders' @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS) def test_time_series_have_correct_length( @@ -229,7 +229,7 @@ class TestMakeVerticalTimeSeries: train_horizon=train_horizon, ) - training_df, _, actual_df = result + training_ts, _, actuals_ts = result n_daily_time_steps = ( 60 @@ -237,8 +237,8 @@ class TestMakeVerticalTimeSeries: // test_config.LONG_TIME_STEP ) - assert len(training_df) == 7 * n_daily_time_steps * train_horizon - assert len(actual_df) == n_daily_time_steps + assert len(training_ts) == 7 * n_daily_time_steps * train_horizon + assert len(actuals_ts) == n_daily_time_steps @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS) def test_frequency_is_number_number_of_weekly_time_steps( @@ -305,22 +305,22 @@ class TestMakeRealTimeTimeSeries: ) @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS) - def test_time_series_are_dataframes( + def test_time_series_are_series( self, order_history, good_pixel_id, good_predict_at, train_horizon, ): - """The time series come in a one-column `pd.DataFrame`.""" + """The time series come as `pd.Series`.""" result = order_history.make_real_time_time_series( pixel_id=good_pixel_id, predict_at=good_predict_at, train_horizon=train_horizon, ) - training_df, _, actual_df = result + training_ts, _, actuals_ts = result - assert isinstance(training_df, pd.DataFrame) - assert training_df.columns == ['total_orders'] - assert isinstance(actual_df, pd.DataFrame) - assert actual_df.columns == ['total_orders'] + assert isinstance(training_ts, pd.Series) + assert training_ts.name == 'total_orders' + assert isinstance(actuals_ts, pd.Series) + assert actuals_ts.name == 'total_orders' @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS) def test_time_series_have_correct_length1( @@ -345,7 +345,7 @@ class TestMakeRealTimeTimeSeries: pixel_id=good_pixel_id, predict_at=predict_at, train_horizon=train_horizon, ) - training_df, _, actual_df = result + training_ts, _, actuals_ts = result n_daily_time_steps = ( 60 @@ -353,8 +353,8 @@ class TestMakeRealTimeTimeSeries: // test_config.LONG_TIME_STEP ) - assert len(training_df) == 7 * n_daily_time_steps * train_horizon - assert len(actual_df) == 1 + assert len(training_ts) == 7 * n_daily_time_steps * train_horizon + assert len(actuals_ts) == 1 @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS) def test_time_series_have_correct_length2( @@ -378,7 +378,7 @@ class TestMakeRealTimeTimeSeries: train_horizon=train_horizon, ) - training_df, _, actual_df = result + training_ts, _, actuals_ts = result n_daily_time_steps = ( 60 @@ -390,10 +390,10 @@ class TestMakeRealTimeTimeSeries: ) assert ( - len(training_df) + len(training_ts) == 7 * n_daily_time_steps * train_horizon + n_time_steps_before ) - assert len(actual_df) == 1 + assert len(actuals_ts) == 1 @pytest.mark.parametrize('train_horizon', test_config.TRAIN_HORIZONS) def test_frequency_is_number_number_of_weekly_time_steps(