From 796fdc919c3ae4d12961cb55ae0d86c74450ec16 Mon Sep 17 00:00:00 2001
From: Alexander Hess <alexander@webartifex.biz>
Date: Mon, 1 Feb 2021 15:46:52 +0100
Subject: [PATCH] Add `Forecast.from_dataframe()` constructor

- this alternative constructor takes the `pd.DataFrame`s from the
  `*Model.predict()` methods and converts them into ORM models
---
 src/urban_meal_delivery/db/forecasts.py |  64 ++++++++++++++-
 tests/db/test_forecasts.py              | 104 +++++++++++++++++++++---
 2 files changed, 157 insertions(+), 11 deletions(-)

diff --git a/src/urban_meal_delivery/db/forecasts.py b/src/urban_meal_delivery/db/forecasts.py
index 352320e..d453fcd 100644
--- a/src/urban_meal_delivery/db/forecasts.py
+++ b/src/urban_meal_delivery/db/forecasts.py
@@ -1,5 +1,10 @@
 """Provide the ORM's `Forecast` model."""
 
+from __future__ import annotations
+
+from typing import List
+
+import pandas as pd
 import sqlalchemy as sa
 from sqlalchemy import orm
 from sqlalchemy.dialects import postgresql
@@ -10,7 +15,8 @@ from urban_meal_delivery.db import meta
 class Forecast(meta.Base):
     """A demand forecast for a `.pixel` and `.time_step` pair.
 
-    This table is denormalized on purpose to keep things simple.
+    This table is denormalized on purpose to keep things simple. In particular,
+    the `.model` and `.actual` hold redundant values.
     """
 
     __tablename__ = 'forecasts'
@@ -133,3 +139,59 @@ class Forecast(meta.Base):
             n_y=self.pixel.n_y,
             start_at=self.start_at,
         )
+
+    @classmethod
+    def from_dataframe(  # noqa:WPS211
+        cls,
+        pixel: db.Pixel,
+        time_step: int,
+        training_horizon: int,
+        model: str,
+        data: pd.Dataframe,
+    ) -> List[db.Forecast]:
+        """Convert results from the forecasting `*Model`s into `Forecast` objects.
+
+        This is an alternative constructor method.
+
+        Background: The functions in `urban_meal_delivery.forecasts.methods`
+        return `pd.Dataframe`s with "start_at" (i.e., `pd.Timestamp` objects)
+        values in the index and five columns "prediction", "low80", "high80",
+        "low95", and "high95" with `np.float` values. The `*Model.predic()`
+        methods in `urban_meal_delivery.forecasts.models` then add an "actual"
+        column. This constructor converts these results into ORM models.
+        Also, the `np.float` values are cast as plain `float` ones as
+        otherwise SQLAlchemy and the database would complain.
+
+        Args:
+            pixel: in which the forecast is made
+            time_step: length of one time step in minutes
+            training_horizon: length of the training horizon in weeks
+            model: name of the forecasting model
+            data: a `pd.Dataframe` as described above (i.e.,
+                with the six columns holding `float`s)
+
+        Returns:
+            forecasts: the `data` as `Forecast` objects
+        """  # noqa:RST215
+        forecasts = []
+
+        for timestamp_idx in data.index:
+            forecast = cls(
+                pixel=pixel,
+                start_at=timestamp_idx.to_pydatetime(),
+                time_step=time_step,
+                training_horizon=training_horizon,
+                model=model,
+                actual=int(data.loc[timestamp_idx, 'actual']),
+                prediction=round(data.loc[timestamp_idx, 'prediction'], 5),
+                low80=round(data.loc[timestamp_idx, 'low80'], 5),
+                high80=round(data.loc[timestamp_idx, 'high80'], 5),
+                low95=round(data.loc[timestamp_idx, 'low95'], 5),
+                high95=round(data.loc[timestamp_idx, 'high95'], 5),
+            )
+            forecasts.append(forecast)
+
+        return forecasts
+
+
+from urban_meal_delivery import db  # noqa:E402  isort:skip
diff --git a/tests/db/test_forecasts.py b/tests/db/test_forecasts.py
index 8cf9703..a2cd1bb 100644
--- a/tests/db/test_forecasts.py
+++ b/tests/db/test_forecasts.py
@@ -1,23 +1,35 @@
 """Test the ORM's `Forecast` model."""
 
-import datetime
+import datetime as dt
 
+import pandas as pd
 import pytest
 import sqlalchemy as sqla
 from sqlalchemy import exc as sa_exc
 
+from tests import config as test_config
 from urban_meal_delivery import db
 
 
+MODEL = 'hets'
+
+
 @pytest.fixture
 def forecast(pixel):
-    """A `forecast` made in the `pixel`."""
+    """A `forecast` made in the `pixel` at `NOON`."""
+    start_at = dt.datetime(
+        test_config.END.year,
+        test_config.END.month,
+        test_config.END.day,
+        test_config.NOON,
+    )
+
     return db.Forecast(
         pixel=pixel,
-        start_at=datetime.datetime(2020, 1, 1, 12, 0),
-        time_step=60,
-        training_horizon=8,
-        model='hets',
+        start_at=start_at,
+        time_step=test_config.LONG_TIME_STEP,
+        training_horizon=test_config.LONG_TRAIN_HORIZON,
+        model=MODEL,
         actual=12,
         prediction=12.3,
         low80=1.23,
@@ -76,7 +88,7 @@ class TestConstraints:
         self, db_session, forecast, hour,
     ):
         """Insert an instance with invalid data."""
-        forecast.start_at = datetime.datetime(
+        forecast.start_at = dt.datetime(
             forecast.start_at.year,
             forecast.start_at.month,
             forecast.start_at.day,
@@ -91,7 +103,7 @@ class TestConstraints:
 
     def test_invalid_start_at_not_quarter_of_hour(self, db_session, forecast):
         """Insert an instance with invalid data."""
-        forecast.start_at += datetime.timedelta(minutes=1)
+        forecast.start_at += dt.timedelta(minutes=1)
         db_session.add(forecast)
 
         with pytest.raises(
@@ -101,7 +113,7 @@ class TestConstraints:
 
     def test_invalid_start_at_seconds_set(self, db_session, forecast):
         """Insert an instance with invalid data."""
-        forecast.start_at += datetime.timedelta(seconds=1)
+        forecast.start_at += dt.timedelta(seconds=1)
         db_session.add(forecast)
 
         with pytest.raises(
@@ -111,7 +123,7 @@ class TestConstraints:
 
     def test_invalid_start_at_microseconds_set(self, db_session, forecast):
         """Insert an instance with invalid data."""
-        forecast.start_at += datetime.timedelta(microseconds=1)
+        forecast.start_at += dt.timedelta(microseconds=1)
         db_session.add(forecast)
 
         with pytest.raises(
@@ -419,3 +431,75 @@ class TestConstraints:
 
         with pytest.raises(sa_exc.IntegrityError, match='duplicate key value'):
             db_session.commit()
+
+
+class TestFromDataFrameConstructor:
+    """Test the alternative `Forecast.from_dataframe()` constructor."""
+
+    @pytest.fixture
+    def prediction_data(self):
+        """A `pd.DataFrame` as returned by `*Model.predict()` ...
+
+        ... and used as the `data` argument to `Forecast.from_dataframe()`.
+
+        We assume the `data` come from some vertical forecasting `*Model`
+        and contain several rows (= `3` in this example) corresponding
+        to different time steps centered around `NOON`.
+        """
+        noon_start_at = dt.datetime(
+            test_config.END.year,
+            test_config.END.month,
+            test_config.END.day,
+            test_config.NOON,
+        )
+
+        index = pd.Index(
+            [
+                noon_start_at - dt.timedelta(minutes=test_config.LONG_TIME_STEP),
+                noon_start_at,
+                noon_start_at + dt.timedelta(minutes=test_config.LONG_TIME_STEP),
+            ],
+        )
+        index.name = 'start_at'
+
+        return pd.DataFrame(
+            data={
+                'actual': (11, 12, 13),
+                'prediction': (11.3, 12.3, 13.3),
+                'low80': (1.123, 1.23, 1.323),
+                'high80': (112.34, 123.4, 132.34),
+                'low95': (0.1123, 0.123, 0.1323),
+                'high95': (1123.45, 1234.5, 1323.45),
+            },
+            index=index,
+        )
+
+    def test_convert_dataframe_into_orm_objects(self, pixel, prediction_data):
+        """Call `Forecast.from_dataframe()`."""
+        forecasts = db.Forecast.from_dataframe(
+            pixel=pixel,
+            time_step=test_config.LONG_TIME_STEP,
+            training_horizon=test_config.LONG_TRAIN_HORIZON,
+            model=MODEL,
+            data=prediction_data,
+        )
+
+        assert len(forecasts) == 3
+        for forecast in forecasts:
+            assert isinstance(forecast, db.Forecast)
+
+    @pytest.mark.db
+    def test_persist_predictions_into_database(
+        self, db_session, pixel, prediction_data,
+    ):
+        """Call `Forecast.from_dataframe()` and persist the results."""
+        forecasts = db.Forecast.from_dataframe(
+            pixel=pixel,
+            time_step=test_config.LONG_TIME_STEP,
+            training_horizon=test_config.LONG_TRAIN_HORIZON,
+            model=MODEL,
+            data=prediction_data,
+        )
+
+        db_session.add_all(forecasts)
+        db_session.commit()