114 lines
3.9 KiB
Python
114 lines
3.9 KiB
Python
|
|
"""Obtain and work with time series data."""
|
||
|
|
|
||
|
|
import datetime
|
||
|
|
|
||
|
|
import pandas as pd
|
||
|
|
|
||
|
|
from urban_meal_delivery import config
|
||
|
|
from urban_meal_delivery import db
|
||
|
|
|
||
|
|
|
||
|
|
def aggregate_orders(grid: db.Grid, time_step: int) -> pd.DataFrame:  # pragma: no cover
    """Obtain a time series of the ad-hoc `Order` totals.

    Ad-hoc orders whose pickup address lies in the `grid`'s city are counted
    per pixel and per interval of `time_step` minutes. As the database only
    returns combinations with at least one order, the missing
    "pixel_id"-"start_at" pairs within the operating hours are filled in
    with `0`s afterwards.

    Note: The SQL query buckets orders by the minute of the hour modulo
    `time_step`, so the intervals restart at every full hour. The buckets
    only line up with the evenly spaced "start_at" values generated below
    if `time_step` divides 60 without a remainder.

    Args:
        grid: pixel grid used to aggregate orders spatially
        time_step: interval length (in minutes) into which orders are aggregated

    Returns:
        order_totals: `DataFrame` with a `MultiIndex` of the "pixel_id"s and
            beginnings of the intervals (i.e., "start_at"s); the sole column
            with data is "total_orders"; only pixels with at least one order
            are included; if there are no orders at all, the empty query
            result is returned as is
    """
    # `data` is probably missing "pixel_id"-"start_at" pairs.
    # This happens whenever there is no demand in the `Pixel` in the given `time_step`.
    data = pd.read_sql_query(
        f"""-- # noqa:WPS221
        SELECT
            pixel_id,
            start_at,
            COUNT(*) AS total_orders
        FROM (
            SELECT
                pixel_id,
                placed_at_without_seconds - minutes_to_be_cut AS start_at
            FROM (
                SELECT
                    pixels.pixel_id,
                    DATE_TRUNC('MINUTE', orders.placed_at) AS placed_at_without_seconds,
                    ((
                        EXTRACT(MINUTES FROM orders.placed_at)::INTEGER % {time_step}
                    )::TEXT || ' MINUTES')::INTERVAL
                        AS minutes_to_be_cut
                FROM (
                    SELECT
                        id,
                        placed_at,
                        pickup_address_id
                    FROM
                        {config.CLEAN_SCHEMA}.orders
                    INNER JOIN (
                        SELECT
                            id AS address_id
                        FROM
                            {config.CLEAN_SCHEMA}.addresses
                        WHERE
                            city_id = {grid.city.id}
                    ) AS in_city
                        ON orders.pickup_address_id = in_city.address_id
                    WHERE
                        ad_hoc IS TRUE
                ) AS
                    orders
                INNER JOIN (
                    SELECT
                        address_id,
                        pixel_id
                    FROM
                        {config.CLEAN_SCHEMA}.addresses_pixels
                    WHERE
                        grid_id = {grid.id}
                        AND
                        city_id = {grid.city.id} -- city_id is redundant -> sanity check
                ) AS pixels
                    ON orders.pickup_address_id = pixels.address_id
            ) AS placed_at_aggregated_into_start_at
        ) AS pixel_start_at_combinations
        GROUP BY
            pixel_id,
            start_at
        ORDER BY
            pixel_id,
            start_at;
        """,
        con=db.connection,
        index_col=['pixel_id', 'start_at'],
    )

    # Without any order, there is no date range to fill in.
    if data.empty:
        return data

    # Calculate the first and last "start_at" value ...
    start_day = data.index.levels[1].min().date()
    start = datetime.datetime(
        start_day.year, start_day.month, start_day.day, config.SERVICE_START,
    )
    end_day = data.index.levels[1].max().date()
    end = datetime.datetime(
        end_day.year, end_day.month, end_day.day, config.SERVICE_END,
    )

    # ... and all "start_at" values in between. They are the same for every
    # pixel and, thus, generated only once. "min" is used as the frequency
    # alias as "T" is deprecated in recent pandas versions.
    start_ats = pd.date_range(start, end, freq=f'{time_step}min')

    # The "start_at" values must lie within the operating hours.
    within_service_hours = (start_ats.hour >= config.SERVICE_START) & (
        start_ats.hour < config.SERVICE_END
    )
    start_ats = start_ats[within_service_hours]

    # Re-index `data` filling in `0`s where there is no demand. The product
    # enumerates the combinations pixel by pixel, each in chronological order.
    index = pd.MultiIndex.from_product(
        [sorted(data.index.levels[0]), start_ats], names=['pixel_id', 'start_at'],
    )

    return data.reindex(index, fill_value=0)
|