Make Grid.gridify() use only pickup addresses

- ensure a `Restaurant` only has one unique `Order.pickup_address`
- rework `Grid.gridify()` so that only pickup addresses are assigned
  into `Pixel`s
- include database migrations to ensure the data adhere to these
  tighter constraints
This commit is contained in:
Alexander Hess 2021-01-24 18:57:44 +01:00
parent 0c1ff5338d
commit 1bfc7db916
Signed by: alexander
GPG key ID: 344EA5AB10D868E0
11 changed files with 519 additions and 61 deletions

View file

@ -0,0 +1,398 @@
"""Remove orders from restaurants with invalid location ...
... and also de-duplicate a couple of redundant addresses.
Revision: #e86290e7305e at 2021-01-23 15:56:59
Revises: #26711cd3f9b9
1) Remove orders
Some restaurants have orders to be picked up at an address that
not their primary address. That is ok if that address is the location
of a second franchise. However, for a small number of restaurants
there is only exactly one order at that other address that often is
located far away from the restaurant's primary location. It looks
like a restaurant signed up with some invalid location that was then
corrected into the primary one.
Use the following SQL statement to obtain a list of these locations
before this migration is run:
SELECT
orders.pickup_address_id,
COUNT(*) AS n_orders,
MIN(placed_at) as first_order_at,
MAX(placed_at) as last_order_at
FROM
{config.CLEAN_SCHEMA}.orders
LEFT OUTER JOIN
{config.CLEAN_SCHEMA}.restaurants
ON orders.restaurant_id = restaurants.id
WHERE
orders.pickup_address_id <> restaurants.address_id
GROUP BY
pickup_address_id;
50 orders with such weird pickup addresses are removed with this migration.
2) De-duplicate addresses
Five restaurants have two pickup addresses that are actually the same location.
The following SQL statement shows them before this migration is run:
SELECT
orders.restaurant_id,
restaurants.name,
restaurants.address_id AS primary_address_id,
addresses.id AS address_id,
addresses.street,
COUNT(*) AS n_orders
FROM
{config.CLEAN_SCHEMA}.orders
LEFT OUTER JOIN
{config.CLEAN_SCHEMA}.addresses ON orders.pickup_address_id = addresses.id
LEFT OUTER JOIN
{config.CLEAN_SCHEMA}.restaurants ON orders.restaurant_id = restaurants.id
WHERE
orders.restaurant_id IN (
SELECT
restaurant_id
FROM (
SELECT DISTINCT
restaurant_id,
pickup_address_id
FROM
{config.CLEAN_SCHEMA}.orders
) AS restaurant_locations
GROUP BY
restaurant_id
HAVING
COUNT(pickup_address_id) > 1
)
GROUP BY
orders.restaurant_id,
restaurants.name,
restaurants.address_id,
addresses.id,
addresses.street
ORDER BY
orders.restaurant_id,
restaurants.name,
restaurants.address_id,
addresses.id,
addresses.street;
3) Remove addresses without any association
After steps 1) and 2) some addresses are not associated with a restaurant any more.
The following SQL statement lists them before this migration is run:
SELECT
id,
street,
zip_code,
city
FROM
{config.CLEAN_SCHEMA}.addresses
WHERE
id NOT IN (
SELECT DISTINCT
pickup_address_id AS id
FROM
{config.CLEAN_SCHEMA}.orders
UNION
SELECT DISTINCT
delivery_address_id AS id
FROM
{config.CLEAN_SCHEMA}.orders
UNION
SELECT DISTINCT
address_id AS id
FROM
{config.CLEAN_SCHEMA}.restaurants
);
4) Ensure every `Restaurant` has exactly one `Address`.
Replace the current `ForeignKeyConstraint` to from `Order` to `Restaurant`
with one that also includes the `Order.pickup_address_id`.
"""
import os
from alembic import op
from urban_meal_delivery import configuration
revision = 'e86290e7305e'
down_revision = '26711cd3f9b9'
branch_labels = None
depends_on = None
config = configuration.make_config('testing' if os.getenv('TESTING') else 'production')
def upgrade():
"""Upgrade to revision e86290e7305e."""
# 1) Remove orders
op.execute(
f"""
DELETE
FROM
{config.CLEAN_SCHEMA}.orders
WHERE pickup_address_id IN (
SELECT
orders.pickup_address_id
FROM
{config.CLEAN_SCHEMA}.orders
LEFT OUTER JOIN
{config.CLEAN_SCHEMA}.restaurants
ON orders.restaurant_id = restaurants.id
WHERE
orders.pickup_address_id <> restaurants.address_id
GROUP BY
orders.pickup_address_id
HAVING
COUNT(*) = 1
);
""",
)
# 2) De-duplicate addresses
op.execute(
f"""
UPDATE
{config.CLEAN_SCHEMA}.orders
SET
pickup_address_id = 353
WHERE
pickup_address_id = 548916;
""",
)
op.execute(
f"""
UPDATE
{config.CLEAN_SCHEMA}.orders
SET
pickup_address_id = 4850
WHERE
pickup_address_id = 6415;
""",
)
op.execute(
f"""
UPDATE
{config.CLEAN_SCHEMA}.orders
SET
pickup_address_id = 16227
WHERE
pickup_address_id = 44627;
""",
)
op.execute(
f"""
UPDATE
{config.CLEAN_SCHEMA}.orders
SET
pickup_address_id = 44458
WHERE
pickup_address_id = 534543;
""",
)
op.execute(
f"""
UPDATE
{config.CLEAN_SCHEMA}.orders
SET
pickup_address_id = 289997
WHERE
pickup_address_id = 309525;
""",
)
# 3) Remove addresses
op.execute(
f"""
DELETE
FROM
{config.CLEAN_SCHEMA}.addresses_pixels
WHERE
address_id NOT IN (
SELECT DISTINCT
pickup_address_id AS id
FROM
{config.CLEAN_SCHEMA}.orders
UNION
SELECT DISTINCT
delivery_address_id AS id
FROM
{config.CLEAN_SCHEMA}.orders
UNION
SELECT DISTINCT
address_id AS id
FROM
{config.CLEAN_SCHEMA}.restaurants
);
""",
)
op.execute(
f"""
UPDATE
{config.CLEAN_SCHEMA}.addresses
SET
primary_id = 302883
WHERE
primary_id = 43526;
""",
)
op.execute(
f"""
UPDATE
{config.CLEAN_SCHEMA}.addresses
SET
primary_id = 47597
WHERE
primary_id = 43728;
""",
)
op.execute(
f"""
UPDATE
{config.CLEAN_SCHEMA}.addresses
SET
primary_id = 159631
WHERE
primary_id = 43942;
""",
)
op.execute(
f"""
UPDATE
{config.CLEAN_SCHEMA}.addresses
SET
primary_id = 275651
WHERE
primary_id = 44759;
""",
)
op.execute(
f"""
UPDATE
{config.CLEAN_SCHEMA}.addresses
SET
primary_id = 156685
WHERE
primary_id = 50599;
""",
)
op.execute(
f"""
UPDATE
{config.CLEAN_SCHEMA}.addresses
SET
primary_id = 480206
WHERE
primary_id = 51774;
""",
)
op.execute(
f"""
DELETE
FROM
{config.CLEAN_SCHEMA}.addresses
WHERE
id NOT IN (
SELECT DISTINCT
pickup_address_id AS id
FROM
{config.CLEAN_SCHEMA}.orders
UNION
SELECT DISTINCT
delivery_address_id AS id
FROM
{config.CLEAN_SCHEMA}.orders
UNION
SELECT DISTINCT
address_id AS id
FROM
{config.CLEAN_SCHEMA}.restaurants
);
""",
)
# 4) Ensure every `Restaurant` has only one `Order.pickup_address`.
op.execute(
f"""
UPDATE
{config.CLEAN_SCHEMA}.orders
SET
pickup_address_id = 53733
WHERE
pickup_address_id = 54892;
""",
)
op.execute(
f"""
DELETE
FROM
{config.CLEAN_SCHEMA}.addresses
WHERE
id = 54892;
""",
)
op.create_unique_constraint(
'uq_restaurants_on_id_address_id',
'restaurants',
['id', 'address_id'],
schema=config.CLEAN_SCHEMA,
)
op.create_foreign_key(
op.f('fk_orders_to_restaurants_via_restaurant_id_pickup_address_id'),
'orders',
'restaurants',
['restaurant_id', 'pickup_address_id'],
['id', 'address_id'],
source_schema=config.CLEAN_SCHEMA,
referent_schema=config.CLEAN_SCHEMA,
onupdate='RESTRICT',
ondelete='RESTRICT',
)
op.drop_constraint(
'fk_orders_to_restaurants_via_restaurant_id',
'orders',
type_='foreignkey',
schema=config.CLEAN_SCHEMA,
)
def downgrade():
"""Downgrade to revision 26711cd3f9b9."""
op.create_foreign_key(
op.f('fk_orders_to_restaurants_via_restaurant_id'),
'orders',
'restaurants',
['restaurant_id'],
['id'],
source_schema=config.CLEAN_SCHEMA,
referent_schema=config.CLEAN_SCHEMA,
onupdate='RESTRICT',
ondelete='RESTRICT',
)
op.drop_constraint(
'fk_orders_to_restaurants_via_restaurant_id_pickup_address_id',
'orders',
type_='foreignkey',
schema=config.CLEAN_SCHEMA,
)
op.drop_constraint(
'uq_restaurants_on_id_address_id',
'restaurants',
type_='unique',
schema=config.CLEAN_SCHEMA,
)

View file

@ -121,6 +121,8 @@ per-file-ignores =
migrations/versions/*.py:
# Type annotations are not strictly enforced.
ANN0, ANN2,
# Do not worry about SQL injection here.
S608,
# File names of revisions are ok.
WPS114,WPS118,
# Revisions may have too many expressions.

View file

@ -8,7 +8,7 @@ from urban_meal_delivery.console import decorators
@click.command()
@decorators.db_revision('888e352d7526')
@decorators.db_revision('e86290e7305e')
def gridify() -> None: # pragma: no cover note:b1f68d24
"""Create grids for all cities.

View file

@ -52,7 +52,7 @@ class Address(meta.Base):
# Relationships
city = orm.relationship('City', back_populates='addresses')
restaurant = orm.relationship('Restaurant', back_populates='address', uselist=False)
restaurants = orm.relationship('Restaurant', back_populates='address')
orders_picked_up = orm.relationship(
'Order',
back_populates='pickup_address',

View file

@ -54,11 +54,12 @@ class Grid(meta.Base):
return round((self.side_length ** 2) / 1_000_000, 1)
@classmethod
def gridify(cls, city: db.City, side_length: int) -> db.Grid:
def gridify(cls, city: db.City, side_length: int) -> db.Grid: # noqa:WPS210
"""Create a fully populated `Grid` for a `city`.
The `Grid` contains only `Pixel`s that have at least one `Address`.
`Address` objects outside the `city`'s viewport are discarded.
The `Grid` contains only `Pixel`s that have at least one
`Order.pickup_address`. `Address` objects outside the `.city`'s
viewport are discarded.
Args:
city: city for which the grid is created
@ -72,7 +73,14 @@ class Grid(meta.Base):
# `Pixel`s grouped by `.n_x`-`.n_y` coordinates.
pixels = {}
for address in city.addresses:
pickup_addresses = ( # noqa:ECE:001
db.session.query(db.Address)
.join(db.Order, db.Address.id == db.Order.pickup_address_id)
.filter(db.Address.city == city)
.all()
)
for address in pickup_addresses:
# Check if an `address` is not within the `city`'s viewport, ...
not_within_city_viewport = (
address.x < 0

View file

@ -79,12 +79,6 @@ class Order(meta.Base): # noqa:WPS214
sa.ForeignKeyConstraint(
['customer_id'], ['customers.id'], onupdate='RESTRICT', ondelete='RESTRICT',
),
sa.ForeignKeyConstraint(
['restaurant_id'],
['restaurants.id'],
onupdate='RESTRICT',
ondelete='RESTRICT',
),
sa.ForeignKeyConstraint(
['courier_id'], ['couriers.id'], onupdate='RESTRICT', ondelete='RESTRICT',
),
@ -94,6 +88,14 @@ class Order(meta.Base): # noqa:WPS214
onupdate='RESTRICT',
ondelete='RESTRICT',
),
sa.ForeignKeyConstraint(
# This foreign key ensures that there is only
# one `.pickup_address` per `.restaurant`
['restaurant_id', 'pickup_address_id'],
['restaurants.id', 'restaurants.address_id'],
onupdate='RESTRICT',
ondelete='RESTRICT',
),
sa.ForeignKeyConstraint(
['delivery_address_id'],
['addresses.id'],
@ -302,7 +304,11 @@ class Order(meta.Base): # noqa:WPS214
# Relationships
customer = orm.relationship('Customer', back_populates='orders')
restaurant = orm.relationship('Restaurant', back_populates='orders')
restaurant = orm.relationship(
'Restaurant',
back_populates='orders',
primaryjoin='Restaurant.id == Order.restaurant_id',
)
courier = orm.relationship('Courier', back_populates='orders')
pickup_address = orm.relationship(
'Address',

View file

@ -12,7 +12,7 @@ class Pixel(meta.Base):
Square pixels aggregate `Address` objects within a `City`.
Every `Address` belongs to exactly one `Pixel` in a `Grid`.
Every `Pixel` has a unique "coordinate" within the `Grid`.
Every `Pixel` has a unique `n_x`-`n_y` coordinate within the `Grid`.
"""
__tablename__ = 'pixels'

View file

@ -34,10 +34,12 @@ class Restaurant(meta.Base):
'0 <= estimated_prep_duration AND estimated_prep_duration <= 2400',
name='realistic_estimated_prep_duration',
),
# Needed by a `ForeignKeyConstraint` in `Order`.
sa.UniqueConstraint('id', 'address_id'),
)
# Relationships
address = orm.relationship('Address', back_populates='restaurant')
address = orm.relationship('Address', back_populates='restaurants')
orders = orm.relationship('Order', back_populates='restaurant')
def __repr__(self) -> str:

View file

@ -8,24 +8,31 @@ from urban_meal_delivery.console import gridify
@pytest.mark.db
def test_four_pixels_with_two_addresses(
cli, db_session, monkeypatch, city, make_address,
def test_two_pixels_with_two_addresses( # noqa:WPS211
cli, db_session, monkeypatch, city, make_address, make_restaurant, make_order,
):
"""Two `Address` objects in distinct `Pixel` objects.
This is roughly the same test case as
`tests.db.test_grids.test_four_pixels_with_two_addresses`.
`tests.db.test_grids.test_two_pixels_with_two_addresses`.
The difference is that the result is written to the database.
"""
# Create two `Address` objects in distinct `Pixel`s.
city.addresses = [
# One `Address` in the lower-left `Pixel`, ...
make_address(latitude=48.8357377, longitude=2.2517412),
# ... and another one in the upper-right one.
make_address(latitude=48.8898312, longitude=2.4357622),
]
# One `Address` in the lower-left `Pixel`, ...
address1 = make_address(latitude=48.8357377, longitude=2.2517412)
# ... and another one in the upper-right one.
address2 = make_address(latitude=48.8898312, longitude=2.4357622)
db_session.add(city)
# Locate a `Restaurant` at the two `Address` objects and
# place one `Order` for each of them so that the `Address`
# objects are used as `Order.pickup_address`s.
restaurant1 = make_restaurant(address=address1)
restaurant2 = make_restaurant(address=address2)
order1 = make_order(restaurant=restaurant1)
order2 = make_order(restaurant=restaurant2)
db_session.add(order1)
db_session.add(order2)
db_session.commit()
side_length = max(city.total_x // 2, city.total_y // 2) + 1

View file

@ -74,18 +74,30 @@ class TestProperties:
class TestGridification:
"""Test the `Grid.gridify()` constructor."""
@pytest.mark.no_cover
def test_one_pixel_without_addresses(self, city):
"""At the very least, there must be one `Pixel` ...
@pytest.fixture
def addresses_mock(self, mocker, monkeypatch):
"""A `Mock` whose `.return_value` are to be set ...
... if the `side_length` is greater than both the
horizontal and vertical distances of the viewport.
... to the addresses that are gridified. The addresses are
all considered `Order.pickup_address` attributes for some orders.
"""
mock = mocker.Mock()
query = ( # noqa:ECE001
mock.query.return_value.join.return_value.filter.return_value.all # noqa:E501,WPS219
)
monkeypatch.setattr(db, 'session', mock)
return query
@pytest.mark.no_cover
def test_no_pixel_without_addresses(self, city, addresses_mock):
"""Without orders, there are no `Pixel` objects on the `grid`.
This test case skips the `for`-loop inside `Grid.gridify()`.
Interestingly, coverage.py does not see this.
"""
city.addresses = []
addresses_mock.return_value = []
# The chosen `side_length` would result in one `Pixel` if there were orders.
# `+1` as otherwise there would be a second pixel in one direction.
side_length = max(city.total_x, city.total_y) + 1
@ -94,13 +106,13 @@ class TestGridification:
assert isinstance(result, db.Grid)
assert len(result.pixels) == 0 # noqa:WPS507
def test_one_pixel_with_one_address(self, city, address):
def test_one_pixel_with_one_address(self, city, order, addresses_mock):
"""At the very least, there must be one `Pixel` ...
... if the `side_length` is greater than both the
horizontal and vertical distances of the viewport.
"""
city.addresses = [address]
addresses_mock.return_value = [order.pickup_address]
# `+1` as otherwise there would be a second pixel in one direction.
side_length = max(city.total_x, city.total_y) + 1
@ -110,7 +122,7 @@ class TestGridification:
assert isinstance(result, db.Grid)
assert len(result.pixels) == 1
def test_one_pixel_with_two_addresses(self, city, make_address):
def test_one_pixel_with_two_addresses(self, city, make_order, addresses_mock):
"""At the very least, there must be one `Pixel` ...
... if the `side_length` is greater than both the
@ -119,7 +131,8 @@ class TestGridification:
This test case is necessary as `test_one_pixel_with_one_address`
does not have to re-use an already created `Pixel` object internally.
"""
city.addresses = [make_address(), make_address()]
orders = [make_order(), make_order()]
addresses_mock.return_value = [order.pickup_address for order in orders]
# `+1` as otherwise there would be a second pixel in one direction.
side_length = max(city.total_x, city.total_y) + 1
@ -129,12 +142,11 @@ class TestGridification:
assert isinstance(result, db.Grid)
assert len(result.pixels) == 1
def test_one_pixel_with_address_too_far_south(self, city, address):
def test_no_pixel_with_one_address_too_far_south(self, city, order, addresses_mock):
"""An `address` outside the `city`'s viewport is discarded."""
# Move the `address` just below `city.southwest`.
address.latitude = city.southwest.latitude - 0.1
city.addresses = [address]
order.pickup_address.latitude = city.southwest.latitude - 0.1
addresses_mock.return_value = [order.pickup_address]
# `+1` as otherwise there would be a second pixel in one direction.
side_length = max(city.total_x, city.total_y) + 1
@ -145,16 +157,15 @@ class TestGridification:
assert len(result.pixels) == 0 # noqa:WPS507
@pytest.mark.no_cover
def test_one_pixel_with_address_too_far_west(self, city, address):
def test_no_pixel_with_one_address_too_far_west(self, city, order, addresses_mock):
"""An `address` outside the `city`'s viewport is discarded.
This test is a logical sibling to `test_one_pixel_with_address_too_far_south`
and therefore redundant.
This test is a logical sibling to
`test_no_pixel_with_one_address_too_far_south` and therefore redundant.
"""
# Move the `address` just left to `city.southwest`.
address.longitude = city.southwest.longitude - 0.1
city.addresses = [address]
order.pickup_address.longitude = city.southwest.longitude - 0.1
addresses_mock.return_value = [order.pickup_address]
# `+1` as otherwise there would be a second pixel in one direction.
side_length = max(city.total_x, city.total_y) + 1
@ -165,13 +176,13 @@ class TestGridification:
assert len(result.pixels) == 0 # noqa:WPS507
@pytest.mark.no_cover
def test_four_pixels_with_two_addresses(self, city, make_address):
def test_two_pixels_with_two_addresses(self, city, make_address, addresses_mock):
"""Two `Address` objects in distinct `Pixel` objects.
This test is more of a sanity check.
"""
# Create two `Address` objects in distinct `Pixel`s.
city.addresses = [
addresses_mock.return_value = [
# One `Address` in the lower-left `Pixel`, ...
make_address(latitude=48.8357377, longitude=2.2517412),
# ... and another one in the upper-right one.
@ -194,7 +205,9 @@ class TestGridification:
@pytest.mark.db
@pytest.mark.no_cover
@pytest.mark.parametrize('side_length', [250, 500, 1_000, 2_000, 4_000, 8_000])
def test_make_random_grids(self, db_session, city, make_address, side_length):
def test_make_random_grids( # noqa:WPS211
self, db_session, city, make_address, make_restaurant, make_order, side_length,
):
"""With 100 random `Address` objects, a grid must have ...
... between 1 and a deterministic upper bound of `Pixel` objects.
@ -202,7 +215,10 @@ class TestGridification:
This test creates confidence that the created `Grid`
objects adhere to the database constraints.
"""
city.addresses = [make_address() for _ in range(100)]
addresses = [make_address() for _ in range(100)]
restaurants = [make_restaurant(address=address) for address in addresses]
orders = [make_order(restaurant=restaurant) for restaurant in restaurants]
db_session.add_all(orders)
n_pixels_x = (city.total_x // side_length) + 1
n_pixels_y = (city.total_y // side_length) + 1

View file

@ -17,16 +17,34 @@ class TestAggregateOrders:
"""
@pytest.fixture
def one_pixel_grid(self, db_session, city, restaurant):
def addresses_mock(self, mocker, monkeypatch):
"""A `Mock` whose `.return_value` are to be set ...
... to the addresses that are gridified. The addresses are
all considered `Order.pickup_address` attributes for some orders.
Note: This fixture also exists in `tests.db.test_grids`.
"""
mock = mocker.Mock()
query = ( # noqa:ECE001
mock.query.return_value.join.return_value.filter.return_value.all # noqa:E501,WPS219
)
monkeypatch.setattr(db, 'session', mock)
return query
@pytest.fixture
def one_pixel_grid(self, db_session, city, restaurant, addresses_mock):
"""A persisted `Grid` with one `Pixel`.
`restaurant` must be a dependency as otherwise
its `.address` is not put into the database.
`restaurant` must be a dependency as otherwise the `restaurant.address`
is not put into the database as an `Order.pickup_address`.
"""
addresses_mock.return_value = [restaurant.address]
# `+1` as otherwise there would be a second pixel in one direction.
side_length = max(city.total_x, city.total_y) + 1
grid = db.Grid.gridify(city=city, side_length=side_length)
db_session.add(grid)
assert len(grid.pixels) == 1 # sanity check
@ -272,17 +290,17 @@ class TestAggregateOrders:
assert result['total_orders'].sum() == 18
@pytest.fixture
def two_pixel_grid(self, db_session, city, make_address, make_restaurant):
"""A persisted `Grid` with two `Pixel` objects.
`restaurant` must be a dependency as otherwise
its `.address` is not put into the database.
"""
def two_pixel_grid( # noqa:WPS211
self, db_session, city, make_address, make_restaurant, addresses_mock,
):
"""A persisted `Grid` with two `Pixel` objects."""
# One `Address` in the lower-left `Pixel`, ...
address1 = make_address(latitude=48.8357377, longitude=2.2517412)
# ... and another one in the upper-right one.
address2 = make_address(latitude=48.8898312, longitude=2.4357622)
addresses_mock.return_value = [address1, address2]
# Create `Restaurant`s at the two addresses.
make_restaurant(address=address1)
make_restaurant(address=address2)
@ -307,7 +325,8 @@ class TestAggregateOrders:
In total, there are 30 orders.
"""
address1, address2 = two_pixel_grid.city.addresses
restaurant1, restaurant2 = address1.restaurant, address2.restaurant
# Rarely, an `Address` may have several `Restaurant`s in the real dataset.
restaurant1, restaurant2 = address1.restaurants[0], address2.restaurants[0]
# Create one order every other hour for `restaurant1`.
for hour in range(11, 23, 2):