Add DistanceMatrix class

- the class stores the data of a distance matrix between all addresses
  + air distances
  + bicycle distances
- in addition, the "path" returned by the Google Directions API is
  also stored as a JSON serialized sequence of latitude-longitude pairs
- we assume a symmetric graph
This commit is contained in:
Alexander Hess 2021-03-02 14:36:07 +01:00
parent 28368cc30a
commit cc75307e5a
Signed by: alexander
GPG key ID: 344EA5AB10D868E0
7 changed files with 442 additions and 2 deletions

View file

@ -0,0 +1,96 @@
"""Add distance matrix.
Revision: #b4dd0b8903a5 at 2021-03-01 16:14:06
Revises: #8bfb928a31f8
"""
import os
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
from urban_meal_delivery import configuration
# Identifiers Alembic uses to place this revision in the migration history.
revision = 'b4dd0b8903a5'
down_revision = '8bfb928a31f8'
branch_labels = None
depends_on = None

# The testing configuration is chosen whenever the `TESTING` environment
# variable holds a non-empty value; otherwise the production one is used.
_environment = 'testing' if os.getenv('TESTING') else 'production'
config = configuration.make_config(_environment)
def upgrade():
    """Upgrade to revision b4dd0b8903a5.

    Creates the `addresses_addresses` table in the configured schema,
    including its primary key, two composite foreign keys into the
    `addresses` table, a unique constraint, and the check constraints
    on the distance and duration columns.
    """
    # Both foreign keys reference the same composite key in `addresses`.
    referenced_address_key = (
        f'{config.CLEAN_SCHEMA}.addresses.id',
        f'{config.CLEAN_SCHEMA}.addresses.city_id',
    )

    columns = [
        sa.Column('first_address_id', sa.Integer(), nullable=False),
        sa.Column('second_address_id', sa.Integer(), nullable=False),
        sa.Column('city_id', sa.SmallInteger(), nullable=False),
        sa.Column('air_distance', sa.Integer(), nullable=False),
        sa.Column('bicycle_distance', sa.Integer(), nullable=True),
        sa.Column('bicycle_duration', sa.Integer(), nullable=True),
        sa.Column('directions', postgresql.JSON(), nullable=True),
    ]

    primary_key = sa.PrimaryKeyConstraint(
        'first_address_id',
        'second_address_id',
        name=op.f('pk_addresses_addresses'),
    )
    first_address_fk = sa.ForeignKeyConstraint(
        ['first_address_id', 'city_id'],
        list(referenced_address_key),
        name=op.f(
            'fk_addresses_addresses_to_addresses_via_first_address_id_city_id',
        ),
        onupdate='RESTRICT',
        ondelete='RESTRICT',
    )
    second_address_fk = sa.ForeignKeyConstraint(
        ['second_address_id', 'city_id'],
        list(referenced_address_key),
        name=op.f(
            'fk_addresses_addresses_to_addresses_via_second_address_id_city_id',
        ),
        onupdate='RESTRICT',
        ondelete='RESTRICT',
    )
    unique_pair = sa.UniqueConstraint(
        'first_address_id',
        'second_address_id',
        name=op.f('uq_addresses_addresses_on_first_address_id_second_address_id'),
    )

    # (SQL expression, constraint name) pairs, in the order they are created.
    check_specs = (
        (
            'first_address_id < second_address_id',
            'ck_addresses_addresses_on_distances_are_symmetric_for_bicycles',
        ),
        (
            '0 <= air_distance AND air_distance < 20000',
            'ck_addresses_addresses_on_realistic_air_distance',
        ),
        (
            'bicycle_distance < 25000',
            'ck_addresses_addresses_on_realistic_bicycle_distance',
        ),
        (
            'air_distance <= bicycle_distance',
            'ck_addresses_addresses_on_air_distance_is_shortest',
        ),
        (
            '0 <= bicycle_duration AND bicycle_duration <= 3600',
            'ck_addresses_addresses_on_realistic_bicycle_travel_time',
        ),
    )
    checks = [
        sa.CheckConstraint(expression, name=op.f(constraint_name))
        for expression, constraint_name in check_specs
    ]

    op.create_table(
        'addresses_addresses',
        *columns,
        primary_key,
        first_address_fk,
        second_address_fk,
        unique_pair,
        *checks,
        schema=config.CLEAN_SCHEMA,
    )
def downgrade():
    """Downgrade to revision 8bfb928a31f8.

    Drops the `addresses_addresses` table from the configured schema.
    """
    table_name = 'addresses_addresses'
    op.drop_table(table_name, schema=config.CLEAN_SCHEMA)

View file

@ -1,6 +1,7 @@
"""Provide the ORM models and a connection to the database.""" """Provide the ORM models and a connection to the database."""
from urban_meal_delivery.db.addresses import Address from urban_meal_delivery.db.addresses import Address
from urban_meal_delivery.db.addresses_addresses import DistanceMatrix
from urban_meal_delivery.db.addresses_pixels import AddressPixelAssociation from urban_meal_delivery.db.addresses_pixels import AddressPixelAssociation
from urban_meal_delivery.db.cities import City from urban_meal_delivery.db.cities import City
from urban_meal_delivery.db.connection import connection from urban_meal_delivery.db.connection import connection

View file

@ -57,6 +57,16 @@ class Address(meta.Base):
# Relationships # Relationships
city = orm.relationship('City', back_populates='addresses') city = orm.relationship('City', back_populates='addresses')
_distances1 = orm.relationship(
'DistanceMatrix',
back_populates='first_address',
foreign_keys='[DistanceMatrix.first_address_id, DistanceMatrix.city_id]',
)
_distances2 = orm.relationship(
'DistanceMatrix',
back_populates='second_address',
foreign_keys='[DistanceMatrix.second_address_id, DistanceMatrix.city_id]',
)
restaurants = orm.relationship('Restaurant', back_populates='address') restaurants = orm.relationship('Restaurant', back_populates='address')
orders_picked_up = orm.relationship( orders_picked_up = orm.relationship(
'Order', 'Order',

View file

@ -0,0 +1,114 @@
"""Model for the relationship between two `Address` objects (= distance matrix)."""
import json
from typing import List
import sqlalchemy as sa
from sqlalchemy import orm
from sqlalchemy.dialects import postgresql
from urban_meal_delivery.db import meta
from urban_meal_delivery.db import utils
class DistanceMatrix(meta.Base):
    """Distance matrix between `Address` objects.

    Models the pairwise distances between two `Address` objects,
    including directions for a `Courier` to get from one `Address` to another.

    As the couriers are on bicycles, we model the distance matrix
    as a symmetric graph (i.e., same distance in both directions).

    Implements an association pattern between `Address` and `Address`.

    Further info:
        https://docs.sqlalchemy.org/en/stable/orm/basic_relationships.html#association-object  # noqa:E501
    """

    __tablename__ = 'addresses_addresses'

    # Columns
    first_address_id = sa.Column(sa.Integer, primary_key=True)
    second_address_id = sa.Column(sa.Integer, primary_key=True)
    city_id = sa.Column(sa.SmallInteger, nullable=False)
    # Distances are measured in meters.
    air_distance = sa.Column(sa.Integer, nullable=False)
    bicycle_distance = sa.Column(sa.Integer, nullable=True)
    # The duration is measured in seconds.
    bicycle_duration = sa.Column(sa.Integer, nullable=True)
    # An array of latitude-longitude pairs approximating a courier's way.
    # The column is nullable, i.e., directions may be missing.
    directions = sa.Column(postgresql.JSON, nullable=True)

    # Constraints
    __table_args__ = (
        # The two `Address` objects must be in the same `.city`.
        sa.ForeignKeyConstraint(
            ['first_address_id', 'city_id'],
            ['addresses.id', 'addresses.city_id'],
            onupdate='RESTRICT',
            ondelete='RESTRICT',
        ),
        sa.ForeignKeyConstraint(
            ['second_address_id', 'city_id'],
            ['addresses.id', 'addresses.city_id'],
            onupdate='RESTRICT',
            ondelete='RESTRICT',
        ),
        # Each `Address`-`Address` pair only has one distance.
        # NOTE(review): the same two columns already form the primary key.
        sa.UniqueConstraint('first_address_id', 'second_address_id'),
        # Only one of the two possible orderings of a pair may be stored.
        sa.CheckConstraint(
            'first_address_id < second_address_id',
            name='distances_are_symmetric_for_bicycles',
        ),
        sa.CheckConstraint(
            '0 <= air_distance AND air_distance < 20000',
            name='realistic_air_distance',
        ),
        sa.CheckConstraint(
            'bicycle_distance < 25000',  # `.bicycle_distance` may not be negative
            name='realistic_bicycle_distance',  # due to the constraint below.
        ),
        sa.CheckConstraint(
            'air_distance <= bicycle_distance', name='air_distance_is_shortest',
        ),
        sa.CheckConstraint(
            '0 <= bicycle_duration AND bicycle_duration <= 3600',
            name='realistic_bicycle_travel_time',
        ),
    )

    # Relationships
    first_address = orm.relationship(
        'Address',
        back_populates='_distances1',
        foreign_keys='[DistanceMatrix.first_address_id, DistanceMatrix.city_id]',
    )
    second_address = orm.relationship(
        'Address',
        back_populates='_distances2',
        foreign_keys='[DistanceMatrix.second_address_id, DistanceMatrix.city_id]',
    )

    # We do not implement a `.__init__()` method and leave that to SQLAlchemy.
    # Instead, we use `hasattr()` to check for uninitialized attributes.  grep:86ffc14e

    @property
    def path(self) -> List[utils.Location]:
        """The couriers' path from `.first_address` to `.second_address`.

        The returned `Location`s all relate to `.first_address.city.southwest`.

        As `.directions` is nullable, an empty list is returned as long as
        no directions are stored. That empty result is deliberately not
        cached so that directions set later on are still picked up.

        Implementation detail: Once `.directions` are available, this
        property is cached as none of the underlying attributes
        (i.e., `.directions`) are to be changed afterwards.
        """
        if not hasattr(self, '_path'):  # noqa:WPS421 note:86ffc14e
            if self.directions is None:
                # `json.loads(None)` would raise a `TypeError`.
                return []

            inner_points = [
                utils.Location(point[0], point[1])
                for point in json.loads(self.directions)
            ]
            for point in inner_points:
                point.relate_to(self.first_address.city.southwest)

            self._path = inner_points

        return self._path

View file

@ -10,7 +10,7 @@ class AddressPixelAssociation(meta.Base):
"""Association pattern between `Address` and `Pixel`. """Association pattern between `Address` and `Pixel`.
This approach is needed here mainly because it implicitly This approach is needed here mainly because it implicitly
updates the `_city_id` and `_grid_id` columns. updates the `city_id` and `grid_id` columns.
Further info: Further info:
https://docs.sqlalchemy.org/en/stable/orm/basic_relationships.html#association-object # noqa:E501 https://docs.sqlalchemy.org/en/stable/orm/basic_relationships.html#association-object # noqa:E501

View file

@ -0,0 +1,219 @@
"""Test the ORM's `DistanceMatrix` model."""
import json
import pytest
import sqlalchemy as sqla
from geopy import distance
from sqlalchemy import exc as sa_exc
from urban_meal_delivery import db
from urban_meal_delivery.db import utils
@pytest.fixture
def another_address(make_address):
    """A second `Address` object created with the `make_address` factory."""
    second_address = make_address()
    return second_address
@pytest.fixture
def assoc(address, another_address, make_address):
    """A `DistanceMatrix` entry linking `address` and `another_address`.

    The air distance is the great-circle distance between the two
    addresses; the bicycle distance is set to 125% of it.
    """
    origin = (address.latitude, address.longitude)
    destination = (another_address.latitude, another_address.longitude)
    air_distance = distance.great_circle(origin, destination).meters

    # The coordinates of 5 further addresses serve as the "path"
    # from `.first_address` to `.second_address`.
    waypoints = []
    for _ in range(5):
        addr = make_address()
        waypoints.append((float(addr.latitude), float(addr.longitude)))
    directions = json.dumps(waypoints)

    return db.DistanceMatrix(
        first_address=address,
        second_address=another_address,
        air_distance=round(air_distance),
        bicycle_distance=round(1.25 * air_distance),
        bicycle_duration=300,
        directions=directions,
    )
class TestSpecialMethods:
    """Exercise the special methods of `DistanceMatrix`."""

    def test_create_an_address_address_association(self, assoc):
        """A new `DistanceMatrix` object can be instantiated."""
        created = assoc

        assert created is not None
@pytest.mark.db
@pytest.mark.no_cover
class TestConstraints:
    """Verify the database constraints defined in `DistanceMatrix`."""

    def test_insert_into_database(self, db_session, assoc):
        """A valid instance is stored in the (empty) database."""
        all_entries = db_session.query(db.DistanceMatrix)

        assert all_entries.count() == 0

        db_session.add(assoc)
        db_session.commit()

        assert all_entries.count() == 1

    def test_delete_a_referenced_first_address(self, db_session, assoc):
        """Deleting the `Address` referenced as the first one fails."""
        db_session.add(assoc)
        db_session.commit()

        # Must delete without ORM as otherwise an UPDATE statement is emitted.
        referenced_id = assoc.first_address.id
        delete_stmt = sqla.delete(db.Address).where(db.Address.id == referenced_id)

        # The constraint name is shortened.
        expected = 'fk_addresses_addresses_to_addresses_via_first_address'
        with pytest.raises(sa_exc.IntegrityError, match=expected):
            db_session.execute(delete_stmt)

    def test_delete_a_referenced_second_address(self, db_session, assoc):
        """Deleting the `Address` referenced as the second one fails."""
        db_session.add(assoc)
        db_session.commit()

        # Must delete without ORM as otherwise an UPDATE statement is emitted.
        referenced_id = assoc.second_address.id
        delete_stmt = sqla.delete(db.Address).where(db.Address.id == referenced_id)

        # The constraint name is shortened.
        expected = 'fk_addresses_addresses_to_addresses_via_second_address'
        with pytest.raises(sa_exc.IntegrityError, match=expected):
            db_session.execute(delete_stmt)

    def test_reference_an_invalid_city(self, db_session, address, another_address):
        """A record whose `city_id` does not match the addresses is rejected."""
        db_session.add(address)
        db_session.add(another_address)
        db_session.commit()

        # Must insert without ORM as otherwise SQLAlchemy figures out
        # that something is wrong before any query is sent to the database.
        insert_stmt = sqla.insert(db.DistanceMatrix).values(
            first_address_id=address.id,
            second_address_id=another_address.id,
            city_id=999,
            air_distance=123,
        )

        # The constraint name is shortened.
        expected = 'fk_addresses_addresses_to_addresses_via_first_address'
        with pytest.raises(sa_exc.IntegrityError, match=expected):
            db_session.execute(insert_stmt)

    def test_redundant_addresses(self, db_session, assoc):
        """A second record for the same pair of addresses is rejected."""
        db_session.add(assoc)
        db_session.commit()

        # Must insert without ORM as otherwise SQLAlchemy figures out
        # that something is wrong before any query is sent to the database.
        duplicate_stmt = sqla.insert(db.DistanceMatrix).values(
            first_address_id=assoc.first_address.id,
            second_address_id=assoc.second_address.id,
            city_id=assoc.city_id,
            air_distance=assoc.air_distance,
        )

        with pytest.raises(sa_exc.IntegrityError, match='duplicate key value'):
            db_session.execute(duplicate_stmt)

    def test_symmetric_addresses(self, db_session, assoc):
        """A record with the two addresses swapped is rejected."""
        db_session.add(assoc)
        db_session.commit()

        swapped_assoc = db.DistanceMatrix(
            first_address=assoc.second_address,
            second_address=assoc.first_address,
            air_distance=assoc.air_distance,
        )
        db_session.add(swapped_assoc)

        expected = 'ck_addresses_addresses_on_distances_are_symmetric_for_bicycles'
        with pytest.raises(sa_exc.IntegrityError, match=expected):
            db_session.commit()

    def test_negative_air_distance(self, db_session, assoc):
        """A negative `.air_distance` is rejected."""
        assoc.air_distance = -1
        db_session.add(assoc)

        with pytest.raises(sa_exc.IntegrityError, match='realistic_air_distance'):
            db_session.commit()

    def test_air_distance_too_large(self, db_session, assoc):
        """An `.air_distance` of 20,000 is rejected."""
        assoc.air_distance = 20_000
        # Keep the bicycle distance above the air distance.
        assoc.bicycle_distance = 21_000
        db_session.add(assoc)

        with pytest.raises(sa_exc.IntegrityError, match='realistic_air_distance'):
            db_session.commit()

    def test_bicycle_distance_too_large(self, db_session, assoc):
        """A `.bicycle_distance` of 25,000 is rejected."""
        assoc.bicycle_distance = 25_000
        db_session.add(assoc)

        with pytest.raises(sa_exc.IntegrityError, match='realistic_bicycle_distance'):
            db_session.commit()

    def test_air_distance_shorter_than_bicycle_distance(self, db_session, assoc):
        """A `.bicycle_distance` below the `.air_distance` is rejected.

        Despite the test's name, the bicycle distance is the one
        that is set to be shorter than the air distance here.
        """
        shorter_distance = round(0.75 * assoc.air_distance)
        assoc.bicycle_distance = shorter_distance
        db_session.add(assoc)

        with pytest.raises(sa_exc.IntegrityError, match='air_distance_is_shortest'):
            db_session.commit()

    @pytest.mark.parametrize('duration', [-1, 3601])
    def test_unrealistic_bicycle_travel_time(self, db_session, assoc, duration):
        """A `.bicycle_duration` outside of 0 to 3600 is rejected."""
        assoc.bicycle_duration = duration
        db_session.add(assoc)

        expected = 'realistic_bicycle_travel_time'
        with pytest.raises(sa_exc.IntegrityError, match=expected):
            db_session.commit()
class TestProperties:
    """Exercise the properties of `DistanceMatrix`."""

    def test_path_structure(self, assoc):
        """`DistanceMatrix.path` is a list of `Location` objects."""
        path = assoc.path

        assert isinstance(path, list)
        first_point = path[0]
        assert isinstance(first_point, utils.Location)

    def test_path_content(self, assoc):
        """`DistanceMatrix.path` holds one entry per stored waypoint."""
        number_of_points = len(assoc.path)

        assert number_of_points == 5  # = 5 inner points, excluding start and end

    def test_path_is_cached(self, assoc):
        """Accessing `DistanceMatrix.path` twice yields the same object."""
        first_access = assoc.path
        second_access = assoc.path

        assert first_access is second_access

View file

@ -24,7 +24,7 @@ def assoc(address, pixel):
@pytest.mark.no_cover @pytest.mark.no_cover
class TestSpecialMethods: class TestSpecialMethods:
"""Test special methods in `Pixel`.""" """Test special methods in `AddressPixelAssociation`."""
def test_create_an_address_pixel_association(self, assoc): def test_create_an_address_pixel_association(self, assoc):
"""Test instantiation of a new `AddressPixelAssociation` object.""" """Test instantiation of a new `AddressPixelAssociation` object."""