Add lalib.domains.Domain

- this class models domains from linear algebra
  and is needed for the `Vector` class to be created
- `Domain` wraps Python's built-in `frozenset` type
  + a `Domain` must contain at least one label
  + as a convenience, so-called canonical `Domain`s
    (i.e., with labels `0`, `1`, ...) can be created
    by passing in a positive `int`eger to `Domain()`
- the `Domain.is_canonical` property indicates
  whether or not a `Domain` is canonical
- add unit tests for the class
- add extensive documentation for the class
This commit is contained in:
Alexander Hess 2024-10-20 02:35:43 +02:00
parent 9308633ded
commit 4a5e316d0c
Signed by: alexander
GPG key ID: 344EA5AB10D868E0
3 changed files with 357 additions and 0 deletions

144
src/lalib/domains.py Normal file
View file

@ -0,0 +1,144 @@
"""A `Domain` for discrete `Vector`s.
This module defines a `Domain` class wrapping the built-in `frozenset`.
It is designed to model domains of discrete vectors and matrices.
In conventional math, the labels of a `Domain` are implicitly taken to be
the strictly positive natural numbers. For example, a `3`-vector over the
reals would then have a `Domain` like below:
>>> Domain([1, 2, 3])
Domain({1, 2, 3})
However, in Python we commonly start counting at `0`. Therefore,
a `3`-vector over the reals has the following `Domain` in `lalib`:
>>> Domain([0, 1, 2])
Domain(3)
We call such `Domain`s "canonical", and, as a convenience, such
`Domain`s can be created by passing a `Vector`'s "length" as an
`int`eger argument to the `Domain()` constructor, for example:
>>> Domain(5)
Domain(5)
Domains do not need to be made of numbers. Instead, we can use,
for example, letters or words, or any other `hash`able object.
>>> Domain(["a", "b", "c"])
...
>>> Domain("abc")
...
>>> Domain(("heads", "tails"))
...
>>> Domain({(1, 23), (4, 56), (7, 89)})
...
>>> Domain({"n_yes": 7, "n_no": 3, "n_total": 10}) # `.keys()` are used
...
>>> Domain(([1, 23], [4, 56], [7, 89]))
Traceback (most recent call last):
...
TypeError: ...
"""
from collections import abc as collections_abc
# When giving up support for Python 3.9, we can get rid of `Union`
from typing import Union
try:
from typing import Self
except ImportError: # pragma: no cover to support Python 3.9 & 3.10
from typing_extensions import Self
class Domain(frozenset):
    """The domain for a `Vector`.

    A `Domain` is an immutable, non-empty `frozenset` of hashable labels.
    A `Domain` whose labels are exactly `0`, `1`, ..., `n - 1` is called
    "canonical" and can be created by passing in the integer `n`.
    """

    @staticmethod
    def __new__(
        cls: type[Self],
        /,
        labels: Union[collections_abc.Iterable[collections_abc.Hashable], int],
    ) -> Self:
        """See docstring for `.__init__()`."""
        # Because `Domain` objects are immutable by design,
        # an already existing `Domain` is handed back unchanged
        if isinstance(labels, cls):
            return labels

        # Anything that cannot be iterated over must be a whole number `n`
        # that is then expanded into the labels `0`, `1`, ..., `n - 1`
        if not isinstance(labels, collections_abc.Iterable):
            try:
                n_labels = int(labels)
            except (TypeError, ValueError):
                msg = "must provide a positive integer"
                raise TypeError(msg) from None
            else:
                # Reject numbers with a fractional part (e.g., `4.2`)
                if n_labels != labels:
                    msg = "must provide a positive integer"
                    raise ValueError(msg)

            # For `n_labels <= 0`, the `range` is empty
            # and gets rejected by the emptiness check below
            labels = range(n_labels)

        # Materialize the `labels` BEFORE checking for emptiness:
        # iterators and generators are always truthy, even if they
        # yield nothing, so `not labels` cannot detect an empty one
        try:
            domain = super().__new__(cls, labels)
        except TypeError:
            # Provide a nicer error message
            msg = "must provide hashable labels"
            raise TypeError(msg) from None

        # As we require `Vector`s to have at least one entry,
        # we also enforce this constraint on the `Domain`s
        if not domain:
            msg = "must provide at least one label or a positive integer"
            raise ValueError(msg)

        return domain

    def __init__(
        self,
        /,
        labels: Union[collections_abc.Iterable[collections_abc.Hashable], int],
    ) -> None:
        """Create a new domain.

        All the work is done in `.__new__()`; this method only exists
        to carry the documentation of the constructor.

        Args:
            labels: the domain labels provided by an iterable (including
                one-shot iterators and generators) or a strictly positive
                `int`eger `n` that then constructs the labels
                `0`, `1`, ... up to and including `n - 1`;
                for mappings, the keys are used as the labels

        Raises:
            TypeError: `labels` is not of the specified types
                or contains non-hashable labels
            ValueError:
                - if an iterable argument provides no elements
                - if a numeric argument is not a whole number
                - if an integer argument is not strictly positive
        """

    def __repr__(self) -> str:
        """Text representation: `Domain(...)`.

        Designed such that `eval(repr(self)) == self`; in other words,
        the text representation of a `Domain` is valid code on its own
        evaluating into a (new) `Domain` with the same `labels`.

        Canonical `Domain`s are abbreviated as `Domain(n)` where `n`
        is the number of labels; all others list their labels.

        See: https://docs.python.org/3/reference/datamodel.html#object.__repr__
        """
        if self.is_canonical:
            return f"{self.__class__.__name__}({len(self)})"

        return super().__repr__()

    @property  # We do not use `@functools.cached_property`
    # as this allows writing to the property
    def is_canonical(self) -> bool:
        """If the `labels` resemble a `range(...)`.

        The result is computed on first access and then
        stored on the instance as `._is_canonical`.
        """
        try:
            cached = self._is_canonical
        except AttributeError:
            # First access: compare against the labels `0`, ..., `n - 1`
            cached = self == set(range(len(self)))
            self._is_canonical = cached

        return cached

View file

@ -15,6 +15,7 @@ import xdoctest
"module",
[
"lalib",
"lalib.domains",
"lalib.elements",
"lalib.elements.galois",
"lalib.fields",

212
tests/test_domains.py Normal file
View file

@ -0,0 +1,212 @@
"""Tests for `lalib.domains.Domain`."""
import os
import random
import pytest
from lalib.domains import Domain
# Tests only cross-check against other parts of `Domain`
# while the `NO_CROSS_REFERENCE` environment variable is unset or empty
CROSS_REFERENCE = not os.getenv("NO_CROSS_REFERENCE")


# Plain numbers passed into `Domain()`
NUMERIC_LABELS = (1, 42)  # always interpreted in a canonical way

# Iterables providing the labels one by one
CANONICAL_ITERABLE_LABELS = tuple(map(range, NUMERIC_LABELS))
NON_CANONICAL_ITERABLE_LABELS = (
    range(1, 42),
    [-42, 0, 42],
    "abc",
    ("x", "y", "z"),
)
ITERABLE_LABELS = (*CANONICAL_ITERABLE_LABELS, *NON_CANONICAL_ITERABLE_LABELS)

# Mappings whose keys serve as the labels
CANONICAL_MAPPING_LABELS = (
    {0: 123},
    {0: 123, 1: 456},
)
NON_CANONICAL_MAPPING_LABELS = (
    {0: 123, 42: 456},
    {"a": 123, "b": 456},
)
MAPPING_LABELS = (*CANONICAL_MAPPING_LABELS, *NON_CANONICAL_MAPPING_LABELS)

# All of the above grouped by the kind of `Domain` they result in
CANONICAL_LABELS = (
    NUMERIC_LABELS + CANONICAL_ITERABLE_LABELS + CANONICAL_MAPPING_LABELS
)
NON_CANONICAL_LABELS = NON_CANONICAL_ITERABLE_LABELS + NON_CANONICAL_MAPPING_LABELS

ALL_LABELS = (*CANONICAL_LABELS, *NON_CANONICAL_LABELS)
@pytest.fixture
def domain(request):
    """Build a `Domain` from the labels of an indirect parametrization."""
    labels = request.param
    return Domain(labels)
class TestDomainInstantiation:
    """Verify that `Domain()` accepts all supported kinds of `labels`."""

    @pytest.mark.parametrize("domain", ALL_LABELS, indirect=True)
    def test_from_domain(self, domain):
        """Passing a `Domain` into `Domain()` hands back the very same object."""
        same_domain = Domain(domain)

        assert same_domain == domain
        assert same_domain is domain

    @pytest.mark.overlapping_test
    @pytest.mark.parametrize("number", NUMERIC_LABELS)
    def test_from_integer(self, number):
        """A positive `int`eger `n` expands into the labels `0`, ..., `n - 1`.

        This is a convenience feature.
        """
        domain = Domain(number)

        assert domain == set(range(number))

        if CROSS_REFERENCE:
            assert domain.is_canonical

    @pytest.mark.overlapping_test
    @pytest.mark.parametrize("mapping", MAPPING_LABELS)
    def test_from_mapping(self, mapping):
        """The keys of a mapping become the labels of the `Domain`."""
        assert Domain(mapping) == mapping.keys()

    @pytest.mark.overlapping_test
    @pytest.mark.parametrize("iterable", ITERABLE_LABELS)
    def test_from_iterable(self, iterable):
        """The elements of an iterable become the labels of the `Domain`."""
        assert Domain(iterable) == set(iterable)

    @pytest.mark.overlapping_test
    @pytest.mark.parametrize("number", NUMERIC_LABELS)
    def test_from_iterator_yielding_canonical_labels(self, number):
        """`Domain()` consumes a generator yielding `0`, ..., `number - 1`."""
        one_shot_labels = (label for label in range(number))

        domain = Domain(one_shot_labels)

        assert domain == set(range(number))

        if CROSS_REFERENCE:
            assert domain.is_canonical

    @pytest.mark.overlapping_test
    def test_from_iterator_yielding_non_canonical_labels(self):
        """`Domain()` consumes a generator yielding labels with gaps."""
        p_skipped = 0.5

        # A random subset of `0`, `1`, ..., `99`: each candidate
        # is only kept if the random draw exceeds `p_skipped`
        one_shot_labels = (
            label
            for label in range(100)
            if random.random() > p_skipped  # noqa: S311
        )

        domain = Domain(one_shot_labels)

        assert domain is not None

        if CROSS_REFERENCE:
            assert not domain.is_canonical

    @pytest.mark.parametrize("domain", CANONICAL_LABELS, indirect=True)
    def test_from_canonical_repr(self, domain):
        """Evaluating a `repr()` like "Domain(integer)" recreates the `domain`."""
        text = repr(domain)

        assert eval(text) == domain  # noqa: S307

    @pytest.mark.parametrize("domain", NON_CANONICAL_LABELS, indirect=True)
    def test_from_non_canonical_repr(self, domain):
        """Evaluating a `repr()` like "Domain({label1, ...})" recreates the `domain`."""
        text = repr(domain)

        assert eval(text) == domain  # noqa: S307
class TestFailedDomainInstantiation:
    """Verify that `Domain()` rejects unsupported `labels`."""

    def test_wrong_type(self):
        """An object that is neither a number nor an iterable is rejected."""
        not_labels = object()

        with pytest.raises(TypeError):
            Domain(not_labels)

    @pytest.mark.parametrize("number", [-42, 0, 4.2])
    def test_from_non_positive_integer(self, number):
        """Numbers other than strictly positive whole numbers are rejected."""
        with pytest.raises(ValueError, match="positive integer"):
            Domain(number)

    @pytest.mark.overlapping_test
    def test_from_empty_mapping(self):
        """A mapping without any keys is rejected."""
        no_labels = dict()  # noqa: C408

        with pytest.raises(ValueError, match="at least one label"):
            Domain(no_labels)

    @pytest.mark.parametrize("iterable_type", [tuple, list, set])
    def test_from_empty_iterable(self, iterable_type):
        """An iterable without any elements is rejected."""
        no_labels = iterable_type()

        with pytest.raises(ValueError, match="at least one label"):
            Domain(no_labels)

    @pytest.mark.parametrize("iterable_type", [tuple, list])
    def test_from_iterable_with_non_hashable_labels(self, iterable_type):
        """An iterable providing non-hashable `labels` is rejected."""
        unhashable_labels = iterable_type([[1], [2], [3]])

        with pytest.raises(TypeError, match="hashable labels"):
            Domain(unhashable_labels)
@pytest.mark.overlapping_test
class TestCanonicalProperty:
    """Verify the `Domain.is_canonical` property."""

    @pytest.mark.parametrize("domain", CANONICAL_LABELS, indirect=True)
    def test_is_canonical(self, domain):
        """The `labels` of the `domain` are `0`, `1`, ..."""
        flag = domain.is_canonical

        assert flag is True

    @pytest.mark.parametrize("domain", NON_CANONICAL_LABELS, indirect=True)
    def test_is_not_canonical(self, domain):
        """The `labels` of the `domain` are not `0`, `1`, ..."""
        flag = domain.is_canonical

        assert flag is False

    # `@pytest.mark.overlapping_test` can only be used
    # because the one line of code in the `try`-block
    # is always regarded as fully covered,
    # even if an `AttributeError` is raised and excepted
    @pytest.mark.parametrize("domain", ALL_LABELS, indirect=True)
    def test_is_still_canonical_or_not(self, domain):
        """Reading `Domain.is_canonical` twice gives the identical result."""
        first_read = domain.is_canonical
        second_read = domain.is_canonical

        assert first_read is second_read