From 4a5e316d0c93efd752fd12ef82b8c3137bf011ab Mon Sep 17 00:00:00 2001 From: Alexander Hess Date: Sun, 20 Oct 2024 02:35:43 +0200 Subject: [PATCH] Add `lalib.domains.Domain` - this class models domains from linear algebra and is needed for the `Vector` class to be created - `Domain` wraps Python's built-in `frozenset` type + they must contain at least one label + as a convenience, so-called canonical `Domain`s (i.e., with labels `0`, `1`, ...) can be created by passing in a positive `int`eger to `Domain()` - the `Domain.is_canonical` property indicates what kind a `Domain` is - add unit tests for the class - add extensive documentation for the class --- src/lalib/domains.py | 144 ++++++++++++++++++++++++++ tests/test_docstrings.py | 1 + tests/test_domains.py | 212 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 357 insertions(+) create mode 100644 src/lalib/domains.py create mode 100644 tests/test_domains.py diff --git a/src/lalib/domains.py b/src/lalib/domains.py new file mode 100644 index 0000000..6f0b955 --- /dev/null +++ b/src/lalib/domains.py @@ -0,0 +1,144 @@ +"""A `Domain` for discrete `Vector`s. + +This module defines a `Domain` class wrapping the built-in `frozenset`. +It is designed to model domains of discrete vectors and matrices. + +In conventional math, `Domain`s are implicitly thought of as strictly +positive natural numbers. For example, a `3`-vector over the reals +would then have a `Domain` like below: + +>>> Domain([1, 2, 3]) +Domain({1, 2, 3}) + +However, in Python we commonly start counting at `0`. Therefore, +a `3`-vector over the reals has the following `Domain` in `lalib`: + +>>> Domain([0, 1, 2]) +Domain(3) + +We call such `Domain`s "canonical", and, as a convenience, such +`Domain`s can be created by passing a `Vector`'s "length" as an +`int`eger argument to the `Domain()` constructor, for example: + +>>> Domain(5) +Domain(5) + +Domains do not need to be made of numbers. 
class Domain(frozenset):
    """The domain for a `Vector`.

    A `Domain` is an immutable, non-empty set of hashable labels built
    on top of `frozenset`. If its labels are exactly `0`, `1`, ...,
    `n - 1`, the `Domain` is called "canonical" and can be created by
    passing the `int`eger `n` instead of an iterable of labels.
    """

    # Python special-cases `__new__()` as a static method,
    # so it need not be decorated with `@staticmethod`
    def __new__(
        cls: type[Self],
        /,
        labels: Union[collections_abc.Iterable[collections_abc.Hashable], int],
    ) -> Self:
        """See docstring for `.__init__()`."""
        # Because `Domain` objects are immutable by design,
        # an existing one can be handed back as is
        if isinstance(labels, cls):
            return labels

        if not isinstance(labels, collections_abc.Iterable):
            try:
                n_labels = int(labels)
            except (TypeError, ValueError, OverflowError):
                # `ValueError` covers `float("nan")` and
                # `OverflowError` covers `float("inf")`
                msg = "must provide a positive integer"
                raise TypeError(msg) from None
            else:
                # `int()` truncates, so `4.2` must be rejected here
                if n_labels != labels:
                    msg = "must provide a positive integer"
                    raise ValueError(msg)

            # Non-positive numbers yield an empty `range`
            # and are rejected by the emptiness check below
            labels = range(n_labels)

        try:
            domain = super().__new__(cls, labels)
        except TypeError:
            # Provide a nicer error message
            msg = "must provide hashable labels"
            raise TypeError(msg) from None

        # As we require `Vector`s to have at least one entry,
        # we also enforce this constraint on the `Domain`s
        #
        # The check runs on the constructed set, not on `labels`:
        # iterators and generators are always truthy, even if they
        # yield nothing, and would otherwise create an empty `Domain`
        if not domain:
            msg = "must provide at least one label or a positive integer"
            raise ValueError(msg)

        return domain

    def __init__(
        self,
        /,
        labels: Union[collections_abc.Iterable[collections_abc.Hashable], int],
    ) -> None:
        """Create a new domain.

        All the work is done in `.__new__()` because the underlying
        `frozenset` is immutable; this method only carries the docs.

        Args:
            labels: the domain labels provided by an iterable or
                a strictly positive `int`eger `n` that then constructs
                the labels `0`, `1`, ... up to and including `n - 1`

        Returns:
            domain

        Raises:
            TypeError: `labels` is not of the specified types
                or contains non-hashable labels
            ValueError:
                - if a collection or iterator argument contains no elements
                - if an integer argument is not strictly positive
        """

    def __repr__(self) -> str:
        """Text representation: `Domain(...)`.

        Designed such that `eval(repr(self)) == self`; in other words,
        the text representation of a `Domain` is valid code on its own
        evaluating into a (new) `Domain` with the same `labels`.

        See: https://docs.python.org/3/reference/datamodel.html#object.__repr__
        """
        if self.is_canonical:
            return f"{self.__class__.__name__}({len(self)})"
        # `frozenset.__repr__()` uses the subclass's name: `Domain({...})`
        return super().__repr__()

    @property  # We do not use `@functools.cached_property`
    # as this allows writing to the property
    def is_canonical(self) -> bool:
        """If the `labels` resemble a `range(...)`.

        The result is computed on first access and then cached
        on the instance in the `_is_canonical` attribute.
        """
        try:
            return self._is_canonical
        except AttributeError:
            # With `n` labels in total, containing each of `0`, ..., `n - 1`
            # is equivalent to `self == set(range(n))` but stops at the
            # first missing label and allocates no temporary set
            n_labels = len(self)
            self._is_canonical: bool = all(
                label in self for label in range(n_labels)
            )
            return self._is_canonical
range(1, 42), + [-42, 0, +42], + "abc", + ("x", "y", "z"), +) +ITERABLE_LABELS = CANONICAL_ITERABLE_LABELS + NON_CANONICAL_ITERABLE_LABELS + +CANONICAL_MAPPING_LABELS = ( + {0: 123}, + {0: 123, 1: 456}, +) +NON_CANONICAL_MAPPING_LABELS = ( + {0: 123, 42: 456}, + {"a": 123, "b": 456}, +) +MAPPING_LABELS = CANONICAL_MAPPING_LABELS + NON_CANONICAL_MAPPING_LABELS + +CANONICAL_LABELS = ( + *NUMERIC_LABELS, + *CANONICAL_ITERABLE_LABELS, + *CANONICAL_MAPPING_LABELS, +) +NON_CANONICAL_LABELS = ( + *NON_CANONICAL_ITERABLE_LABELS, + *NON_CANONICAL_MAPPING_LABELS, +) + +ALL_LABELS = CANONICAL_LABELS + NON_CANONICAL_LABELS + + +@pytest.fixture +def domain(request): + """A `Domain` object.""" + return Domain(request.param) + + +class TestDomainInstantiation: + """Test `Domain.__new__()` with good inputs.""" + + @pytest.mark.parametrize("domain", ALL_LABELS, indirect=True) + def test_from_domain(self, domain): + """`Domain` object passed into `Domain()` is simply returned.""" + new_domain = Domain(domain) + + assert new_domain == domain + assert new_domain is domain + + @pytest.mark.overlapping_test + @pytest.mark.parametrize("number", NUMERIC_LABELS) + def test_from_integer(self, number): + """Positive `int`eger passed into `Domain()` creates canonical `Domain`. + + This is a convenience feature. 
+ """ + domain = Domain(number) + expected = set(range(number)) + + assert domain == expected + + if CROSS_REFERENCE: + assert domain.is_canonical + + @pytest.mark.overlapping_test + @pytest.mark.parametrize("mapping", MAPPING_LABELS) + def test_from_mapping(self, mapping): + """Create `Domain` from various mapping objects.""" + domain = Domain(mapping) + expected = mapping.keys() + + assert domain == expected + + @pytest.mark.overlapping_test + @pytest.mark.parametrize("iterable", ITERABLE_LABELS) + def test_from_iterable(self, iterable): + """Create `Domain` from various iterable objects.""" + domain = Domain(iterable) + expected = set(iterable) + + assert domain == expected + + @pytest.mark.overlapping_test + @pytest.mark.parametrize("number", NUMERIC_LABELS) + def test_from_iterator_yielding_canonical_labels(self, number): + """`Domain()` can consume iterators: Providing canonical `labels`.""" + + def generator_factory(): + """Yield `0`, `1`, ... `number - 1`.""" + yield from range(number) + + generator = generator_factory() + domain = Domain(generator) + expected = set(range(number)) + + assert domain == expected + + if CROSS_REFERENCE: + assert domain.is_canonical + + @pytest.mark.overlapping_test + def test_from_iterator_yielding_non_canonical_labels(self): + """`Domain()` can consume iterators: Providing non-canonical `labels`.""" + + def generator_factory(p_skipped=0.5): + """Yield `0`, `1`, ..., `100` with missing values.""" + for i in range(100): + if random.random() > p_skipped: # noqa: S311 + yield i + + generator = generator_factory() + domain = Domain(generator) + + assert domain is not None + + if CROSS_REFERENCE: + assert not domain.is_canonical + + @pytest.mark.parametrize("domain", CANONICAL_LABELS, indirect=True) + def test_from_canonical_repr(self, domain): + """`repr(domain)` is of the form "Domain(integer)".""" + new_domain = eval(repr(domain)) # noqa: S307 + + assert new_domain == domain + + @pytest.mark.parametrize("domain", 
NON_CANONICAL_LABELS, indirect=True) + def test_from_non_canonical_repr(self, domain): + """`repr(domain)` is of the form "Domain({label1, label2, ...})".""" + new_domain = eval(repr(domain)) # noqa: S307 + + assert new_domain == domain + + +class TestFailedDomainInstantiation: + """Test `Domain.__new__()` with bad inputs.""" + + def test_wrong_type(self): + """Cannot create `Domain` from non-numeric or non-iterable object.""" + with pytest.raises(TypeError): + Domain(object()) + + @pytest.mark.parametrize("number", [-42, 0, 4.2]) + def test_from_non_positive_integer(self, number): + """Non-positive `int`egers passed into `Domain()` do not work.""" + with pytest.raises(ValueError, match="positive integer"): + Domain(number) + + @pytest.mark.overlapping_test + def test_from_empty_mapping(self): + """Cannot create `Domain` from empty mapping objects.""" + empty_dict = {} + + with pytest.raises(ValueError, match="at least one label"): + Domain(empty_dict) + + @pytest.mark.parametrize("iterable_type", [tuple, list, set]) + def test_from_empty_iterable(self, iterable_type): + """Cannot create `Domain` from empty iterable objects.""" + empty_iterable = iterable_type() + + with pytest.raises(ValueError, match="at least one label"): + Domain(empty_iterable) + + @pytest.mark.parametrize("iterable_type", [tuple, list]) + def test_from_iterable_with_non_hashable_labels(self, iterable_type): + """Cannot create `Domain` with non-hashable `labels`.""" + bad_iterable = iterable_type(([1], [2], [3])) + + with pytest.raises(TypeError, match="hashable labels"): + Domain(bad_iterable) + + +@pytest.mark.overlapping_test +class TestCanonicalProperty: + """Test `Domain.is_canonical` property.""" + + @pytest.mark.parametrize("domain", CANONICAL_LABELS, indirect=True) + def test_is_canonical(self, domain): + """A `domain` with `labels` like `0`, `1`, ...""" + assert domain.is_canonical is True + + @pytest.mark.parametrize("domain", NON_CANONICAL_LABELS, indirect=True) + def 
test_is_not_canonical(self, domain): + """A `domain` with `labels` unlike `0`, `1`, ...""" + assert domain.is_canonical is False + + # `@pytest.mark.overlapping_test` can only be used + # because the one line of code in the `try`-block + # is always regarded as fully covered, + # even if an `AttributeError` is raised and excepted + @pytest.mark.parametrize("domain", ALL_LABELS, indirect=True) + def test_is_still_canonical_or_not(self, domain): + """`Domain.is_canonical` is cached.""" + result1 = domain.is_canonical + result2 = domain.is_canonical + + assert result1 is result2