Testing Conventions

Pytest conventions for writing maintainable, behavior-focused tests.

Quick Reference

Element	Convention
Test file naming	`test_<module>.py`
Test function naming	`test_<function>_<scenario>_<expected_result>`
Test structure	Arrange-Act-Assert with comments
Multiple assertions	Use `pytest-check` for soft assertions
Fixtures	Local unless shared across multiple test files
Multiple inputs	Use `@pytest.mark.parametrize`
Related tests	Group in test classes

Anti-Patterns to Avoid

Anti-Pattern	Guidance
Coverage-driven tests	Test meaningful behavior, not lines
Implementation testing	Test observable behavior; tests should survive refactoring
Order-dependent tests	Tests must be independent; never rely on execution order
Multiple bare asserts	Use `pytest-check` so all assertions run even if early ones fail
Single-use fixtures	Define test data inline for clarity
Fixture duplication	Extend or generalize existing fixtures

Test Organization

Mirror the source package layout under `tests/`, one test file per module (the top-level package directory is omitted):

code

src/chain_reaction/retrieval/embeddings.py
tests/retrieval/test_embeddings.py

Test Naming

Pattern: test_<function>_<scenario>_<expected_result>

python

# CORRECT - descriptive name following pattern
def test_calculate_similarity_identical_vectors_returns_one() -> None:
    """Identical vectors should have similarity of 1.0."""
    vec = [1.0, 0.0, 0.0]
    assert calculate_similarity(vec, vec) == 1.0


def test_calculate_similarity_mismatched_dimensions_raises_value_error() -> None:
    """Vectors with different dimensions should raise ValueError."""
    with pytest.raises(ValueError, match="dimension"):
        calculate_similarity([1.0, 0.0], [1.0, 0.0, 0.0])


# INCORRECT - vague names
def test_similarity():  # Missing scenario and expected result
    ...

def test_error():  # No context about what's being tested
    ...

Test Structure: Arrange-Act-Assert

Always organize tests with clear AAA sections:

python

# CORRECT - clear AAA structure with comments
def test_search_returns_results_sorted_by_score() -> None:
    """Search results should be sorted by relevance score descending."""
    # Arrange
    index = SearchIndex()
    index.add_documents(sample_documents)

    # Act
    results = index.search("python async", limit=10)

    # Assert
    scores = [r.score for r in results]
    assert scores == sorted(scores, reverse=True)


# INCORRECT - mixed arrangement and assertions
def test_search_sorting() -> None:
    index = SearchIndex()
    assert index is not None  # Asserting during arrangement
    index.add_documents(sample_documents)
    results = index.search("python async", limit=10)
    assert len(results) > 0
    scores = [r.score for r in results]
    assert scores == sorted(scores, reverse=True)

Multiple Assertions with pytest-check

Use pytest-check for soft assertions when verifying multiple properties. This ensures all assertions run even if earlier ones fail, giving complete feedback in a single test run.

python

from pytest_check import check


# CORRECT - pytest-check for multiple related assertions
def test_user_profile_contains_required_fields() -> None:
    """User profile should contain all required fields with correct types."""
    # Arrange
    user_data = {"name": "Alice", "email": "alice@example.com", "age": 30}

    # Act
    profile = UserProfile.from_dict(user_data)

    # Assert - all checks run even if some fail
    with check:
        assert profile.name == "Alice"
    with check:
        assert profile.email == "alice@example.com"
    with check:
        assert profile.age == 30
    with check:
        assert profile.is_active is True


# CORRECT - pytest-check with descriptive messages
def test_search_result_structure() -> None:
    """Search results should have correct structure and values."""
    # Arrange
    index = SearchIndex()
    index.add_documents(sample_documents)

    # Act
    results = index.search("python", limit=5)

    # Assert
    with check:
        assert len(results) <= 5, "Should respect limit"
    with check:
        assert all(r.score >= 0 for r in results), "Scores should be non-negative"
    with check:
        assert results == sorted(results, key=lambda r: r.score, reverse=True), (
            "Results should be sorted by score descending"
        )


# INCORRECT - multiple bare asserts stop at first failure
def test_user_profile_fields() -> None:
    profile = UserProfile.from_dict(user_data)
    assert profile.name == "Alice"      # If this fails...
    assert profile.email == "alice@example.com"  # ...this never runs
    assert profile.age == 30            # ...nor this

When to use pytest-check:

• Testing multiple independent properties of an object
• Validating structure with several fields
• Any test where seeing all failures at once aids debugging

When regular assert is fine:

• Single assertion per test
• Assertions that logically depend on each other (if A fails, B is meaningless)
• Guard assertions in the Arrange phase (though prefer `pytest.raises`, or omit such guards entirely)

Fixtures

Keep fixtures close to their usage. Use conftest.py only for widely shared fixtures.

python

# CORRECT - shared fixture in conftest.py
# tests/retrieval/conftest.py
@pytest.fixture
def sample_embeddings() -> list[list[float]]:
    """Sample embeddings for testing."""
    return [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]


# CORRECT - local fixture extending shared fixture
# tests/retrieval/test_search.py
@pytest.fixture
def populated_index(sample_embeddings: list[list[float]]) -> SearchIndex:
    """Search index populated with sample data."""
    index = SearchIndex()
    for i, emb in enumerate(sample_embeddings):
        index.add(f"doc_{i}", emb)
    return index


# INCORRECT - single-use fixture that should be inline
@pytest.fixture
def single_vector() -> list[float]:
    return [1.0, 0.0, 0.0]

def test_something(single_vector):  # Just define inline instead
    ...

Parametrized Tests

Use `@pytest.mark.parametrize` to test multiple inputs with the same logic:

python

# CORRECT - one parametrized test covering all input cases
@pytest.mark.parametrize(
    ("input_text", "expected_tokens"),
    [
        ("hello world", ["hello", "world"]),
        ("", []),
        ("  spaces  ", ["spaces"]),
        ("UPPERCASE", ["uppercase"]),
    ],
)
def test_tokenize(input_text: str, expected_tokens: list[str]) -> None:
    """Tokenizer should handle various input formats."""
    assert tokenize(input_text) == expected_tokens


# INCORRECT - separate tests for each case
def test_tokenize_simple():
    assert tokenize("hello world") == ["hello", "world"]

def test_tokenize_empty():
    assert tokenize("") == []

def test_tokenize_spaces():
    assert tokenize("  spaces  ") == ["spaces"]

Test Classes

Group related tests in classes with shared fixtures:

python

from pytest_check import check


class TestDataProcessor:
    """Test suite for DataProcessor class."""

    @pytest.fixture
    def processor(self) -> DataProcessor:
        """Create a DataProcessor instance for testing."""
        return DataProcessor(max_size=1000, validate=True)

    @pytest.fixture
    def sample_data(self) -> list[dict[str, Any]]:
        """Provide sample data for testing."""
        return [
            {"id": 1, "value": 10.5, "name": "test1"},
            {"id": 2, "value": 20.0, "name": "test2"},
        ]

    def test_load_data_success(
        self, processor: DataProcessor, sample_data: list[dict[str, Any]]
    ) -> None:
        """Test successful data loading with valid input."""
        # Arrange/Act
        processor.load_data(sample_data)

        # Assert - use pytest-check for multiple assertions
        with check:
            assert processor.record_count == 2
        with check:
            assert processor.is_loaded

    def test_load_data_exceeds_max_size(self, processor: DataProcessor) -> None:
        """Test that loading data exceeding max_size raises ValueError."""
        # Arrange
        large_data = [{"id": i} for i in range(2000)]

        # Act/Assert
        with pytest.raises(ValueError, match="exceeds maximum size"):
            processor.load_data(large_data)

Coverage Requirements

Scope	Target
Core logic	>= 90%
Public APIs	100%

Validation Commands

Task	Command
Run all tests	`uv run pytest`
Run tests with coverage	`uv run pytest --cov`
Run specific test file	`uv run pytest tests/path/to/test_module.py`
Run specific test function	`uv run pytest tests/path/to/test_module.py::test_function_name`
Run tests matching pattern	`uv run pytest -k "pattern"`
Run tests with verbose output	`uv run pytest -v`
Run tests and stop on first failure	`uv run pytest -x`
Show local variables in tracebacks	`uv run pytest -l`