AgentSkillsCN

Testing

测试

SKILL.md

Testing & Validation Skill

Description

Manages testing strategies, validation workflows, and data quality checks for the OpenGov Harvester.

Triggers

  • "run tests"
  • "validate extraction"
  • "check data quality"
  • "test pipeline"
  • "integration test"

Test Organization

Directory Structure

code
tests/
├── unit/              # Unit tests for individual modules
│   ├── test_database.py
│   ├── test_extraction.py
│   └── test_utils.py
├── integration/       # Integration tests for workflows
│   ├── test_etl_pipeline.py
│   └── test_database_sync.py
├── e2e/               # End-to-end browser automation tests
│   ├── test_login_flow.py
│   └── test_extraction_flow.py
├── fixtures/          # Test data and fixtures
│   ├── sample_projects.json
│   └── mock_responses.json
└── conftest.py        # Pytest configuration and fixtures

Running Tests

All Tests

bash
# Run complete test suite
./run_tests.sh

# Or directly with pytest
pytest tests/ -v

# With coverage
pytest tests/ --cov=src --cov-report=html
open htmlcov/index.html

Specific Test Types

bash
# Unit tests only
pytest tests/unit/ -v

# Integration tests only
pytest tests/integration/ -v

# E2E tests only
pytest tests/e2e/ -v

# Specific test file
pytest tests/unit/test_database.py -v

# Specific test function (file path :: function name)
pytest tests/unit/test_database.py::test_connection_wal_mode -v

Test Filtering

bash
# By marker
pytest -m "slow" -v          # Run slow tests
pytest -m "not slow" -v      # Skip slow tests

# By keyword
pytest -k "database" -v      # Tests matching "database"
pytest -k "not integration" -v  # Skip integration tests

Test Writing Conventions

Unit Test Example

python
# tests/unit/test_database.py
import pytest
import sqlite3
from src.database.connection import get_connection, init_database

@pytest.fixture
def test_db(tmp_path):
    """Yield an initialised SQLite connection backed by a per-test temp file.

    The database file lives under pytest's ``tmp_path``; the connection is
    closed once the requesting test has finished.
    """
    connection = sqlite3.connect(str(tmp_path / "test.db"))
    init_database(connection)
    yield connection
    connection.close()

def test_connection_wal_mode(test_db):
    """The journal mode reported by the database must be WAL."""
    journal_mode = test_db.execute("PRAGMA journal_mode").fetchone()[0]
    assert journal_mode == "wal", "Database should use WAL mode"

def test_insert_project(test_db):
    """A row written to opengov_projects can be read back by its project id."""
    project_id, project_name = "test-123", "Test Project"

    test_db.execute(
        "INSERT INTO opengov_projects (project_id, name) VALUES (?, ?)",
        (project_id, project_name),
    )

    row = test_db.execute(
        "SELECT name FROM opengov_projects WHERE project_id = ?",
        (project_id,),
    ).fetchone()

    assert row[0] == project_name

Integration Test Example

python
# tests/integration/test_etl_pipeline.py
import pytest
from src.etl.pipeline import run_extraction

@pytest.fixture
def mock_supabase(mocker):
    """Patch ``src.etl.pipeline.supabase`` and hand the patched object to the test."""
    patched_client = mocker.patch('src.etl.pipeline.supabase')
    return patched_client

@pytest.mark.integration
def test_full_extraction_pipeline(test_db, mock_supabase):
    """Run the extraction for one project, then check the result and the DB flag."""
    # Arrange
    target_project = "test-project-123"
    flag_query = "SELECT extracted FROM opengov_projects WHERE project_id = ?"

    # Act
    outcome = run_extraction(target_project, db=test_db)

    # Assert: the pipeline reports success with at least one opportunity
    assert outcome['success'] is True
    assert outcome['opportunities_extracted'] > 0

    # Assert: the project row is flagged as extracted
    extracted_flag = test_db.execute(flag_query, (target_project,)).fetchone()[0]
    assert extracted_flag == 1

E2E Test Example

python
# tests/e2e/test_extraction_flow.py
import pytest
from playwright.sync_api import Page, expect

@pytest.mark.e2e
@pytest.mark.slow
def test_login_and_extract(page: Page):
    """Test full login and extraction flow.

    Drives the browser through the login form, waits for the dashboard,
    opens the Projects view and checks that at least one project item is
    listed. The steps depend on each other and must stay in this order.

    NOTE(review): ``page`` is presumably the fixture provided by the
    pytest-playwright plugin - confirm it is installed for the e2e suite.
    NOTE(review): the URL and credentials look like placeholders; confirm
    where the real test account is configured.
    """
    # Navigate to login
    page.goto("https://opengov.example.com/login")

    # Login: fill both fields, then submit the form
    page.fill("#username", "test@example.com")
    page.fill("#password", "test-password")
    page.click("button[type='submit']")

    # Wait for navigation (glob pattern: any URL ending in /dashboard)
    page.wait_for_url("**/dashboard")

    # Verify logged in
    expect(page.locator(".user-name")).to_contain_text("Test User")

    # Navigate to projects
    page.click("text=Projects")
    expect(page.locator(".project-list")).to_be_visible()

    # Count projects
    project_count = page.locator(".project-item").count()
    assert project_count > 0, "Should have at least one project"

Data Quality Checks

Validation Functions

python
# src/validation/quality_checks.py

def validate_project_data(project: dict) -> tuple[bool, list[str]]:
    """Validate project data completeness and integrity.

    Args:
        project: Project record to check.

    Returns:
        ``(is_valid, errors)``. ``errors`` lists every problem found, in
        check order; ``is_valid`` is True only when that list is empty.
    """
    errors = []

    # Required fields: each must be present and non-empty.
    required = ('project_id', 'name', 'created_at')
    errors.extend(
        f"Missing required field: {field}"
        for field in required
        if not project.get(field)
    )

    # Data types: bool is a subclass of int, so it is excluded explicitly;
    # True/False is not a meaningful version number.
    version = project.get('version')
    if isinstance(version, bool) or not isinstance(version, int):
        errors.append("Version must be an integer")

    # Business logic: a value that cannot be compared with 0 (None, a
    # string, ...) is reported as a validation error rather than letting
    # TypeError escape from the validator.
    count = project.get('opportunities_count', 0)
    try:
        is_negative = count < 0
    except TypeError:
        errors.append("Opportunities count must be a number")
    else:
        if is_negative:
            errors.append("Opportunities count cannot be negative")

    return not errors, errors

def validate_extraction_results(results: dict) -> bool:
    """Return True only when every extraction health check passes.

    The checks are: ``success`` is exactly True, at least one opportunity
    was extracted, the downloaded-document count is not negative, and the
    ``errors`` entry equals an empty list. Missing keys fall back to 0
    (counts) or ``[]`` (errors).
    """
    succeeded = results.get('success') is True
    found_opportunities = results.get('opportunities_extracted', 0) > 0
    sane_download_count = results.get('documents_downloaded', 0) >= 0
    no_errors = results.get('errors', []) == []
    return all((succeeded, found_opportunities, sane_download_count, no_errors))

Running Quality Checks

bash
# Run validation script
python scripts/validate_data_quality.py

# Check specific project
python scripts/validate_data_quality.py --project-id abc123

# Generate report
python scripts/validate_data_quality.py --report-format html > quality_report.html

Test Data & Fixtures

Pytest Fixtures

python
# tests/conftest.py
import json
import os
import sqlite3
from pathlib import Path

import pytest

from src.database.connection import init_database

@pytest.fixture
def sample_projects():
    """Load sample project data from ``fixtures/sample_projects.json``.

    Returns:
        The parsed JSON content of the fixture file, resolved relative to
        the directory containing this module.
    """
    fixture_path = Path(__file__).parent / "fixtures" / "sample_projects.json"
    # Read as UTF-8 explicitly instead of relying on the platform default
    # encoding, which differs between operating systems.
    with open(fixture_path, encoding="utf-8") as f:
        return json.load(f)

@pytest.fixture
def mock_playwright_page(mocker):
    """Build a stand-in page object whose goto/fill/click calls return None."""
    fake_page = mocker.Mock()
    for method_name in ("goto", "fill", "click"):
        getattr(fake_page, method_name).return_value = None
    return fake_page

@pytest.fixture(scope="session")
def test_database():
    """Session-scoped test database.

    Creates ``test_data.db`` next to this module, runs ``init_database``
    on it and yields the open connection. The connection is closed and the
    file removed at teardown, including when initialisation fails.
    """
    # Anchor the path to this file rather than the working directory so the
    # fixture works regardless of where pytest is invoked from.
    db_path = Path(__file__).parent / "test_data.db"
    conn = sqlite3.connect(str(db_path))
    try:
        init_database(conn)
        yield conn
    finally:
        conn.close()
        if db_path.exists():
            os.remove(db_path)

Mock Data

json
// tests/fixtures/sample_projects.json
[
  {
    "project_id": "test-001",
    "name": "City Infrastructure Project",
    "created_at": "2026-01-01T00:00:00Z",
    "version": 1,
    "opportunities_count": 5,
    "extracted": false
  },
  {
    "project_id": "test-002",
    "name": "Highway Construction",
    "created_at": "2026-01-02T00:00:00Z",
    "version": 1,
    "opportunities_count": 3,
    "extracted": true
  }
]

Related Rules

  • python-testing-structure.md: Test organization patterns
  • python-modularity-patterns.md: Testable module design
  • data-quality-checks.md: Data validation patterns
  • etl-idempotency-patterns.md: Idempotent test design

Test Configuration

pytest.ini

ini
[pytest]
minversion = 6.0
testpaths = tests
python_files = test_*.py
python_classes = Test*
python_functions = test_*
markers =
    slow: marks tests as slow (deselect with '-m "not slow"')
    integration: integration tests
    e2e: end-to-end tests
    unit: unit tests
addopts =
    -v
    --strict-markers
    --tb=short
    --disable-warnings

Coverage Configuration

ini
# .coveragerc
[run]
source = src
omit =
    */tests/*
    */venv/*
    */migrations/*

[report]
precision = 2
show_missing = True
skip_covered = False

[html]
directory = htmlcov

Continuous Integration

GitHub Actions Example

yaml
# .github/workflows/test.yml
name: Test Suite

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      # Use current major versions; the v2 releases target a retired Node.js runtime.
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install -r requirements-test.txt
          # --with-deps also installs the OS libraries the browsers need
          playwright install --with-deps

      - name: Run tests
        run: pytest tests/ --cov=src --cov-report=xml

      - name: Upload coverage
        uses: codecov/codecov-action@v4
        with:
          files: ./coverage.xml
          # v4 uploads require a token; store it as a repository secret
          token: ${{ secrets.CODECOV_TOKEN }}

Performance Testing

Benchmark Tests

python
import pytest
import time

@pytest.mark.benchmark
def test_extraction_performance(benchmark):
    """Time a simulated extraction and require a sub-second mean runtime."""
    def simulated_extraction():
        # Stand-in for real work: sleep, then report a fixed payload
        time.sleep(0.5)
        return {"success": True, "count": 100}

    outcome = benchmark(simulated_extraction)
    assert outcome['success']
    mean_seconds = benchmark.stats['mean']
    assert mean_seconds < 1.0  # Should complete in <1s

Load Testing

bash
# Using locust for load testing
pip install locust

# Run load test
locust -f tests/load/extraction_load_test.py --host=http://localhost:8080

Troubleshooting Tests

Common Issues

Import Errors:

bash
# Ensure the project root is on the Python path
# (imports are written as `from src....`, so the directory that CONTAINS src/ must be listed)
export PYTHONPATH="${PYTHONPATH}:$(pwd)"

Playwright Errors:

bash
# Install browser binaries
playwright install

# Run with headed mode for debugging
pytest tests/e2e/ --headed

Database Locked:

python
# Use separate test database
@pytest.fixture
def test_db(tmp_path):
    """Provide an isolated WAL-mode SQLite connection for a single test.

    The database file is created under pytest's ``tmp_path``. The
    connection is closed after the test so no open handle is left behind
    to hold a lock.
    """
    db_path = tmp_path / "test.db"
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        yield conn
    finally:
        conn.close()

Best Practices

Test Naming

python
# Good: Descriptive, follows pattern
# (each name states the unit under test and the expected behaviour)
def test_extraction_creates_database_record():
    ...

def test_extraction_handles_missing_data_gracefully():
    ...

# Bad: Vague, unclear
# (the name says nothing about what is exercised or what should happen)
def test_function():
    ...

def test_it_works():
    ...

Test Independence

python
# Each test should be independent
def test_insert_project(test_db):
    """Insert one project and confirm it can be fetched again."""
    # Arrange
    new_project_id = "test-123"

    # Act
    insert_project(test_db, new_project_id, "Test")

    # Assert
    stored = get_project(test_db, new_project_id)
    assert stored is not None

    # No teardown required - the test_db fixture is fresh each time

Assertion Messages

python
# Provide clear assertion messages
# A message without placeholders is a plain string; use an f-string only when interpolating
assert len(results) > 0, "Expected results but got empty list"
assert project['version'] == 1, f"Expected version 1 but got {project['version']}"