Agent Provisioning Guide

개요

Agent 시스템의 인프라 레이어 설정 가이드. Dependencies 싱글톤, HTTP 클라이언트, LLM 클라이언트 프로비저닝 패턴.

code

┌─────────────────────────────────────────────────────────────────────────┐
│                    Agent System Provisioning                             │
├─────────────────────────────────────────────────────────────────────────┤
│                                                                          │
│  ┌──────────────────────────────────────────────────────────────────┐   │
│  │                    Environment Variables                          │   │
│  │  - OPENAI_API_KEY                                                 │   │
│  │  - GOOGLE_API_KEY                                                 │   │
│  │  - KAKAO_API_KEY                                                  │   │
│  │  - LANGCHAIN_TRACING_V2=true                                      │   │
│  └──────────────────────────────────────────────────────────────────┘   │
│                              │                                           │
│                              ▼                                           │
│  ┌──────────────────────────────────────────────────────────────────┐   │
│  │                    Dependencies (Singleton)                        │   │
│  │  - get_openai_client() → OpenAI                                   │   │
│  │  - get_gemini_client() → genai.Client                             │   │
│  │  - get_http_client() → httpx.AsyncClient                          │   │
│  │  - get_redis() → Redis                                            │   │
│  └──────────────────────────────────────────────────────────────────┘   │
│                              │                                           │
│                              ▼                                           │
│  ┌──────────────────────────────────────────────────────────────────┐   │
│  │                    LLM Clients (Port/Adapter)                      │   │
│  │  - OpenAILLMClient(LLMClientPort)                                 │   │
│  │  - GeminiLLMClient(LLMClientPort)                                 │   │
│  │  - LangChainAdapter(LLMClientPort)                                │   │
│  └──────────────────────────────────────────────────────────────────┘   │
│                              │                                           │
│                              ▼                                           │
│  ┌──────────────────────────────────────────────────────────────────┐   │
│  │                    Graph Factory                                   │   │
│  │  - create_chat_graph(llm, retriever, clients...)                  │   │
│  └──────────────────────────────────────────────────────────────────┘   │
│                                                                          │
└─────────────────────────────────────────────────────────────────────────┘

환경 변수

bash

# ===== LLM API Keys =====
OPENAI_API_KEY=sk-xxxxx
GOOGLE_API_KEY=AIzaSy-xxxxx
ANTHROPIC_API_KEY=sk-ant-xxxxx  # Optional

# ===== External APIs =====
KAKAO_API_KEY=xxxxx        # 카카오맵 API
TAVILY_API_KEY=tvly-xxxxx  # 웹 검색 API

# ===== LangSmith =====
LANGCHAIN_TRACING_V2=true
LANGCHAIN_API_KEY=ls-xxxxx
LANGCHAIN_PROJECT=eco2-chat-worker

# ===== Infrastructure =====
REDIS_URL=redis://localhost:6379/0
POSTGRES_URL=postgresql://user:pass@localhost:5432/db

# ===== Worker Settings =====
WORKER_CONCURRENCY=4
LLM_DEFAULT_MODEL=gpt-5.2
LLM_DEFAULT_PROVIDER=openai

HTTP 클라이언트 설정

Timeout & Limits

python

# apps/chat_worker/infrastructure/llm/config.py

import httpx

# HTTP Timeout 설정
HTTP_TIMEOUT = httpx.Timeout(
    connect=5.0,   # 연결 타임아웃
    read=60.0,     # 응답 읽기 타임아웃 (LLM은 길게)
    write=10.0,    # 요청 쓰기 타임아웃
    pool=5.0,      # 커넥션 풀 대기 타임아웃
)

# 커넥션 풀 제한
HTTP_LIMITS = httpx.Limits(
    max_connections=100,           # 최대 동시 연결
    max_keepalive_connections=20,  # Keep-alive 연결 유지
    keepalive_expiry=30.0,         # Keep-alive 만료 시간
)

# 재시도 설정
MAX_RETRIES = 2

# 모델별 컨텍스트 윈도우
MODEL_CONTEXT_WINDOWS = {
    "gpt-5.2": 256_000,
    "gpt-5.2-codex": 256_000,
    "gpt-4o": 128_000,
    "gpt-4o-mini": 128_000,
    "gemini-3-flash-preview": 1_000_000,
    "gemini-3-pro-preview": 2_000_000,
    "gemini-1.5-pro": 2_000_000,
    "gemini-1.5-flash": 1_000_000,
}

Shared HTTP Client

python

# apps/chat_worker/setup/dependencies.py

import httpx
from functools import lru_cache

_shared_http_client: httpx.AsyncClient | None = None


def get_shared_http_client() -> httpx.AsyncClient:
    """공유 HTTP 클라이언트 싱글톤.

    모든 외부 API 호출에서 커넥션 풀 공유.
    """
    global _shared_http_client

    if _shared_http_client is None:
        _shared_http_client = httpx.AsyncClient(
            timeout=HTTP_TIMEOUT,
            limits=HTTP_LIMITS,
            http2=True,  # HTTP/2 활성화
        )

    return _shared_http_client


async def close_shared_http_client():
    """애플리케이션 종료 시 클라이언트 정리."""
    global _shared_http_client

    if _shared_http_client is not None:
        await _shared_http_client.aclose()
        _shared_http_client = None

LLM 클라이언트 싱글톤

OpenAI 클라이언트

python

# apps/chat_worker/setup/dependencies.py

from openai import AsyncOpenAI

_openai_client: AsyncOpenAI | None = None


def get_openai_client() -> AsyncOpenAI | None:
    """OpenAI AsyncOpenAI 클라이언트 싱글톤."""
    global _openai_client

    if _openai_client is None:
        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            return None

        # 공유 HTTP 클라이언트 사용
        http_client = get_shared_http_client()

        _openai_client = AsyncOpenAI(
            api_key=api_key,
            http_client=http_client,
            max_retries=MAX_RETRIES,
        )

    return _openai_client

Gemini 클라이언트

python

from google import genai

_gemini_client: genai.Client | None = None


def get_gemini_client() -> genai.Client | None:
    """Google Gemini 클라이언트 싱글톤."""
    global _gemini_client

    if _gemini_client is None:
        api_key = os.environ.get("GOOGLE_API_KEY")
        if not api_key:
            return None

        _gemini_client = genai.Client(api_key=api_key)

    return _gemini_client

Raw SDK vs Port/Adapter

python

# Raw SDK 클라이언트: 직접 API 호출
openai_raw = get_openai_client()
response = await openai_raw.chat.completions.create(...)

# Port/Adapter: 비즈니스 로직 추상화
from chat_worker.infrastructure.llm.clients import OpenAILLMClient

llm_client = OpenAILLMClient(
    client=get_openai_client(),
    model="gpt-5.2",
)
intent = await llm_client.generate_structured(
    prompt=message,
    response_schema=IntentClassification,
)

Port/Adapter 패턴

LLMClientPort 인터페이스

python

# apps/chat_worker/application/ports/llm/llm_client.py

from abc import ABC, abstractmethod
from typing import Any, AsyncIterator, TypeVar
from pydantic import BaseModel

T = TypeVar("T", bound=BaseModel)


class LLMClientPort(ABC):
    """LLM 클라이언트 Port (인터페이스).

    모든 LLM 구현체가 준수해야 하는 계약.
    """

    @abstractmethod
    async def generate(
        self,
        prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.7,
        max_tokens: int = 2048,
    ) -> str:
        """텍스트 생성."""
        ...

    @abstractmethod
    async def generate_structured(
        self,
        prompt: str,
        response_schema: type[T],
        system_prompt: str | None = None,
        temperature: float = 0,
    ) -> T:
        """Structured Output 생성."""
        ...

    @abstractmethod
    async def generate_stream(
        self,
        prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.7,
    ) -> AsyncIterator[str]:
        """스트리밍 생성."""
        ...

    @abstractmethod
    async def generate_function_call(
        self,
        prompt: str,
        functions: list[dict[str, Any]],
        system_prompt: str | None = None,
        function_call: str | dict = "auto",
    ) -> tuple[str, dict] | None:
        """Function Calling."""
        ...

    @abstractmethod
    async def generate_with_tools(
        self,
        prompt: str,
        tools: list[str],
        system_prompt: str | None = None,
    ) -> str:
        """Tool 사용 생성 (web_search 등)."""
        ...

Adapter 구현 예시

python

# apps/chat_worker/infrastructure/llm/clients/openai_client.py

class OpenAILLMClient(LLMClientPort):
    """OpenAI LLM 클라이언트 Adapter."""

    def __init__(
        self,
        client: AsyncOpenAI | None = None,
        model: str = "gpt-5.2",
    ):
        self._client = client or get_openai_client()
        self._model = model

        if self._client is None:
            raise ValueError("OpenAI client not available")

    async def generate(
        self,
        prompt: str,
        system_prompt: str | None = None,
        temperature: float = 0.7,
        max_tokens: int = 2048,
    ) -> str:
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        response = await self._client.chat.completions.create(
            model=self._model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
        )

        return response.choices[0].message.content or ""

    # ... 나머지 메서드 구현

Graph Factory

의존성 주입 패턴

python

# apps/chat_worker/infrastructure/orchestration/langgraph/factory.py

from langgraph.graph import StateGraph

def create_chat_graph(
    # 필수 의존성
    llm: LLMClientPort,
    retriever: RetrieverPort,

    # 선택적 외부 서비스
    vision_model: VisionModelPort | None = None,
    character_client: CharacterClientPort | None = None,
    location_client: LocationClientPort | None = None,
    web_search_client: WebSearchPort | None = None,
    bulk_waste_client: BulkWasteClientPort | None = None,
    weather_client: WeatherClientPort | None = None,
    collection_point_client: CollectionPointClientPort | None = None,
    image_generator: ImageGeneratorPort | None = None,

    # 인프라
    checkpointer: BaseCheckpointSaver | None = None,
    event_publisher: ProgressNotifierPort | None = None,

    # 기능 플래그
    enable_summarization: bool = False,
    enable_dynamic_routing: bool = True,
    enable_multi_intent: bool = True,
    enable_enrichment: bool = True,
) -> StateGraph:
    """Chat Graph 팩토리.

    Port 기반 의존성 주입으로 테스트 용이성 확보.
    """

    graph = StateGraph(ChatState)

    # === 필수 노드 ===
    graph.add_node("intent", create_intent_node(llm, event_publisher))
    graph.add_node("router", lambda s: s)
    graph.add_node("answer", create_answer_node(llm, event_publisher))

    # === 선택적 노드 (의존성 있을 때만) ===
    if vision_model:
        graph.add_node("vision", create_vision_node(vision_model))

    if retriever:
        graph.add_node("waste_rag", create_rag_node(retriever, llm, event_publisher))

    if character_client:
        graph.add_node("character", create_character_node(character_client, event_publisher))

    if location_client:
        graph.add_node("location", create_location_node(
            location_client,
            llm,
            event_publisher,
            get_openai_client(),
            get_gemini_client(),
        ))

    if weather_client:
        graph.add_node("weather", create_weather_node(weather_client))

    if bulk_waste_client:
        graph.add_node("bulk_waste", create_bulk_waste_node(
            bulk_waste_client,
            llm,
            event_publisher,
            get_openai_client(),
            get_gemini_client(),
        ))

    if image_generator:
        graph.add_node("image_generation", create_image_node(image_generator, event_publisher))

    # === 엣지 설정 ===
    graph.set_entry_point("intent")

    # Vision 조건부 라우팅
    if vision_model:
        graph.add_conditional_edges(
            "intent",
            lambda s: "vision" if s.get("image_url") else "router",
            {"vision": "vision", "router": "router"},
        )
        graph.add_edge("vision", "router")
    else:
        graph.add_edge("intent", "router")

    # Dynamic Router
    if enable_dynamic_routing:
        graph.add_conditional_edges(
            "router",
            create_dynamic_router(
                enable_multi_intent=enable_multi_intent,
                enable_enrichment=enable_enrichment,
            ),
        )

    # Aggregator → Answer
    for node in ["waste_rag", "character", "location", "bulk_waste", "weather", "general"]:
        if node in graph.nodes:
            graph.add_edge(node, "answer")

    graph.add_edge("answer", END)

    return graph.compile(checkpointer=checkpointer)

Worker 초기화

Celery Worker Setup

python

# apps/chat_worker/setup/worker.py

from celery import Celery
from celery.signals import worker_init, worker_shutdown

app = Celery("chat_worker")


@worker_init.connect
def init_worker(**kwargs):
    """Worker 시작 시 초기화."""
    import asyncio

    # 이벤트 루프 생성
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    # 싱글톤 클라이언트 초기화 (lazy하게 생성되지만 미리 warmup)
    _ = get_openai_client()
    _ = get_gemini_client()
    _ = get_shared_http_client()

    # LangSmith 설정 확인
    if is_langsmith_enabled():
        logger.info("LangSmith tracing enabled", extra={
            "project": os.environ.get("LANGCHAIN_PROJECT"),
        })


@worker_shutdown.connect
def shutdown_worker(**kwargs):
    """Worker 종료 시 정리."""
    import asyncio

    loop = asyncio.get_event_loop()
    loop.run_until_complete(close_shared_http_client())

FastAPI Lifespan

python

# apps/sse_gateway/main.py

from contextlib import asynccontextmanager
from fastapi import FastAPI


@asynccontextmanager
async def lifespan(app: FastAPI):
    """애플리케이션 생명주기 관리."""

    # Startup
    logger.info("Starting SSE Gateway...")

    # Redis 연결
    redis = await get_redis()
    app.state.redis = redis

    yield

    # Shutdown
    logger.info("Shutting down SSE Gateway...")
    await close_shared_http_client()
    await redis.close()


app = FastAPI(lifespan=lifespan)

테스트용 Mock 주입

python

# tests/conftest.py

import pytest
from unittest.mock import AsyncMock, MagicMock


@pytest.fixture
def mock_llm_client():
    """Mock LLM 클라이언트."""
    client = AsyncMock(spec=LLMClientPort)
    client.generate.return_value = "Mock response"
    client.generate_structured.return_value = IntentClassification(
        intent="waste",
        confidence=0.95,
        reasoning="테스트",
    )
    return client


@pytest.fixture
def mock_retriever():
    """Mock Retriever."""
    retriever = AsyncMock(spec=RetrieverPort)
    retriever.search.return_value = [
        {"content": "Mock document", "score": 0.9},
    ]
    return retriever


@pytest.fixture
def test_graph(mock_llm_client, mock_retriever):
    """테스트용 Graph."""
    return create_chat_graph(
        llm=mock_llm_client,
        retriever=mock_retriever,
        # 외부 서비스 없이 테스트
        character_client=None,
        location_client=None,
        enable_dynamic_routing=False,  # 단순 라우팅
    )

Best Practices

•싱글톤 사용: LLM 클라이언트는 커넥션 풀 공유를 위해 싱글톤
•Lazy 초기화: 필요할 때만 클라이언트 생성
•Port/Adapter 분리: 비즈니스 로직과 인프라 분리
•환경변수 검증: 필수 키 누락 시 명확한 에러
•Graceful Shutdown: 리소스 정리 보장
•HTTP/2 활성화: 다중화로 성능 향상

agent-provisioning

Agent Provisioning Guide

개요

환경 변수

HTTP 클라이언트 설정

Timeout & Limits

Shared HTTP Client

LLM 클라이언트 싱글톤

OpenAI 클라이언트

Gemini 클라이언트

Raw SDK vs Port/Adapter

Port/Adapter 패턴

LLMClientPort 인터페이스

Adapter 구현 예시

Graph Factory

의존성 주입 패턴

Worker 초기화

Celery Worker Setup

FastAPI Lifespan

테스트용 Mock 주입

Best Practices

Reference