Modal Web Endpoints Reference

Detailed reference for creating web APIs and endpoints on Modal.

Endpoint Types

FastAPI Endpoint (Recommended)

Simple function-based endpoints:

python

import modal

app = modal.App("web-api")
image = modal.Image.debian_slim().pip_install("fastapi[standard]")

@app.function(image=image)
@modal.fastapi_endpoint()
def hello(name: str = "World") -> dict:
    return {"message": f"Hello, {name}!"}

@app.function(image=image)
@modal.fastapi_endpoint(method="POST", docs=True)
def process(body: dict) -> dict:
    return {"received": body}

ASGI App (Full FastAPI/Starlette)

For complex applications with multiple routes:

python

@app.function(image=image)
@modal.asgi_app()
def fastapi_app():
    from fastapi import FastAPI, HTTPException
    
    web_app = FastAPI()
    
    @web_app.get("/")
    def root():
        return {"status": "ok"}
    
    @web_app.get("/items/{item_id}")
    def get_item(item_id: int):
        return {"item_id": item_id}
    
    @web_app.post("/items")
    def create_item(item: dict):
        return {"created": item}
    
    return web_app

WSGI App (Flask/Django)

python

@app.function(image=modal.Image.debian_slim().pip_install("flask"))
@modal.wsgi_app()
def flask_app():
    from flask import Flask, jsonify
    
    web_app = Flask(__name__)
    
    @web_app.route("/")
    def index():
        return jsonify({"status": "ok"})
    
    return web_app

Custom Web Server

For non-Python web servers or custom setups:

python

@app.function()
@modal.web_server(port=8080)
def custom_server():
    import subprocess
    # Start any web server on port 8080
    subprocess.run(["python", "-m", "http.server", "8080"])

Endpoint Configuration

HTTP Methods

python

@modal.fastapi_endpoint(method="GET")      # Default
@modal.fastapi_endpoint(method="POST")
@modal.fastapi_endpoint(method="PUT")
@modal.fastapi_endpoint(method="DELETE")

Documentation

python

# Enable OpenAPI docs at /docs
@modal.fastapi_endpoint(docs=True)
def documented_endpoint():
    ...

Custom Labels

python

# Custom URL label
@modal.fastapi_endpoint(label="my-api")
def endpoint():
    ...
# URL: https://workspace--my-api.modal.run

Custom Domains

python

@modal.fastapi_endpoint(custom_domains=["api.example.com", "api.example.net"])
def multi_domain_endpoint():
    ...

Proxy Authentication

python

# Require Modal proxy auth tokens
@modal.fastapi_endpoint(requires_proxy_auth=True)
def private_endpoint():
    return {"secret": "data"}

Call with:

bash

curl -H "Modal-Key: $TOKEN_ID" \
     -H "Modal-Secret: $TOKEN_SECRET" \
     https://your-endpoint.modal.run

Streaming Responses

Server-Sent Events (SSE)

python

from fastapi.responses import StreamingResponse

@app.function(image=image)
@modal.fastapi_endpoint()
async def stream():
    async def generate():
        for i in range(10):
            yield f"data: {i}\n\n"
            await asyncio.sleep(0.1)
    
    return StreamingResponse(generate(), media_type="text/event-stream")

Streaming with Map

python

@app.function(gpu="A100")
def process_chunk(chunk: str) -> str:
    return f"processed: {chunk}"

@app.function(image=image)
@modal.fastapi_endpoint()
async def stream_processing(body: dict):
    from fastapi.responses import StreamingResponse
    
    chunks = body["chunks"]
    
    async def generate():
        async for result in process_chunk.map.aio(chunks):
            yield f"data: {result}\n\n"
    
    return StreamingResponse(generate(), media_type="text/event-stream")

Concurrency

Handle Multiple Requests

python

@app.function()
@modal.concurrent(max_inputs=20)  # 20 concurrent requests per container
@modal.asgi_app()
def high_concurrency_app():
    ...

URL Structure

Auto-generated URLs follow this pattern:

code

https://<workspace>-<env-suffix>--<app>-<function>.modal.run

Example: https://myworkspace-prod--my-app-hello.modal.run

Ephemeral Apps

Apps run with modal serve get a -dev suffix:

code

https://myworkspace-prod--my-app-hello-dev.modal.run

Request/Response Handling

Request Bodies

python

from pydantic import BaseModel

class Item(BaseModel):
    name: str
    price: float

@app.function(image=image)
@modal.fastapi_endpoint(method="POST")
def create_item(item: Item) -> dict:
    return {"created": item.dict()}

File Uploads

python

from fastapi import UploadFile, File

@app.function(image=image)
@modal.fastapi_endpoint(method="POST")
async def upload(file: UploadFile = File(...)):
    contents = await file.read()
    return {"filename": file.filename, "size": len(contents)}

Custom Responses

python

from fastapi.responses import JSONResponse, HTMLResponse

@app.function(image=image)
@modal.fastapi_endpoint()
def custom_response():
    return JSONResponse(
        content={"message": "hello"},
        headers={"X-Custom-Header": "value"}
    )

Long-Running Requests

Timeouts

Web endpoints have a 150-second HTTP timeout, but auto-redirect for longer tasks.

Job Queue Pattern

For very long tasks, use spawn + polling:

python

@app.function()
def long_task(data: dict) -> dict:
    import time
    time.sleep(300)  # 5 minutes
    return {"result": "done"}

@app.function(image=image)
@modal.asgi_app()
def api():
    from fastapi import FastAPI
    web_app = FastAPI()
    
    @web_app.post("/submit")
    async def submit(data: dict):
        call = long_task.spawn(data)
        return {"call_id": call.object_id}
    
    @web_app.get("/result/{call_id}")
    async def result(call_id: str):
        call = modal.FunctionCall.from_id(call_id)
        try:
            return call.get(timeout=0)
        except TimeoutError:
            return {"status": "pending"}, 202
    
    return web_app

Deployment

bash

# Development with hot-reload
modal serve app.py

# Production deployment
modal deploy app.py

Getting Endpoint URL

python

@app.function(image=image)
@modal.fastapi_endpoint()
def my_endpoint():
    # Get own URL
    url = my_endpoint.get_web_url()
    return {"url": url}

# From external code
fn = modal.Function.from_name("my-app", "my_endpoint")
url = fn.get_web_url()