Sandboxing

Why Sandbox Agents?

Problem: Agents execute code and call tools, so they must run in isolation to keep the host system safe

Risks Without Sandboxing

code

Agent executes malicious code → Compromises system
Agent uses tool incorrectly → Deletes production data
Agent consumes too many resources → Crashes server
Agent accesses sensitive data → Data breach

With Sandboxing

code

Agent runs in isolated environment
Limited resources (CPU, memory, time)
Restricted permissions
Cannot harm host system

Sandboxing Strategies

Process Isolation

code

Run agent in separate process
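Kill the process if it misbehaves
On its own, a separate process can still reach host files and the network; combine it with resource limits and reduced privileges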

Container Isolation

code

Run agent in Docker container
Container has limited resources
Cannot access host filesystem
Network access restricted

VM Isolation

code

Run agent in virtual machine
Strongest isolation
Highest overhead

Code Execution Sandboxing

Subprocess with Timeout

python

import subprocess
import signal

def execute_code_sandboxed(code, timeout_seconds=5, memory_limit_mb=None):
    """Run ``code`` in a separate Python interpreter with a wall-clock timeout.

    The child is started with the same interpreter that runs this function
    (``sys.executable``) rather than whatever ``python`` happens to be on
    ``PATH``. Only the timeout and the optional memory cap are enforced
    here; the child is not otherwise restricted by this function.

    Args:
        code: Python source passed to the child interpreter via ``-c``.
        timeout_seconds: Wall-clock seconds after which the child is killed.
        memory_limit_mb: Optional cap, in MiB, on the child's address space.
            Requires the POSIX-only ``resource`` module. ``None`` (the
            default) applies no cap.

    Returns:
        ``{"success": True, "output": <stdout>}`` when the child exits with
        status 0, otherwise ``{"success": False, "error": <message>}`` where
        the message is the child's stderr, ``"Execution timeout"``, or the
        text of the launch failure.
    """
    import sys

    apply_child_limits = None
    if memory_limit_mb is not None:
        import resource  # POSIX-only; imported lazily so the default path works everywhere

        limit_bytes = int(memory_limit_mb) * 1024 * 1024

        def apply_child_limits():
            # Runs in the child between fork and exec, so only the child is capped.
            resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))

    try:
        result = subprocess.run(
            [sys.executable or "python", "-c", code],
            capture_output=True,
            text=True,
            timeout=timeout_seconds,
            check=True,
            preexec_fn=apply_child_limits,
        )
        return {"success": True, "output": result.stdout}

    except subprocess.TimeoutExpired:
        return {"success": False, "error": "Execution timeout"}

    except subprocess.CalledProcessError as e:
        return {"success": False, "error": e.stderr}

    except (OSError, subprocess.SubprocessError) as e:
        # Interpreter could not be launched, or the limit could not be applied.
        return {"success": False, "error": str(e)}

# Usage
result = execute_code_sandboxed("print('Hello')", timeout_seconds=5)

Docker Container

python

import docker

def execute_code_in_docker(code, timeout_seconds=30):
    """Execute ``code`` in a locked-down, short-lived Docker container.

    The container is started detached with a 256 MB memory limit, a CPU
    quota of 50000, networking disabled and a read-only root filesystem.
    It is removed explicitly in ``finally`` instead of via ``remove=True``:
    with ``detach=True`` that option asks the daemon to auto-remove the
    container on exit, which can delete it before its logs are read. The
    forced removal also stops a container that is still running after the
    wait timed out.

    Args:
        code: Python source passed to ``python -c`` inside the container.
        timeout_seconds: Seconds to wait for the container to finish.

    Returns:
        ``{"success": <exit status == 0>, "output": <container logs>}`` when
        the container finished in time, otherwise
        ``{"success": False, "error": <message>}``.
    """
    client = docker.from_env()
    container = None

    try:
        # Run code in container
        container = client.containers.run(
            image="python:3.9-slim",
            command=["python", "-c", code],
            detach=True,
            mem_limit="256m",  # 256MB RAM limit
            cpu_quota=50000,   # 50% CPU limit
            network_disabled=True,  # No network access
            read_only=True,    # Read-only filesystem
        )

        # Wait for completion with timeout
        result = container.wait(timeout=timeout_seconds)
        logs = container.logs().decode('utf-8', errors='replace')

        return {"success": result["StatusCode"] == 0, "output": logs}

    except Exception as e:
        # Covers Docker API errors as well as the wait timeout.
        return {"success": False, "error": str(e)}

    finally:
        if container is not None:
            try:
                # force=True kills the container first if it is still running.
                container.remove(force=True)
            except docker.errors.APIError:
                pass  # best effort: container is already gone or the daemon is unreachable
        client.close()

# Usage
result = execute_code_in_docker("print('Hello from Docker')")

E2B (Code Interpreter)

python

from e2b import Sandbox

def execute_code_e2b(code, timeout_seconds=30):
    """Execute ``code`` in an E2B sandbox and report the captured output.

    The sandbox is created inside the ``try`` so that a failure to create
    it is reported through the same error dictionary as a failure while
    running the code, instead of escaping as an exception.

    Args:
        code: Source code handed to ``sandbox.run_code``.
        timeout_seconds: Passed to ``Sandbox`` as its ``timeout``.

    Returns:
        ``{"success": True, "output": <stdout>, "error": <stderr>}`` when
        ``run_code`` returns, otherwise
        ``{"success": False, "error": <exception text>}``.
    """
    # NOTE(review): "success" is True whenever run_code returns, even if
    # stderr is non-empty - confirm that this is the intended contract.
    sandbox = None

    try:
        sandbox = Sandbox(timeout=timeout_seconds)
        result = sandbox.run_code(code)
        return {
            "success": True,
            "output": result.stdout,
            "error": result.stderr
        }

    except Exception as e:
        return {"success": False, "error": str(e)}

    finally:
        # Only close a sandbox that was actually created.
        if sandbox is not None:
            sandbox.close()

Resource Limits

CPU Limit

python

import resource

def set_cpu_limit(seconds):
    """Cap the CPU time of the current process.

    Both the soft and the hard ``RLIMIT_CPU`` limit are set to ``seconds``.
    """
    soft_limit = hard_limit = seconds
    resource.setrlimit(resource.RLIMIT_CPU, (soft_limit, hard_limit))

# Usage
set_cpu_limit(5)  # Max 5 seconds of CPU time

Memory Limit

python

def set_memory_limit(bytes):
    """Cap the address space of the current process.

    Both the soft and the hard ``RLIMIT_AS`` limit are set to ``bytes``.
    """
    address_space_limits = (bytes, bytes)
    resource.setrlimit(resource.RLIMIT_AS, address_space_limits)

# Usage
set_memory_limit(256 * 1024 * 1024)  # Max 256MB

Docker Resource Limits

python

# Cap on open file descriptors inside the container (soft == hard == 1024)
open_files_limit = docker.types.Ulimit(name='nofile', soft=1024, hard=1024)

container = client.containers.run(
    "python:3.9-slim",
    ["python", "script.py"],
    ulimits=[open_files_limit],
    pids_limit=100,        # at most 100 processes
    cpu_quota=50000,       # CPU limit (50%)
    memswap_limit="256m",  # memory + swap ceiling
    mem_limit="256m",      # memory ceiling
)

File System Isolation

Temporary Directory

python

import tempfile
import os

def execute_with_temp_dir(code):
    """Execute ``code`` with a throwaway temporary directory as the CWD.

    The working directory is switched to a fresh temporary directory for
    the duration of the call and restored afterwards, even if ``code``
    raises. The temporary directory and anything written into it are
    deleted when the call finishes.

    ``code`` runs in a single fresh namespace used as both globals and
    locals. With a bare ``exec(code)`` inside a function, top-level
    definitions land in a separate locals mapping, so a function defined
    by ``code`` could not call another function defined by ``code``. The
    fresh namespace also keeps this function's locals and this module's
    globals out of reach of ``code``; anything it needs must be imported
    by ``code`` itself.

    SECURITY: ``exec`` runs ``code`` inside this process with this
    process's privileges. Changing the working directory is NOT isolation;
    only use this for trusted code, and run untrusted code in a separate
    process or container instead.

    Args:
        code: Python source (or a code object) to execute.

    Raises:
        Any exception raised by ``code`` propagates to the caller.
    """
    original_dir = os.getcwd()
    namespace = {"__name__": "__sandbox__"}

    with tempfile.TemporaryDirectory() as tmpdir:
        os.chdir(tmpdir)
        try:
            exec(code, namespace)
        finally:
            # Leave the directory before it is deleted on exiting the with-block.
            os.chdir(original_dir)

Read-Only Filesystem

python

# Root filesystem is read-only; /tmp is the one writable mount
writable_mounts = {'/tmp': 'size=100M'}

container = client.containers.run(
    "python:3.9-slim",
    ["python", "-c", code],
    tmpfs=writable_mounts,
    read_only=True,
)

Network Isolation

Disable Network Access

python

# Container started with networking switched off
container = client.containers.run(
    "python:3.9-slim",
    ["python", "-c", code],
    network_disabled=True,
)

Restricted Network Access

python

# Docker with a custom *internal* network.
# A plain bridge network with its own subnet does not restrict anything:
# containers on it can still reach external hosts. internal=True removes
# external connectivity, so containers on this network can only talk to
# each other. Allowing specific external IPs or domains on top of that
# needs additional controls outside this snippet (for example host
# firewall rules or an egress proxy attached to the same network).
network = client.networks.create(
    name="agent-network",
    driver="bridge",
    internal=True,  # no access to external networks
    ipam=docker.types.IPAMConfig(
        pool_configs=[
            docker.types.IPAMPool(subnet="172.20.0.0/16")
        ]
    )
)

container = client.containers.run(
    image="python:3.9-slim",
    command=["python", "-c", code],
    network=network.name
)

Tool Execution Sandboxing

Whitelist Allowed Tools

python

# Names of the only tools an agent may invoke
ALLOWED_TOOLS = {"search_web", "get_weather", "calculate"}

def execute_tool(tool_name, params):
    """Run a tool from ``tools``, refusing any name outside ``ALLOWED_TOOLS``.

    ``params`` is unpacked as keyword arguments for the tool. Raises
    ``PermissionError`` when ``tool_name`` is not in the allowlist.
    """
    if tool_name in ALLOWED_TOOLS:
        tool = tools[tool_name]
        return tool(**params)

    raise PermissionError(f"Tool '{tool_name}' not allowed")

Parameter Validation

python

def validate_tool_params(tool_name, params):
    """Validate tool parameters before execution.

    Checks performed:

    * ``send_email``: ``params["to"]`` must pass ``is_valid_email`` and end
      with ``@company.com``. The suffix is compared case-insensitively.
    * ``delete_file``: ``params["path"]`` must be a non-empty string that
      does not point at ``/system`` or anything below it. The path is
      normalized first, so ``/tmp/../system/x``, ``//system/x`` and
      ``/system`` itself are rejected too. Relative paths are normalized
      but not resolved against a working directory.

    Other tool names are accepted without checks.

    Args:
        tool_name: Name of the tool about to be executed.
        params: Mapping of parameter names to values for that tool.

    Returns:
        ``True`` when all checks pass.

    Raises:
        ValueError: Invalid email address, or missing/non-string file path.
        PermissionError: Recipient outside ``@company.com``, or a path
            under ``/system``.
    """
    import posixpath

    if tool_name == "send_email":
        recipient = params.get("to")

        # Validate email address
        if not is_valid_email(recipient):
            raise ValueError("Invalid email address")

        # Prevent sending to external domains
        if not recipient.lower().endswith("@company.com"):
            raise PermissionError("Can only send to @company.com")

    if tool_name == "delete_file":
        path = params.get("path")
        if not isinstance(path, str) or not path:
            raise ValueError("Invalid file path")

        # Collapse "." / ".." segments and repeated slashes so the prefix
        # check below cannot be sidestepped with an equivalent spelling.
        normalized = posixpath.normpath(path)
        if normalized.startswith("//"):
            # normpath keeps exactly two leading slashes; fold them into one.
            normalized = "/" + normalized.lstrip("/")

        # Prevent deleting system files
        if normalized == "/system" or normalized.startswith("/system/"):
            raise PermissionError("Cannot delete system files")

    return True

def execute_tool_safe(tool_name, params):
    """Validate ``params`` for ``tool_name``, then execute the tool.

    Any exception raised by ``validate_tool_params`` propagates before the
    tool is executed.
    """
    validate_tool_params(tool_name, params)
    outcome = execute_tool(tool_name, params)
    return outcome

Rate Limiting

python

from collections import defaultdict
import time

class RateLimitExceeded(Exception):
    """Raised by ``RateLimiter.check_limit`` when a tool is over its limit."""


class RateLimiter:
    """Sliding-window rate limiter keyed by tool name.

    Call times are taken from ``time.monotonic()`` rather than the wall
    clock, so system clock adjustments can neither shorten nor stretch the
    window.
    """

    def __init__(self):
        # tool name -> monotonic timestamps of the calls still inside the window
        self.calls = defaultdict(list)

    def check_limit(self, tool_name, max_calls=10, window_seconds=60):
        """Record a call to ``tool_name`` if it is within the rate limit.

        Args:
            tool_name: Key under which calls are counted.
            max_calls: Maximum number of calls allowed inside the window.
            window_seconds: Length of the sliding window in seconds.

        Raises:
            RateLimitExceeded: ``max_calls`` calls were already recorded in
                the window. It subclasses ``Exception``, so existing
                ``except Exception`` handlers keep working. The rejected
                call is not recorded.
        """
        now = time.monotonic()

        # Remove old calls outside window
        recent = [t for t in self.calls[tool_name] if now - t < window_seconds]
        self.calls[tool_name] = recent

        # Check limit
        if len(recent) >= max_calls:
            raise RateLimitExceeded(f"Rate limit exceeded for {tool_name}")

        # Record call
        recent.append(now)

rate_limiter = RateLimiter()

def execute_tool_with_rate_limit(tool_name, params, max_calls=10, window_seconds=60):
    """Execute a tool after checking it against the shared rate limiter.

    Args:
        tool_name: Name of the tool to execute; also the rate-limit key.
        params: Parameters forwarded to ``execute_tool``.
        max_calls: Maximum calls allowed per window (default 10).
        window_seconds: Window length in seconds (default 60).

    Returns:
        Whatever ``execute_tool`` returns.

    Raises:
        Whatever ``rate_limiter.check_limit`` raises when the limit is
        exceeded; the tool is not executed in that case.
    """
    rate_limiter.check_limit(
        tool_name, max_calls=max_calls, window_seconds=window_seconds
    )
    return execute_tool(tool_name, params)

Monitoring Sandboxed Execution

Track Resource Usage

python

import psutil

def monitor_execution(process_id, max_cpu_percent=80, max_memory_mb=512):
    """Monitor a sandboxed process and kill it when it exceeds its limits.

    Samples CPU usage over one-second intervals and the resident set size
    until the process stops running, printing each sample. Returns
    normally when the process exits on its own, including when it
    disappears between two samples.

    Args:
        process_id: PID of the process to watch.
        max_cpu_percent: CPU percentage above which the process is killed
            (default 80).
        max_memory_mb: Resident memory in MB above which the process is
            killed (default 512).

    Raises:
        Exception: ``"CPU limit exceeded"`` or ``"Memory limit exceeded"``
            after the process has been killed for crossing a limit.
    """
    process = psutil.Process(process_id)

    def kill_quietly():
        # The process may exit on its own between the sample and the kill.
        try:
            process.kill()
        except psutil.NoSuchProcess:
            pass

    while process.is_running():
        try:
            cpu_percent = process.cpu_percent(interval=1)
            memory_mb = process.memory_info().rss / 1024 / 1024
        except psutil.NoSuchProcess:
            # Process ended while it was being sampled: nothing left to monitor.
            return

        print(f"CPU: {cpu_percent}%, Memory: {memory_mb:.1f}MB")

        # Kill if exceeds limits
        if cpu_percent > max_cpu_percent:
            kill_quietly()
            raise Exception("CPU limit exceeded")

        if memory_mb > max_memory_mb:
            kill_quietly()
            raise Exception("Memory limit exceeded")

Log Sandbox Events

python

def log_sandbox_event(event_type, details):
    """Log a sandbox event for audit purposes.

    Emits one ``logger.info`` record containing a timezone-aware UTC
    timestamp in ISO 8601 form (with an explicit ``+00:00`` offset), the
    event type and the caller-supplied details. ``datetime.utcnow()`` is
    avoided because it returns a naive datetime and is deprecated since
    Python 3.12.

    Args:
        event_type: Short label for the event, e.g. ``"code_execution"``.
        details: Event payload; logged as given.
    """
    from datetime import datetime, timezone

    logger.info({
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "event_type": event_type,
        "details": details
    })

# Usage
log_sandbox_event("code_execution", {
    "code": code,
    "timeout": timeout_seconds,
    "success": result["success"]
})

log_sandbox_event("tool_execution", {
    "tool_name": tool_name,
    "params": params,
    "result": result
})

Security Best Practices

1. Principle of Least Privilege

python

# Good: Only allow necessary tools
ALLOWED_TOOLS = {"search_web", "calculate"}

# Bad: Allow all tools
ALLOWED_TOOLS = "*"

2. Validate All Inputs

python

# Good
def execute_tool(tool_name, params):
    validate_tool_name(tool_name)
    validate_params(params)
    return tools[tool_name](**params)

# Bad
def execute_tool(tool_name, params):
    return tools[tool_name](**params)  # No validation

3. Set Resource Limits

python

# Good
execute_code_sandboxed(code, timeout_seconds=30, memory_limit_mb=256)

# Bad
execute_code_sandboxed(code)  # Relies on implicit defaults; no explicit resource limits

4. Isolate Network Access

python

# Good
container = client.containers.run(
    image="python:3.9-slim",
    network_disabled=True
)

# Bad
container = client.containers.run(
    image="python:3.9-slim"
)  # Full network access

5. Use Read-Only Filesystem

python

# Good
container = client.containers.run(
    image="python:3.9-slim",
    read_only=True,
    tmpfs={'/tmp': 'size=100M'}
)

# Bad
container = client.containers.run(
    image="python:3.9-slim"
)  # Writable filesystem

Tools and Services

E2B (Code Interpreter)

python

from e2b import Sandbox

sandbox = Sandbox()
try:
    result = sandbox.run_code("print('Hello')")
finally:
    # Close the sandbox even if run_code raises.
    sandbox.close()

Firecracker (Lightweight VMs)

python

# AWS Lambda uses Firecracker for isolation
# Fast startup (<125ms)
# Strong isolation

gVisor (Google)

python

# Container runtime with additional isolation
# Used by Google Cloud Run

Summary

Sandboxing: Isolate agent execution for safety

Strategies:

•Process isolation
•Container isolation (Docker)
•VM isolation

Resource Limits:

•CPU time
•Memory usage
•Disk I/O
•Network bandwidth

File System:

•Temporary directories
•Read-only filesystem
•Restricted paths

Network:

•Disable network
•Whitelist IPs/domains

Tool Execution:

•Whitelist allowed tools
•Validate parameters
•Rate limiting

Best Practices:

•Least privilege
•Validate inputs
•Set resource limits
•Isolate network
•Read-only filesystem

Tools:

•E2B (code interpreter)
•Docker (containers)
•Firecracker (lightweight VMs)
•gVisor (container isolation)