
Performance Optimization

A guide to optimizing MCP servers for high performance

Tuning MCP servers is critical for production deployments under heavy load. The sections below cover OS-level tuning, caching, connection pooling, concurrency, load balancing and autoscaling, monitoring, and load testing.

Terminal
# Reserve dedicated CPU cores for the MCP server
sudo cset shield --cpu 2-3 --kthread on
# Run the server on the isolated cores
sudo cset shield --exec -- python -m src.server
Terminal
# Create a cgroup (cgroup v2)
sudo mkdir /sys/fs/cgroup/mcp-server
# CPU limit: 50% of one core (50000 us out of every 100000 us)
echo "50000 100000" | sudo tee /sys/fs/cgroup/mcp-server/cpu.max
# Memory limit (512 MB)
echo "536870912" | sudo tee /sys/fs/cgroup/mcp-server/memory.max
Terminal
# Enable HugePages (512 x 2 MB pages = 1 GB)
echo 512 | sudo tee /proc/sys/vm/nr_hugepages
# Verify
grep Huge /proc/meminfo
/etc/sysctl.d/99-mcp.conf
# /etc/sysctl.d/99-mcp.conf
# Network buffers
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_rmem = 4096 87380 16777216
net.ipv4.tcp_wmem = 4096 65536 16777216
# Connections
net.core.somaxconn = 65535
net.ipv4.tcp_max_syn_backlog = 65535
# File descriptors
fs.file-max = 2097152
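To load the new settings without a reboot, re-read the sysctl.d files and spot-check a value:
Terminal
sudo sysctl --system
sysctl net.core.somaxconn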
cache.py
import redis
import json
import hashlib

redis_client = redis.Redis(host='localhost', port=6379, db=0)

def cache_key(tool_name: str, params: dict) -> str:
    """Build a deterministic cache key from the tool name and parameters"""
    param_hash = hashlib.md5(json.dumps(params, sort_keys=True).encode()).hexdigest()
    return f"mcp:tool:{tool_name}:{param_hash}"

@mcp.tool()
async def expensive_operation(query: str) -> dict:
    """Expensive operation with caching"""
    key = cache_key("expensive_operation", {"query": query})
    # Check the cache first
    cached = redis_client.get(key)
    if cached:
        return json.loads(cached)
    # Cache miss: do the work
    result = await perform_expensive_query(query)
    # Store the result (TTL 5 minutes)
    redis_client.setex(key, 300, json.dumps(result))
    return result
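The check-compute-store pattern above generalizes to any tool via a decorator; a minimal sketch reusing redis_client and cache_key from cache.py (the file name is illustrative). Note that redis.Redis is synchronous and blocks the event loop; for heavy workloads redis.asyncio offers the same API with await.
cache_decorator.py
from functools import wraps
import json

# Reuses redis_client and cache_key from cache.py above

def redis_cached(ttl: int = 300):
    """Cache a coroutine's JSON-serializable result in Redis (sketch)"""
    def decorator(func):
        @wraps(func)
        async def wrapper(**kwargs):
            key = cache_key(func.__name__, kwargs)
            cached = redis_client.get(key)
            if cached:
                return json.loads(cached)
            result = await func(**kwargs)
            redis_client.setex(key, ttl, json.dumps(result))
            return result
        return wrapper
    return decorator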
memory_cache.py
from functools import lru_cache
from cachetools import TTLCache
import asyncio

# LRU cache for synchronous functions
@lru_cache(maxsize=1000)
def get_config(key: str) -> str:
    return load_config(key)

# TTL cache for asynchronous code
cache = TTLCache(maxsize=1000, ttl=300)

async def cached_fetch(url: str) -> str:
    if url in cache:
        return cache[url]
    result = await fetch_url(url)
    cache[url] = result
    return result
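With concurrent callers, several tasks can miss the cache for the same URL and all hit the backend at once (a cache stampede). A sketch that guards against this with per-key locks, assuming cache and fetch_url from the block above:
stampede.py
import asyncio
from collections import defaultdict

# One lock per URL, so only the first caller performs the fetch
locks: defaultdict[str, asyncio.Lock] = defaultdict(asyncio.Lock)

async def cached_fetch_once(url: str) -> str:
    if url in cache:
        return cache[url]
    async with locks[url]:
        # Re-check: another task may have filled the cache while we waited
        if url in cache:
            return cache[url]
        result = await fetch_url(url)
        cache[url] = result
        return result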
database.py
from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker

engine = create_async_engine(
    "postgresql+asyncpg://user:pass@localhost/db",
    pool_size=20,          # persistent connections kept in the pool
    max_overflow=10,       # extra connections allowed under burst load
    pool_pre_ping=True,    # validate connections before handing them out
    pool_recycle=3600,     # recycle connections after an hour
)
async_session = async_sessionmaker(engine, expire_on_commit=False)

@mcp.tool()
async def query_db(sql: str) -> list:
    async with async_session() as session:
        result = await session.execute(text(sql))
        return [dict(row) for row in result.mappings().all()]
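Accepting raw SQL as a tool argument invites injection; where the query shape is known, bound parameters are safer. A sketch assuming a hypothetical users table and the async_session factory above:
query_safe.py
from sqlalchemy import text

@mcp.tool()
async def get_user(user_id: int) -> dict | None:
    """Fetch one row with a bound parameter instead of string interpolation"""
    async with async_session() as session:
        result = await session.execute(
            text("SELECT id, name FROM users WHERE id = :id"),
            {"id": user_id},
        )
        row = result.mappings().first()
        return dict(row) if row else None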
http_client.py
import aiohttp

# Global session shared across all requests
connector = aiohttp.TCPConnector(
    limit=100,            # total connection limit
    limit_per_host=30,    # limit per host
    keepalive_timeout=30,
)
session = aiohttp.ClientSession(connector=connector)

@mcp.tool()
async def fetch_api(url: str) -> dict:
    async with session.get(url) as response:
        return await response.json()
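Creating the ClientSession at import time binds it to whatever event loop exists at that moment, which can misbehave under some runners. A variant that builds the session lazily inside the running loop on first use (file name illustrative):
http_client_lazy.py
import aiohttp

_session: aiohttp.ClientSession | None = None

async def get_session() -> aiohttp.ClientSession:
    """Create the shared session on first use, inside the running event loop"""
    global _session
    if _session is None or _session.closed:
        _session = aiohttp.ClientSession(connector=aiohttp.TCPConnector(
            limit=100, limit_per_host=30, keepalive_timeout=30))
    return _session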
parallel.py
import asyncio

@mcp.tool()
async def parallel_fetch(urls: list[str]) -> list[dict]:
    """Fetch several URLs in parallel"""
    tasks = [fetch_url(url) for url in urls]
    results = await asyncio.gather(*tasks, return_exceptions=True)
    return [
        {"url": url, "result": r if not isinstance(r, Exception) else str(r)}
        for url, r in zip(urls, results)
    ]
batch.py
@mcp.tool()
async def batch_process(items: list[dict]) -> list[dict]:
    """Process items in fixed-size batches"""
    BATCH_SIZE = 100
    results = []
    for i in range(0, len(items), BATCH_SIZE):
        batch = items[i:i + BATCH_SIZE]
        batch_results = await asyncio.gather(*[
            process_item(item) for item in batch
        ])
        results.extend(batch_results)
    return results
semaphore.py
semaphore = asyncio.Semaphore(10)  # at most 10 concurrent operations

async def limited_fetch(url: str) -> str:
    async with semaphore:
        return await fetch_url(url)
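Combined with asyncio.gather, the semaphore caps fan-out while the requests still run concurrently; a usage sketch continuing semaphore.py:
semaphore.py
# At most 10 fetches are in flight at any moment
results = await asyncio.gather(*(limited_fetch(u) for u in urls))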
/etc/nginx/nginx.conf
upstream mcp_servers {
    least_conn;  # pick the least-loaded backend
    server 127.0.0.1:3001 weight=5;
    server 127.0.0.1:3002 weight=3;
    server 127.0.0.1:3003 weight=2;
    keepalive 32;
}
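The keepalive directive only takes effect when the proxied connection uses HTTP/1.1 with the Connection header cleared; a minimal server block (the listen port is illustrative):
/etc/nginx/nginx.conf
server {
    listen 3000;
    location / {
        proxy_pass http://mcp_servers;
        # Required for upstream keepalive connections
        proxy_http_version 1.1;
        proxy_set_header Connection "";
    }
}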
hpa.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: mcp-server-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: mcp-server
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
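Applying and inspecting the autoscaler (assumes metrics-server is installed in the cluster, since the HPA reads CPU and memory utilization from it):
Terminal
kubectl apply -f hpa.yaml
# Watch current vs. target utilization and the replica count
kubectl get hpa mcp-server-hpa --watch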
metrics.py
from functools import wraps
from prometheus_client import Counter, Histogram, Gauge, start_http_server

# Metrics
REQUESTS = Counter('mcp_requests_total', 'Total requests', ['tool'])
LATENCY = Histogram('mcp_request_latency_seconds', 'Request latency', ['tool'])
ACTIVE = Gauge('mcp_active_requests', 'Active requests')

def instrument_tool(func):
    """Decorator that records request count, latency, and in-flight requests"""
    @wraps(func)
    async def wrapper(*args, **kwargs):
        tool_name = func.__name__
        REQUESTS.labels(tool=tool_name).inc()
        ACTIVE.inc()
        with LATENCY.labels(tool=tool_name).time():
            try:
                return await func(*args, **kwargs)
            finally:
                ACTIVE.dec()
    return wrapper

# Start the metrics endpoint
start_http_server(9090)
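The decorator composes with tool registration; place it between @mcp.tool() and the function so the version registered with the server is the instrumented wrapper. A sketch with a trivial tool, assuming the mcp instance from earlier:
instrumented_tool.py
@mcp.tool()
@instrument_tool
async def add(a: int, b: int) -> int:
    return a + b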
dashboard.json
{
  "panels": [
    {
      "title": "Requests per Second",
      "type": "graph",
      "targets": [
        {
          "expr": "rate(mcp_requests_total[1m])",
          "legendFormat": "{{tool}}"
        }
      ]
    },
    {
      "title": "Latency P99",
      "type": "graph",
      "targets": [
        {
          "expr": "histogram_quantile(0.99, rate(mcp_request_latency_seconds_bucket[5m]))",
          "legendFormat": "{{tool}}"
        }
      ]
    }
  ]
}
locustfile.py
from locust import HttpUser, task, between

class McpUser(HttpUser):
    wait_time = between(0.1, 0.5)

    @task(3)
    def call_tool(self):
        self.client.post("/message", json={
            "jsonrpc": "2.0",
            "method": "tools/call",
            "params": {
                "name": "add",
                "arguments": {"a": 1, "b": 2}
            },
            "id": 1
        })

    @task(1)
    def list_tools(self):
        self.client.post("/message", json={
            "jsonrpc": "2.0",
            "method": "tools/list",
            "id": 1
        })
Terminal
# 100 concurrent users, spawn rate 10 users/second
locust -f locustfile.py --host=http://localhost:3000 -u 100 -r 10
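For CI or other unattended runs, Locust can also run without the web UI for a fixed duration:
Terminal
locust -f locustfile.py --host=http://localhost:3000 -u 100 -r 10 --headless --run-time 2m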