/**
 * Memento MCP Server - Metrics & Observability
 *
 * Collects and exports metrics for monitoring and observability.
 * Compatible with Prometheus scraping format.
 */

import config from './config.js';

/**
 * Metrics storage.
 *
 * This object is the module's default export, so it is mutated in place by
 * the record/update/reset functions below rather than being replaced.
 */
const metrics = {
  // Request metrics
  requests: {
    total: 0,
    byTool: {},
    byStatus: {},
    byMethod: {},
  },

  // Response time metrics (in milliseconds)
  latency: {
    values: [],
    p50: 0,
    p95: 0,
    p99: 0,
    avg: 0,
  },

  // Error metrics
  errors: {
    total: 0,
    byCategory: {},
    byCode: {},
  },

  // Authentication metrics
  auth: {
    successes: 0,
    failures: 0,
    byMethod: {},
  },

  // Database metrics
  database: {
    queries: 0,
    errors: 0,
    slowQueries: 0,
    avgQueryTime: 0,
    activeConnections: 0,
  },

  // Session metrics
  sessions: {
    active: 0,
    created: 0,
    expired: 0,
    total: 0,
  },

  // Rate limiting metrics
  rateLimit: {
    blocked: 0,
    byUser: {},
  },

  // Cache metrics
  cache: {
    hits: 0,
    misses: 0,
    size: 0,
  },

  // Tool-specific metrics
  tools: {
    calls: {},
    failures: {},
    avgExecutionTime: {},
  },
};

// Latency samples (keep last 1000 for percentile calculation)
const latencySamples = [];
const MAX_LATENCY_SAMPLES = 1000;

/**
 * Check whether `key` is an own property of `target`.
 *
 * The per-key dictionaries are plain objects indexed by caller-supplied
 * strings, so inherited members such as `constructor` or `toString` must not
 * be mistaken for existing entries.
 */
function hasOwnKey(target, key) {
  return Object.prototype.hasOwnProperty.call(target, key);
}

/**
 * Create or overwrite an own, enumerable data property.
 *
 * `Object.defineProperty` is used instead of plain assignment so that the key
 * `__proto__` is stored as ordinary data instead of hitting the prototype
 * setter (which would silently discard a numeric value).
 */
function setOwnKey(target, key, value) {
  Object.defineProperty(target, key, {
    value,
    writable: true,
    enumerable: true,
    configurable: true,
  });
}

/**
 * Add one to the counter stored under `key`, starting from zero when the key
 * has not been seen before.
 */
function incrementCounter(counters, key) {
  const current = hasOwnKey(counters, key) ? counters[key] : 0;
  setOwnKey(counters, key, current + 1);
}

/**
 * Escape a label value for the Prometheus text exposition format:
 * backslash, double quote and line feed are backslash-escaped.
 */
function escapeLabelValue(value) {
  return String(value)
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\n/g, '\\n');
}

/**
 * Record a request
 *
 * @param {string} tool - Key counted in `requests.byTool`.
 * @param {string|number} status - Key counted in `requests.byStatus`.
 * @param {string} [method='unknown'] - Key counted in `requests.byMethod`.
 * @param {number} [latency=0] - Latency in milliseconds; only values > 0 are
 *   added to the latency samples.
 */
export function recordRequest(tool, status, method = 'unknown', latency = 0) {
  metrics.requests.total++;

  incrementCounter(metrics.requests.byTool, tool);
  incrementCounter(metrics.requests.byStatus, status);
  incrementCounter(metrics.requests.byMethod, method);

  // Record latency
  if (latency > 0) {
    recordLatency(latency);
  }
}

/**
 * Record latency sample
 *
 * Appends the sample, drops the oldest one once more than
 * MAX_LATENCY_SAMPLES are held, and recomputes the latency aggregates.
 */
function recordLatency(ms) {
  latencySamples.push(ms);

  // Keep only recent samples
  if (latencySamples.length > MAX_LATENCY_SAMPLES) {
    latencySamples.shift();
  }

  // Update percentiles
  updateLatencyMetrics();
}

/**
 * Update latency percentiles
 *
 * Each percentile is read from a sorted copy of the samples at index
 * `floor(length * q)`; the average is the arithmetic mean of the samples.
 * Does nothing when there are no samples.
 */
function updateLatencyMetrics() {
  if (latencySamples.length === 0) return;

  // Sort a copy: latencySamples must stay in arrival order for eviction.
  const sorted = [...latencySamples].sort((a, b) => a - b);
  const len = sorted.length;

  metrics.latency.p50 = sorted[Math.floor(len * 0.5)] || 0;
  metrics.latency.p95 = sorted[Math.floor(len * 0.95)] || 0;
  metrics.latency.p99 = sorted[Math.floor(len * 0.99)] || 0;
  metrics.latency.avg = sorted.reduce((a, b) => a + b, 0) / len;
}

/**
 * Record an error
 *
 * @param {string} category - Key counted in `errors.byCategory`.
 * @param {string|number} code - Key counted in `errors.byCode`.
 * @param {object} [context={}] - Accepted for interface compatibility; not
 *   read by this function.
 */
export function recordError(category, code, context = {}) {
  metrics.errors.total++;

  incrementCounter(metrics.errors.byCategory, category);
  incrementCounter(metrics.errors.byCode, code);
}

/**
 * Record authentication event
 *
 * @param {boolean} success - Truthy counts as a success, falsy as a failure.
 * @param {string} [method='unknown'] - Key under `auth.byMethod` whose
 *   `{ successes, failures }` pair is updated.
 */
export function recordAuth(success, method = 'unknown') {
  const { byMethod } = metrics.auth;

  // Own-property check so a method named e.g. "constructor" gets its own
  // entry instead of mutating an inherited object.
  if (!hasOwnKey(byMethod, method)) {
    setOwnKey(byMethod, method, { successes: 0, failures: 0 });
  }
  const perMethod = byMethod[method];

  if (success) {
    metrics.auth.successes++;
    perMethod.successes++;
  } else {
    metrics.auth.failures++;
    perMethod.failures++;
  }
}

/**
 * Record database query
 *
 * @param {boolean} success - Falsy increments `database.errors`.
 * @param {number} [duration=0] - Query time in milliseconds. Values <= 0
 *   still increment the query count but leave the running average and the
 *   slow-query counter untouched.
 */
export function recordDbQuery(success, duration = 0) {
  metrics.database.queries++;

  if (duration > 0) {
    // Update average query time (incremental mean over the query count)
    const currentAvg = metrics.database.avgQueryTime;
    const count = metrics.database.queries;
    metrics.database.avgQueryTime = (currentAvg * (count - 1) + duration) / count;

    // Track slow queries (> 1 second)
    if (duration > 1000) {
      metrics.database.slowQueries++;
    }
  }

  if (!success) {
    metrics.database.errors++;
  }
}

/**
 * Record session event
 *
 * @param {string} event - 'create' adds `count` to created and total,
 *   'expire' adds `count` to expired and subtracts it from total, 'active'
 *   sets the active gauge to `count`. Any other value is ignored.
 * @param {number} [count=1]
 */
export function recordSession(event, count = 1) {
  switch (event) {
    case 'create':
      metrics.sessions.created += count;
      metrics.sessions.total += count;
      break;
    case 'expire':
      metrics.sessions.expired += count;
      metrics.sessions.total -= count;
      break;
    case 'active':
      metrics.sessions.active = count;
      break;
  }
}

/**
 * Record rate limit block
 *
 * @param {string} identifier - Key counted in `rateLimit.byUser`.
 */
export function recordRateLimitBlocked(identifier) {
  metrics.rateLimit.blocked++;
  incrementCounter(metrics.rateLimit.byUser, identifier);
}

/**
 * Record cache hit/miss
 *
 * @param {boolean} hit - Truthy counts a hit, falsy a miss.
 */
export function recordCacheHit(hit) {
  if (hit) {
    metrics.cache.hits++;
  } else {
    metrics.cache.misses++;
  }
}

/**
 * Update cache size
 *
 * @param {number} size - Stored as-is in `cache.size`.
 */
export function updateCacheSize(size) {
  metrics.cache.size = size;
}

/**
 * Record tool execution
 *
 * @param {string} tool - Key used in `tools.calls`, `tools.failures` and
 *   `tools.avgExecutionTime`.
 * @param {boolean} success - Falsy increments the tool's failure counter.
 * @param {number} [duration=0] - Execution time; only values > 0 update the
 *   tool's running average.
 */
export function recordToolExecution(tool, success, duration = 0) {
  // Record call
  incrementCounter(metrics.tools.calls, tool);

  // Record failure
  if (!success) {
    incrementCounter(metrics.tools.failures, tool);
  }

  // Update average execution time (incremental mean over the call count)
  if (duration > 0) {
    const averages = metrics.tools.avgExecutionTime;
    const currentAvg = hasOwnKey(averages, tool) ? averages[tool] : 0;
    const callCount = metrics.tools.calls[tool];
    setOwnKey(averages, tool, (currentAvg * (callCount - 1) + duration) / callCount);
  }
}

/**
 * Get current metrics snapshot
 *
 * The copy is shallow: the top-level sections (`requests`, `latency`, ...)
 * are the live objects held by this module, not clones.
 *
 * @returns {object} The metric sections plus `uptime`, `memory` and an ISO
 *   `timestamp`.
 */
export function getMetrics() {
  return {
    ...metrics,
    uptime: process.uptime(),
    memory: process.memoryUsage(),
    timestamp: new Date().toISOString(),
  };
}

/**
 * Get metrics in Prometheus format
 *
 * Every sample line is prefixed with `mcp_` and carries the same
 * `Date.now()` timestamp taken at the start of the call.
 *
 * @returns {string} Newline-separated exposition text ending with a newline.
 */
export function getPrometheusMetrics() {
  const lines = [];
  const timestamp = Date.now();

  // Helper to format metric line; label values are escaped so that quotes,
  // backslashes or newlines in caller-supplied names cannot break the line.
  const metricLine = (name, value, labels = {}) => {
    const pairs = Object.entries(labels).map(
      ([key, labelValue]) => `${key}="${escapeLabelValue(labelValue)}"`
    );
    const labelStr = pairs.length > 0 ? `{${pairs.join(',')}}` : '';
    return `mcp_${name}${labelStr} ${value} ${timestamp}`;
  };

  // Request metrics
  lines.push(`# HELP mcp_requests_total Total number of requests`);
  lines.push(`# TYPE mcp_requests_total counter`);
  lines.push(metricLine('requests_total', metrics.requests.total));
  for (const [tool, count] of Object.entries(metrics.requests.byTool)) {
    lines.push(metricLine('requests_total', count, { tool }));
  }
  for (const [status, count] of Object.entries(metrics.requests.byStatus)) {
    lines.push(metricLine('requests_total', count, { status }));
  }

  // Latency metrics
  lines.push(`# HELP mcp_latency_ms Request latency in milliseconds`);
  lines.push(`# TYPE mcp_latency_ms gauge`);
  lines.push(metricLine('latency_ms', metrics.latency.p50, { quantile: '0.5' }));
  lines.push(metricLine('latency_ms', metrics.latency.p95, { quantile: '0.95' }));
  lines.push(metricLine('latency_ms', metrics.latency.p99, { quantile: '0.99' }));
  lines.push(metricLine('latency_ms', metrics.latency.avg, { quantile: 'avg' }));

  // Error metrics
  lines.push(`# HELP mcp_errors_total Total number of errors`);
  lines.push(`# TYPE mcp_errors_total counter`);
  lines.push(metricLine('errors_total', metrics.errors.total));
  for (const [category, count] of Object.entries(metrics.errors.byCategory)) {
    lines.push(metricLine('errors_total', count, { category }));
  }

  // Auth metrics
  lines.push(`# HELP mcp_auth_total Authentication attempts`);
  lines.push(`# TYPE mcp_auth_total counter`);
  lines.push(metricLine('auth_total', metrics.auth.successes, { result: 'success' }));
  lines.push(metricLine('auth_total', metrics.auth.failures, { result: 'failure' }));

  // Database metrics
  lines.push(`# HELP mcp_db_queries_total Total database queries`);
  lines.push(`# TYPE mcp_db_queries_total counter`);
  lines.push(metricLine('db_queries_total', metrics.database.queries));
  lines.push(metricLine('db_errors_total', metrics.database.errors));
  lines.push(metricLine('db_slow_queries_total', metrics.database.slowQueries));
  lines.push(metricLine('db_query_latency_ms', metrics.database.avgQueryTime));

  // Session metrics
  lines.push(`# HELP mcp_sessions_active Active sessions`);
  lines.push(`# TYPE mcp_sessions_active gauge`);
  lines.push(metricLine('sessions_active', metrics.sessions.active));
  lines.push(metricLine('sessions_created_total', metrics.sessions.created));
  lines.push(metricLine('sessions_expired_total', metrics.sessions.expired));

  // Rate limit metrics
  lines.push(`# HELP mcp_rate_limit_blocked_total Rate limit blocks`);
  lines.push(`# TYPE mcp_rate_limit_blocked_total counter`);
  lines.push(metricLine('rate_limit_blocked_total', metrics.rateLimit.blocked));

  // Cache metrics
  lines.push(`# HELP mcp_cache_operations_total Cache operations`);
  lines.push(`# TYPE mcp_cache_operations_total counter`);
  lines.push(metricLine('cache_operations_total', metrics.cache.hits, { result: 'hit' }));
  lines.push(metricLine('cache_operations_total', metrics.cache.misses, { result: 'miss' }));
  lines.push(metricLine('cache_size', metrics.cache.size));

  // Tool metrics
  lines.push(`# HELP mcp_tool_calls_total Tool calls`);
  lines.push(`# TYPE mcp_tool_calls_total counter`);
  for (const [tool, count] of Object.entries(metrics.tools.calls)) {
    lines.push(metricLine('tool_calls_total', count, { tool }));
  }

  lines.push(`# HELP mcp_tool_failures_total Tool failures`);
  lines.push(`# TYPE mcp_tool_failures_total counter`);
  for (const [tool, count] of Object.entries(metrics.tools.failures)) {
    lines.push(metricLine('tool_failures_total', count, { tool }));
  }

  lines.push(`# HELP mcp_tool_duration_ms Tool execution duration`);
  lines.push(`# TYPE mcp_tool_duration_ms gauge`);
  for (const [tool, avg] of Object.entries(metrics.tools.avgExecutionTime)) {
    lines.push(metricLine('tool_duration_ms', avg, { tool }));
  }

  // Process metrics (sample memory once so the three values are consistent)
  const memory = process.memoryUsage();
  lines.push(`# HELP mcp_process_memory_bytes Process memory usage`);
  lines.push(`# TYPE mcp_process_memory_bytes gauge`);
  lines.push(metricLine('process_memory_bytes', memory.rss, { type: 'rss' }));
  lines.push(metricLine('process_memory_bytes', memory.heapUsed, { type: 'heap_used' }));
  lines.push(metricLine('process_memory_bytes', memory.heapTotal, { type: 'heap_total' }));

  lines.push(`# HELP mcp_process_uptime_seconds Process uptime`);
  lines.push(`# TYPE mcp_process_uptime_seconds gauge`);
  lines.push(metricLine('process_uptime_seconds', process.uptime()));

  return lines.join('\n') + '\n';
}

/**
 * Reset all metrics (useful for testing)
 *
 * Fields are reassigned on the existing `metrics` object so that references
 * obtained through the default export keep pointing at the live state.
 */
export function resetMetrics() {
  metrics.requests.total = 0;
  metrics.requests.byTool = {};
  metrics.requests.byStatus = {};
  metrics.requests.byMethod = {};

  metrics.latency.values = [];
  metrics.latency.p50 = 0;
  metrics.latency.p95 = 0;
  metrics.latency.p99 = 0;
  metrics.latency.avg = 0;

  metrics.errors.total = 0;
  metrics.errors.byCategory = {};
  metrics.errors.byCode = {};

  metrics.auth.successes = 0;
  metrics.auth.failures = 0;
  metrics.auth.byMethod = {};

  metrics.database.queries = 0;
  metrics.database.errors = 0;
  metrics.database.slowQueries = 0;
  metrics.database.avgQueryTime = 0;
  metrics.database.activeConnections = 0;

  metrics.sessions.active = 0;
  metrics.sessions.created = 0;
  metrics.sessions.expired = 0;
  metrics.sessions.total = 0;

  metrics.rateLimit.blocked = 0;
  metrics.rateLimit.byUser = {};

  metrics.cache.hits = 0;
  metrics.cache.misses = 0;
  metrics.cache.size = 0;

  metrics.tools.calls = {};
  metrics.tools.failures = {};
  metrics.tools.avgExecutionTime = {};

  // Truncate in place; the array binding is const and shared with recordLatency.
  latencySamples.length = 0;
}

/**
 * Get metrics summary for health endpoint
 *
 * @returns {object} Uptime, request and error totals, error rate
 *   (errors / requests, 0 when there are no requests), average latency,
 *   active sessions and cache hit rate (0 when there were no lookups).
 */
export function getMetricsSummary() {
  const { hits, misses } = metrics.cache;
  const lookups = hits + misses;

  return {
    uptime: process.uptime(),
    requests: metrics.requests.total,
    errors: metrics.errors.total,
    errorRate: metrics.requests.total > 0
      ? metrics.errors.total / metrics.requests.total
      : 0,
    avgLatency: metrics.latency.avg,
    sessions: metrics.sessions.active,
    cacheHitRate: lookups > 0 ? hits / lookups : 0,
  };
}

export default metrics;