/**
 * Provider failover on retriable upstream errors (Story 3.3 — FR18 / NFR-R1).
 *
 * Story 3.5 BYOK: when user BYOK is active, call with skipSystemFallback: true.
 */

import { APICallError } from 'ai'
import { QuotaExceededError } from '@/lib/entitlements'
import { getProviderInstance, type ProviderType } from './factory'
import {
  resolveAiRoute,
  VALID_PROVIDERS,
  type AiFeatureLane,
  type AiGatewayProvider,
  type ResolvedAiRoute,
} from './router'
import type { AIProvider } from './types'

/** Fallback latency budget in milliseconds; exceeding it only triggers a log warning. */
export const FALLBACK_BUDGET_MS = 1500

/** Flat key/value configuration map consulted before `process.env`. */
type ProviderConfig = Record<string, string>

/** Configuration key names used to resolve the fallback route of one lane. */
interface LaneFallbackKeys {
  /** Key holding the fallback provider name. */
  provider: string
  /** Key holding the fallback model name. */
  model: string
  /** Key holding the lane's primary model name (used when no fallback model is set). */
  primaryModel: string
  /** Key holding the lane-specific Ollama base URL. */
  ollamaUrl: string
}

/** A resolved route together with the provider instance built for it. */
interface RoutedProvider {
  provider: AIProvider
  route: ResolvedAiRoute
}

const VALID_PROVIDER_LIST = [...VALID_PROVIDERS].join(', ')

const LANE_FALLBACK_KEYS: Record<AiFeatureLane, LaneFallbackKeys> = {
  chat: {
    provider: 'AI_PROVIDER_CHAT_FALLBACK',
    model: 'AI_MODEL_CHAT_FALLBACK',
    primaryModel: 'AI_MODEL_CHAT',
    ollamaUrl: 'OLLAMA_BASE_URL_CHAT',
  },
  tags: {
    provider: 'AI_PROVIDER_TAGS_FALLBACK',
    model: 'AI_MODEL_TAGS_FALLBACK',
    primaryModel: 'AI_MODEL_TAGS',
    ollamaUrl: 'OLLAMA_BASE_URL_TAGS',
  },
  embedding: {
    provider: 'AI_PROVIDER_EMBEDDING_FALLBACK',
    model: 'AI_MODEL_EMBEDDING_FALLBACK',
    primaryModel: 'AI_MODEL_EMBEDDING',
    ollamaUrl: 'OLLAMA_BASE_URL_EMBEDDING',
  },
}

/**
 * Read `key` from `config`, falling back to `process.env`.
 * Empty strings are treated as "not set" in both sources.
 */
function pick(config: ProviderConfig, key: string): string | undefined {
  const v = config[key]
  if (v != null && v !== '') return v
  const e = process.env[key]
  return e != null && e !== '' ? e : undefined
}

/** Read `key` from `config` only (no `process.env` fallback); empty string counts as unset. */
function cfgOnly(config: ProviderConfig, key: string): string | undefined {
  const v = config[key]
  return v != null && v !== '' ? v : undefined
}

/**
 * Recursive worker for {@link extractProviderErrorStatus}.
 *
 * @param err   Thrown value to inspect.
 * @param depth Current position in the `cause` chain; recursion stops past depth 5.
 * @returns The first HTTP-like status found, or `undefined`.
 */
function extractProviderErrorStatusDepth(err: unknown, depth: number): number | undefined {
  // Bound the walk so a cyclic or very deep `cause` chain cannot recurse forever.
  if (depth > 5) return undefined
  // Quota errors are reported as 402 so callers classify them as non-retriable 4xx.
  if (err instanceof QuotaExceededError) return 402
  if (APICallError.isInstance(err)) {
    return err.statusCode ?? undefined
  }
  if (err && typeof err === 'object') {
    const o = err as Record<string, unknown>
    if (typeof o.statusCode === 'number') return o.statusCode
    if (typeof o.status === 'number') return o.status
    if (o.cause) return extractProviderErrorStatusDepth(o.cause, depth + 1)
  }
  return undefined
}

/**
 * Extract an HTTP-like status code from a provider error.
 *
 * Checks, in order: `QuotaExceededError` (reported as 402), `APICallError.statusCode`,
 * a numeric `statusCode` or `status` property, then the `cause` chain.
 *
 * @returns The status code, or `undefined` when none can be found.
 */
export function extractProviderErrorStatus(err: unknown): number | undefined {
  return extractProviderErrorStatusDepth(err, 0)
}

/** True for HTTP 429 and 5xx provider failures; false for quota and other 4xx. */
export function isRetriableProviderError(err: unknown): boolean {
  if (err instanceof QuotaExceededError) return false
  if (err && typeof err === 'object') {
    // Also recognise quota errors by their code, not only by class identity.
    const code = (err as { code?: string }).code
    if (code === 'QUOTA_EXCEEDED') return false
  }
  const status = extractProviderErrorStatus(err)
  if (status === undefined) return false
  return status === 429 || (status >= 500 && status < 600)
}

/**
 * Resolve the secondary route for a lane.
 *
 * The fallback provider is read from the lane's `*_FALLBACK` provider key. The model
 * name comes from the `*_FALLBACK` model key, then the lane's primary model key, then
 * the resolved primary route. The Ollama base URL is read from `config` only
 * (lane-specific key first, then `OLLAMA_BASE_URL`).
 *
 * @returns The fallback route, or `null` when no fallback provider is configured or it
 *          is the same provider type as the primary route.
 * @throws Error when the configured provider is unknown, or when an Anthropic provider
 *         is configured for the embedding lane.
 */
export function resolveAiFallbackRoute(
  lane: AiFeatureLane,
  config: ProviderConfig
): ResolvedAiRoute | null {
  const keys = LANE_FALLBACK_KEYS[lane]
  const providerRaw = pick(config, keys.provider)?.trim()
  if (!providerRaw) return null

  const providerType = providerRaw.toLowerCase()
  if (!VALID_PROVIDERS.has(providerType)) {
    throw new Error(
      `Unknown fallback provider '${providerRaw}'. Valid options: ${VALID_PROVIDER_LIST}`
    )
  }
  if (lane === 'embedding' && (providerType === 'anthropic' || providerType === 'anthropic_custom')) {
    throw new Error(
      'AI_PROVIDER_EMBEDDING_FALLBACK cannot use "anthropic" or "anthropic_custom": no embeddings on this gateway.'
    )
  }

  const primary = resolveAiRoute(lane, config)
  // Failing over to the same provider type is treated as "no fallback configured".
  if (providerType === primary.providerType) return null

  const modelName =
    pick(config, keys.model) ?? pick(config, keys.primaryModel) ?? primary.modelName
  const embeddingModelName =
    pick(config, 'AI_MODEL_EMBEDDING_FALLBACK') ??
    pick(config, 'AI_MODEL_EMBEDDING') ??
    primary.embeddingModelName
  // cfgOnly never returns an empty string, so `??` is sufficient here.
  const ollamaBaseUrl = cfgOnly(config, keys.ollamaUrl) ?? cfgOnly(config, 'OLLAMA_BASE_URL')

  return {
    lane,
    providerType: providerType as AiGatewayProvider,
    modelName,
    embeddingModelName,
    ollamaBaseUrl,
    meta: {},
  }
}

/** Build the provider instance described by `route`. */
function getProviderForRoute(config: ProviderConfig, route: ResolvedAiRoute): AIProvider {
  return getProviderInstance(
    route.providerType as ProviderType,
    config,
    route.modelName,
    route.embeddingModelName,
    route.ollamaBaseUrl
  )
}

/** Resolve the primary route for `lane` and build its provider. */
function getPrimaryProvider(lane: AiFeatureLane, config: ProviderConfig): RoutedProvider {
  const route = resolveAiRoute(lane, config)
  return { route, provider: getProviderForRoute(config, route) }
}

/**
 * Resolve the fallback route for `lane` and build its provider.
 *
 * Best effort: a missing or invalid fallback configuration yields `null` so the caller
 * can surface the primary error. Configuration failures are logged, not thrown.
 */
function getSecondaryProvider(lane: AiFeatureLane, config: ProviderConfig): RoutedProvider | null {
  try {
    const fbRoute = resolveAiFallbackRoute(lane, config)
    if (!fbRoute) return null
    return { route: fbRoute, provider: getProviderForRoute(config, fbRoute) }
  } catch (err) {
    // Report the misconfiguration rather than hiding it; failover is still skipped.
    console.warn(
      `[ai-fallback] fallback route for lane '${lane}' is unusable:`,
      err instanceof Error ? err.message : err
    )
    return null
  }
}

/**
 * Log a successful failover.
 *
 * Output (including the budget warning) is emitted only outside production or when
 * `MEMENTO_AI_ROUTE_DEBUG` is `'1'`.
 */
function logFallbackSuccess(meta: {
  lane: AiFeatureLane
  primaryProvider: string
  secondaryProvider: string
  primaryStatus?: number
  fallbackMs: number
}): void {
  if (process.env.NODE_ENV !== 'production' || process.env.MEMENTO_AI_ROUTE_DEBUG === '1') {
    console.debug('[ai-fallback]', JSON.stringify(meta))
    if (meta.fallbackMs > FALLBACK_BUDGET_MS) {
      console.warn(
        `[ai-fallback] NFR-R1 budget exceeded: ${meta.fallbackMs.toFixed(1)}ms > ${FALLBACK_BUDGET_MS}ms`
      )
    }
  }
}

export interface WithAiProviderFallbackOptions {
  /** Story 3.5: skip system secondary when user BYOK is active */
  skipSystemFallback?: boolean
}

/**
 * Run an AI operation on the primary provider; on retriable failure, try secondary once.
 *
 * @param lane    Feature lane whose route is resolved.
 * @param config  Configuration map used to resolve routes and build providers.
 * @param run     Operation to execute against a provider; may be invoked twice.
 * @param options Set `skipSystemFallback` to run on the primary provider only.
 * @returns The result of `run` from whichever provider succeeded.
 * @throws The primary error when it is not retriable or no fallback is available;
 *         the secondary error when the fallback attempt also fails.
 */
export async function withAiProviderFallback<T>(
  lane: AiFeatureLane,
  config: ProviderConfig,
  run: (provider: AIProvider) => Promise<T>,
  options?: WithAiProviderFallbackOptions
): Promise<T> {
  const primary = getPrimaryProvider(lane, config)
  if (options?.skipSystemFallback) {
    return run(primary.provider)
  }

  try {
    return await run(primary.provider)
  } catch (err) {
    if (!isRetriableProviderError(err)) throw err

    // The measured window covers fallback resolution plus the secondary call.
    const fallbackStart = performance.now()
    const secondary = getSecondaryProvider(lane, config)
    if (!secondary) throw err

    const primaryStatus = extractProviderErrorStatus(err)
    try {
      const result = await run(secondary.provider)
      logFallbackSuccess({
        lane,
        primaryProvider: primary.route.providerType,
        secondaryProvider: secondary.route.providerType,
        primaryStatus,
        fallbackMs: performance.now() - fallbackStart,
      })
      return result
    } catch (secondaryErr) {
      console.error(
        `[ai-fallback] secondary also failed for lane '${lane}':`,
        secondaryErr instanceof Error ? secondaryErr.message : secondaryErr
      )
      throw secondaryErr
    }
  }
}