/**
 * Central synchronous AI gateway routing (Story 3.2 — FR17 / NFR-P3).
 *
 * Future (Story 3.5 BYOK): plug user-scoped API keys into resolveAiRoute output / factory instantiation.
 *
 * Non-goals here (by design):
 * - Multi-provider HTTP fallback on 429/500 → Story 3.3
 * - BYOK / UserAPIKey decryption → Story 3.5 (extension seam: same resolve output + key source later)
 */

/** Feature lanes; each lane resolves its own provider, model and Ollama base URL. */
export type AiFeatureLane = 'chat' | 'tags' | 'embedding'

/**
 * Single source of truth for the accepted provider identifiers.
 * The order is significant: it is the order shown in the "Options:" part of error messages.
 */
const PROVIDER_NAMES = [
  'ollama',
  'openai',
  'google',
  'minimax',
  'glm',
  'custom',
  'deepseek',
  'openrouter',
  'mistral',
  'zai',
  'lmstudio',
  'anthropic',
  'anthropic_custom',
] as const

/** Provider identifier accepted by the gateway (always lower-case). */
export type AiGatewayProvider = (typeof PROVIDER_NAMES)[number]

/** Result of resolving one lane against the configuration. */
export interface ResolvedAiRoute {
  /** Lane that was requested. */
  lane: AiFeatureLane
  /** Validated, lower-cased provider identifier. */
  providerType: AiGatewayProvider
  /** Lane model: the lane's `AI_MODEL_*` value, else the provider default (may be `''`). */
  modelName: string
  /** `AI_MODEL_EMBEDDING` value, else the provider's default embedding model (may be `''`). */
  embeddingModelName: string
  /** Ollama base URL taken from the passed config only (never from `process.env`). */
  ollamaBaseUrl?: string
  meta: {
    /** Milliseconds spent resolving; only set by `resolveAiRouteWithTiming`. */
    resolveMs?: number
  }
}

/** Runtime set of accepted provider identifiers (lower-case). */
export const VALID_PROVIDERS = new Set<string>(PROVIDER_NAMES)

/** Key/value settings handed to the resolver; missing, null and empty values all count as "unset". */
type AiConfigValues = Readonly<Record<string, string | null | undefined>>

/** Default chat and embedding model for a provider; `''` means "no default". */
interface ModelDefaults {
  model: string
  embeddingModel: string
}

const EMPTY_MODEL_DEFAULTS: ModelDefaults = { model: '', embeddingModel: '' }

const PROVIDER_MODEL_DEFAULTS: Record<AiGatewayProvider, ModelDefaults> = {
  ollama: { model: 'granite4:latest', embeddingModel: 'embeddinggemma:latest' },
  openai: { model: 'gpt-4o-mini', embeddingModel: 'text-embedding-3-small' },
  anthropic: { model: 'claude-sonnet-4-6-20250514', embeddingModel: '' },
  anthropic_custom: { model: 'claude-sonnet-4-6-20250514', embeddingModel: '' },
  deepseek: { model: 'deepseek-chat', embeddingModel: '' },
  openrouter: { model: 'openai/gpt-4o-mini', embeddingModel: 'openai/text-embedding-3-small' },
  google: { model: 'gemini-1.5-flash', embeddingModel: 'text-embedding-004' },
  mistral: { model: 'mistral-small-latest', embeddingModel: 'mistral-embed' },
  zai: { model: 'gpt-4o-mini', embeddingModel: 'text-embedding-3-small' },
  minimax: { model: 'abab6.5-chat', embeddingModel: '' },
  glm: { model: 'glm-4', embeddingModel: 'embedding-2' },
  lmstudio: { model: '', embeddingModel: '' },
  custom: { model: '', embeddingModel: '' },
}

/** Keys consulted for one lane, listed in precedence order (first non-empty value wins). */
interface LaneRule {
  /** Provider keys; the first entry is the one named in the "not configured" error. */
  providerKeys: readonly string[]
  /** Key holding the lane's model override. */
  modelKey: string
  /** Ollama base-URL keys. */
  baseUrlKeys: readonly string[]
}

const LANE_RULES: Record<AiFeatureLane, LaneRule> = {
  tags: {
    providerKeys: ['AI_PROVIDER_TAGS', 'AI_PROVIDER_EMBEDDING', 'AI_PROVIDER'],
    modelKey: 'AI_MODEL_TAGS',
    baseUrlKeys: ['OLLAMA_BASE_URL_TAGS', 'OLLAMA_BASE_URL'],
  },
  embedding: {
    providerKeys: ['AI_PROVIDER_EMBEDDING', 'AI_PROVIDER_TAGS', 'AI_PROVIDER'],
    modelKey: 'AI_MODEL_EMBEDDING',
    baseUrlKeys: ['OLLAMA_BASE_URL_EMBEDDING', 'OLLAMA_BASE_URL'],
  },
  chat: {
    providerKeys: ['AI_PROVIDER_CHAT', 'AI_PROVIDER_TAGS', 'AI_PROVIDER_EMBEDDING', 'AI_PROVIDER'],
    modelKey: 'AI_MODEL_CHAT',
    baseUrlKeys: [
      'OLLAMA_BASE_URL_CHAT',
      'OLLAMA_BASE_URL_TAGS',
      'OLLAMA_BASE_URL_EMBEDDING',
      'OLLAMA_BASE_URL',
    ],
  },
}

/** Returns `config[key]` unless it is missing, null or empty; never consults the environment. */
function cfgOnly(config: AiConfigValues, key: string): string | undefined {
  const value = config[key]
  return value != null && value !== '' ? value : undefined
}

/** Returns the non-empty value for `key`, preferring `config` over `process.env`. */
function pick(config: AiConfigValues, key: string): string | undefined {
  const fromConfig = cfgOnly(config, key)
  if (fromConfig !== undefined) return fromConfig
  const fromEnv = process.env[key]
  return fromEnv != null && fromEnv !== '' ? fromEnv : undefined
}

/** Walks `keys` in order and returns the first truthy value produced by `lookup`. */
function firstResolved(
  keys: readonly string[],
  lookup: (key: string) => string | undefined
): string | undefined {
  for (const key of keys) {
    const value = lookup(key)
    if (value) return value
  }
  return undefined
}

/** Narrows an arbitrary string to a known provider identifier. */
function isAiGatewayProvider(value: string): value is AiGatewayProvider {
  return VALID_PROVIDERS.has(value)
}

const VALID_PROVIDER_LIST = [...VALID_PROVIDERS].join(', ')

/**
 * Resolves provider, model names and Ollama base URL for a feature lane.
 *
 * Provider and model keys are read from `config` first and then from `process.env`;
 * Ollama base URLs are read from `config` only. The provider value is trimmed and
 * lower-cased before validation, so values such as `" OpenAI\n"` are accepted.
 *
 * @param lane Feature lane to resolve. Any value other than `'tags'` or `'embedding'`
 *   uses the chat key precedence.
 * @param config Settings map; missing, null and empty values are treated as unset.
 * @returns The resolved route with an empty `meta` object.
 * @throws Error when none of the lane's provider keys is set, when the provider is not
 *   in `VALID_PROVIDERS`, or when the embedding lane resolves to an Anthropic provider.
 */
export function resolveAiRoute(lane: AiFeatureLane, config: AiConfigValues): ResolvedAiRoute {
  const rule = lane === 'tags' || lane === 'embedding' ? LANE_RULES[lane] : LANE_RULES.chat

  const providerRaw = firstResolved(rule.providerKeys, (key) => pick(config, key))
  if (!providerRaw) {
    throw new Error(
      `${rule.providerKeys[0]} is not configured. Please set it in the admin settings or environment variables. ` +
        'Options: ' +
        VALID_PROVIDER_LIST
    )
  }

  // Trim before validating: values copied from env files often carry stray whitespace or a newline.
  const providerType = providerRaw.trim().toLowerCase()
  if (!isAiGatewayProvider(providerType)) {
    throw new Error(`Unknown AI provider '${providerRaw}'. Valid options: ${VALID_PROVIDER_LIST}`)
  }

  if (lane === 'embedding' && (providerType === 'anthropic' || providerType === 'anthropic_custom')) {
    throw new Error(
      'AI_PROVIDER_EMBEDDING cannot use "anthropic" or "anthropic_custom": these gateways use the Anthropic Messages API only (no embeddings in Memento). Use ollama, openai, or "custom" with MiniMax OpenAI URL https://api.minimax.io/v1 for embeddings.'
    )
  }

  // The fallback only matters if VALID_PROVIDERS was extended at runtime without a defaults entry.
  const defaults = PROVIDER_MODEL_DEFAULTS[providerType] || EMPTY_MODEL_DEFAULTS

  // NOTE(review): on the embedding lane `modelName` falls back to the provider's chat default
  // when AI_MODEL_EMBEDDING is unset — confirm that callers read `embeddingModelName` there.
  return {
    lane,
    providerType,
    modelName: pick(config, rule.modelKey) || defaults.model,
    embeddingModelName: pick(config, 'AI_MODEL_EMBEDDING') || defaults.embeddingModel,
    ollamaBaseUrl: firstResolved(rule.baseUrlKeys, (key) => cfgOnly(config, key)),
    meta: {},
  }
}

/**
 * Same as `resolveAiRoute`, but records how long resolution took.
 *
 * @returns A copy of the resolved route whose `meta.resolveMs` holds the elapsed
 *   `performance.now()` milliseconds.
 * @throws Error under the same conditions as `resolveAiRoute`.
 */
export function resolveAiRouteWithTiming(lane: AiFeatureLane, config: AiConfigValues): ResolvedAiRoute {
  const startedAt = performance.now()
  const route = resolveAiRoute(lane, config)
  const resolveMs = performance.now() - startedAt
  return {
    ...route,
    meta: { ...route.meta, resolveMs },
  }
}

/**
 * Serialises the lane, provider, model ids and timing of a route as a JSON string.
 * `ollamaBaseUrl` is not included; `resolveMs` is omitted from the output when unset.
 */
export function formatAiRouteDebug(route: ResolvedAiRoute): string {
  return JSON.stringify({
    lane: route.lane,
    providerType: route.providerType,
    modelId: route.modelName,
    embeddingModelId: route.embeddingModelName,
    resolveMs: route.meta.resolveMs,
  })
}