import { clsx, type ClassValue } from "clsx"
import { twMerge } from "tailwind-merge"
import { LABEL_COLORS, LabelColorName, QueryType } from "./types"

/**
 * Build a class value from `inputs` with `clsx` and pass the result
 * through `twMerge`.
 */
export function cn(...inputs: ClassValue[]) {
  return twMerge(clsx(inputs))
}

/**
 * Deterministically map a name to one of the non-gray label colors.
 *
 * The name is folded into an integer hash (hash * 31 + char code, with the
 * shift truncating to 32 bits) and the hash selects an entry from the keys
 * of `LABEL_COLORS`, excluding 'gray'. The same name always yields the same
 * color.
 *
 * @param name - Text to derive the color from.
 * @returns The selected color name.
 */
export function getHashColor(name: string): LabelColorName {
  let hash = 0;
  for (let i = 0; i < name.length; i++) {
    // (hash << 5) - hash === hash * 31, computed with 32-bit shift semantics.
    hash = name.charCodeAt(i) + ((hash << 5) - hash);
  }

  const colors = Object.keys(LABEL_COLORS) as LabelColorName[];
  // Skip 'gray' for colorful tags
  const colorfulColors = colors.filter(c => c !== 'gray');
  const colorIndex = Math.abs(hash) % colorfulColors.length;

  return colorfulColors[colorIndex];
}

/**
 * Cosine similarity of two vectors.
 *
 * @param vecA - First vector.
 * @param vecB - Second vector.
 * @returns dot(A, B) / (|A| * |B|), or 0 when the lengths differ or either
 *          vector has zero magnitude (including empty vectors).
 */
export function cosineSimilarity(vecA: number[], vecB: number[]): number {
  if (vecA.length !== vecB.length) return 0;

  let dotProduct = 0;
  let mA = 0;
  let mB = 0;
  for (let i = 0; i < vecA.length; i++) {
    dotProduct += vecA[i] * vecB[i];
    mA += vecA[i] * vecA[i];
    mB += vecB[i] * vecB[i];
  }

  mA = Math.sqrt(mA);
  mB = Math.sqrt(mB);
  // Avoid dividing by zero for zero-magnitude vectors.
  if (mA === 0 || mB === 0) return 0;

  return dotProduct / (mA * mB);
}

/**
 * Validate an embedding vector for quality issues.
 *
 * Checks performed:
 * 1. The embedding is present and has at least one dimension
 *    (failure returns immediately with that single issue).
 * 2. No NaN values, no +/-Infinity values, and not an all-zero vector.
 * 3. The L2 norm lies within [0.7, 1.2].
 *
 * @param embedding - Vector to inspect.
 * @returns `valid` is true only when `issues` is empty; `issues` lists a
 *          human-readable message for every failed check.
 */
export function validateEmbedding(embedding: number[]): { valid: boolean; issues: string[] } {
  const issues: string[] = [];

  // Check 1: Dimensionality > 0
  if (!embedding || embedding.length === 0) {
    issues.push('Embedding is empty or has zero dimensionality');
    return { valid: false, issues };
  }

  // Check 2: Valid numbers (no NaN or Infinity)
  let hasNaN = false;
  let hasInfinity = false;
  let hasZeroVector = true;

  for (const val of embedding) {
    if (Number.isNaN(val)) hasNaN = true;
    // Compare against the infinities explicitly: `!isFinite(NaN)` is also
    // true, which would misreport NaN entries as Infinity.
    if (val === Infinity || val === -Infinity) hasInfinity = true;
    if (val !== 0) hasZeroVector = false;
  }

  if (hasNaN) {
    issues.push('Embedding contains NaN values');
  }
  if (hasInfinity) {
    issues.push('Embedding contains Infinity values');
  }
  if (hasZeroVector) {
    issues.push('Embedding is a zero vector (all values are 0)');
  }

  // Check 3: L2 norm is in reasonable range (0.7 to 1.2).
  // A NaN norm fails both comparisons and so adds no message here; the NaN
  // issue above already covers that case.
  const l2Norm = calculateL2Norm(embedding);
  if (l2Norm < 0.7 || l2Norm > 1.2) {
    issues.push(`L2 norm is ${l2Norm.toFixed(3)} (expected range: 0.7-1.2)`);
  }

  return { valid: issues.length === 0, issues };
}

/**
 * Calculate the L2 (Euclidean) norm of a vector.
 *
 * @param vector - Input vector.
 * @returns Square root of the sum of squared components (0 for an empty vector).
 */
export function calculateL2Norm(vector: number[]): number {
  let sum = 0;
  for (let i = 0; i < vector.length; i++) {
    sum += vector[i] * vector[i];
  }
  return Math.sqrt(sum);
}

/**
 * Normalize an embedding to unit L2 norm.
 *
 * @param embedding - Vector to normalize.
 * @returns A new array scaled by 1 / norm. When the norm is 0 the input
 *          array itself is returned unchanged (same reference, not a copy).
 */
export function normalizeEmbedding(embedding: number[]): number[] {
  const norm = calculateL2Norm(embedding);
  if (norm === 0) return embedding; // Can't normalize zero vector
  return embedding.map(val => val / norm);
}

/**
 * Calculate the RRF (Reciprocal Rank Fusion) constant k
 *
 * RRF Formula: score = Σ 1 / (k + rank)
 *
 * The k constant controls how much we penalize lower rankings:
 * - Lower k (e.g., 20) penalizes low ranks more heavily
 * - Higher k (e.g., 60) is more lenient with low ranks
 *
 * Adaptive formula: k = max(20, floor(totalNotes / 10))
 * - For small datasets (fewer than 210 notes): k = 20 (strict)
 * - For larger datasets: k scales linearly
 *
 * Examples:
 * - 50 notes → k = 20
 * - 200 notes → k = 20
 * - 500 notes → k = 50
 * - 1000 notes → k = 100
 *
 * @param totalNotes - Number of notes in the dataset.
 * @returns The k constant; the base value 20 when `totalNotes` is NaN.
 */
export function calculateRRFK(totalNotes: number): number {
  const BASE_K = 20;
  const adaptiveK = Math.floor(totalNotes / 10);
  // Math.max propagates NaN, which would poison every fused score downstream.
  if (Number.isNaN(adaptiveK)) return BASE_K;
  return Math.max(BASE_K, adaptiveK);
}

/**
 * Prefix patterns that mark a query as conceptual. They are tested against
 * the trimmed, lower-cased query.
 */
const CONCEPTUAL_PATTERNS: readonly RegExp[] = [
  /^(how|what|when|where|why|who|which|whose|can|could|would|should|is|are|do|does|did)\b/,
  /^(how to|ways to|best way to|guide for|tips for|learn about|understand)/,
  /^(tutorial|guide|introduction|overview|explanation|examples)/,
];

/**
 * Detect the type of search query to adapt search weights
 *
 * Detection rules (evaluated on the trimmed, lower-cased query):
 * 1. EXACT: Query is wrapped in matching single or double quotes
 *    (e.g., "Error 404"); a lone quote character does not count
 * 2. CONCEPTUAL: Query starts with question words or is a phrase like "how to X"
 * 3. MIXED: No specific pattern detected
 *
 * Examples:
 * - "exact phrase" (with the quotes) → 'exact'
 * - how to cook pasta → 'conceptual'
 * - what is python → 'conceptual'
 * - javascript tutorial → 'mixed'
 *
 * @param query - Raw user query; surrounding whitespace is ignored.
 * @returns The detected query type.
 */
export function detectQueryType(query: string): QueryType {
  const trimmed = query.trim().toLowerCase();

  // Rule 1: Check for wrapping quotes (exact match). The length guard keeps a
  // single quote character from matching as both the opening and closing quote.
  const isWrappedIn = (quote: string): boolean =>
    trimmed.length >= 2 && trimmed.startsWith(quote) && trimmed.endsWith(quote);
  if (isWrappedIn('"') || isWrappedIn("'")) {
    return 'exact';
  }

  // Rule 2: Check for conceptual patterns
  if (CONCEPTUAL_PATTERNS.some(pattern => pattern.test(trimmed))) {
    return 'conceptual';
  }

  // Default: mixed search
  return 'mixed';
}

/**
 * Get search weight multipliers based on query type
 *
 * Returns keyword and semantic weight multipliers:
 * - EXACT: Boost keyword matches (2.0x), reduce semantic (0.7x)
 * - CONCEPTUAL: Reduce keyword (0.7x), boost semantic (1.5x)
 * - MIXED (and any unrecognized value): Default weights (1.0x, 1.0x)
 *
 * @param queryType - Result of query-type detection.
 * @returns The keyword and semantic multipliers for that type.
 */
export function getSearchWeights(queryType: QueryType): {
  keywordWeight: number;
  semanticWeight: number;
} {
  switch (queryType) {
    case 'exact':
      return { keywordWeight: 2.0, semanticWeight: 0.7 };
    case 'conceptual':
      return { keywordWeight: 0.7, semanticWeight: 1.5 };
    case 'mixed':
    default:
      return { keywordWeight: 1.0, semanticWeight: 1.0 };
  }
}