Keep/keep-notes/lib/ai/services/auto-label-creation.service.ts
2026-02-15 17:38:16 +01:00

469 lines
12 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { prisma } from '@/lib/prisma'
import { getAIProvider } from '@/lib/ai/factory'
import { getSystemConfig } from '@/lib/config'
/**
 * A single label proposed by the AI for a group of notes that share a theme.
 */
export interface SuggestedLabel {
  /** Label name as returned by the AI provider. */
  name: string
  /** Number of notes the label would be attached to (length of `noteIds`). */
  count: number
  /** Confidence score reported by the AI provider for this label. */
  confidence: number
  /** IDs of the notes the label would be attached to. */
  noteIds: string[]
}
/**
 * Result of analysing one notebook: the notebook's identity plus the labels
 * suggested for it.
 */
export interface AutoLabelSuggestion {
  /** ID of the analysed notebook. */
  notebookId: string
  /** Display name of the analysed notebook. */
  notebookName: string
  /** Icon of the analysed notebook, or null when it has none. */
  notebookIcon: string | null
  /** Labels suggested for the notebook. */
  suggestedLabels: SuggestedLabel[]
  /** Total number of notes in the notebook. */
  totalNotes: number
}
/**
 * Service that suggests new labels for a notebook by asking the configured AI
 * provider to detect recurring themes in its notes, and that creates the
 * labels the user accepted (Story 5.4 - IA4).
 */
export class AutoLabelCreationService {
  /** Minimum number of notes a notebook needs before it is analysed (PRD requirement). */
  private static readonly MIN_NOTEBOOK_NOTES = 15
  /** Only the most recently updated notes are sent to the AI provider. */
  private static readonly MAX_NOTES_ANALYZED = 100
  /** Number of content characters of each note included in the prompt. */
  private static readonly CONTENT_PREVIEW_LENGTH = 150
  /** Suggestions must have a confidence strictly above this value (also stated in the prompt). */
  private static readonly MIN_CONFIDENCE = 0.6
  /** A suggestion must cover at least this many distinct notes (also stated in the prompt). */
  private static readonly MIN_NOTES_PER_LABEL = 5
  /** Maximum number of suggestions returned (also stated in the prompt). */
  private static readonly MAX_SUGGESTIONS = 5

  /**
   * Analyze a notebook and suggest new labels based on recurring themes.
   *
   * @param notebookId - Notebook ID to analyze
   * @param userId - User ID; the notebook and its notes must belong to this user
   * @param language - Language code selecting the prompt language (defaults to 'en')
   * @returns Suggested labels, or null if the notebook has too few notes, no
   *          usable suggestion was produced, or the AI call failed
   * @throws Error('Notebook not found') when the notebook does not exist for this user
   */
  async suggestLabels(notebookId: string, userId: string, language: string = 'en'): Promise<AutoLabelSuggestion | null> {
    // 1. Load the notebook (scoped to the user) with its existing labels and note count
    const notebook = await prisma.notebook.findFirst({
      where: {
        id: notebookId,
        userId,
      },
      include: {
        labels: {
          select: {
            id: true,
            name: true,
          },
        },
        _count: {
          select: { notes: true },
        },
      },
    })
    if (!notebook) {
      throw new Error('Notebook not found')
    }

    // Only trigger once the notebook is large enough
    if (notebook._count.notes < AutoLabelCreationService.MIN_NOTEBOOK_NOTES) {
      return null
    }

    // 2. Load the most recently updated notes of this notebook
    const notes = await prisma.note.findMany({
      where: {
        notebookId,
        userId,
      },
      select: {
        id: true,
        title: true,
        content: true,
        labelRelations: {
          select: {
            name: true,
          },
        },
      },
      orderBy: {
        updatedAt: 'desc',
      },
      take: AutoLabelCreationService.MAX_NOTES_ANALYZED,
    })
    if (notes.length === 0) {
      return null
    }

    // 3. Use AI to detect recurring themes
    return this.detectRecurringThemes(notes, notebook, language)
  }

  /**
   * Ask the AI provider for recurring themes and turn its answer into a suggestion.
   * Any failure while calling the provider or reading its answer is logged and
   * reported as null rather than thrown.
   *
   * @param notes - Notes to analyse (each needs `id`, `title`, `content`)
   * @param notebook - Notebook with `id`, `name`, `icon`, `labels` and `_count.notes`
   * @param language - Language code selecting the prompt language
   */
  private async detectRecurringThemes(
    notes: any[],
    notebook: any,
    language: string
  ): Promise<AutoLabelSuggestion | null> {
    // Lower-cased so comparisons against AI output are case-insensitive
    const existingLabelNames = new Set<string>(
      notebook.labels.map((label: any) => label.name.toLowerCase())
    )
    const prompt = this.buildPrompt(notes, existingLabelNames, language)
    try {
      const config = await getSystemConfig()
      const provider = getAIProvider(config)
      const response = await provider.generateText(prompt)

      // The prompt asks the model not to repeat existing labels, but that is
      // not guaranteed, so the parser filters them out as well.
      const parsed = this.parseAIResponse(response, notes, existingLabelNames)
      if (!parsed || parsed.suggestedLabels.length === 0) {
        return null
      }
      return {
        notebookId: notebook.id,
        notebookName: notebook.name,
        notebookIcon: notebook.icon,
        suggestedLabels: parsed.suggestedLabels,
        totalNotes: notebook._count.notes,
      }
    } catch (error) {
      console.error('Failed to detect recurring themes:', error)
      return null
    }
  }

  /**
   * Build the (localized) prompt sent to the AI provider.
   *
   * @param notes - Notes to summarise; each is listed with its array index so
   *                the AI can refer back to it through `note_indices`
   * @param existingLabelNames - Labels that must not be suggested again
   * @param language - Language code; region-qualified codes such as 'en-US'
   *                   fall back to their base language, unknown codes to English
   */
  private buildPrompt(notes: any[], existingLabelNames: Set<string>, language: string = 'en'): string {
    const notesSummary = notes
      .map((note, index) => {
        const title = note.title || 'Sans titre'
        // A note without textual content must not break prompt generation
        const content = typeof note.content === 'string'
          ? note.content.substring(0, AutoLabelCreationService.CONTENT_PREVIEW_LENGTH)
          : ''
        return `[${index}] "${title}": ${content}`
      })
      .join('\n')
    const existingLabels = Array.from(existingLabelNames).join(', ')
    // The JSON keys ("nom", "note_indices", "confiance") are the same in every
    // language because parseAIResponse reads exactly those keys.
    const instructions: Record<string, string> = {
      fr: `
Tu es un assistant qui détecte les thèmes récurrents dans des notes pour suggérer de nouvelles étiquettes.
CARNET ANALYSÉ :
${notes.length} notes
ÉTIQUETTES EXISTANTES (ne pas suggérer celles-ci) :
${existingLabels || 'Aucune'}
NOTES DU CARNET :
${notesSummary}
TÂCHE :
Analyse les notes et détecte les thèmes récurrents (mots-clés, sujets, lieux, personnes).
Un thème doit apparaître dans au moins 5 notes différentes pour être suggéré.
FORMAT DE RÉPONSE (JSON) :
{
"labels": [
{
"nom": "nom_du_label",
"note_indices": [0, 5, 12, 23, 45],
"confiance": 0.85
}
]
}
RÈGLES :
- Le nom du label doit être court (1-2 mots max)
- Un thème doit apparaître dans 5+ notes pour être suggéré
- La confiance doit être > 0.60
- Ne pas suggérer des étiquettes qui existent déjà
- Priorise les lieux, personnes, catégories claires
- Maximum 5 suggestions
Exemples de bonnes étiquettes :
- "tokyo", "kyoto", "osaka" (lieux)
- "hôtels", "restos", "vols" (catégories)
- "marie", "jean", "équipe" (personnes)
Ta réponse (JSON seulement) :
`.trim(),
      en: `
You are an assistant that detects recurring themes in notes to suggest new labels.
ANALYZED NOTEBOOK:
${notes.length} notes
EXISTING LABELS (do not suggest these):
${existingLabels || 'None'}
NOTEBOOK NOTES:
${notesSummary}
TASK:
Analyze the notes and detect recurring themes (keywords, subjects, places, people).
A theme must appear in at least 5 different notes to be suggested.
RESPONSE FORMAT (JSON):
{
"labels": [
{
"nom": "label_name",
"note_indices": [0, 5, 12, 23, 45],
"confiance": 0.85
}
]
}
RULES:
- Label name must be short (max 1-2 words)
- A theme must appear in 5+ notes to be suggested
- Confidence must be > 0.60
- Do not suggest labels that already exist
- Prioritize places, people, clear categories
- Maximum 5 suggestions
Examples of good labels:
- "tokyo", "kyoto", "osaka" (places)
- "hotels", "restaurants", "flights" (categories)
- "mary", "john", "team" (people)
Your response (JSON only):
`.trim(),
      fa: `
شما یک دستیار هستید که تم‌های تکرارشونده در یادداشت‌ها را برای پیشنهاد برچسب‌های جدید شناسایی می‌کنید.
دفترچه‌ تحلیل شده:
${notes.length} یادداشت
برچسب‌های موجود (این‌ها را پیشنهاد ندهید):
${existingLabels || 'هیچ'}
یادداشت‌های دفترچه:
${notesSummary}
وظیفه:
یادداشت‌ها را تحلیل کنید و تم‌های تکرارشونده (کلمات کلیدی، موضوعات، مکان‌ها، افراد) را شناسایی کنید.
یک تم باید حداقل در ۵ یادداشت مختلف ظاهر شود تا پیشنهاد داده شود.
فرمت پاسخ (JSON):
{
"labels": [
{
"nom": "نام_برچسب",
"note_indices": [0, 5, 12, 23, 45],
"confiance": 0.85
}
]
}
قوانین:
- نام برچسب باید کوتاه باشد (حداکثر ۱-۲ کلمه)
- یک تم باید در ۵+ یادداشت ظاهر شود تا پیشنهاد داده شود
- اطمینان باید > 0.60 باشد
- برچسب‌هایی که قبلاً وجود دارند را پیشنهاد ندهید
- اولویت با مکان‌ها، افراد، دسته‌بندی‌های واضح است
- حداکثر ۵ پیشنهاد
مثال‌های برچسب خوب:
- "توکیو"، "کیوتو"، "اوزاکا" (مکان‌ها)
- "هتل‌ها"، "رستوران‌ها"، "پروازها" (دسته‌بندی‌ها)
- "مریم"، "علی"، "تیم" (افراد)
پاسخ شما (فقط JSON):
`.trim(),
      es: `
Eres un asistente que detecta temas recurrentes en notas para sugerir nuevas etiquetas.
CUADERNO ANALIZADO:
${notes.length} notas
ETIQUETAS EXISTENTES (no sugerir estas):
${existingLabels || 'Ninguna'}
NOTAS DEL CUADERNO:
${notesSummary}
TAREA:
Analiza las notas y detecta temas recurrentes (palabras clave, temas, lugares, personas).
Un tema debe aparecer en al menos 5 notas diferentes para ser sugerido.
FORMATO DE RESPUESTA (JSON):
{
"labels": [
{
"nom": "nombre_etiqueta",
"note_indices": [0, 5, 12, 23, 45],
"confiance": 0.85
}
]
}
REGLAS:
- El nombre de la etiqueta debe ser corto (máx 1-2 palabras)
- Un tema debe aparecer en 5+ notas para ser sugerido
- La confianza debe ser > 0.60
- No sugieras etiquetas que ya existen
- Prioriza lugares, personas, categorías claras
- Máximo 5 sugerencias
Ejemplos de buenas etiquetas:
- "tokio", "kyoto", "osaka" (lugares)
- "hoteles", "restaurantes", "vuelos" (categorías)
- "maría", "juan", "equipo" (personas)
Tu respuesta (solo JSON):
`.trim(),
      de: `
Du bist ein Assistent, der wiederkehrende Themen in Notizen erkennt, um neue Labels vorzuschlagen.
ANALYSIERTES NOTIZBUCH:
${notes.length} Notizen
VORHANDENE LABELS (schlage diese nicht vor):
${existingLabels || 'Keine'}
NOTIZBUCH-NOTIZEN:
${notesSummary}
AUFGABE:
Analysiere die Notizen und erkenne wiederkehrende Themen (Schlüsselwörter, Themen, Orte, Personen).
Ein Thema muss in mindestens 5 verschiedenen Notizen erscheinen, um vorgeschlagen zu werden.
ANTWORTFORMAT (JSON):
{
"labels": [
{
"nom": "label_name",
"note_indices": [0, 5, 12, 23, 45],
"confiance": 0.85
}
]
}
REGELN:
- Der Labelname muss kurz sein (max 1-2 Wörter)
- Ein Thema muss in 5+ Notizen erscheinen, um vorgeschlagen zu werden
- Konfidenz muss > 0.60 sein
- Schlage keine Labels vor, die bereits existieren
- Priorisiere Orte, Personen, klare Kategorien
- Maximal 5 Vorschläge
Beispiele für gute Labels:
- "tokio", "kyoto", "osaka" (Orte)
- "hotels", "restaurants", "flüge" (Kategorien)
- "maria", "johannes", "team" (Personen)
Deine Antwort (nur JSON):
`.trim()
    }

    // Exact match first, then the base language of a region-qualified code
    // ("en-US" -> "en"), then English as the final fallback.
    const requested = (language || 'en').trim().toLowerCase()
    const baseLanguage = requested.split(/[-_]/)[0]
    return instructions[requested] ?? instructions[baseLanguage] ?? instructions['en']
  }

  /**
   * Parse the AI response into validated, ranked label suggestions.
   *
   * Each entry of the response's `labels` array is validated on its own, so a
   * single malformed entry does not discard the valid ones.
   *
   * @param response - Raw text returned by the AI provider (may wrap the JSON in prose)
   * @param notes - The notes that were listed in the prompt, in the same order
   * @param existingLabelNames - Lower-cased names of labels to exclude
   * @returns Up to MAX_SUGGESTIONS suggestions sorted by note count, then
   *          confidence; null if nothing usable was found or the JSON is invalid
   */
  private parseAIResponse(
    response: string,
    notes: any[],
    existingLabelNames: ReadonlySet<string> = new Set<string>()
  ): { suggestedLabels: SuggestedLabel[] } | null {
    try {
      // Take everything from the first "{" to the last "}" so surrounding prose is ignored
      const jsonMatch = response.match(/\{[\s\S]*\}/)
      if (!jsonMatch) {
        throw new Error('No JSON found in response')
      }
      const aiData: unknown = JSON.parse(jsonMatch[0])
      const rawLabels = typeof aiData === 'object' && aiData !== null
        ? (aiData as Record<string, unknown>).labels
        : undefined
      const candidates: unknown[] = Array.isArray(rawLabels) ? rawLabels : []

      const suggestedLabels: SuggestedLabel[] = []
      const seenNames = new Set<string>()
      for (const candidate of candidates) {
        const label = this.toSuggestedLabel(candidate, notes)
        if (!label) continue
        const key = label.name.toLowerCase()
        // Skip labels that already exist and names suggested twice
        if (existingLabelNames.has(key) || seenNames.has(key)) continue
        seenNames.add(key)
        suggestedLabels.push(label)
      }
      if (suggestedLabels.length === 0) {
        return null
      }

      // Sort by count (descending), then by confidence (descending)
      suggestedLabels.sort((a, b) => {
        if (b.count !== a.count) {
          return b.count - a.count
        }
        return b.confidence - a.confidence
      })
      return {
        suggestedLabels: suggestedLabels.slice(0, AutoLabelCreationService.MAX_SUGGESTIONS),
      }
    } catch (error) {
      console.error('Failed to parse AI response:', error)
      return null
    }
  }

  /**
   * Validate one raw entry of the AI response and convert it to a SuggestedLabel.
   * Returns null when the entry is malformed or misses a threshold.
   */
  private toSuggestedLabel(raw: unknown, notes: any[]): SuggestedLabel | null {
    if (typeof raw !== 'object' || raw === null) return null
    const entry = raw as Record<string, unknown>

    // The prompt asks for "nom"/"confiance"; the English spellings are accepted too
    const rawName = entry['nom'] ?? entry['name']
    const name = typeof rawName === 'string' ? rawName.trim() : ''
    if (name === '') return null

    const confidence = entry['confiance'] ?? entry['confidence']
    // A missing or non-numeric confidence must not slip past the threshold
    if (typeof confidence !== 'number' || !Number.isFinite(confidence)) return null
    if (confidence <= AutoLabelCreationService.MIN_CONFIDENCE) return null

    const rawIndices = entry['note_indices']
    if (!Array.isArray(rawIndices)) return null
    const noteIds = this.resolveNoteIds(rawIndices, notes)
    if (noteIds.length < AutoLabelCreationService.MIN_NOTES_PER_LABEL) return null

    return {
      name,
      count: noteIds.length,
      confidence,
      noteIds,
    }
  }

  /**
   * Map prompt indices back to note IDs, dropping invalid or out-of-range
   * indices and duplicates so a repeated index cannot inflate the count.
   */
  private resolveNoteIds(rawIndices: unknown[], notes: any[]): string[] {
    const ids = new Set<string>()
    for (const raw of rawIndices) {
      // Tolerate indices serialized as digit-only strings
      const index = typeof raw === 'string' && /^\d+$/.test(raw) ? Number(raw) : raw
      if (typeof index !== 'number' || !Number.isInteger(index)) continue
      const id = notes[index]?.id
      if (typeof id === 'string' && id !== '') ids.add(id)
    }
    return [...ids]
  }

  /**
   * Create the labels the user selected and attach them to the suggested notes.
   *
   * The suggestion payload is treated as untrusted: the notebook must belong
   * to the user, and labels are only attached to notes that are in that
   * notebook and owned by that user. Suggestions left without any such note
   * are skipped.
   *
   * @param notebookId - Notebook ID
   * @param userId - User ID
   * @param suggestions - Suggested labels to create
   * @param selectedLabels - Names of the labels the user selected to create
   * @returns Number of labels created
   * @throws Error('Notebook not found') when the notebook does not exist for this user
   */
  async createLabels(
    notebookId: string,
    userId: string,
    suggestions: AutoLabelSuggestion,
    selectedLabels: string[]
  ): Promise<number> {
    const selectedNames = new Set(selectedLabels)
    const chosen = suggestions.suggestedLabels.filter((label) => selectedNames.has(label.name))
    if (chosen.length === 0) {
      return 0
    }

    // Authorization: the notebook must belong to this user
    const notebook = await prisma.notebook.findFirst({
      where: { id: notebookId, userId },
      select: { id: true },
    })
    if (!notebook) {
      throw new Error('Notebook not found')
    }

    // Authorization: keep only note IDs that are in this notebook and owned by this user
    const requestedNoteIds = [...new Set(chosen.flatMap((label) => label.noteIds))]
    const ownedNotes = await prisma.note.findMany({
      where: { id: { in: requestedNoteIds }, notebookId, userId },
      select: { id: true },
    })
    const allowedNoteIds = new Set<string>(ownedNotes.map((note: { id: string }) => note.id))

    let createdCount = 0
    for (const suggestedLabel of chosen) {
      const targetNoteIds = [...new Set(suggestedLabel.noteIds)].filter((id) => allowedNoteIds.has(id))
      // Do not create a label that would end up attached to nothing
      if (targetNoteIds.length === 0) continue

      const label = await prisma.label.create({
        data: {
          name: suggestedLabel.name,
          color: 'gray', // Default color, user can change later
          notebookId,
          userId,
        },
      })

      // Assign label to each note individually (updateMany doesn't support relations)
      for (const noteId of targetNoteIds) {
        await prisma.note.update({
          where: { id: noteId },
          data: {
            labelRelations: {
              connect: {
                id: label.id,
              },
            },
          },
        })
      }
      createdCount++
    }
    return createdCount
  }
}
// Shared module-level instance of AutoLabelCreationService, exported so
// callers can import it instead of constructing their own.
export const autoLabelCreationService = new AutoLabelCreationService()