fix: use AI SDK + language detection for label suggestions
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 43s

Follows the same pattern as TitleSuggestionService: getModel() + generateText
with separate system/user prompts. LanguageDetectionService (tinyld) auto-detects
the note's content language, so suggested labels now match it (a Persian note → Persian labels).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-27 21:11:55 +02:00
parent c7e654afa6
commit 3b8152c7c0

View File

@@ -8,6 +8,7 @@ import { prisma } from '@/lib/prisma'
import { getTagsProvider } from '@/lib/ai/factory'
import { getSystemConfig } from '@/lib/config'
import { LanguageDetectionService } from './language-detection.service'
import { generateText } from 'ai'
export interface LabelSuggestion {
label: string
@@ -84,84 +85,55 @@ export class ContextualAutoTagService {
private async suggestFromExistingLabels(
noteContent: string,
notebook: any,
language: string
contentLanguage: string
): Promise<LabelSuggestion[]> {
const availableLabels = notebook.labels.map((l: any) => l.name)
// Build prompt with available labels
const prompt = this.buildPrompt(noteContent, notebook.name, availableLabels, language)
const labelList = availableLabels.map((l: string) => `- ${l}`).join('\n')
try {
const config = await getSystemConfig()
const provider = getTagsProvider(config)
const model = provider.getModel()
// Use generateText with JSON response
const response = await provider.generateText(prompt)
console.log('[ContextualAutoTag] AI raw response (existing):', response?.substring(0, 300))
const systemPrompt = `You suggest labels for notes from an existing list.
Rules:
- Only suggest a label if DIRECTLY related to THIS note's content
- Do NOT suggest labels just because they exist
- Maximum 2 suggestions
- Confidence < 0.7 = do not suggest
- If NO label is relevant, return: {"suggestions":[]}
Output JSON only: {"suggestions":[{"label":"name","confidence":0.85,"reasoning":"why"}]}`
// Improved JSON parsing with multiple fallback strategies
let parsed: any
const userPrompt = `Note (language: ${contentLanguage}):
${noteContent.substring(0, 1000)}
// Strategy 1: Direct parse
try {
parsed = JSON.parse(response)
} catch (e) {
// Strategy 2: Extract JSON from markdown code blocks
const codeBlockMatch = response.match(/```(?:json)?\s*(\{[\s\S]*?\}|\[[\s\S]*?\])\s*```/)
if (codeBlockMatch) {
parsed = JSON.parse(codeBlockMatch[1])
} else {
// Strategy 3: Extract JSON object or array
const jsonArrayMatch = response.match(/\[[\s\S]*\]/)
const jsonObjectMatch = response.match(/\{[\s\S]*\}/)
Available labels:
${labelList}
if (jsonArrayMatch) {
let cleanedJson = jsonArrayMatch[0]
cleanedJson = cleanedJson.replace(/,\s*([}\]])/g, '$1')
cleanedJson = cleanedJson.replace(/([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:/g, '$1"$2":')
parsed = JSON.parse(cleanedJson)
} else if (jsonObjectMatch) {
let cleanedJson = jsonObjectMatch[0]
cleanedJson = cleanedJson.replace(/,\s*([}\]])/g, '$1')
cleanedJson = cleanedJson.replace(/([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:/g, '$1"$2":')
parsed = JSON.parse(cleanedJson)
} else {
console.error('[ContextualAutoTag] Could not extract JSON from response')
return []
}
}
}
Suggest relevant labels from the list above.`
// Handle both formats: array directly OR {suggestions: array}
let suggestionsArray = parsed
if (parsed.suggestions && Array.isArray(parsed.suggestions)) {
suggestionsArray = parsed.suggestions
} else if (Array.isArray(parsed)) {
suggestionsArray = parsed
} else {
console.error('[ContextualAutoTag] Invalid response structure:', parsed)
return []
}
const { text } = await generateText({ model, system: systemPrompt, prompt: userPrompt, temperature: 0.3 })
console.log('[ContextualAutoTag] AI response (existing):', text?.substring(0, 300))
const parsed = this.parseAIResponse(text)
if (!parsed) return []
// Filter and map suggestions (case-insensitive, strict threshold)
const lowerAvailable = availableLabels.map((l: string) => l.toLowerCase())
const suggestions = suggestionsArray
.filter((s: any) => {
return s.label && lowerAvailable.includes(s.label.toLowerCase()) && (s.confidence || 0) > 0.5
})
const suggestions = parsed
.filter((s: any) => s.label && lowerAvailable.includes(s.label.toLowerCase()) && (s.confidence || 0) > 0.5)
.map((s: any) => {
const originalLabel = availableLabels.find((l: string) => l.toLowerCase() === s.label.toLowerCase()) || s.label
return {
label: originalLabel,
confidence: Math.round(s.confidence * 100),
reasoning: s.reasoning || '',
isNewLabel: false,
confidence: Math.round(s.confidence * 100),
reasoning: s.reasoning || '',
isNewLabel: false,
}
})
.sort((a: any, b: any) => b.confidence - a.confidence)
.slice(0, 3)
console.log('[ContextualAutoTag] filtered existing suggestions:', suggestions.length, suggestions.map((s: any) => `${s.label}(${s.confidence})`))
console.log('[ContextualAutoTag] filtered existing:', suggestions.map((s: any) => `${s.label}(${s.confidence})`))
return suggestions as LabelSuggestion[]
} catch (error) {
console.error('Failed to suggest labels:', error)
@@ -175,134 +147,81 @@ export class ContextualAutoTagService {
/**
 * Ask the tags model to propose brand-new labels for a note.
 *
 * The request is sent through the AI SDK `generateText` call with a fixed
 * system prompt (rules + output format) and a user prompt carrying the note
 * text (first 1000 characters), the notebook name and the detected language.
 *
 * @param noteContent - Raw note text; only the first 1000 characters are sent.
 * @param notebook - Notebook record; only `notebook.name` is read here.
 * @param contentLanguage - Language identifier interpolated into the prompt so
 *   the model answers in the note's language.
 * @returns Up to 5 suggestions sorted by descending confidence, each flagged
 *   `isNewLabel: true` with `confidence` expressed as an integer percentage.
 *   Returns an empty array when the reply cannot be parsed or any step throws.
 */
private async suggestNewLabels(
  noteContent: string,
  notebook: any,
  contentLanguage: string
): Promise<LabelSuggestion[]> {
  try {
    const config = await getSystemConfig()
    const provider = getTagsProvider(config)
    // NOTE(review): assumes getModel() returns an AI SDK language model — confirm against the provider.
    const model = provider.getModel()

    const systemPrompt = `You suggest labels to organize notes.
Rules:
- Suggest 1-3 short labels (max 1-2 words)
- Labels MUST be in the SAME LANGUAGE as the note content
- Labels must be lowercase
- Maximum 3 suggestions, confidence < 0.6 = do not suggest
Output JSON only: {"suggestions":[{"label":"name","confidence":0.85,"reasoning":"why"}]}`

    const userPrompt = `Note (language: ${contentLanguage}):
${noteContent.substring(0, 1000)}
Notebook: ${notebook.name}
Note language detected: ${contentLanguage}
Respond with labels in ${contentLanguage} (same language as the note).`

    const { text } = await generateText({ model, system: systemPrompt, prompt: userPrompt, temperature: 0.3 })
    console.log('[ContextualAutoTag] AI response (new):', text?.substring(0, 300))

    const parsed = this.parseAIResponse(text)
    if (!parsed) return []

    const suggestions = parsed
      // Skip null / non-object entries instead of letting one malformed item
      // throw and discard every other suggestion via the outer catch.
      .filter((s: any) =>
        s !== null &&
        typeof s === 'object' &&
        typeof s.label === 'string' &&
        s.label.length > 0 &&
        Number(s.confidence) > 0.3
      )
      .map((s: any) => ({
        label: s.label,
        // Model reports 0..1; callers receive an integer percentage.
        confidence: Math.round(Number(s.confidence) * 100),
        reasoning: s.reasoning || '',
        isNewLabel: true,
      }))
      .sort((a: any, b: any) => b.confidence - a.confidence)
      .slice(0, 5)

    console.log('[ContextualAutoTag] new labels:', suggestions.map((s: any) => `${s.label}(${s.confidence})`))
    return suggestions as LabelSuggestion[]
  } catch (error) {
    // Best-effort feature: log and fall back to "no suggestions".
    console.error('Failed to suggest new labels:', error)
    return []
  }
}
/**
 * Extract the suggestion list from a raw model reply.
 *
 * Candidate JSON texts are tried in order until one yields a usable list:
 *   1. the whole reply,
 *   2. the contents of the first fenced code block,
 *   3. the widest `{...}` span,
 *   4. the widest `[...]` span.
 * Each candidate is first parsed strictly; only if that fails are the lenient
 * repairs (drop trailing commas, quote bare keys) applied, so valid JSON is
 * never rewritten. Trying the remaining candidates after a failure means a
 * bare array surrounded by prose is still recovered even though its `{...}`
 * span (`{…},{…}`) is not valid JSON on its own.
 *
 * @param response - Raw text returned by the model.
 * @returns The array itself, or the `suggestions` array of a wrapping object;
 *   `null` when no candidate produces either shape. Never throws.
 */
private parseAIResponse(response: string): any[] | null {
  if (typeof response !== 'string' || response.trim() === '') return null

  // Strict parse first; fall back to the lenient repairs only on failure.
  const tryParse = (candidate: string): unknown => {
    try {
      return JSON.parse(candidate)
    } catch {
      // fall through to the repaired attempt
    }
    const repaired = candidate
      .replace(/,\s*([}\]])/g, '$1')
      .replace(/([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:/g, '$1"$2":')
    try {
      return JSON.parse(repaired)
    } catch {
      return undefined
    }
  }

  // Accept either a bare array or an object of the form {suggestions: [...]}.
  const toSuggestions = (value: unknown): any[] | null => {
    if (Array.isArray(value)) return value
    if (value !== null && typeof value === 'object') {
      const inner = (value as { suggestions?: unknown }).suggestions
      if (Array.isArray(inner)) return inner
    }
    return null
  }

  const candidates: string[] = [response]

  const fenced = response.match(/```(?:json)?\s*(\{[\s\S]*?\}|\[[\s\S]*?\])\s*```/)
  if (fenced?.[1]) candidates.push(fenced[1])

  const objectSpan = response.match(/\{[\s\S]*\}/)
  if (objectSpan) candidates.push(objectSpan[0])

  const arraySpan = response.match(/\[[\s\S]*\]/)
  if (arraySpan) candidates.push(arraySpan[0])

  for (const candidate of candidates) {
    const result = toSuggestions(tryParse(candidate))
    if (result) return result
  }
  return null
}
}