Files
Momento/memento-note/lib/blocks/extract-blocks.ts
Antigravity f46654f574 feat: editor improvements and architectural grid prototype
Multiple feature additions and improvements across the application:

- NextGen Editor: drag handles, smart paste, block actions
- Structured views: Kanban and table layouts for notes
- Architectural Grid: new brainstorming/agent interface prototype
- Flashcards: SM-2 revision algorithm with AI generation
- MCP server: robustness improvements
- Graph/PDF chat: fix click propagation and copy behavior
- Various UI/UX enhancements and bug fixes

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 19:45:15 +00:00

85 lines
2.8 KiB
TypeScript

import { stripHtmlToPlainText, tokenizeForSimilarity } from '@/lib/text/plain-text'
/** A piece of note text pulled out of HTML markup by the extractors in this module. */
export interface ExtractedBlock {
/** Value captured from the element's `data-id` attribute; the empty string for passages extracted without one. */
blockId: string
/** Result of running `stripHtmlToPlainText` on the element's inner HTML. */
content: string
}
/**
 * Escapes every regular-expression metacharacter in `value` so that the result
 * can be embedded in a `RegExp` source and match the input literally.
 *
 * @param value - Raw text to embed in a pattern.
 * @returns `value` with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(value: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g
  // `$&` re-inserts the matched character, now preceded by a backslash.
  const escaped = value.replace(metaCharacters, '\\$&')
  return escaped
}
/**
 * Returns the text content of the block whose `data-id` attribute equals
 * `blockId`, without applying any minimum-length filter.
 *
 * Only `<p>`, `<h1>`–`<h6>`, `<blockquote>` and `<li>` elements are considered.
 * The tag name must be followed by whitespace and `data-id` must be preceded by
 * whitespace, so look-alike tags (`<pre>`, `<param>`, `<link>`) and look-alike
 * attributes (`x-data-id`) are not mistaken for a block.
 *
 * @param html - Markup to search.
 * @param blockId - Identifier to look up; it is regex-escaped before use.
 * @returns The output of `stripHtmlToPlainText` for the first matching block,
 *   or `null` when either argument is empty, nothing matches, or that output
 *   is an empty string.
 */
export function extractBlockContentById(html: string, blockId: string): string | null {
  if (!html || !blockId) return null

  // `(?=\s)` pins the end of the tag name; `\s` before `data-id` pins the
  // start of the attribute name.
  const openingTag = `<(?:p|h[1-6]|blockquote|li)(?=\\s)[^>]*?\\sdata-id="${escapeRegExp(blockId)}"[^>]*>`
  // The content stops at the first closing tag from the list, whichever
  // element it belongs to.
  const closingTag = '<\\/(?:p|h[1-6]|blockquote|li)>'
  const blockPattern = new RegExp(`${openingTag}([\\s\\S]*?)${closingTag}`, 'i')

  const match = blockPattern.exec(html)
  if (!match) return null

  const content = stripHtmlToPlainText(match[1])
  return content.length > 0 ? content : null
}
/**
 * Collects every block element of `html` that carries a `data-id` attribute.
 *
 * Only `<p>`, `<h1>`–`<h6>`, `<blockquote>` and `<li>` elements are considered.
 * The tag name must be followed by whitespace and `data-id` must be preceded by
 * whitespace; otherwise a `<pre data-id="…">` would be read as a `<p>` and its
 * match would run on to the next `</p>`, swallowing the block that follows.
 *
 * @param html - Markup to scan.
 * @returns The blocks in document order, keeping only those whose stripped
 *   text is at least 10 characters long.
 */
export function extractBlocksFromHtml(html: string): ExtractedBlock[] {
  const minContentLength = 10
  const blocks: ExtractedBlock[] = []
  // The content stops at the first closing tag from the list, whichever
  // element it belongs to.
  const blockPattern =
    /<(?:p|h[1-6]|blockquote|li)(?=\s)[^>]*?\sdata-id="([^"]+)"[^>]*>([\s\S]*?)<\/(?:p|h[1-6]|blockquote|li)>/gi

  let match: RegExpExecArray | null
  // With the `g` flag each `exec` call resumes after the previous match.
  while ((match = blockPattern.exec(html)) !== null) {
    const blockId = match[1]
    const content = stripHtmlToPlainText(match[2])
    if (content.length >= minContentLength) {
      blocks.push({ blockId, content })
    }
  }
  return blocks
}
/**
 * Jaccard similarity of two texts: the number of tokens they share divided by
 * the number of distinct tokens across both.
 *
 * Tokens come from `tokenizeForSimilarity` (presumably a `Set` of words; see
 * that helper for the exact rules).
 *
 * @param a - First text.
 * @param b - Second text.
 * @returns A ratio of shared to total tokens, or `0` when either text yields
 *   no tokens.
 */
export function jaccardSimilarity(a: string, b: string): number {
  const leftTokens = tokenizeForSimilarity(a)
  const rightTokens = tokenizeForSimilarity(b)
  if (leftTokens.size === 0 || rightTokens.size === 0) return 0

  let shared = 0
  leftTokens.forEach((token) => {
    if (rightTokens.has(token)) shared += 1
  })

  // |A ∪ B| = |A| + |B| − |A ∩ B|
  const unionSize = leftTokens.size + rightTokens.size - shared
  return shared / unionSize
}
/**
 * Collects text passages from `html` without relying on `data-id` attributes.
 *
 * Considers `<p>`, `<h1>`–`<h6>`, `<blockquote>`, `<li>`, `<td>`, `<th>` and
 * `<div>` elements. The tag name must end right where whitespace, `>` or `/`
 * begins, so look-alike tags (`<pre>`, `<thead>`, `<link>`) are not treated as
 * `<p>`, `<th>` or `<li>` and cannot merge unrelated elements into one passage.
 *
 * @param html - Markup to scan.
 * @returns The passages in document order, each with an empty `blockId`,
 *   keeping only those whose stripped text is at least 10 characters long.
 */
function extractPlainBlocksFromHtml(html: string): ExtractedBlock[] {
  const minContentLength = 10
  const blocks: ExtractedBlock[] = []
  // The content stops at the first closing tag from the list, whichever
  // element it belongs to.
  const passagePattern =
    /<(?:p|h[1-6]|blockquote|li|td|th|div)(?=[\s>\/])[^>]*>([\s\S]*?)<\/(?:p|h[1-6]|blockquote|li|td|th|div)>/gi

  let match: RegExpExecArray | null
  // With the `g` flag each `exec` call resumes after the previous match.
  while ((match = passagePattern.exec(html)) !== null) {
    const content = stripHtmlToPlainText(match[1])
    if (content.length >= minContentLength) {
      blocks.push({ blockId: '', content })
    }
  }
  return blocks
}
/**
 * Chooses the block whose content scores highest against `hint` under
 * `jaccardSimilarity`.
 *
 * @param blocks - Candidate blocks, in priority order.
 * @param hint - Text to compare each block against.
 * @returns `null` for an empty list; the first block when `hint` is blank;
 *   otherwise the highest-scoring block, with the earliest one winning ties.
 */
function pickBestFromBlocks(blocks: ExtractedBlock[], hint: string): ExtractedBlock | null {
  if (blocks.length === 0) return null

  let winner = blocks[0]
  if (hint.trim() === '') return winner

  let topScore = jaccardSimilarity(hint, winner.content)
  for (let index = 1; index < blocks.length; index += 1) {
    const candidate = blocks[index]
    const candidateScore = jaccardSimilarity(hint, candidate.content)
    // Strict comparison: an equal score never displaces an earlier block.
    if (candidateScore > topScore) {
      winner = candidate
      topScore = candidateScore
    }
  }
  return winner
}
/**
 * Finds, among the `data-id` blocks of `html`, the one that best matches `hint`.
 *
 * @param html - Markup to extract blocks from.
 * @param hint - Text the chosen block should resemble.
 * @returns The selected block, or `null` when no block was extracted.
 */
export function pickBestBlockForHint(html: string, hint: string): ExtractedBlock | null {
  const candidates = extractBlocksFromHtml(html)
  return pickBestFromBlocks(candidates, hint)
}
/**
 * Fallback for notes that have no `data-id` yet (static citation, no live
 * block): finds the plain passage of `html` that best matches `hint`.
 *
 * @param html - Markup to extract passages from.
 * @param hint - Text the chosen passage should resemble.
 * @returns The selected passage (its `blockId` is empty), or `null` when no
 *   passage was extracted.
 */
export function pickBestPlainPassageForHint(html: string, hint: string): ExtractedBlock | null {
  const passages = extractPlainBlocksFromHtml(html)
  return pickBestFromBlocks(passages, hint)
}