Files
Momento/memento-note/lib/ai/services/document-chunking.service.ts
Antigravity 1fcea6ed7d
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 7s
feat: brainstorm sessions, PDF document Q&A, embedding fixes, and UI improvements
- Add brainstorm feature with collaborative canvas, AI idea generation, live cursors, playback, and export
- Add PDF upload/extraction/ingestion pipeline with pgvector document search (RAG)
- Add document Q&A overlay with streaming chat and PDF preview
- Add note attachments UI with status polling, grid layout, and auto-scroll
- Add task extraction AI tool and agent executor improvements
- Fix NoteEmbedding missing updatedAt column, re-index 66 notes with 1536-dim embeddings
- Fix brainstorm 'Create Note' button: add success toast and redirect to created note
- Fix memory echo notification infinite polling
- Fix chat route to always include document_search tool
- Add brainstorm i18n keys across all 14 locales
- Add socket server for real-time brainstorm collaboration
- Add hierarchical notebook selector and organize notebook dialog improvements
- Add sidebar brainstorm section with session management
- Update prisma schema with brainstorm tables, attachments, and document chunks
2026-05-14 17:43:21 +00:00

84 lines
2.2 KiB
TypeScript

/**
 * One page of extracted document text, as consumed by
 * `DocumentChunkingService.chunk`.
 */
interface ChunkInput {
  /** Page text; the chunker trims it and skips pages that are empty after trimming. */
  text: string;
  /** Page identifier copied onto every chunk emitted while processing this page. */
  pageNumber: number;
}
/**
 * A single chunk produced by the document chunker.
 */
export interface DocumentChunkData {
  /** Chunk text, trimmed of leading and trailing whitespace. */
  content: string;
  /** Zero-based position of the chunk in the output, counted across all pages. */
  chunkIndex: number;
  /** `pageNumber` of the page that was being processed when the chunk was emitted. */
  pageNumber: number;
  /**
   * Start offset assigned by the chunker.
   * NOTE(review): offsets are relative to the chunker's per-page buffer, not to
   * the raw page text — confirm before using them to slice source text.
   */
  startChar: number;
  /** End offset assigned by the chunker (same coordinate space as `startChar`). */
  endChar: number;
  /** Optional metadata string; the chunker in this file never sets it. */
  metadata?: string;
}
/**
 * Splits extracted document pages into overlapping text chunks.
 *
 * Each page is divided into sections at heading-like lines. Sections are packed
 * into a buffer until adding the next one would push the buffer past
 * `CHUNK_SIZE`; the buffer is then emitted as a chunk and its last `OVERLAP`
 * characters seed the next buffer. The tail of a page's last chunk is likewise
 * carried into the first buffer of the next non-empty page.
 *
 * `startChar` / `endChar` are offsets into the page's packed buffer stream:
 * position 0 is the start of the page's first buffer (which begins with any
 * overlap carried from the previous page), each later buffer starts where its
 * overlap tail sat in the previous one, and `endChar - startChar` always equals
 * `content.length`. They are not offsets into the raw page text.
 *
 * NOTE(review): sections are never split, so a single section longer than
 * `CHUNK_SIZE` still produces one oversized chunk. The `'\n\n'` joiner is also
 * not counted in the size check, so a buffer may exceed the limit by 2.
 */
export class DocumentChunkingService {
  /** Target upper bound, in characters, for the packed buffer of one chunk. */
  private readonly CHUNK_SIZE = 800
  /** Number of trailing characters of a chunk repeated at the start of the next. */
  private readonly OVERLAP = 200
  /**
   * Heading detector, applied to a trimmed line: either 1-6 `#` followed by
   * whitespace, or a line of at least six characters made only of `A-Z` and
   * whitespace that starts with a letter. No `g` flag, so `.test` is stateless.
   */
  private static readonly HEADING_PATTERN = /^(#{1,6}\s|[A-Z][A-Z\s]{5,}$)/

  /**
   * Chunk a list of pages.
   *
   * @param pages Pages in reading order; pages whose text is empty after
   *   trimming are skipped.
   * @returns Chunks in emission order, with `chunkIndex` running 0..n-1.
   */
  chunk(pages: ChunkInput[]): DocumentChunkData[] {
    const chunks: DocumentChunkData[] = []
    // Overlap tail of the most recently finished page's last chunk.
    let carriedTail = ''

    for (const page of pages) {
      const text = page.text.trim()
      if (!text) continue

      let buffer = carriedTail
      let bufferStart = 0
      // False while the buffer holds nothing but overlap carried from the
      // previous page; that text has already been emitted once.
      let hasNewText = false

      for (const section of this.splitSections(text)) {
        if (buffer.length + section.length > this.CHUNK_SIZE && buffer.length > 0) {
          // Emitting an overlap-only buffer would create a duplicate chunk
          // attributed to the wrong page, so only flush real content.
          if (hasNewText) {
            this.pushChunk(chunks, buffer, bufferStart, page.pageNumber)
          }
          const tail = buffer.slice(-this.OVERLAP)
          // Advance by the text dropped from the front of the old buffer, so
          // the new buffer starts exactly where its overlap tail was located.
          bufferStart += buffer.length - tail.length
          buffer = tail + '\n' + section
        } else {
          buffer += (buffer ? '\n\n' : '') + section
        }
        hasNewText = true
      }

      if (hasNewText) {
        this.pushChunk(chunks, buffer, bufferStart, page.pageNumber)
        carriedTail = buffer.slice(-this.OVERLAP)
      }
    }

    return chunks
  }

  /**
   * Append `buffer` to `chunks` as a chunk, trimming it and adjusting the
   * offsets so they describe the trimmed content. Whitespace-only buffers are
   * ignored.
   */
  private pushChunk(
    chunks: DocumentChunkData[],
    buffer: string,
    bufferStart: number,
    pageNumber: number,
  ): void {
    const content = buffer.trim()
    if (!content) return
    // Index of the first non-whitespace character = amount trimmed from the front.
    const startChar = bufferStart + buffer.search(/\S/)
    chunks.push({
      content,
      chunkIndex: chunks.length,
      pageNumber,
      startChar,
      endChar: startChar + content.length,
    })
  }

  /**
   * Split text into sections, starting a new section at every heading-like
   * line (see `HEADING_PATTERN`). Each returned section is trimmed and
   * non-empty; a heading line stays at the top of the section it opens.
   */
  private splitSections(text: string): string[] {
    const sections: string[] = []
    let current = ''

    for (const line of text.split('\n')) {
      const isHeading = DocumentChunkingService.HEADING_PATTERN.test(line.trim())
      if (isHeading && current.trim()) {
        sections.push(current.trim())
        current = line
      } else {
        current += (current ? '\n' : '') + line
      }
    }

    if (current.trim()) sections.push(current.trim())
    return sections
  }
}
/**
 * Shared module-level instance of `DocumentChunkingService`, created once when
 * this module is first loaded.
 */
export const documentChunkingService = new DocumentChunkingService()