Files
Momento/memento-note/lib/ai/services/document-extraction.service.ts
Antigravity 1fcea6ed7d
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 7s
feat: brainstorm sessions, PDF document Q&A, embedding fixes, and UI improvements
- Add brainstorm feature with collaborative canvas, AI idea generation, live cursors, playback, and export
- Add PDF upload/extraction/ingestion pipeline with pgvector document search (RAG)
- Add document Q&A overlay with streaming chat and PDF preview
- Add note attachments UI with status polling, grid layout, and auto-scroll
- Add task extraction AI tool and agent executor improvements
- Fix NoteEmbedding missing updatedAt column, re-index 66 notes with 1536-dim embeddings
- Fix brainstorm 'Create Note' button: add success toast and redirect to created note
- Fix memory echo notification infinite polling
- Fix chat route to always include document_search tool
- Add brainstorm i18n keys across all 14 locales
- Add socket server for real-time brainstorm collaboration
- Add hierarchical notebook selector and organize notebook dialog improvements
- Add sidebar brainstorm section with session management
- Update prisma schema with brainstorm tables, attachments, and document chunks
2026-05-14 17:43:21 +00:00

57 lines
1.4 KiB
TypeScript

import fs from 'fs'
import path from 'path'
import * as pdfjsLib from 'pdfjs-dist/legacy/build/pdf.mjs'
// Tell pdf.js where its worker script lives. The location is built from the
// process working directory, so it assumes the process is started from the
// directory that contains `node_modules`.
if (pdfjsLib.GlobalWorkerOptions !== undefined) {
  const workerScriptPath = path.join(
    process.cwd(),
    'node_modules/pdfjs-dist/legacy/build/pdf.worker.mjs'
  )
  pdfjsLib.GlobalWorkerOptions.workerSrc = workerScriptPath
}
/** Text extracted from a single page of a document. */
interface ExtractedPage {
  /** Position of the page within the document; the extractor numbers pages from 1. */
  pageNumber: number
  /** Text extracted from the page as one string. */
  text: string
}
/** Result of extracting a document: per-page text plus document-level details. */
export interface ExtractedDocument {
  /** One entry per extracted page. */
  pages: ExtractedPage[]
  /** Number of pages in the source document. */
  totalPages: number
  /** Descriptive fields of the document; each is absent when unavailable. */
  metadata: {
    title?: string
    author?: string
  }
}
/**
 * Extracts plain text and basic metadata from documents stored on disk.
 */
export class DocumentExtractionService {
  /**
   * Reads a PDF file and returns the text of each page together with the
   * document's title and author when they are available.
   *
   * Pages are processed one at a time, in order. The text of a page is the
   * `str` of every text item pdf.js reports for it, joined by single spaces.
   *
   * @param filePath - Path of the PDF file to read.
   * @returns The per-page text (page numbers start at 1), the page count, and
   *   the `Title` / `Author` entries of the document info when they are strings.
   * @throws If the file cannot be read, or pdf.js fails to load the document
   *   or to extract a page. A failure to read metadata alone does not throw.
   */
  async extractPdf(filePath: string): Promise<ExtractedDocument> {
    // Asynchronous read: a synchronous read inside an async method would block
    // the event loop for the whole duration of the disk access.
    const fileBytes = await fs.promises.readFile(filePath)
    const loadingTask = pdfjsLib.getDocument({
      // pdf.js is handed its own Uint8Array copy rather than the Node Buffer.
      data: new Uint8Array(fileBytes),
      useSystemFonts: true,
      useWorkerFetch: false,
      isEvalSupported: false,
    })

    try {
      const doc = await loadingTask.promise

      const pages: ExtractedPage[] = []
      for (let pageNumber = 1; pageNumber <= doc.numPages; pageNumber++) {
        const page = await doc.getPage(pageNumber)
        const content = await page.getTextContent()
        const text = content.items
          // Items without a `str` (marked-content entries) contribute an empty
          // string, which is what joining an undefined value produces as well.
          .map((item) => ('str' in item ? item.str : ''))
          .join(' ')
        pages.push({ pageNumber, text })
        // Drop the per-page resources before moving on to the next page.
        page.cleanup()
      }

      // Metadata is best-effort: a document whose info cannot be read still
      // yields its text, just without a title or author.
      let info: unknown
      try {
        info = (await doc.getMetadata()).info
      } catch {
        info = undefined
      }

      return {
        pages,
        totalPages: doc.numPages,
        metadata: {
          title: DocumentExtractionService.stringField(info, 'Title'),
          author: DocumentExtractionService.stringField(info, 'Author'),
        },
      }
    } finally {
      // Always release the document and its worker resources, whether the
      // extraction succeeded or threw part-way through.
      await loadingTask.destroy()
    }
  }

  /** Returns `source[key]` when `source` is an object and the value is a string. */
  private static stringField(source: unknown, key: string): string | undefined {
    if (typeof source !== 'object' || source === null) {
      return undefined
    }
    const value: unknown = (source as Record<string, unknown>)[key]
    return typeof value === 'string' ? value : undefined
  }
}
/** Module-level DocumentExtractionService instance, created once when this module is first evaluated. */
export const documentExtractionService = new DocumentExtractionService()