All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 7s
- Add brainstorm feature with collaborative canvas, AI idea generation, live cursors, playback, and export
- Add PDF upload/extraction/ingestion pipeline with pgvector document search (RAG)
- Add document Q&A overlay with streaming chat and PDF preview
- Add note attachments UI with status polling, grid layout, and auto-scroll
- Add task extraction AI tool and agent executor improvements
- Fix NoteEmbedding missing updatedAt column, re-index 66 notes with 1536-dim embeddings
- Fix brainstorm 'Create Note' button: add success toast and redirect to created note
- Fix memory echo notification infinite polling
- Fix chat route to always include document_search tool
- Add brainstorm i18n keys across all 14 locales
- Add socket server for real-time brainstorm collaboration
- Add hierarchical notebook selector and organize notebook dialog improvements
- Add sidebar brainstorm section with session management
- Update prisma schema with brainstorm tables, attachments, and document chunks
57 lines
1.4 KiB
TypeScript
57 lines
1.4 KiB
TypeScript
import fs from 'fs'
|
|
import path from 'path'
|
|
import * as pdfjsLib from 'pdfjs-dist/legacy/build/pdf.mjs'
|
|
|
|
// Tell pdf.js where its worker script lives. The location is built from the
// process working directory plus the path of the legacy worker bundle inside
// `node_modules`. The assignment is skipped when the imported build does not
// expose `GlobalWorkerOptions`.
if (typeof pdfjsLib.GlobalWorkerOptions !== 'undefined') {
  pdfjsLib.GlobalWorkerOptions.workerSrc = path.join(
    process.cwd(),
    'node_modules/pdfjs-dist/legacy/build/pdf.worker.mjs'
  )
}
|
|
|
|
/** Text extracted from a single page of a PDF. */
interface ExtractedPage {
  /** Page number as passed to `getPage`; the extractor counts pages from 1. */
  pageNumber: number
  /** The page's text item strings joined with single spaces. */
  text: string
}
|
|
|
|
/** Result of extracting a whole PDF: per-page text plus basic document info. */
export interface ExtractedDocument {
  /** One entry per extracted page, in page order. */
  pages: ExtractedPage[]
  /** Page count reported by the parsed document. */
  totalPages: number
  /** Title and author read from the PDF's info metadata; either may be absent. */
  metadata: {
    title?: string
    author?: string
  }
}
|
|
|
|
/**
 * Extracts plain text and basic metadata from PDF files using pdf.js.
 */
export class DocumentExtractionService {
  /**
   * Reads the PDF at `filePath` and returns the text of every page together
   * with the document's title and author, when present.
   *
   * @param filePath - Path of the PDF file to read.
   * @returns Page texts in page order (numbered from 1), the page count, and
   *   the title/author metadata.
   * @throws Rejects when the file cannot be read, the PDF cannot be loaded, or
   *   a page's text content cannot be retrieved. A metadata failure does not
   *   reject; title and author are simply left undefined.
   */
  async extractPdf(filePath: string): Promise<ExtractedDocument> {
    // Asynchronous read so a large file does not block the event loop.
    const dataBuffer = await fs.promises.readFile(filePath)

    const loadingTask = pdfjsLib.getDocument({
      data: new Uint8Array(dataBuffer),
      useSystemFonts: true,
      useWorkerFetch: false,
      isEvalSupported: false,
    })

    try {
      const doc = await loadingTask.promise

      const pages: ExtractedPage[] = []
      for (let i = 1; i <= doc.numPages; i++) {
        const page = await doc.getPage(i)
        const content = await page.getTextContent()

        // Items without a `str` field contribute an empty string, which is
        // what joining an `undefined` entry produced before.
        const strings: string[] = []
        for (const item of content.items) {
          strings.push('str' in item ? item.str : '')
        }

        pages.push({ pageNumber: i, text: strings.join(' ') })
      }

      // Metadata is best-effort: a failure here must not discard the text.
      let info: Record<string, unknown> | undefined
      try {
        const metadata = await doc.getMetadata()
        info = metadata?.info as Record<string, unknown> | undefined
      } catch {
        info = undefined
      }

      // Only pass through values that actually match the declared string type.
      const asString = (value: unknown): string | undefined =>
        typeof value === 'string' ? value : undefined

      return {
        pages,
        totalPages: doc.numPages,
        metadata: {
          title: asString(info?.Title),
          author: asString(info?.Author),
        },
      }
    } finally {
      // Release the document and its worker on success or failure. Cleanup
      // errors are ignored so they cannot mask the original error or result.
      await loadingTask.destroy().catch(() => undefined)
    }
  }
}
|
|
|
|
/** Shared module-level instance of `DocumentExtractionService`. */
export const documentExtractionService = new DocumentExtractionService()
|