/**
 * markdown-export.ts
 * Utilities for TipTap HTML ↔ Markdown conversion.
 *
 * Uses:
 * - turndown (+ turndown-plugin-gfm) : HTML → Markdown
 * - marked : Markdown → HTML
 */
import TurndownService from 'turndown'
import { tables, taskListItems, strikethrough } from 'turndown-plugin-gfm'
import { marked } from 'marked'

// ── Markdown heuristic detection ────────────────────────────────────────────

/**
 * Patterns that each signal one kind of Markdown syntax. None of them is
 * global/sticky, so `.test()` carries no `lastIndex` state between calls.
 */
const MARKDOWN_PATTERNS = [
  /^#{1,6}\s/m, // headings
  /^\s*[-*+]\s/m, // unordered list
  /^\s*\d+\.\s/m, // ordered list
  /^\s*>\s/m, // blockquote
  /^```/m, // code fence
  /`[^`]+`/, // inline code
  /\*\*[^*]+\*\*/, // bold
  /\*[^*]+\*/, // italic
  /^[|].+[|]/m, // table
  /\[.+\]\(.+\)/, // link
  /!\[.+\]\(.+\)/, // image
  /~~[^~]+~~/, // strikethrough
]

/**
 * Returns true if the given plain text looks like it contains Markdown syntax.
 * Used by the paste handler to decide whether to convert before inserting.
 *
 * @param text - Plain text to inspect.
 * @returns `false` for empty input or input shorter than 3 characters after
 *   trimming; otherwise `true` when at least one Markdown pattern matches.
 */
export function looksLikeMarkdown(text: string): boolean {
  if (!text || text.trim().length < 3) return false
  return MARKDOWN_PATTERNS.some((re) => re.test(text))
}

// ── Custom node → HTML comment serialisation ────────────────────────────────

// NOTE(review): the literal text of these HTML comments was not recoverable
// from the reviewed source (the markup had been stripped). The formats below
// are a reconstruction — confirm them against the code that parses the
// comments back on import before relying on round-tripping.

/** Builds the HTML comment that stands in for a liveBlock node. */
function liveBlockComment(sourceNoteId: string, blockId: string): string {
  return `<!-- liveBlock sourceNoteId="${sourceNoteId}" blockId="${blockId}" -->`
}

/** Builds the HTML comment that stands in for a structuredViewBlock node. */
function structuredViewBlockComment(attrs: Record<string, string>): string {
  return `<!-- structuredViewBlock ${JSON.stringify(attrs)} -->`
}

/**
 * Returns the first non-empty value among the given attribute names, or ''.
 * Several spellings are tried because attribute-name casing depends on the
 * DOM implementation that parsed the HTML.
 */
function firstAttribute(el: HTMLElement, names: string[]): string {
  for (const name of names) {
    const value = el.getAttribute(name)
    if (value) return value
  }
  return ''
}

// ── Turndown service factory ─────────────────────────────────────────────────

/**
 * Creates a TurndownService configured for GFM output (ATX headings, fenced
 * code blocks, `-` bullets, inlined links) with custom rules that serialise
 * liveBlock and structuredViewBlock divs as HTML comments.
 */
function createTurndownService(): TurndownService {
  const td = new TurndownService({
    headingStyle: 'atx',
    hr: '---',
    bulletListMarker: '-',
    codeBlockStyle: 'fenced',
    fence: '```',
    emDelimiter: '_',
    strongDelimiter: '**',
    linkStyle: 'inlined',
    linkReferenceStyle: 'full',
  })

  // GFM plugins: tables + task lists + strikethrough
  td.use([tables, taskListItems, strikethrough])

  // Custom rule: liveBlock → HTML comment
  td.addRule('liveBlock', {
    filter(node) {
      return (
        node.nodeName === 'DIV' &&
        (node as HTMLElement).hasAttribute('data-live-block')
      )
    },
    replacement(_content, node) {
      const el = node as HTMLElement
      const sourceNoteId = firstAttribute(el, [
        'sourceNoteId',
        'sourcenoteId',
        'sourcenoteid',
      ])
      const blockId = firstAttribute(el, ['blockId', 'blockid'])
      return `\n\n${liveBlockComment(sourceNoteId, blockId)}\n\n`
    },
  })

  // Custom rule: structuredViewBlock → HTML comment
  td.addRule('structuredViewBlock', {
    filter(node) {
      return (
        node.nodeName === 'DIV' &&
        (node as HTMLElement).hasAttribute('data-structured-view-block')
      )
    },
    replacement(_content, node) {
      const el = node as HTMLElement
      // Carry over every attribute except the marker attribute itself.
      const attrs: Record<string, string> = {}
      for (const attr of Array.from(el.attributes)) {
        if (attr.name !== 'data-structured-view-block') {
          attrs[attr.name] = attr.value
        }
      }
      return `\n\n${structuredViewBlockComment(attrs)}\n\n`
    },
  })

  return td
}

// Singleton (lazy-init) — safe for server + client usage
let _tdService: TurndownService | null = null

/** Returns the shared TurndownService, creating it on first use. */
function getTurndownService(): TurndownService {
  if (!_tdService) _tdService = createTurndownService()
  return _tdService
}

// ── Custom node pre-processor ─────────────────────────────────────────────

// Sentinel prefix — alphanumeric only to avoid Markdown escaping by turndown
const SENTINEL_PREFIX = 'MOMENTOBLOCKSENTINEL'

/** Pairs a sentinel key placed in the HTML with the comment that replaces it. */
interface BlockPlaceholder {
  key: string
  comment: string
}

/**
 * Returns the first capture group that participated in a match of `re`
 * against `attrs`, or '' when there is no match. The patterns used here have
 * one group per quote style.
 */
function matchQuotedAttr(attrs: string, re: RegExp): string {
  const m = re.exec(attrs)
  if (!m) return ''
  return m[1] || m[2] || ''
}

/**
 * Pre-process HTML before passing to turndown:
 * - Replace empty custom node divs (liveBlock, structuredViewBlock) with text
 *   placeholders so they survive turndown processing (turndown drops blank nodes
 *   and strips HTML comments).
 * - Return the modified HTML and a list of placeholder → HTML comment pairs.
 *
 * @param html - TipTap-generated HTML.
 * @returns The HTML with sentinel paragraphs substituted, plus the
 *   placeholders in the order they were created (all liveBlocks first, then
 *   all structuredViewBlocks; the numeric suffix is the running list length).
 */
function preprocessCustomNodes(html: string): { html: string; placeholders: BlockPlaceholder[] } {
  const placeholders: BlockPlaceholder[] = []

  // liveBlock: an empty <div> carrying data-live-block plus id attributes.
  let result = html.replace(
    /<div([^>]*?data-live-block[^>]*?)>\s*<\/div>/gi,
    (_match: string, attrs: string) => {
      const snId = matchQuotedAttr(attrs, /sourcenoteid=(?:"([^"]*)"|'([^']*)')/i)
      const bId = matchQuotedAttr(attrs, /blockid=(?:"([^"]*)"|'([^']*)')/i)
      const key = `${SENTINEL_PREFIX}LIVEBLOCK${placeholders.length}`
      placeholders.push({ key, comment: liveBlockComment(snId, bId) })
      // NOTE(review): wrapper element reconstructed as <p>; confirm.
      return `<p>${key}</p>`
    }
  )

  // structuredViewBlock: an empty <div> carrying data-structured-view-block
  // plus further data-* attributes.
  result = result.replace(
    /<div([^>]*?data-structured-view-block[^>]*?)>\s*<\/div>/gi,
    (_match: string, attrs: string) => {
      const attrMap: Record<string, string> = {}
      // Accept both quote styles, matching the liveBlock branch above.
      const attrRe = /(data-[a-z-]+)=(?:"([^"]*)"|'([^']*)')/gi
      let m: RegExpExecArray | null
      while ((m = attrRe.exec(attrs)) !== null) {
        const name = m[1] || ''
        if (name === '' || name === 'data-structured-view-block') continue
        attrMap[name] = m[2] !== undefined ? m[2] : (m[3] || '')
      }
      const key = `${SENTINEL_PREFIX}SVBLOCK${placeholders.length}`
      placeholders.push({ key, comment: structuredViewBlockComment(attrMap) })
      // NOTE(review): wrapper element reconstructed as <p>; confirm.
      return `<p>${key}</p>`
    }
  )

  return { html: result, placeholders }
}

/**
 * Post-process the markdown output: replace sentinel placeholders with HTML comments.
 *
 * @param md - Markdown produced by turndown from the pre-processed HTML.
 * @param placeholders - Pairs returned by `preprocessCustomNodes`.
 * @returns The markdown with the first occurrence of each key replaced by its
 *   comment, surrounded by blank lines.
 */
function postprocessPlaceholders(md: string, placeholders: BlockPlaceholder[]): string {
  // Longest keys first so that "…BLOCK1" can never match inside "…BLOCK10".
  const ordered = [...placeholders].sort((a, b) => b.key.length - a.key.length)
  let result = md
  for (const { key, comment } of ordered) {
    // A function replacer inserts the text verbatim; a string replacement
    // would expand `$&`, `$1`, `$$` … sequences found in attribute values.
    result = result.replace(key, () => `\n\n${comment}\n\n`)
  }
  return result
}

// ── HTML → Markdown ──────────────────────────────────────────────────────────

/**
 * Convert a TipTap-generated HTML string to GitHub-Flavored Markdown.
 * Custom nodes (liveBlock, structuredViewBlock) are serialised as HTML comments.
 *
 * @param html - HTML produced by the editor.
 * @returns Trimmed Markdown, or '' when the input is empty or whitespace only.
 */
export function tiptapHTMLToMarkdown(html: string): string {
  if (!html || html.trim() === '') return ''
  const { html: preprocessed, placeholders } = preprocessCustomNodes(html)
  const td = getTurndownService()
  const md = td.turndown(preprocessed).trim()
  return postprocessPlaceholders(md, placeholders).trim()
}

// ── Markdown → HTML ──────────────────────────────────────────────────────────

/**
 * Convert a Markdown string to HTML suitable for injection into TipTap via
 * `editor.commands.setContent(html)`.
 *
 * Uses marked with GFM enabled and `breaks` disabled.
 *
 * @param markdown - Markdown source.
 * @returns The rendered HTML, or '' when the input is empty or whitespace only.
 */
export function markdownToHTML(markdown: string): string {
  if (!markdown || markdown.trim() === '') return ''
  // marked v18+ uses synchronous parse by default when no async tokens
  const html = marked.parse(markdown, {
    gfm: true,
    breaks: false,
  }) as string
  return html
}

// ── Title extraction from Markdown ──────────────────────────────────────────

/**
 * Extract the first H1 title from a Markdown string.
 * Returns null if no H1 is found.
 *
 * @param markdown - Markdown source.
 * @returns The trimmed text of the first line starting with `# `, or null.
 */
export function extractMarkdownTitle(markdown: string): string | null {
  // Only spaces/tabs may separate '#' from the title. `\s+` would also consume
  // a newline, so an empty "#" line would report the following line as title.
  const match = markdown.match(/^#[ \t]+(.+)/m)
  return match ? match[1].trim() : null
}