From c75cd188c1b3d1bc9e013c7c77a91460b568c80e Mon Sep 17 00:00:00 2001 From: Drew Malzahn Date: Sat, 6 Jun 2026 03:36:35 +0000 Subject: [PATCH] feat: Add book viewer at /book with shared design system - Add html/index.html: book viewer with auto-discovering sidebar, prev/next navigation, keyboard shortcuts, and URL hash persistence - Add html/book-page.css: shared stylesheet for all book pages derived from fabula-ultima-sheet.css (dark theme, CSS variables, Cinzel/ Crimson Text fonts, common class styles) - Add book.js entry point so webpack injects the shared CSS into the book viewer; update webpack.config.js for two entry points, split CSS chunk, CopyWebpackPlugin for book pages, and /book dev server rewrite rule - Add scripts/strip_watermark.py: removes "Guest Customer (Order #52072168)" watermark artifacts from all 210 book pages - Add scripts/restyle_book.py: strips per-page + + + +
+ +
+ +
+
+ +
+ + +
+ + +
+
+ + + + diff --git a/package-lock.json b/package-lock.json index 5db9d6f..a6ffe54 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 +8,7 @@ "prettier": "^3.8.3" }, "devDependencies": { + "copy-webpack-plugin": "^14.0.0", "css-loader": "^7.1.4", "css-minimizer-webpack-plugin": "^8.0.0", "html-webpack-plugin": "^5.6.7", @@ -1846,6 +1847,43 @@ "dev": true, "license": "MIT" }, + "node_modules/copy-webpack-plugin": { + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/copy-webpack-plugin/-/copy-webpack-plugin-14.0.0.tgz", + "integrity": "sha512-3JLW90aBGeaTLpM7mYQKpnVdgsUZRExY55giiZgLuX/xTQRUs1dOCwbBnWnvY6Q6rfZoXMNwzOQJCSZPppfqXA==", + "dev": true, + "license": "MIT", + "dependencies": { + "glob-parent": "^6.0.1", + "normalize-path": "^3.0.0", + "schema-utils": "^4.2.0", + "serialize-javascript": "^7.0.3", + "tinyglobby": "^0.2.12" + }, + "engines": { + "node": ">= 20.9.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + }, + "peerDependencies": { + "webpack": "^5.1.0" + } + }, + "node_modules/copy-webpack-plugin/node_modules/glob-parent": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", + "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", + "dev": true, + "license": "ISC", + "dependencies": { + "is-glob": "^4.0.3" + }, + "engines": { + "node": ">=10.13.0" + } + }, "node_modules/core-util-is": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", @@ -2599,6 +2637,24 @@ "node": ">=0.8.0" } }, + "node_modules/fdir": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", + "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12.0.0" + }, + "peerDependencies": { + "picomatch": "^3 || ^4" + }, + "peerDependenciesMeta": { + "picomatch": { + "optional": true + } + } + }, "node_modules/fill-range": { "version": "7.1.1", "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", @@ -5648,6 +5704,23 @@ "dev": true, "license": "MIT" }, + "node_modules/tinyglobby": { + "version": "0.2.17", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.17.tgz", + "integrity": "sha512-wXR/dYpcqKmfWpEdZjiKJOwCNFndD0DMnrW/cYjVGttEkBfVgcLFHoNrlj47mjOVic9yyNu65alsgF4NQyTa2g==", + "dev": true, + "license": "MIT", + "dependencies": { + "fdir": "^6.5.0", + "picomatch": "^4.0.4" + }, + "engines": { + "node": ">=12.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/SuperchupuDev" + } + }, "node_modules/to-regex-range": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", diff --git a/package.json b/package.json index 6597b01..905e1af 100644 --- a/package.json +++ b/package.json @@ -7,6 +7,7 @@ "prettier": "^3.8.3" }, "devDependencies": { + "copy-webpack-plugin": "^14.0.0", "css-loader": "^7.1.4", "css-minimizer-webpack-plugin": "^8.0.0", "html-webpack-plugin": "^5.6.7", diff --git a/scripts/pdftohtml.py b/scripts/pdftohtml.py new file mode 100644 index 0000000..da58caa --- /dev/null +++ b/scripts/pdftohtml.py @@ -0,0 +1,63 @@ +# /// script +# requires-python = ">=3.12" +# dependencies = [ +# "cryptography", +# "ollama", +# "pypdf", +# ] +# /// + +import ollama +from pypdf import PdfReader + + +def extract_text_from_pdf(pdf_path): + """ + Extracts raw text content from all pages of a PDF file, one page at a time. + """ + reader = PdfReader(pdf_path) + full_text = "" + for page in reader.pages: + text = page.extract_text() + if text: + yield text + + +def convert_text_to_html(raw_text, model_name="gemma4"): + """Sends raw text to Ollama and requests a semantic HTML conversion.""" + prompt = f""" + You are an expert web developer. Convert the following raw text extracted from a PDF document into a beautifully styled, clean, and semantic HTML document. + + Requirements: + 1. Use appropriate HTML tags (

,

,