- Add html/index.html: book viewer with auto-discovering sidebar, prev/next navigation, keyboard shortcuts, and URL hash persistence - Add html/book-page.css: shared stylesheet for all book pages derived from fabula-ultima-sheet.css (dark theme, CSS variables, Cinzel/ Crimson Text fonts, common class styles) - Add book.js entry point so webpack injects the shared CSS into the book viewer; update webpack.config.js for two entry points, split CSS chunk, CopyWebpackPlugin for book pages, and /book dev server rewrite rule - Add scripts/strip_watermark.py: removes "Guest Customer (Order #52072168)" watermark artifacts from all 210 book pages - Add scripts/restyle_book.py: strips per-page <style> blocks and injects <link rel="stylesheet" href="book-page.css"> into all pages - Update Justfile deploy to scp -r dist/* for the new /book subtree Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
122 lines
3.8 KiB
Python
122 lines
3.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Remove "Guest Customer (Order #52072168)" watermark artifacts from all book pages.
|
|
|
|
Strategy:
|
|
- If a line's text content (with HTML tags stripped) consists entirely of
|
|
watermark text, remove the whole line.
|
|
- If watermark text is embedded within a line that has other content, strip
|
|
just the watermark portion and tidy the surrounding punctuation.
|
|
"""
|
|
|
|
import glob
|
|
import os
|
|
import re
|
|
|
|
# The "html" directory that sits next to this script's own directory.
HTML_DIR = os.path.join(os.path.dirname(__file__), "..", "html")

# Matches the watermark text in all the forms it appears.
#
# Alternatives are tried left to right, so the complete parenthesised form is
# listed first: that lets an inline removal take the brackets with it instead
# of leaving an empty "()" behind in the surrounding sentence.  The remaining
# alternatives catch the individual fragments wherever they occur alone.
WATERMARK_RE = re.compile(
    r"(?:"
    # full form, e.g. "Guest Customer (Order #52072168)"
    r"Guest\s+Customer\s*\(\s*Order\b[^\n<()]*?52072168\s*\)"
    r"|Guest\s+Customer\b"                  # bare "Guest Customer" label
    r"|Order\s+\w[^\n<]*?52072168"          # "Order <label>: 52072168"
    r"|Order\s*[#:]\s*:?\s*52072168"        # "Order #52072168", "Order #: 52072168"
    r"|#\s*52072168"                        # "#52072168" standalone
    r"|\b52072168\b"                        # bare order number
    r")",
    re.IGNORECASE,
)

# Any single HTML tag, from "<" up to the next ">".
TAG_RE = re.compile(r"<[^>]+>")

# Punctuation and decoration that can be left behind after stripping.
# A string made up only of these characters (whitespace, dashes, pipes,
# brackets, digits, "&", "#", slashes, ...) carries no real content.
DECORATION_RE = re.compile(r"^[\s\-–—|,.:;!?()\[\]*&#\d/\\]+$")
|
|
|
|
|
|
def text_content(line: str) -> str:
    """Strip every HTML tag from *line* and return whatever text is left."""
    without_tags = TAG_RE.sub("", line)
    return without_tags
|
|
|
|
|
|
def is_watermark_only(line: str) -> bool:
    """Report whether nothing but watermark text is visible on *line*.

    Tags are dropped first, then every watermark match is deleted.  The line
    counts as watermark-only when the leftover is empty or consists solely of
    the decoration characters accepted by ``DECORATION_RE``.
    """
    visible = text_content(line).strip()
    leftover = WATERMARK_RE.sub("", visible).strip()

    # Nothing at all survived the removal.
    if leftover == "":
        return True

    # Something survived; it only counts as content if it is more than
    # leftover punctuation/decoration.
    return DECORATION_RE.match(leftover) is not None
|
|
|
|
|
|
def strip_watermark_inline(line: str) -> str:
    """Remove watermark text from a line that has other real content.

    Args:
        line: One line of HTML, with or without its trailing line terminator.

    Returns:
        The line with every watermark match deleted, up to three stray
        dashes or a stray pipe trimmed from its start and end, and runs of
        whitespace collapsed to a single space.  The line terminator of the
        input, if any, is returned unchanged at the end of the result.
    """
    # Set the terminator aside before tidying: the patterns below use
    # whitespace classes that also match "\n", so a line such as
    # "text Guest Customer\n" would otherwise lose its newline and end up
    # glued onto the following line once the file is written back.
    body = line.rstrip("\r\n")
    terminator = line[len(body):]

    result = WATERMARK_RE.sub("", body)
    # Tidy decoration left behind after removal (e.g. "— ", " —", " | ", "---")
    result = re.sub(r"\s*[—–\-]{1,3}\s*$", "", result, flags=re.MULTILINE)
    result = re.sub(r"^\s*[—–\-]{1,3}\s*", "", result, flags=re.MULTILINE)
    result = re.sub(r"\|\s*$", "", result, flags=re.MULTILINE)
    result = re.sub(r"^\s*\|\s*", "", result, flags=re.MULTILINE)
    # Removing text from the middle of a sentence leaves doubled spaces.
    result = re.sub(r"\s{2,}", " ", result)
    return result + terminator
|
|
|
|
|
|
def process_file(filepath: str) -> tuple[int, int]:
    """Remove watermark artifacts from one HTML file, rewriting it in place.

    Args:
        filepath: Path of the UTF-8 encoded page to clean.

    Returns:
        A ``(removed, stripped)`` pair: the number of watermark-only lines
        that were dropped, and the number of lines that kept their other
        content but had watermark text cut out of them.
    """
    with open(filepath, encoding="utf-8") as f:
        lines = f.readlines()

    new_lines: list[str] = []
    removed = 0
    stripped = 0

    for line in lines:
        if not WATERMARK_RE.search(line):
            new_lines.append(line)
        elif is_watermark_only(line.strip()):
            # Nothing but watermark is visible on this line: drop it entirely.
            removed += 1
        else:
            new_lines.append(strip_watermark_inline(line))
            stripped += 1

    # Leave clean files untouched, so pages without a watermark (and repeat
    # runs of the script) are not rewritten for nothing.
    if removed or stripped:
        with open(filepath, "w", encoding="utf-8") as f:
            f.writelines(new_lines)

    return removed, stripped
|
|
|
|
|
|
def main() -> None:
    """Clean every ``*.html`` page under ``HTML_DIR`` and print a report.

    Pages are visited in the order of the first number found in their file
    name (names without a number sort first).  One line is printed per file
    that changed, followed by an overall summary.
    """

    def page_number(path):
        # First run of digits in the file name; -1 when there is none.
        found = re.search(r"(\d+)", os.path.basename(path))
        return -1 if found is None else int(found.group(1))

    pages = glob.glob(os.path.join(HTML_DIR, "*.html"))
    pages.sort(key=page_number)

    affected = 0
    total_removed = 0
    total_stripped = 0

    for page in pages:
        removed, stripped = process_file(page)
        total_removed += removed
        total_stripped += stripped

        if not (removed or stripped):
            continue

        affected += 1
        name = os.path.basename(page)
        details = []
        if removed:
            plural = "" if removed == 1 else "s"
            details.append(f"{removed} line{plural} removed")
        if stripped:
            details.append(f"{stripped} inline")
        print(f" {name}: {', '.join(details)}")

    print(
        f"\nDone. {affected} files changed — "
        f"{total_removed} lines removed, {total_stripped} inline occurrences stripped."
    )
|
|
|
|
|
|
# Run the clean-up only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|