# /// script
# requires-python = ">=3.12"
# dependencies = [
#   "bs4",
# ]
# ///
"""Add deterministic ``id`` attributes to every header in the book HTML files.

Each ``.html`` file found under the directories in ``BOOK_DIRS`` is parsed,
every ``<h1>``..``<h6>`` tag is given an ``id`` derived from its tag name and
text, and the file is rewritten in place.
"""

import re  # retained from the original script; currently unused
from pathlib import Path

from bs4 import BeautifulSoup

# Directories (relative to the current working directory) whose HTML files
# are rewritten in place.
BOOK_DIRS = ('./books/core', './books/natural-fantasy-atlas')

# Tag names that receive an id.
HEADER_TAG_NAMES = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']

# Dictionary to track how many times we've seen each header ID.
# It is shared by every call that does not pass its own ``seen`` mapping, so
# duplicate suffixes are counted across all processed files, not per file.
header_seen = {}


def add_anchors_to_headers(
    html_content: str,
    seen: dict[str, int] | None = None,
) -> str:
    """Return *html_content* with an ``id`` set on every h1-h6 tag.

    The id is ``"<tag>-<text>"`` where ``<text>`` is the header's text
    (``get_text(strip=True)``, falling back to the tag name when empty),
    lower-cased with spaces replaced by ``-``. The first header producing a
    given id keeps it unchanged; the second gets the suffix ``-2``, the third
    ``-3``, and so on. Any existing ``id`` on a header is overwritten.

    Args:
        html_content: HTML markup to process.
        seen: Mapping from base id to the number of times it has been used.
            It is updated in place. Defaults to the module-level
            ``header_seen``, which keeps ids unique across calls.

    Returns:
        The modified document serialized back to a string.
    """
    if seen is None:
        seen = header_seen

    soup = BeautifulSoup(html_content, 'html.parser')

    for header in soup.find_all(HEADER_TAG_NAMES):
        # Extract header text (fallback to tag name if empty).
        header_text = header.get_text(strip=True) or header.name

        # Normalize: lower-case, spaces become dashes. Kept exactly as-is so
        # ids generated by earlier runs (and links to them) stay valid.
        header_text = header_text.lower().replace(" ", "-")

        # Base ID: hN-<normalized text>
        base_id = f"{header.name}-{header_text}"

        # Occurrence number of this base ID, starting at 1.
        count = seen.get(base_id, 0) + 1
        seen[base_id] = count

        # The first occurrence uses the bare base ID; later ones are
        # suffixed with their occurrence number (-2, -3, ...).
        header['id'] = base_id if count == 1 else f"{base_id}-{count}"

        # NOTE: wrapping the header in a clickable permalink is currently
        # disabled; the original code is kept here for reference.
        # anchor_tag = soup.new_tag("a", href=f"#{header_id}", class_="anchor-link")
        # anchor_tag.string = f"🔗"
        # header.wrap(anchor_tag)

    return str(soup)


# The rewrite runs at module level, exactly as in the original script.
for book in BOOK_DIRS:
    for root, dirs, files in Path(book).walk():
        # Visit directories and files in sorted order: the duplicate suffixes
        # depend on processing order, and the raw filesystem order is
        # arbitrary, so sorting makes the generated ids reproducible.
        dirs.sort()
        for fn in sorted(files):
            path = root / fn
            if path.suffix != ".html":
                continue

            # Explicit UTF-8 so the result does not depend on the platform's
            # default text encoding.
            raw_html = path.read_text(encoding='utf-8')
            new_html = add_anchors_to_headers(raw_html)
            path.write_text(new_html, encoding='utf-8')