def extract_text_from_pdf(pdf_path):
    """Yield the extracted text of each page of a PDF, one page at a time.

    This is a generator function: nothing is read until the caller starts
    iterating, and only one page's text is produced per step.

    Args:
        pdf_path: The PDF source, passed unchanged to ``PdfReader``.

    Yields:
        The text returned by ``extract_text()`` for each page, in the order
        the pages appear in ``reader.pages``. Pages whose extracted text is
        empty (falsy) are skipped.
    """
    reader = PdfReader(pdf_path)
    for page in reader.pages:
        text = page.extract_text()
        # Skip pages that yield no text so callers never receive empty chunks.
        if text:
            yield text

,

,