feat: Add core rules
This commit is contained in:
@@ -30,7 +30,7 @@ def convert_text_to_html(raw_text, model_name="gemma4"):
|
||||
|
||||
Requirements:
|
||||
1. Use appropriate HTML tags (<h1>, <p>, <ul>, <li>, <strong>, etc.) to recreate the structural hierarchy.
|
||||
2. Add inline CSS or a <style> block in the <head> to make it look professional (modern sans-serif typography, clean margins, and clear layout).
|
||||
2. Do not add CSS, styling will be added later.
|
||||
3. Return ONLY valid HTML code. Do not include markdown code block backticks (```html) or extra conversational commentary.
|
||||
|
||||
Raw Document Text:
|
||||
@@ -45,15 +45,16 @@ def convert_text_to_html(raw_text, model_name="gemma4"):
|
||||
|
||||
def main():
|
||||
input_pdf = (
|
||||
"Fabula_Ultima_-_Natural_Fantasy_Atlas_ENG_v1_1.pdf"
|
||||
# "Fabula_Ultima_-_Natural_Fantasy_Atlas_ENG_v1_1.pdf"
|
||||
"Fabula_Ultima_TTJRPG.pdf"
|
||||
)
|
||||
try:
|
||||
from pathlib import Path
|
||||
Path('./html').mkdir(exist_ok=True)
|
||||
Path('./html2').mkdir(exist_ok=True)
|
||||
for page_num, text in enumerate(extract_text_from_pdf(input_pdf)):
|
||||
html_output = convert_text_to_html(text)
|
||||
print(f'Writing html/{page_num}.html ({len(text)} bytes)')
|
||||
with open(f'html/{page_num}.html', 'w') as fh:
|
||||
print(f'Writing html2/{page_num}.html ({len(text)} bytes)')
|
||||
with open(f'html2/{page_num}.html', 'w') as fh:
|
||||
fh.write(html_output)
|
||||
except FileNotFoundError:
|
||||
print(f"Error: The file '{input_pdf}' was not found. Please check your path.")
|
||||
|
||||
Reference in New Issue
Block a user