feat: Add core rules
This commit is contained in:
@@ -13,7 +13,7 @@ import glob
|
||||
import os
|
||||
import re
|
||||
|
||||
HTML_DIR = os.path.join(os.path.dirname(__file__), "..", "html")
|
||||
HTML_DIR = os.path.join(os.path.dirname(__file__), "..", "books")
|
||||
|
||||
# Matches the watermark text in all the forms it appears
|
||||
WATERMARK_RE = re.compile(
|
||||
@@ -23,6 +23,7 @@ WATERMARK_RE = re.compile(
|
||||
r"|Order\s*[#:]\s*:?\s*52072168" # "Order #52072168", "Order #: 52072168"
|
||||
r"|#\s*52072168" # "#52072168" standalone
|
||||
r"|\b52072168\b" # bare order number
|
||||
r"|\b38246845\b"
|
||||
r")",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
@@ -88,10 +89,18 @@ def main() -> None:
|
||||
m = re.search(r"(\d+)", os.path.basename(p))
|
||||
return int(m.group(1)) if m else -1
|
||||
|
||||
html_files = sorted(
|
||||
glob.glob(os.path.join(HTML_DIR, "*.html")),
|
||||
key=sort_key,
|
||||
)
|
||||
from pathlib import Path
|
||||
html_files = []
|
||||
for root, dirs, files in Path(HTML_DIR).walk():
|
||||
hf = filter(lambda fn: fn.endswith(".html"), files)
|
||||
hf = [ root / fn for fn in hf ]
|
||||
html_files.extend(hf)
|
||||
# html_files = sorted(
|
||||
# glob.glob(os.path.join(HTML_DIR, "*.html")),
|
||||
# key=sort_key,
|
||||
# )
|
||||
breakpoint()
|
||||
html_files = sorted(html_files, key=sort_key)
|
||||
|
||||
total_removed = 0
|
||||
total_stripped = 0
|
||||
|
||||
Reference in New Issue
Block a user