"""Wrap "page N" references in the HTML files of a book directory.

Every ``.html`` file under ``BOOK_ROOT`` is rewritten in place so that each
match of ``PAGE_RGX`` is surrounded by ``PAGE_OPEN_TAG`` / ``PAGE_CLOSE_TAG``.
Lines that already contain ``PAGE_OPEN_TAG`` are left untouched, so running
the script a second time does not nest the markup.
"""

import pathlib
import re

PAGE_RGX = r"page ([0-9]+)"

# Compiled once; the pattern is applied to every line of every file.
_PAGE_PATTERN = re.compile(PAGE_RGX)

# NOTE(review): the original guard substring and replacement markup were lost
# from this file (the text between `and "` and `\g<0>'` is missing). The two
# tags below are PLACEHOLDERS chosen to restore a runnable script -- confirm
# the real markup before running this against the books.
PAGE_OPEN_TAG = '<span class="page-ref">'
PAGE_CLOSE_TAG = "</span>"

# Directory tree to process. "./books/core" was the alternative root left
# commented out in the original script.
BOOK_ROOT = pathlib.Path("./books/natural-fantasy-atlas")


def mark_page_refs(line):
    """Return *line* with every ``page N`` reference wrapped in the page tag.

    The line is returned unchanged when it has no page reference or when it
    already contains ``PAGE_OPEN_TAG``.
    """
    if PAGE_OPEN_TAG in line or not _PAGE_PATTERN.search(line):
        return line
    # Raw string: "\g<0>" is the whole-match backreference for re.sub, not a
    # string escape.
    return _PAGE_PATTERN.sub(PAGE_OPEN_TAG + r"\g<0>" + PAGE_CLOSE_TAG, line)


def annotate_html(html):
    """Apply :func:`mark_page_refs` to each line of *html*.

    Lines are split on ``"\\n"`` and joined back with ``"\\n"``, so line
    breaks (including a trailing one) survive. The original script wrote the
    lines back without any separator, which collapsed each file to one line.
    """
    return "\n".join(mark_page_refs(line) for line in html.split("\n"))


def rewrite_html_file(path):
    """Annotate the file at *path* in place; return ``True`` if it changed.

    The new content is computed before the file is reopened for writing, so a
    failure while processing cannot leave a truncated file behind. Unchanged
    files are not rewritten.
    """
    # newline="" disables newline translation so existing line endings
    # round-trip exactly. NOTE(review): assumes the books are UTF-8 encoded.
    with path.open(encoding="utf-8", newline="") as fh:
        html = fh.read()
    updated = annotate_html(html)
    if updated == html:
        return False
    with path.open("w", encoding="utf-8", newline="") as fh:
        fh.write(updated)
    return True


def main(root=BOOK_ROOT):
    """Rewrite every ``.html`` file found under *root*."""
    for folder, _dirs, names in pathlib.Path(root).walk():
        for name in names:
            if name.endswith(".html"):
                rewrite_html_file(folder / name)


if __name__ == "__main__":
    main()