author | Alan Dipert
<alan@dipert.org> 2025-10-08 06:23:43 UTC |
committer | Alan Dipert
<alan@dipert.org> 2025-10-08 06:23:43 UTC |
parent | ccce6fa8fe1d6f9fce04fe41c152f5610add9760 |
Makefile | +4 | -1 |
tools/check_links.py | +75 | -0 |
#!/usr/bin/env python3
"""Check outbound links in Markdown files under md/ using curl.

Run via ``make check-links`` (the Makefile target invokes
``python3 tools/check_links.py``).  Every ``http``/``https`` URL found in the
top-level ``md/*.md`` files is probed with a ``curl -I`` HEAD request.  The
process exits with status 1 if any URL fails, 0 otherwise.
"""
from __future__ import annotations

import pathlib
import re
import subprocess
import sys
from collections import defaultdict

ROOT = pathlib.Path(__file__).resolve().parent.parent
MD_DIR = ROOT / "md"
URL_RE = re.compile(r"https?://[\w\-./?%#=&+:~]+", re.IGNORECASE)

# Characters URL_RE accepts that are far more likely to be sentence
# punctuation than part of the URL when they appear at the very end.
TRAILING_PUNCTUATION = ".:?"

# Per-request time budget handed to curl's --max-time, in seconds.
CURL_MAX_TIME_SECONDS = 10


def extract_links(text: str) -> set[str]:
    """Return the distinct http(s) URLs found in *text*.

    Trailing ``.``, ``:`` and ``?`` are stripped from each match so that a URL
    ending a sentence ("see https://example.com/page.") is not checked with
    the punctuation attached.
    """
    return {match.rstrip(TRAILING_PUNCTUATION) for match in URL_RE.findall(text)}


def collect_links(md_path: pathlib.Path) -> set[str]:
    """Return the distinct http(s) URLs referenced in the file at *md_path*.

    The file is read as UTF-8.
    """
    return extract_links(md_path.read_text(encoding="utf-8"))


def classify_response(stdout: str, stderr: str = "") -> tuple[bool, str]:
    """Interpret curl's header output for one request.

    Args:
        stdout: Text curl wrote to stdout (response headers for ``-I``).
        stderr: Text curl wrote to stderr; used as the failure detail when no
            status line was received.

    Returns:
        ``(ok, detail)``.  ``ok`` is True when the status code is in the
        2xx/3xx range.  ``detail`` is the status line, or curl's stderr (or
        ``"no response"``) when there was no status line at all.
    """
    status_lines = [
        line.strip() for line in stdout.splitlines() if line.startswith("HTTP/")
    ]
    if not status_lines:
        return False, stderr.strip() or "no response"

    # Use the last status line: curl can print interim responses (for example
    # a proxy CONNECT reply) ahead of the one that answers the request.
    status_line = status_lines[-1]

    # Parse the numeric code instead of substring-matching, so lines without a
    # reason phrase ("HTTP/1.1 200") and codes such as 204 are handled.
    fields = status_line.split()
    try:
        code = int(fields[1])
    except (IndexError, ValueError):
        return False, status_line

    return 200 <= code < 400, status_line


def check_url(url: str) -> tuple[bool, str]:
    """Probe *url* with a HEAD request via curl.

    Redirects are not followed; a 3xx response counts as success.

    Returns:
        ``(ok, detail)`` as produced by :func:`classify_response`, or
        ``(False, "error: ...")`` when curl itself could not be run.
    """
    command = [
        "curl",
        "-Is",
        "--max-time",
        str(CURL_MAX_TIME_SECONDS),
        url,
    ]
    try:
        proc = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",
            check=False,
        )
    except (OSError, subprocess.SubprocessError) as exc:
        # Typically curl is not installed or is not executable.
        return False, f"error: {exc}"

    return classify_response(proc.stdout, proc.stderr)


def main() -> int:
    """Check every link under ``md/`` and report failures.

    Returns:
        1 if at least one link is broken (after printing a report), else 0.
    """
    broken: dict[str, list[tuple[str, str]]] = defaultdict(list)
    # A URL referenced from several files is only fetched once per run.
    results: dict[str, tuple[bool, str]] = {}

    for md_file in sorted(MD_DIR.glob("*.md")):
        for url in sorted(collect_links(md_file)):
            if url not in results:
                results[url] = check_url(url)
            ok, detail = results[url]
            if not ok:
                broken[str(md_file)].append((url, detail))

    if broken:
        print("Broken links found:")
        for file_path, entries in broken.items():
            for url, detail in entries:
                print(f"  {file_path}: {url} -> {detail}")
        return 1

    print("All referenced links responded with HTTP 2xx/3xx.")
    return 0


if __name__ == "__main__":
    sys.exit(main())