git » homepage.git » commit b6ea70c

Add link checker make target

author Alan Dipert
2025-10-08 06:23:43 UTC
committer Alan Dipert
2025-10-08 06:23:43 UTC
parent ccce6fa8fe1d6f9fce04fe41c152f5610add9760

Add link checker make target

Makefile +4 -1
tools/check_links.py +75 -0

diff --git a/Makefile b/Makefile
index 1e6e0d6..543b850 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: all assets clean deploy help tree check-git-clean
+.PHONY: all assets clean deploy help tree check-git-clean check-links
 
 SRC := md
 OUT := out
@@ -75,3 +75,6 @@ check-git-clean:
 		git status --short; \
 		exit 1; \
 	fi
+
+# Run tools/check_links.py, which checks the outbound links found in the
+# Markdown files under md/ using curl.
+check-links:
+	@python3 tools/check_links.py
diff --git a/tools/check_links.py b/tools/check_links.py
new file mode 100755
index 0000000..d8f2695
--- /dev/null
+++ b/tools/check_links.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+"""Check outbound links in Markdown files under md/ using curl."""
+from __future__ import annotations
+
+import pathlib
+import re
+import subprocess
+import sys
+from collections import defaultdict
+
+# Repository root: this script lives in tools/, one directory below it.
+ROOT = pathlib.Path(__file__).resolve().parent.parent
+# Directory holding the Markdown sources that are scanned for links.
+MD_DIR = ROOT / "md"
+# Matches http(s) URLs built from a conservative character set. Note that
+# ".", ":" and "?" are allowed in any position, so a URL that ends a sentence
+# is matched together with that trailing punctuation.
+URL_RE = re.compile(r"https?://[\w\-./?%#=&+:~]+", re.IGNORECASE)
+
+
+def collect_links(md_path: pathlib.Path) -> set[str]:
+    """Return the distinct http(s) URLs mentioned in *md_path*.
+
+    The file is read as UTF-8 and scanned with ``URL_RE``. Trailing ".",
+    ":" and "?" characters are removed from every match so that a URL that
+    happens to end a sentence is not reported with the punctuation attached.
+
+    Args:
+        md_path: Markdown file to scan.
+
+    Returns:
+        The set of URLs found; empty when the file contains none.
+
+    Raises:
+        OSError: If the file cannot be read.
+        UnicodeDecodeError: If the file is not valid UTF-8.
+    """
+    text = md_path.read_text(encoding="utf-8")
+    # URL_RE accepts ".", ":" and "?" anywhere, so "see https://example.com."
+    # would otherwise yield "https://example.com." and be flagged as broken.
+    return {match.rstrip(".:?") for match in URL_RE.findall(text)}
+
+
+def check_url(url: str) -> tuple[bool, str]:
+    """Probe *url* with a ``curl`` HEAD request and report whether it is alive.
+
+    Redirects are not followed; a 3xx answer counts as alive.
+
+    Args:
+        url: Absolute http(s) URL to probe.
+
+    Returns:
+        ``(ok, detail)``. ``ok`` is True when the final response carries a
+        2xx or 3xx status code. ``detail`` is the final HTTP status line
+        when a response was received, otherwise curl's error message (or
+        ``"no response"`` if curl printed nothing), or ``"error: ..."`` when
+        curl itself could not be started.
+    """
+    try:
+        proc = subprocess.run(
+            [
+                "curl",
+                "--head",
+                "--silent",
+                # --silent on its own also hides curl's error text, which
+                # would leave stderr empty and every failure unexplained.
+                "--show-error",
+                "--max-time",
+                "10",
+                url,
+            ],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            encoding="utf-8",
+            errors="replace",
+            check=False,
+        )
+    except (OSError, ValueError) as exc:
+        # OSError covers a missing or non-executable curl binary; ValueError
+        # covers arguments that cannot be passed to a process at all.
+        return False, f"error: {exc}"
+
+    status_lines = [
+        line.strip() for line in proc.stdout.splitlines() if line.startswith("HTTP/")
+    ]
+    if not status_lines:
+        detail = proc.stderr.strip() or "no response"
+        return False, detail
+
+    # Judge the last status line: interim 1xx responses and a proxy's CONNECT
+    # reply are printed before the final response's header block.
+    status_line = status_lines[-1]
+    fields = status_line.split()
+    code = fields[1] if len(fields) > 1 else ""
+    # Compare the parsed status-code field instead of searching the whole
+    # line for " 200 ", which depends on spacing and on the reason phrase.
+    is_status_code = len(code) == 3 and code.isascii() and code.isdigit()
+    ok = is_status_code and 200 <= int(code) < 400
+    return ok, status_line
+
+
+def main() -> int:
+    """Check every link in the Markdown files directly inside ``MD_DIR``.
+
+    Files are visited in sorted order and each file's URLs are checked in
+    sorted order. A URL that appears in several files is probed only once;
+    the cached verdict is reused for the other files, so the report still
+    lists the URL under every file that references it.
+
+    Returns:
+        1 if at least one link is broken (after printing one line per
+        file/URL pair), otherwise 0.
+    """
+    broken: dict[str, list[tuple[str, str]]] = defaultdict(list)
+    # Verdict per URL, so shared links cost a single network round trip.
+    results: dict[str, tuple[bool, str]] = {}
+
+    for md_file in sorted(MD_DIR.glob("*.md")):
+        for url in sorted(collect_links(md_file)):
+            if url not in results:
+                results[url] = check_url(url)
+            ok, detail = results[url]
+            if not ok:
+                broken[str(md_file)].append((url, detail))
+
+    if broken:
+        print("Broken links found:")
+        for file_path, entries in broken.items():
+            for url, detail in entries:
+                print(f"  {file_path}: {url} -> {detail}")
+        return 1
+
+    print("All referenced links responded with HTTP 2xx/3xx.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())