git » homepage.git » commit 7e4f512

Show inbound links on each page

author Alan Dipert
2025-12-29 16:48:51 UTC
committer Alan Dipert
2025-12-29 16:48:51 UTC
parent e3c66738cc6918b5df6e317333ff6c39deca4149

Show inbound links on each page

tools/build_page.py +93 -1
tpl/style.css +16 -0

diff --git a/tools/build_page.py b/tools/build_page.py
index 4eefa00..7c18a8e 100755
--- a/tools/build_page.py
+++ b/tools/build_page.py
@@ -6,11 +6,12 @@ from __future__ import annotations
 import argparse
 import html
 import os
+import re
 import shlex
 import subprocess
 import sys
 from pathlib import Path
-from typing import List
+from typing import Dict, List, Set
 
 TOOLS_DIR = Path(__file__).resolve().parent
 if str(TOOLS_DIR) not in sys.path:
@@ -35,6 +36,75 @@ def parse_args() -> argparse.Namespace:
     return parser.parse_args()
 
 
+# Matches the "](...)" tail of an inline Markdown link and captures everything
+# between "](" and the next ")". The capture is the raw destination text, so
+# it also includes any link title, is cut short at the first ")" inside a
+# destination, and matches image links ("![alt](src)") as well.
+_LINK_PATTERN = re.compile(r"\]\(([^)]+)\)")
+
+
+def find_md_root(input_md: Path) -> Path:
+    """Return the resolved Markdown root directory for *input_md*.
+
+    The root is the nearest ancestor directory whose name is exactly ``md``.
+    When no ancestor carries that name, the file's own parent directory is
+    used instead. The result is always passed through ``Path.resolve()``.
+    """
+    nearest_md = next(
+        (ancestor for ancestor in input_md.parents if ancestor.name == "md"),
+        input_md.parent,
+    )
+    return nearest_md.resolve()
+
+
+def is_temp_path(path: Path) -> bool:
+    """Return True when any component of *path* is treated as temporary.
+
+    A component counts as temporary when it starts with ``.#`` or ends with
+    ``~``. Every part of the path is checked, directories included.
+    """
+    return any(
+        piece.startswith(".#") or piece.endswith("~") for piece in path.parts
+    )
+
+
+def extract_links(md_path: Path) -> List[str]:
+    """Return the raw destination text of every inline link in *md_path*.
+
+    Destinations are returned exactly as captured by ``_LINK_PATTERN``, in
+    document order and with duplicates preserved; no normalisation is done
+    here.
+
+    Raises:
+        OSError: if the file cannot be read.
+        UnicodeDecodeError: if the file is not valid UTF-8.
+    """
+    # Decode explicitly as UTF-8 so the result does not depend on the locale
+    # of the machine running the build.
+    return _LINK_PATTERN.findall(md_path.read_text(encoding="utf-8"))
+
+
+def normalize_target(current_md: Path, raw_url: str, md_root: Path) -> Path | None:
+    """Map a raw link destination to a Markdown file path relative to *md_root*.
+
+    Args:
+        current_md: The Markdown file containing the link; relative
+            destinations are resolved against its parent directory.
+        raw_url: The destination text as it appears inside ``](...)``. It may
+            carry a trailing link title or be wrapped in ``<...>``.
+        md_root: The Markdown root directory. It must already be resolved,
+            because the resolved target is compared against it.
+
+    Returns:
+        The target's path relative to *md_root*, or ``None`` when the link is
+        external (contains ``://``), is a bare fragment, has an extension
+        other than none/``.html``/``.md``, has no file name, points outside
+        *md_root*, names a temporary file, or does not exist on disk.
+    """
+    url = raw_url.strip()
+    if url.startswith("<") and ">" in url:
+        # Angle-bracket destination, e.g. `<my page.md> "Title"`.
+        url = url[1 : url.index(">")]
+    elif url:
+        # Drop an optional link title, e.g. `page.md "Title"`.
+        url = url.split(None, 1)[0]
+
+    if not url or "://" in url or url.startswith("#"):
+        return None
+
+    # Only the path part identifies the page; discard any "#fragment".
+    raw_path = url.split("#", 1)[0]
+    if not raw_path:
+        return None
+
+    candidate = Path(raw_path)
+    suffix = candidate.suffix.lower()
+    if suffix not in ("", ".html", ".md"):
+        return None
+    if suffix != ".md":
+        # Extension-less and ".html" links refer to the ".md" source page.
+        try:
+            candidate = candidate.with_suffix(".md")
+        except ValueError:
+            # Destinations such as "./" or "/" have no file name to rewrite.
+            return None
+
+    resolved = (current_md.parent / candidate).resolve()
+    try:
+        rel = resolved.relative_to(md_root)
+    except ValueError:
+        # The link escapes the Markdown tree.
+        return None
+
+    if is_temp_path(rel):
+        return None
+
+    if not (md_root / rel).exists():
+        return None
+
+    return rel
+
+
+def compute_backlinks(md_root: Path) -> Dict[str, List[str]]:
+    """Build the inbound-link index for every Markdown page under *md_root*.
+
+    Each ``*.md`` file found recursively under *md_root* is scanned, except
+    those whose relative path ``is_temp_path`` flags. Links that
+    ``normalize_target`` cannot resolve, and links from a page to itself, are
+    ignored.
+
+    Returns:
+        A dict keyed by the target's POSIX-style path relative to *md_root*;
+        each value is the sorted, de-duplicated list of relative POSIX paths
+        of the pages linking to that target.
+    """
+    sources_by_target: Dict[str, Set[str]] = {}
+    for page in md_root.rglob("*.md"):
+        source = page.relative_to(md_root)
+        if not is_temp_path(source):
+            source_key = source.as_posix()
+            for url in extract_links(page):
+                dest = normalize_target(page, url, md_root)
+                # Keep only resolvable links that point at a different page.
+                if dest is not None and dest != source:
+                    sources_by_target.setdefault(dest.as_posix(), set()).add(source_key)
+
+    return {key: sorted(found) for key, found in sources_by_target.items()}
+
+
 def run_cmark(md_content: str, md2html: str, cmark_flags: List[str]) -> str:
     result = subprocess.run(
         [md2html, *cmark_flags],
@@ -72,6 +142,7 @@ def main() -> None:
 
     output_path = Path(args.output)
     input_md = Path(args.input_md)
+    md_root = find_md_root(input_md)
     head_tpl = Path(args.head_tpl)
     foot_tpl = Path(args.foot_tpl)
     mdlink_script = Path(args.mdlink_script)
@@ -91,12 +162,33 @@ def main() -> None:
     build_info_html = buildinfo.build_info(input_md.as_posix())
     root_prefix = compute_root_prefix(output_path, out_dir)
 
+    backlinks = compute_backlinks(md_root)
+    target_key = input_md.resolve().relative_to(md_root).as_posix()
+    backlink_sources = backlinks.get(target_key, [])
+
     head_html = render_template(head_tpl, root_prefix, page_title, build_info_html)
     foot_html = render_template(foot_tpl, root_prefix, page_title, build_info_html)
 
+    backlinks_html = ""
+    if backlink_sources:
+        items = []
+        for rel_source in backlink_sources:
+            href = html.escape(f"{root_prefix}{rel_source[:-3]}.html", quote=True)
+            display = html.escape(rel_source[:-3])
+            items.append(f"<li><a href=\"{href}\">{display}</a></li>")
+        backlinks_html = (
+            '<div class="backlinks">\n'
+            "  <h3>Links to this page</h3>\n"
+            "  <ul>\n"
+            f"    {'\n    '.join(items)}\n"
+            "  </ul>\n"
+            "</div>\n"
+        )
+
     with output_path.open("w") as outf:
         outf.write(head_html)
         outf.write(body_html)
+        outf.write(backlinks_html)
         outf.write(foot_html)
 
 
diff --git a/tpl/style.css b/tpl/style.css
index ce0600a..06f22f2 100644
--- a/tpl/style.css
+++ b/tpl/style.css
@@ -114,6 +114,22 @@ img[src*="200px"]{width:200px;}
 .site-index>li{margin:.35rem 0;}
 .site-index ul{list-style:none;margin:.35rem 0;padding-left:1.25rem;}
 .site-index a{font-weight:600;}
+/* "Links to this page" box: styles the div.backlinks block (h3 + ul) that
+   tools/build_page.py writes between the page body and the footer. */
+.content .backlinks{
+  margin:2rem 0 1.5rem;
+  padding:1rem;
+  border:1px solid rgba(0,0,0,.08);
+  border-radius:.5rem;
+  background:rgba(0,0,0,.02);
+}
+.content .backlinks h3{
+  margin:0 0 .5rem;
+  color:#cc3b12;
+}
+.content .backlinks ul{
+  margin:.25rem 0 0;
+  padding-left:1.25rem;
+}
+.content .backlinks li{margin:.2rem 0;}
 .site-foot{
   margin-top:2rem;
   border-top:1px solid rgba(0,0,0,.08);