git » alan.git » commit 09d8e53

Improve PDF layout and opaque hardness profiles

author Alan Dipert
2025-12-06 18:12:30 UTC
committer Alan Dipert
2025-12-06 18:12:30 UTC
parent 88bd9580b71384d1b7561323a7a4850853ded0df

Improve PDF layout and opaque hardness profiles

Containerfile +3 -1
README.md +2 -2
generator.py +2 -1
main.py +10 -0
render_text.py +154 -19

diff --git a/Containerfile b/Containerfile
index 2bb7551..5f4f398 100644
--- a/Containerfile
+++ b/Containerfile
@@ -7,9 +7,11 @@ RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         ca-certificates \
         make \
-        pandoc \
         texlive-latex-base \
         texlive-fonts-recommended \
+        texlive-fonts-extra \
+        texlive-fonts-extra-links \
+        texlive-humanities \
         fonts-dejavu-core && \
     rm -rf /var/lib/apt/lists/*
 
diff --git a/README.md b/README.md
index 4005580..7a5a86d 100644
--- a/README.md
+++ b/README.md
@@ -34,13 +34,13 @@ cat answer_key.txt     # view the key
 ```
 
 - Each run is written to `runs/<timestamp>_seed<...>_hardness<...>/generated_test.json` (and copied to `--out` for convenience). Override the base folder with `--run-dir` or the subfolder name with `--run-name`. Rendered outputs are also mirrored into the run directory so each run folder is self-contained.
-- **PDF output:** Requires `pandoc` **and** `pdflatex` (TeX Live). Example:
+- **PDF output:** Requires `pdflatex` (from TeX Live). Example:
   ```bash
   ./python render_text.py --in generated_test.json \
     --test-out test_booklet.txt --key-out answer_key.txt \
     --test-pdf test_booklet.pdf --key-pdf answer_key.pdf
   ```
-  If `pandoc` or `pdflatex` is missing, the script skips PDF generation and reports the issue. The booklet/key are rendered as Markdown, so bullets/headings convert cleanly to PDF when the tools are present.
+  If `pdflatex` is missing, the script skips PDF generation and reports the issue. The booklet/key are rendered as Markdown for the `.txt` outputs, and LaTeX is generated directly for PDF builds.
 
 If PDF engines are missing, PDF output is skipped; the Markdown text still renders correctly.
 
diff --git a/generator.py b/generator.py
index 239712a..4cedb33 100644
--- a/generator.py
+++ b/generator.py
@@ -586,12 +586,13 @@ def generate_test(
             }
         )
 
+    proword = {"easy": "ECHO", "medium": "FOXTROT", "hard": "GOLF", "extreme": "HOTEL"}[hardness]
     meta = {
         "version": "backtrack-0.2",
         "description": "Alan's Language Aptitude iNstrument (ALAN) synthesized via backtracking search",
         "seed": seed,
         "git_sha": git_sha,
-        "generation_params": {"hardness": hardness, "max_clue_reuse": max_clue_reuse},
+        "generation_params": {"profile": proword},
         "dictionary": spec.lexicon,
         "instructions": (
             "You will see a brief dictionary, a handful of rules, and examples. Words may take small "
diff --git a/main.py b/main.py
index 13ca49c..fe95dcb 100644
--- a/main.py
+++ b/main.py
@@ -24,6 +24,12 @@ def parse_args() -> argparse.Namespace:
         default="medium",
         help="Preset coverage targets (medium = default quotas).",
     )
+    parser.add_argument(
+        "--profile",
+        choices=["ECHO", "FOXTROT", "GOLF", "HOTEL"],
+        default=None,
+        help="Alias for hardness without revealing labels in output (ECHO=easy, FOXTROT=medium, GOLF=hard, HOTEL=extreme).",
+    )
     parser.add_argument(
         "--run-dir",
         dest="run_dir",
@@ -48,6 +54,10 @@ def _git_sha() -> str | None:
 
 def main() -> None:
     args = parse_args()
+    proword_map = {"ECHO": "easy", "FOXTROT": "medium", "GOLF": "hard", "HOTEL": "extreme"}  # opaque profile name -> hardness preset
+    if args.profile:
+        args.hardness = proword_map[args.profile]  # override on the namespace itself so later reads of args.hardness honor --profile
+    hardness = args.hardness  # local alias of the effective hardness
     actual_seed = args.seed if args.seed is not None else random.randint(0, 1_000_000)
     concepts = get_default_concepts()
     blueprint = get_default_blueprint()
diff --git a/render_text.py b/render_text.py
index 51da112..7edaa0b 100644
--- a/render_text.py
+++ b/render_text.py
@@ -106,26 +106,161 @@ def render_key(data: Dict[str, Any]) -> str:
     return "\n".join(lines)
 
 
-def _write_pdf(text: str, pdf_path: str, title: str) -> None:
-    """Render plain text to PDF via pandoc + pdflatex if available."""
-    pandoc = shutil.which("pandoc")
+def _tex_escape(s: str) -> str:
+    """Escape LaTeX special characters."""
+    replacements = {  # single character -> LaTeX-safe replacement text
+        "\\": r"\textbackslash{}",  # the key is one literal backslash
+        "{": r"\{",
+        "}": r"\}",
+        "#": r"\#",
+        "$": r"\$",
+        "%": r"\%",
+        "&": r"\&",
+        "_": r"\_",
+        "~": r"\textasciitilde{}",
+        "^": r"\textasciicircum{}",
+    }
+    return "".join(replacements.get(ch, ch) for ch in s)  # one pass per input character, so text produced by a replacement is never escaped again
+
+
+def _booklet_latex(data: Dict[str, Any]) -> str:  # build the complete LaTeX source of the test booklet from the loaded test data
+    meta = data.get("meta", {})
+    info_bits = []  # provenance fragments shown on the "Test Version" line
+    if meta.get("seed") is not None:  # explicit None check, so a seed of 0 is still shown
+        info_bits.append(f"seed={meta['seed']}")
+    if meta.get("git_sha"):
+        info_bits.append(f"sha={str(meta['git_sha'])[:7]}")  # first 7 characters only
+    params = meta.get("generation_params") or {}
+    if params:
+        joined = ", ".join(f"{k}={v}" for k, v in sorted(params.items()))  # sorted by key for a stable order
+        info_bits.append(f"params: {joined}")
+    header_line = ""
+    if info_bits:
+        safe_bits = [_tex_escape(bit) for bit in info_bits]  # escape the data, not the markup wrapped around it
+        header_line = f"{{\\small\\texttt{{Test Version: {'; '.join(safe_bits)}}}}}\\\\[6pt]"  # doubled braces are literal braces; the tail emits \\[6pt]
+
+    lines = [  # document preamble and title
+        r"\documentclass[10pt]{article}",
+        r"\usepackage[margin=0.75in]{geometry}",
+        r"\usepackage[T1]{fontenc}",
+        r"\usepackage[scaled=0.95]{helvet}",
+        r"\usepackage{microtype}",
+        r"\usepackage{enumitem}",  # provides \setlist and the [leftmargin=...] option used on itemize below
+        r"\usepackage{needspace}",  # provides \needspace used before each question
+        r"\renewcommand{\familydefault}{\sfdefault}",  # sans-serif as the default family
+        r"\setlength{\parskip}{4pt}",
+        r"\setlength{\parindent}{0pt}",
+        r"\setlist[itemize]{leftmargin=*,itemsep=2pt,topsep=2pt}",
+        r"\begin{document}",
+        r"\section*{Alan's Language Aptitude iNstrument (ALAN)}",
+    ]
+    if header_line:
+        lines.append(header_line)
+    if meta.get("instructions"):
+        lines.append(_tex_escape(meta["instructions"]))
+    if meta.get("rules"):
+        lines.append(r"\subsection*{Grammar Cheat Sheet}")
+        lines.append(r"\begin{itemize}")
+        for rule in meta["rules"]:
+            lines.append(rf"\item {_tex_escape(rule)}")
+        lines.append(r"\end{itemize}")
+    dict_data = meta.get("dictionary", {})
+    if dict_data:
+        lines.append(r"\subsection*{Starter Dictionary}")
+        for title, group in [  # fixed group order; a missing group falls back to an empty dict
+            ("Nouns", dict_data.get("nouns", {})),
+            ("Verbs", dict_data.get("verbs", {})),
+            ("Adjectives", dict_data.get("adjectives", {})),
+        ]:
+            lines.append(rf"\paragraph*{{{_tex_escape(title)}}}")
+            lines.append(r"\begin{itemize}")  # emitted even when the group has no entries
+            for eng, lang in group.items():  # presumably English gloss -> test-language word, judging by the names; confirm against the generator
+                lines.append(rf"\item {_tex_escape(eng)} = {_tex_escape(lang)}")
+            lines.append(r"\end{itemize}")
+
+    for sec_idx, section in enumerate(data.get("sections", [])):
+        lines.append(r"\newpage")  # every section starts on a fresh page
+        lines.append(rf"\section*{{Section {sec_idx + 1}}}")  # numbered from 1 by position
+        for intro in section.get("intro_text", []):
+            lines.append(_tex_escape(intro))
+        # two empty entries become blank lines in the joined source, ending the intro paragraph before the first question
+        lines.append(r"")
+        lines.append(r"")
+        for q in section.get("questions", []):
+            lines.append(r"\needspace{12\baselineskip}")  # ask for about 12 lines of room so a question is not split across pages
+            lines.append(rf"\noindent\textbf{{{q['number']}. {_tex_escape(q['stem'])}}}")  # q['number'] is inserted without escaping
+            lines.append(r"\begin{itemize}[leftmargin=1.25em]")
+            for opt in q.get("options", []):
+                lines.append(rf"\item[{_tex_escape(opt['label'])})] {_tex_escape(opt['text'])}")  # the option label followed by ")" is used as the item label
+            lines.append(r"\end{itemize}")
+    lines.append(r"\end{document}")
+    return "\n".join(lines)
+
+
+def _key_latex(data: Dict[str, Any]) -> str:  # build the complete LaTeX source of the answer key from the loaded test data
+    lines = [  # document preamble and title
+        r"\documentclass[10pt]{article}",
+        r"\usepackage[margin=0.75in]{geometry}",
+        r"\usepackage[T1]{fontenc}",
+        r"\usepackage[scaled=0.95]{helvet}",
+        r"\usepackage{microtype}",
+        r"\usepackage{enumitem}",  # provides \setlist and the [leftmargin=...] option used on itemize below
+        r"\usepackage{needspace}",
+        r"\renewcommand{\familydefault}{\sfdefault}",  # sans-serif as the default family
+        r"\setlength{\parskip}{4pt}",
+        r"\setlength{\parindent}{0pt}",
+        r"\setlist[itemize]{leftmargin=*,itemsep=2pt,topsep=2pt}",
+        r"\begin{document}",
+        r"\section*{Answer Key}",
+    ]
+    meta = data.get("meta", {})
+    info_bits = []  # provenance fragments shown on the "Test Version" line
+    if meta.get("seed") is not None:  # explicit None check, so a seed of 0 is still shown
+        info_bits.append(f"seed={meta['seed']}")
+    if meta.get("git_sha"):
+        info_bits.append(f"sha={str(meta['git_sha'])[:7]}")  # first 7 characters only
+    params = meta.get("generation_params") or {}
+    if params:
+        joined = ", ".join(f"{k}={v}" for k, v in sorted(params.items()))  # sorted by key for a stable order
+        info_bits.append(f"params: {joined}")
+    if info_bits:
+        safe_bits = [_tex_escape(bit) for bit in info_bits]  # escape the data, not the markup wrapped around it
+        lines.append(f"{{\\small\\texttt{{Test Version: {'; '.join(safe_bits)}}}}}\\\\[6pt]")  # doubled braces are literal braces; the tail emits \\[6pt]
+
+    for section in data.get("sections", []):
+        for q in section.get("questions", []):
+            correct = next((o for o in q["options"] if o["is_correct"]), None)  # first option flagged correct, else None; "options" and "is_correct" are required keys here
+            lines.append(rf"\noindent\textbf{{{q['number']}: {_tex_escape(correct['label'] if correct else '?')}}}")  # shows "?" when no option is flagged correct
+            lines.append(r"\begin{itemize}[leftmargin=1.25em]")
+            for opt in q["options"]:
+                mark = " (correct)" if opt["is_correct"] else ""
+                lines.append(rf"\item[{_tex_escape(opt['label'])})] {_tex_escape(opt['text'])}{_tex_escape(mark)}")  # every option is listed; flagged ones get the " (correct)" suffix
+            lines.append(r"\end{itemize}")
+    lines.append(r"\end{document}")
+    return "\n".join(lines)
+
+
+def _write_pdf(latex_source: str, pdf_path: str) -> None:
+    """Render LaTeX source to PDF with pdflatex; a missing tool, a failed build, or a failed copy is reported on stderr, not raised."""
     pdflatex = shutil.which("pdflatex")
-    if not pandoc or not pdflatex:
-        print("pandoc + pdflatex required for PDF generation; skipping.", file=sys.stderr)
+    if not pdflatex:
+        print("pdflatex required for PDF generation; skipping.", file=sys.stderr)
         return
-    with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt") as tmp:
-        tmp.write(f"{title}\n\n{text}")
-        tmp_path = tmp.name
-    try:
-        cmd = [pandoc, tmp_path, "-o", pdf_path, "--from", "gfm", "--pdf-engine", "pdflatex"]
-        subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-    except subprocess.CalledProcessError:
-        print("pandoc + pdflatex failed to produce PDF.", file=sys.stderr)
-    finally:
+    with tempfile.TemporaryDirectory() as tmpdir:  # scratch directory for doc.tex and pdflatex by-products; removed on exit
+        tex_path = os.path.join(tmpdir, "doc.tex")
+        with open(tex_path, "w", encoding="utf-8") as tf:
+            tf.write(latex_source)
         try:
-            os.remove(tmp_path)
-        except OSError:
-            pass
+            subprocess.run(
+                [pdflatex, "-interaction=nonstopmode", "-halt-on-error", "doc.tex"],
+                cwd=tmpdir, check=True,  # run inside tmpdir so doc.pdf is written there; non-zero exit raises CalledProcessError
+                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
+            )
+            shutil.copyfile(os.path.join(tmpdir, "doc.pdf"), pdf_path)
+        except subprocess.CalledProcessError:
+            print("pdflatex failed to produce PDF.", file=sys.stderr)
+        except OSError as exc:  # pdflatex could not be launched, or the finished PDF could not be copied to pdf_path
+            print(f"PDF generation failed for {pdf_path}: {exc}", file=sys.stderr)
 
 
 def main() -> None:
@@ -139,9 +274,9 @@ def main() -> None:
     with open(args.key_out, "w", encoding="utf-8") as f:
         f.write(key_text)
     if args.test_pdf:
-        _write_pdf(booklet_text, args.test_pdf, "ALAN Booklet")
+        _write_pdf(_booklet_latex(data), args.test_pdf)
     if args.key_pdf:
-        _write_pdf(key_text, args.key_pdf, "ALAN Answer Key")
+        _write_pdf(_key_latex(data), args.key_pdf)
     run_dir = data.get("meta", {}).get("run_dir")
     if run_dir and os.path.isdir(run_dir):
         for src in [args.test_out, args.key_out, args.test_pdf, args.key_pdf]: