| author | Alan Dipert
<alan@dipert.org> 2025-12-06 18:12:30 UTC |
| committer | Alan Dipert
<alan@dipert.org> 2025-12-06 18:12:30 UTC |
| parent | 88bd9580b71384d1b7561323a7a4850853ded0df |
| Containerfile | +3 | -1 |
| README.md | +2 | -2 |
| generator.py | +2 | -1 |
| main.py | +10 | -0 |
| render_text.py | +154 | -19 |
diff --git a/Containerfile b/Containerfile index 2bb7551..5f4f398 100644 --- a/Containerfile +++ b/Containerfile @@ -7,9 +7,11 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates \ make \ - pandoc \ texlive-latex-base \ texlive-fonts-recommended \ + texlive-fonts-extra \ + texlive-fonts-extra-links \ + texlive-humanities \ fonts-dejavu-core && \ rm -rf /var/lib/apt/lists/* diff --git a/README.md b/README.md index 4005580..7a5a86d 100644 --- a/README.md +++ b/README.md @@ -34,13 +34,13 @@ cat answer_key.txt # view the key ``` - Each run is written to `runs/<timestamp>_seed<...>_hardness<...>/generated_test.json` (and copied to `--out` for convenience). Override the base folder with `--run-dir` or the subfolder name with `--run-name`. Rendered outputs are also mirrored into the run directory so each run folder is self-contained. -- **PDF output:** Requires `pandoc` **and** `pdflatex` (TeX Live). Example: +- **PDF output:** Requires `pdflatex` (from TeX Live). Example: ```bash ./python render_text.py --in generated_test.json \ --test-out test_booklet.txt --key-out answer_key.txt \ --test-pdf test_booklet.pdf --key-pdf answer_key.pdf ``` - If `pandoc` or `pdflatex` is missing, the script skips PDF generation and reports the issue. The booklet/key are rendered as Markdown, so bullets/headings convert cleanly to PDF when the tools are present. + If `pdflatex` is missing, the script skips PDF generation and reports the issue. The booklet/key are rendered as Markdown for the `.txt` outputs, and LaTeX is generated directly for PDF builds. If PDF engines are missing, PDF output is skipped; the Markdown text still renders correctly. diff --git a/generator.py b/generator.py index 239712a..4cedb33 100644 --- a/generator.py +++ b/generator.py @@ -586,12 +586,13 @@ def generate_test( } ) + proword = {"easy": "ECHO", "medium": "FOXTROT", "hard": "GOLF", "extreme": "HOTEL"}[hardness] meta = { "version": "backtrack-0.2", "description": "Alan's Language Aptitude iNstrument (ALAN) synthesized via backtracking search", "seed": seed, "git_sha": git_sha, - "generation_params": {"hardness": hardness, "max_clue_reuse": max_clue_reuse}, + "generation_params": {"profile": proword}, "dictionary": spec.lexicon, "instructions": ( "You will see a brief dictionary, a handful of rules, and examples. Words may take small " diff --git a/main.py b/main.py index 13ca49c..fe95dcb 100644 --- a/main.py +++ b/main.py @@ -24,6 +24,12 @@ def parse_args() -> argparse.Namespace: default="medium", help="Preset coverage targets (medium = default quotas).", ) + parser.add_argument( + "--profile", + choices=["ECHO", "FOXTROT", "GOLF", "HOTEL"], + default=None, + help="Alias for hardness without revealing labels in output (ECHO=easy, FOXTROT=medium, GOLF=hard, HOTEL=extreme).", + ) parser.add_argument( "--run-dir", dest="run_dir", @@ -48,6 +54,10 @@ def _git_sha() -> str | None: def main() -> None: args = parse_args() + proword_map = {"ECHO": "easy", "FOXTROT": "medium", "GOLF": "hard", "HOTEL": "extreme"} + hardness = args.hardness + if args.profile: + hardness = proword_map[args.profile] actual_seed = args.seed if args.seed is not None else random.randint(0, 1_000_000) concepts = get_default_concepts() blueprint = get_default_blueprint() diff --git a/render_text.py b/render_text.py index 51da112..7edaa0b 100644 --- a/render_text.py +++ b/render_text.py @@ -106,26 +106,161 @@ def render_key(data: Dict[str, Any]) -> str: return "\n".join(lines) -def _write_pdf(text: str, pdf_path: str, title: str) -> None: - """Render plain text to PDF via pandoc + pdflatex if available.""" - pandoc = shutil.which("pandoc") +def _tex_escape(s: str) -> str: + """Escape LaTeX special characters.""" + replacements = { + "\\": r"\textbackslash{}", + "{": r"\{", + "}": r"\}", + "#": r"\#", + "$": r"\$", + "%": r"\%", + "&": r"\&", + "_": r"\_", + "~": r"\textasciitilde{}", + "^": r"\textasciicircum{}", + } + return "".join(replacements.get(ch, ch) for ch in s) + + +def _booklet_latex(data: Dict[str, Any]) -> str: + meta = data.get("meta", {}) + info_bits = [] + if meta.get("seed") is not None: + info_bits.append(f"seed={meta['seed']}") + if meta.get("git_sha"): + info_bits.append(f"sha={str(meta['git_sha'])[:7]}") + params = meta.get("generation_params") or {} + if params: + joined = ", ".join(f"{k}={v}" for k, v in sorted(params.items())) + info_bits.append(f"params: {joined}") + header_line = "" + if info_bits: + safe_bits = [_tex_escape(bit) for bit in info_bits] + header_line = f"{{\\small\\texttt{{Test Version: {'; '.join(safe_bits)}}}}}\\\\[6pt]" + + lines = [ + r"\documentclass[10pt]{article}", + r"\usepackage[margin=0.75in]{geometry}", + r"\usepackage[T1]{fontenc}", + r"\usepackage[scaled=0.95]{helvet}", + r"\usepackage{microtype}", + r"\usepackage{enumitem}", + r"\usepackage{needspace}", + r"\renewcommand{\familydefault}{\sfdefault}", + r"\setlength{\parskip}{4pt}", + r"\setlength{\parindent}{0pt}", + r"\setlist[itemize]{leftmargin=*,itemsep=2pt,topsep=2pt}", + r"\begin{document}", + r"\section*{Alan's Language Aptitude iNstrument (ALAN)}", + ] + if header_line: + lines.append(header_line) + if meta.get("instructions"): + lines.append(_tex_escape(meta["instructions"])) + if meta.get("rules"): + lines.append(r"\subsection*{Grammar Cheat Sheet}") + lines.append(r"\begin{itemize}") + for rule in meta["rules"]: + lines.append(rf"\item {_tex_escape(rule)}") + lines.append(r"\end{itemize}") + dict_data = meta.get("dictionary", {}) + if dict_data: + lines.append(r"\subsection*{Starter Dictionary}") + for title, group in [ + ("Nouns", dict_data.get("nouns", {})), + ("Verbs", dict_data.get("verbs", {})), + ("Adjectives", dict_data.get("adjectives", {})), + ]: + lines.append(rf"\paragraph*{{{_tex_escape(title)}}}") + lines.append(r"\begin{itemize}") + for eng, lang in group.items(): + lines.append(rf"\item {_tex_escape(eng)} = {_tex_escape(lang)}") + lines.append(r"\end{itemize}") + + for sec_idx, section in enumerate(data.get("sections", [])): + lines.append(r"\newpage") + lines.append(rf"\section*{{Section {sec_idx + 1}}}") + for intro in section.get("intro_text", []): + lines.append(_tex_escape(intro)) + # breathing room before first question + lines.append(r"") + lines.append(r"") + for q in section.get("questions", []): + lines.append(r"\needspace{12\baselineskip}") + lines.append(rf"\noindent\textbf{{{q['number']}. {_tex_escape(q['stem'])}}}") + lines.append(r"\begin{itemize}[leftmargin=1.25em]") + for opt in q.get("options", []): + lines.append(rf"\item[{_tex_escape(opt['label'])})] {_tex_escape(opt['text'])}") + lines.append(r"\end{itemize}") + lines.append(r"\end{document}") + return "\n".join(lines) + + +def _key_latex(data: Dict[str, Any]) -> str: + lines = [ + r"\documentclass[10pt]{article}", + r"\usepackage[margin=0.75in]{geometry}", + r"\usepackage[T1]{fontenc}", + r"\usepackage[scaled=0.95]{helvet}", + r"\usepackage{microtype}", + r"\usepackage{enumitem}", + r"\usepackage{needspace}", + r"\renewcommand{\familydefault}{\sfdefault}", + r"\setlength{\parskip}{4pt}", + r"\setlength{\parindent}{0pt}", + r"\setlist[itemize]{leftmargin=*,itemsep=2pt,topsep=2pt}", + r"\begin{document}", + r"\section*{Answer Key}", + ] + meta = data.get("meta", {}) + info_bits = [] + if meta.get("seed") is not None: + info_bits.append(f"seed={meta['seed']}") + if meta.get("git_sha"): + info_bits.append(f"sha={str(meta['git_sha'])[:7]}") + params = meta.get("generation_params") or {} + if params: + joined = ", ".join(f"{k}={v}" for k, v in sorted(params.items())) + info_bits.append(f"params: {joined}") + if info_bits: + safe_bits = [_tex_escape(bit) for bit in info_bits] + lines.append(f"{{\\small\\texttt{{Test Version: {'; '.join(safe_bits)}}}}}\\\\[6pt]") + + for section in data.get("sections", []): + for q in section.get("questions", []): + correct = next((o for o in q["options"] if o["is_correct"]), None) + lines.append(rf"\noindent\textbf{{{q['number']}: {_tex_escape(correct['label'] if correct else '?')}}}") + lines.append(r"\begin{itemize}[leftmargin=1.25em]") + for opt in q["options"]: + mark = " (correct)" if opt["is_correct"] else "" + lines.append(rf"\item[{_tex_escape(opt['label'])})] {_tex_escape(opt['text'])}{_tex_escape(mark)}") + lines.append(r"\end{itemize}") + lines.append(r"\end{document}") + return "\n".join(lines) + + +def _write_pdf(latex_source: str, pdf_path: str) -> None: + """Render LaTeX source to PDF using pdflatex directly.""" pdflatex = shutil.which("pdflatex") - if not pandoc or not pdflatex: - print("pandoc + pdflatex required for PDF generation; skipping.", file=sys.stderr) + if not pdflatex: + print("pdflatex required for PDF generation; skipping.", file=sys.stderr) return - with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt") as tmp: - tmp.write(f"{title}\n\n{text}") - tmp_path = tmp.name - try: - cmd = [pandoc, tmp_path, "-o", pdf_path, "--from", "gfm", "--pdf-engine", "pdflatex"] - subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - except subprocess.CalledProcessError: - print("pandoc + pdflatex failed to produce PDF.", file=sys.stderr) - finally: + with tempfile.TemporaryDirectory() as tmpdir: + tex_path = os.path.join(tmpdir, "doc.tex") + with open(tex_path, "w", encoding="utf-8") as tf: + tf.write(latex_source) try: - os.remove(tmp_path) - except OSError: - pass + subprocess.run( + [pdflatex, "-interaction=nonstopmode", "-halt-on-error", "doc.tex"], + cwd=tmpdir, + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + shutil.copyfile(os.path.join(tmpdir, "doc.pdf"), pdf_path) + except subprocess.CalledProcessError: + print("pdflatex failed to produce PDF.", file=sys.stderr) def main() -> None: @@ -139,9 +274,9 @@ def main() -> None: with open(args.key_out, "w", encoding="utf-8") as f: f.write(key_text) if args.test_pdf: - _write_pdf(booklet_text, args.test_pdf, "ALAN Booklet") + _write_pdf(_booklet_latex(data), args.test_pdf) if args.key_pdf: - _write_pdf(key_text, args.key_pdf, "ALAN Answer Key") + _write_pdf(_key_latex(data), args.key_pdf) run_dir = data.get("meta", {}).get("run_dir") if run_dir and os.path.isdir(run_dir): for src in [args.test_out, args.key_out, args.test_pdf, args.key_pdf]: