# git » alan.git » master » tree

# [master] / main.py

"""CLI entry point for generating a JSON DLAB-style test (RNG generator by default)."""
from __future__ import annotations

import argparse
import json
import random
import subprocess
import os
from datetime import datetime

from language_spec import generate_language_instance
from test_blueprint import get_default_concepts, get_default_blueprint
from property_tests import validate_data
from generator import generate_test


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Generate an artificial language test JSON.")
    parser.add_argument("--seed", type=int, help="Random seed for reproducibility.", default=None)
    parser.add_argument("--out", dest="out_path", default="generated_test.json", help="Output file name (within run dir).")
    parser.add_argument(
        "--hardness",
        choices=["easy", "medium", "hard", "extreme"],
        default="medium",
        help="Preset coverage targets (medium = default quotas).",
    )
    parser.add_argument(
        "--profile",
        choices=["MAPLE", "CEDAR", "CYPRESS", "SEQUOIA"],
        default=None,
        help="Alias for hardness without revealing labels in output (MAPLE=easy, CEDAR=medium, CYPRESS=hard, SEQUOIA=extreme).",
    )
    parser.add_argument(
        "--run-dir",
        dest="run_dir",
        default=None,
        help="Base directory to create the run subdir under. Defaults to ./runs.",
    )
    parser.add_argument(
        "--run-name",
        dest="run_name",
        default=None,
        help="Optional explicit subdirectory name; otherwise uses timestamp + params.",
    )
    return parser.parse_args()


def _git_sha() -> str | None:
    try:
        return subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip()
    except Exception:
        return None


def main() -> None:
    """Generate one test, validate it, and write it to disk as JSON.

    Steps:
      1. Parse the CLI options and resolve the effective hardness
         (``--profile``, when given, takes precedence over ``--hardness``).
      2. Pick the seed (``--seed`` or a random integer in [0, 1_000_000]) and
         build the language spec and the test from it.
      3. Validate the result with ``validate_data``.
      4. Create ``<run-dir>/<run-name>/``, record the run location in the
         test's ``meta`` mapping, and write the JSON both into the run
         directory and to the ``--out`` path relative to the current
         working directory.

    Raises:
        SystemExit: if ``validate_data`` rejects the generated test; nothing
            is written in that case.
    """
    args = parse_args()

    profile_to_hardness = {"MAPLE": "easy", "CEDAR": "medium", "CYPRESS": "hard", "SEQUOIA": "extreme"}
    hardness = profile_to_hardness[args.profile] if args.profile else args.hardness
    # The run name ends up in the JSON metadata below. When a profile alias
    # was used, label the run with the alias so the hardness label the alias
    # is meant to hide is not spelled out in the output.
    run_label = args.profile or hardness

    actual_seed = args.seed if args.seed is not None else random.randint(0, 1_000_000)
    concepts = get_default_concepts()
    blueprint = get_default_blueprint()

    git_sha = _git_sha()
    spec = generate_language_instance(actual_seed)
    rng = random.Random(actual_seed)
    # Pass the resolved hardness (not args.hardness) so --profile takes effect.
    test_dict = generate_test(spec, blueprint, concepts, rng, seed=actual_seed, git_sha=git_sha, hardness=hardness)
    if not validate_data(test_dict, spec, quiet=False):
        raise SystemExit("Property tests failed; test not written.")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_dir = args.run_dir or "runs"
    run_name = args.run_name or f"{timestamp}_seed{actual_seed}_hardness{run_label}"
    run_dir = os.path.join(base_dir, run_name)
    os.makedirs(run_dir, exist_ok=True)

    meta = test_dict.setdefault("meta", {})
    meta["run_dir"] = os.path.abspath(run_dir)
    meta["run_name"] = run_name
    meta["run_base_dir"] = os.path.abspath(base_dir)

    # Serialise once, before opening any file, so a serialisation failure
    # cannot leave a truncated JSON file behind.
    payload = json.dumps(test_dict, indent=2)

    out_json = os.path.join(run_dir, os.path.basename(args.out_path))
    with open(out_json, "w", encoding="utf-8") as f:
        f.write(payload)

    # Also copy to the requested path (relative to cwd) for compatibility.
    out_parent = os.path.dirname(args.out_path)
    if out_parent:
        os.makedirs(out_parent, exist_ok=True)
    with open(args.out_path, "w", encoding="utf-8") as f:
        f.write(payload)

    print(f"Generated test JSON at {out_json} (seed {actual_seed})")


# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()