git » alan.git » master » tree

[master] / validate_test.py

"""Validator for ALAN grammar and test invariants."""
from __future__ import annotations

import json
import sys
from typing import Dict

from language_spec import (
    generate_language_instance,
    SentenceFeatures,
    NPFeature,
    AGENT,
    RECIPIENT,
    THEME,
    realize_sentence,
)


def load(path: str) -> Dict:
    """Read the UTF-8 encoded file at *path* and return its parsed JSON content."""
    with open(path, mode="r", encoding="utf-8") as handle:
        raw_text = handle.read()
    # Parsing happens after the file is closed; the decoded value is returned as-is.
    return json.loads(raw_text)


def has_one_correct(question: Dict) -> bool:
    """Return True when exactly one option of *question* has a truthy ``is_correct``.

    A question without an ``options`` key is treated as having no options,
    and an option without ``is_correct`` counts as not correct.
    """
    correct_count = 0
    for option in question.get("options", []):
        if option.get("is_correct"):
            correct_count += 1
    return correct_count == 1


def unique_options(question: Dict) -> bool:
    """Return True when no two options of *question* share the same ``text``.

    A question without an ``options`` key is treated as having no options.
    Every option must carry a ``text`` key; a missing one raises ``KeyError``.
    """
    collected = []
    for option in question.get("options", []):
        collected.append(option["text"])
    # Duplicates collapse in the set, so the sizes differ iff a text repeats.
    distinct = set(collected)
    return len(distinct) == len(collected)


def validate_option(text: str, sf: SentenceFeatures, spec) -> bool:
    """Return whether *text* equals ``realize_sentence(spec, sf)``."""
    expected_surface = realize_sentence(spec, sf)
    return text == expected_surface


def parse_surface(text: str) -> SentenceFeatures | None:
    """Placeholder parser: always returns ``None`` regardless of *text*.

    No surface parsing is implemented here; parsing is rejected and
    correctness is expected to be established through the generation path.
    """
    return None


def validate_test(data: Dict, expected_questions: int | None = 32) -> int:
    """Check structural invariants of a generated test and print any violations.

    Questions are gathered from every entry of ``data["sections"]`` via its
    ``"questions"`` list (missing keys are treated as empty lists). The checks
    performed are:

    * the total number of questions equals *expected_questions*;
    * each question has exactly one option flagged ``is_correct``;
    * no two options of a question share the same ``text``.

    Args:
        data: Parsed test document.
        expected_questions: Required total number of questions. Defaults to
            32; pass ``None`` to skip the count check.

    Returns:
        The number of violations found (0 when validation passed). One line
        per violation plus a final summary line is printed to stdout.
    """
    errors = 0
    # NOTE(review): the spec is built but never read below. The call is kept
    # because its side effects are not visible from this file -- confirm and
    # either use it (e.g. via validate_option) or drop it.
    spec = generate_language_instance()
    questions = [q for s in data.get("sections", []) for q in s.get("questions", [])]
    if expected_questions is not None and len(questions) != expected_questions:
        print(f"ERROR: expected {expected_questions} questions, found {len(questions)}")
        errors += 1
    for q in questions:
        # The two checks are independent, so one question can add two errors.
        if not has_one_correct(q):
            print(f"ERROR: question {q.get('number')} has !=1 correct option")
            errors += 1
        if not unique_options(q):
            print(f"ERROR: question {q.get('number')} has duplicate option text")
            errors += 1
    if errors == 0:
        print("Validation passed.")
    else:
        print(f"Validation failed with {errors} issue(s).")
    return errors


if __name__ == "__main__":
    # Command-line entry point: validate the JSON file named by the single argument.
    if len(sys.argv) != 2:
        print("Usage: python validate_test.py generated_test.json")
        sys.exit(1)
    data = load(sys.argv[1])
    error_count = validate_test(data)
    # POSIX keeps only the low 8 bits of an exit status, so a raw count of 256
    # (or any multiple of it) would reach the shell as 0, i.e. success. Clamp
    # the status so any failure stays non-zero; small counts pass through as-is.
    sys.exit(min(error_count, 255))