git » alan.git » master » tree

[master] / grade_answers.py

"""Score a set of answers against a generated test JSON."""
from __future__ import annotations

import argparse
import json
from typing import List, Dict, Any


def parse_args() -> argparse.Namespace:
    """Build and run the CLI argument parser for the grading script.

    Returns:
        Parsed namespace with ``test_json``, ``answers_file`` and the
        ``show_details`` flag.
    """
    ap = argparse.ArgumentParser(description="Grade answers for a generated test JSON.")
    ap.add_argument("test_json", help="Path to generated_test.json")
    ap.add_argument("answers_file", help="Path to newline-delimited answers (e.g., A\\nB\\nC...)")
    # Optional verbosity switch; defaults to False when absent.
    ap.add_argument("--show-details", action="store_true", help="Print per-question correctness details.")
    return ap.parse_args()


def load_test(path: str) -> List[Dict[str, Any]]:
    """Load a generated-test JSON file and return all of its questions.

    Questions from every section are flattened into a single list and
    ordered by their "number" field (questions missing a number sort as 0).

    Args:
        path: Filesystem path to the generated test JSON.

    Returns:
        Flat list of question dicts sorted by question number.
    """
    with open(path, "r", encoding="utf-8") as fh:
        payload = json.load(fh)
    flattened = [
        question
        for section in payload.get("sections", [])
        for question in section.get("questions", [])
    ]
    return sorted(flattened, key=lambda q: q.get("number", 0))


def load_answers(path: str) -> List[str]:
    """Read newline-delimited answer labels from *path*.

    Each line is whitespace-stripped and upper-cased; blank lines are
    skipped entirely.

    Args:
        path: Filesystem path to the answers file.

    Returns:
        Upper-cased answer labels in file order.
    """
    with open(path, "r", encoding="utf-8") as fh:
        lines = fh.read().splitlines()
    return [line.strip().upper() for line in lines if line.strip()]


def score(questions: List[Dict[str, Any]], answers: List[str]) -> Dict[str, Any]:
    """Score *answers* positionally against *questions*.

    Answers are matched to questions by index; extra answers are ignored
    and missing answers are recorded as ``None``.

    Args:
        questions: Question dicts, each with an "options" list whose
            correct entry carries ``"is_correct": True``.
        answers: Upper-cased answer labels, one per question, in order.

    Returns:
        Summary dict with ``total``, ``answered``, ``correct`` counts and a
        per-question ``results`` list of
        ``{number, provided, correct, is_correct}`` entries.
    """
    results = []
    correct_count = 0
    for idx, question in enumerate(questions):
        provided = answers[idx] if idx < len(answers) else None
        correct_label = next(
            (opt["label"] for opt in question.get("options", []) if opt.get("is_correct")), None
        )
        # Bug fix: an unanswered question whose data has no correct option
        # used to satisfy ``None == None`` and be counted as correct.
        # An unanswered question is never correct.
        is_correct = provided is not None and provided == correct_label
        if is_correct:
            correct_count += 1
        results.append(
            {
                "number": question.get("number"),
                "provided": provided,
                "correct": correct_label,
                "is_correct": is_correct,
            }
        )
    return {
        "total": len(questions),
        "answered": min(len(answers), len(questions)),
        "correct": correct_count,
        "results": results,
    }


def main() -> None:
    """Entry point: parse CLI args, grade the answers, and print a report."""
    args = parse_args()
    summary = score(load_test(args.test_json), load_answers(args.answers_file))

    print(f"Scored {summary['answered']} of {summary['total']} questions.")
    print(f"Correct: {summary['correct']} / {summary['total']}")
    if not args.show_details:
        return
    # Per-question breakdown, one line each.
    for entry in summary["results"]:
        provided = entry['provided'] if entry['provided'] is not None else "-"
        marker = "✓" if entry["is_correct"] else "✗"
        print(f"Q{entry['number']}: provided {provided}, correct {entry['correct']} {marker}")


if __name__ == "__main__":
    main()