"""Score a set of answers against a generated test JSON."""
from __future__ import annotations
import argparse
import json
from typing import List, Dict, Any
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse the process arguments.

    Returns:
        A namespace with ``test_json`` and ``answers_file`` (both required
        positional strings) and ``show_details`` (``True`` only when the
        ``--show-details`` flag is given).
    """
    cli = argparse.ArgumentParser(
        description="Grade answers for a generated test JSON."
    )

    # Required positionals, registered in the order they must appear.
    positionals = (
        ("test_json", "Path to generated_test.json"),
        ("answers_file", "Path to newline-delimited answers (e.g., A\\nB\\nC...)"),
    )
    for dest, help_text in positionals:
        cli.add_argument(dest, help=help_text)

    cli.add_argument(
        "--show-details",
        help="Print per-question correctness details.",
        action="store_true",
    )
    return cli.parse_args()
def load_test(path: str) -> List[Dict[str, Any]]:
    """Load a test JSON file and return its questions as one flat list.

    The top-level JSON object is read for a ``"sections"`` list; each
    section's ``"questions"`` list is concatenated in section order. A
    missing ``"sections"`` or ``"questions"`` key is treated as empty.

    Args:
        path: Path to the test JSON file.

    Returns:
        All question dicts, sorted ascending by their ``"number"`` value
        (questions without a ``"number"`` sort as 0). The sort is stable,
        so questions with equal numbers keep their file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark;
    # with plain "utf-8" json.load rejects BOM-prefixed files outright.
    with open(path, "r", encoding="utf-8-sig") as f:
        data = json.load(f)

    questions: List[Dict[str, Any]] = []
    for section in data.get("sections", []):
        questions.extend(section.get("questions", []))

    questions.sort(key=lambda q: q.get("number", 0))
    return questions
def load_answers(path: str) -> List[str]:
    """Read newline-delimited answer labels from a text file.

    Each line is stripped of surrounding whitespace and uppercased. Lines
    that are empty after stripping are dropped, so a blank line does not
    reserve a position: the answers that follow it move up by one.

    Args:
        path: Path to the answers file.

    Returns:
        The normalized, non-empty labels in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark.
    # str.strip() does not remove U+FEFF, so with plain "utf-8" the first
    # answer of a BOM-prefixed file would never match any label.
    with open(path, "r", encoding="utf-8-sig") as f:
        normalized = (line.strip().upper() for line in f)
        return [label for label in normalized if label]
def _normalize_label(label: Any) -> Any:
    """Return a string label stripped and uppercased; pass other values through."""
    return label.strip().upper() if isinstance(label, str) else label


def score(questions: List[Dict[str, Any]], answers: List[str]) -> Dict[str, Any]:
    """Grade answers positionally against the questions.

    ``answers[i]`` is matched with ``questions[i]``. A question's correct
    label is the ``"label"`` of its first option with a truthy
    ``"is_correct"``; answers beyond the last question are ignored.

    Args:
        questions: Question dicts, each optionally carrying ``"number"`` and
            an ``"options"`` list of dicts with ``"label"`` / ``"is_correct"``.
        answers: Provided labels, in question order.

    Returns:
        A dict with:
            ``"total"``: number of questions.
            ``"answered"``: number of questions that have an answer at their
                position.
            ``"correct"``: number of correctly answered questions.
            ``"results"``: one dict per question with ``"number"``,
                ``"provided"`` (``None`` when unanswered), ``"correct"``
                (``None`` when no option is flagged correct) and
                ``"is_correct"``.

    Raises:
        KeyError: If an option flagged correct has no ``"label"`` key.
    """
    results: List[Dict[str, Any]] = []
    for idx, question in enumerate(questions):
        provided = answers[idx] if idx < len(answers) else None
        correct_label = next(
            (
                opt["label"]
                for opt in question.get("options", [])
                if opt.get("is_correct")
            ),
            None,
        )
        # An unanswered question, or one with no correct option, can never be
        # right; a bare equality would treat None == None as a correct answer.
        # String labels compare case- and whitespace-insensitively so that
        # uppercased answers still match a lowercase label in the test data.
        is_correct = (
            provided is not None
            and correct_label is not None
            and _normalize_label(provided) == _normalize_label(correct_label)
        )
        results.append(
            {
                "number": question.get("number"),
                "provided": provided,
                "correct": correct_label,
                "is_correct": is_correct,
            }
        )

    return {
        "total": len(questions),
        "answered": min(len(answers), len(questions)),
        "correct": sum(1 for entry in results if entry["is_correct"]),
        "results": results,
    }
def main() -> None:
    """Run the grader: parse arguments, score the answers, print a summary.

    Always prints the answered/total and correct/total lines. With
    ``--show-details`` it also prints one line per question showing the
    provided label (``-`` when unanswered), the correct label and a
    pass/fail mark.
    """
    cli_args = parse_args()
    summary = score(
        load_test(cli_args.test_json),
        load_answers(cli_args.answers_file),
    )

    total = summary["total"]
    print(f"Scored {summary['answered']} of {total} questions.")
    print(f"Correct: {summary['correct']} / {total}")

    if not cli_args.show_details:
        return

    for row in summary["results"]:
        given = "-" if row["provided"] is None else row["provided"]
        mark = "✗" if not row["is_correct"] else "✓"
        print(f"Q{row['number']}: provided {given}, correct {row['correct']} {mark}")


if __name__ == "__main__":
    main()