"""Lightweight schema checks for ALAN JSON structures."""
from __future__ import annotations
from typing import Any, Dict, List
def _is_bool(x: Any) -> bool:
return isinstance(x, bool)
def _is_str(x: Any) -> bool:
return isinstance(x, str)
def _is_list_of_str(x: Any) -> bool:
return isinstance(x, list) and all(isinstance(i, str) for i in x)
def _check_np(feat: Dict[str, Any]) -> bool:
required = {"noun_id", "feminine", "plural", "adjectives", "role", "use_irregular"}
if not isinstance(feat, dict) or not required.issubset(feat.keys()):
return False
return (
_is_str(feat["noun_id"])
and _is_bool(feat["feminine"])
and _is_bool(feat["plural"])
and isinstance(feat["adjectives"], list)
and _is_str(feat["role"])
and _is_bool(feat["use_irregular"])
)
def _check_features(f: Dict[str, Any]) -> bool:
required = {"subject", "obj1", "verb_id", "tense", "use_irregular_verb"}
if not isinstance(f, dict) or not required.issubset(f.keys()):
return False
if not _check_np(f["subject"]) or not _check_np(f["obj1"]):
return False
if f.get("obj2") is not None and not _check_np(f["obj2"]):
return False
return _is_str(f["verb_id"]) and _is_str(f["tense"]) and _is_bool(f["use_irregular_verb"])
def _check_meta(meta: Any) -> bool:
    """Return True when the ``meta`` block has the expected shallow shape.

    Missing keys are tolerated (they fall back to an empty default), but a
    key that is present must have the right type: ``version`` and
    ``description`` strings, ``rules`` a list, and ``dictionary`` a dict
    whose ``nouns`` / ``verbs`` / ``adjectives`` entries are str-to-str dicts.
    """
    if not isinstance(meta, dict):
        return False
    if not (_is_str(meta.get("version", "")) and _is_str(meta.get("description", ""))):
        return False
    if not isinstance(meta.get("rules", []), list):
        return False
    dictionary = meta.get("dictionary", {})
    if not isinstance(dictionary, dict):
        return False
    for part in ("nouns", "verbs", "adjectives"):
        entries = dictionary.get(part, {})
        if not isinstance(entries, dict):
            return False
        if not all(_is_str(k) and _is_str(v) for k, v in entries.items()):
            return False
    return True


def _check_option(opt: Any) -> bool:
    """Return True when one answer option is a dict with well-typed fields."""
    if not isinstance(opt, dict):
        return False
    return (
        _is_str(opt.get("label", ""))
        and _is_str(opt.get("text", ""))
        and _is_bool(opt.get("is_correct", False))
        and _is_str(opt.get("explanation", ""))
        # A missing "features" key becomes {} and is rejected by _check_features.
        and _check_features(opt.get("features", {}))
    )


def _check_question(q: Any) -> bool:
    """Return True when one question is a dict with exactly four valid options."""
    if not isinstance(q, dict):
        return False
    if not all(_is_str(q.get(k, "")) for k in ("id", "item_type", "section_id", "stem")):
        return False
    # Fetch once without a default: a missing "options" key must yield False
    # rather than a KeyError on a later q["options"] lookup.
    options = q.get("options")
    if not isinstance(options, list) or len(options) != 4:
        return False
    return all(_check_option(opt) for opt in options)


def validate_schema(data: Dict[str, Any]) -> bool:
    """Shallow structural validation for generated JSON.

    Checks the optional ``meta`` block, then requires ``sections`` to be a
    non-empty list of dicts, each with a string ``id`` (if present) and a
    list of ``questions`` (if present).  Every question must carry exactly
    four options, and every option must pass the feature checks.

    Malformed input never raises: any container that has the wrong type
    (including *data* itself) simply makes the function return False.

    Args:
        data: The decoded JSON document.

    Returns:
        True when the document passes every check, False otherwise.
    """
    if not isinstance(data, dict):
        return False
    if not _check_meta(data.get("meta", {})):
        return False

    sections = data.get("sections", [])
    if not isinstance(sections, list) or not sections:
        return False

    for sec in sections:
        if not isinstance(sec, dict):
            return False
        if not _is_str(sec.get("id", "")):
            return False
        questions = sec.get("questions", [])
        if not isinstance(questions, list):
            return False
        if not all(_check_question(q) for q in questions):
            return False
    return True