# git » alan.git » master » tree
#
# [master] / meta_schema.py

"""Lightweight schema checks for ALAN JSON structures."""
from __future__ import annotations

from typing import Any, Dict, List


def _is_bool(x: Any) -> bool:
    return isinstance(x, bool)


def _is_str(x: Any) -> bool:
    return isinstance(x, str)


def _is_list_of_str(x: Any) -> bool:
    return isinstance(x, list) and all(isinstance(i, str) for i in x)


def _check_np(feat: Dict[str, Any]) -> bool:
    required = {"noun_id", "feminine", "plural", "adjectives", "role", "use_irregular"}
    if not isinstance(feat, dict) or not required.issubset(feat.keys()):
        return False
    return (
        _is_str(feat["noun_id"])
        and _is_bool(feat["feminine"])
        and _is_bool(feat["plural"])
        and isinstance(feat["adjectives"], list)
        and _is_str(feat["role"])
        and _is_bool(feat["use_irregular"])
    )


def _check_features(f: Dict[str, Any]) -> bool:
    required = {"subject", "obj1", "verb_id", "tense", "use_irregular_verb"}
    if not isinstance(f, dict) or not required.issubset(f.keys()):
        return False
    if not _check_np(f["subject"]) or not _check_np(f["obj1"]):
        return False
    if f.get("obj2") is not None and not _check_np(f["obj2"]):
        return False
    return _is_str(f["verb_id"]) and _is_str(f["tense"]) and _is_bool(f["use_irregular_verb"])


def _check_meta(meta: Any) -> bool:
    """Return True when the ``meta`` block is structurally valid."""
    if not isinstance(meta, dict):
        return False
    if not all(isinstance(meta.get(key, ""), str) for key in ("version", "description")):
        return False
    if not isinstance(meta.get("rules", []), list):
        return False
    dictionary = meta.get("dictionary", {})
    if not isinstance(dictionary, dict):
        return False
    for part in ("nouns", "verbs", "adjectives"):
        entries = dictionary.get(part, {})
        if not isinstance(entries, dict):
            return False
        if not all(isinstance(k, str) and isinstance(v, str) for k, v in entries.items()):
            return False
    return True


def _check_option(opt: Any) -> bool:
    """Return True when a single answer option is structurally valid."""
    if not isinstance(opt, dict):
        return False
    if not all(isinstance(opt.get(key, ""), str) for key in ("label", "text", "explanation")):
        return False
    if not isinstance(opt.get("is_correct", False), bool):
        return False
    # A missing "features" entry defaults to {} and is rejected by _check_features.
    return _check_features(opt.get("features", {}))


def _check_question(q: Any) -> bool:
    """Return True when a question and its four options are structurally valid."""
    if not isinstance(q, dict):
        return False
    if not all(isinstance(q.get(key, ""), str) for key in ("id", "item_type", "section_id", "stem")):
        return False
    # No default here: a question without "options" is invalid.  The earlier
    # ``q.get("options", [])`` followed by ``q["options"]`` raised KeyError.
    options = q.get("options")
    if not isinstance(options, list) or len(options) != 4:
        return False
    return all(_check_option(opt) for opt in options)


def validate_schema(data: Dict[str, Any]) -> bool:
    """Shallow structural validation for generated JSON.

    Checks performed:

    * ``meta`` (optional) must be a dict; ``version`` and ``description`` must
      be strings and ``rules`` a list when present; each of
      ``dictionary["nouns" | "verbs" | "adjectives"]`` must, when present, be a
      dict mapping strings to strings.
    * ``sections`` must be a non-empty list of dicts.  A section's ``id`` must
      be a string and its ``questions`` a list when present.
    * Every question must be a dict whose ``id``, ``item_type``,
      ``section_id`` and ``stem`` are strings when present, and must carry
      exactly four ``options``.
    * Every option must be a dict whose ``label``, ``text`` and
      ``explanation`` are strings and ``is_correct`` a boolean when present,
      and whose ``features`` pass ``_check_features``.

    Args:
        data: The decoded JSON document.

    Returns:
        ``True`` if every check passes, ``False`` otherwise.  Malformed input
        (a non-dict where a dict is expected, or a question without
        ``options``) yields ``False`` rather than raising.
    """
    if not isinstance(data, dict):
        return False
    if not _check_meta(data.get("meta", {})):
        return False

    sections = data.get("sections", [])
    if not isinstance(sections, list) or not sections:
        return False

    for sec in sections:
        if not isinstance(sec, dict):
            return False
        if not isinstance(sec.get("id", ""), str):
            return False
        questions = sec.get("questions", [])
        if not isinstance(questions, list):
            return False
        if not all(_check_question(q) for q in questions):
            return False
    return True