# git » alan.git » master » property

"""Property-based checks for ALAN generation."""
from __future__ import annotations

import json
import sys
from typing import Dict

from language_spec import (
    generate_language_instance,
    SentenceFeatures,
    NPFeature,
    realize_sentence,
    FEMININE_NOUNS,
    english_gloss,
)
from grammar_check import is_grammatical
from semantic import to_meaning, meanings_equal, semantic_distance
from language_coherence import check_coherence
from meta_schema import validate_schema

# Property thresholds. The MIN_* quotas read by validate_data() can be
# overridden per call through its ``overrides`` mapping; the DIST_* bounds are
# read directly as module constants by check_semantics().
# Inclusive bounds on semantic_distance() between a distractor and the correct
# answer. With both set to 1, only a distance of exactly 1 is accepted.
DIST_MIN = 1
DIST_MAX = 1
# Minimum number of correct answers containing "letul", and separately
# "rontmimu" (check_irregulars); also the minimum number of irregular items in
# check_structural_diversity.
MIN_IRREG_USE = 6
# Minimum number of questions where the correct answer and some distractor
# differ in whether they contain an irregular form.
MIN_IRREG_CONTRAST = 4
# Minimum total number of distractors that contain an irregular form.
MIN_IRREG_DISTRACTOR = 4
# Minimum number of correct answers whose verb_id is "give".
MIN_DITRANSITIVE = 8
# Minimum number of correct answers with at least one plural noun phrase.
MIN_PLURAL_ITEMS = 12
# Minimum number of correct answers with at least one adjective.
MIN_ADJECTIVE_ITEMS = 12
# Minimum number of correct answers whose subject or obj1 is feminine plural.
MIN_FEM_PLURAL_ITEMS = 6
# NOTE(review): not referenced anywhere in the visible part of this module;
# confirm whether another module reads it before relying on it.
MIN_PAST_SUFFIX_ENFORCED = True

def load(path: str) -> Dict:
    """Read the UTF-8 encoded JSON file at *path* and return the decoded object."""
    with open(path, encoding="utf-8") as handle:
        raw = handle.read()
    return json.loads(raw)


def one_correct(q: Dict) -> bool:
    """Return True when exactly one option of *q* has a truthy ``is_correct``."""
    flagged = [option for option in q.get("options", []) if option.get("is_correct")]
    return len(flagged) == 1


def unique_options(q: Dict) -> bool:
    """Return True when *q* has exactly four options with pairwise distinct texts."""
    option_texts = [option["text"] for option in q.get("options", [])]
    distinct_texts = set(option_texts)
    total = len(option_texts)
    return len(distinct_texts) == total and total == 4


def verb_last(text: str, verbs: Dict[str, str]) -> bool:
    """Return True when the final token of *text* looks like a verb form.

    A token qualifies if it is one of the values in *verbs*, ends with
    "mimu", or equals "rontmimu". Empty or whitespace-only text yields False.
    """
    words = text.split()
    if not words:
        return False
    final = words[-1]
    if final in verbs.values():
        return True
    return final.endswith("mimu") or final == "rontmimu"


def _np_from_dict(d: Dict) -> NPFeature:
    """Build an NPFeature from the noun-phrase dict stored with an option.

    The keys ``noun_id``, ``feminine``, ``plural``, ``adjectives`` and ``role``
    are required (a missing one raises KeyError); ``use_irregular`` is optional
    and defaults to True.
    """
    return NPFeature(
        noun_id=d["noun_id"],
        feminine=d["feminine"],
        plural=d["plural"],
        adjectives=d["adjectives"],
        role=d["role"],
        use_irregular=d.get("use_irregular", True),
    )


def option_matches_features(opt: Dict, spec) -> bool:
    """Return True when the option text equals the sentence realised from its features.

    An option without a truthy ``features`` entry never matches.
    """
    features = opt.get("features")
    if not features:
        # Features are mandatory; without them nothing can be re-rendered.
        return False
    second_object = features.get("obj2")
    sentence = SentenceFeatures(
        subject=_np_from_dict(features["subject"]),
        obj1=_np_from_dict(features["obj1"]),
        obj2=_np_from_dict(second_object) if second_object else None,
        verb_id=features["verb_id"],
        tense=features["tense"],
        use_irregular_verb=features.get("use_irregular_verb", True),
    )
    actual = opt["text"]
    return actual == realize_sentence(spec, sentence)



def check_irregulars(
    data: Dict,
    spec,
    min_use: int = MIN_IRREG_USE,
    min_contrast: int = MIN_IRREG_CONTRAST,
    min_distractors: int = MIN_IRREG_DISTRACTOR,
) -> bool:
    """Check that the irregular forms "letul" and "rontmimu" are covered well enough.

    Args:
        data: Test document with ``sections`` -> ``questions`` -> ``options``.
        spec: Language instance passed through to realize_sentence().
        min_use: Minimum number of correct answers containing each irregular form.
        min_contrast: Minimum number of questions where the correct answer and
            some distractor differ in whether they contain an irregular form.
        min_distractors: Minimum total number of distractors containing an
            irregular form.

    Returns:
        True when all three quotas are met and, for each irregular form, at
        least one question has that form in both the correct answer and a
        distractor. False otherwise, including when any question has no option
        flagged ``is_correct``.
    """
    irregular_forms = ("letul", "rontmimu")
    surfaces_correct = []
    contrast_items = 0
    distractor_irregular = 0
    dual_contrast = {form: False for form in irregular_forms}
    for sec in data.get("sections", []):
        for q in sec.get("questions", []):
            opts = q.get("options", [])
            # A bare next() would raise StopIteration here; a question without
            # a correct answer cannot satisfy the property, so fail instead.
            correct = next((o for o in opts if o.get("is_correct")), None)
            if correct is None:
                return False
            feat = correct.get("features")
            if feat:
                sf = SentenceFeatures(
                    subject=_np_from_dict(feat["subject"]),
                    obj1=_np_from_dict(feat["obj1"]),
                    obj2=_np_from_dict(feat["obj2"]) if feat.get("obj2") else None,
                    verb_id=feat["verb_id"],
                    tense=feat["tense"],
                    # Honour the stored flag, as the other checks in this
                    # module do, so the realised surface follows the features.
                    use_irregular_verb=feat.get("use_irregular_verb", True),
                )
                surfaces_correct.append(realize_sentence(spec, sf))
            else:
                surfaces_correct.append(correct["text"])
            corr_text = correct["text"]
            distractor_texts = [o["text"] for o in opts if not o.get("is_correct")]
            # Contrast: some distractor differs from the correct answer in
            # whether it contains one of the irregular forms.
            if any(
                (form in corr_text) != (form in text)
                for text in distractor_texts
                for form in irregular_forms
            ):
                contrast_items += 1
            distractor_irregular += sum(
                any(form in text for form in irregular_forms)
                for text in distractor_texts
            )
            # Contrastive use: the same irregular form appears in both the
            # correct answer and at least one distractor.
            for form in irregular_forms:
                if form in corr_text and any(form in text for text in distractor_texts):
                    dual_contrast[form] = True
    letul_count = sum("letul" in t for t in surfaces_correct)
    ront_count = sum("rontmimu" in t for t in surfaces_correct)
    return (
        letul_count >= min_use
        and ront_count >= min_use
        and contrast_items >= min_contrast
        and distractor_irregular >= min_distractors
        and all(dual_contrast.values())
    )


def meanings_unique_in_options(q: Dict, spec) -> bool:
    """Ensure no two options share identical meaning.

    Args:
        q: Question dict; ``q["options"]`` is required.
        spec: Unused here; kept so the signature matches the sibling checks.

    Returns:
        False if any option lacks ``features`` or if two options have meanings
        that meanings_equal() treats as equal; True otherwise.
    """
    seen_meanings = []
    for opt in q["options"]:
        feat = opt.get("features")
        if not feat:
            return False
        sf = SentenceFeatures(
            subject=_np_from_dict(feat["subject"]),
            obj1=_np_from_dict(feat["obj1"]),
            obj2=_np_from_dict(feat["obj2"]) if feat.get("obj2") else None,
            verb_id=feat["verb_id"],
            tense=feat["tense"],
            use_irregular_verb=feat.get("use_irregular_verb", True),
        )
        meaning = to_meaning(sf)
        if any(meanings_equal(meaning, earlier) for earlier in seen_meanings):
            return False
        # Must run for every non-duplicate option; when this line sat after
        # the return above it was unreachable and duplicates went unnoticed.
        seen_meanings.append(meaning)
    return True


def unique_correct_answers(data: Dict, spec) -> bool:
    """Return True when no correct answer repeats, by surface or by meaning.

    Returns False as soon as a question has no correct option, its correct
    option has no features, or its realised surface or meaning was already
    seen on an earlier question.
    """
    surfaces = set()
    meanings = []
    all_questions = (
        question
        for section in data.get("sections", [])
        for question in section.get("questions", [])
    )
    for question in all_questions:
        answer = None
        for candidate in question.get("options", []):
            if candidate.get("is_correct"):
                answer = candidate
                break
        if not answer:
            return False
        features = answer.get("features")
        if not features:
            return False
        second_object = features.get("obj2")
        sentence = SentenceFeatures(
            subject=_np_from_dict(features["subject"]),
            obj1=_np_from_dict(features["obj1"]),
            obj2=_np_from_dict(second_object) if second_object else None,
            verb_id=features["verb_id"],
            tense=features["tense"],
            use_irregular_verb=features.get("use_irregular_verb", True),
        )
        meaning = to_meaning(sentence)
        surface = realize_sentence(spec, sentence)
        if surface in surfaces:
            return False
        for earlier in meanings:
            if meanings_equal(meaning, earlier):
                return False
        surfaces.add(surface)
        meanings.append(meaning)
    return True


def check_role_number_uniqueness(spec) -> bool:
    """Check that each noun's RECIPIENT forms are distinct and start with "na".

    For every noun in ``spec.lexicon["nouns"]`` the singular and plural forms
    (plus the feminine plural for nouns in FEMININE_NOUNS) are taken from the
    second token of a realised present-tense "see" sentence.
    """

    def recipient(noun_id, feminine, plural):
        return NPFeature(
            noun_id=noun_id,
            feminine=feminine,
            plural=plural,
            adjectives=[],
            role="RECIPIENT",
        )

    def second_token(np):
        # The same noun phrase fills both slots; only token 1 is inspected.
        sentence = SentenceFeatures(np, np, None, "see", "PRES")
        return realize_sentence(spec, sentence).split()[1]

    for noun_id in spec.lexicon["nouns"]:
        singular_np = recipient(noun_id, False, False)
        plural_np = recipient(noun_id, False, True)
        singular = second_token(singular_np)
        plural = second_token(plural_np)
        distinct_forms = {singular, plural}
        is_feminine = noun_id in FEMININE_NOUNS
        if is_feminine:
            fem_plural = second_token(recipient(noun_id, True, True))
            distinct_forms.add(fem_plural)
        expected_count = 3 if is_feminine else 2
        if len(distinct_forms) != expected_count:
            return False
        # Receiver marking: every form must carry the "na" prefix.
        if not (singular.startswith("na") and plural.startswith("na")):
            return False
        if is_feminine and not fem_plural.startswith("na"):
            return False
    return True


def check_tense_uniqueness(spec) -> bool:
    """Return True when every verb's past form differs from its present stem.

    The past form is the "PAST" entry in ``spec.irregular_verbs`` for that
    verb when present, otherwise the stem followed by "mimu".
    """

    def past_form(verb_id, stem):
        irregular_entry = spec.irregular_verbs.get(verb_id, {})
        return irregular_entry.get("PAST", stem + "mimu")

    return all(
        stem != past_form(verb_id, stem)
        for verb_id, stem in spec.lexicon["verbs"].items()
    )


def check_structural_diversity(
    data: Dict,
    min_irregular: int = MIN_IRREG_USE,
    min_ditransitive: int = MIN_DITRANSITIVE,
    min_plural: int = MIN_PLURAL_ITEMS,
    min_adjective: int = MIN_ADJECTIVE_ITEMS,
    min_fem_plural: int = MIN_FEM_PLURAL_ITEMS,
) -> bool:
    """Check that the correct answers cover enough structural variety.

    Each question's correct option is counted towards every quota it meets:
    irregular (text contains "rontmimu" or "letul"), ditransitive (verb_id is
    "give"), plural, adjective, and feminine plural. Questions with no correct
    option, or whose correct option has no features, are not counted.

    Args:
        data: Test document with ``sections`` -> ``questions`` -> ``options``.
        min_irregular: Minimum number of irregular items.
        min_ditransitive: Minimum number of "give" items.
        min_plural: Minimum number of items with any plural noun phrase.
        min_adjective: Minimum number of items with any adjective.
        min_fem_plural: Minimum number of items whose subject or obj1 is both
            feminine and plural.

    Returns:
        True when every quota is met.
    """
    irregular_items = 0
    ditransitive_items = 0
    plural_items = 0
    adjective_items = 0
    fem_plural_items = 0
    for sec in data.get("sections", []):
        for q in sec.get("questions", []):
            # A bare next() would raise StopIteration for a question without a
            # correct option; skip it like a correct option without features.
            correct = next((o for o in q.get("options", []) if o.get("is_correct")), None)
            if correct is None:
                continue
            feat = correct.get("features")
            if not feat:
                continue
            text = correct["text"]
            if "rontmimu" in text or "letul" in text:
                irregular_items += 1
            if feat["verb_id"] == "give":
                ditransitive_items += 1
            subject = feat["subject"]
            obj2 = feat.get("obj2")
            if subject["plural"] or feat["obj1"]["plural"] or (obj2 and obj2["plural"]):
                plural_items += 1
            if subject["adjectives"] or feat["obj1"]["adjectives"] or (obj2 and obj2["adjectives"]):
                adjective_items += 1
            # NOTE(review): obj2 is not considered for the feminine-plural
            # quota, unlike the plural and adjective quotas - confirm intent.
            if (subject["feminine"] and subject["plural"]) or (
                feat["obj1"]["feminine"] and feat["obj1"]["plural"]
            ):
                fem_plural_items += 1
    return (
        irregular_items >= min_irregular
        and ditransitive_items >= min_ditransitive
        and plural_items >= min_plural
        and adjective_items >= min_adjective
        and fem_plural_items >= min_fem_plural
    )


def check_semantics(q: Dict, spec) -> bool:
    """Check the semantic relationship between the correct answer and distractors.

    The question passes only when all of the following hold:

    * it has a correct option carrying ``features``;
    * exactly one option with features has the target meaning;
    * no distractor shares the correct answer's English gloss;
    * every distractor's semantic_distance() to the target lies within
      [DIST_MIN, DIST_MAX] inclusive;
    * every distractor passes is_grammatical();
    * no distractor has the same meaning as any other option.

    Options without ``features`` are ignored by every step after the first.

    Args:
        q: Question dict; ``q["options"]`` is required.
        spec: Language instance passed through to is_grammatical().

    Returns:
        True when the question satisfies every condition above, else False
        (including when no option is flagged ``is_correct``).
    """

    def build(feat):
        # Single place that turns a stored feature dict into SentenceFeatures,
        # so every step sees the same sentence for a given option.
        return SentenceFeatures(
            subject=_np_from_dict(feat["subject"]),
            obj1=_np_from_dict(feat["obj1"]),
            obj2=_np_from_dict(feat["obj2"]) if feat.get("obj2") else None,
            verb_id=feat["verb_id"],
            tense=feat["tense"],
            use_irregular_verb=feat.get("use_irregular_verb", True),
        )

    options = q["options"]
    # A bare next() would raise StopIteration when nothing is flagged correct.
    correct = next((o for o in options if o.get("is_correct")), None)
    if correct is None:
        return False
    feat = correct.get("features")
    if not feat:
        return False
    correct_sf = build(feat)
    target_meaning = to_meaning(correct_sf)
    target_gloss = english_gloss(correct_sf)

    # Realise each option that carries features exactly once.
    featured = []
    for opt in options:
        ofeat = opt.get("features")
        if ofeat:
            sf = build(ofeat)
            featured.append((opt, sf, to_meaning(sf)))

    # Exactly one option may carry the target meaning.
    matches = sum(
        1 for _, _, meaning in featured if meanings_equal(meaning, target_meaning)
    )
    if matches != 1:
        return False

    distractors = [entry for entry in featured if not entry[0].get("is_correct")]

    # Only the correct option may share the English gloss, to avoid
    # ambiguous readings.
    for _, sf, _ in distractors:
        if english_gloss(sf) == target_gloss:
            return False

    for opt, _, meaning in distractors:
        dist = semantic_distance(meaning, target_meaning)
        if dist < DIST_MIN or dist > DIST_MAX:
            return False
        # Distractor must be grammatical per canonical renderer.
        if not is_grammatical(opt, spec):
            return False
        # Disallow meaning collisions with any other option.
        for other, _, other_meaning in featured:
            if other is opt:
                continue
            if meanings_equal(other_meaning, meaning):
                return False
    return True


def check_prefix_and_scope(opt: Dict, spec) -> bool:
    """Validate na-scope, mur on doer, and adjectives trailing their noun.

    The sentence is re-realised from the option's features and split into a
    subject chunk, an obj1 chunk and (when present) an obj2 chunk; the final
    token is left out of every chunk. The subject head must end with "mur",
    each object head must start with "na", and every chunk must hold exactly
    one head plus that noun phrase's adjectives. Options without features fail.
    """
    features = opt.get("features")
    if not features:
        return False
    second_object = features.get("obj2")
    sentence = SentenceFeatures(
        subject=_np_from_dict(features["subject"]),
        obj1=_np_from_dict(features["obj1"]),
        obj2=_np_from_dict(second_object) if second_object else None,
        verb_id=features["verb_id"],
        tense=features["tense"],
        use_irregular_verb=features.get("use_irregular_verb", True),
    )
    tokens = realize_sentence(spec, sentence).split()

    def head(chunk):
        return chunk[0] if chunk else ""

    subj_len = 1 + len(sentence.subject.adjectives)
    subj_tokens = tokens[:subj_len]
    if sentence.obj2:
        obj1_stop = subj_len + 1 + len(sentence.obj1.adjectives)
        obj1_tokens = tokens[subj_len:obj1_stop]
        obj2_tokens = tokens[obj1_stop:-1]
    else:
        obj1_tokens = tokens[subj_len:-1]
        obj2_tokens = []

    if not head(subj_tokens).endswith("mur"):
        return False
    if not head(obj1_tokens).startswith("na"):
        return False
    if sentence.obj2 and not head(obj2_tokens).startswith("na"):
        return False
    # The slices already place adjectives after the head; the length checks
    # confirm that no chunk came out short or long.
    if len(subj_tokens) != subj_len:
        return False
    if len(obj1_tokens) != 1 + len(sentence.obj1.adjectives):
        return False
    if sentence.obj2 and len(obj2_tokens) != 1 + len(sentence.obj2.adjectives):
        return False
    return True


def check_adjective_position(opt_text: str) -> bool:
    """Placeholder check: always returns True and does not inspect *opt_text*."""
    return True  # rendering enforces adjective position


def check_na_scope(opt_text: str) -> bool:
    """Placeholder check: always returns True and does not inspect *opt_text*."""
    return True  # enforced by rendering


def main() -> None:
    """Command-line entry point: validate the JSON file named on the command line.

    Exits with status 0 when every property check passes and 1 otherwise. When
    the argument count is wrong it prints a usage line and exits with status 1.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python property_tests.py generated_test.json")
        sys.exit(1)
    (json_path,) = cli_args
    data = load(json_path)
    spec = generate_language_instance()
    passed = validate_data(data, spec)
    sys.exit(0 if passed else 1)


def validate_data(data: Dict, spec=None, overrides: Dict[str, int] | None = None, quiet: bool = False) -> bool:
    """Run every property check against *data* and report the failures.

    Args:
        data: Test document with ``sections`` -> ``questions`` -> ``options``.
        spec: Language instance; generated with generate_language_instance()
            when None.
        overrides: Optional quota overrides. Recognised keys are
            ``min_irregular``, ``min_irregular_contrast``,
            ``min_irregular_distractor``, ``min_ditransitive``, ``min_plural``,
            ``min_adjective`` and ``min_fem_plural``; missing keys fall back to
            the module-level MIN_* constants.
        quiet: When True, nothing is printed.

    Returns:
        True when every check passed. All checks are run even after a failure,
        so each problem gets its own "FAIL ..." line unless *quiet* is set.
    """
    if spec is None:
        spec = generate_language_instance()
    o = overrides or {}
    min_irregular = o.get("min_irregular", MIN_IRREG_USE)
    min_irregular_contrast = o.get("min_irregular_contrast", MIN_IRREG_CONTRAST)
    min_irregular_distractor = o.get("min_irregular_distractor", MIN_IRREG_DISTRACTOR)
    min_ditransitive = o.get("min_ditransitive", MIN_DITRANSITIVE)
    min_plural = o.get("min_plural", MIN_PLURAL_ITEMS)
    min_adjective = o.get("min_adjective", MIN_ADJECTIVE_ITEMS)
    min_fem_plural = o.get("min_fem_plural", MIN_FEM_PLURAL_ITEMS)
    verbs = spec.lexicon["verbs"]
    ok = True

    def fail(message, question=None, option=None):
        # Record the failure; the question/option suffix is only formatted
        # when the line is actually printed.
        nonlocal ok
        ok = False
        if quiet:
            return
        if question is not None:
            message += f" for Q{question.get('number')}"
        if option is not None:
            message += f" option {option['label']}"
        print(message)

    # Per-option checks, in reporting order.
    option_checks = (
        ("FAIL verb_last", lambda opt: verb_last(opt["text"], verbs)),
        ("FAIL feature match", lambda opt: option_matches_features(opt, spec)),
        ("FAIL adjective position", lambda opt: check_adjective_position(opt["text"])),
        ("FAIL na-scope", lambda opt: check_na_scope(opt["text"])),
        ("FAIL grammar check", lambda opt: is_grammatical(opt, spec)),
    )

    questions = [q for s in data.get("sections", []) for q in s.get("questions", [])]
    for q in questions:
        if not one_correct(q):
            fail("FAIL one_correct", q)
        if not unique_options(q):
            fail("FAIL unique_options", q)
        for opt in q.get("options", []):
            for label, passes in option_checks:
                if not passes(opt):
                    fail(label, q, opt)
        if not check_semantics(q, spec):
            fail("FAIL semantic uniqueness/distances", q)
    # Second pass kept separate so the output order stays the same.
    for q in questions:
        if not meanings_unique_in_options(q, spec):
            fail("FAIL meanings unique across options", q)
        for opt in q.get("options", []):
            if not check_prefix_and_scope(opt, spec):
                fail("FAIL prefix/scope", q, opt)
    if not unique_correct_answers(data, spec):
        fail("FAIL unique correct answers across test")
    if not check_irregulars(
        data,
        spec,
        min_use=min_irregular,
        min_contrast=min_irregular_contrast,
        min_distractors=min_irregular_distractor,
    ):
        # Report the threshold actually applied instead of a fixed ">=3".
        fail(
            f"FAIL irregular coverage (need >={min_irregular} letul and "
            f">={min_irregular} rontmimu in correct answers)"
        )
    if not check_role_number_uniqueness(spec):
        fail("FAIL role/number uniqueness (singular/plural/fem-plural must be distinct and na-prefixed)")
    if not check_tense_uniqueness(spec):
        fail("FAIL tense uniqueness (present and past forms must differ)")
    if not check_structural_diversity(
        data,
        min_irregular=min_irregular,
        min_ditransitive=min_ditransitive,
        min_plural=min_plural,
        min_adjective=min_adjective,
        min_fem_plural=min_fem_plural,
    ):
        fail("FAIL structural diversity quotas")
    if not check_coherence(data):
        fail("FAIL language coherence checks")
    if not validate_schema(data):
        fail("FAIL JSON schema validation")
    if ok and not quiet:
        print("All property tests passed.")
    return ok


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
# git » alan.git » master » tree

# [master] / property_tests.py