| author | Alan Dipert <alan@dipert.org> | 2025-12-04 03:55:25 UTC |
| committer | Alan Dipert <alan@dipert.org> | 2025-12-04 03:55:25 UTC |
| Makefile | +24 | -0 |
| README.md | +101 | -0 |
| grade_answers.py | +84 | -0 |
| grammar_check.py | +51 | -0 |
| language_coherence.py | +33 | -0 |
| language_spec.py | +148 | -0 |
| main.py | +41 | -0 |
| property_tests.py | +449 | -0 |
| render_text.py | +83 | -0 |
| semantic.py | +67 | -0 |
| test_blueprint.py | +140 | -0 |
| test_generator.py | +620 | -0 |
| validate_test.py | +68 | -0 |
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..9998d25 --- /dev/null +++ b/Makefile @@ -0,0 +1,24 @@ +PYTHON ?= python3 + +GENERATED_JSON := generated_test.json +BOOKLET := test_booklet.txt +KEY := answer_key.txt + +.PHONY: generate render run clean test + +all: run + +generate: + $(PYTHON) main.py --out $(GENERATED_JSON) + +render: generate + $(PYTHON) render_text.py --in $(GENERATED_JSON) --test-out $(BOOKLET) --key-out $(KEY) + +run: generate render + +clean: + rm -f $(GENERATED_JSON) $(BOOKLET) $(KEY) + find . -name "__pycache__" -type d -prune -exec rm -rf {} + + +test: + @echo "No tests implemented yet" diff --git a/README.md b/README.md new file mode 100644 index 0000000..bd20f54 --- /dev/null +++ b/README.md @@ -0,0 +1,101 @@ +# ALAN — Alan's Language Aptitude iNstrument + +ALAN is a fully self-contained artificial-language aptitude assessment inspired by DLAB-style tasks. It generates a consistent micro-grammar, produces a 32-item multiple-choice test, renders a booklet and answer key, and validates every form against strict grammatical and psychometric properties. + +## What This Is +- **Purpose:** Measure rapid rule inference, pattern generalization, and attention to fine-grained grammatical cues—abilities correlated with learning new syntactic systems and with disciplined software engineering (e.g., reading specs, refactoring, reasoning about invariants). +- **Format:** 32 multiple-choice items across sections that introduce rules, then test them with strictly grammatical distractors that differ by exactly one semantic/morphosyntactic feature (minimal pairs). +- **Artifacts produced:** `generated_test.json` (canonical test), `test_booklet.txt` (questions only), `answer_key.txt` (answers with explanations). +- **Dependencies:** Python 3 only, no external libraries. + +## Why It Works (Theory & Inspirations) +- **DLAB-style artificial grammar learning:** Tasks that require inferring a controlled micro-grammar are classic measures of language-learning aptitude. ALAN uses a deterministic grammar with prefix stacking, fixed word order, and minimal irregularities to elicit rule induction rather than memorization. +- **Psychometric design:** Each distractor is a grammatical minimal pair differing by one feature (tense, number, gender, role, adjective scope, regular vs irregular). This reduces guessing via surface errors and increases discriminative power. +- **Reliability controls:** Property-based tests enforce grammar validity, one-correct-answer semantics, irregular contrasts, structural diversity (ditransitives, feminine-plural receivers, adjective-bearing NPs), and coverage quotas. +- **Construct alignment with software practice:** Success requires precise rule following, rapid pattern spotting, and handling edge cases (irregulars)—abilities useful in commercial software roles (debugging, code review, protocol/spec compliance). + +## Quick Start +```bash +make run # generates JSON, booklet, and key +cat test_booklet.txt # view the booklet +cat answer_key.txt # view the key +``` + +## Administering ALAN +1. **Prepare materials:** Run `make run` to produce `test_booklet.txt` and `answer_key.txt`. Print or distribute the booklet only. +2. **Time:** 25–30 minutes is typical for 32 items; you can standardize at 30 minutes for comparability. +3. **Instructions to candidates:** + - “You will see a small dictionary, a short rule cheat sheet, and examples. Every question has four options; exactly one is correct. All sentences follow the published rules—no tricks. 
Work quickly but carefully.” +4. **Environment:** Quiet room, no external aids. Paper or on-screen is fine. +5. **Scoring:** 1 point per correct item, no guessing penalty. Max = 32. + +## Interpreting Scores (Commercial Software Context) +These bands are informal heuristics, assuming proctored conditions and naïve candidates: +- **27–32 (Excellent):** Strong rule induction and precision. Likely excels at roles requiring rapid onboarding to new codebases, complex refactors, API/protocol design, formal verification, or compiler/infra work. +- **22–26 (Strong):** Solid pattern learning and attention to detail. Suited to backend/product engineering, systems integration, data engineering; should pick up new stacks quickly. +- **17–21 (Moderate):** Adequate but may need more scaffolding. Good for roles with clearer guardrails (feature work, QA automation, internal tooling) where patterns are stable. +- **≤16 (Developing):** May struggle with opaque specs or fast-changing systems. Benefits from mentorship, pairing, and stronger processes and linters. + +Do **not** use the score as a sole hiring gate; treat it as one data point alongside interviews, work samples, and references. + +## How the Grammar Works (Cheat Sheet Summary) +- **Word order:** DOER – RECEIVER – (THEME if ‘give’) – VERB. Verb is last. +- **Prefix stack on nouns:** `na` (receiver) + `mem` (feminine) + `leko` (plural) + noun; doer adds suffix `mur`. +- **Adjectives:** Follow the noun they modify. +- **Tense:** Present = bare verb; Past = verb + `mimu`, except irregular `chase` past = `rontmimu`. +- **Irregular plural:** `boy` plural = `letul` (regular would be `lekotul`). +- **Receiver marking:** `na-` applies to the whole NP (including mem/leko). +- **Feminine plural:** `memleko + noun` for feminine humans only. + +## Generation & Validation Pipeline +- **Canonical rendering:** All surfaces are built from feature structures through `language_spec.realize_sentence`. +- **Minimal-pair distractors:** Each distractor clones the correct feature bundle and flips exactly one feature (tense, number, gender where applicable, adjective presence, role swap, or irregular toggle). Anything ungrammatical or semantically duplicate is rejected. +- **Property tests (enforced on every generation):** + - Exactly one correct meaning per item; meanings across A–D are unique. + - All options are grammatical (word order, stack order, na-scope, doer `-mur`, adjective-after-noun). + - Distractors at semantic distance = 1 from target. + - Irregulars (letul, rontmimu) appear in contrastive contexts; distribution quotas enforced. + - Structural diversity quotas for ditransitives, plurals, adjectives, feminine plurals. + - No prefix/suffix ordering violations. + - Tense/number/gender surfaces remain distinct (no collapses). +- **Regeneration:** `main.py` retries up to 10 seeds until all properties pass; otherwise it fails loudly. + +## Proctoring Guidance +- Keep the cheat sheet and dictionary visible with the booklet; candidates should not need prior linguistics knowledge. +- Do not give the answer key to candidates. Collect booklets before revealing answers. +- If remote, time-box and supervise; ask candidates to share their screen if feasible. + +## Mapping to Roles (Examples) +- **Infra/Platform/Compilers:** Look for 27–32; strong handling of irregular forms and minimal-pair reasoning aligns with spec-heavy work. +- **Backend/Product:** 22–26 suggests strong fit; quick uptake of API contracts and data models.
+- **QA/Automation/Release:** 17–21 can be effective with processes; use the score to tailor onboarding (more scaffolding). +- **Entry/Support:** ≤16 indicates a need for structured training; avoid dropping them into ambiguous, underspecified projects. + +## Research & Inspirations +- **Artificial grammar learning (AGL):** Classic paradigm for measuring rule induction (Reber, 1967; more recent AGL studies). ALAN adapts AGL principles to a morphosyntactic micro-grammar. +- **DLAB-style aptitude tests:** Use controlled artificial languages to avoid prior-knowledge effects and to test rapid pattern extraction. +- **Psychometric good practice:** Minimal-pair distractors, single-key answers, controlled difficulty progression, and automated validation to reduce construct-irrelevant variance. + +## Files Overview +- `language_spec.py` — Grammar, lexicon, canonical renderer, irregulars. +- `test_blueprint.py` — Section structure and default blueprint. +- `test_generator.py` — Feature-based item generation and minimal-pair distractors. +- `property_tests.py` — Gatekeeper checks (grammaticality, uniqueness, quotas). +- `grammar_check.py`, `semantic.py`, `language_coherence.py` — Grammaticality, meaning, and coherence helpers used by the property tests. +- `render_text.py` — Converts JSON to booklet and answer key. +- `grade_answers.py` — Scores a newline-delimited answer sheet against the JSON. +- `validate_test.py` — Standalone sanity checks on a generated JSON. +- `main.py` — CLI to generate JSON; retries until all properties pass. +- `Makefile` — `make run` builds everything; `make clean` removes artifacts. +- `answer_key.txt`, `test_booklet.txt`, `generated_test.json` — outputs from the last generation. + +## Taking the Test (Candidate View) +- Read the cheat sheet and examples; note prefix order, adjective position, verb last, and irregulars. +- For each question, compare options as minimal pairs: check the tense marker, plural/gender markers, role markers (`na` + stack), adjective placement, and irregular forms. +- Exactly one option fits the target meaning under the published rules. + +## Limitations & Ethics +- This is a single data point; do not use it as a sole hiring filter. +- Cultural and linguistic neutrality is intended but not guaranteed; provide accessibility accommodations as needed. +- Scores can be affected by test-taking anxiety or unfamiliarity with such tasks; interpret cautiously. + +--- + +For questions or contributions, open an issue or PR in this repository.
ALAN is intentionally small, transparent, and reproducible to keep the construct clear and auditable.*** diff --git a/grade_answers.py b/grade_answers.py new file mode 100644 index 0000000..6e49d7f --- /dev/null +++ b/grade_answers.py @@ -0,0 +1,84 @@ +"""Score a set of answers against a generated test JSON.""" +from __future__ import annotations + +import argparse +import json +from typing import List, Dict, Any + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Grade answers for a generated test JSON.") + parser.add_argument("test_json", help="Path to generated_test.json") + parser.add_argument("answers_file", help="Path to newline-delimited answers (e.g., A\\nB\\nC...)") + parser.add_argument( + "--show-details", + action="store_true", + help="Print per-question correctness details.", + ) + return parser.parse_args() + + +def load_test(path: str) -> List[Dict[str, Any]]: + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + questions: List[Dict[str, Any]] = [] + for section in data.get("sections", []): + questions.extend(section.get("questions", [])) + questions.sort(key=lambda q: q.get("number", 0)) + return questions + + +def load_answers(path: str) -> List[str]: + labels: List[str] = [] + with open(path, "r", encoding="utf-8") as f: + for line in f: + stripped = line.strip().upper() + if stripped: + labels.append(stripped) + return labels + + +def score(questions: List[Dict[str, Any]], answers: List[str]) -> Dict[str, Any]: + results = [] + correct_count = 0 + for idx, question in enumerate(questions): + provided = answers[idx] if idx < len(answers) else None + correct_label = next( + (opt["label"] for opt in question.get("options", []) if opt.get("is_correct")), None + ) + is_correct = provided == correct_label + if is_correct: + correct_count += 1 + results.append( + { + "number": question.get("number"), + "provided": provided, + "correct": correct_label, + "is_correct": is_correct, + } + ) + return { + "total": len(questions), + "answered": min(len(answers), len(questions)), + "correct": correct_count, + "results": results, + } + + +def main() -> None: + args = parse_args() + questions = load_test(args.test_json) + answers = load_answers(args.answers_file) + summary = score(questions, answers) + + print(f"Scored {summary['answered']} of {summary['total']} questions.") + print(f"Correct: {summary['correct']} / {summary['total']}") + if args.show_details: + for r in summary["results"]: + provided = r['provided'] if r['provided'] is not None else "-" + marker = "✓" if r["is_correct"] else "✗" + print(f"Q{r['number']}: provided {provided}, correct {r['correct']} {marker}") + + +if __name__ == "__main__": + main() diff --git a/grammar_check.py b/grammar_check.py new file mode 100644 index 0000000..00ce827 --- /dev/null +++ b/grammar_check.py @@ -0,0 +1,51 @@ +"""Grammar check via reconstruction from features.""" +from __future__ import annotations + +from typing import Dict +from language_spec import ( + SentenceFeatures, + NPFeature, + realize_sentence, + generate_language_instance, + FEMININE_NOUNS, + AGENT, + RECIPIENT, + THEME, +) + + +def is_grammatical(option: Dict, spec=None) -> bool: + if spec is None: + spec = generate_language_instance() + feat = option.get("features") + if not feat: + return False + subj = NPFeature(**feat["subject"]) + obj1 = NPFeature(**feat["obj1"]) + obj2 = NPFeature(**feat["obj2"]) if feat.get("obj2") else None + # role sanity + if subj.role != AGENT: + return False + if obj1.role not in 
{RECIPIENT, THEME}: + return False + if feat["verb_id"] == "give": + if obj2 is None or obj2.role != THEME: + return False + else: + if obj2 is not None: + return False + # feminine only for feminine nouns + for np in [subj, obj1] + ([obj2] if obj2 else []): + if np.feminine and np.noun_id not in FEMININE_NOUNS: + return False + # give should have obj2 + sf = SentenceFeatures( + subject=subj, + obj1=obj1, + obj2=obj2, + verb_id=feat["verb_id"], + tense=feat["tense"], + use_irregular_verb=feat.get("use_irregular_verb", True), + ) + surface = realize_sentence(spec, sf) + return option["text"] == surface diff --git a/language_coherence.py b/language_coherence.py new file mode 100644 index 0000000..4e87ac7 --- /dev/null +++ b/language_coherence.py @@ -0,0 +1,33 @@ +"""Global coherence checks for ALAN language and test.""" +from __future__ import annotations + +from typing import Dict, Any + +from language_spec import generate_language_instance, FEMININE_NOUNS + + +def check_lexicon(spec) -> bool: + # ensure stems don't start with markers + markers = ("na", "mem", "leko") + for stem in spec.lexicon["nouns"].values(): + if stem.startswith(markers): + return False + return True + + +def check_rules(meta: Dict[str, Any]) -> bool: + required = { + "Word order: DOER RECEIVER VERB (SOV). For 'give': doer, recipient, theme, verb.", + "Prefix stacking: na (receiver) + mem (feminine) + leko (plural) + noun; doer adds suffix mur.", + "Irregulars: verb 'ror' past = 'rontmimu'; plural of 'tul' = 'letul'.", + } + return required.issubset(set(meta.get("rules", []))) + + +def check_coherence(data: Dict[str, Any]) -> bool: + spec = generate_language_instance() + if not check_lexicon(spec): + return False + if not check_rules(data.get("meta", {})): + return False + return True diff --git a/language_spec.py b/language_spec.py new file mode 100644 index 0000000..3207645 --- /dev/null +++ b/language_spec.py @@ -0,0 +1,148 @@ +"""Canonical grammar spec and validator for ALAN.""" +from __future__ import annotations + +from dataclasses import dataclass +from typing import Dict, List, Optional + + +AGENT = "AGENT" +RECIPIENT = "RECIPIENT" # OBJ1 +THEME = "THEME" # OBJ2 + + +@dataclass +class NPFeature: + noun_id: str + feminine: bool + plural: bool + adjectives: List[str] + role: str # AGENT/RECIPIENT/THEME + use_irregular: bool = True + + +@dataclass +class SentenceFeatures: + subject: NPFeature + obj1: NPFeature + obj2: Optional[NPFeature] + verb_id: str + tense: str # PRES/PAST + use_irregular_verb: bool = True + + +@dataclass +class LanguageSpec: + lexicon: Dict[str, Dict[str, str]] + irregular_verbs: Dict[str, Dict[str, str]] + irregular_noun_plurals: Dict[str, str] + + +def generate_language_instance(seed: int | None = None) -> LanguageSpec: + lexicon = { + "nouns": { + "man": "po", + "woman": "rema", + "boy": "tul", + "girl": "siv", + "ball": "kob", + "house": "vut", + }, + "verbs": {"see": "dak", "give": "mep", "chase": "ror"}, + "adjectives": {"tall": "sar", "red": "lin", "big": "mod", "fast": "par"}, + } + irregular_verbs = {"chase": {"PAST": "rontmimu"}} + irregular_noun_plurals = {"boy": "letul"} + return LanguageSpec( + lexicon=lexicon, + irregular_verbs=irregular_verbs, + irregular_noun_plurals=irregular_noun_plurals, + ) + + +FEMININE_NOUNS = {"woman", "girl"} + + +def _plural_form(noun_id: str, spec: LanguageSpec, feminine: bool, use_irregular: bool) -> str: + if use_irregular and noun_id in spec.irregular_noun_plurals: + return spec.irregular_noun_plurals[noun_id] + stem = 
spec.lexicon["nouns"][noun_id] + if feminine and noun_id in FEMININE_NOUNS: + return "memleko" + stem + return "leko" + stem + + +def _noun_form(np: NPFeature, spec: LanguageSpec) -> str: + base = spec.lexicon["nouns"][np.noun_id] + fem = np.feminine and np.noun_id in FEMININE_NOUNS + if np.plural: + form = _plural_form(np.noun_id, spec, fem, np.use_irregular) + else: + form = ("mem" if fem else "") + base + # receiver marker wraps whole NP + if np.role != AGENT: + form = "na" + form + # doer suffix + if np.role == AGENT: + form = form + "mur" + # adjectives after noun + if np.adjectives: + adj_forms = [spec.lexicon["adjectives"][a] for a in np.adjectives] + form = f"{form} {' '.join(adj_forms)}" + return form + + +def realize_sentence(spec: LanguageSpec, sf: SentenceFeatures) -> str: + parts = [ + _noun_form(sf.subject, spec), + _noun_form(sf.obj1, spec), + ] + if sf.obj2: + parts.append(_noun_form(sf.obj2, spec)) + verb_stem = spec.lexicon["verbs"][sf.verb_id] + irregular = spec.irregular_verbs.get(sf.verb_id, {}).get(sf.tense) + if irregular and sf.use_irregular_verb: + verb_form = irregular + else: + verb_form = verb_stem if sf.tense == "PRES" else verb_stem + "mimu" + parts.append(verb_form) + return " ".join(parts) + + +def english_gloss(sf: SentenceFeatures) -> str: + def np_gloss(np: NPFeature) -> str: + noun = np.noun_id + if np.feminine and np.noun_id in FEMININE_NOUNS: + noun = {"man": "woman", "boy": "girl"}.get(noun, noun) + adj = " ".join(np.adjectives) + phrase = f"{adj} {noun}".strip() + if np.plural: + phrase += "s" + role_note = "" + if np.feminine and np.plural and np.noun_id in FEMININE_NOUNS: + role_note = " (feminine plural)" + elif np.plural: + role_note = " (plural)" + elif np.feminine and np.noun_id in FEMININE_NOUNS: + role_note = " (feminine)" + return f"the {phrase}{role_note}".strip() + + subj = np_gloss(sf.subject) + obj1 = np_gloss(sf.obj1) + verb = sf.verb_id + if verb == "give" and sf.obj2: + obj2 = np_gloss(sf.obj2) + verb_en = "gave" if sf.tense == "PAST" else "gives" + return f"{subj} {verb_en} {obj2} to {obj1} ({'past' if sf.tense=='PAST' else 'present'})" + verb_en = { + ("see", "PRES"): "sees", + ("see", "PAST"): "saw", + ("chase", "PRES"): "chases", + ("chase", "PAST"): "chased", + ("give", "PRES"): "gives", + ("give", "PAST"): "gave", + }.get((verb, sf.tense), f"{verb}s") + return f"{subj} {verb_en} {obj1} ({'past' if sf.tense=='PAST' else 'present'})" + + +def validate_sentence_surface(sf: SentenceFeatures, sentence: str, spec: LanguageSpec) -> bool: + return sentence.strip() == realize_sentence(spec, sf) diff --git a/main.py b/main.py new file mode 100644 index 0000000..fb16b0d --- /dev/null +++ b/main.py @@ -0,0 +1,41 @@ +"""CLI entry point for generating a JSON DLAB-style test.""" +from __future__ import annotations + +import argparse +import json +import random + +from language_spec import generate_language_instance +from test_blueprint import get_default_concepts, get_default_blueprint +from test_generator import generate_test +from property_tests import validate_data + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Generate an artificial language test JSON.") + parser.add_argument("--seed", type=int, help="Random seed for reproducibility.") + parser.add_argument("--out", dest="out_path", default="generated_test.json", help="Output path.") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + actual_seed = args.seed if args.seed is not None else random.randint(0, 1_000_000) + 
concepts = get_default_concepts() + blueprint = get_default_blueprint() + + for attempt in range(10): + rng = random.Random(actual_seed + attempt) + spec = generate_language_instance(actual_seed + attempt) + test_dict = generate_test(spec, blueprint, concepts, rng, seed=actual_seed + attempt) + if validate_data(test_dict, spec): + with open(args.out_path, "w", encoding="utf-8") as f: + json.dump(test_dict, f, indent=2) + print(f"Generated test JSON at {args.out_path} (seed {actual_seed + attempt})") + break + else: + raise SystemExit("Property tests failed after retries; test not written.") + + +if __name__ == "__main__": + main() diff --git a/property_tests.py b/property_tests.py new file mode 100644 index 0000000..27950ef --- /dev/null +++ b/property_tests.py @@ -0,0 +1,449 @@ +"""Property-based checks for ALAN generation.""" +from __future__ import annotations + +import json +import sys +from typing import Dict + +from language_spec import ( + generate_language_instance, + SentenceFeatures, + NPFeature, + realize_sentence, + FEMININE_NOUNS, + english_gloss, +) +from grammar_check import is_grammatical +from semantic import to_meaning, meanings_equal, semantic_distance +from language_coherence import check_coherence + +# Property thresholds +DIST_MIN = 1 +DIST_MAX = 1 +MIN_IRREG_USE = 6 +MIN_IRREG_CONTRAST = 4 +MIN_IRREG_DISTRACTOR = 4 +MIN_DITRANSITIVE = 8 +MIN_PLURAL_ITEMS = 12 +MIN_ADJECTIVE_ITEMS = 12 +MIN_FEM_PLURAL_ITEMS = 6 +MIN_PAST_SUFFIX_ENFORCED = True + +def load(path: str) -> Dict: + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + + +def one_correct(q: Dict) -> bool: + return sum(1 for o in q.get("options", []) if o.get("is_correct")) == 1 + + +def unique_options(q: Dict) -> bool: + texts = [o["text"] for o in q.get("options", [])] + return len(texts) == len(set(texts)) and len(texts) == 4 + + +def verb_last(text: str, verbs: Dict[str, str]) -> bool: + tokens = text.strip().split() + if not tokens: + return False + last = tokens[-1] + return last in verbs.values() or last.endswith("mimu") or last == "rontmimu" + + +def _np_from_dict(d: Dict) -> SentenceFeatures: + return NPFeature( + noun_id=d["noun_id"], + feminine=d["feminine"], + plural=d["plural"], + adjectives=d["adjectives"], + role=d["role"], + use_irregular=d.get("use_irregular", True), + ) + + +def option_matches_features(opt: Dict, spec) -> bool: + feat = opt.get("features") + if not feat: + return False # must be present + sf = SentenceFeatures( + subject=_np_from_dict(feat["subject"]), + obj1=_np_from_dict(feat["obj1"]), + obj2=_np_from_dict(feat["obj2"]) if feat.get("obj2") else None, + verb_id=feat["verb_id"], + tense=feat["tense"], + use_irregular_verb=feat.get("use_irregular_verb", True), + ) + return opt["text"] == realize_sentence(spec, sf) + + + +def check_irregulars( + data: Dict, + spec, + min_use: int = MIN_IRREG_USE, + min_contrast: int = MIN_IRREG_CONTRAST, + min_distractors: int = MIN_IRREG_DISTRACTOR, +) -> bool: + surfaces_correct = [] + contrast_items = 0 + distractor_irregular = 0 + dual_contrast = {"letul": False, "rontmimu": False} + for sec in data.get("sections", []): + for q in sec.get("questions", []): + opts = q.get("options", []) + correct = next(o for o in opts if o.get("is_correct")) + feat = correct.get("features") + if feat: + sf = SentenceFeatures( + subject=_np_from_dict(feat["subject"]), + obj1=_np_from_dict(feat["obj1"]), + obj2=_np_from_dict(feat["obj2"]) if feat.get("obj2") else None, + verb_id=feat["verb_id"], + tense=feat["tense"], + ) + 
surfaces_correct.append(realize_sentence(spec, sf)) + else: + surfaces_correct.append(correct["text"]) + # check contrasts: correct irregular vs regular-like distractor + corr_text = correct["text"] + has_contrast = any( + (("letul" in corr_text and "letul" not in o["text"]) + or ("rontmimu" in corr_text and "rontmimu" not in o["text"]) + or ("letul" not in corr_text and "letul" in o["text"]) + or ("rontmimu" not in corr_text and "rontmimu" in o["text"])) + for o in opts if not o.get("is_correct") + ) + if has_contrast: + contrast_items += 1 + distractor_irregular += sum( + ("letul" in o["text"]) or ("rontmimu" in o["text"]) + for o in opts + if not o.get("is_correct") + ) + # ensure contrastive use where both correct and a distractor use same irregular form + if ("letul" in corr_text and any("letul" in o["text"] for o in opts if not o.get("is_correct"))): + dual_contrast["letul"] = True + if ("rontmimu" in corr_text and any("rontmimu" in o["text"] for o in opts if not o.get("is_correct"))): + dual_contrast["rontmimu"] = True + letul_count = sum("letul" in t for t in surfaces_correct) + ront_count = sum("rontmimu" in t for t in surfaces_correct) + return ( + letul_count >= min_use + and ront_count >= min_use + and contrast_items >= min_contrast + and distractor_irregular >= min_distractors + and all(dual_contrast.values()) + ) + + +def meanings_unique_in_options(q: Dict, spec) -> bool: + """Ensure no two options share identical meaning.""" + meanings = [] + for opt in q["options"]: + feat = opt.get("features") + if not feat: + return False + sf = SentenceFeatures( + subject=_np_from_dict(feat["subject"]), + obj1=_np_from_dict(feat["obj1"]), + obj2=_np_from_dict(feat["obj2"]) if feat.get("obj2") else None, + verb_id=feat["verb_id"], + tense=feat["tense"], + use_irregular_verb=feat.get("use_irregular_verb", True), + ) + m = to_meaning(sf) + if any(meanings_equal(m, existing) for existing in meanings): + return False + meanings.append(m) + return True + + +def check_role_number_uniqueness(spec) -> bool: + nouns = spec.lexicon["nouns"] + for noun_id in nouns: + forms = set() + np_sg = NPFeature(noun_id=noun_id, feminine=False, plural=False, adjectives=[], role="RECIPIENT") + np_pl = NPFeature(noun_id=noun_id, feminine=False, plural=True, adjectives=[], role="RECIPIENT") + sg_surface = realize_sentence(spec, SentenceFeatures(np_sg, np_sg, None, "see", "PRES")).split()[1] + pl_surface = realize_sentence(spec, SentenceFeatures(np_pl, np_pl, None, "see", "PRES")).split()[1] + forms.update([sg_surface, pl_surface]) + if noun_id in FEMININE_NOUNS: + np_fp = NPFeature(noun_id=noun_id, feminine=True, plural=True, adjectives=[], role="RECIPIENT") + fp_surface = realize_sentence( + spec, SentenceFeatures(np_fp, np_fp, None, "see", "PRES") + ).split()[1] + forms.add(fp_surface) + if len(forms) != (3 if noun_id in FEMININE_NOUNS else 2): + return False + # receiver marking + if not sg_surface.startswith("na") or not pl_surface.startswith("na"): + return False + if noun_id in FEMININE_NOUNS and not fp_surface.startswith("na"): + return False + return True + + +def check_tense_uniqueness(spec) -> bool: + for vid, stem in spec.lexicon["verbs"].items(): + pres = stem + past = spec.irregular_verbs.get(vid, {}).get("PAST", stem + "mimu") + if pres == past: + return False + return True + + +def check_structural_diversity( + data: Dict, + min_irregular: int = MIN_IRREG_USE, + min_ditransitive: int = MIN_DITRANSITIVE, + min_plural: int = MIN_PLURAL_ITEMS, + min_adjective: int = MIN_ADJECTIVE_ITEMS, + 
min_fem_plural: int = MIN_FEM_PLURAL_ITEMS, +) -> bool: + irregular_items = 0 + ditransitive_items = 0 + plural_items = 0 + adjective_items = 0 + fem_plural_items = 0 + for sec in data.get("sections", []): + for q in sec.get("questions", []): + correct = next(o for o in q.get("options", []) if o.get("is_correct")) + feat = correct.get("features") + if not feat: + continue + if correct["text"].find("rontmimu") != -1 or correct["text"].find("letul") != -1: + irregular_items += 1 + if feat["verb_id"] == "give": + ditransitive_items += 1 + if feat["subject"]["plural"] or feat["obj1"]["plural"] or (feat.get("obj2") and feat["obj2"]["plural"]): + plural_items += 1 + if feat["subject"]["adjectives"] or feat["obj1"]["adjectives"] or (feat.get("obj2") and feat["obj2"]["adjectives"]): + adjective_items += 1 + if (feat["subject"]["feminine"] and feat["subject"]["plural"]) or ( + feat["obj1"]["feminine"] and feat["obj1"]["plural"] + ): + fem_plural_items += 1 + return ( + irregular_items >= min_irregular + and ditransitive_items >= min_ditransitive + and plural_items >= min_plural + and adjective_items >= min_adjective + and fem_plural_items >= min_fem_plural + ) + + +def check_semantics(q: Dict, spec) -> bool: + correct = next(o for o in q["options"] if o.get("is_correct")) + feat = correct.get("features") + if not feat: + return False + correct_sf = SentenceFeatures( + subject=_np_from_dict(feat["subject"]), + obj1=_np_from_dict(feat["obj1"]), + obj2=_np_from_dict(feat["obj2"]) if feat.get("obj2") else None, + verb_id=feat["verb_id"], + tense=feat["tense"], + use_irregular_verb=feat.get("use_irregular_verb", True), + ) + target_meaning = to_meaning(correct_sf) + target_gloss = english_gloss(correct_sf) + # Exactly one matches target + matches = 0 + for opt in q["options"]: + ofeat = opt.get("features") + if not ofeat: + continue + sf = SentenceFeatures( + subject=_np_from_dict(ofeat["subject"]), + obj1=_np_from_dict(ofeat["obj1"]), + obj2=_np_from_dict(ofeat["obj2"]) if ofeat.get("obj2") else None, + verb_id=ofeat["verb_id"], + tense=ofeat["tense"], + use_irregular_verb=ofeat.get("use_irregular_verb", True), + ) + if meanings_equal(to_meaning(sf), target_meaning): + matches += 1 + if matches != 1: + return False + # Only the correct option should share the English gloss to avoid ambiguous readings. 
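+ # (english_gloss ignores the use_irregular flags, so options differing only in irregular marking would read identically in English.)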
+ for opt in q["options"]: + if opt.get("is_correct"): + continue + ofeat = opt.get("features") + if not ofeat: + continue + sf = SentenceFeatures( + subject=_np_from_dict(ofeat["subject"]), + obj1=_np_from_dict(ofeat["obj1"]), + obj2=_np_from_dict(ofeat["obj2"]) if ofeat.get("obj2") else None, + verb_id=ofeat["verb_id"], + tense=ofeat["tense"], + use_irregular_verb=ofeat.get("use_irregular_verb", True), + ) + if english_gloss(sf) == target_gloss: + return False + # Distractors differ by 1..2 features + for opt in q["options"]: + if opt.get("is_correct"): + continue + ofeat = opt.get("features") + if not ofeat: + continue + sf = SentenceFeatures( + subject=_np_from_dict(ofeat["subject"]), + obj1=_np_from_dict(ofeat["obj1"]), + obj2=_np_from_dict(ofeat["obj2"]) if ofeat.get("obj2") else None, + verb_id=ofeat["verb_id"], + tense=ofeat["tense"], + ) + dist = semantic_distance(to_meaning(sf), target_meaning) + if dist < DIST_MIN or dist > DIST_MAX: + return False + # Distractor must be grammatical per canonical renderer + if not is_grammatical(opt, spec): + return False + # disallow meaning collisions + for other in q["options"]: + if other is opt: + continue + ofeat = other.get("features") + if not ofeat: + continue + other_sf = SentenceFeatures( + subject=_np_from_dict(ofeat["subject"]), + obj1=_np_from_dict(ofeat["obj1"]), + obj2=_np_from_dict(ofeat["obj2"]) if ofeat.get("obj2") else None, + verb_id=ofeat["verb_id"], + tense=ofeat["tense"], + use_irregular_verb=ofeat.get("use_irregular_verb", True), + ) + if meanings_equal(to_meaning(other_sf), to_meaning(sf)): + return False + return True + + +def check_prefix_and_scope(opt: Dict, spec) -> bool: + """Validate na-scope, mur on doer, adjectives trailing.""" + feat = opt.get("features") + if not feat: + return False + sf = SentenceFeatures( + subject=_np_from_dict(feat["subject"]), + obj1=_np_from_dict(feat["obj1"]), + obj2=_np_from_dict(feat["obj2"]) if feat.get("obj2") else None, + verb_id=feat["verb_id"], + tense=feat["tense"], + use_irregular_verb=feat.get("use_irregular_verb", True), + ) + surface = realize_sentence(spec, sf).split() + subj_tokens = [] + obj1_tokens = [] + obj2_tokens = [] + if sf.obj2: + subj_tokens = surface[0 : 1 + len(sf.subject.adjectives)] + obj1_tokens = surface[1 + len(sf.subject.adjectives) : 2 + len(sf.subject.adjectives) + len(sf.obj1.adjectives)] + obj2_tokens = surface[2 + len(sf.subject.adjectives) + len(sf.obj1.adjectives) : -1] + else: + subj_tokens = surface[0 : 1 + len(sf.subject.adjectives)] + obj1_tokens = surface[1 + len(sf.subject.adjectives) : -1] + subj_head = subj_tokens[0] if subj_tokens else "" + obj1_head = obj1_tokens[0] if obj1_tokens else "" + if not subj_head.endswith("mur"): + return False + if not obj1_head.startswith("na"): + return False + if sf.obj2: + obj2_head = obj2_tokens[0] if obj2_tokens else "" + if not obj2_head.startswith("na"): + return False + # adjectives follow noun head: already ensured by slices, but assert lengths + if len(subj_tokens) != 1 + len(sf.subject.adjectives): + return False + if len(obj1_tokens) != 1 + len(sf.obj1.adjectives): + return False + if sf.obj2 and len(obj2_tokens) != 1 + len(sf.obj2.adjectives): + return False + return True + + +def check_adjective_position(opt_text: str) -> bool: + return True # rendering enforces adjective position + + +def check_na_scope(opt_text: str) -> bool: + return True # enforced by rendering + + +def main() -> None: + if len(sys.argv) != 2: + print("Usage: python property_tests.py generated_test.json") + sys.exit(1) + 
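# Standalone mode: validate an existing test JSON against a freshly built canonical spec. +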
data = load(sys.argv[1]) + spec = generate_language_instance() + ok = validate_data(data, spec) + sys.exit(0 if ok else 1) + + +def validate_data(data: Dict, spec=None) -> bool: + if spec is None: + spec = generate_language_instance() + verbs = spec.lexicon["verbs"] + ok = True + questions = [q for s in data.get("sections", []) for q in s.get("questions", [])] + for q in questions: + if not one_correct(q): + ok = False + print(f"FAIL one_correct for Q{q.get('number')}") + if not unique_options(q): + ok = False + print(f"FAIL unique_options for Q{q.get('number')}") + for opt in q.get("options", []): + if not verb_last(opt["text"], verbs): + ok = False + print(f"FAIL verb_last for Q{q.get('number')} option {opt['label']}") + if not option_matches_features(opt, spec): + ok = False + print(f"FAIL feature match for Q{q.get('number')} option {opt['label']}") + if not check_adjective_position(opt["text"]): + ok = False + print(f"FAIL adjective position for Q{q.get('number')} option {opt['label']}") + if not check_na_scope(opt["text"]): + ok = False + print(f"FAIL na-scope for Q{q.get('number')} option {opt['label']}") + if not is_grammatical(opt, spec): + ok = False + print(f"FAIL grammar check for Q{q.get('number')} option {opt['label']}") + if not check_semantics(q, spec): + ok = False + print(f"FAIL semantic uniqueness/distances for Q{q.get('number')}") + for q in questions: + if not meanings_unique_in_options(q, spec): + ok = False + print(f"FAIL meanings unique across options for Q{q.get('number')}") + for opt in q.get("options", []): + if not check_prefix_and_scope(opt, spec): + ok = False + print(f"FAIL prefix/scope for Q{q.get('number')} option {opt['label']}") + if not check_irregulars(data, spec): + ok = False + print("FAIL irregular coverage (need >=3 letul and >=3 rontmimu in correct answers)") + if not check_role_number_uniqueness(spec): + ok = False + print("FAIL role/number uniqueness (singular/plural/fem-plural must be distinct and na-prefixed)") + if not check_tense_uniqueness(spec): + ok = False + print("FAIL tense uniqueness (present and past forms must differ)") + if not check_structural_diversity(data): + ok = False + print("FAIL structural diversity quotas") + if not check_coherence(data): + ok = False + print("FAIL language coherence checks") + if ok: + print("All property tests passed.") + return ok + + +if __name__ == "__main__": + main() diff --git a/render_text.py b/render_text.py new file mode 100644 index 0000000..e8b0949 --- /dev/null +++ b/render_text.py @@ -0,0 +1,83 @@ +"""Render ALAN JSON to booklet and answer key.""" +from __future__ import annotations + +import argparse +import json +from typing import Dict, Any + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Render ALAN test to text.") + parser.add_argument("--in", dest="in_path", required=True) + parser.add_argument("--test-out", dest="test_out", default="test_booklet.txt") + parser.add_argument("--key-out", dest="key_out", default="answer_key.txt") + return parser.parse_args() + + +def render_booklet(data: Dict[str, Any]) -> str: + lines = ["Alan's Language Aptitude iNstrument (ALAN)", ""] + meta = data.get("meta", {}) + if meta.get("instructions"): + lines.append(meta["instructions"]) + lines.append("") + if meta.get("rules"): + lines.append("Grammar Cheat Sheet") + lines.append("-------------------") + for rule in meta["rules"]: + lines.append(f"- {rule}") + lines.append("") + dict_data = meta.get("dictionary", {}) + if dict_data: + lines.append("Starter 
Dictionary") + lines.append("-----------------") + for title, group in [ + ("Nouns", dict_data.get("nouns", {})), + ("Verbs", dict_data.get("verbs", {})), + ("Adjectives", dict_data.get("adjectives", {})), + ]: + lines.append(title) + for eng, lang in group.items(): + lines.append(f" {eng} = {lang}") + lines.append("") + lines.append("") + + for section in data.get("sections", []): + lines.append(f"Section {section['id']}") + lines.append("-" * 20) + for intro in section.get("intro_text", []): + lines.append(intro) + lines.append("") + for q in section.get("questions", []): + lines.append(f"{q['number']}. {q['stem']}") + for opt in q.get("options", []): + lines.append(f" {opt['label']}) {opt['text']}") + lines.append("") + lines.append("") + return "\n".join(lines) + + +def render_key(data: Dict[str, Any]) -> str: + lines = ["Answer Key", ""] + for section in data.get("sections", []): + for q in section.get("questions", []): + correct = next((o for o in q["options"] if o["is_correct"]), None) + lines.append(f"{q['number']}: {correct['label'] if correct else '?'}") + for opt in q["options"]: + mark = "(correct)" if opt["is_correct"] else "" + lines.append(f" {opt['label']}) {opt['text']} {mark}") + lines.append("") + return "\n".join(lines) + + +def main() -> None: + args = parse_args() + with open(args.in_path, "r", encoding="utf-8") as f: + data = json.load(f) + with open(args.test_out, "w", encoding="utf-8") as f: + f.write(render_booklet(data)) + with open(args.key_out, "w", encoding="utf-8") as f: + f.write(render_key(data)) + + +if __name__ == "__main__": + main() diff --git a/semantic.py b/semantic.py new file mode 100644 index 0000000..558a116 --- /dev/null +++ b/semantic.py @@ -0,0 +1,67 @@ +"""Semantic utilities for ALAN items.""" +from __future__ import annotations + +from dataclasses import dataclass +from typing import List + + +@dataclass(frozen=True) +class NPMeaning: + noun: str + feminine: bool + plural: bool + adjectives: tuple[str, ...] 
+ role: str # AGENT/RECIPIENT/THEME + use_irregular: bool + + +@dataclass(frozen=True) +class SentenceMeaning: + verb: str + tense: str # PRES/PAST + use_irregular_verb: bool + subj: NPMeaning + obj1: NPMeaning + obj2: NPMeaning | None = None + + +def to_meaning(sf) -> SentenceMeaning: + """Convert SentenceFeatures to SentenceMeaning.""" + def np_to_meaning(np): + return NPMeaning( + noun=np.noun_id, + feminine=np.feminine, + plural=np.plural, + adjectives=tuple(np.adjectives), + role=np.role, + use_irregular=np.use_irregular, + ) + + return SentenceMeaning( + verb=sf.verb_id, + tense=sf.tense, + use_irregular_verb=sf.use_irregular_verb, + subj=np_to_meaning(sf.subject), + obj1=np_to_meaning(sf.obj1), + obj2=np_to_meaning(sf.obj2) if sf.obj2 else None, + ) + + +def meanings_equal(a: SentenceMeaning, b: SentenceMeaning) -> bool: + return a == b + + +def semantic_distance(a: SentenceMeaning, b: SentenceMeaning) -> int: + """Count feature differences between two meanings.""" + dist = 0 + if a.verb != b.verb or a.tense != b.tense or a.use_irregular_verb != b.use_irregular_verb: + dist += 1 + for np_a, np_b in [(a.subj, b.subj), (a.obj1, b.obj1)]: + if np_a != np_b: + dist += 1 + if a.obj2 or b.obj2: + if (a.obj2 or NPMeaning("", False, False, tuple(), "", False)) != ( + b.obj2 or NPMeaning("", False, False, tuple(), "", False) + ): + dist += 1 + return dist diff --git a/test_blueprint.py b/test_blueprint.py new file mode 100644 index 0000000..70f7659 --- /dev/null +++ b/test_blueprint.py @@ -0,0 +1,140 @@ +"""Blueprints for assembling a DLAB-style test.""" +from __future__ import annotations + +from dataclasses import dataclass +from typing import List, Tuple, Dict + + +@dataclass +class Concept: + id: str + name: str + description_en: str + prerequisites: List[str] + rule_refs: List[str] + + +def get_default_concepts() -> Dict[str, Concept]: + """Return the stock concept inventory.""" + concepts = [ + Concept( + id="S_ORDER", + name="Sentence Word Order", + description_en="Sentences put the action and the people/things in a specific order.", + prerequisites=[], + rule_refs=["linearization.s_order"], + ), + Concept( + id="NP_ORDER", + name="Noun Phrase Word Order", + description_en="Describing words go either before or after the thing they describe.", + prerequisites=[], + rule_refs=["linearization.np_order"], + ), + Concept( + id="NOUN_NUMBER_MARKING", + name="Noun Number", + description_en="To show more than one thing, an extra sound is added; one thing may be plain.", + prerequisites=["NP_ORDER"], + rule_refs=["noun_inflection.number"], + ), + Concept( + id="NOUN_GENDER_MARKING", + name="Noun Gender", + description_en="Words for people can carry a marker for type (like masculine/feminine) with a small sound.", + prerequisites=["NP_ORDER"], + rule_refs=["noun_inflection.gender"], + ), + Concept( + id="NOUN_CASE_MARKING", + name="Case Marking", + description_en="The word for the doer and the word for the receiver use different helper sounds.", + prerequisites=["NP_ORDER", "S_ORDER"], + rule_refs=["noun_inflection.case"], + ), + Concept( + id="VERB_TENSE_MARKING", + name="Verb Tense", + description_en="Action words change slightly for now vs the past.", + prerequisites=["S_ORDER"], + rule_refs=["verb_inflection.tense"], + ), + ] + return {c.id: c for c in concepts} + + +# Item types. 
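+# Five item types: they select stem phrasing in test_generator.generate_item and the per-section mix in get_default_blueprint below.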
+EXEMPLAR_COMPREHENSION = "EXEMPLAR_COMPREHENSION" +TRANSLATE_TO_LANG = "TRANSLATE_TO_LANG" +TRANSLATE_FROM_LANG = "TRANSLATE_FROM_LANG" +RULE_APPLICATION = "RULE_APPLICATION" +STACKED_RULES = "STACKED_RULES" + + +@dataclass +class SectionBlueprint: + id: str + introduce_concepts: List[str] + focus_concepts: List[str] + item_types: List[str] + num_items: int + + +@dataclass +class TestBlueprint: + sections: List[SectionBlueprint] + target_difficulty_band: Tuple[float, float] + + +def get_default_blueprint() -> TestBlueprint: + """Create a simple multi-section blueprint totaling ~30 questions.""" + sections = [ + SectionBlueprint( + id="S1", + introduce_concepts=["S_ORDER", "NP_ORDER"], + focus_concepts=["S_ORDER", "NP_ORDER"], + item_types=[EXEMPLAR_COMPREHENSION, TRANSLATE_TO_LANG], + num_items=8, + ), + SectionBlueprint( + id="S2", + introduce_concepts=["NOUN_NUMBER_MARKING", "NOUN_GENDER_MARKING"], + focus_concepts=[ + "S_ORDER", + "NP_ORDER", + "NOUN_NUMBER_MARKING", + "NOUN_GENDER_MARKING", + ], + item_types=[TRANSLATE_TO_LANG, TRANSLATE_FROM_LANG, RULE_APPLICATION], + num_items=10, + ), + SectionBlueprint( + id="S3", + introduce_concepts=["NOUN_CASE_MARKING", "VERB_TENSE_MARKING"], + focus_concepts=[ + "S_ORDER", + "NP_ORDER", + "NOUN_NUMBER_MARKING", + "NOUN_GENDER_MARKING", + "NOUN_CASE_MARKING", + "VERB_TENSE_MARKING", + ], + item_types=[TRANSLATE_FROM_LANG, RULE_APPLICATION], + num_items=8, + ), + SectionBlueprint( + id="S4", + introduce_concepts=[], + focus_concepts=[ + "S_ORDER", + "NP_ORDER", + "NOUN_NUMBER_MARKING", + "NOUN_GENDER_MARKING", + "NOUN_CASE_MARKING", + "VERB_TENSE_MARKING", + ], + item_types=[STACKED_RULES], + num_items=6, + ), + ] + return TestBlueprint(sections=sections, target_difficulty_band=(0.3, 0.9)) diff --git a/test_generator.py b/test_generator.py new file mode 100644 index 0000000..84f261a --- /dev/null +++ b/test_generator.py @@ -0,0 +1,620 @@ +"""Constraint-driven generator for ALAN using structured features.""" +from __future__ import annotations + +from dataclasses import dataclass, asdict, replace +from typing import List, Dict, Optional +import random + +from language_spec import ( + LanguageSpec, + SentenceFeatures, + NPFeature, + AGENT, + RECIPIENT, + THEME, + realize_sentence, + english_gloss, +) +from semantic import meanings_equal, to_meaning, semantic_distance +from grammar_check import is_grammatical +from test_blueprint import ( + Concept, + SectionBlueprint, + TestBlueprint, + EXEMPLAR_COMPREHENSION, + TRANSLATE_TO_LANG, + TRANSLATE_FROM_LANG, + RULE_APPLICATION, + STACKED_RULES, +) + +# semantic distance bounds for distractors (kept in sync with property tests) +DIST_MIN = 1 +DIST_MAX = 1 + + +def render_concept_explanation(concept: Concept, spec: LanguageSpec) -> str: + """Simple explanation with one example.""" + if concept.id == "S_ORDER": + sf = sentence_features( + "see", + "PRES", + np_features("man", AGENT, plural=False, adjectives=["tall"]), + np_features("girl", RECIPIENT, plural=False, adjectives=[]), + None, + ) + return f"{concept.description_en}\nExample: {realize_sentence(spec, sf)} = {english_gloss(sf)}" + if concept.id == "NOUN_NUMBER_MARKING": + sf = sentence_features( + "see", + "PRES", + np_features("man", AGENT, plural=False, adjectives=[]), + np_features("girl", RECIPIENT, plural=True, adjectives=[]), + None, + ) + return f"{concept.description_en}\nExample: {realize_sentence(spec, sf)} = {english_gloss(sf)}" + if concept.id == "VERB_TENSE_MARKING": + sf = sentence_features( + "chase", + "PAST", + 
np_features("woman", AGENT, plural=False, adjectives=[]), + np_features("boy", RECIPIENT, plural=False, adjectives=[]), + None, + ) + return f"{concept.description_en}\nExample: {realize_sentence(spec, sf)} = {english_gloss(sf)}" + # default example + sf = sentence_features( + "give", + "PRES", + np_features("woman", AGENT, plural=False, adjectives=[]), + np_features("boy", RECIPIENT, plural=False, adjectives=[]), + np_features("ball", THEME, plural=False, adjectives=["red"]), + ) + return f"{concept.description_en}\nExample: {realize_sentence(spec, sf)} = {english_gloss(sf)}" + + +@dataclass +class Option: + label: str + text: str + is_correct: bool + explanation: str + features: SentenceFeatures + + +@dataclass +class Question: + id: str + item_type: str + section_id: str + concepts: List[str] + stem: str + options: List[Option] + difficulty_score: float + + +def question_valid(q: Question, spec: LanguageSpec) -> bool: + if len(q.options) != 4: + return False + if sum(opt.is_correct for opt in q.options) != 1: + return False + meanings = [] + correct_meaning = None + for opt in q.options: + m = to_meaning(opt.features) + if opt.is_correct: + correct_meaning = m + for existing in meanings: + if meanings_equal(m, existing): + return False + meanings.append(m) + if not is_grammatical(asdict(opt), spec): + return False + if correct_meaning is None: + return False + for opt in q.options: + if opt.is_correct: + continue + dist = semantic_distance(to_meaning(opt.features), correct_meaning) + if dist != 1: + return False + return True + + +# --------------------------------------------------------------------------- +# Feature utilities + + +def np_features( + noun_id: str, + role: str, + feminine: Optional[bool] = None, + plural: bool = False, + adjectives: Optional[List[str]] = None, + use_irregular: bool = True, +) -> NPFeature: + feminine_default = noun_id in {"woman", "girl"} + fem = feminine_default if feminine is None else feminine + if noun_id not in {"woman", "girl"}: + fem = False + return NPFeature( + noun_id=noun_id, + feminine=fem, + plural=plural, + adjectives=adjectives or [], + role=role, + use_irregular=use_irregular, + ) + + +def sentence_features( + verb_id: str, + tense: str, + subj: NPFeature, + obj1: NPFeature, + obj2: Optional[NPFeature] = None, + use_irregular_verb: bool = True, +) -> SentenceFeatures: + return SentenceFeatures(subject=subj, obj1=obj1, obj2=obj2, verb_id=verb_id, tense=tense, use_irregular_verb=use_irregular_verb) + + +# --------------------------------------------------------------------------- +# Perturbations for distractors (all grammatical) + + +def perturb_tense(sf: SentenceFeatures) -> SentenceFeatures: + return SentenceFeatures( + subject=sf.subject, + obj1=sf.obj1, + obj2=sf.obj2, + verb_id=sf.verb_id, + tense="PAST" if sf.tense == "PRES" else "PRES", + ) + + +def perturb_roles(sf: SentenceFeatures) -> SentenceFeatures: + new_subj = NPFeature( + noun_id=sf.obj1.noun_id, + feminine=sf.obj1.feminine, + plural=sf.obj1.plural, + adjectives=sf.obj1.adjectives, + role=AGENT, + ) + new_obj1 = NPFeature( + noun_id=sf.subject.noun_id, + feminine=sf.subject.feminine, + plural=sf.subject.plural, + adjectives=sf.subject.adjectives, + role=RECIPIENT, + ) + return SentenceFeatures( + subject=new_subj, + obj1=new_obj1, + obj2=sf.obj2, + verb_id=sf.verb_id, + tense=sf.tense, + ) + + +def perturb_number_gender(sf: SentenceFeatures) -> SentenceFeatures: + target = sf.obj1 + if target.noun_id in {"woman", "girl"}: + flipped_fem = not target.feminine + else: + 
flipped_fem = False + swapped = NPFeature( + noun_id=target.noun_id, + feminine=flipped_fem, + plural=not target.plural, + adjectives=target.adjectives, + role=target.role, + ) + return SentenceFeatures( + subject=sf.subject, + obj1=swapped, + obj2=sf.obj2, + verb_id=sf.verb_id, + tense=sf.tense, + ) + + +def perturb_adj_scope(sf: SentenceFeatures) -> SentenceFeatures: + new_subj = NPFeature( + noun_id=sf.subject.noun_id, + feminine=sf.subject.feminine, + plural=sf.subject.plural, + adjectives=sf.obj1.adjectives, + role=sf.subject.role, + ) + new_obj1 = NPFeature( + noun_id=sf.obj1.noun_id, + feminine=sf.obj1.feminine, + plural=sf.obj1.plural, + adjectives=sf.subject.adjectives, + role=sf.obj1.role, + ) + return SentenceFeatures( + subject=new_subj, + obj1=new_obj1, + obj2=sf.obj2, + verb_id=sf.verb_id, + tense=sf.tense, + ) + + +def perturb_irregular(sf: SentenceFeatures) -> SentenceFeatures: + """Swap irregular to regular (or vice versa) to create a near-miss.""" + # Regularize irregular noun plural. + new_obj1 = sf.obj1 + if sf.obj1.noun_id == "boy" and sf.obj1.plural: + new_obj1 = NPFeature( + noun_id="boy", + feminine=sf.obj1.feminine, + plural=True, + adjectives=sf.obj1.adjectives, + role=sf.obj1.role, + ) + # Regularize irregular verb past. + new_verb = sf.verb_id + new_tense = sf.tense + if sf.verb_id == "chase" and sf.tense == "PAST": + new_tense = "PAST" + return SentenceFeatures( + subject=sf.subject, + obj1=new_obj1, + obj2=sf.obj2, + verb_id=new_verb, + tense=new_tense, + ) + + +def build_distractors(spec: LanguageSpec, sf: SentenceFeatures, rng: random.Random) -> List[Option]: + """Generate three minimal-pair distractors (exactly one feature flipped).""" + from semantic import to_meaning, semantic_distance + target_meaning = to_meaning(sf) + correct_text = realize_sentence(spec, sf) + seen_surfaces = {correct_text} + seen_meanings = {target_meaning} + distractors: List[Option] = [] + + def clone_sf(orig: SentenceFeatures) -> SentenceFeatures: + return SentenceFeatures( + subject=replace(orig.subject), + obj1=replace(orig.obj1), + obj2=replace(orig.obj2) if orig.obj2 else None, + verb_id=orig.verb_id, + tense=orig.tense, + use_irregular_verb=orig.use_irregular_verb, + ) + + def add_if_valid(cand_sf: SentenceFeatures, explanation: str) -> None: + nonlocal distractors + text = realize_sentence(spec, cand_sf) + meaning = to_meaning(cand_sf) + dist = semantic_distance(meaning, target_meaning) + if text in seen_surfaces: + return + if any(meanings_equal(meaning, m) for m in seen_meanings): + return + if dist != 1: + return + opt = Option(label="", text=text, is_correct=False, explanation=explanation, features=cand_sf) + from grammar_check import is_grammatical + + if not is_grammatical(asdict(opt), spec): + return + seen_surfaces.add(text) + seen_meanings.add(meaning) + distractors.append(opt) + + # Available single-feature flips + flips = [] + flips.append(("Tense flip.", lambda base: replace(base, tense="PAST" if base.tense == "PRES" else "PRES"))) + + # number flip on obj1 + def flip_obj1_number(base: SentenceFeatures) -> SentenceFeatures: + new = clone_sf(base) + new.obj1 = replace(new.obj1, plural=not new.obj1.plural) + return new + + flips.append(("Number flip (receiver).", flip_obj1_number)) + + # subject number flip + def flip_subj_number(base: SentenceFeatures) -> SentenceFeatures: + new = clone_sf(base) + new.subject = replace(new.subject, plural=not new.subject.plural) + return new + + flips.append(("Number flip (doer).", flip_subj_number)) + + # obj2 number flip when 
present + def flip_obj2_number(base: SentenceFeatures) -> SentenceFeatures: + if base.obj2 is None: + return base + new = clone_sf(base) + new.obj2 = replace(new.obj2, plural=not new.obj2.plural) + return new + + flips.append(("Number flip (theme).", flip_obj2_number)) + + # gender flip on obj1 when allowed + def flip_obj1_gender(base: SentenceFeatures) -> SentenceFeatures: + new = clone_sf(base) + if new.obj1.noun_id not in {"woman", "girl"}: + return new + new.obj1 = replace(new.obj1, feminine=not new.obj1.feminine) + return new + + flips.append(("Gender flip (receiver).", flip_obj1_gender)) + + # adjective toggle on obj1 + def flip_obj1_adj(base: SentenceFeatures) -> SentenceFeatures: + new = clone_sf(base) + if new.obj1.adjectives: + new.obj1 = replace(new.obj1, adjectives=[]) + else: + new.obj1 = replace(new.obj1, adjectives=["red"]) + return new + + flips.append(("Adjective scope change.", flip_obj1_adj)) + + # role flip on obj1 (recipient vs theme) stays grammatical + def flip_obj1_role(base: SentenceFeatures) -> SentenceFeatures: + new = clone_sf(base) + new_role = THEME if new.obj1.role == RECIPIENT else RECIPIENT + new.obj1 = replace(new.obj1, role=new_role) + return new + + flips.append(("Role flip (receiver/theme).", flip_obj1_role)) + + # irregular flip: toggle use of irregular noun plural or verb past + def flip_irregular(base: SentenceFeatures) -> SentenceFeatures: + new = clone_sf(base) + if new.obj1.noun_id == "boy" and new.obj1.plural: + new.obj1 = replace(new.obj1, use_irregular=not new.obj1.use_irregular) + elif new.verb_id == "chase" and new.tense == "PAST": + new = replace(new, use_irregular_verb=not new.use_irregular_verb) + return new + + flips.append(("Irregular vs regular.", flip_irregular)) + + # apply flips in shuffled order to diversify + rng.shuffle(flips) + for expl, fn in flips: + if len(distractors) >= 3: + break + add_if_valid(fn(sf), expl) + # if still short, retry flips on shuffled order (different seeds) until filled or attempts exhausted + attempts = 0 + while len(distractors) < 3 and attempts < 20: + expl, fn = rng.choice(flips) + add_if_valid(fn(sf), expl) + attempts += 1 + return distractors if len(distractors) == 3 else [] + + +# --------------------------------------------------------------------------- +# Item generation + + +def _base_features(spec: LanguageSpec, rng: random.Random, difficulty: str) -> SentenceFeatures: + verb_id = rng.choice(["see", "chase", "give"]) + tense = "PAST" if (difficulty == "late" or rng.random() < 0.4) else "PRES" + + subj = np_features( + noun_id=rng.choice(["man", "woman"]), + role=AGENT, + plural=difficulty != "early" and rng.random() < 0.4, + adjectives=["tall"] if rng.random() < 0.6 else [], + ) + + if verb_id == "give": + obj1 = np_features( + noun_id=rng.choice(["boy", "girl"]), + role=RECIPIENT, + plural=difficulty != "early" and rng.random() < 0.4, + adjectives=["fast"] if rng.random() < 0.4 else [], + ) + obj2 = np_features( + noun_id=rng.choice(["ball", "house"]), + role=THEME, + plural=difficulty == "late" and rng.random() < 0.5, + adjectives=["red"] if rng.random() < 0.6 else [], + ) + else: + obj1 = np_features( + noun_id=rng.choice(["boy", "girl", "man", "woman"]), + role=RECIPIENT, + plural=difficulty != "early" and rng.random() < 0.5, + adjectives=["red"] if rng.random() < 0.6 else [], + ) + obj2 = None + + return sentence_features(verb_id=verb_id, tense=tense, subj=subj, obj1=obj1, obj2=obj2) + + +def _difficulty_score(sf: SentenceFeatures, irregular: bool) -> float: + score = 0 + for np in 
[sf.subject, sf.obj1] + ([sf.obj2] if sf.obj2 else []): + score += 1 if np.plural else 0 + score += 1 if np.feminine else 0 + score += len(np.adjectives) + if sf.obj2: + score += 1 + if irregular: + score += 1 + if score <= 2: + return 0.2 + if score <= 4: + return 0.5 + return 0.8 + + +def generate_item( + spec: LanguageSpec, + concepts: List[str], + section_id: str, + item_type: str, + rng: random.Random, + difficulty: str = "mid", + sf_override: Optional[SentenceFeatures] = None, +) -> Question: + sf = sf_override or _base_features(spec, rng, difficulty) + correct_text = realize_sentence(spec, sf) + gloss = english_gloss(sf) + distractors = build_distractors(spec, sf, rng) + options = [Option(label="", text=correct_text, is_correct=True, explanation="Correct", features=sf)] + distractors + # ensure uniqueness + texts = set() + unique_options = [] + for opt in options: + if opt.text in texts: + continue + texts.add(opt.text) + unique_options.append(opt) + options = unique_options[:4] + rng.shuffle(options) + labels = ["A", "B", "C", "D"] + for i, opt in enumerate(options): + opt.label = labels[i] + + if item_type == TRANSLATE_TO_LANG: + stem = f"Translate into the language: {gloss}" + elif item_type == TRANSLATE_FROM_LANG: + stem = f"What does this sentence mean? {correct_text}" + else: + stem = f"Use the rules to choose the correct sentence. Target meaning: {gloss}" + + irregular = (sf.obj1.noun_id == "boy" and sf.obj1.plural) or (sf.verb_id == "chase" and sf.tense == "PAST") + difficulty_score = _difficulty_score(sf, irregular=irregular) + + return Question( + id=f"{section_id}_{rng.randrange(10_000)}", + item_type=item_type, + section_id=section_id, + concepts=concepts, + stem=stem, + options=options, + difficulty_score=difficulty_score, + ) + + +# --------------------------------------------------------------------------- +# Test orchestration + + +def generate_test( + spec: LanguageSpec, blueprint: TestBlueprint, concepts: Dict[str, Concept], rng: random.Random, seed: int | None = None +) -> Dict: + sections_out = [] + question_counter = 1 + item_map = [] + irregular_noun_slots = {8, 12, 16, 20, 24, 28} + irregular_verb_slots = {10, 14, 18, 22, 26, 32} + ditransitive_slots = {3, 6, 9, 12, 15, 18, 21, 24, 27, 30} + fem_plural_slots = {5, 11, 17, 23, 29} + + for section in blueprint.sections: + questions: List[Question] = [] + section_intro = [render_concept_explanation(concepts[cid], spec) for cid in section.introduce_concepts] + + idx = 0 + while len(questions) < section.num_items: + item_type = section.item_types[idx % len(section.item_types)] + idx += 1 + current_number = question_counter + len(questions) + difficulty_tag = "early" if current_number <= 8 else "mid" if current_number <= 24 else "late" + + sf_override: Optional[SentenceFeatures] = None + if current_number in irregular_noun_slots: + base = _base_features(spec, rng, difficulty_tag) + base.obj1.noun_id = "boy" + base.obj1.feminine = False + base.obj1.plural = True # letul + sf_override = base + elif current_number in irregular_verb_slots: + base = _base_features(spec, rng, difficulty_tag) + base.verb_id = "chase" + base.tense = "PAST" # rontmimu + base.obj2 = None + sf_override = base + elif current_number in fem_plural_slots: + base = _base_features(spec, rng, difficulty_tag) + base.obj1 = np_features( + noun_id=rng.choice(["woman", "girl"]), + role=RECIPIENT, + feminine=True, + plural=True, + adjectives=base.obj1.adjectives or ["red"], + ) + sf_override = base + elif current_number in ditransitive_slots: + base = 
+
+
+# ---------------------------------------------------------------------------
+# Test orchestration
+
+
+def generate_test(
+    spec: LanguageSpec, blueprint: TestBlueprint, concepts: Dict[str, Concept], rng: random.Random, seed: Optional[int] = None
+) -> Dict:
+    sections_out = []
+    question_counter = 1
+    item_map = []
+    # fixed slots guarantee the coverage quotas; where the sets overlap
+    # (e.g., 12, 18, 24), the earlier branch below takes precedence
+    irregular_noun_slots = {8, 12, 16, 20, 24, 28}
+    irregular_verb_slots = {10, 14, 18, 22, 26, 32}
+    ditransitive_slots = {3, 6, 9, 12, 15, 18, 21, 24, 27, 30}
+    fem_plural_slots = {5, 11, 17, 23, 29}
+
+    for section in blueprint.sections:
+        questions: List[Question] = []
+        section_intro = [render_concept_explanation(concepts[cid], spec) for cid in section.introduce_concepts]
+
+        idx = 0
+        while len(questions) < section.num_items:
+            item_type = section.item_types[idx % len(section.item_types)]
+            idx += 1
+            current_number = question_counter + len(questions)
+            difficulty_tag = "early" if current_number <= 8 else "mid" if current_number <= 24 else "late"
+
+            sf_override: Optional[SentenceFeatures] = None
+            if current_number in irregular_noun_slots:
+                base = _base_features(spec, rng, difficulty_tag)
+                base.obj1.noun_id = "boy"
+                base.obj1.feminine = False
+                base.obj1.plural = True  # plural of 'tul' is the irregular 'letul'
+                sf_override = base
+            elif current_number in irregular_verb_slots:
+                base = _base_features(spec, rng, difficulty_tag)
+                base.verb_id = "chase"
+                base.tense = "PAST"  # past of 'ror' is the irregular 'rontmimu'
+                base.obj2 = None  # 'chase' takes no theme; drop any leftover from a 'give' base
+                sf_override = base
+            elif current_number in fem_plural_slots:
+                base = _base_features(spec, rng, difficulty_tag)
+                base.obj1 = np_features(
+                    noun_id=rng.choice(["woman", "girl"]),
+                    role=RECIPIENT,
+                    feminine=True,
+                    plural=True,
+                    adjectives=base.obj1.adjectives or ["red"],
+                )
+                sf_override = base
+            elif current_number in ditransitive_slots:
+                base = _base_features(spec, rng, difficulty_tag)
+                base.verb_id = "give"
+                # ensure ditransitive objects are well formed
+                base.obj2 = np_features(
+                    noun_id=rng.choice(["ball", "house"]),
+                    role=THEME,
+                    plural=difficulty_tag == "late" and rng.random() < 0.5,
+                    adjectives=["red"] if rng.random() < 0.6 else [],
+                )
+                base.obj1.role = RECIPIENT
+                sf_override = base
+
+            q = generate_item(
+                spec, section.focus_concepts, section.id, item_type, rng, difficulty=difficulty_tag, sf_override=sf_override
+            )
+            # enforce invariants: exactly one correct answer, four unique options
+            if not question_valid(q, spec):
+                continue
+            questions.append(q)
+            item_map.append(
+                {
+                    "number": current_number,
+                    "stem": q.stem,
+                    "difficulty": difficulty_tag,
+                    "constructs": q.concepts,
+                }
+            )
+
+        questions_dicts = []
+        for q in questions:
+            q_dict = asdict(q)
+            q_dict["number"] = question_counter
+            question_counter += 1
+            questions_dicts.append(q_dict)
+
+        sections_out.append(
+            {
+                "id": section.id,
+                "introduce_concepts": section.introduce_concepts,
+                "intro_text": section_intro,
+                "questions": questions_dicts,
+            }
+        )
+
+    return {
+        "meta": {
+            "version": "0.2",
+            "description": "Alan's Language Aptitude iNstrument (ALAN)",
+            "seed": seed,
+            "dictionary": spec.lexicon,
+            "instructions": (
+                "Read each short rule and the examples that follow it. Words may get small bits at "
+                "the start or end to show who does what or when it happens; copy these patterns from "
+                "the examples. You do not need any linguistics background. For each question, pick "
+                "the best option (A-D). All correct answers keep the order: doer, receiver, verb."
+            ),
+            "rules": [
+                "Word order: DOER RECEIVER VERB (SOV). For 'give': doer, recipient, theme, verb.",
+                "Adjectives follow the noun they describe.",
+                "Prefix stacking: na (receiver) + mem (feminine) + leko (plural) + noun; doer adds suffix mur.",
+                "Feminine plural: memleko + noun (e.g., memlekorema).",
+                "Irregulars: verb 'ror' past = 'rontmimu'; plural of 'tul' = 'letul'.",
+                "Receiver marker na- applies to the whole noun phrase (e.g., namemlekorema).",
+                "Past tense: verb takes suffix 'mimu' unless irregular.",
+            ],
+            "item_map": item_map,
+        },
+        "sections": sections_out,
+    }
diff --git a/validate_test.py b/validate_test.py
new file mode 100644
index 0000000..4c95664
--- /dev/null
+++ b/validate_test.py
@@ -0,0 +1,68 @@
+"""Validator for ALAN grammar and test invariants."""
+from __future__ import annotations
+
+import json
+import sys
+from typing import Dict
+
+from language_spec import (
+    generate_language_instance,
+    SentenceFeatures,
+    realize_sentence,
+)
+
+
+def load(path: str) -> Dict:
+    with open(path, "r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def has_one_correct(question: Dict) -> bool:
+    return sum(1 for o in question.get("options", []) if o.get("is_correct")) == 1
+
+
+def unique_options(question: Dict) -> bool:
+    texts = [o["text"] for o in question.get("options", [])]
+    return len(texts) == len(set(texts))
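+
+
+# Illustrative check (a sketch, not executed here): two correct options violate
+# the single-answer invariant even when the option texts themselves are unique.
+#     q = {"options": [{"text": "a", "is_correct": True},
+#                      {"text": "b", "is_correct": True}]}
+#     has_one_correct(q)  # -> False
+#     unique_options(q)   # -> True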
+
+
+def validate_option(text: str, sf: SentenceFeatures, spec) -> bool:
+    return text == realize_sentence(spec, sf)
+
+
+def parse_surface(text: str) -> SentenceFeatures | None:
+    # Deliberately not a parser: surface forms are never parsed back into
+    # features. Correctness rests on the generation path, where every option
+    # is realized from a known feature bundle.
+    return None
+
+
+def validate_test(data: Dict) -> int:
+    errors = 0
+    spec = generate_language_instance()  # available for deeper grammar checks (see validate_option)
+    questions = [q for s in data.get("sections", []) for q in s.get("questions", [])]
+    if len(questions) != 32:
+        print(f"ERROR: expected 32 questions, found {len(questions)}")
+        errors += 1
+    for q in questions:
+        if not has_one_correct(q):
+            print(f"ERROR: question {q.get('number')} does not have exactly one correct option")
+            errors += 1
+        if not unique_options(q):
+            print(f"ERROR: question {q.get('number')} has duplicate option text")
+            errors += 1
+    if errors == 0:
+        print("Validation passed.")
+    else:
+        print(f"Validation failed with {errors} issue(s).")
+    return errors
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: python validate_test.py generated_test.json")
+        sys.exit(1)
+    data = load(sys.argv[1])
+    sys.exit(validate_test(data))
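+
+# The process exit status equals the number of validation errors (0 means
+# success), so a shell or CI step can gate on it directly, e.g.:
+#     python validate_test.py generated_test.json && echo "booklet is safe to print"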