"""Unified item and test generator using deterministic backtracking."""
from __future__ import annotations
from dataclasses import dataclass, asdict, replace
from typing import Dict, List, Optional, Set
import random
import os
import time
from language_spec import (
LanguageSpec,
SentenceFeatures,
NPFeature,
AGENT,
RECIPIENT,
THEME,
realize_sentence,
english_gloss,
)
from semantic import meanings_equal, to_meaning, semantic_distance
from grammar_check import is_grammatical
from test_blueprint import TestBlueprint, Concept, TRANSLATE_TO_LANG, TRANSLATE_FROM_LANG
from property_tests import (
MIN_IRREG_USE,
MIN_DITRANSITIVE,
MIN_PLURAL_ITEMS,
MIN_ADJECTIVE_ITEMS,
MIN_FEM_PLURAL_ITEMS,
)
# ---------------------------------------------------------------------------
# Data structures
@dataclass
class Option:
    """One answer choice of a multiple-choice question."""

    label: str  # display label; generate_item assigns "A".."D" after shuffling
    text: str  # surface sentence shown to the test taker
    is_correct: bool  # True for the keyed (correct) answer
    explanation: str  # short rationale: "Correct" or the description of the flip applied
    features: SentenceFeatures  # sentence features the text was realized from
@dataclass
class Question:
    """A single multiple-choice test item together with its answer options."""

    id: str  # "<section_id>_<random int below 10000>" as built by generate_item
    item_type: str  # selects the stem wording (e.g. TRANSLATE_TO_LANG / TRANSLATE_FROM_LANG)
    section_id: str  # id of the blueprint section the item was generated for
    concepts: List[str]  # concept ids passed through unchanged from the caller
    stem: str  # prompt text shown above the options
    options: List[Option]  # answer choices; question_valid requires exactly four
    difficulty_score: float  # generate_item currently always stores 0.5
def question_valid(q: Question, spec: LanguageSpec) -> bool:
    """Return True when ``q`` is a well-formed four-option item.

    A question passes when all of the following hold:

    * it has exactly four options, exactly one of which is marked correct;
    * no two options share a meaning (per ``meanings_equal``) or an
      English gloss;
    * every option is accepted by ``is_grammatical``;
    * every distractor is at semantic distance exactly 1 from the keyed
      answer.
    """
    choices = q.options
    if len(choices) != 4 or sum(c.is_correct for c in choices) != 1:
        return False

    prior_meanings = []
    prior_glosses = []
    key_meaning = None
    for choice in choices:
        meaning = to_meaning(choice.features)
        gloss = english_gloss(choice.features)
        if choice.is_correct:
            key_meaning = meaning
        # Reject options that duplicate an earlier one semantically or by gloss.
        if any(meanings_equal(meaning, earlier) for earlier in prior_meanings):
            return False
        if gloss in prior_glosses:
            return False
        prior_meanings.append(meaning)
        prior_glosses.append(gloss)
        if not is_grammatical(asdict(choice), spec):
            return False

    if key_meaning is None:
        return False

    # Each wrong answer must differ from the key by exactly one semantic step.
    return all(
        semantic_distance(to_meaning(choice.features), key_meaning) == 1
        for choice in choices
        if not choice.is_correct
    )
# ---------------------------------------------------------------------------
# Feature utilities
def np_features(
    noun_id: str,
    role: str,
    feminine: Optional[bool] = None,
    plural: bool = False,
    adjectives: Optional[List[str]] = None,
    use_irregular: bool = True,
) -> NPFeature:
    """Build the ``NPFeature`` for one noun phrase.

    Args:
        noun_id: Lexicon id of the head noun.
        role: Role constant for the phrase (AGENT, RECIPIENT or THEME).
        feminine: Explicit gender flag. ``None`` means "use the noun's
            default". The flag is ignored for any noun other than
            "woman" or "girl", which are always non-feminine here.
        plural: Whether the phrase is plural.
        adjectives: Adjective ids; ``None`` or an empty list yields ``[]``.
        use_irregular: Passed through as ``NPFeature.use_irregular``.
    """
    if noun_id in {"woman", "girl"}:
        # Only these nouns can be feminine; they default to feminine when
        # the caller does not say otherwise.
        fem = True if feminine is None else feminine
    else:
        fem = False

    modifiers = adjectives if adjectives else []
    return NPFeature(
        noun_id=noun_id,
        role=role,
        feminine=fem,
        plural=plural,
        adjectives=modifiers,
        use_irregular=use_irregular,
    )
def sentence_features(
    verb_id: str,
    tense: str,
    subj: NPFeature,
    obj1: NPFeature,
    obj2: Optional[NPFeature] = None,
    use_irregular_verb: bool = True,
) -> SentenceFeatures:
    """Assemble a ``SentenceFeatures`` from a verb, a tense and its noun phrases.

    ``subj`` is stored as ``subject``; ``obj2`` is optional and stays
    ``None`` when the sentence has only one object.
    """
    return SentenceFeatures(
        verb_id=verb_id,
        tense=tense,
        use_irregular_verb=use_irregular_verb,
        subject=subj,
        obj1=obj1,
        obj2=obj2,
    )
def section_constraints(unlocked: set[str]):
    """Derive the generation constraints implied by the unlocked concepts.

    Returns an instance of a dynamically created ``Cons`` class whose
    attributes list the allowed verbs and nouns and the ``allow_*`` switches.
    Ditransitive "give" becomes available once number or gender marking
    is unlocked; irregular forms once tense or number marking is unlocked.
    """
    plural_ok = "NOUN_NUMBER_MARKING" in unlocked
    feminine_ok = "NOUN_GENDER_MARKING" in unlocked
    past_ok = "VERB_TENSE_MARKING" in unlocked
    adjectives_ok = "NP_ORDER" in unlocked
    ditransitive_ok = plural_ok or feminine_ok
    irregulars_ok = past_ok or plural_ok

    def people() -> List[str]:
        # A fresh list per call so agent and recipient lists stay independent.
        names = ["man", "boy"]
        if feminine_ok:
            names.extend(["woman", "girl"])
        return names

    verbs = ["see", "chase"]
    if ditransitive_ok:
        verbs.append("give")

    attrs = {
        "allowed_verbs": verbs,
        "allowed_agent_nouns": people(),
        "allowed_recipient_nouns": people(),
        "allowed_theme_nouns": ["ball", "house"],
        "allow_plural": plural_ok,
        "allow_feminine": feminine_ok,
        "allow_past": past_ok,
        "allow_ditransitive": ditransitive_ok,
        "allow_irregulars": irregulars_ok,
        "allow_adjectives": adjectives_ok,
    }
    return type("Cons", (), attrs)()
# ---------------------------------------------------------------------------
# Distractors and item generation
def build_distractors(spec: LanguageSpec, sf: SentenceFeatures, rng: random.Random) -> List[Option]:
    """Produce three wrong-answer options for the sentence ``sf``.

    Each candidate is ``sf`` with one feature flipped (tense, number,
    gender, adjective, role or irregular form). A candidate is kept only if
    its surface text and meaning are new, its semantic distance to the
    target meaning is exactly 1, and ``is_grammatical`` accepts it.

    The flips are shuffled with ``rng`` and tried once each; if fewer than
    three distractors result, up to 20 randomly chosen flips are retried.

    Returns:
        A list of exactly three ``Option`` objects (labels left empty), or
        an empty list when three valid distractors could not be found.
    """
    target_meaning = to_meaning(sf)
    used_texts = {realize_sentence(spec, sf)}
    used_meanings = {target_meaning}
    accepted: List[Option] = []

    def copy_of(src: SentenceFeatures) -> SentenceFeatures:
        # Copy the sentence and each noun phrase so a flip never touches `sf`.
        return SentenceFeatures(
            subject=replace(src.subject),
            obj1=replace(src.obj1),
            obj2=replace(src.obj2) if src.obj2 else None,
            verb_id=src.verb_id,
            tense=src.tense,
            use_irregular_verb=src.use_irregular_verb,
        )

    def try_add(candidate: SentenceFeatures, explanation: str) -> None:
        text = realize_sentence(spec, candidate)
        meaning = to_meaning(candidate)
        distance = semantic_distance(meaning, target_meaning)
        if text in used_texts:
            return
        if any(meanings_equal(meaning, known) for known in used_meanings):
            return
        if distance != 1:
            return
        option = Option(label="", text=text, is_correct=False, explanation=explanation, features=candidate)
        if is_grammatical(asdict(option), spec):
            used_texts.add(text)
            used_meanings.add(meaning)
            accepted.append(option)

    def toggle_tense(base: SentenceFeatures) -> SentenceFeatures:
        return replace(base, tense="PAST" if base.tense == "PRES" else "PRES")

    def toggle_obj1_number(base: SentenceFeatures) -> SentenceFeatures:
        fresh = copy_of(base)
        fresh.obj1 = replace(fresh.obj1, plural=not fresh.obj1.plural)
        return fresh

    def toggle_subject_number(base: SentenceFeatures) -> SentenceFeatures:
        fresh = copy_of(base)
        fresh.subject = replace(fresh.subject, plural=not fresh.subject.plural)
        return fresh

    def toggle_obj2_number(base: SentenceFeatures) -> SentenceFeatures:
        # Without a second object there is nothing to flip; return the input.
        if base.obj2 is None:
            return base
        fresh = copy_of(base)
        fresh.obj2 = replace(fresh.obj2, plural=not fresh.obj2.plural)
        return fresh

    def toggle_obj1_gender(base: SentenceFeatures) -> SentenceFeatures:
        fresh = copy_of(base)
        if fresh.obj1.noun_id in {"woman", "girl"}:
            fresh.obj1 = replace(fresh.obj1, feminine=not fresh.obj1.feminine)
        return fresh

    def toggle_obj1_adjective(base: SentenceFeatures) -> SentenceFeatures:
        fresh = copy_of(base)
        fresh.obj1 = replace(fresh.obj1, adjectives=[] if fresh.obj1.adjectives else ["red"])
        return fresh

    def toggle_obj1_role(base: SentenceFeatures) -> SentenceFeatures:
        fresh = copy_of(base)
        swapped = THEME if fresh.obj1.role == RECIPIENT else RECIPIENT
        fresh.obj1 = replace(fresh.obj1, role=swapped)
        return fresh

    def toggle_irregular(base: SentenceFeatures) -> SentenceFeatures:
        fresh = copy_of(base)
        if fresh.obj1.noun_id == "boy" and fresh.obj1.plural:
            fresh.obj1 = replace(fresh.obj1, use_irregular=not fresh.obj1.use_irregular)
        elif fresh.verb_id == "chase" and fresh.tense == "PAST":
            fresh = replace(fresh, use_irregular_verb=not fresh.use_irregular_verb)
        return fresh

    # The order of this list determines the outcome of the shuffle below.
    flips = [
        ("Tense flip.", toggle_tense),
        ("Number flip (receiver).", toggle_obj1_number),
        ("Number flip (doer).", toggle_subject_number),
        ("Number flip (theme).", toggle_obj2_number),
        ("Gender flip (receiver).", toggle_obj1_gender),
        ("Adjective scope change.", toggle_obj1_adjective),
        ("Role flip (receiver/theme).", toggle_obj1_role),
        ("Irregular vs regular.", toggle_irregular),
    ]
    rng.shuffle(flips)

    for explanation, flip in flips:
        if len(accepted) >= 3:
            break
        try_add(flip(sf), explanation)

    # NOTE(review): every retry applies a flip to the same unchanged `sf`, so
    # it looks unable to yield a candidate the first pass did not already
    # see. It still consumes rng state, so confirm before removing it.
    for _ in range(20):
        if len(accepted) >= 3:
            break
        explanation, flip = rng.choice(flips)
        try_add(flip(sf), explanation)

    return accepted if len(accepted) == 3 else []
def _base_features(cons, rng: random.Random) -> SentenceFeatures:
    """Draw a random sentence that respects the constraints in ``cons``.

    The verb is chosen uniformly from ``cons.allowed_verbs``; past tense,
    plural marking and adjectives are each added with a fixed probability,
    and only when the matching ``cons.allow_*`` switch is on. A random number
    is drawn only when the switch is on, so the rng stream depends on ``cons``.
    "give" gets a recipient and a theme; other verbs get a single object.
    """

    def chance(enabled, threshold: float):
        # Short-circuits: no random number is consumed when `enabled` is falsy.
        return enabled and rng.random() < threshold

    def random_np(nouns, role, plural_p: float, adjective: str, adjective_p: float) -> NPFeature:
        # Draw order (noun, plural, adjective) is fixed for seed reproducibility.
        noun = rng.choice(nouns)
        is_plural = chance(cons.allow_plural, plural_p)
        modifiers = [adjective] if chance(cons.allow_adjectives, adjective_p) else []
        return np_features(noun_id=noun, role=role, plural=is_plural, adjectives=modifiers)

    verb_id = rng.choice(cons.allowed_verbs)
    tense = "PAST" if chance(cons.allow_past, 0.4) else "PRES"
    subj = random_np(cons.allowed_agent_nouns, AGENT, 0.4, "tall", 0.6)

    if verb_id == "give":
        obj1 = random_np(cons.allowed_recipient_nouns, RECIPIENT, 0.4, "fast", 0.4)
        obj2 = random_np(cons.allowed_theme_nouns, THEME, 0.5, "red", 0.6)
    else:
        obj1 = random_np(cons.allowed_recipient_nouns, RECIPIENT, 0.5, "red", 0.6)
        obj2 = None

    return sentence_features(verb_id=verb_id, tense=tense, subj=subj, obj1=obj1, obj2=obj2)
def generate_item(
    spec: LanguageSpec,
    concepts: List[str],
    section_id: str,
    item_type: str,
    rng: random.Random,
    sf_override: Optional[SentenceFeatures] = None,
    constraints=None,
) -> Question:
    """Build one multiple-choice question with its keyed answer and distractors.

    Args:
        spec: Language specification used to realize sentences.
        concepts: Concept ids stored on the question unchanged.
        section_id: Section id; also used as the prefix of the question id.
        item_type: Selects the stem wording (TRANSLATE_TO_LANG,
            TRANSLATE_FROM_LANG, or anything else for the rule-based stem).
        rng: Random source for feature sampling, shuffling and the id suffix.
        sf_override: Sentence to use as the correct answer. When ``None`` a
            sentence is sampled from ``constraints``.
        constraints: Constraint object as returned by ``section_constraints``;
            required only when ``sf_override`` is ``None``.

    Returns:
        A ``Question`` whose options are shuffled and labelled "A".."D". It
        may hold fewer than four options when ``build_distractors`` gives up,
        so callers should check it with ``question_valid``.

    Raises:
        ValueError: If both ``sf_override`` and ``constraints`` are ``None``.
    """
    # Explicit None checks: truthiness of a features object is not a reliable
    # "was it provided" signal, and a missing source should fail loudly here
    # rather than as an attribute error deep inside the sampler.
    if sf_override is not None:
        sf = sf_override
    elif constraints is not None:
        sf = _base_features(constraints, rng)
    else:
        raise ValueError("generate_item requires either sf_override or constraints")

    correct_text = realize_sentence(spec, sf)
    gloss = english_gloss(sf)
    distractors = build_distractors(spec, sf, rng)
    keyed = Option(label="", text=correct_text, is_correct=True, explanation="Correct", features=sf)

    # Drop options whose surface text repeats; the keyed answer comes first
    # so it always survives de-duplication and truncation.
    seen_texts = set()
    options = []
    for opt in [keyed, *distractors]:
        if opt.text not in seen_texts:
            seen_texts.add(opt.text)
            options.append(opt)
    options = options[:4]

    rng.shuffle(options)
    for label, opt in zip("ABCD", options):
        opt.label = label

    if item_type == TRANSLATE_TO_LANG:
        stem = f"Translate into the language: {gloss}"
    elif item_type == TRANSLATE_FROM_LANG:
        stem = f"What does this sentence mean? {correct_text}"
    else:
        stem = f"Use the rules to choose the correct sentence. Target meaning: {gloss}"

    return Question(
        id=f"{section_id}_{rng.randrange(10_000)}",
        item_type=item_type,
        section_id=section_id,
        concepts=concepts,
        stem=stem,
        options=options,
        difficulty_score=0.5,
    )
# ---------------------------------------------------------------------------
# Backtracking synthesis
def _question_attrs(q) -> Dict[str, bool]:
    """Summarize the keyed answer of ``q`` for the backtracking search.

    The result holds de-duplication keys (``meaning_key``, ``surface``), the
    quota flags (``irreg_noun``, ``irreg_verb``, ``ditransitive``,
    ``plural``, ``adjective``, ``fem_plural``) and ``clue_keys``: one
    ``(role, noun_id, adjectives)`` tuple per noun phrase. Not every value is
    a bool, despite the return annotation.

    Raises ``StopIteration`` if no option is marked correct.
    """
    keyed = next(opt for opt in q.options if opt.is_correct)
    feats = keyed.features
    subject, obj1, obj2 = feats.subject, feats.obj1, feats.obj2

    phrases = [subject, obj1]
    clue_keys = [
        ("AGENT", subject.noun_id, tuple(subject.adjectives)),
        ("RECIPIENT", obj1.noun_id, tuple(obj1.adjectives)),
    ]
    if obj2:
        phrases.append(obj2)
        clue_keys.append(("THEME", obj2.noun_id, tuple(obj2.adjectives)))

    return {
        "meaning_key": repr(to_meaning(feats)),
        "surface": keyed.text,
        # Irregular noun use is only tracked on the first object.
        "irreg_noun": obj1.noun_id == "boy" and obj1.plural and obj1.use_irregular,
        "irreg_verb": feats.verb_id == "chase" and feats.tense == "PAST" and feats.use_irregular_verb,
        "ditransitive": feats.obj2 is not None,
        "plural": any(phrase.plural for phrase in phrases),
        "adjective": any(phrase.adjectives for phrase in phrases),
        "fem_plural": (obj1.feminine and obj1.plural) if hasattr(obj1, "feminine") else False,
        "clue_keys": clue_keys,
    }
def _generate_candidates_for_section(spec: LanguageSpec, section, unlocked: set[str], rng: random.Random, max_trials: int = 20000) -> List[Dict]:
    """Enumerate every valid, distinct candidate question for one section.

    All feature combinations permitted by ``section_constraints(unlocked)``
    are walked in a fixed order (verb, tense, subject noun, object noun,
    theme noun, subject number, object number, subject adjective, object
    adjective). For each combination and each of ``section.item_types`` an
    item is generated, and it is kept when ``question_valid`` accepts it and
    neither its meaning nor its surface text has been seen before.

    Args:
        max_trials: Upper bound on the number of candidates returned; the
            enumeration stops as soon as this many have been accepted.

    Returns:
        A list of dicts, each holding the ``Question`` under ``"question"``
        plus the attributes computed by ``_question_attrs``.
    """
    cons = section_constraints(unlocked)
    tenses = ["PRES", "PAST"] if cons.allow_past else ["PRES"]
    theme_nouns = cons.allowed_theme_nouns if cons.allow_ditransitive else []
    subject_numbers = [False, True] if cons.allow_plural else [False]
    object_numbers = [False, True] if cons.allow_plural else [False]
    adjective_flags = [False, True] if cons.allow_adjectives else [False]

    def feature_grid():
        # Yields sentences in the fixed enumeration order; the order matters
        # because generate_item consumes `rng` once per yielded sentence.
        for verb in cons.allowed_verbs:
            themes = theme_nouns if verb == "give" else [None]
            for tense in tenses:
                for subj_noun in cons.allowed_agent_nouns:
                    for obj1_noun in cons.allowed_recipient_nouns:
                        for theme in themes:
                            for subj_plural in subject_numbers:
                                for obj1_plural in object_numbers:
                                    for subj_has_adj in adjective_flags:
                                        for obj1_has_adj in adjective_flags:
                                            subj = np_features(
                                                subj_noun,
                                                AGENT,
                                                plural=subj_plural,
                                                adjectives=["tall"] if subj_has_adj else [],
                                            )
                                            obj1 = np_features(
                                                obj1_noun,
                                                RECIPIENT,
                                                plural=obj1_plural,
                                                adjectives=["red"] if obj1_has_adj else [],
                                            )
                                            obj2 = None
                                            if theme:
                                                # Themes are always singular here.
                                                obj2 = np_features(
                                                    theme,
                                                    THEME,
                                                    plural=False,
                                                    adjectives=["red"] if cons.allow_adjectives else [],
                                                )
                                            yield sentence_features(verb, tense, subj, obj1, obj2, use_irregular_verb=True)

    candidates: List[Dict] = []
    seen_meaning: Set[str] = set()
    seen_surface: Set[str] = set()
    for sf in feature_grid():
        for item_type in section.item_types:
            question = generate_item(
                spec,
                section.focus_concepts,
                section.id,
                item_type,
                rng,
                constraints=cons,
                sf_override=sf,
            )
            if not question_valid(question, spec):
                continue
            attrs = _question_attrs(question)
            if attrs["meaning_key"] in seen_meaning or attrs["surface"] in seen_surface:
                continue
            seen_meaning.add(attrs["meaning_key"])
            seen_surface.add(attrs["surface"])
            candidates.append({"question": question, **attrs})
            if len(candidates) >= max_trials:
                return candidates
    return candidates
def _max_possible(attr: str, section_idx: int, section_slots: List[int], pools: List[List[Dict]], used_meaning: Set[str], used_surface: Set[str], chosen: List[List[Dict]]) -> int:
total = 0
for s in range(section_idx, len(pools)):
remaining_slots = section_slots[s] - len(chosen[s])
if remaining_slots <= 0:
continue
avail = [
c
for c in pools[s]
if c["meaning_key"] not in used_meaning and c["surface"] not in used_surface
]
count_attr = sum(1 for c in avail if c[attr])
total += min(remaining_slots, count_attr)
return total
def _backtrack(
section_idx: int,
section_slots: List[int],
pools: List[List[Dict]],
used_meaning: Set[str],
used_surface: Set[str],
clue_counts: Dict[tuple, int],
max_clue_reuse: int,
quotas: Dict[str, int],
chosen: List[List[Dict]],
start_time: float,
max_seconds: Optional[float],
) -> bool:
if max_seconds is not None and (time.time() - start_time) > max_seconds:
return False
if section_idx == len(pools):
return all(v <= 0 for v in quotas.values())
if len(chosen[section_idx]) == section_slots[section_idx]:
return _backtrack(section_idx + 1, section_slots, pools, used_meaning, used_surface, clue_counts, max_clue_reuse, quotas, chosen, start_time, max_seconds)
for attr, remaining in quotas.items():
if remaining <= 0:
continue
max_avail = _max_possible(attr, section_idx, section_slots, pools, used_meaning, used_surface, chosen)
if remaining > max_avail:
return False
avail = [
c
for c in pools[section_idx]
if c["meaning_key"] not in used_meaning
and c["surface"] not in used_surface
and all(clue_counts.get(k, 0) < max_clue_reuse for k in c["clue_keys"])
]
if not avail:
return False
def score(c):
return sum(1 for attr, rem in quotas.items() if rem > 0 and c[attr])
avail.sort(key=score, reverse=True)
avail = avail[:100]
for cand in avail:
new_quotas = quotas.copy()
for attr in ["irreg_noun", "irreg_verb", "ditransitive", "plural", "adjective", "fem_plural"]:
if cand[attr]:
new_quotas[attr] = max(0, new_quotas[attr] - 1)
used_meaning.add(cand["meaning_key"])
used_surface.add(cand["surface"])
for k in cand["clue_keys"]:
clue_counts[k] = clue_counts.get(k, 0) + 1
chosen[section_idx].append(cand)
if _backtrack(section_idx, section_slots, pools, used_meaning, used_surface, clue_counts, max_clue_reuse, new_quotas, chosen, start_time, max_seconds):
return True
chosen[section_idx].pop()
used_meaning.remove(cand["meaning_key"])
used_surface.remove(cand["surface"])
for k in cand["clue_keys"]:
clue_counts[k] = clue_counts.get(k, 0) - 1
return False
def generate_test(
    spec: LanguageSpec,
    blueprint: TestBlueprint,
    concepts: Dict[str, Concept],
    rng: random.Random,
    seed: int | None = None,
    git_sha: str | None = None,
    hardness: str = "medium",
) -> Dict:
    """Synthesize a complete test by backtracking over per-section candidates.

    Steps:
        1. Pick the quota profile for ``hardness``, clamp each quota to the
           total number of items, then raise it to the property-test minimum.
        2. Generate and shuffle a candidate pool for every blueprint section,
           unlocking that section's concepts cumulatively.
        3. Run ``_backtrack`` to choose items that meet all quotas.
        4. Serialize the chosen questions, numbered consecutively from 1.

    Args:
        spec: Language specification; its lexicon is copied into the metadata.
        blueprint: Sections with their item counts, item types and concepts.
        concepts: Concept lookup used for each section's intro text.
        rng: Random source shared by candidate generation and shuffling.
        seed: Recorded in the metadata only.
        git_sha: Recorded in the metadata only.
        hardness: One of "easy", "medium", "hard", "extreme".

    Returns:
        A dict with ``"meta"`` and ``"sections"`` keys.

    Raises:
        SystemExit: For an unknown ``hardness``, a section with too few
            candidates, or a search that fails (including by timing out).
        ValueError: If the ``BACKTRACK_TIMEOUT`` environment variable is set
            to something ``float`` cannot parse.
    """
    # Search budget in seconds; overridable through the environment.
    max_seconds = float(os.environ.get("BACKTRACK_TIMEOUT", "20"))
    profiles = {
        "easy": {"irreg": 6, "ditransitive": 8, "plural": 12, "adjective": 12, "fem_plural": 6, "max_clue_reuse": 10},
        "medium": {"irreg": 6, "ditransitive": 8, "plural": 12, "adjective": 12, "fem_plural": 6, "max_clue_reuse": 10},
        "hard": {"irreg": 7, "ditransitive": 9, "plural": 14, "adjective": 14, "fem_plural": 7, "max_clue_reuse": 8},
        "extreme": {"irreg": 8, "ditransitive": 10, "plural": 16, "adjective": 16, "fem_plural": 8, "max_clue_reuse": 6},
    }
    if hardness not in profiles:
        raise SystemExit(f"Unknown hardness '{hardness}'. Choose from {list(profiles.keys())}.")

    profile = dict(profiles[hardness])
    max_clue_reuse = profile.pop("max_clue_reuse")
    total_items = sum(section.num_items for section in blueprint.sections)

    # A quota can never exceed the number of items, but must not drop below
    # the minimum that the property tests enforce.
    quotas = {name: min(target, total_items) for name, target in profile.items()}
    quotas["irreg"] = max(quotas["irreg"], MIN_IRREG_USE)
    quotas["ditransitive"] = max(quotas["ditransitive"], MIN_DITRANSITIVE)
    quotas["plural"] = max(quotas["plural"], MIN_PLURAL_ITEMS)
    quotas["adjective"] = max(quotas["adjective"], MIN_ADJECTIVE_ITEMS)
    quotas["fem_plural"] = max(quotas["fem_plural"], MIN_FEM_PLURAL_ITEMS)

    pools: List[List[Dict]] = []
    section_slots: List[int] = []
    unlocked: set[str] = set()
    for section in blueprint.sections:
        # Concepts accumulate: later sections may use everything seen so far.
        unlocked |= set(section.introduce_concepts)
        cand = _generate_candidates_for_section(spec, section, unlocked, rng)
        if len(cand) < section.num_items:
            raise SystemExit(f"Insufficient candidates for section {section.id}")
        rng.shuffle(cand)
        pools.append(cand)
        section_slots.append(section.num_items)

    # The single "irreg" quota applies to irregular nouns and verbs alike.
    quotas_expanded = {
        "irreg_noun": quotas["irreg"],
        "irreg_verb": quotas["irreg"],
        "ditransitive": quotas["ditransitive"],
        "plural": quotas["plural"],
        "adjective": quotas["adjective"],
        "fem_plural": quotas["fem_plural"],
    }
    used_meaning: Set[str] = set()
    used_surface: Set[str] = set()
    chosen: List[List[Dict]] = [[] for _ in pools]
    clue_counts: Dict[tuple, int] = {}
    start_time = time.time()
    if not _backtrack(0, section_slots, pools, used_meaning, used_surface, clue_counts, max_clue_reuse, quotas_expanded, chosen, start_time, max_seconds):
        raise SystemExit("Backtracking generator could not satisfy quotas.")

    sections_out = []
    question_counter = 1
    for section, picked in zip(blueprint.sections, chosen):
        section_intro = [f"{concepts[cid].description_en}" for cid in section.introduce_concepts]
        questions = []
        for cand in picked:
            qd = asdict(cand["question"])
            qd["number"] = question_counter
            question_counter += 1
            questions.append(qd)
        sections_out.append(
            {
                "id": section.id,
                "introduce_concepts": section.introduce_concepts,
                "intro_text": section_intro,
                "questions": questions,
            }
        )

    # The profile is published under a code word rather than its plain name.
    proword = {"easy": "MAPLE", "medium": "CEDAR", "hard": "CYPRESS", "extreme": "SEQUOIA"}[hardness]
    meta = {
        "version": "backtrack-0.2",
        "description": "Alan's Language Aptitude iNstrument (ALAN) synthesized via backtracking search",
        "seed": seed,
        "git_sha": git_sha,
        "generation_params": {"profile": proword},
        "dictionary": spec.lexicon,
        "instructions": (
            "You will see a brief dictionary, a handful of rules, and examples. Words may take small "
            "prefixes or suffixes to mark who does what or when it happens—copy these patterns from the examples. "
            "You do not need linguistics training; apply the rules logically. In every question exactly one option (A–D) "
            "matches the target meaning. Correct answers always follow the stated word order: doer (subject), receiver (object), verb."
        ),
        "rules": [
            "Word order: DOER RECEIVER VERB (SOV). For 'give': doer, recipient, theme, verb.",
            "Adjectives follow the noun they describe.",
            "Prefix stacking: na (receiver) + mem (feminine) + leko (plural) + noun; doer adds suffix mur.",
            "Feminine plural: memleko + noun (e.g., memlekorema).",
            "Irregulars: verb 'ror' past = 'rontmimu'; plural of 'tul' = 'letul'.",
            "Receiver marker na- applies to the whole noun phrase (e.g., namemlekorema).",
            "Past tense: verb takes suffix 'mimu' unless irregular.",
        ],
    }
    test_dict = {"meta": meta, "sections": sections_out}
    return test_dict