"""Canonical grammar spec and validator for ALAN."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Optional
# Thematic role tags stored in NPFeature.role.
# _noun_form appends the "mur" suffix to an AGENT noun and prefixes every
# other role with "na".
AGENT = "AGENT"  # the doer
RECIPIENT = "RECIPIENT"  # OBJ1 (presumably the SentenceFeatures.obj1 slot)
THEME = "THEME"  # OBJ2 (presumably the SentenceFeatures.obj2 slot)
@dataclass
class NPFeature:
    """Feature bundle describing one noun phrase before it is realized."""

    # Key into the "nouns" table of the language lexicon.
    noun_id: str
    # Request feminine marking; only honoured for nouns in FEMININE_NOUNS.
    feminine: bool
    # Realize the plural form instead of the singular.
    plural: bool
    # Keys into the "adjectives" table; realized after the noun, in order.
    adjectives: List[str]
    # Thematic role tag: AGENT, RECIPIENT or THEME.
    role: str
    # When true, a listed irregular plural replaces the regular plural.
    use_irregular: bool = True
@dataclass
class SentenceFeatures:
    """Feature bundle describing one whole sentence before it is realized."""

    # Noun phrase realized first in the sentence.
    subject: NPFeature
    # Noun phrase realized second.
    obj1: NPFeature
    # Optional noun phrase realized third; None when the sentence has no
    # second object.
    obj2: Optional[NPFeature]
    # Key into the "verbs" table of the language lexicon.
    verb_id: str
    # Tense tag: "PRES" or "PAST".
    tense: str
    # When true, a listed irregular verb form replaces the regular form.
    use_irregular_verb: bool = True
@dataclass
class LanguageSpec:
    """Vocabulary and irregular forms that define one language instance."""

    # Category name ("nouns", "verbs", "adjectives") -> {word id: stem}.
    lexicon: Dict[str, Dict[str, str]]
    # Verb id -> {tense tag: full irregular surface form}.
    irregular_verbs: Dict[str, Dict[str, str]]
    # Noun id -> full irregular plural surface form.
    irregular_noun_plurals: Dict[str, str]
def generate_language_instance(seed: int | None = None) -> LanguageSpec:
    """Build the canonical ALAN language specification.

    Args:
        seed: Accepted but not read by this function; every call returns
            the same vocabulary and irregular forms.

    Returns:
        A ``LanguageSpec`` holding the noun, verb and adjective stems, the
        irregular past form of "chase" and the irregular plural of "boy".
    """
    nouns = {
        "man": "po",
        "woman": "rema",
        "boy": "tul",
        "girl": "siv",
        "ball": "kob",
        "house": "vut",
    }
    verbs = {"see": "dak", "give": "mep", "chase": "ror"}
    adjectives = {"tall": "sar", "red": "lin", "big": "mod", "fast": "par"}

    return LanguageSpec(
        lexicon={"nouns": nouns, "verbs": verbs, "adjectives": adjectives},
        irregular_verbs={"chase": {"PAST": "rontmimu"}},
        irregular_noun_plurals={"boy": "letul"},
    )
# Noun ids eligible for feminine marking. An NP's feminine flag only takes
# effect (in both the surface form and the English gloss) when its noun id
# is in this set.
FEMININE_NOUNS = {"woman", "girl"}
def _plural_form(noun_id: str, spec: LanguageSpec, feminine: bool, use_irregular: bool) -> str:
if use_irregular and noun_id in spec.irregular_noun_plurals:
return spec.irregular_noun_plurals[noun_id]
stem = spec.lexicon["nouns"][noun_id]
if feminine and noun_id in FEMININE_NOUNS:
return "memleko" + stem
return "leko" + stem
def _noun_form(np: NPFeature, spec: LanguageSpec) -> str:
    """Realize one noun phrase as its surface string.

    Args:
        np: Features of the noun phrase to realize.
        spec: Language specification supplying noun and adjective stems.

    Returns:
        The number- and gender-marked noun carrying its role marker,
        followed by any adjectives separated by single spaces.

    Raises:
        KeyError: If the noun id or an adjective id is not in the lexicon.
    """
    stem = spec.lexicon["nouns"][np.noun_id]
    # The feminine flag only counts for nouns that can be feminine.
    is_feminine = np.feminine and np.noun_id in FEMININE_NOUNS

    if not np.plural:
        word = ("mem" if is_feminine else "") + stem
    else:
        word = _plural_form(np.noun_id, spec, is_feminine, np.use_irregular)

    # Agents take the "mur" suffix; every other role takes the "na" prefix.
    if np.role == AGENT:
        word = word + "mur"
    else:
        word = "na" + word

    if not np.adjectives:
        return word

    # Adjectives follow the noun and carry no marking of their own.
    modifiers = " ".join([spec.lexicon["adjectives"][adj_id] for adj_id in np.adjectives])
    return f"{word} {modifiers}"
def realize_sentence(spec: LanguageSpec, sf: SentenceFeatures) -> str:
    """Realize a sentence as its surface string.

    Word order is subject, obj1, obj2 (when present), then the verb, all
    joined by single spaces.

    Args:
        spec: Language specification supplying stems and irregular forms.
        sf: Features of the sentence to realize.

    Returns:
        The surface sentence.

    Raises:
        KeyError: If the verb id, a noun id or an adjective id is not in
            the lexicon.
    """
    words = [_noun_form(sf.subject, spec), _noun_form(sf.obj1, spec)]
    if sf.obj2:
        words.append(_noun_form(sf.obj2, spec))

    stem = spec.lexicon["verbs"][sf.verb_id]
    irregular_form = spec.irregular_verbs.get(sf.verb_id, {}).get(sf.tense)

    if sf.use_irregular_verb and irregular_form:
        verb = irregular_form
    elif sf.tense == "PRES":
        verb = stem
    else:
        # Any tense other than "PRES" takes the past suffix.
        verb = stem + "mimu"

    words.append(verb)
    return " ".join(words)
# English plurals that are not formed by appending "s".
_IRREGULAR_ENGLISH_PLURALS = {"man": "men", "woman": "women"}

# English verb form for each (verb id, tense) pair the gloss knows about.
# NOTE(review): present-tense forms are always third-person singular, even
# when the subject is plural.
_ENGLISH_VERB_FORMS = {
    ("see", "PRES"): "sees",
    ("see", "PAST"): "saw",
    ("chase", "PRES"): "chases",
    ("chase", "PAST"): "chased",
    ("give", "PRES"): "gives",
    ("give", "PAST"): "gave",
}


def english_gloss(sf: SentenceFeatures) -> str:
    """Return a rough English gloss of a sentence's features.

    Each noun phrase is rendered as "the <adjectives> <noun>" with a
    trailing "(plural)", "(feminine)" or "(feminine plural)" note where
    applicable. The sentence ends with a "(past)" or "(present)" note; any
    tense other than "PAST" is labelled present.

    Args:
        sf: Features of the sentence to gloss.

    Returns:
        "<subject> <verb> <obj2> to <obj1> (<tense>)" for "give" with a
        second object, otherwise "<subject> <verb> <obj1> (<tense>)".
    """

    def np_gloss(np: NPFeature) -> str:
        """Gloss one noun phrase, including its number/gender note."""
        # The feminine flag only counts for nouns that can be feminine.
        marked_feminine = np.feminine and np.noun_id in FEMININE_NOUNS

        noun = np.noun_id
        if np.plural:
            # Use the real English plural ("men", "women") where "+s" is wrong.
            noun = _IRREGULAR_ENGLISH_PLURALS.get(noun, f"{noun}s")

        adjectives = " ".join(np.adjectives)
        phrase = f"{adjectives} {noun}".strip()

        if np.plural:
            note = " (feminine plural)" if marked_feminine else " (plural)"
        elif marked_feminine:
            note = " (feminine)"
        else:
            note = ""
        return f"the {phrase}{note}".strip()

    subject = np_gloss(sf.subject)
    first_object = np_gloss(sf.obj1)
    tense_note = "past" if sf.tense == "PAST" else "present"
    # Unknown (verb, tense) pairs fall back to "<verb id>s".
    verb_en = _ENGLISH_VERB_FORMS.get((sf.verb_id, sf.tense), f"{sf.verb_id}s")

    if sf.verb_id == "give" and sf.obj2 is not None:
        # Ditransitive "give": the second object is the thing given.
        second_object = np_gloss(sf.obj2)
        return f"{subject} {verb_en} {second_object} to {first_object} ({tense_note})"
    return f"{subject} {verb_en} {first_object} ({tense_note})"
def validate_sentence_surface(sf: SentenceFeatures, sentence: str, spec: LanguageSpec) -> bool:
    """Check a candidate sentence against the canonical realization.

    Args:
        sf: Features the sentence is supposed to express.
        sentence: Candidate surface string; leading and trailing whitespace
            is ignored.
        spec: Language specification used to realize the reference form.

    Returns:
        True when the stripped candidate equals ``realize_sentence(spec, sf)``
        exactly; internal spacing is not normalized.
    """
    candidate = sentence.strip()
    expected = realize_sentence(spec, sf)
    return candidate == expected