| author | Alan Dipert <alan@dipert.org> 2025-12-04 04:21:42 UTC |
| committer | Alan Dipert <alan@dipert.org> 2025-12-04 04:21:42 UTC |
| parent | ff9d1230a7867e9492d0a9f874f2db770582fbd0 |
| main.py | +5 | -1 |
| test_generator.py | +68 | -1 |
diff --git a/main.py b/main.py index c9009e5..2632ca5 100644 --- a/main.py +++ b/main.py @@ -23,6 +23,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--min-plural", type=int, default=12, help="Minimum plural-bearing items.") parser.add_argument("--min-adjective", type=int, default=12, help="Minimum adjective-bearing items.") parser.add_argument("--min-fem-plural", type=int, default=6, help="Minimum feminine-plural items.") + parser.add_argument("--min-feature-load", type=int, default=1, help="Minimum feature load per item (hardness).") return parser.parse_args() @@ -41,16 +42,19 @@ def main() -> None: "min_plural": args.min_plural, "min_adjective": args.min_adjective, "min_fem_plural": args.min_fem_plural, + "min_feature_load": args.min_feature_load, } for attempt in range(max_attempts): rng = random.Random(actual_seed + attempt) spec = generate_language_instance(actual_seed + attempt) test_dict = generate_test(spec, blueprint, concepts, rng, seed=actual_seed + attempt, params=params) - if validate_data(test_dict, spec, overrides=params): + if validate_data(test_dict, spec, overrides=params, quiet=True): with open(args.out_path, "w", encoding="utf-8") as f: json.dump(test_dict, f, indent=2) print(f"Generated test JSON at {args.out_path} (seed {actual_seed + attempt})") + # final visible validation for transparency + validate_data(test_dict, spec, overrides=params, quiet=False) return raise SystemExit(f"Property tests failed after {max_attempts} attempts; test not written.") diff --git a/test_generator.py b/test_generator.py index 9e19dbc..74b353a 100644 --- a/test_generator.py +++ b/test_generator.py @@ -532,6 +532,64 @@ def _difficulty_score(sf: SentenceFeatures, irregular: bool) -> float: return 0.8 +def _feature_load(sf: SentenceFeatures) -> int: + load = 0 + for np in [sf.subject, sf.obj1] + ([sf.obj2] if sf.obj2 else []): + load += 1 if np.plural else 0 + load += len(np.adjectives) + load += 1 if (np.feminine and np.noun_id in {"woman", 
"girl"}) else 0 + load += 1 if sf.tense == "PAST" else 0 + load += 1 if (sf.verb_id == "chase" and sf.tense == "PAST" and sf.use_irregular_verb) else 0 + if sf.obj1.noun_id == "boy" and sf.obj1.plural and sf.obj1.use_irregular: + load += 1 + return load + + +def _boost_feature_load(sf: SentenceFeatures, target: int) -> SentenceFeatures: + """Increase feature load deterministically without breaking rules.""" + current = _feature_load(sf) + if current >= target: + return sf + subj = sf.subject + obj1 = sf.obj1 + obj2 = sf.obj2 + verb_id = sf.verb_id + tense = sf.tense + use_irregular_verb = sf.use_irregular_verb + + def add_adj(np: NPFeature, adj: str) -> NPFeature: + if adj in np.adjectives: + return np + return NPFeature(np.noun_id, np.feminine, np.plural, np.adjectives + [adj], np.role, np.use_irregular) + + steps = [ + lambda: ("tense", None), + lambda: ("obj1_adj", None), + lambda: ("subj_adj", None), + lambda: ("obj1_plural", None), + lambda: ("subj_plural", None), + lambda: ("obj2_adj", None), + ] + for _ in range(12): + current = _feature_load(SentenceFeatures(subj, obj1, obj2, verb_id, tense, use_irregular_verb)) + if current >= target: + break + step_name, _ = steps[_ % len(steps)]() + if step_name == "tense" and tense == "PRES": + tense = "PAST" + elif step_name == "obj1_adj": + obj1 = add_adj(obj1, "red") + elif step_name == "subj_adj": + subj = add_adj(subj, "tall") + elif step_name == "obj1_plural" and not obj1.plural: + obj1 = NPFeature(obj1.noun_id, obj1.feminine, True, obj1.adjectives, obj1.role, obj1.use_irregular) + elif step_name == "subj_plural" and not subj.plural: + subj = NPFeature(subj.noun_id, subj.feminine, True, subj.adjectives, subj.role, subj.use_irregular) + elif step_name == "obj2_adj" and obj2: + obj2 = add_adj(obj2, "red") + return SentenceFeatures(subj, obj1, obj2, verb_id, tense, use_irregular_verb) + + def generate_item( spec: LanguageSpec, concepts: List[str], @@ -540,8 +598,10 @@ def generate_item( rng: random.Random, 
difficulty: str = "mid", sf_override: Optional[SentenceFeatures] = None, + min_feature_load: int = 1, ) -> Question: sf = sf_override or _base_features(spec, rng, difficulty) + sf = _boost_feature_load(sf, min_feature_load) correct_text = realize_sentence(spec, sf) gloss = english_gloss(sf) distractors = build_distractors(spec, sf, rng) @@ -632,7 +692,14 @@ def generate_test( sf_override, delta = _planned_features(spec, rng, difficulty_tag, remaining.copy(), current_number) q = generate_item( - spec, section.focus_concepts, section.id, item_type, rng, difficulty=difficulty_tag, sf_override=sf_override + spec, + section.focus_concepts, + section.id, + item_type, + rng, + difficulty=difficulty_tag, + sf_override=sf_override, + min_feature_load=cfg.get("min_feature_load", 1), ) # enforce invariants: one correct, 4 unique options if not question_valid(q, spec):