git » alan.git » commit cec2db1

Add configurable minimum feature load to control hardness

author Alan Dipert
2025-12-04 04:21:42 UTC
committer Alan Dipert
2025-12-04 04:21:42 UTC
parent ff9d1230a7867e9492d0a9f874f2db770582fbd0

Add configurable minimum feature load to control hardness

main.py +5 -1
test_generator.py +68 -1

diff --git a/main.py b/main.py
index c9009e5..2632ca5 100644
--- a/main.py
+++ b/main.py
@@ -23,6 +23,7 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument("--min-plural", type=int, default=12, help="Minimum plural-bearing items.")
     parser.add_argument("--min-adjective", type=int, default=12, help="Minimum adjective-bearing items.")
     parser.add_argument("--min-fem-plural", type=int, default=6, help="Minimum feminine-plural items.")
+    parser.add_argument("--min-feature-load", type=int, default=1, help="Minimum feature load per item (hardness).")
     return parser.parse_args()
 
 
@@ -41,16 +42,19 @@ def main() -> None:
         "min_plural": args.min_plural,
         "min_adjective": args.min_adjective,
         "min_fem_plural": args.min_fem_plural,
+        "min_feature_load": args.min_feature_load,
     }
 
     for attempt in range(max_attempts):
         rng = random.Random(actual_seed + attempt)
         spec = generate_language_instance(actual_seed + attempt)
         test_dict = generate_test(spec, blueprint, concepts, rng, seed=actual_seed + attempt, params=params)
-        if validate_data(test_dict, spec, overrides=params):
+        if validate_data(test_dict, spec, overrides=params, quiet=True):
             with open(args.out_path, "w", encoding="utf-8") as f:
                 json.dump(test_dict, f, indent=2)
             print(f"Generated test JSON at {args.out_path} (seed {actual_seed + attempt})")
+            # final visible validation for transparency
+            validate_data(test_dict, spec, overrides=params, quiet=False)
             return
     raise SystemExit(f"Property tests failed after {max_attempts} attempts; test not written.")
 
diff --git a/test_generator.py b/test_generator.py
index 9e19dbc..74b353a 100644
--- a/test_generator.py
+++ b/test_generator.py
@@ -532,6 +532,64 @@ def _difficulty_score(sf: SentenceFeatures, irregular: bool) -> float:
     return 0.8
 
 
+def _feature_load(sf: SentenceFeatures) -> int:
+    load = 0
+    for np in [sf.subject, sf.obj1] + ([sf.obj2] if sf.obj2 else []):
+        load += 1 if np.plural else 0
+        load += len(np.adjectives)
+        load += 1 if (np.feminine and np.noun_id in {"woman", "girl"}) else 0
+    load += 1 if sf.tense == "PAST" else 0
+    load += 1 if (sf.verb_id == "chase" and sf.tense == "PAST" and sf.use_irregular_verb) else 0
+    if sf.obj1.noun_id == "boy" and sf.obj1.plural and sf.obj1.use_irregular:
+        load += 1
+    return load
+
+
+def _boost_feature_load(sf: SentenceFeatures, target: int) -> SentenceFeatures:
+    """Increase feature load deterministically without breaking rules."""
+    current = _feature_load(sf)
+    if current >= target:
+        return sf
+    subj = sf.subject
+    obj1 = sf.obj1
+    obj2 = sf.obj2
+    verb_id = sf.verb_id
+    tense = sf.tense
+    use_irregular_verb = sf.use_irregular_verb
+
+    def add_adj(np: NPFeature, adj: str) -> NPFeature:
+        if adj in np.adjectives:
+            return np
+        return NPFeature(np.noun_id, np.feminine, np.plural, np.adjectives + [adj], np.role, np.use_irregular)
+
+    steps = [
+        lambda: ("tense", None),
+        lambda: ("obj1_adj", None),
+        lambda: ("subj_adj", None),
+        lambda: ("obj1_plural", None),
+        lambda: ("subj_plural", None),
+        lambda: ("obj2_adj", None),
+    ]
+    for _ in range(12):
+        current = _feature_load(SentenceFeatures(subj, obj1, obj2, verb_id, tense, use_irregular_verb))
+        if current >= target:
+            break
+        step_name, _ = steps[_ % len(steps)]()
+        if step_name == "tense" and tense == "PRES":
+            tense = "PAST"
+        elif step_name == "obj1_adj":
+            obj1 = add_adj(obj1, "red")
+        elif step_name == "subj_adj":
+            subj = add_adj(subj, "tall")
+        elif step_name == "obj1_plural" and not obj1.plural:
+            obj1 = NPFeature(obj1.noun_id, obj1.feminine, True, obj1.adjectives, obj1.role, obj1.use_irregular)
+        elif step_name == "subj_plural" and not subj.plural:
+            subj = NPFeature(subj.noun_id, subj.feminine, True, subj.adjectives, subj.role, subj.use_irregular)
+        elif step_name == "obj2_adj" and obj2:
+            obj2 = add_adj(obj2, "red")
+    return SentenceFeatures(subj, obj1, obj2, verb_id, tense, use_irregular_verb)
+
+
 def generate_item(
     spec: LanguageSpec,
     concepts: List[str],
@@ -540,8 +598,10 @@ def generate_item(
     rng: random.Random,
     difficulty: str = "mid",
     sf_override: Optional[SentenceFeatures] = None,
+    min_feature_load: int = 1,
 ) -> Question:
     sf = sf_override or _base_features(spec, rng, difficulty)
+    sf = _boost_feature_load(sf, min_feature_load)
     correct_text = realize_sentence(spec, sf)
     gloss = english_gloss(sf)
     distractors = build_distractors(spec, sf, rng)
@@ -632,7 +692,14 @@ def generate_test(
 
             sf_override, delta = _planned_features(spec, rng, difficulty_tag, remaining.copy(), current_number)
             q = generate_item(
-                spec, section.focus_concepts, section.id, item_type, rng, difficulty=difficulty_tag, sf_override=sf_override
+                spec,
+                section.focus_concepts,
+                section.id,
+                item_type,
+                rng,
+                difficulty=difficulty_tag,
+                sf_override=sf_override,
+                min_feature_load=cfg.get("min_feature_load", 1),
             )
             # enforce invariants: one correct, 4 unique options
             if not question_valid(q, spec):