git » alan.git » commit 51701ce

Add configurable generation params and expose in meta/booklet

author Alan Dipert
2025-12-04 04:05:10 UTC
committer Alan Dipert
2025-12-04 04:05:10 UTC
parent dfe0517d18f0c85fce9370e8bb0bf03c065a255d

Add configurable generation params and expose in meta/booklet

main.py +8 -0
property_tests.py +25 -4
render_text.py +6 -0
test_generator.py +8 -1

diff --git a/main.py b/main.py
index fb16b0d..87839f9 100644
--- a/main.py
+++ b/main.py
@@ -15,6 +15,14 @@ def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(description="Generate an artificial language test JSON.")
     parser.add_argument("--seed", type=int, help="Random seed for reproducibility.")
     parser.add_argument("--out", dest="out_path", default="generated_test.json", help="Output path.")
+    parser.add_argument("--min-irregular", type=int, default=6, help="Minimum irregular uses.")
+    parser.add_argument(
+        "--min-irregular-contrast", type=int, default=4, help="Minimum irregular contrast items (correct vs distractor)."
+    )
+    parser.add_argument("--min-ditransitive", type=int, default=8, help="Minimum ditransitive items.")
+    parser.add_argument("--min-plural", type=int, default=12, help="Minimum plural-bearing items.")
+    parser.add_argument("--min-adjective", type=int, default=12, help="Minimum adjective-bearing items.")
+    parser.add_argument("--min-fem-plural", type=int, default=6, help="Minimum feminine-plural items.")
     return parser.parse_args()
 
 
diff --git a/property_tests.py b/property_tests.py
index b97fcad..2a6ae3b 100644
--- a/property_tests.py
+++ b/property_tests.py
@@ -18,7 +18,7 @@ from semantic import to_meaning, meanings_equal, semantic_distance
 from language_coherence import check_coherence
 from meta_schema import validate_schema
 
-# Property thresholds
+# Property thresholds (defaults; the MIN_* quotas can be overridden via validate_data's `overrides` argument)
 DIST_MIN = 1
 DIST_MAX = 1
 MIN_IRREG_USE = 6
@@ -386,9 +386,17 @@ def main() -> None:
     sys.exit(0 if ok else 1)
 
 
-def validate_data(data: Dict, spec=None) -> bool:
+def validate_data(data: Dict, spec=None, overrides: Dict[str, int] | None = None) -> bool:
     if spec is None:
         spec = generate_language_instance()
+    o = overrides or {}
+    min_irregular = o.get("min_irregular", MIN_IRREG_USE)
+    min_irregular_contrast = o.get("min_irregular_contrast", MIN_IRREG_CONTRAST)
+    min_irregular_distractor = o.get("min_irregular_distractor", MIN_IRREG_DISTRACTOR)
+    min_ditransitive = o.get("min_ditransitive", MIN_DITRANSITIVE)
+    min_plural = o.get("min_plural", MIN_PLURAL_ITEMS)
+    min_adjective = o.get("min_adjective", MIN_ADJECTIVE_ITEMS)
+    min_fem_plural = o.get("min_fem_plural", MIN_FEM_PLURAL_ITEMS)
     verbs = spec.lexicon["verbs"]
     ok = True
     questions = [q for s in data.get("sections", []) for q in s.get("questions", [])]
@@ -426,7 +434,13 @@ def validate_data(data: Dict, spec=None) -> bool:
             if not check_prefix_and_scope(opt, spec):
                 ok = False
                 print(f"FAIL prefix/scope for Q{q.get('number')} option {opt['label']}")
-    if not check_irregulars(data, spec):
+    if not check_irregulars(
+        data,
+        spec,
+        min_use=min_irregular,
+        min_contrast=min_irregular_contrast,
+        min_distractors=min_irregular_distractor,
+    ):
         ok = False
         print("FAIL irregular coverage (need >=3 letul and >=3 rontmimu in correct answers)")
     if not check_role_number_uniqueness(spec):
@@ -435,7 +449,14 @@ def validate_data(data: Dict, spec=None) -> bool:
     if not check_tense_uniqueness(spec):
         ok = False
         print("FAIL tense uniqueness (present and past forms must differ)")
-    if not check_structural_diversity(data):
+    if not check_structural_diversity(
+        data,
+        min_irregular=min_irregular,
+        min_ditransitive=min_ditransitive,
+        min_plural=min_plural,
+        min_adjective=min_adjective,
+        min_fem_plural=min_fem_plural,
+    ):
         ok = False
         print("FAIL structural diversity quotas")
     if not check_coherence(data):
diff --git a/render_text.py b/render_text.py
index e8b0949..c1851e5 100644
--- a/render_text.py
+++ b/render_text.py
@@ -20,6 +20,12 @@ def render_booklet(data: Dict[str, Any]) -> str:
     if meta.get("instructions"):
         lines.append(meta["instructions"])
         lines.append("")
+    params = meta.get("generation_params")
+    if params:
+        lines.append("Generation parameters:")
+        for k, v in params.items():
+            lines.append(f"- {k}: {v}")
+        lines.append("")
     if meta.get("rules"):
         lines.append("Grammar Cheat Sheet")
         lines.append("-------------------")
diff --git a/test_generator.py b/test_generator.py
index 84f261a..8061d4c 100644
--- a/test_generator.py
+++ b/test_generator.py
@@ -504,7 +504,12 @@ def generate_item(
 
 
 def generate_test(
-    spec: LanguageSpec, blueprint: TestBlueprint, concepts: Dict[str, Concept], rng: random.Random, seed: int | None = None
+    spec: LanguageSpec,
+    blueprint: TestBlueprint,
+    concepts: Dict[str, Concept],
+    rng: random.Random,
+    seed: int | None = None,
+    params: Dict[str, int] | None = None,
 ) -> Dict:
     sections_out = []
     question_counter = 1
@@ -593,6 +598,7 @@ def generate_test(
             }
         )
 
+    meta_params = params or {}
     return {
         "meta": {
             "version": "0.2",
@@ -605,6 +611,7 @@ def generate_test(
                 "the examples. You do not need any linguistics background. For each question, pick "
                 "the best option (A-D). All correct answers keep the order: doer, receiver, verb."
             ),
+            "generation_params": meta_params,
             "rules": [
                 "Word order: DOER RECEIVER VERB (SOV). For 'give': doer, recipient, theme, verb.",
                 "Adjectives follow the noun they describe.",