git » alan.git » commit ae04aa7

Update README with hardness examples; silence property test output when desired

author Alan Dipert
2025-12-04 04:52:06 UTC
committer Alan Dipert
2025-12-04 04:52:06 UTC
parent acb004d6305c09c647eb1418e27cde3332007394

Update README with hardness examples; silence property test output when desired

README.md +30 -0
property_tests.py +39 -23

diff --git a/README.md b/README.md
index a8bae54..24ac745 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,36 @@ cat test_booklet.txt   # view the booklet
 cat answer_key.txt     # view the key
 ```
 
+## Generate Different Hardness Levels
+- **Standard (balanced):**
+  ```bash
+  python3 main.py --seed 424242 --out generated_test.json
+  python3 render_text.py --in generated_test.json --test-out test_booklet.txt --key-out answer_key.txt
+  ```
+- **Hard (recommended for programmers):**
+  ```bash
+  python3 main.py \
+    --seed 424242 \
+    --hardness-multiplier 2.0 \
+    --min-feature-load 5 \
+    --min-irregular 12 --min-irregular-contrast 8 \
+    --min-ditransitive 12 --min-plural 16 --min-adjective 16 --min-fem-plural 10 \
+    --out generated_test.json
+  python3 render_text.py --in generated_test.json --test-out test_booklet.txt --key-out answer_key.txt
+  ```
+- **Very Hard (use cautiously; may need retries):**
+  ```bash
+  python3 main.py \
+    --seed 424242 \
+    --hardness-multiplier 2.5 \
+    --min-feature-load 6 \
+    --min-irregular 14 --min-irregular-contrast 10 \
+    --min-ditransitive 14 --min-plural 18 --min-adjective 18 --min-fem-plural 12 \
+    --out generated_test.json
+  python3 render_text.py --in generated_test.json --test-out test_booklet.txt --key-out answer_key.txt
+  ```
+The chosen parameters are recorded in `generation_params` inside the JSON and printed in the booklet for reproducibility.
+
 ## Administering ALAN
 1. **Prepare materials:** Run `make run` to produce `test_booklet.txt` and `answer_key.txt`. Print or distribute the booklet only.
 2. **Time:** 25–30 minutes is typical for 32 items; you can standardize at 30 minutes for comparability.
diff --git a/property_tests.py b/property_tests.py
index 2a6ae3b..96747f5 100644
--- a/property_tests.py
+++ b/property_tests.py
@@ -386,7 +386,7 @@ def main() -> None:
     sys.exit(0 if ok else 1)
 
 
-def validate_data(data: Dict, spec=None, overrides: Dict[str, int] | None = None) -> bool:
+def validate_data(data: Dict, spec=None, overrides: Dict[str, int] | None = None, quiet: bool = False) -> bool:
     if spec is None:
         spec = generate_language_instance()
     o = overrides or {}
@@ -403,37 +403,47 @@ def validate_data(data: Dict, spec=None, overrides: Dict[str, int] | None = None
     for q in questions:
         if not one_correct(q):
             ok = False
-            print(f"FAIL one_correct for Q{q.get('number')}")
+            if not quiet:
+                print(f"FAIL one_correct for Q{q.get('number')}")
         if not unique_options(q):
             ok = False
-            print(f"FAIL unique_options for Q{q.get('number')}")
+            if not quiet:
+                print(f"FAIL unique_options for Q{q.get('number')}")
         for opt in q.get("options", []):
             if not verb_last(opt["text"], verbs):
                 ok = False
-                print(f"FAIL verb_last for Q{q.get('number')} option {opt['label']}")
+                if not quiet:
+                    print(f"FAIL verb_last for Q{q.get('number')} option {opt['label']}")
             if not option_matches_features(opt, spec):
                 ok = False
-                print(f"FAIL feature match for Q{q.get('number')} option {opt['label']}")
+                if not quiet:
+                    print(f"FAIL feature match for Q{q.get('number')} option {opt['label']}")
             if not check_adjective_position(opt["text"]):
                 ok = False
-                print(f"FAIL adjective position for Q{q.get('number')} option {opt['label']}")
-        if not check_na_scope(opt["text"]):
-            ok = False
-            print(f"FAIL na-scope for Q{q.get('number')} option {opt['label']}")
-        if not is_grammatical(opt, spec):
+                if not quiet:
+                    print(f"FAIL adjective position for Q{q.get('number')} option {opt['label']}")
+            if not check_na_scope(opt["text"]):
+                ok = False
+                if not quiet:
+                    print(f"FAIL na-scope for Q{q.get('number')} option {opt['label']}")
+            if not is_grammatical(opt, spec):
+                ok = False
+                if not quiet:
+                    print(f"FAIL grammar check for Q{q.get('number')} option {opt['label']}")
+        if not check_semantics(q, spec):
             ok = False
-            print(f"FAIL grammar check for Q{q.get('number')} option {opt['label']}")
-    if not check_semantics(q, spec):
-        ok = False
-        print(f"FAIL semantic uniqueness/distances for Q{q.get('number')}")
+            if not quiet:
+                print(f"FAIL semantic uniqueness/distances for Q{q.get('number')}")
     for q in questions:
         if not meanings_unique_in_options(q, spec):
             ok = False
-            print(f"FAIL meanings unique across options for Q{q.get('number')}")
+            if not quiet:
+                print(f"FAIL meanings unique across options for Q{q.get('number')}")
         for opt in q.get("options", []):
             if not check_prefix_and_scope(opt, spec):
                 ok = False
-                print(f"FAIL prefix/scope for Q{q.get('number')} option {opt['label']}")
+                if not quiet:
+                    print(f"FAIL prefix/scope for Q{q.get('number')} option {opt['label']}")
     if not check_irregulars(
         data,
         spec,
@@ -442,13 +452,16 @@ def validate_data(data: Dict, spec=None, overrides: Dict[str, int] | None = None
         min_distractors=min_irregular_distractor,
     ):
         ok = False
-        print("FAIL irregular coverage (need >=3 letul and >=3 rontmimu in correct answers)")
+        if not quiet:
+            print("FAIL irregular coverage (need >=3 letul and >=3 rontmimu in correct answers)")
     if not check_role_number_uniqueness(spec):
         ok = False
-        print("FAIL role/number uniqueness (singular/plural/fem-plural must be distinct and na-prefixed)")
+        if not quiet:
+            print("FAIL role/number uniqueness (singular/plural/fem-plural must be distinct and na-prefixed)")
     if not check_tense_uniqueness(spec):
         ok = False
-        print("FAIL tense uniqueness (present and past forms must differ)")
+        if not quiet:
+            print("FAIL tense uniqueness (present and past forms must differ)")
     if not check_structural_diversity(
         data,
         min_irregular=min_irregular,
@@ -458,14 +471,17 @@ def validate_data(data: Dict, spec=None, overrides: Dict[str, int] | None = None
         min_fem_plural=min_fem_plural,
     ):
         ok = False
-        print("FAIL structural diversity quotas")
+        if not quiet:
+            print("FAIL structural diversity quotas")
     if not check_coherence(data):
         ok = False
-        print("FAIL language coherence checks")
+        if not quiet:
+            print("FAIL language coherence checks")
     if not validate_schema(data):
         ok = False
-        print("FAIL JSON schema validation")
-    if ok:
+        if not quiet:
+            print("FAIL JSON schema validation")
+    if ok and not quiet:
         print("All property tests passed.")
     return ok