# git » alan.git » master » tree

# [master] / test_blueprint.py

"""Blueprints for assembling a DLAB-style test."""
from __future__ import annotations

from dataclasses import dataclass
from typing import List, Tuple, Dict


@dataclass
class Concept:
    """A single entry of the concept inventory.

    Instances are created by ``get_default_concepts``, which indexes them
    by their ``id``.
    """
    # Identifier such as "S_ORDER"; used as the key of the inventory dict and
    # referenced from other concepts' ``prerequisites``.
    id: str
    # Short human-readable title, e.g. "Sentence Word Order".
    name: str
    # Plain-English, one-sentence description of the concept.
    description_en: str
    # Ids of other concepts; empty for concepts with no prerequisite.
    # NOTE(review): presumably these must be understood first -- nothing in
    # this module enforces or consumes the ordering; confirm against callers.
    prerequisites: List[str]
    # Dotted reference strings such as "linearization.s_order".
    # NOTE(review): what they resolve to is not visible in this module --
    # presumably rule identifiers defined elsewhere in the project; confirm.
    rule_refs: List[str]


def get_default_concepts() -> Dict[str, Concept]:
    """Build the stock concept inventory, keyed by concept id.

    Returns:
        A new dict on every call, mapping each concept's ``id`` to its
        ``Concept``. Entries are inserted in the order of the table below:
        the two word-order concepts first, then the three noun-marking
        concepts, then verb tense.
    """
    # One row per concept: (id, name, description_en, prerequisites, rule_refs).
    # The table is rebuilt on each call, so the list objects handed to each
    # Concept are never shared between two returned inventories.
    rows = (
        (
            "S_ORDER",
            "Sentence Word Order",
            "Sentences put the action and the people/things in a specific order.",
            [],
            ["linearization.s_order"],
        ),
        (
            "NP_ORDER",
            "Noun Phrase Word Order",
            "Describing words go either before or after the thing they describe.",
            [],
            ["linearization.np_order"],
        ),
        (
            "NOUN_NUMBER_MARKING",
            "Noun Number",
            "To show more than one thing, an extra sound is added; one thing may be plain.",
            ["NP_ORDER"],
            ["noun_inflection.number"],
        ),
        (
            "NOUN_GENDER_MARKING",
            "Noun Gender",
            "Words for people can carry a marker for type (like masculine/feminine) with a small sound.",
            ["NP_ORDER"],
            ["noun_inflection.gender"],
        ),
        (
            "NOUN_CASE_MARKING",
            "Case Marking",
            "The word for the doer and the word for the receiver use different helper sounds.",
            ["NP_ORDER", "S_ORDER"],
            ["noun_inflection.case"],
        ),
        (
            "VERB_TENSE_MARKING",
            "Verb Tense",
            "Action words change slightly for now vs the past.",
            ["S_ORDER"],
            ["verb_inflection.tense"],
        ),
    )

    inventory = {}
    for concept_id, title, blurb, prereqs, refs in rows:
        inventory[concept_id] = Concept(
            id=concept_id,
            name=title,
            description_en=blurb,
            prerequisites=prereqs,
            rule_refs=refs,
        )
    return inventory


# Item types: string tags that SectionBlueprint.item_types lists carry.
# Each constant's value is spelled exactly like its own name.
EXEMPLAR_COMPREHENSION = "EXEMPLAR_COMPREHENSION"
TRANSLATE_TO_LANG = "TRANSLATE_TO_LANG"
TRANSLATE_FROM_LANG = "TRANSLATE_FROM_LANG"  # kept for future use; get_default_blueprint() does not reference it
RULE_APPLICATION = "RULE_APPLICATION"
STACKED_RULES = "STACKED_RULES"


@dataclass
class SectionBlueprint:
    """Specification of one section of a test.

    ``get_default_blueprint`` builds four of these ("S1" to "S4").
    """
    # Section identifier, e.g. "S1".
    id: str
    # Concept ids (the same ids that key the concept inventory).
    # NOTE(review): presumably the concepts first presented in this section;
    # may be empty -- confirm how consumers interpret it.
    introduce_concepts: List[str]
    # Concept ids. In the default blueprint this is every concept introduced
    # in this section or an earlier one.
    # NOTE(review): presumably the concepts this section's items may test.
    focus_concepts: List[str]
    # Item-type tags, i.e. the module-level item type constants.
    item_types: List[str]
    # Number of items in this section.
    num_items: int


@dataclass
class TestBlueprint:
    """Top-level plan for a whole test: its sections plus a difficulty band.

    Attributes:
        sections: The section blueprints, in the order they are listed.
        target_difficulty_band: A pair of floats; the default blueprint uses
            ``(0.3, 0.9)``. NOTE(review): presumably (lower, upper) bounds on
            item difficulty -- how the band is consumed is not visible in
            this module; confirm against the generator.
    """

    # The class name starts with "Test", which matches pytest's default class
    # discovery pattern. Because the dataclass has a generated __init__,
    # pytest cannot collect it and emits a PytestCollectionWarning in every
    # collected module that defines or imports this class. Opting out
    # explicitly silences that. The attribute is deliberately unannotated so
    # that @dataclass does not turn it into a field: the constructor,
    # equality and repr are unchanged.
    __test__ = False

    sections: List[SectionBlueprint]
    target_difficulty_band: Tuple[float, float]


def get_default_blueprint() -> TestBlueprint:
    """Assemble the default four-section blueprint.

    The sections hold 8, 10, 8 and 6 items (32 in total). Each section's
    ``focus_concepts`` is the running list of every concept introduced in
    that section or an earlier one, so the last section, which introduces
    nothing new, focuses on all six concepts.

    Returns:
        A new ``TestBlueprint`` with sections "S1" to "S4" and a target
        difficulty band of ``(0.3, 0.9)``.
    """
    # One row per section, in order:
    # (section id, concepts introduced here, item types, number of items).
    stages = (
        (
            "S1",
            ["S_ORDER", "NP_ORDER"],
            [EXEMPLAR_COMPREHENSION, TRANSLATE_TO_LANG],
            8,
        ),
        (
            "S2",
            ["NOUN_NUMBER_MARKING", "NOUN_GENDER_MARKING"],
            [TRANSLATE_TO_LANG, RULE_APPLICATION],
            10,
        ),
        (
            "S3",
            ["NOUN_CASE_MARKING", "VERB_TENSE_MARKING"],
            [RULE_APPLICATION, TRANSLATE_TO_LANG],
            8,
        ),
        (
            "S4",
            [],
            [STACKED_RULES],
            6,
        ),
    )

    built = []
    covered = []  # every concept introduced so far, in introduction order
    for section_id, newly_introduced, kinds, count in stages:
        covered = covered + newly_introduced
        built.append(
            SectionBlueprint(
                id=section_id,
                introduce_concepts=newly_introduced,
                # Copy so that no two sections share one list object.
                focus_concepts=list(covered),
                item_types=kinds,
                num_items=count,
            )
        )

    band = (0.3, 0.9)
    return TestBlueprint(sections=built, target_difficulty_band=band)