Config loader, validator, and resolved-config writer (RFC-0017).
This module is the bridge between the YAML files under
:data:`DEFAULTS_DIR` and the dataclass schema in
:mod:`geno_lewm.config.schema`. Three responsibilities:
- :func:`load_config` — read a YAML payload, type-check it, reject
unknown top-level keys (RFC-0017 §3.3), and return a frozen
:class:`GenoLeWMConfig`.
- :func:`write_resolved_config` — emit a config object as canonical
YAML (sorted keys, no anchors) so ``${run_id}/config.resolved.yaml``
is reproducible (RFC-0017 §3.5).
- :func:`describe_field` — schema introspection used by the
``--explain`` CLI flag (PR #29).
We deliberately do not load Hydra at runtime. Hydra-style
composition (``defaults:`` blocks, multi-run sweeps) lands with
PR #29 + a future loader change; for Phase 1 a single YAML file plus
optional CLI ``--set key=value`` overrides covers the scoring /
training / eval / planning paths.
load_config
load_config(source: Path | str | Mapping[str, Any]) -> GenoLeWMConfig
Load + validate a config payload; return a frozen :class:`GenoLeWMConfig`.
``source`` may be:
- A :class:`Path` (or ``str``) — read the file as YAML.
- A :class:`Mapping` — treat it as the already-parsed payload (used
by ``--set`` override merging in PR #29 and by the unit tests).
Validation:
- Unknown top-level keys → :class:`UnknownTopLevelKeyError`.
- Missing required subsystem keys → :class:`MissingConfigError`.
- Wrong value type on any field → :class:`ConfigError`.
Source code in geno_lewm/config/loader.py
def load_config(source: Path | str | Mapping[str, Any]) -> GenoLeWMConfig:
    """Turn a config file or an already-parsed mapping into a frozen :class:`GenoLeWMConfig`.

    ``source`` is one of:

    * a :class:`Path` or ``str`` — handed to ``_resolve_payload`` to obtain
      the parsed payload;
    * a :class:`Mapping` — used directly as the payload (this is the path
      taken by ``--set`` override merging in PR #29 and by the unit tests).

    Raises :class:`InputError` when ``source`` is neither of the above, or
    when the payload obtained from a file is not a mapping at the top level.

    Schema validation is delegated to ``_build_top_level``; the documented
    contract is:

    * Unknown top-level keys → :class:`UnknownTopLevelKeyError`.
    * Missing required subsystem keys → :class:`MissingConfigError`.
    * Wrong value type on any field → :class:`ConfigError`.
    """

    def _not_a_mapping(offender: object) -> InputError:
        # Both rejection paths report the same message; only "got" differs.
        return InputError(
            "config payload must be a mapping at the top level",
            details={"got": type(offender).__name__},
        )

    payload: Any
    if isinstance(source, Mapping):
        payload = source
    else:
        if not isinstance(source, (str, Path)):
            raise _not_a_mapping(source)
        payload = _resolve_payload(source)
        if not isinstance(payload, Mapping):
            raise _not_a_mapping(payload)

    # Copy into a plain dict so the builder works on its own mutable mapping.
    return _build_top_level(dict(payload))
|
load_default
load_default(name: str) -> GenoLeWMConfig
Shorthand for ``load_config(DEFAULTS_DIR / f"{name}.yaml")``.
Accepts the documented command names (``train`` / ``score`` /
``eval`` / ``plan``). Raises :class:`MissingConfigError` if the
YAML template is missing.
Source code in geno_lewm/config/loader.py
def load_default(name: str) -> GenoLeWMConfig:
    """Load the default config template for the command ``name``.

    Equivalent to ``load_config(DEFAULTS_DIR / f"{name}.yaml")``. The
    documented command names are ``train`` / ``score`` / ``eval`` /
    ``plan``.

    Raises :class:`MissingConfigError` when ``DEFAULTS_DIR`` holds no
    ``{name}.yaml`` file; the error details carry the path that was tried
    and the sorted result of ``_known_defaults()``.
    """
    template = DEFAULTS_DIR / f"{name}.yaml"
    if template.is_file():
        return load_config(template)
    raise MissingConfigError(
        f"no default config for command {name!r}",
        details={"path": str(template), "known": sorted(_known_defaults())},
    )
|
config_to_dict
config_to_dict(cfg: GenoLeWMConfig) -> dict[str, Any]
Return a plain dict view of cfg for serialization.
Source code in geno_lewm/config/loader.py
def config_to_dict(cfg: GenoLeWMConfig) -> dict[str, Any]:
    """Convert ``cfg`` into a plain ``dict`` suitable for serialization.

    The conversion itself is done by ``_asdict_with_tuples``; this wrapper
    only asserts that the top-level result is a ``dict``.
    """
    as_plain = _asdict_with_tuples(cfg)
    # Narrows the helper's return type for static checkers.
    assert isinstance(as_plain, dict)
    return as_plain
|
write_resolved_config
write_resolved_config(cfg: GenoLeWMConfig, path: Path | str) -> Path
Write ``cfg`` as canonical YAML to ``path``; return the absolute path.
Canonical = ``sort_keys=True``, ``default_flow_style=False``, no
anchors. The result hashes byte-stably so the manifest's
``training.config_file`` hash matches between machines.
Source code in geno_lewm/config/loader.py
def write_resolved_config(cfg: GenoLeWMConfig, path: Path | str) -> Path:
    """Write ``cfg`` as canonical YAML to ``path``; return the absolute path.

    Canonical = ``sort_keys=True``, ``default_flow_style=False``, no
    anchors. The result hashes byte-stably so the manifest's
    ``training.config_file`` hash matches between machines.

    Missing parent directories of ``path`` are created. The file is written
    as UTF-8 bytes with ``\\n`` line endings on every platform.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    # NOTE(review): safe_dump can still emit anchors/aliases if the same
    # list/dict object appears twice in the payload; this relies on
    # config_to_dict not producing shared containers — confirm.
    text = yaml.safe_dump(
        config_to_dict(cfg),
        sort_keys=True,
        default_flow_style=False,
        allow_unicode=True,
    )
    # Write bytes, not text: text mode translates "\n" to os.linesep, so the
    # same config would hash differently on Windows (CRLF) than on POSIX.
    target.write_bytes(text.encode("utf-8"))
    return target.resolve()
|
describe_field
describe_field(dotted_key: str) -> dict[str, Any]
Return ``{type, default, doc}`` for ``dotted_key`` (e.g. ``encoder.dtype``).
Raises :class:`MissingConfigError` if the key is not in the schema.
Source code in geno_lewm/config/loader.py
def describe_field(dotted_key: str) -> dict[str, Any]:
    """Return ``{type, default, doc}`` for ``dotted_key`` (e.g. ``encoder.dtype``).

    The key is split on ``.`` and walked segment by segment through the
    dataclass schema, starting at :class:`GenoLeWMConfig`. A segment whose
    field type is itself a dataclass descends into that dataclass; any other
    field is a leaf and ends the walk.

    Raises:
        InputError: if the key is empty (or its first segment is empty).
        MissingConfigError: if a segment does not name a field of the
            dataclass being inspected, or if the key continues past a leaf
            field (e.g. ``encoder.dtype.extra``).
    """
    parts = dotted_key.split(".")
    if not parts or not parts[0]:
        raise InputError("--explain key must not be empty", details={"key": dotted_key})
    cls: type = GenoLeWMConfig
    field_obj: dataclasses.Field[Any] | None = None
    parent_doc = cls.__doc__ or ""
    last_index = len(parts) - 1
    for i, part in enumerate(parts):
        # Defensive: ``cls`` is only ever rebound to a dataclass below, so
        # this fires only if the schema root itself is not a dataclass.
        if not is_dataclass(cls):
            raise MissingConfigError(
                "--explain: path leaves the schema before resolving",
                details={"key": dotted_key, "where": ".".join(parts[: i + 1])},
            )
        try:
            field_obj = next(f for f in fields(cls) if f.name == part)
        except StopIteration as exc:
            known = [f.name for f in fields(cls)]
            raise MissingConfigError(
                "--explain: key not found in schema",
                details={"key": dotted_key, "where": part, "known": sorted(known)},
            ) from exc
        # ``field_obj.type`` may be an unresolved string annotation, so the
        # resolved hint is consulted as a second way to detect a nested dataclass.
        next_type = get_type_hints(cls).get(part)
        if is_dataclass(field_obj.type) and isinstance(field_obj.type, type):
            cls = field_obj.type
            parent_doc = cls.__doc__ or ""
            continue
        if next_type is not None and isinstance(next_type, type) and is_dataclass(next_type):
            cls = next_type
            parent_doc = cls.__doc__ or ""
            continue
        # Leaf field reached. Any remaining segments point below a
        # non-dataclass value, so the key is not in the schema; previously
        # they were silently ignored and the leaf was described instead.
        if i < last_index:
            raise MissingConfigError(
                "--explain: path leaves the schema before resolving",
                details={"key": dotted_key, "where": ".".join(parts[: i + 2])},
            )
        return _format_field_info(field_obj, parent_doc=parent_doc, type_hint=next_type)
    if field_obj is None:  # pragma: no cover - guarded above
        raise InputError("--explain key did not resolve to a field", details={"key": dotted_key})
    # The final segment named a nested dataclass: describe that field, using
    # the nested class's docstring as ``parent_doc`` and no resolved type hint.
    return _format_field_info(field_obj, parent_doc=parent_doc, type_hint=None)
|
iter_subsystem_names
iter_subsystem_names() -> Iterable[str]
Yield the subsystem keys recognised by the loader.
Source code in geno_lewm/config/loader.py
def iter_subsystem_names() -> Iterable[str]:
    """Return a lazy iterable over the subsystem keys the loader recognises.

    Each entry of ``_SUBSYSTEM_MAP`` is unpacked as a two-item pair and only
    its first item (the name) is produced.
    """
    subsystem_names = (subsystem for subsystem, _schema in _SUBSYSTEM_MAP)
    return subsystem_names
|