Skip to content

geno_lewm.training.preflight

preflight

Clean-machine preflight for Carbon-backed GenoLeWM training.

TrainingPreflightRequest dataclass

TrainingPreflightRequest(dataset_dir: Path, carbon_model_dir: Path, training_config: Path, run_dir: Path, allow_fixture_dataset: bool = False, require_native_runtime: bool = True, require_accelerator: bool = True, min_cuda_vram_gb: float = MIN_CUDA_VRAM_GB)

Inputs needed before launching a Carbon-backed training run.

TrainingPreflightIssue dataclass

TrainingPreflightIssue(severity: Severity, code: str, path: str, message: str)

One preflight issue.

DependencyProbe dataclass

DependencyProbe(import_name: str, package: str, required: bool, available: bool, version: str | None, reason: str)

Importability probe for one training dependency.

AcceleratorProbe dataclass

AcceleratorProbe(requested_device: str | None, required: bool, available: bool, device_count: int, device_name: str | None, total_memory_bytes: int | None, min_memory_bytes: int, reason: str, issue_code: str | None = None)

CUDA accelerator readiness probe for Carbon-backed training.

TrainingPreflightReport dataclass

TrainingPreflightReport(schema_version: str, generated_by: str, generated_at: str, ok: bool, dataset_snapshot_id: str | None, training_config: dict[str, object], run_dir: dict[str, object], dataset: dict[str, object], carbon: dict[str, object], accelerator: AcceleratorProbe, dependencies: tuple[DependencyProbe, ...], issues: tuple[TrainingPreflightIssue, ...])

Machine-readable readiness evidence for the real training path.

build_training_preflight_report

build_training_preflight_report(request: TrainingPreflightRequest, *, generated_at: str | None = None, dependency_probe: DependencyProbeFn | None = None, accelerator_probe: AcceleratorProbeFn | None = None) -> TrainingPreflightReport

Build clean-machine readiness evidence for Carbon-backed training.

Source code in geno_lewm/training/preflight.py
def build_training_preflight_report(
    request: TrainingPreflightRequest,
    *,
    generated_at: str | None = None,
    dependency_probe: DependencyProbeFn | None = None,
    accelerator_probe: AcceleratorProbeFn | None = None,
) -> TrainingPreflightReport:
    """Assemble the readiness report for a Carbon-backed training launch.

    Runs the dataset, Carbon model directory, training config and run
    directory inspectors, probes the accelerator and every module listed in
    ``REQUIRED_TRAINING_MODULES``, and gathers all recorded issues into one
    ``TrainingPreflightReport``.

    Args:
        request: Paths and policy flags describing the intended run.
        generated_at: Timestamp to stamp on the report; ``_utc_now()`` is
            used when this is ``None``.
        dependency_probe: Replacement for the default dependency probe.
        accelerator_probe: Replacement for the default accelerator probe.

    Returns:
        The report; ``ok`` is true only when no issue has severity
        ``"error"``.
    """
    found: list[TrainingPreflightIssue] = []
    probe_dependency = dependency_probe if dependency_probe else _probe_dependency
    probe_accelerator = accelerator_probe if accelerator_probe else _probe_accelerator

    # The inspectors that receive ``found`` append to it, so their call order
    # fixes the order of the issues in the final report.
    dataset_info = _inspect_dataset(
        request.dataset_dir,
        request.allow_fixture_dataset,
        found,
    )
    carbon_info = _inspect_carbon_model_dir(request.carbon_model_dir, found)
    config_info = _inspect_training_config(request.training_config, found)
    run_dir_info = _inspect_run_dir(request.run_dir)

    vram_floor_bytes = _min_cuda_memory_bytes(request.min_cuda_vram_gb)
    requested_device = _requested_training_device(config_info)
    accelerator_result = probe_accelerator(
        requested_device,
        request.require_accelerator,
        vram_floor_bytes,
    )
    dependency_results = tuple(
        [
            probe_dependency(module_name, request.require_native_runtime)
            for module_name in REQUIRED_TRAINING_MODULES
        ]
    )

    # Probe failures are recorded after the inspector issues: the accelerator
    # first, then each dependency in probe order.
    if accelerator_result.required and not accelerator_result.available:
        code = accelerator_result.issue_code or "training.accelerator_unavailable"
        _issue(found, "error", code, request.training_config, accelerator_result.reason)
    for result in dependency_results:
        if not result.required or result.available:
            continue
        _issue(
            found,
            "error",
            "training.dependency_unavailable",
            result.import_name,
            result.reason,
        )

    # Only a string snapshot id is surfaced; anything else becomes None.
    raw_snapshot_id = dataset_info.get("snapshot_id")
    if not isinstance(raw_snapshot_id, str):
        raw_snapshot_id = None

    timestamp = generated_at
    if timestamp is None:
        timestamp = _utc_now()

    passed = all(entry.severity != "error" for entry in found)
    return TrainingPreflightReport(
        schema_version=SCHEMA_VERSION,
        generated_by=GENERATED_BY,
        generated_at=timestamp,
        ok=passed,
        dataset_snapshot_id=raw_snapshot_id,
        training_config=config_info,
        run_dir=run_dir_info,
        dataset=dataset_info,
        carbon=carbon_info,
        accelerator=accelerator_result,
        dependencies=dependency_results,
        issues=tuple(found),
    )

write_training_preflight_report

write_training_preflight_report(request: TrainingPreflightRequest, output: Path | None = None, *, generated_at: str | None = None, dependency_probe: DependencyProbeFn | None = None, accelerator_probe: AcceleratorProbeFn | None = None) -> TrainingPreflightReport

Write `training_preflight_report.json` and return the report.

Source code in geno_lewm/training/preflight.py
def write_training_preflight_report(
    request: TrainingPreflightRequest,
    output: Path | None = None,
    *,
    generated_at: str | None = None,
    dependency_probe: DependencyProbeFn | None = None,
    accelerator_probe: AcceleratorProbeFn | None = None,
) -> TrainingPreflightReport:
    """Build the preflight report, persist it as JSON, and return it.

    Args:
        request: Paths and policy flags describing the intended run.
        output: Destination file; when ``None`` the report is written to
            ``request.run_dir / REPORT_NAME``.
        generated_at: Forwarded to ``build_training_preflight_report``.
        dependency_probe: Forwarded to ``build_training_preflight_report``.
        accelerator_probe: Forwarded to ``build_training_preflight_report``.

    Returns:
        The same report object that was serialized to disk.
    """
    report = build_training_preflight_report(
        request,
        generated_at=generated_at,
        dependency_probe=dependency_probe,
        accelerator_probe=accelerator_probe,
    )

    destination = output if output is not None else request.run_dir / REPORT_NAME
    # Create any missing parent directories before writing.
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Sorted keys and a trailing newline, encoded as UTF-8.
    payload = json.dumps(report.to_dict(), indent=2, sort_keys=True)
    destination.write_text(f"{payload}\n", encoding="utf-8")
    return report