Skip to content

geno_lewm.planning

planning

Latent-planning primitives for GenoLeWM.

This package ships the pure-Python action-cost library and factored action sampler from RFC-0008. The evaluator-first CEM core lives in geno_lewm.planning.cem; predictor-backed planning and the CLI remain separate integration work.

DEFAULT_TYPE_COSTS module-attribute

DEFAULT_TYPE_COSTS: Mapping[EditType, float] = MappingProxyType(_DEFAULT_TYPE_COSTS)

Default non-negative type costs for weighted_type_cost.

The defaults keep SNVs cheapest, assign a higher penalty to simple indels and MNVs, and make mixed indels the most expensive v1 edit class. Structural variants are outside the v1 planner surface.

DEFAULT_ACTION_TYPE_WEIGHTS module-attribute

DEFAULT_ACTION_TYPE_WEIGHTS: tuple[EditTypeWeight, ...] = DEFAULT_EDIT_TYPE_WEIGHTS

Default edit-type weights for the planner's initial proposal over {SNV, INS, DEL, MNV, INDEL}.

ActionSampler

ActionSampler(window: str, *, seed: int | None = None, rng: Random | None = None, edge_margin: int = DEFAULT_EDGE_MARGIN, type_weights: Sequence[EditTypeWeight] = DEFAULT_ACTION_TYPE_WEIGHTS, length_dist: Mapping[int, float] | Sequence[float] | None = None, position_bin_bp: int = 8, position_weights: Mapping[int, float] | Sequence[float] | None = None, max_attempts: int = 256)

Sample valid RelEdit actions from a factored proposal.

The proposal follows RFC-0008 §3.3: edit type is categorical, position is uniform or binned-categorical over the window interior, and bases are sampled conditionally on the chosen edit type.

Source code in geno_lewm/planning/sampling.py
def __init__(
    self,
    window: str,
    *,
    seed: int | None = None,
    rng: random.Random | None = None,
    edge_margin: int = DEFAULT_EDGE_MARGIN,
    type_weights: Sequence[EditTypeWeight] = DEFAULT_ACTION_TYPE_WEIGHTS,
    length_dist: Mapping[int, float] | Sequence[float] | None = None,
    position_bin_bp: int = 8,
    position_weights: Mapping[int, float] | Sequence[float] | None = None,
    max_attempts: int = 256,
) -> None:
    """Configure a sampler over ``window``.

    Args:
        window: Sequence window to sample edits from; checked together
            with ``edge_margin`` by ``_validate_window``.
        seed: Seed for a freshly created ``random.Random``. Mutually
            exclusive with ``rng``.
        rng: Caller-supplied random generator. Mutually exclusive with
            ``seed``.
        edge_margin: Margin passed to ``_validate_window`` and stored.
        type_weights: Edit-type proposal weights; stored after
            ``_normalize_type_weights``.
        length_dist: Length distribution; stored exactly as given.
        position_bin_bp: Position bin width; checked by
            ``_require_positive_int``.
        position_weights: Position proposal weights; stored after
            ``_normalize_position_weights``.
        max_attempts: Retry budget used by ``sample_edit``; checked by
            ``_require_positive_int``.

    Raises:
        InputError: If both ``rng`` and ``seed`` are supplied. The
            validation helpers may raise as well.
    """
    if seed is not None and rng is not None:
        raise InputError("pass either rng or seed, not both")

    # Validate everything before touching instance state.
    _validate_window(window, edge_margin)
    for label, value in (
        ("position_bin_bp", position_bin_bp),
        ("max_attempts", max_attempts),
    ):
        _require_positive_int(label, value)

    self.window = window
    self.edge_margin = edge_margin
    self.position_bin_bp = position_bin_bp
    self.max_attempts = max_attempts

    # Use the caller's generator when given; otherwise build one from seed
    # (seed=None lets random.Random pick its own entropy source).
    if rng is None:
        self._rng = random.Random(seed)
    else:
        self._rng = rng

    self._type_weights = _normalize_type_weights(type_weights)
    # NOTE(review): unlike the type/position weights, length_dist is kept
    # un-normalized here — presumably it is handled at sampling time; confirm.
    self._length_dist = length_dist
    self._position_weights = _normalize_position_weights(position_weights)

sample_edit

sample_edit(edit_type: EditType | int | None = None) -> RelEdit

Sample one shape-consistent edit inside the configured window.

Source code in geno_lewm/planning/sampling.py
def sample_edit(self, edit_type: EditType | int | None = None) -> RelEdit:
    """Draw a single edit from the proposal, retrying on rejected draws.

    Args:
        edit_type: Edit type to sample. When ``None`` the type is drawn
            once via ``self._sample_type()``; otherwise the value is
            passed through ``_normalize_type``.

    Returns:
        The first non-``None`` result of ``self._sample_for_type``.

    Raises:
        InputError: If ``self.max_attempts`` consecutive draws all
            return ``None``.
    """
    if edit_type is None:
        chosen_type = self._sample_type()
    else:
        chosen_type = _normalize_type(edit_type)

    # Lazy stream of draws: nothing is sampled beyond the first success.
    draws = (self._sample_for_type(chosen_type) for _ in range(self.max_attempts))
    accepted = next((draw for draw in draws if draw is not None), None)
    if accepted is not None:
        return accepted

    failure_details = {
        "window_len": len(self.window),
        "edge_margin": self.edge_margin,
        "edit_type": int(chosen_type),
        "max_attempts": self.max_attempts,
    }
    raise InputError(
        "could not sample a valid edit from the window interior",
        details=failure_details,
        remediation="provide a longer ACGT window or reduce edge_margin",
    )

sample_sequence

sample_sequence(horizon: int) -> tuple[RelEdit, ...]

Sample a candidate edit sequence of length horizon.

Source code in geno_lewm/planning/sampling.py
def sample_sequence(self, horizon: int) -> tuple[RelEdit, ...]:
    """Draw ``horizon`` edits via ``sample_edit`` and return them as a tuple.

    ``horizon`` is checked by ``_require_nonnegative_int`` first; a
    horizon of zero yields an empty tuple.
    """
    _require_nonnegative_int("horizon", horizon)
    drawn = [self.sample_edit() for _step in range(horizon)]
    return tuple(drawn)

sample_sequences

sample_sequences(n: int, horizon: int) -> tuple[tuple[RelEdit, ...], ...]

Sample n candidate edit sequences.

Source code in geno_lewm/planning/sampling.py
def sample_sequences(self, n: int, horizon: int) -> tuple[tuple[RelEdit, ...], ...]:
    """Draw ``n`` independent candidate sequences, each of length ``horizon``.

    Both arguments are checked by ``_require_nonnegative_int`` up front,
    so an invalid ``horizon`` is rejected even when ``n`` is zero.
    """
    for label, value in (("n", n), ("horizon", horizon)):
        _require_nonnegative_int(label, value)
    batch = [self.sample_sequence(horizon) for _index in range(n)]
    return tuple(batch)

bp_cost

bp_cost(edits: Sequence[RelEdit]) -> float

Return the total base-pair cost for edits.

Source code in geno_lewm/planning/costs.py
def bp_cost(edits: Sequence[RelEdit]) -> float:
    """Return the total base-pair cost for ``edits``.

    Sums ``edit_bp_cost`` over every edit; any error raised while costing
    an individual edit propagates to the caller.

    The sum is seeded with ``0.0`` so that an empty sequence yields the
    float ``0.0`` instead of the int ``0`` a bare ``sum`` would produce,
    matching the declared ``float`` return type.
    """
    return sum((edit_bp_cost(edit) for edit in edits), 0.0)

count_cost

count_cost(edits: Sequence[RelEdit]) -> float

Return the number of edits in a candidate sequence.

Source code in geno_lewm/planning/costs.py
def count_cost(edits: Sequence[RelEdit]) -> float:
    """Return how many edits the candidate sequence contains, as a float."""
    edit_count = len(edits)
    return float(edit_count)

custom_cost

custom_cost(edits: Sequence[RelEdit], cost_fn: Callable[[Sequence[RelEdit]], float]) -> float

Evaluate and validate a user-provided cost function.

Source code in geno_lewm/planning/costs.py
def custom_cost(edits: Sequence[RelEdit], cost_fn: Callable[[Sequence[RelEdit]], float]) -> float:
    """Run a caller-supplied cost function and validate what it returns.

    ``cost_fn`` receives a tuple copy of ``edits``. Its result is passed to
    ``_validate_cost_value`` under the label ``"custom cost"``, and the
    validated value is returned.
    """
    frozen_edits = tuple(edits)
    raw_cost = cost_fn(frozen_edits)
    return _validate_cost_value("custom cost", raw_cost)

edit_bp_cost

edit_bp_cost(edit: RelEdit) -> float

Return the base-pair cost contribution of a single edit.

SNVs cost one base. Insertions and deletions cost their event length excluding the VCF anchor base. MNVs cost their substituted span. Mixed indels cost the larger touched span because both the deleted and inserted sequences are material to the action.

Source code in geno_lewm/planning/costs.py
def edit_bp_cost(edit: RelEdit) -> float:
    """Compute how many base pairs a single edit contributes to the cost.

    Per edit type:

    * SNV: always one base.
    * INS: ``len(alt_bases) - len(ref_bases)``, i.e. the inserted length
      without the VCF anchor base.
    * DEL: ``len(ref_bases) - len(alt_bases)``, i.e. the deleted length
      without the VCF anchor base.
    * MNV: the substituted span, ``len(ref_bases)``.
    * INDEL: the larger of the two spans, because both the deleted and
      the inserted sequences are material to the action.

    Raises:
        InputError: For any other edit type (SV edits are outside the v1
            planning cost surface). ``_require_shape_consistent`` runs
            first and may raise as well.
    """
    _require_shape_consistent(edit)
    n_ref, n_alt = len(edit.ref_bases), len(edit.alt_bases)
    kind = edit.edit_type

    # Identity checks against the enum members, mirroring the cost table above.
    if kind is EditType.SNV:
        span = 1
    elif kind is EditType.INS:
        span = n_alt - n_ref
    elif kind is EditType.DEL:
        span = n_ref - n_alt
    elif kind is EditType.MNV:
        span = n_ref
    elif kind is EditType.INDEL:
        span = max(n_ref, n_alt)
    else:
        raise InputError(
            "SV edits are outside the v1 planning cost surface",
            details={"edit_type": int(edit.edit_type)},
        )
    return float(span)

weighted_type_cost

weighted_type_cost(edits: Sequence[RelEdit], weights: Mapping[EditType, float] = DEFAULT_TYPE_COSTS) -> float

Return the sum of per-edit-type costs for edits.

Source code in geno_lewm/planning/costs.py
def weighted_type_cost(
    edits: Sequence[RelEdit],
    weights: Mapping[EditType, float] = DEFAULT_TYPE_COSTS,
) -> float:
    """Add up the per-type cost weight of every edit in ``edits``.

    Args:
        edits: Candidate edit sequence; each edit is checked with
            ``_require_shape_consistent`` before it is costed.
        weights: Cost per edit type; passed through
            ``_validate_type_costs`` before use.

    Returns:
        The accumulated cost, ``0.0`` for an empty sequence.

    Raises:
        InputError: If an edit's type has no entry in the validated
            weights.
    """
    cost_by_type = _validate_type_costs(weights)
    running_total = 0.0
    for item in edits:
        _require_shape_consistent(item)
        try:
            type_cost = cost_by_type[item.edit_type]
        except KeyError as exc:
            # Surface a missing weight as a domain error with the offending type.
            raise InputError(
                "missing cost weight for edit type",
                details={"edit_type": int(item.edit_type)},
                remediation="provide a non-negative finite cost for every sampled edit type",
            ) from exc
        running_total += type_cost
    return running_total