Skip to content

geno_lewm.planning.sampling

sampling

Factored edit sampler for RFC-0008 latent planning.

DEFAULT_ACTION_TYPE_WEIGHTS module-attribute

DEFAULT_ACTION_TYPE_WEIGHTS: tuple[EditTypeWeight, ...] = DEFAULT_EDIT_TYPE_WEIGHTS

Initial planner proposal over {SNV, INS, DEL, MNV, INDEL}.

ActionSampler

ActionSampler(window: str, *, seed: int | None = None, rng: Random | None = None, edge_margin: int = DEFAULT_EDGE_MARGIN, type_weights: Sequence[EditTypeWeight] = DEFAULT_ACTION_TYPE_WEIGHTS, length_dist: Mapping[int, float] | Sequence[float] | None = None, position_bin_bp: int = 8, position_weights: Mapping[int, float] | Sequence[float] | None = None, max_attempts: int = 256)

Sample valid RelEdit actions from a factored proposal.

The proposal follows RFC-0008 §3.3: edit type is categorical, position is uniform or binned-categorical over the window interior, and bases are sampled conditionally on the chosen edit type.

Source code in geno_lewm/planning/sampling.py
def __init__(
    self,
    window: str,
    *,
    seed: int | None = None,
    rng: random.Random | None = None,
    edge_margin: int = DEFAULT_EDGE_MARGIN,
    type_weights: Sequence[EditTypeWeight] = DEFAULT_ACTION_TYPE_WEIGHTS,
    length_dist: Mapping[int, float] | Sequence[float] | None = None,
    position_bin_bp: int = 8,
    position_weights: Mapping[int, float] | Sequence[float] | None = None,
    max_attempts: int = 256,
) -> None:
    """Validate the configuration and set up the sampler state.

    Args:
        window: Sequence window to sample edits in; checked together with
            ``edge_margin`` by ``_validate_window``.
        seed: Seed for a freshly created ``random.Random``. Mutually
            exclusive with ``rng``.
        rng: Caller-supplied generator, used as-is. Mutually exclusive
            with ``seed``.
        edge_margin: Margin passed to ``_validate_window`` and kept on the
            instance.
        type_weights: Edit-type weights, normalized by
            ``_normalize_type_weights``.
        length_dist: Stored unchanged; this constructor neither validates
            nor normalizes it.
        position_bin_bp: Position bin width; must be a positive integer.
        position_weights: Position weights, normalized by
            ``_normalize_position_weights``.
        max_attempts: Upper bound on draws per sampled edit; must be a
            positive integer.

    Raises:
        InputError: If both ``rng`` and ``seed`` are supplied. The
            validation helpers may raise as well.
    """
    # Two sources of randomness would be ambiguous, so reject the combination.
    if seed is not None and rng is not None:
        raise InputError("pass either rng or seed, not both")

    # All argument checks run before any instance state is written.
    _validate_window(window, edge_margin)
    for label, value in (
        ("position_bin_bp", position_bin_bp),
        ("max_attempts", max_attempts),
    ):
        _require_positive_int(label, value)

    # Fall back to a private generator when the caller did not supply one.
    generator = random.Random(seed) if rng is None else rng

    self.window, self.edge_margin = window, edge_margin
    self.position_bin_bp, self.max_attempts = position_bin_bp, max_attempts
    self._rng = generator
    self._length_dist = length_dist
    self._type_weights = _normalize_type_weights(type_weights)
    self._position_weights = _normalize_position_weights(position_weights)

sample_edit

sample_edit(edit_type: EditType | int | None = None) -> RelEdit

Sample one shape-consistent edit inside the configured window.

Source code in geno_lewm/planning/sampling.py
def sample_edit(self, edit_type: EditType | int | None = None) -> RelEdit:
    """Draw a single edit, retrying up to ``max_attempts`` times.

    Args:
        edit_type: Edit type to sample. When ``None`` a type is drawn once
            via ``_sample_type``; otherwise the value is passed through
            ``_normalize_type``. The same type is used for every retry.

    Returns:
        The first non-``None`` result of ``_sample_for_type``.

    Raises:
        InputError: If every attempt returned ``None``.
    """
    if edit_type is None:
        chosen_type = self._sample_type()
    else:
        chosen_type = _normalize_type(edit_type)

    for _ in range(self.max_attempts):
        candidate = self._sample_for_type(chosen_type)
        if candidate is None:
            # Rejected draw: try again with the same edit type.
            continue
        return candidate

    # Every attempt was rejected; report the configuration that caused it.
    failure_details = {
        "window_len": len(self.window),
        "edge_margin": self.edge_margin,
        "edit_type": int(chosen_type),
        "max_attempts": self.max_attempts,
    }
    raise InputError(
        "could not sample a valid edit from the window interior",
        details=failure_details,
        remediation="provide a longer ACGT window or reduce edge_margin",
    )

sample_sequence

sample_sequence(horizon: int) -> tuple[RelEdit, ...]

Sample a candidate edit sequence of length horizon.

Source code in geno_lewm/planning/sampling.py
def sample_sequence(self, horizon: int) -> tuple[RelEdit, ...]:
    """Draw ``horizon`` edits, one ``sample_edit()`` call each, in order.

    Args:
        horizon: Number of edits to draw; checked by
            ``_require_nonnegative_int``.

    Returns:
        A tuple holding the sampled edits in draw order.
    """
    _require_nonnegative_int("horizon", horizon)
    drawn = [self.sample_edit() for _ in range(horizon)]
    return tuple(drawn)

sample_sequences

sample_sequences(n: int, horizon: int) -> tuple[tuple[RelEdit, ...], ...]

Sample n candidate edit sequences.

Source code in geno_lewm/planning/sampling.py
def sample_sequences(self, n: int, horizon: int) -> tuple[tuple[RelEdit, ...], ...]:
    """Draw ``n`` edit sequences, each produced by ``sample_sequence(horizon)``.

    Args:
        n: Number of sequences to draw; checked by
            ``_require_nonnegative_int``.
        horizon: Length passed to every ``sample_sequence`` call; checked
            here as well, so it is validated even when ``n`` is zero.

    Returns:
        A tuple of ``n`` sequences in draw order.
    """
    for label, count in (("n", n), ("horizon", horizon)):
        _require_nonnegative_int(label, count)
    batch = [self.sample_sequence(horizon) for _ in range(n)]
    return tuple(batch)