Skip to content

FrozenFrame

freezeframe.frame.FrozenFrame

FrozenFrame(batch, *, validate=True)

Immutable, schema-typed DataFrame backed by a pyarrow.RecordBatch.

Subclass and annotate with native Python types to declare a schema:

class UserMetrics(FrozenFrame):
    user_id: int
    name:    str
    score:   float
    active:  bool | None

See the module docstring for construction and usage examples.

Construct directly from a pa.RecordBatch.

Prefer from_dict or from_arrow for everyday use — they provide clearer error messages when data doesn't match the schema.

Parameters:

Name Type Description Default
batch RecordBatch

The Arrow RecordBatch to wrap.

required
validate bool

When True (default), validate the batch against the declared schema before storing. Pass False only when the data is already known to be valid, e.g. inside transform operations.

True
Source code in src/freezeframe/frame.py
def __init__(self, batch: pa.RecordBatch, *, validate: bool = True) -> None:
    """Wrap a ``pa.RecordBatch`` directly.

    ``from_dict`` / ``from_arrow`` are the friendlier entry points; they
    raise clearer errors when data and schema disagree.

    Parameters
    ----------
    batch:
        The Arrow RecordBatch to store.
    validate:
        If ``True`` (the default), check the batch against the declared
        schema before storing it.  Set to ``False`` only for data that is
        already known-good, e.g. inside transform operations.
    """
    if validate:
        _validate_batch(type(self).__schema__, batch)
    # The class blocks normal attribute assignment, so store the batch
    # through object.__setattr__ to sidestep our own mutation guard.
    object.__setattr__(self, "_batch", batch)

__getattr__

__getattr__(name)

Attribute-style column access: df.score.

Only called when normal attribute lookup fails, so internal attributes (_batch, __schema__, …) are unaffected.

Source code in src/freezeframe/frame.py
def __getattr__(self, name: str) -> FrozenSeries:
    """Column access via attribute syntax, e.g. ``df.score``.

    Python only invokes ``__getattr__`` after ordinary lookup fails,
    so real attributes such as ``_batch`` and ``__schema__`` never
    reach this method.
    """
    # During unpickling (or repr of a half-built object) _batch may not
    # exist yet; fail fast with a plain AttributeError in that case.
    if "_batch" not in self.__dict__:
        raise AttributeError(name)
    schema = type(self).__schema__
    if name not in schema.names:
        raise AttributeError(
            f"'{type(self).__name__}' has no attribute '{name}'. "
            f"Available columns: {schema.names}"
        )
    return FrozenSeries(self._batch.column(name))

__getitem__

__getitem__(key)

Return the column key as a FrozenSeries.

Source code in src/freezeframe/frame.py
def __getitem__(self, key: str) -> FrozenSeries:
    """Look up column ``key`` and return it as a ``FrozenSeries``."""
    schema = type(self).__schema__
    if key in schema.names:
        return FrozenSeries(self._batch.column(key))
    raise KeyError(
        f"Column '{key}' not found. Available columns: {schema.names}"
    )

__hash__

__hash__()

Stable hash based on the Arrow IPC serialisation of the batch.

Computed once on first call and cached internally.

Source code in src/freezeframe/frame.py
def __hash__(self) -> int:
    """Deterministic hash derived from the batch's Arrow IPC bytes.

    Computed lazily on first use, then cached on the instance so
    repeated hashing is cheap.
    """
    # The cache slot only ever holds an int, so a None probe is a safe
    # "not yet computed" check.
    cached = self.__dict__.get("_hash_cache")
    if cached is not None:
        return cached
    sink = io.BytesIO()
    with pa.ipc.new_stream(sink, self._batch.schema) as writer:
        writer.write_batch(self._batch)
    digest = hashlib.sha256(sink.getvalue()).digest()
    value = int.from_bytes(digest[:8], "big")
    # Bypass the mutation guard when storing the cached value.
    object.__setattr__(self, "_hash_cache", value)
    return value

__iter__

__iter__()

Iterate row-wise, yielding each row as a plain Python dict.

Source code in src/freezeframe/frame.py
def __iter__(self) -> typing.Iterator[dict[str, Any]]:
    """Yield each row as a plain dict of column name -> Python value."""
    batch = self._batch
    # Bind the column arrays once, in schema order, before iterating.
    columns = {name: batch.column(name) for name in type(self).__schema__.names}
    for row in range(len(batch)):
        yield {name: col[row].as_py() for name, col in columns.items()}

__len__

__len__()

Return the number of rows.

Source code in src/freezeframe/frame.py
def __len__(self) -> int:
    """Number of rows in the wrapped RecordBatch."""
    batch = self._batch
    return len(batch)

from_arrow classmethod

from_arrow(batch, *, validate=True)

Construct from an existing pa.RecordBatch.

Parameters:

Name Type Description Default
batch RecordBatch

An Arrow RecordBatch whose schema must match the declared schema.

required
validate bool

Run schema validation. Defaults to True.

True

Returns:

Type Description
Self

A new instance of the concrete subclass.

Raises:

Type Description
SchemaValidationError

If the batch does not conform to the declared schema.

Source code in src/freezeframe/frame.py
@classmethod
def from_arrow(
    cls,
    batch: pa.RecordBatch,
    *,
    validate: bool = True,
) -> Self:
    """Wrap an existing ``pa.RecordBatch`` in this frame type.

    Parameters
    ----------
    batch:
        An Arrow RecordBatch; its schema must match the class schema.
    validate:
        Whether to run schema validation first.  Defaults to ``True``.

    Returns
    -------
    Self
        A new instance of the concrete subclass.

    Raises
    ------
    SchemaValidationError
        If the batch does not conform to the declared schema.
    """
    if validate:
        _validate_batch(cls.__schema__, batch)
    return cls._from_batch(batch)

from_dict classmethod

from_dict(data, *, validate=True)

Construct from a column-oriented dictionary.

Parameters:

Name Type Description Default
data dict[str, Any]

Mapping of column name to a sequence of values (list, numpy array, or any iterable accepted by pa.array).

required
validate bool

Run schema validation after building the batch. Defaults to True. Pass False only in hot paths where the data is already trusted.

True

Returns:

Type Description
Self

A new instance of the concrete subclass.

Raises:

Type Description
SchemaValidationError

If the data has unexpected or missing columns, type mismatches, or null values in a non-nullable column.

Source code in src/freezeframe/frame.py
@classmethod
def from_dict(
    cls,
    data: dict[str, Any],
    *,
    validate: bool = True,
) -> Self:
    """Build a frame from column-oriented data.

    Parameters
    ----------
    data:
        Maps each column name to a sequence of values (list, numpy
        array, or anything ``pa.array`` accepts).
    validate:
        Run schema validation on the assembled batch.  Defaults to
        ``True``; pass ``False`` only on hot paths with trusted data.

    Returns
    -------
    Self
        A new instance of the concrete subclass.

    Raises
    ------
    SchemaValidationError
        If the data has unexpected or missing columns, type mismatches,
        or null values in a non-nullable column.
    """
    schema = cls.__schema__
    expected = set(schema.names)
    given = set(data)

    # Reject unknown keys first, then missing ones, so the error
    # message pinpoints exactly what is wrong with the input.
    extra = given - expected
    if extra:
        raise SchemaValidationError(
            f"Unexpected key(s) not declared in schema: {sorted(extra)}. "
            "Remove them or add matching annotations to the FrozenFrame class."
        )
    missing = expected - given
    if missing:
        raise SchemaValidationError(
            f"Missing key(s) required by schema: {sorted(missing)}. "
            "Ensure all declared fields are present in the data."
        )

    # Build one array per field, in schema order, translating Arrow's
    # conversion errors into the package's own exception type.
    columns: list[pa.Array] = []
    for field in schema:
        try:
            columns.append(pa.array(data[field.name], type=field.type))
        except (pa.ArrowInvalid, pa.ArrowTypeError) as exc:
            raise SchemaValidationError(
                f"Column '{field.name}': could not convert data to {field.type!r}. {exc}"
            ) from exc

    batch = pa.record_batch(columns, schema=schema)
    if validate:
        _validate_batch(schema, batch)
    return cls._from_batch(batch)