Skip to content

FrozenFrame

freezeframe.frame.FrozenFrame

FrozenFrame(batch, *, validate=True)

Immutable, schema-typed DataFrame backed by a pyarrow.RecordBatch.

Subclass and annotate with native Python types to declare a schema:

class UserMetrics(FrozenFrame):
    user_id: int
    name:    str
    score:   float
    active:  bool | None

See the module docstring for construction and usage examples.

Construct directly from a pa.RecordBatch.

Prefer from_dict or from_arrow for everyday use — they provide clearer error messages when data doesn't match the schema.

Parameters:

Name Type Description Default
batch RecordBatch

The Arrow RecordBatch to wrap.

required
validate bool

When True (default), validate the batch against the declared schema before storing. Pass False only when the data is already known to be valid, e.g. inside transform operations.

True
Source code in src/freezeframe/frame.py
def __init__(self, batch: pa.RecordBatch, *, validate: bool = True) -> None:
    """Wrap a ``pa.RecordBatch`` directly.

    ``from_dict`` / ``from_arrow`` are the friendlier entry points; they
    raise clearer errors when data and schema disagree.

    Parameters
    ----------
    batch:
        The Arrow RecordBatch to store.
    validate:
        If ``True`` (the default), check the batch against the declared
        schema before storing it.  Set to ``False`` only for data that is
        already known-good, e.g. inside transform operations.
    """
    if validate:
        _validate_batch(type(self).__schema__, batch)
    # The class blocks normal attribute assignment, so store the batch
    # through object.__setattr__ to sidestep our own mutation guard.
    object.__setattr__(self, "_batch", batch)

__getattr__

__getattr__(name)

Attribute-style column access: df.score.

Only called when normal attribute lookup fails, so internal attributes (_batch, __schema__, …) are unaffected.

Source code in src/freezeframe/frame.py
def __getattr__(self, name: str) -> FrozenSeries:
    """Column access via attribute syntax, e.g. ``df.score``.

    Python only invokes ``__getattr__`` after ordinary lookup fails,
    so real attributes such as ``_batch`` and ``__schema__`` never
    reach this method.
    """
    # During unpickling (or repr of a half-built object) _batch may not
    # exist yet; fail fast with a plain AttributeError in that case.
    if "_batch" not in self.__dict__:
        raise AttributeError(name)
    schema = type(self).__schema__
    if name not in schema.names:
        raise AttributeError(
            f"'{type(self).__name__}' has no attribute '{name}'. "
            f"Available columns: {schema.names}"
        )
    return FrozenSeries(self._batch.column(name))

__getitem__

__getitem__(key)

Return the column key as a FrozenSeries.

Source code in src/freezeframe/frame.py
def __getitem__(self, key: str) -> FrozenSeries:
    """Look up column ``key`` and return it as a ``FrozenSeries``."""
    schema = type(self).__schema__
    if key in schema.names:
        return FrozenSeries(self._batch.column(key))
    raise KeyError(
        f"Column '{key}' not found. Available columns: {schema.names}"
    )

__hash__

__hash__()

Stable hash based on the Arrow IPC serialisation of the batch.

Computed once on first call and cached internally.

Source code in src/freezeframe/frame.py
def __hash__(self) -> int:
    """Deterministic hash derived from the batch's Arrow IPC bytes.

    Computed lazily on first use, then cached on the instance so
    repeated hashing is cheap.
    """
    # The cache slot only ever holds an int, so a None probe is a safe
    # "not yet computed" check.
    cached = self.__dict__.get("_hash_cache")
    if cached is not None:
        return cached
    sink = io.BytesIO()
    with pa.ipc.new_stream(sink, self._batch.schema) as writer:
        writer.write_batch(self._batch)
    digest = hashlib.sha256(sink.getvalue()).digest()
    value = int.from_bytes(digest[:8], "big")
    # Bypass the mutation guard when storing the cached value.
    object.__setattr__(self, "_hash_cache", value)
    return value

__iter__

__iter__()

Iterate row-wise, yielding each row as a plain Python dict.

Source code in src/freezeframe/frame.py
def __iter__(self) -> typing.Iterator[dict[str, Any]]:
    """Yield each row as a plain dict of column name -> Python value."""
    batch = self._batch
    # Bind the column arrays once, in schema order, before iterating.
    columns = {name: batch.column(name) for name in type(self).__schema__.names}
    for row in range(len(batch)):
        yield {name: col[row].as_py() for name, col in columns.items()}

__len__

__len__()

Return the number of rows.

Source code in src/freezeframe/frame.py
def __len__(self) -> int:
    """Number of rows in the wrapped RecordBatch."""
    batch = self._batch
    return len(batch)

from_arrow classmethod

from_arrow(batch, *, validate=True)

Construct from an existing pa.RecordBatch.

Parameters:

Name Type Description Default
batch RecordBatch

An Arrow RecordBatch whose schema must match the declared schema.

required
validate bool

Run schema validation. Defaults to True.

True

Returns:

Type Description
Self

A new instance of the concrete subclass.

Raises:

Type Description
SchemaValidationError

If the batch does not conform to the declared schema.

Source code in src/freezeframe/frame.py
@classmethod
def from_arrow(
    cls,
    batch: pa.RecordBatch,
    *,
    validate: bool = True,
) -> Self:
    """Wrap an existing ``pa.RecordBatch`` in this frame type.

    Parameters
    ----------
    batch:
        An Arrow RecordBatch; its schema must match the class schema.
    validate:
        Whether to run schema validation first.  Defaults to ``True``.

    Returns
    -------
    Self
        A new instance of the concrete subclass.

    Raises
    ------
    SchemaValidationError
        If the batch does not conform to the declared schema.
    """
    if validate:
        _validate_batch(cls.__schema__, batch)
    return cls._from_batch(batch)

from_dict classmethod

from_dict(data, *, validate=True)

Construct from a column-oriented dictionary.

Parameters:

Name Type Description Default
data dict[str, Any]

Mapping of column name to a sequence of values (list, numpy array, or any iterable accepted by pa.array).

required
validate bool

Run schema validation after building the batch. Defaults to True. Pass False only in hot paths where the data is already trusted.

True

Returns:

Type Description
Self

A new instance of the concrete subclass.

Raises:

Type Description
SchemaValidationError

If the data has unexpected or missing columns, type mismatches, or null values in a non-nullable column.

Source code in src/freezeframe/frame.py
@classmethod
def from_dict(
    cls,
    data: dict[str, Any],
    *,
    validate: bool = True,
) -> Self:
    """Build a frame from column-oriented data.

    Parameters
    ----------
    data:
        Maps each column name to a sequence of values (list, numpy
        array, or anything ``pa.array`` accepts).
    validate:
        Run schema validation on the assembled batch.  Defaults to
        ``True``; pass ``False`` only on hot paths with trusted data.

    Returns
    -------
    Self
        A new instance of the concrete subclass.

    Raises
    ------
    SchemaValidationError
        If the data has unexpected or missing columns, type mismatches,
        or null values in a non-nullable column.
    """
    schema = cls.__schema__
    expected = set(schema.names)
    given = set(data)

    # Reject unknown keys first, then missing ones, so the error
    # message pinpoints exactly what is wrong with the input.
    extra = given - expected
    if extra:
        raise SchemaValidationError(
            f"Unexpected key(s) not declared in schema: {sorted(extra)}. "
            "Remove them or add matching annotations to the FrozenFrame class."
        )
    missing = expected - given
    if missing:
        raise SchemaValidationError(
            f"Missing key(s) required by schema: {sorted(missing)}. "
            "Ensure all declared fields are present in the data."
        )

    # Build one array per field, in schema order, translating Arrow's
    # conversion errors into the package's own exception type.
    columns: list[pa.Array] = []
    for field in schema:
        try:
            columns.append(pa.array(data[field.name], type=field.type))
        except (pa.ArrowInvalid, pa.ArrowTypeError) as exc:
            raise SchemaValidationError(
                f"Column '{field.name}': could not convert data to {field.type!r}. {exc}"
            ) from exc

    batch = pa.record_batch(columns, schema=schema)
    if validate:
        _validate_batch(schema, batch)
    return cls._from_batch(batch)