admin管理员组

文章数量:1332345

I'm trying to implement a lazily evaluated generic field type for Pydantic v2. This is the simple implementation I have. You can assign a value, a function, or an async function to the lazy field, and it's only evaluated when you access it. If you use this in any normal class, it works perfectly, but it doesn't work as a Pydantic field.

The problem is __set__ is never called here. __get__ is called twice for some reason though. I know Pydantic does some weird stuff internally which might be the reason. Any help would be highly appreciated to resolve this.

import asyncio
import inspect
from typing import Any, Awaitable, Callable, Generic, Optional, TypeVar, Union, cast

from pydantic import BaseModel, GetCoreSchemaHandler
from pydantic_core import CoreSchema, core_schema

T = TypeVar("T")


class LazyField(Generic[T]):
    """Descriptor holding a value or a zero-argument loader, resolved on first read.

    A plain value is returned as-is.  A callable is treated as a loader: it is
    invoked on the first instance-level access, an awaitable result is driven
    to completion on a private event loop, and the outcome is cached.

    NOTE: the state lives on the descriptor object itself, so every instance
    of the owning class shares one value/loader.

    NOTE(review): the sample output in this post shows ``__set__`` is never
    invoked when this is used as a Pydantic field -- presumably the model
    stores the validated value on the instance directly; confirm against
    Pydantic's attribute handling.
    """

    def __init__(self, value=None) -> None:
        """Create the field, optionally seeded with a value or loader.

        Args:
            value: Initial value or zero-argument (sync or async) loader.
                ``None`` (the default) leaves the field unset.
        """
        print("LazyField.__init__")

        self._value: Optional[T] = None
        self._loader: Optional[Callable[[], Union[T, Awaitable[T]]]] = None
        self._is_loaded: bool = False

        # Previously the argument was accepted but silently discarded.
        if value is not None:
            self._store(value)

    def _store(
        self, value: Union[T, Callable[[], T], Callable[[], Awaitable[T]]]
    ) -> None:
        """Record *value* as either a pending loader or a direct value."""
        self._is_loaded = False
        if callable(value):
            self._loader = cast(
                Union[Callable[[], T], Callable[[], Awaitable[T]]], value
            )
            self._value = None
        else:
            self._loader = None
            self._value = cast(T, value)

    @staticmethod
    def _run_awaitable(awaitable: Awaitable[T]) -> T:
        """Drive *awaitable* to completion on a private, short-lived loop.

        Raises:
            RuntimeError: If an event loop is already running in this thread;
                a running loop cannot be re-entered synchronously.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            pass  # No running loop: safe to block on a private one.
        else:
            # Dispose of the pending coroutine so it does not trigger a
            # "never awaited" warning, then fail with a clear message.
            close = getattr(awaitable, "close", None)
            if callable(close):
                close()
            raise RuntimeError(
                "LazyField cannot resolve an async loader synchronously "
                "while an event loop is running"
            )

        loop = asyncio.new_event_loop()
        try:
            return loop.run_until_complete(awaitable)
        finally:
            loop.close()  # The loop was previously leaked.

    def __get__(self, obj: Any, objtype=None) -> T:
        """Return the field value, resolving the loader on first access.

        Args:
            obj: Owning instance, or ``None`` for class-level access.
            objtype: Owning class (unused).

        Returns:
            The descriptor itself for class-level access, otherwise the
            direct value or the cached loader result.

        Raises:
            AttributeError: If neither a value nor a loader has been set.
            RuntimeError: If an async loader must be resolved while an event
                loop is already running.
        """
        print("LazyField.__get__")

        if obj is None:
            return self  # type: ignore

        if self._is_loaded:
            # Cached loader result; may legitimately be None.
            return cast(T, self._value)

        if self._loader is None:
            if self._value is None:
                raise AttributeError("LazyField has no value or loader set")
            return self._value

        result = self._loader()
        if inspect.isawaitable(result):
            result = self._run_awaitable(result)

        self._value = cast(T, result)
        self._is_loaded = True
        self._loader = None
        return self._value

    def __set__(
        self, obj: Any, value: Union[T, Callable[[], T], Callable[[], Awaitable[T]]]
    ) -> None:
        """Replace the stored value or loader and drop any cached result.

        Args:
            obj: Owning instance (unused; state is kept on the descriptor).
            value: New direct value, or a zero-argument sync/async loader.
        """
        print("LazyField.__set__")

        self._store(value)

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: "type[Any]", handler: "GetCoreSchemaHandler"
    ) -> "CoreSchema":
        """Build the core schema: the inner type ``T`` or any callable.

        Args:
            source_type: The annotated type, e.g. ``LazyField[bytes]``.
            handler: Pydantic handler used to generate the schema for ``T``.

        Returns:
            A schema validating JSON input as ``T`` and Python input as
            either ``T`` or a callable.
        """
        print("LazyField.__get_pydantic_core_schema__")

        # Extract the inner type from LazyField[T]; fall back to Any when the
        # type is unparameterised or carries an empty argument tuple.
        type_args = getattr(source_type, "__args__", ())
        inner_type = type_args[0] if type_args else Any
        # Generate schema for the inner type
        inner_schema = handler.generate_schema(inner_type)

        return core_schema.json_or_python_schema(
            json_schema=inner_schema,
            python_schema=core_schema.union_schema(
                [
                    # Handle direct value assignment
                    inner_schema,
                    # Handle callable assignment; coroutine functions are
                    # callables too, so a single entry covers both.
                    core_schema.callable_schema(),
                ]
            ),
            serialization=core_schema.plain_serializer_function_ser_schema(
                lambda x: x._value if hasattr(x, "_value") and x._is_loaded else None,
                return_schema=inner_schema,
                when_used="json",
            ),
        )


# Demo model: declares `content` as a LazyField[bytes] whose default is an
# empty LazyField instance.
class A(BaseModel):
    content: LazyField[bytes] = LazyField()


async def get_content():
    """Coroutine loader used by the demo; resolves to a fixed bytes payload."""
    payload = b"Hello, world!"
    return payload


# Pass the coroutine function itself (not its result) as the field value.
a = A(content=get_content)

# Intended result: the resolved bytes. Per the output shown below, the raw
# function object is printed instead.
print(a.content)

This is the output from above:

LazyField.__init__
LazyField.__get__
LazyField.__get__
LazyField.__get_pydantic_core_schema__
<function get_content at 0x102cc4860>

As you can see, __get__ is called twice (both times before the schema is generated) and __set__ is never called, so _is_loaded stays False and _loader stays None. As a result, a.content ends up being the raw function, without ever being evaluated.

I'm trying to implement a lazily evaluated generic field type for Pydantic v2. This is the simple implementation I have. You can assign a value, a function, or an async function to the lazy field, and it's only evaluated when you access it. If you use this in any normal class, it works perfectly, but it doesn't work as a Pydantic field.

The problem is __set__ is never called here. __get__ is called twice for some reason though. I know Pydantic does some weird stuff internally which might be the reason. Any help would be highly appreciated to resolve this.

import asyncio
import inspect
from typing import Any, Awaitable, Callable, Generic, Optional, TypeVar, Union, cast

from pydantic import BaseModel, GetCoreSchemaHandler
from pydantic_core import CoreSchema, core_schema

T = TypeVar("T")


class LazyField(Generic[T]):
    """Descriptor holding a value or a zero-argument loader, resolved on first read.

    A plain value is returned as-is.  A callable is treated as a loader: it is
    invoked on the first instance-level access, an awaitable result is driven
    to completion on a private event loop, and the outcome is cached.

    NOTE: the state lives on the descriptor object itself, so every instance
    of the owning class shares one value/loader.

    NOTE(review): the sample output in this post shows ``__set__`` is never
    invoked when this is used as a Pydantic field -- presumably the model
    stores the validated value on the instance directly; confirm against
    Pydantic's attribute handling.
    """

    def __init__(self, value=None) -> None:
        """Create the field, optionally seeded with a value or loader.

        Args:
            value: Initial value or zero-argument (sync or async) loader.
                ``None`` (the default) leaves the field unset.
        """
        print("LazyField.__init__")

        self._value: Optional[T] = None
        self._loader: Optional[Callable[[], Union[T, Awaitable[T]]]] = None
        self._is_loaded: bool = False

        # Previously the argument was accepted but silently discarded.
        if value is not None:
            self._store(value)

    def _store(
        self, value: Union[T, Callable[[], T], Callable[[], Awaitable[T]]]
    ) -> None:
        """Record *value* as either a pending loader or a direct value."""
        self._is_loaded = False
        if callable(value):
            self._loader = cast(
                Union[Callable[[], T], Callable[[], Awaitable[T]]], value
            )
            self._value = None
        else:
            self._loader = None
            self._value = cast(T, value)

    @staticmethod
    def _run_awaitable(awaitable: Awaitable[T]) -> T:
        """Drive *awaitable* to completion on a private, short-lived loop.

        Raises:
            RuntimeError: If an event loop is already running in this thread;
                a running loop cannot be re-entered synchronously.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            pass  # No running loop: safe to block on a private one.
        else:
            # Dispose of the pending coroutine so it does not trigger a
            # "never awaited" warning, then fail with a clear message.
            close = getattr(awaitable, "close", None)
            if callable(close):
                close()
            raise RuntimeError(
                "LazyField cannot resolve an async loader synchronously "
                "while an event loop is running"
            )

        loop = asyncio.new_event_loop()
        try:
            return loop.run_until_complete(awaitable)
        finally:
            loop.close()  # The loop was previously leaked.

    def __get__(self, obj: Any, objtype=None) -> T:
        """Return the field value, resolving the loader on first access.

        Args:
            obj: Owning instance, or ``None`` for class-level access.
            objtype: Owning class (unused).

        Returns:
            The descriptor itself for class-level access, otherwise the
            direct value or the cached loader result.

        Raises:
            AttributeError: If neither a value nor a loader has been set.
            RuntimeError: If an async loader must be resolved while an event
                loop is already running.
        """
        print("LazyField.__get__")

        if obj is None:
            return self  # type: ignore

        if self._is_loaded:
            # Cached loader result; may legitimately be None.
            return cast(T, self._value)

        if self._loader is None:
            if self._value is None:
                raise AttributeError("LazyField has no value or loader set")
            return self._value

        result = self._loader()
        if inspect.isawaitable(result):
            result = self._run_awaitable(result)

        self._value = cast(T, result)
        self._is_loaded = True
        self._loader = None
        return self._value

    def __set__(
        self, obj: Any, value: Union[T, Callable[[], T], Callable[[], Awaitable[T]]]
    ) -> None:
        """Replace the stored value or loader and drop any cached result.

        Args:
            obj: Owning instance (unused; state is kept on the descriptor).
            value: New direct value, or a zero-argument sync/async loader.
        """
        print("LazyField.__set__")

        self._store(value)

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: "type[Any]", handler: "GetCoreSchemaHandler"
    ) -> "CoreSchema":
        """Build the core schema: the inner type ``T`` or any callable.

        Args:
            source_type: The annotated type, e.g. ``LazyField[bytes]``.
            handler: Pydantic handler used to generate the schema for ``T``.

        Returns:
            A schema validating JSON input as ``T`` and Python input as
            either ``T`` or a callable.
        """
        print("LazyField.__get_pydantic_core_schema__")

        # Extract the inner type from LazyField[T]; fall back to Any when the
        # type is unparameterised or carries an empty argument tuple.
        type_args = getattr(source_type, "__args__", ())
        inner_type = type_args[0] if type_args else Any
        # Generate schema for the inner type
        inner_schema = handler.generate_schema(inner_type)

        return core_schema.json_or_python_schema(
            json_schema=inner_schema,
            python_schema=core_schema.union_schema(
                [
                    # Handle direct value assignment
                    inner_schema,
                    # Handle callable assignment; coroutine functions are
                    # callables too, so a single entry covers both.
                    core_schema.callable_schema(),
                ]
            ),
            serialization=core_schema.plain_serializer_function_ser_schema(
                lambda x: x._value if hasattr(x, "_value") and x._is_loaded else None,
                return_schema=inner_schema,
                when_used="json",
            ),
        )


# Demo model: declares `content` as a LazyField[bytes] whose default is an
# empty LazyField instance.
class A(BaseModel):
    content: LazyField[bytes] = LazyField()


async def get_content():
    """Coroutine loader used by the demo; resolves to a fixed bytes payload."""
    payload = b"Hello, world!"
    return payload


# Pass the coroutine function itself (not its result) as the field value.
a = A(content=get_content)

# Intended result: the resolved bytes. Per the output shown below, the raw
# function object is printed instead.
print(a.content)

This is the output from above:

LazyField.__init__
LazyField.__get__
LazyField.__get__
LazyField.__get_pydantic_core_schema__
<function get_content at 0x102cc4860>

As you can see, __get__ is called twice (both times before the schema is generated) and __set__ is never called, so _is_loaded stays False and _loader stays None. As a result, a.content ends up being the raw function, without ever being evaluated.

Share Improve this question asked Nov 20, 2024 at 22:16 Dulaj DisanayakaDulaj Disanayaka 891 silver badge7 bronze badges 6
  • I just looked into the pydantic code. The assignment is done (in general) via self.__dict__[name] = value. I'm not sure, but I guess the __set__ method is not invoked in this case, as the main logic behind descriptors should be inside the __getattribute__ (and __setattr__?) methods of the object class: docs.python.org/3/howto/descriptor.html – lord_haffi Commented Nov 21, 2024 at 21:44
  • But I also saw, that if it is an instance of property, the __set__ method will be explicitly called. So it may be worth it to try and wrap your logic inside a @property. – lord_haffi Commented Nov 21, 2024 at 21:46
  • Another idea: You could also override the __setattr__ method of your model class to explicitly invoke the __set__ method if the field does implement it, i.e. if it is a descriptor. – lord_haffi Commented Nov 21, 2024 at 21:48
  • I was hoping I could make this field work without needing to touch the model class, so the usage stays very ergonomic. But if that's not possible, I guess I'll have to resort to doing something with @computed_field. Awesome suggestions, thanks! – Dulaj Disanayaka Commented Nov 22, 2024 at 19:17
  • You could also create a feature request on the pydantic repo :) It shouldn't be that hard to implement this feature I think, so you may be lucky ^^ – lord_haffi Commented Nov 23, 2024 at 10:00
 |  Show 1 more comment

1 Answer 1

Reset to default 0

I'm interested in a similar feature. In my case, my FastAPI route may receive a deeply nested JSON payload. In some cases, sub-models may not be useful, and so validating them is a waste of time. I'm looking for a way to defer the validation of sub-models to first access, something like:

# Define a submodel with a single integer field
class B(BaseModel):
    value: int

# Define the main model with a lazy field.
# NOTE: `LazyField()` is the wished-for API here; this snippet does not define it.
class A(BaseModel):
    toplevel: int
    b: B = LazyField()
   
    # Or using a decorator
    # @lazyfield
    # b: B

# Create an instance of A without validating the submodel immediately
data = {"toplevel": 42, "b": {"value": None}}  # `None` is not an int: deliberately invalid for B, to demonstrate lazy validation
model = A(**data)

# Access the eagerly validated field without any problem
print(model.toplevel)

# Access the submodel field to trigger validation
# NOTE: `ValidationError` is not imported in this snippet -- presumably
# `from pydantic import ValidationError`.
try:
    print(model.b)  # This will validate B and raise an error if invalid
except ValidationError as e:
    print("Validation error:", e)

My current solution is to allow extra fields and use functools.cached_property:

from functools import cached_property

from pydantic import (
    BaseModel,
    ConfigDict,
)


# Submodel whose validation is deferred until `A.b` is first read.
class B(BaseModel):
    value: int


class A(BaseModel):
    # extra="allow" lets undeclared keys such as "b" through; the property
    # below reads them back from `model_extra`.
    model_config = ConfigDict(extra="allow")
    value: int

    @cached_property
    def b(self) -> B:
        """Validate the raw ``b`` payload on first access and cache the result.

        Raises:
            ValueError: If no ``b`` key is present among the extra fields.
        """
        extras = self.model_extra
        if extras is not None and "b" in extras:
            return B.model_validate(extras["b"])
        raise ValueError("Missing field `b`")


# "b" is not a declared field of A, so validating A does not validate it.
a = A.model_validate({"value": 4, "b": {"value": None}})
print(a.value)
print(a.model_dump_json())  # Print the object "as-is"
print(a.b)  # Fails because `b` does not validate

This requires more boilerplate, and I don't know if there is a way to make it more "user-friendly", through a LazyField class or an annotation.

While writing this, I understand that this may raise a concern about ValidationError that may be raised at any access (not only during model validation), but this is something I can live with.

本文标签: pythonImplementing a Lazy evaluated field for Pydantic v2Stack Overflow