"""Custom Pydantic field types for UKF schema compatibility.
This module provides custom Pydantic field types that handle validation,
serialization, and database mapping for the Universal Knowledge Framework.
These types enable BaseUKF to serve as a single source of truth for schema
definitions across different storage backends.
"""
# Names exported by ``from <this module> import *``.
__all__ = [
    "UKF_TYPES",
    "UKFIdType",
    "UKFIntegerType",
    "UKFBooleanType",
    "UKFShortTextType",
    "UKFMediumTextType",
    "UKFLongTextType",
    "UKFTimestampType",
    "UKFDurationType",
    "UKFJsonType",
    "UKFTagsType",
    "UKFAuthsType",
    "UKFSynonymsType",
    "UKFRelatedType",
    "UKFVectorType",
]
from typing import Dict, Tuple, Optional, Union, List, Iterable
from pydantic_core import PydanticCustomError, core_schema
from ..utils.basic.serialize_utils import loads_json, dumps_json
from ..utils.basic.config_utils import HEAVEN_CM
from .ukf_utils import valid_tag
import numpy as np
import datetime
import isodate
import math
def _get_text_lengths():
    """Build the table of maximum text lengths used by the text field types.

    The built-in defaults are merged with whatever ``HEAVEN_CM.get("ukf.text", {})``
    returns; on key collisions the configured value wins because it is the
    right-hand operand of the dict union.

    Returns:
        dict: Length limits keyed by ``"id"``, ``"short"``, ``"medium"`` and
        ``"long"``, plus any additional keys supplied by the configuration.
    """
    defaults = {"id": 63, "short": 255, "medium": 2047, "long": 65535}
    overrides = HEAVEN_CM.get("ukf.text", {})
    return defaults | overrides
# Text length limits, resolved once when this module is imported.
_TEXT_LENGTHS = _get_text_lengths()

# Registry of UKF field types, keyed by each class's ``name`` attribute.
# Populated by the ``_ukf_type`` class decorator.
UKF_TYPES = {}
def _ukf_type(cls):
    """Class decorator that records ``cls`` in the ``UKF_TYPES`` registry.

    Args:
        cls: A class exposing a ``name`` attribute, used as the registry key.
            A later class with the same ``name`` replaces the earlier entry.

    Returns:
        The same class object, unchanged, so the decorator is transparent.
    """
    registry_key = cls.name
    UKF_TYPES[registry_key] = cls
    return cls
[文档]
@_ukf_type
class UKFIdType(int):
    """Field type for UKF identifiers, registered under the name ``"id"``.

    The Pydantic schema first applies the core integer schema and then runs
    ``_validate`` on the result. ``_validate`` itself simply passes ``None``
    through and converts everything else with ``int()``; string inputs may
    therefore contain leading zeros or digit-group underscores, as accepted
    by ``int()``.

    NOTE(review): dash-separated ID strings are not parsed by ``int()``;
    confirm whether callers normalise such strings before validation.

    Args:
        value: Integer or string representation of an ID.

    Returns:
        int: The ID as a plain integer (``None`` is returned unchanged).

    Raises:
        ValueError: If a string is not a valid integer literal.
        TypeError: If the value is of a type ``int()`` cannot convert.

    Examples:
        >>> UKFIdType._validate(123)
        123
        >>> UKFIdType._validate("000123")
        123
    """

    name = "id"

    @classmethod
    def __get_pydantic_core_schema__(cls, source, handler):
        # Integer parsing happens first; ``_validate`` post-processes the result.
        base_schema = core_schema.int_schema()
        return core_schema.no_info_after_validator_function(cls._validate, base_schema)

    @classmethod
    def _validate(cls, value: Union[int, str]) -> int:
        if value is None:
            return None
        return int(value)
[文档]
@_ukf_type
class UKFIntegerType(int):
    """Custom type for integer fields with validation.

    Registered under the name ``"int"``. The Pydantic schema applies the core
    integer schema and then runs ``_validate``. ``_validate`` can also be
    called directly and accepts integers, integral floats and integer strings.

    Args:
        value: Integer, float, or string representation of an integer.
            ``None`` and NaN are treated as "no value".

    Returns:
        Optional[int]: The validated integer, or ``None`` for ``None``/NaN input.

    Raises:
        PydanticCustomError: If the value cannot be converted to an integer
            (non-integral or infinite floats, booleans, malformed strings, ...).

    Examples:
        >>> UKFIntegerType._validate(123)
        123
        >>> UKFIntegerType._validate(123.0)
        123
        >>> UKFIntegerType._validate("123")
        123
        >>> UKFIntegerType._validate(float("nan")) is None
        True
    """

    name = "int"

    @classmethod
    def __get_pydantic_core_schema__(cls, source, handler):
        return core_schema.no_info_after_validator_function(
            cls._validate,
            core_schema.int_schema(),
        )

    @classmethod
    def _validate(cls, value: Optional[Union[int, float, str]]) -> Optional[int]:
        if value is None:
            return None
        # Real ints need no parsing (and must not go through float-based NaN
        # checks, which overflow for very large values). ``bool`` is excluded
        # so that it keeps being rejected by the text parse below.
        if isinstance(value, int) and not isinstance(value, bool):
            return int(value)
        if isinstance(value, str):
            # ``math.isnan`` only accepts real numbers, so strings skip it.
            text = value
        else:
            try:
                if math.isnan(value):
                    return None
            except (TypeError, ValueError, OverflowError):
                # Not a real number; let the integer parse below decide.
                pass
            # ``str(123.0)`` is "123.0", which ``int()`` cannot parse, so
            # integral floats are converted numerically instead.
            if isinstance(value, float) and value.is_integer():
                return int(value)
            text = str(value)
        try:
            return int(text)
        except ValueError as exc:
            raise PydanticCustomError(
                "integer_format",
                "Invalid integer format: {value}",
                {"value": str(value)},
            ) from exc
[文档]
@_ukf_type
class UKFBooleanType:
    """Field type for boolean values, registered under the name ``"bool"``.

    The Pydantic schema applies the core boolean schema and then runs
    ``_validate``. ``_validate`` keeps ``None`` and real booleans as they
    are, maps a fixed set of case-insensitive words to ``True``/``False``,
    and falls back to Python truthiness for every other non-string value.

    Args:
        value: Boolean, integer, string, or other value to convert.

    Returns:
        Optional[bool]: The converted boolean, or ``None`` for ``None`` input.

    Raises:
        PydanticCustomError: If a string is not one of the recognised words.

    Examples:
        >>> UKFBooleanType._validate(True)
        True
        >>> UKFBooleanType._validate("true")
        True
        >>> UKFBooleanType._validate(1)
        True
        >>> UKFBooleanType._validate("false")
        False
        >>> UKFBooleanType._validate(0)
        False
    """

    name = "bool"

    @classmethod
    def __get_pydantic_core_schema__(cls, source, handler):
        base_schema = core_schema.bool_schema()
        return core_schema.no_info_after_validator_function(cls._validate, base_schema)

    @classmethod
    def _validate(cls, value: Optional[Union[bool, int, str]]) -> Optional[bool]:
        # ``None`` and genuine booleans are returned untouched.
        if value is None or isinstance(value, bool):
            return value
        # Integers and any other non-string object use plain truthiness.
        if not isinstance(value, str):
            return bool(value)
        token = value.lower().strip()
        if token in ("true", "1", "yes", "on", "enabled"):
            return True
        if token in ("false", "0", "no", "off", "disabled"):
            return False
        raise PydanticCustomError("boolean_format", "Invalid boolean format: {value}", {"value": value})
[文档]
@_ukf_type
class UKFShortTextType(str):
    """Field type for short text, registered under the name ``"short_text"``.

    The maximum length is read from ``_TEXT_LENGTHS["short"]`` and falls back
    to 255 when that key is missing. The limit is enforced both by the core
    string schema and again inside ``_validate``.

    Args:
        value: String value to validate.

    Returns:
        Optional[str]: The value as ``str``, or ``None`` for ``None`` input.

    Raises:
        PydanticCustomError: ``short_text_too_long`` when the value is longer
            than the limit.

    Examples:
        >>> UKFShortTextType._validate("Hello world")
        'Hello world'

        With the default limit of 255, ``UKFShortTextType._validate("a" * 256)``
        raises ``PydanticCustomError`` (``short_text_too_long``).
    """

    name = "short_text"

    @classmethod
    def __get_pydantic_core_schema__(cls, source, handler):
        limit = _TEXT_LENGTHS.get("short", 255)
        length_checked = core_schema.str_schema(max_length=limit)
        return core_schema.no_info_after_validator_function(cls._validate, length_checked)

    @classmethod
    def _validate(cls, value: Optional[str]) -> Optional[str]:
        if value is None:
            return None
        limit = _TEXT_LENGTHS.get("short", 255)
        actual = len(value)
        if actual <= limit:
            return str(value)
        raise PydanticCustomError(
            "short_text_too_long",
            "Short text must be {max_length} characters or less, got {length}",
            {"max_length": limit, "length": actual},
        )

    @classmethod
    def max_length(cls) -> int:
        """Return the currently configured maximum length for short text."""
        return _TEXT_LENGTHS.get("short", 255)
[文档]
@_ukf_type
class UKFMediumTextType(str):
    """Field type for medium text, registered under the name ``"medium_text"``.

    The maximum length is read from ``_TEXT_LENGTHS["medium"]`` and falls back
    to 2047 when that key is missing. The limit is enforced both by the core
    string schema and again inside ``_validate``.

    Args:
        value: String value to validate.

    Returns:
        Optional[str]: The value as ``str``, or ``None`` for ``None`` input.

    Raises:
        PydanticCustomError: ``medium_text_too_long`` when the value is longer
            than the limit.

    Examples:
        >>> UKFMediumTextType._validate("Medium length text")
        'Medium length text'

        With the default limit of 2047, ``UKFMediumTextType._validate("a" * 2048)``
        raises ``PydanticCustomError`` (``medium_text_too_long``).
    """

    name = "medium_text"

    @classmethod
    def __get_pydantic_core_schema__(cls, source, handler):
        limit = _TEXT_LENGTHS.get("medium", 2047)
        length_checked = core_schema.str_schema(max_length=limit)
        return core_schema.no_info_after_validator_function(cls._validate, length_checked)

    @classmethod
    def _validate(cls, value: Optional[str]) -> Optional[str]:
        if value is None:
            return None
        limit = _TEXT_LENGTHS.get("medium", 2047)
        actual = len(value)
        if actual <= limit:
            return str(value)
        raise PydanticCustomError(
            "medium_text_too_long",
            "Medium text must be {max_length} characters or less, got {length}",
            {"max_length": limit, "length": actual},
        )

    @classmethod
    def max_length(cls) -> int:
        """Return the currently configured maximum length for medium text."""
        return _TEXT_LENGTHS.get("medium", 2047)
[文档]
@_ukf_type
class UKFLongTextType(str):
    """Field type for long text, registered under the name ``"long_text"``.

    The maximum length is read from ``_TEXT_LENGTHS["long"]`` and falls back
    to 65535 when that key is missing. The limit is enforced both by the core
    string schema and again inside ``_validate``.

    Args:
        value: String value to validate.

    Returns:
        Optional[str]: The value as ``str``, or ``None`` for ``None`` input.

    Raises:
        PydanticCustomError: ``long_text_too_long`` when the value is longer
            than the limit.

    Examples:
        >>> UKFLongTextType._validate("Very long text content")
        'Very long text content'

        With the default limit of 65535, ``UKFLongTextType._validate("a" * 65536)``
        raises ``PydanticCustomError`` (``long_text_too_long``).
    """

    name = "long_text"

    @classmethod
    def __get_pydantic_core_schema__(cls, source, handler):
        limit = _TEXT_LENGTHS.get("long", 65535)
        length_checked = core_schema.str_schema(max_length=limit)
        return core_schema.no_info_after_validator_function(cls._validate, length_checked)

    @classmethod
    def _validate(cls, value: Optional[str]) -> Optional[str]:
        if value is None:
            return None
        limit = _TEXT_LENGTHS.get("long", 65535)
        actual = len(value)
        if actual <= limit:
            return str(value)
        raise PydanticCustomError(
            "long_text_too_long",
            "Long text must be {max_length} characters or less, got {length}",
            {"max_length": limit, "length": actual},
        )

    @classmethod
    def max_length(cls) -> int:
        """Return the currently configured maximum length for long text."""
        return _TEXT_LENGTHS.get("long", 65535)
[文档]
@_ukf_type
class UKFTimestampType(datetime.datetime):
    """Custom type for datetime fields with UTC conversion and validation.

    Registered under the name ``"timestamp"``. Values are normalised to
    timezone-aware UTC datetimes with the microsecond component zeroed.
    Naive datetimes (and naive ISO strings) are interpreted as already being
    in UTC; aware ones are converted to UTC.

    Args:
        value: A ``datetime``, an ISO 8601 string, or a Unix timestamp in
            seconds (int/float). ``None`` is passed through.

    Returns:
        Optional[datetime.datetime]: UTC datetime with microseconds stripped,
        or ``None`` for ``None`` input.

    Raises:
        PydanticCustomError: ``timestamp_format`` if the value is of an
            unsupported type, is not a parseable ISO string, or is a numeric
            timestamp outside the range the platform can represent.

    Examples:
        >>> UKFTimestampType._validate("2023-01-01T12:00:00Z")
        datetime.datetime(2023, 1, 1, 12, 0, tzinfo=datetime.timezone.utc)
        >>> UKFTimestampType._validate(1672574400)  # Unix timestamp
        datetime.datetime(2023, 1, 1, 12, 0, tzinfo=datetime.timezone.utc)
    """

    name = "timestamp"

    @classmethod
    def __get_pydantic_core_schema__(cls, source, handler):
        return core_schema.no_info_after_validator_function(
            cls._validate,
            core_schema.datetime_schema(),
        )

    @classmethod
    def _validate(cls, value: Optional[Union[datetime.datetime, str, int, float]]) -> Optional[datetime.datetime]:
        if value is None:
            return None
        if isinstance(value, datetime.datetime):
            dt = value
        elif isinstance(value, (int, float)):
            try:
                dt = datetime.datetime.fromtimestamp(value, tz=datetime.timezone.utc)
            except (OverflowError, OSError, ValueError) as e:
                # NaN / out-of-range timestamps: report them like any other bad
                # input instead of leaking OverflowError/OSError, which Pydantic
                # does not turn into validation errors.
                raise PydanticCustomError(
                    "timestamp_format",
                    "Invalid timestamp format: {value}. Error: {e}",
                    {"value": value, "e": str(e)},
                ) from e
        elif isinstance(value, str):
            try:
                try:
                    dt = datetime.datetime.fromisoformat(value)
                except ValueError:
                    # ``fromisoformat`` only understands the "Z" UTC designator
                    # from Python 3.11 on; retry with an explicit offset.
                    if not value.endswith(("Z", "z")):
                        raise
                    dt = datetime.datetime.fromisoformat(value[:-1] + "+00:00")
            except ValueError as e:
                # The exception is stringified so the error context holds plain data.
                raise PydanticCustomError(
                    "timestamp_format",
                    "Invalid timestamp format: {value}. Error: {e}",
                    {"value": value, "e": str(e)},
                ) from e
        else:
            raise PydanticCustomError("timestamp_format", "Invalid timestamp format: {value}", {"value": value})
        if dt.tzinfo is None:
            # Naive values are taken to be UTC already.
            dt = dt.replace(tzinfo=datetime.timezone.utc)
        else:
            dt = dt.astimezone(datetime.timezone.utc)
        return dt.replace(microsecond=0)
[文档]
@_ukf_type
class UKFDurationType(datetime.timedelta):
    """Custom type for duration fields with validation.

    Registered under the name ``"duration"``. Accepts ``timedelta`` objects,
    numbers (interpreted as seconds) and ISO 8601 duration strings, which are
    parsed with ``isodate.parse_duration``.

    NOTE(review): ``isodate.parse_duration`` may return its own ``Duration``
    type rather than ``timedelta`` for durations containing years or months;
    confirm whether downstream code needs to handle that.

    Args:
        value: Timedelta, ISO 8601 duration string, or numeric seconds.
            ``None`` is passed through.

    Returns:
        Optional[datetime.timedelta]: The parsed duration, or ``None`` for
        ``None`` input.

    Raises:
        PydanticCustomError: ``duration_format`` if the value is of an
            unsupported type, is not a parseable duration string, or is a
            numeric value (NaN, infinity, too large) that ``timedelta``
            cannot represent.

    Examples:
        >>> UKFDurationType._validate("P1DT2H")  # 1 day, 2 hours
        datetime.timedelta(days=1, seconds=7200)
        >>> UKFDurationType._validate(3600)  # 1 hour in seconds
        datetime.timedelta(seconds=3600)
    """

    name = "duration"

    @classmethod
    def __get_pydantic_core_schema__(cls, source, handler):
        return core_schema.no_info_after_validator_function(
            cls._validate,
            core_schema.timedelta_schema(),
        )

    @classmethod
    def _validate(cls, value: Optional[Union[datetime.timedelta, str, int, float]]) -> Optional[datetime.timedelta]:
        if value is None:
            return None
        if isinstance(value, datetime.timedelta):
            return value
        if isinstance(value, (int, float)):
            try:
                return datetime.timedelta(seconds=value)
            except (OverflowError, ValueError) as e:
                # NaN raises ValueError and infinity/huge values raise
                # OverflowError; surface both as the documented error type.
                raise PydanticCustomError(
                    "duration_format",
                    "Invalid duration format: {value}. Error: {e}",
                    {"value": value, "e": str(e)},
                ) from e
        if isinstance(value, str):
            try:
                return isodate.parse_duration(value)
            except Exception as e:
                # The exception is stringified so the error context holds plain data.
                raise PydanticCustomError(
                    "duration_format",
                    "Invalid duration format: {value}. Error: {e}",
                    {"value": value, "e": str(e)},
                ) from e
        raise PydanticCustomError("duration_format", "Invalid duration format: {value}", {"value": value})
[文档]
@_ukf_type
class UKFJsonType(dict):
    """Field type for JSON objects, registered under the name ``"json"``.

    The Pydantic schema applies the core dict schema and then runs
    ``_validate``. ``_validate`` decodes string input with ``loads_json``,
    turns ``None`` into an empty dict, and otherwise builds a new ``dict``
    from the value (so the caller's mapping is copied, not returned as-is).

    Args:
        value: Dictionary object or JSON string to validate.

    Returns:
        dict: A new dictionary with the decoded content.

    Raises:
        Whatever ``loads_json`` raises for undecodable strings, and whatever
        ``dict()`` raises when the (decoded) value is not dict-compatible.

    Examples:
        >>> UKFJsonType._validate({"key": "value"})
        {'key': 'value'}
        >>> UKFJsonType._validate('{"key": "value"}')
        {'key': 'value'}
        >>> UKFJsonType._validate(None)
        {}
    """

    name = "json"

    @classmethod
    def __get_pydantic_core_schema__(cls, source, handler):
        base_schema = core_schema.dict_schema()
        return core_schema.no_info_after_validator_function(cls._validate, base_schema)

    @classmethod
    def _validate(cls, value: Optional[Union[Dict, str]]) -> dict:
        # Strings are treated as serialized JSON and decoded first.
        parsed = loads_json(value) if isinstance(value, str) else value
        if parsed is None:
            return dict()
        return dict(parsed)
[文档]
@_ukf_type
class UKFAuthsType(UKFTagsType):
    """Field type registered under the name ``"auths"``.

    Both the schema construction and the validation are delegated unchanged
    to the parent class ``UKFTagsType``; this subclass only supplies its own
    registry name.

    NOTE(review): judging by the ``_validate`` annotation, entries are
    expected to be pairs of strings, lists of strings, or plain strings —
    confirm against ``UKFTagsType``.
    """

    name = "auths"

    @classmethod
    def __get_pydantic_core_schema__(cls, source, handler):
        inherited_schema = super().__get_pydantic_core_schema__(source, handler)
        return inherited_schema

    @classmethod
    def _validate(cls, value: Optional[Iterable[Union[Tuple[str, str], List[str], str]]]) -> set:
        validated = super()._validate(value)
        return validated
[文档]
@_ukf_type
class UKFSynonymsType(set):
    """Field type for synonym collections, registered under the name ``"synonyms"``.

    The Pydantic schema applies a set-of-strings schema and then runs
    ``_validate``. ``_validate`` iterates the input, skips ``None`` entries,
    converts every remaining entry with ``str()`` and strips surrounding
    whitespace; duplicates collapse because the result is a set.

    Args:
        value: Set, list, tuple, or other iterable of synonym values.
            ``None`` yields an empty set.

    Returns:
        set: Set of stripped string synonyms.

    Raises:
        TypeError: If value cannot be iterated over.

    Examples:
        >>> UKFSynonymsType._validate(["synonym1", "synonym1"])
        {'synonym1'}
        >>> UKFSynonymsType._validate(None)
        set()
    """

    name = "synonyms"

    @classmethod
    def __get_pydantic_core_schema__(cls, source, handler):
        item_schema = core_schema.str_schema()
        collection_schema = core_schema.set_schema(item_schema)
        return core_schema.no_info_after_validator_function(cls._validate, collection_schema)

    @classmethod
    def _validate(cls, value: Union[set, list, tuple]) -> set:
        synonyms = set()
        if value is not None:
            for item in value:
                if item is not None:
                    synonyms.add(str(item).strip())
        return synonyms
[文档]
@_ukf_type
class UKFVectorType(list):
    """Custom type for vector fields with validation and serialization.

    Registered under the name ``"vector"``. The Pydantic schema applies a
    list-of-floats schema and then runs ``_validate``, which converts NumPy
    arrays and generic iterables to Python lists of floats.

    Args:
        value: List, tuple, NumPy array, or other iterable of numeric values.
            ``None`` is passed through unchanged.

    Returns:
        Optional[List[float]]: List of floats representing the vector, or
        ``None`` for ``None`` input. NumPy arrays keep their nesting (a 2-D
        array yields a list of lists).

    Raises:
        TypeError: If value cannot be iterated over or an item is of a type
            ``float()`` cannot convert.
        ValueError: If an item (e.g. a string) is not a valid number.

    Examples:
        >>> UKFVectorType._validate([1, 2, 3])
        [1.0, 2.0, 3.0]
        >>> UKFVectorType._validate((4.5, 5.5))
        [4.5, 5.5]
        >>> UKFVectorType._validate(np.array([1, 2]))
        [1.0, 2.0]
        >>> UKFVectorType._validate(None) is None
        True
    """

    name = "vector"

    @classmethod
    def __get_pydantic_core_schema__(cls, source, handler):
        return core_schema.no_info_after_validator_function(
            cls._validate,
            core_schema.list_schema(core_schema.float_schema()),
        )

    @classmethod
    def _validate(cls, value: Optional[Iterable[Union[int, float]]]) -> Optional[List[float]]:
        if value is None:
            return None
        if isinstance(value, np.ndarray):
            # Cast first so integer/bool arrays also come out as floats,
            # matching what the generic iterable path below produces.
            return value.astype(float).tolist()
        return [float(item) for item in value]