# Source code for ahvn.utils.klop

"""Unified filter builder for all KL backends.

This module provides a unified filter system that creates backend-agnostic
JSON intermediate representation (IR) which can be compiled to different
backend formats (SQL, Vector DB, MongoDB).

Architecture:
- Stage 1 (Shared): expr(**kwargs) creates JSON IR
- Stage 2 (Backend-Specific): Adapters compile JSON IR to target format
  - OrmFilterAdapter → SQLAlchemy (SQL)
  - VdbFilterAdapter → LlamaIndex MetadataFilters (Vector DB)
  - MdbFilterAdapter → MongoDB MQL (MongoDB)

Example:
    >>> # Stage 1: Create backend-agnostic JSON IR
    >>> expr = KLOp.expr(
    ...     priority=KLOp.BETWEEN(0, 100),
    ...     status=KLOp.IN(["active", "pending"])
    ... )
    >>> # Result: {'AND': [
    ...     {'FIELD:priority': {'AND': [{'>=': 0}, {'<=': 100}]}},
    ...     {'FIELD:status': {'OR': [{'IN': ['active', 'pending']}]}}
    ... ]}

    >>> # Stage 2: Compile to backend-specific format (via adapters)
    >>> # sql_clause = OrmFilterAdapter.parse(orms, expr)
    >>> # vdb_filters = VdbFilterAdapter.parse(expr)
    >>> # mql_query = MdbFilterAdapter.parse(expr)
"""

# Public API of this module: only the KLOp facade is exported.
__all__ = ["KLOp"]

from typing import Any, Dict, List, Optional, Union, Set, Tuple
import datetime
from dataclasses import dataclass


class _KLOp:
    """Common ancestor of every filter operator in this module.

    The class carries no state or behavior of its own; it exists so that
    operator instances can be recognized with a single ``isinstance`` check.
    """


# Standard operators (shared across all backends)
@dataclass
class _LIKE(_KLOp):
    """Pattern-matching operator (exposed as ``KLOp.LIKE``).

    ``KLOp._expr`` serializes an instance as ``{"LIKE": v}``.

    Backend notes:
    - SQL: uses the SQL LIKE pattern syntax (``%``, ``_``)
    - Vector DB: uses text_match
    - MongoDB: converted to ``$regex``
    """

    # The pattern string to match against.
    v: str


@dataclass
class _ILIKE(_KLOp):
    """Case-insensitive pattern-matching operator (exposed as ``KLOp.ILIKE``).

    ``KLOp._expr`` serializes an instance as ``{"ILIKE": v}``.

    Backend notes:
    - SQL: uses the SQL ILIKE pattern
    - Vector DB: uses text_match_insensitive
    - MongoDB: converted to ``$regex`` with the ``'i'`` option
    """

    # The pattern string to match against, ignoring case.
    v: str


@dataclass
class _BETWEEN(_KLOp):
    """Inclusive range operator (exposed as ``KLOp.BETWEEN``).

    Equivalent to ``field >= min AND field <= max``. Either bound may be left
    as ``None``; ``KLOp._expr`` then substitutes ``float("-inf")`` for a
    missing lower bound and ``float("inf")`` for a missing upper bound.
    """

    # Lower bound (inclusive); None means "no lower bound".
    min: Optional[Union[int, float, datetime.datetime]] = None
    # Upper bound (inclusive); None means "no upper bound".
    max: Optional[Union[int, float, datetime.datetime]] = None


@dataclass
class _LT(_KLOp):
    """Strict "less than" comparison; serialized by ``KLOp._expr`` as ``{"<": v}``."""

    # Exclusive upper limit the field is compared against.
    v: Union[int, float, datetime.datetime]


@dataclass
class _LTE(_KLOp):
    """"Less than or equal" comparison; serialized by ``KLOp._expr`` as ``{"<=": v}``."""

    # Inclusive upper limit the field is compared against.
    v: Union[int, float, datetime.datetime]


@dataclass
class _GT(_KLOp):
    """Strict "greater than" comparison; serialized by ``KLOp._expr`` as ``{">": v}``."""

    # Exclusive lower limit the field is compared against.
    v: Union[int, float, datetime.datetime]


@dataclass
class _GTE(_KLOp):
    """"Greater than or equal" comparison; serialized by ``KLOp._expr`` as ``{">=": v}``."""

    # Inclusive lower limit the field is compared against.
    v: Union[int, float, datetime.datetime]


@dataclass
class _AND(_KLOp):
    """Logical conjunction (exposed as ``KLOp.AND``).

    ``KLOp._expr`` converts every element of ``v`` and wraps the results as
    ``{"AND": [...]}``.
    """

    # Conditions that must all hold (plain values or nested operators).
    v: List[Any]


@dataclass
class _OR(_KLOp):
    """Logical disjunction (exposed as both ``KLOp.OR`` and ``KLOp.IN``).

    ``KLOp._expr`` wraps the alternatives as ``{"OR": [...]}``; plain values
    among them are gathered into a single ``{"IN": [...]}`` entry.
    """

    # Alternatives, any of which may match (plain values, dicts, or operators).
    v: List[Any]


@dataclass
class _NOT(_KLOp):
    """Logical negation (exposed as ``KLOp.NOT``).

    ``KLOp._expr`` converts the wrapped condition and emits ``{"NOT": ...}``.
    """

    # The condition being negated (a plain value or a nested operator).
    v: Any


@dataclass
class _NF(_KLOp):
    """NF (Normalized Form) operator for querying tags/auths.

    Targets normalized multi-valued fields such as tags and auths.

    Backend notes:
    - SQL: compiled to an EXISTS subquery
    - Vector DB: compiled to metadata filters
    - MongoDB: compiled to ``$elemMatch``

    Keyword values are normalized at construction time:
    - KLOp operators (GT, LT, BETWEEN, etc.) are converted to their JSON IR
    - lists/sets/tuples are converted to an OR/IN expression
    - anything else is stored unchanged

    Examples:
        >>> # Simple value
        >>> KLOp.expr(tags=KLOp.NF(slot="type", value="security"))

        >>> # Explicit OR
        >>> KLOp.expr(tags=KLOp.NF(slot="type", value=KLOp.OR(["security", "privacy"])))

        >>> # Implicit list-to-OR conversion
        >>> KLOp.expr(tags=KLOp.NF(slot="type", value=["security", "privacy"]))

        >>> # With other operators
        >>> KLOp.expr(tags=KLOp.NF(slot="priority", value=KLOp.BETWEEN(0, 100)))
    """

    # Mapping of keyword name -> normalized condition (see class docstring).
    v: Dict[str, Any]

    def __init__(self, **kwargs):
        """Normalize every keyword argument and store the result in ``self.v``."""
        normalized = {}
        for name, raw in kwargs.items():
            if isinstance(raw, _KLOp):
                # Operator instance: store its JSON IR form.
                normalized[name] = KLOp._expr(raw)
            elif isinstance(raw, (list, set, tuple)):
                # Plain collection: treat as "any of these" (OR/IN).
                normalized[name] = KLOp._expr(KLOp.OR(list(raw)))
            else:
                # Scalar (or any other object): keep as given.
                normalized[name] = raw
        self.v = normalized


# MongoDB-specific operators (optional for other backends)
@dataclass
class _JSON(_KLOp):
    """JSON operator for nested field queries (MongoDB-specific).

    Addresses nested fields of MongoDB documents through dot notation and
    supports value matching, comparison operators, and existence checks.
    Other backends may not support this operator.

    Keyword values are normalized at construction time: operator instances
    are converted to their JSON IR, lists/sets/tuples become an OR/IN
    expression, and every other value (including ``...``) is stored as given.

    Use Cases:
        1. Single key-value matching:
            >>> KLOp.expr(metadata=KLOp.JSON(role="admin"))
            >>> # MongoDB: {"metadata.role": "admin"}

        2. Nested path with dot notation:
            >>> KLOp.expr(data=KLOp.JSON(**{"user.role": "admin"}))
            >>> # MongoDB: {"data.user.role": "admin"}

        3. With comparison operators:
            >>> KLOp.expr(metadata=KLOp.JSON(count=KLOp.GT(100)))
            >>> # MongoDB: {"metadata.count": {"$gt": 100}}

        4. Field existence check (value=...):
            >>> KLOp.expr(metadata=KLOp.JSON(email=...))
            >>> # MongoDB: {"metadata.email": {"$exists": true}}

        5. Field non-existence check (value=NOT(...)):
            >>> KLOp.expr(metadata=KLOp.JSON(optional=KLOp.NOT(...)))
            >>> # MongoDB: {"metadata.optional": {"$exists": false}}

        6. Multiple conditions (AND of all):
            >>> KLOp.expr(metadata=KLOp.JSON(type="categorical", status="active"))
            >>> # MongoDB: {"$and": [{"metadata.type": "categorical"}, {"metadata.status": "active"}]}

        7. Multiple conditions with operators:
            >>> KLOp.expr(metadata=KLOp.JSON(count=KLOp.GT(100), status=KLOp.IN(["active", "pending"])))
            >>> # MongoDB: {"$and": [{"metadata.count": {"$gt": 100}}, {"metadata.status": {"$in": ["active", "pending"]}}]}

        8. Implicit list-to-OR conversion:
            >>> KLOp.expr(metadata=KLOp.JSON(role=["admin", "superuser"]))
            >>> # MongoDB: {"metadata.role": {"$in": ["admin", "superuser"]}}
    """

    # Mapping of nested key/path -> normalized condition (see class docstring).
    v: Dict[str, Any]

    def __init__(self, **kwargs):
        """Normalize every keyword argument and store the result in ``self.v``."""
        normalized = {}
        for path, raw in kwargs.items():
            if isinstance(raw, _KLOp):
                # Operator instance: store its JSON IR form.
                normalized[path] = KLOp._expr(raw)
            elif isinstance(raw, (list, set, tuple)):
                # Plain collection: treat as "any of these" (OR/IN).
                normalized[path] = KLOp._expr(KLOp.OR(list(raw)))
            else:
                # Scalar (or any other object): keep as given.
                normalized[path] = raw
        self.v = normalized


class KLOp:
    """Unified filter builder for all KL backends.

    This class provides a two-stage architecture:
    1. expr(**kwargs) - Creates backend-agnostic JSON IR (shared)
    2. parse(expr) - Compiles JSON IR to backend format (via adapters)

    Supported backends:
    - SQL/SQLAlchemy (OrmFilterAdapter)
    - Vector DB/LlamaIndex (VdbFilterAdapter)
    - MongoDB/MQL (MdbFilterAdapter)

    The JSON IR format uses:
    - "FIELD:<name>" keys to indicate field context
    - Operator keys: "==", "!=", "<", "<=", ">", ">=", "LIKE", "ILIKE", "IN"
    - Logical operators: "AND", "OR", "NOT"
    - Special operators: "NF", "JSON"
    - Ellipsis (``...``): emitted by ``_expr`` in place of a ``None``
      condition (described as a field existence check for MongoDB)

    Example:
        >>> # Create JSON IR
        >>> expr = KLOp.expr(
        ...     priority=KLOp.BETWEEN(0, 100),
        ...     status=KLOp.IN(["active", "pending"]),
        ...     description=KLOp.LIKE("%test%")
        ... )
        >>>
        >>> # Compile to backend-specific format (via adapters)
        >>> # sql_clause = OrmFilterAdapter.parse(orms, expr)
        >>> # vdb_filters = VdbFilterAdapter.parse(expr)
        >>> # mql_query = MdbFilterAdapter.parse(expr)
    """

    # Standard operator aliases (shared across all backends)
    LIKE = _LIKE
    ILIKE = _ILIKE
    BETWEEN = _BETWEEN
    LT = _LT
    LTE = _LTE
    GT = _GT
    GTE = _GTE
    AND = _AND
    OR = _OR
    NOT = _NOT
    IN = _OR  # Alias: IN is semantically OR
    NF = _NF
    JSON = _JSON

    @staticmethod
    def _is_value(value: Any) -> bool:
        """Check if a value is a simple value (not an operator or collection).

        Args:
            value: The value to check

        Returns:
            True if the value is a simple value, False if it is an operator
            instance or a list, set, tuple or dict.

        Example:
            >>> KLOp._is_value(42)
            True
            >>> KLOp._is_value(KLOp.GT(42))
            False
            >>> KLOp._is_value([1, 2, 3])
            False
        """
        # Builtin types are used instead of the deprecated typing aliases;
        # isinstance() against typing.List etc. resolves to the same builtins.
        return not isinstance(value, (_KLOp, list, set, tuple, dict))

    @staticmethod
    def _or_expr(items: Any) -> Dict[str, Any]:
        """Build the ``{"OR": [...]}`` IR for a collection of alternatives.

        Shared by plain list/set conditions and explicit ``OR``/``IN``
        operators. The resulting list contains, in this order:
        1. the IR of every nested operator/collection,
        2. one ``{"IN": [...]}`` entry holding all simple values (if any),
        3. one ``expr(**d)`` group per dict element.

        Args:
            items: Iterable of alternatives (simple values, dicts, operators
                or nested collections).

        Returns:
            Dictionary of the form ``{"OR": [...]}``.
        """
        simple, groups, nested = [], [], []
        # Single pass so each element is classified exactly once.
        for item in items:
            if KLOp._is_value(item):
                simple.append(item)
            elif isinstance(item, dict):
                groups.append(item)
            else:
                nested.append(item)

        or_list = [KLOp._expr(item) for item in nested]
        if simple:
            or_list.append({"IN": simple})
        # Each dict is treated as a separate AND group within the OR.
        or_list.extend(KLOp.expr(**group) for group in groups)
        return {"OR": or_list}

    @staticmethod
    def _range_expr(lower: Any, upper: Any) -> Dict[str, Any]:
        """Build the inclusive range IR ``{"AND": [{">=": lo}, {"<=": hi}]}``.

        A bound given as ``None`` is replaced by ``float("-inf")`` (lower) or
        ``float("inf")`` (upper).
        """
        return {
            "AND": [
                {">=": lower if lower is not None else float("-inf")},
                {"<=": upper if upper is not None else float("inf")},
            ]
        }

    @staticmethod
    def _expr(value: Any) -> Any:
        """Parse a single value or expression into its JSON IR representation.

        This method recursively converts operator objects to JSON intermediate
        representation that is backend-agnostic.

        Args:
            value: The value or operator to parse. Besides operator instances,
                a list/set is read as OR/IN and a tuple as a BETWEEN shorthand
                (only elements 0 and 1 are read).

        Returns:
            The parsed condition in JSON IR format: a dictionary, or the
            ``Ellipsis`` object when ``value`` is ``None``.

        Raises:
            IndexError: If a tuple shorthand has fewer than two elements.

        Example:
            >>> KLOp._expr(KLOp.BETWEEN(0, 100))
            {'AND': [{'>=': 0}, {'<=': 100}]}
            >>> KLOp._expr(KLOp.NOT("test"))
            {'NOT': {'==': 'test'}}
            >>> KLOp._expr(KLOp.LIKE("%pattern%"))
            {'LIKE': '%pattern%'}
        """
        # Logical operators
        if isinstance(value, KLOp.NOT):
            return {"NOT": KLOp._expr(value.v)}
        if isinstance(value, KLOp.AND):
            return {"AND": [KLOp._expr(v) for v in value.v]}

        # OR/IN operators with mixed values
        if isinstance(value, (list, set)):
            return KLOp._or_expr(value)
        if isinstance(value, (KLOp.IN, KLOp.OR)):
            return KLOp._or_expr(value.v)

        # Range operators
        if isinstance(value, KLOp.BETWEEN):
            return KLOp._range_expr(value.min, value.max)
        if isinstance(value, tuple):
            # Shorthand for BETWEEN
            return KLOp._range_expr(value[0], value[1])

        # Operators whose IR is simply {key: payload}; the order mirrors the
        # original checks: NF, pattern matching, comparisons, then JSON.
        for op_cls, key in (
            (KLOp.NF, "NF"),
            (KLOp.LIKE, "LIKE"),
            (KLOp.ILIKE, "ILIKE"),
            (KLOp.LT, "<"),
            (KLOp.LTE, "<="),
            (KLOp.GT, ">"),
            (KLOp.GTE, ">="),
            (KLOp.JSON, "JSON"),
        ):
            if isinstance(value, op_cls):
                return {key: value.v}

        # Special handling for None: field existence check.
        # NOTE(review): None is mapped to the Ellipsis object here, while the
        # JSON operator docs use ``...`` as the *input* marker for existence
        # checks - confirm against the adapters which direction is intended.
        if value is None:
            return ...

        # Default: exact match
        return {"==": value}

    @staticmethod
    def expr(**kwargs) -> Optional[Dict[str, Any]]:
        """Parse multiple filter conditions into a JSON filter structure.

        This is the main entry point for creating backend-agnostic filter
        expressions. The resulting JSON IR can be compiled to any backend
        format using the appropriate adapter.

        Args:
            **kwargs: Filter conditions as key-value pairs.

        Returns:
            The parsed filter conditions in JSON IR format, using
            "FIELD:<name>" keys to indicate field context:
            - two or more conditions: ``{"AND": [<cond>, ...]}``
            - exactly one condition: that condition alone (no "AND" wrapper)
            - no conditions: ``None``

        Example:
            >>> # Simple conditions
            >>> KLOp.expr(status="active", priority=50)
            {'AND': [
                {'FIELD:status': {'==': 'active'}},
                {'FIELD:priority': {'==': 50}}
            ]}

            >>> # Complex conditions
            >>> KLOp.expr(
            ...     description=KLOp.NOT("def"),
            ...     version="v1.0.0",
            ...     priority=KLOp.BETWEEN(0, 100)
            ... )
            {
                'AND': [
                    {'FIELD:description': {'NOT': {'==': 'def'}}},
                    {'FIELD:version': {'==': 'v1.0.0'}},
                    {'FIELD:priority': {'AND': [{'>=': 0}, {'<=': 100}]}}
                ]
            }

            >>> # MongoDB-specific features (plain JSON/NF values stay raw)
            >>> KLOp.expr(
            ...     metadata=KLOp.JSON(role="admin"),
            ...     tags=KLOp.NF(slot="type", value="security")
            ... )
            {
                'AND': [
                    {'FIELD:metadata': {'JSON': {'role': 'admin'}}},
                    {'FIELD:tags': {'NF': {'slot': 'type', 'value': 'security'}}}
                ]
            }

            >>> # JSON with multiple fields; a single condition is not wrapped in AND
            >>> KLOp.expr(
            ...     metadata=KLOp.JSON(type="categorical", status="active", count=KLOp.GT(100))
            ... )
            {'FIELD:metadata': {'JSON': {
                'type': 'categorical',
                'status': 'active',
                'count': {'>': 100}
            }}}
        """
        exprs = [{f"FIELD:{k}": KLOp._expr(v)} for k, v in kwargs.items()]
        if not exprs:
            return None
        if len(exprs) == 1:
            return exprs[0]
        return {"AND": exprs}