# Source code for ahvn.utils.vdb.compiler

"""
Vector Filter Compiler for KLOp JSON IR.

This module provides functionality to compile KLOp JSON IR expressions
into LlamaIndex MetadataFilters for vector database backends.
"""

from __future__ import annotations

__all__ = ["VectorCompiler"]

from typing import Any, Dict, Optional, Union, TYPE_CHECKING

from ..deps import deps

if TYPE_CHECKING:
    from llama_index.core.vector_stores import (
        MetadataFilters,
        MetadataFilter,
        ExactMatchFilter,
    )

from ..basic.log_utils import get_logger
from ..basic.debug_utils import error_str

logger = get_logger(__name__)


def get_llama_index_filters():
    """Return whatever ``deps.load`` yields for ``llama_index.core.vector_stores``.

    The rest of this module reads ``MetadataFilters``, ``MetadataFilter`` and
    ``ExactMatchFilter`` as attributes of the returned object.

    NOTE(review): how ``deps.load`` reports a missing package is defined
    outside this module -- presumably an ImportError; confirm.
    """
    module_path = "llama_index.core.vector_stores"
    return deps.load(module_path)


class VectorCompiler:
    """Compiler that converts KLOp JSON IR to LlamaIndex filters.

    Conventions used throughout the class:

    * ``None`` means "no filter" (match everything).
    * ``MetadataFilters(filters=[], condition="or")`` is used as the
      "never matches" filter (an OR over zero alternatives).
      NOTE(review): whether every vector store backend honours the empty OR
      as match-nothing is not verified here.
    """

    # Prefix of IR keys that introduce a field context, e.g. "FIELD:name".
    _FIELD_PREFIX = "FIELD:"

    # KLOp comparison operator -> LlamaIndex operator string.
    # "==" and "IN" are handled separately in ``_parse_op`` and therefore
    # intentionally absent from this table.
    _OPERATOR_MAP = {
        "!=": "!=",
        "<": "<",
        "<=": "<=",
        ">": ">",
        ">=": ">=",
        "LIKE": "text_match",
        "ILIKE": "text_match_insensitive",
    }

    @staticmethod
    def _to_filters(*expr_filters, op="and") -> MetadataFilters:
        """Combine multiple filter expressions into MetadataFilters.

        Args:
            *expr_filters: LlamaIndex filter objects, or lists/tuples/sets of
                them (flattened one level). ``None`` entries and anything that
                is not a LlamaIndex filter object (e.g. raw dicts) are skipped.
            op: Logical condition handed to ``MetadataFilters``
                ("and" or "or").

        Returns:
            MetadataFilters object combining all accepted filters. The filter
            list may be empty if nothing was accepted.
        """
        filters_mod = get_llama_index_filters()
        _MetadataFilters = filters_mod.MetadataFilters
        filter_types = (
            filters_mod.MetadataFilter,
            filters_mod.ExactMatchFilter,
            _MetadataFilters,
        )

        normalized = []
        for expr_filter in expr_filters:
            # Containers contribute their elements; anything else is treated
            # as a single candidate. Non-filter candidates are dropped.
            if isinstance(expr_filter, (list, tuple, set)):
                candidates = expr_filter
            else:
                candidates = (expr_filter,)
            normalized.extend(item for item in candidates if isinstance(item, filter_types))
        return _MetadataFilters(filters=normalized, condition=op)

    @staticmethod
    def _parse_op(key: str, op: str, val: Any) -> Union[MetadataFilter, MetadataFilters]:
        """Build LlamaIndex filter expression for a specific operator.

        Args:
            key: Metadata field key
            op: Operator type (==, !=, <, >, <=, >=, LIKE, ILIKE, IN)
            val: Value for the operator

        Returns:
            ``ExactMatchFilter`` for ``==``; an OR-combined ``MetadataFilters``
            of exact matches for ``IN``; otherwise a ``MetadataFilter`` with
            the mapped operator.

        Raises:
            ValueError: If the operator is unknown, or if ``IN`` is given a
                value that is not a list, tuple, or set.
        """
        filters_mod = get_llama_index_filters()
        _MetadataFilter = filters_mod.MetadataFilter
        _ExactMatchFilter = filters_mod.ExactMatchFilter

        if op == "==":
            return _ExactMatchFilter(key=key, value=val)

        if op == "IN":
            if not isinstance(val, (list, tuple, set)):
                raise ValueError("IN operator requires a list, tuple, or set of values")
            # IN is expanded to an OR over exact matches; an empty collection
            # therefore yields an OR with no alternatives.
            alternatives = [_ExactMatchFilter(key=key, value=v) for v in val]
            return VectorCompiler._to_filters(alternatives, op="or")

        llama_op = VectorCompiler._OPERATOR_MAP.get(op)
        if llama_op is None:
            # Previously unknown operators silently became "in", producing a
            # filter with unrelated semantics; fail loudly instead.
            raise ValueError(f"Unknown operator '{op}' for field '{key}'.")
        return _MetadataFilter(key=key, value=val, operator=llama_op)

    @staticmethod
    def _parse(
        field: Optional[str] = None,
        expr: Optional[Dict[str, Any]] = None,
    ) -> Optional[Union[MetadataFilter, MetadataFilters]]:
        """Recursively build LlamaIndex filter objects from filter nodes.

        Args:
            field: Current field context for operator expressions
            expr: The filter expression dictionary to parse (exactly one key)

        Returns:
            LlamaIndex filter object, or None when the expression is empty or
            reduces to "no filter" (e.g. ``AND`` over zero conditions).

        Raises:
            NotImplementedError: If ``expr`` has more than one root key.
            ValueError: If the expr structure is invalid. Any exception raised
                while processing the key is re-raised as ValueError with the
                original exception chained as its cause.
        """
        if not expr:
            return None
        if len(expr) > 1:
            raise NotImplementedError("Complex expressions with multiple root keys not supported.")

        op, val = next(iter(expr.items()))
        try:
            if op in ("AND", "OR"):
                children = [VectorCompiler._parse(field=field, expr=v) for v in val]
                children = [child for child in children if child is not None]
                if not children:
                    # AND([]) = TRUE  -> no filter (match all)
                    # OR([])  = FALSE -> empty OR filter (never matches)
                    if op == "AND":
                        return None
                    return get_llama_index_filters().MetadataFilters(filters=[], condition="or")
                return VectorCompiler._to_filters(*children, op=op.lower())

            if op == "NOT":
                filters_mod = get_llama_index_filters()
                inner = VectorCompiler._parse(field=field, expr=val)
                if inner is None:
                    # NOT(match all) = match nothing. Avoids placing None in
                    # the filter list, which is not a valid filter entry.
                    return filters_mod.MetadataFilters(filters=[], condition="or")
                return filters_mod.MetadataFilters(filters=[inner], condition="not")

            if op.startswith(VectorCompiler._FIELD_PREFIX):
                if field is not None:
                    raise ValueError(f"Nested FIELD: {op} inside {field} not allowed.")
                # Slice off only the leading prefix so field names that
                # themselves contain "FIELD:" are kept intact.
                field_name = op[len(VectorCompiler._FIELD_PREFIX):]
                return VectorCompiler._parse(field=field_name, expr=val)

            if field is None:
                raise ValueError(f"Operator '{op}' requires a field context (FIELD:).")
            return VectorCompiler._parse_op(field, op, val)
        except Exception as e:
            raise ValueError(f"Error processing expression key '{op}'.\n{expr}\n{error_str(e)}") from e

    @staticmethod
    def compile(expr: Optional[Dict[str, Any]] = None, **kwargs) -> Optional[MetadataFilters]:
        """Convert a KLOp JSON IR to LlamaIndex MetadataFilters.

        Args:
            expr: The parsed filter expression dictionary (optional)
            **kwargs: Filter conditions as key-value pairs; converted to an
                expression with ``KLOp.expr(**kwargs)``.

        Returns:
            LlamaIndex MetadataFilters object (AND of ``expr`` and the
            ``kwargs`` expression), or None when there is nothing to filter on.

        Raises:
            ImportError: If LlamaIndex is not available (presumably raised by
                ``deps.load``; that behaviour is defined outside this module).
            ValueError: If filter structure is invalid
        """
        get_llama_index_filters()  # Ensure LlamaIndex is available
        from ..klop import KLOp

        parsed = []
        if expr:
            parsed.append(VectorCompiler._parse(expr=expr))
        if kwargs:
            parsed.append(VectorCompiler._parse(expr=KLOp.expr(**kwargs)))

        # A None parse result means "no filter"; if nothing else remains,
        # report that as None rather than an empty AND MetadataFilters.
        parsed = [item for item in parsed if item is not None]
        if not parsed:
            return None
        return VectorCompiler._to_filters(*parsed)