"""\
Dependency management and lazy loading utilities.
This module provides a clean, industrial-standard dependency management system
and utilities for lazy loading modules.
"""
__all__ = [
"DependencyManager",
"DependencyError",
"OptionalDependencyError",
"deps",
"lazy_getattr",
"collect_exports",
"lazy_import_submodules",
"DependencyInfo",
"get_default_dependencies",
]
from typing import Dict, List, Optional, Any, Union
import importlib
import types
from dataclasses import dataclass
[docs]
@dataclass
class DependencyInfo:
"""Information about a dependency."""
name: str
packages: List[str]
install: str
description: str
optional: bool = True
required_for: Optional[List[str]] = None
[docs]
def __post_init__(self):
"""Initialize default values."""
if self.required_for is None:
self.required_for = []
[docs]
def get_default_dependencies() -> dict:
"""Get all default dependency definitions."""
return {
"mysql": DependencyInfo(
name="mysql",
packages=["pymysql"],
install="pip install pymysql mysqlclient",
description="MySQL database support",
required_for=["database", "mysql_connections"],
),
"postgresql": DependencyInfo(
name="postgresql",
packages=["psycopg2-binary"],
install="pip install psycopg2-binary",
description="PostgreSQL database support",
required_for=["database", "postgresql_connections"],
),
"duckdb": DependencyInfo(
name="duckdb",
packages=["duckdb"],
install="pip install duckdb duckdb-engine",
description="DuckDB database support",
required_for=["database", "analytics"],
),
"mssql": DependencyInfo(
name="mssql",
packages=["pyodbc"],
install="pip install pyodbc",
description="Microsoft SQL Server support",
required_for=["database", "mssql_connections"],
),
"spacy": DependencyInfo(
name="spacy",
packages=["spacy"],
install="pip install spacy",
description="spaCy NLP library",
required_for=["nlp", "text_processing"],
),
"fastmcp": DependencyInfo(
name="fastmcp",
packages=["fastmcp"],
install="pip install fastmcp",
description="FastMCP interface",
required_for=["mcp", "interfaces"],
),
"pyahocorasick": DependencyInfo(
name="pyahocorasick",
packages=["ahocorasick"],
install="pip install pyahocorasick",
description="Aho-Corasick automaton for fast string matching",
required_for=["string_search", "pattern_matching"],
),
"chromadb": DependencyInfo(
name="chromadb",
packages=["chromadb"],
install="pip install chromadb",
description="ChromaDB vector database",
required_for=["vector_db", "chroma_integration"],
),
"mongodb": DependencyInfo(
name="mongodb",
packages=["pymongo"],
install="pip install pymongo",
description="MongoDB database support",
required_for=["database", "mongodb_connections"],
),
"milvus": DependencyInfo(
name="milvus",
packages=["pymilvus"],
install="pip install pymilvus",
description="Milvus vector database",
required_for=["vector_db", "milvus_integration"],
),
"lancedb": DependencyInfo(
name="lancedb",
packages=["lancedb", "pyarrow"],
install="pip install lancedb pyarrow",
description="Lance vector database",
required_for=["vector_db", "lance_integration"],
),
"llamaindex": DependencyInfo(
name="llamaindex",
packages=["llama_index"],
install="pip install llama-index llama-index-llms-ollama",
description="LlamaIndex integration",
required_for=["rag", "llm_integration"],
),
"neo4j": DependencyInfo(
name="neo4j",
packages=["neo4j"],
install="pip install neo4j",
description="Neo4j graph database",
required_for=["graph_db", "neo4j_integration"],
),
"snowflake": DependencyInfo(
name="snowflake",
packages=["snowflake-sqlalchemy"],
install="pip install snowflake-sqlalchemy",
description="Snowflake database support",
required_for=["database", "snowflake_connections"],
),
"bigquery": DependencyInfo(
name="bigquery",
packages=["sqlalchemy-bigquery"],
install="pip install sqlalchemy-bigquery",
description="BigQuery database support",
required_for=["database", "bigquery_connections"],
),
"clickhouse": DependencyInfo(
name="clickhouse",
packages=["clickhouse-sqlalchemy"],
install="pip install clickhouse-sqlalchemy",
description="ClickHouse database support",
required_for=["database", "clickhouse_connections"],
),
"trino": DependencyInfo(
name="trino",
packages=["trino"],
install="pip install trino sqlalchemy-trino",
description="Trino database support",
required_for=["database", "trino_connections"],
),
"presto": DependencyInfo(
name="presto",
packages=["pyhive"],
install="pip install pyhive",
description="Presto database support",
required_for=["database", "presto_connections"],
),
"oracle": DependencyInfo(
name="oracle",
packages=["cx_Oracle"],
install="pip install cx_Oracle sqlalchemy",
description="Oracle database support",
required_for=["database", "oracle_connections"],
),
"databricks": DependencyInfo(
name="databricks",
packages=["databricks-sql-connector"],
install="pip install databricks-sql-connector sqlalchemy-databricks",
description="Databricks support",
required_for=["database", "databricks_connections"],
),
"hive": DependencyInfo(
name="hive",
packages=["pyhive"],
install="pip install pyhive thrift sasl thrift_sasl",
description="Hive support",
required_for=["database", "hive_connections"],
),
"starrocks": DependencyInfo(
name="starrocks",
packages=["starrocks"],
install="pip install starrocks sqlalchemy-starrocks",
description="StarRocks support",
required_for=["database", "starrocks_connections"],
),
"hana": DependencyInfo(
name="hana",
packages=["hdbcli"],
install="pip install hdbcli sqlalchemy-hana",
description="SAP HANA support",
required_for=["database", "hana_connections"],
),
"sqlalchemy": DependencyInfo(
name="sqlalchemy",
packages=["sqlalchemy"],
install="pip install sqlalchemy",
description="SQLAlchemy ORM",
required_for=["database", "orm"],
),
"sqlglot": DependencyInfo(
name="sqlglot",
packages=["sqlglot"],
install="pip install sqlglot",
description="SQL Parser and Transpiler",
required_for=["database", "sql_processing"],
),
"prettytable": DependencyInfo(
name="prettytable",
packages=["prettytable"],
install="pip install prettytable",
description="Table display utility",
required_for=["cli", "display"],
),
"litellm": DependencyInfo(
name="litellm",
packages=["litellm"],
install="pip install litellm",
description="LLM interface",
required_for=["llm"],
),
"pandas": DependencyInfo(
name="pandas",
packages=["pandas"],
install="pip install pandas",
description="Data analysis library",
required_for=["analytics"],
),
"numpy": DependencyInfo(
name="numpy",
packages=["numpy"],
install="pip install numpy",
description="Numerical computing library",
required_for=["analytics", "vector"],
),
}
[docs]
class DependencyError(Exception):
"""Dependency-related error."""
pass
[docs]
class OptionalDependencyError(DependencyError, ImportError):
"""Optional dependency not available."""
pass
[docs]
class DependencyManager:
"""Clean dependency management system."""
_instance: Optional["DependencyManager"] = None
[docs]
def __new__(cls) -> "DependencyManager":
"""Singleton pattern."""
if cls._instance is None:
cls._instance = super().__new__(cls)
return cls._instance
[docs]
def __init__(self):
"""Initialize the manager."""
if hasattr(self, "_initialized"):
return
self._deps: Dict[str, DependencyInfo] = {}
self._cache: Dict[str, bool] = {}
self._load_defaults()
self._initialized = True
def _load_defaults(self):
"""Load default dependencies."""
for dep_info in get_default_dependencies().values():
self.add(dep_info)
[docs]
def add(self, dep_info: DependencyInfo) -> None:
"""Add a dependency."""
if not dep_info.name:
raise ValueError("Dependency name cannot be empty")
self._deps[dep_info.name] = dep_info
[docs]
def check(self, name: str) -> bool:
"""Check if a dependency is available."""
if name in self._cache:
return self._cache[name]
if name not in self._deps:
# If not registered, try direct import check
try:
importlib.import_module(name)
self._cache[name] = True
return True
except ImportError:
self._cache[name] = False
return False
dep_info = self._deps[name]
available = False
for pkg in dep_info.packages:
try:
importlib.import_module(pkg)
available = True
break
except ImportError:
continue
self._cache[name] = available
return available
[docs]
def require(self, name: str, feature: str = "") -> None:
"""Require a dependency, raising error if missing."""
if not self.check(name):
if name in self._deps:
dep_info = self._deps[name]
feature_msg = f" for {feature}" if feature else ""
raise OptionalDependencyError(f"{dep_info.description} is required{feature_msg}.\n" f"Install with: {dep_info.install}")
else:
feature_msg = f" for {feature}" if feature else ""
raise OptionalDependencyError(f"Package '{name}' is required{feature_msg}.\n" f"Install with: pip install {name}")
[docs]
def list(self, filter_optional: Optional[bool] = None) -> List[str]:
"""List all dependencies."""
deps = list(self._deps.keys())
if filter_optional is not None:
deps = [name for name in deps if self._deps[name].optional == filter_optional]
return deps
[docs]
def missing(self) -> List[str]:
"""Get list of missing dependencies."""
return [name for name in self._deps if not self.check(name)]
[docs]
def info(self, name: str) -> Dict[str, Any]:
"""Get dependency information."""
if name not in self._deps:
raise KeyError(f"Dependency '{name}' not found")
dep_info = self._deps[name]
return {
"name": name,
"description": dep_info.description,
"packages": dep_info.packages,
"install": dep_info.install,
"optional": dep_info.optional,
"available": self.check(name),
"required_for": dep_info.required_for,
}
[docs]
def clear_cache(self) -> None:
"""Clear dependency cache."""
self._cache.clear()
[docs]
def load(
self,
module_name: str,
package: Optional[str] = None,
error_msg: Optional[str] = None,
) -> types.ModuleType:
"""
Import an optional dependency, raising a clear error if missing.
Args:
module_name: The python module name to import (e.g. "pandas")
package: The pip package name (e.g. "pandas"). Defaults to module_name.
error_msg: Custom error message. If None, generates a standard one.
Returns:
The imported module.
Raises:
OptionalDependencyError: If the module cannot be imported.
"""
try:
return importlib.import_module(module_name)
except ImportError as e:
# Check if the error is actually due to the module we want, or a sub-dependency
if e.name and e.name != module_name and not module_name.startswith(e.name + "."):
# It's a sub-dependency error, re-raise it as is to avoid confusion
raise
pkg_name = package or module_name
# Check if we have info in the registry
if pkg_name in self._deps:
dep_info = self._deps[pkg_name]
install_cmd = dep_info.install
desc = dep_info.description
else:
install_cmd = f"pip install {pkg_name}"
desc = f"Package '{pkg_name}'"
if error_msg:
msg = f"{error_msg}\nInstall with: {install_cmd}"
else:
msg = f"{desc} is required but not installed.\nInstall with: {install_cmd}"
raise OptionalDependencyError(msg) from e
# Global instance
deps = DependencyManager()
# Lazy loading utilities
[docs]
def lazy_getattr(name: str, export_map: Dict[str, str], package: str):
"""\
Helper function to implement __getattr__ for lazy loading modules.
Args:
name: The attribute name being accessed.
export_map: A dictionary mapping attribute names to relative module paths (e.g., { "MyClass": ".my_module" }).
package: The package name (usually __name__ of the calling module).
Returns:
The requested attribute from the imported module.
Raises:
AttributeError: If the name is not in the export_map.
"""
if name in export_map:
module_path = export_map[name]
module = importlib.import_module(module_path, package)
return getattr(module, name)
raise AttributeError(f"module {package!r} has no attribute {name!r}")
[docs]
def collect_exports(package_names: List[str], parent_package: str) -> Dict[str, str]:
"""\
Collects exported names from a list of subpackages to build a master lazy map.
Args:
package_names: List of relative package names (e.g., ["klstore", "klengine"]).
parent_package: The parent package name (usually __name__).
Returns:
A dictionary mapping exported names to their relative package path (e.g., { "DatabaseKLStore": ".klstore" }).
"""
lazy_map = {}
for pkg_name in package_names:
# Import the subpackage (assumed to be lightweight/lazy itself)
full_pkg_name = f".{pkg_name}"
pkg = importlib.import_module(full_pkg_name, parent_package)
# Get its __all__
exports = getattr(pkg, "__all__", [])
for item in exports:
lazy_map[item] = full_pkg_name
return lazy_map
[docs]
def lazy_import_submodules(name: str, submodules: List[str], package: str):
"""\
Helper function to lazy load submodules.
Args:
name: The attribute name being accessed.
submodules: List of submodule names (relative).
package: The package name.
Returns:
The imported module or None.
"""
if name in submodules:
return importlib.import_module(f".{name}", package)
return None