Coverage for src/qdrant_loader/config/global_config.py: 100%
28 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:50 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:50 +0000
1"""Global configuration settings.
3This module defines the global configuration settings that apply across the application,
4including chunking, embedding, semantic analysis, state management, sources, file conversion, and Qdrant configurations.
5"""
7from typing import Any
9from pydantic import Field
11from qdrant_loader.config.base import BaseConfig
12from qdrant_loader.config.chunking import ChunkingConfig
13from qdrant_loader.config.embedding import EmbeddingConfig
14from qdrant_loader.config.qdrant import QdrantConfig
15from qdrant_loader.config.sources import SourcesConfig
16from qdrant_loader.config.state import StateManagementConfig
17from qdrant_loader.config.types import GlobalConfigDict
18from qdrant_loader.core.file_conversion import FileConversionConfig
class SemanticAnalysisConfig(BaseConfig):
    """Settings for the semantic analysis step.

    Both fields carry defaults, so neither has to be supplied explicitly.
    """

    # How many topics to extract using LDA; defaults to 3.
    num_topics: int = Field(default=3, description="Number of topics to extract using LDA")

    # How many passes LDA training performs; defaults to 10.
    lda_passes: int = Field(
        default=10,
        description="Number of passes for LDA training",
    )
class GlobalConfig(BaseConfig):
    """Application-wide settings, grouped into one sub-configuration per area.

    Every field has a default: most are built from their config class with no
    arguments, state management defaults to an in-memory database, and
    ``qdrant`` defaults to ``None``.
    """

    chunking: ChunkingConfig = Field(default_factory=ChunkingConfig)
    embedding: EmbeddingConfig = Field(default_factory=EmbeddingConfig)
    semantic_analysis: SemanticAnalysisConfig = Field(
        description="Semantic analysis configuration",
        default_factory=SemanticAnalysisConfig,
    )
    state_management: StateManagementConfig = Field(
        description="State management configuration",
        # Default to an in-memory database when nothing is configured.
        default_factory=lambda: StateManagementConfig(database_path=":memory:"),
    )
    sources: SourcesConfig = Field(default_factory=SourcesConfig)
    file_conversion: FileConversionConfig = Field(
        description="File conversion configuration",
        default_factory=FileConversionConfig,
    )
    qdrant: QdrantConfig | None = Field(description="Qdrant configuration", default=None)

    def __init__(self, **data):
        """Initialize the global configuration from keyword arguments.

        The ``skip_validation`` keyword is consumed here and is never passed
        on to the base initializer. When it is truthy and the caller gave no
        ``state_management`` entry, an in-memory state database is configured.

        Args:
            **data: Field values, plus the optional ``skip_validation`` flag.
        """
        wants_in_memory_state = data.pop("skip_validation", False)
        if wants_in_memory_state:
            # setdefault only fills the key when the caller did not supply it.
            data.setdefault(
                "state_management",
                {
                    "database_path": ":memory:",
                    "table_prefix": "qdrant_loader_",
                    "connection_pool": {"size": 5, "timeout": 30},
                },
            )
        super().__init__(**data)

    def to_dict(self) -> dict[str, Any]:
        """Convert the configuration to a nested dictionary.

        Returns:
            A dictionary with one entry per configuration area. The
            ``qdrant`` entry is ``None`` when no Qdrant configuration is set.
        """
        result: dict[str, Any] = {}

        chunking = self.chunking
        result["chunking"] = {
            "chunk_size": chunking.chunk_size,
            "chunk_overlap": chunking.chunk_overlap,
        }

        result["embedding"] = self.embedding.model_dump()

        semantic = self.semantic_analysis
        result["semantic_analysis"] = {
            "num_topics": semantic.num_topics,
            "lda_passes": semantic.lda_passes,
        }

        result["sources"] = self.sources.to_dict()
        result["state_management"] = self.state_management.to_dict()

        conversion = self.file_conversion
        markitdown = conversion.markitdown
        # NOTE: the LLM API key is copied into the output verbatim.
        result["file_conversion"] = {
            "max_file_size": conversion.max_file_size,
            "conversion_timeout": conversion.conversion_timeout,
            "markitdown": {
                "enable_llm_descriptions": markitdown.enable_llm_descriptions,
                "llm_model": markitdown.llm_model,
                "llm_endpoint": markitdown.llm_endpoint,
                "llm_api_key": markitdown.llm_api_key,
            },
        }

        if self.qdrant:
            result["qdrant"] = self.qdrant.to_dict()
        else:
            result["qdrant"] = None

        return result