Coverage for src/qdrant_loader/config/global_config.py: 100%

28 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-04 05:50 +0000

1"""Global configuration settings. 

2 

3This module defines the global configuration settings that apply across the application, 

4including chunking, embedding, and logging configurations. 

5""" 

6 

7from typing import Any 

8 

9from pydantic import Field 

10 

11from qdrant_loader.config.base import BaseConfig 

12from qdrant_loader.config.chunking import ChunkingConfig 

13from qdrant_loader.config.embedding import EmbeddingConfig 

14from qdrant_loader.config.qdrant import QdrantConfig 

15from qdrant_loader.config.sources import SourcesConfig 

16from qdrant_loader.config.state import StateManagementConfig 

17from qdrant_loader.config.types import GlobalConfigDict 

18from qdrant_loader.core.file_conversion import FileConversionConfig 

19 

20 

class SemanticAnalysisConfig(BaseConfig):
    """Settings that control semantic analysis.

    Attributes:
        num_topics: Number of topics to extract using LDA; defaults to 3.
        lda_passes: Number of passes for LDA training; defaults to 10.
    """

    num_topics: int = Field(description="Number of topics to extract using LDA", default=3)

    lda_passes: int = Field(
        description="Number of passes for LDA training",
        default=10,
    )

29 

30 

class GlobalConfig(BaseConfig):
    """Top-level configuration that groups the per-area configuration models.

    Every section except ``qdrant`` is created from its default factory when
    the caller does not supply it; ``qdrant`` defaults to ``None``. The default
    ``state_management`` section is built with ``database_path=":memory:"``.
    """

    chunking: ChunkingConfig = Field(default_factory=ChunkingConfig)
    embedding: EmbeddingConfig = Field(default_factory=EmbeddingConfig)
    semantic_analysis: SemanticAnalysisConfig = Field(
        description="Semantic analysis configuration",
        default_factory=SemanticAnalysisConfig,
    )
    state_management: StateManagementConfig = Field(
        description="State management configuration",
        default_factory=lambda: StateManagementConfig(database_path=":memory:"),
    )
    sources: SourcesConfig = Field(default_factory=SourcesConfig)
    file_conversion: FileConversionConfig = Field(
        description="File conversion configuration",
        default_factory=FileConversionConfig,
    )
    qdrant: QdrantConfig | None = Field(description="Qdrant configuration", default=None)

    def __init__(self, **data):
        """Build the configuration from keyword data.

        The ``skip_validation`` key is always removed from ``data`` before the
        remaining entries are handed to the base initializer. When that flag is
        truthy and no ``state_management`` entry was supplied, an in-memory
        state-management section is filled in.

        Args:
            **data: Field values for the model, optionally including the
                ``skip_validation`` flag.
        """
        use_in_memory_fallback = data.pop("skip_validation", False)
        if use_in_memory_fallback:
            # setdefault only inserts when the caller gave no section of their own.
            data.setdefault(
                "state_management",
                {
                    "database_path": ":memory:",
                    "table_prefix": "qdrant_loader_",
                    "connection_pool": {"size": 5, "timeout": 30},
                },
            )
        super().__init__(**data)

    def to_dict(self) -> dict[str, Any]:
        """Return the configuration as a nested dictionary.

        Returns:
            A dictionary with the keys ``chunking``, ``embedding``,
            ``semantic_analysis``, ``sources``, ``state_management``,
            ``file_conversion`` and ``qdrant`` (``None`` when no Qdrant
            configuration is set).
        """
        result: dict[str, Any] = {}

        # Sections are added in a fixed order so the resulting key order is stable.
        result["chunking"] = {
            "chunk_size": self.chunking.chunk_size,
            "chunk_overlap": self.chunking.chunk_overlap,
        }
        result["embedding"] = self.embedding.model_dump()

        analysis = self.semantic_analysis
        result["semantic_analysis"] = {
            "num_topics": analysis.num_topics,
            "lda_passes": analysis.lda_passes,
        }

        result["sources"] = self.sources.to_dict()
        result["state_management"] = self.state_management.to_dict()

        conversion = self.file_conversion
        markitdown = conversion.markitdown
        result["file_conversion"] = {
            "max_file_size": conversion.max_file_size,
            "conversion_timeout": conversion.conversion_timeout,
            "markitdown": {
                "enable_llm_descriptions": markitdown.enable_llm_descriptions,
                "llm_model": markitdown.llm_model,
                "llm_endpoint": markitdown.llm_endpoint,
                "llm_api_key": markitdown.llm_api_key,
            },
        }

        if self.qdrant:
            result["qdrant"] = self.qdrant.to_dict()
        else:
            result["qdrant"] = None
        return result