Coverage for src / qdrant_loader / config / global_config.py: 100%

31 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-06-11 09:38 +0000

1"""Global configuration settings. 

2 

3This module defines the global configuration settings that apply across the application, 

4including chunking, embedding, and logging configurations. 

5""" 

6 

7from typing import Any 

8 

9from pydantic import Field 

10 

11from qdrant_loader.config.base import BaseConfig 

12from qdrant_loader.config.chunking import ChunkingConfig 

13from qdrant_loader.config.embedding import EmbeddingConfig 

14from qdrant_loader.config.qdrant import QdrantConfig 

15from qdrant_loader.config.sources import SourcesConfig 

16from qdrant_loader.config.state import StateManagementConfig 

17from qdrant_loader.config.workers import WorkersConfig 

18from qdrant_loader.core.file_conversion import FileConversionConfig 

19 

20 

class SemanticAnalysisConfig(BaseConfig):
    """Settings that control semantic analysis.

    Holds the LDA topic-extraction parameters and the name of the spaCy
    model used for text processing.
    """

    # How many topics LDA should extract.
    num_topics: int = Field(
        description="Number of topics to extract using LDA",
        default=3,
    )

    # How many training passes LDA performs.
    lda_passes: int = Field(
        description="Number of passes for LDA training",
        default=10,
    )

    # Name of the spaCy model; the description lists the documented options.
    spacy_model: str = Field(
        description="spaCy model to use for text processing. Options: en_core_web_sm (15MB, no vectors), en_core_web_md (50MB, 20k vectors), en_core_web_lg (750MB, 514k vectors)",
        default="en_core_web_md",
    )

34 

35 

class GlobalConfig(BaseConfig):
    """Global configuration settings.

    Aggregates the per-subsystem configuration models: chunking, embedding,
    LLM, semantic analysis, state management, sources, file conversion,
    Qdrant and workers. Every field except ``llm`` defaults to a freshly
    constructed instance of its config class; ``llm`` defaults to ``None``.
    """

    chunking: ChunkingConfig = Field(default_factory=ChunkingConfig)
    embedding: EmbeddingConfig = Field(default_factory=EmbeddingConfig)
    llm: dict[str, Any] | None = Field(
        default=None, description="Unified LLM configuration (provider-agnostic)"
    )
    semantic_analysis: SemanticAnalysisConfig = Field(
        default_factory=SemanticAnalysisConfig,
        description="Semantic analysis configuration",
    )
    state_management: StateManagementConfig = Field(
        default_factory=StateManagementConfig,
        description="State management configuration",
    )
    sources: SourcesConfig = Field(default_factory=SourcesConfig)
    file_conversion: FileConversionConfig = Field(
        default_factory=FileConversionConfig,
        description="File conversion configuration",
    )
    qdrant: QdrantConfig = Field(
        default_factory=QdrantConfig, description="Qdrant configuration"
    )
    workers: WorkersConfig = Field(
        default_factory=WorkersConfig,
        description="Worker scheduling and runtime configuration",
    )

    def __init__(self, **data: Any) -> None:
        """Initialize global configuration.

        Args:
            **data: Field values for the model. A ``skip_validation`` key is
                consumed here and is not forwarded to the base initializer.
                When it is truthy and no ``state_management`` value is
                supplied, an in-memory state database configuration is
                injected.
        """
        # skip_validation is not declared as a field on this class, so it is
        # removed from the payload before delegating to the base initializer.
        skip_validation = data.pop("skip_validation", False)
        if skip_validation and "state_management" not in data:
            # Use SQLite's in-memory database so that skipping validation
            # never creates or reuses a state file on disk.
            data["state_management"] = {
                "database_path": ":memory:",
                "table_prefix": "qdrant_loader_",
                "connection_pool": {"size": 5, "timeout": 30},
            }
        super().__init__(**data)

    def to_dict(self) -> dict[str, Any]:
        """Convert the configuration to a dictionary.

        Returns:
            A dictionary with one entry per configuration section. The
            ``chunking``, ``semantic_analysis`` and ``file_conversion``
            sections are assembled here from selected attributes; the other
            sections are produced by the nested objects themselves (or, for
            ``llm``, passed through unchanged).
        """
        chunking = self.chunking
        semantic = self.semantic_analysis
        conversion = self.file_conversion
        markitdown = conversion.markitdown

        return {
            "chunking": {
                "chunk_size": chunking.chunk_size,
                "chunk_overlap": chunking.chunk_overlap,
                "enable_semantic_analysis": chunking.enable_semantic_analysis,
                "enable_enhanced_semantic_analysis": chunking.enable_enhanced_semantic_analysis,
            },
            "embedding": self.embedding.model_dump(),
            "llm": self.llm,
            "semantic_analysis": {
                "num_topics": semantic.num_topics,
                "lda_passes": semantic.lda_passes,
                "spacy_model": semantic.spacy_model,
            },
            "sources": self.sources.to_dict(),
            "state_management": self.state_management.to_dict(),
            "file_conversion": {
                "max_file_size": conversion.max_file_size,
                "conversion_timeout": conversion.conversion_timeout,
                "markitdown": {
                    "enable_llm_descriptions": markitdown.enable_llm_descriptions,
                    "llm_model": markitdown.llm_model,
                    "llm_endpoint": markitdown.llm_endpoint,
                    # NOTE(review): the API key is emitted as-is; callers that
                    # log or persist this dictionary should redact it.
                    "llm_api_key": markitdown.llm_api_key,
                },
            },
            "qdrant": self.qdrant.to_dict(),
            "workers": self.workers.to_dict(),
        }