Coverage for src/qdrant_loader/config/global_config.py: 100%

29 statements  

coverage.py v7.10.6, created at 2025-09-08 06:05 +0000

1"""Global configuration settings. 

2 

3This module defines the global configuration settings that apply across the application, 

4including chunking, embedding, and logging configurations. 

5""" 

6 

7from typing import Any 

8 

9from pydantic import Field 

10 

11from qdrant_loader.config.base import BaseConfig 

12from qdrant_loader.config.chunking import ChunkingConfig 

13from qdrant_loader.config.embedding import EmbeddingConfig 

14from qdrant_loader.config.qdrant import QdrantConfig 

15from qdrant_loader.config.sources import SourcesConfig 

16from qdrant_loader.config.state import StateManagementConfig 

17from qdrant_loader.core.file_conversion import FileConversionConfig 

18 

19 

20class SemanticAnalysisConfig(BaseConfig): 

21 """Configuration for semantic analysis.""" 

22 

23 num_topics: int = Field( 

24 default=3, description="Number of topics to extract using LDA" 

25 ) 

26 

27 lda_passes: int = Field(default=10, description="Number of passes for LDA training") 

28 

29 spacy_model: str = Field( 

30 default="en_core_web_md", 

31 description="spaCy model to use for text processing. Options: en_core_web_sm (15MB, no vectors), en_core_web_md (50MB, 20k vectors), en_core_web_lg (750MB, 514k vectors)", 

32 ) 

33 

34 

35class GlobalConfig(BaseConfig): 

36 """Global configuration settings.""" 

37 

38 chunking: ChunkingConfig = Field(default_factory=ChunkingConfig) 

39 embedding: EmbeddingConfig = Field(default_factory=EmbeddingConfig) 

40 llm: dict[str, Any] | None = Field( 

41 default=None, description="Unified LLM configuration (provider-agnostic)" 

42 ) 

43 semantic_analysis: SemanticAnalysisConfig = Field( 

44 default_factory=SemanticAnalysisConfig, 

45 description="Semantic analysis configuration", 

46 ) 

47 state_management: StateManagementConfig = Field( 

48 default_factory=lambda: StateManagementConfig(database_path=":memory:"), 

49 description="State management configuration", 

50 ) 

51 sources: SourcesConfig = Field(default_factory=SourcesConfig) 

52 file_conversion: FileConversionConfig = Field( 

53 default_factory=FileConversionConfig, 

54 description="File conversion configuration", 

55 ) 

56 qdrant: QdrantConfig | None = Field( 

57 default=None, description="Qdrant configuration" 

58 ) 

59 

60 def __init__(self, **data): 

61 """Initialize global configuration.""" 

62 # If skip_validation is True and no state_management is provided, use in-memory database 

63 skip_validation = data.pop("skip_validation", False) 

64 if skip_validation and "state_management" not in data: 

65 data["state_management"] = { 

66 "database_path": ":memory:", 

67 "table_prefix": "qdrant_loader_", 

68 "connection_pool": {"size": 5, "timeout": 30}, 

69 } 

70 super().__init__(**data) 

71 

72 def to_dict(self) -> dict[str, Any]: 

73 """Convert the configuration to a dictionary.""" 

74 return { 

75 "chunking": { 

76 "chunk_size": self.chunking.chunk_size, 

77 "chunk_overlap": self.chunking.chunk_overlap, 

78 }, 

79 "embedding": self.embedding.model_dump(), 

80 "llm": self.llm, 

81 "semantic_analysis": { 

82 "num_topics": self.semantic_analysis.num_topics, 

83 "lda_passes": self.semantic_analysis.lda_passes, 

84 "spacy_model": self.semantic_analysis.spacy_model, 

85 }, 

86 "sources": self.sources.to_dict(), 

87 "state_management": self.state_management.to_dict(), 

88 "file_conversion": { 

89 "max_file_size": self.file_conversion.max_file_size, 

90 "conversion_timeout": self.file_conversion.conversion_timeout, 

91 "markitdown": { 

92 "enable_llm_descriptions": self.file_conversion.markitdown.enable_llm_descriptions, 

93 "llm_model": self.file_conversion.markitdown.llm_model, 

94 "llm_endpoint": self.file_conversion.markitdown.llm_endpoint, 

95 "llm_api_key": self.file_conversion.markitdown.llm_api_key, 

96 }, 

97 }, 

98 "qdrant": self.qdrant.to_dict() if self.qdrant else None, 

99 }
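
For orientation, a minimal usage sketch follows. It assumes the qdrant_loader package is importable and that the sub-configurations defined outside this module (ChunkingConfig, EmbeddingConfig, SourcesConfig, FileConversionConfig) can be constructed from their defaults.

from qdrant_loader.config.global_config import GlobalConfig

# Passing skip_validation=True without an explicit state_management section
# triggers the in-memory fallback defined in GlobalConfig.__init__ above.
config = GlobalConfig(skip_validation=True)

# to_dict() flattens the nested Pydantic models into plain dictionaries.
config_dict = config.to_dict()

# Defaults taken from SemanticAnalysisConfig in this module.
print(config_dict["semantic_analysis"])
# {'num_topics': 3, 'lda_passes': 10, 'spacy_model': 'en_core_web_md'}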