Coverage for src/qdrant_loader/config/__init__.py: 84%

161 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:05 +0000

1"""Configuration module. 

2 

3This module provides the main configuration interface for the application. 

4It combines global settings with source-specific configurations. 

5""" 

6 

7import os 

8import re 

9from pathlib import Path 

10from typing import Any, Optional 

11 

12import yaml 

13from dotenv import load_dotenv 

14from pydantic import Field, ValidationError, model_validator 

15from pydantic_settings import BaseSettings, SettingsConfigDict 

16 

17from ..utils.logging import LoggingConfig 

18from .chunking import ChunkingConfig 

19 

20# Import consolidated configs 

21from .global_config import GlobalConfig, SemanticAnalysisConfig 

22 

23# Import multi-project support 

24from .models import ( 

25 ParsedConfig, 

26 ProjectConfig, 

27 ProjectContext, 

28 ProjectDetail, 

29 ProjectInfo, 

30 ProjectsConfig, 

31 ProjectStats, 

32) 

33from .parser import MultiProjectConfigParser 

34from .sources import SourcesConfig 

35from .state import StateManagementConfig 

36from .validator import ConfigValidator 

37from .workspace import WorkspaceConfig 

38 

# Load environment variables from .env file at import time.
# override=False is passed so that variables already present in the process
# environment are not replaced by values from the file.
load_dotenv(override=False)

# Get logger without initializing it
logger = LoggingConfig.get_logger(__name__)

44 

45 

# Connector config classes are imported on demand rather than at module import.
def _get_connector_configs():
    """Import the connector config classes and return them keyed by name.

    The imports are placed inside the function body so that they only run
    when the function is called, which avoids circular dependencies between
    this package and the connector packages.

    Returns:
        dict: Mapping of public class name to the imported class object.
    """
    from ..connectors.confluence.config import ConfluenceSpaceConfig
    from ..connectors.git.config import GitAuthConfig, GitRepoConfig
    from ..connectors.jira.config import JiraProjectConfig
    from ..connectors.publicdocs.config import PublicDocsSourceConfig, SelectorsConfig

    # Keyword form yields the same string keys, in the same insertion order.
    return dict(
        ConfluenceSpaceConfig=ConfluenceSpaceConfig,
        GitAuthConfig=GitAuthConfig,
        GitRepoConfig=GitRepoConfig,
        JiraProjectConfig=JiraProjectConfig,
        PublicDocsSourceConfig=PublicDocsSourceConfig,
        SelectorsConfig=SelectorsConfig,
    )

62 

63 

# Public API of the configuration package.
# NOTE: the connector config names (ConfluenceSpaceConfig, GitAuthConfig,
# GitRepoConfig, JiraProjectConfig, PublicDocsSourceConfig, SelectorsConfig)
# are not bound at import time; the module-level __getattr__ resolves them
# on first access.
__all__ = [
    "ChunkingConfig",
    "ConfluenceSpaceConfig",
    "GitAuthConfig",
    "GitRepoConfig",
    "GlobalConfig",
    "JiraProjectConfig",
    "PublicDocsSourceConfig",
    "SelectorsConfig",
    "SemanticAnalysisConfig",
    "Settings",
    "SourcesConfig",
    "StateManagementConfig",
    # Multi-project support
    "ProjectContext",
    "ProjectConfig",
    "ProjectsConfig",
    "ParsedConfig",
    "ProjectStats",
    "ProjectInfo",
    "ProjectDetail",
    "MultiProjectConfigParser",
    "ConfigValidator",
    # Functions
    "get_global_config",
    "get_settings",
    "initialize_config",
    "initialize_config_with_workspace",
]

93 

94 

# Names that the module-level __getattr__ resolves lazily. Keep this set in
# sync with the keys returned by _get_connector_configs().
_LAZY_CONNECTOR_CONFIG_NAMES = frozenset(
    {
        "ConfluenceSpaceConfig",
        "GitAuthConfig",
        "GitRepoConfig",
        "JiraProjectConfig",
        "PublicDocsSourceConfig",
        "SelectorsConfig",
    }
)


# Add lazy loading for connector configs
def __getattr__(name):
    """Lazily resolve connector config classes (module-level ``__getattr__``).

    The connector packages are only imported when ``name`` is one of the
    known connector config names. Any other missing attribute (for example a
    ``hasattr`` probe or a typo) raises ``AttributeError`` immediately, so it
    never triggers the connector imports and can never surface an unrelated
    import failure in place of the expected ``AttributeError``.

    Args:
        name: Attribute name that was not found in the module namespace.

    Returns:
        The connector config class registered under ``name``.

    Raises:
        AttributeError: If ``name`` is not a lazily provided connector config.
    """
    if name in _LAZY_CONNECTOR_CONFIG_NAMES:
        connector_configs = _get_connector_configs()
        if name in connector_configs:
            return connector_configs[name]
    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")

102 

103 

# Module-wide Settings instance returned by get_settings(). It stays None
# until initialize_config() or initialize_config_with_workspace() assigns it.
_global_settings: Optional["Settings"] = None

105 

106 

def get_settings() -> "Settings":
    """Return the module-wide settings instance.

    Returns:
        Settings: The instance stored in ``_global_settings``.

    Raises:
        RuntimeError: If ``_global_settings`` is still ``None``.
    """
    if _global_settings is not None:
        return _global_settings
    raise RuntimeError(
        "Settings not initialized. Call initialize_config() or initialize_config_with_workspace() first."
    )

118 

119 

def get_global_config() -> GlobalConfig:
    """Return the global configuration held by the current settings.

    Returns:
        GlobalConfig: The ``global_config`` attribute of the instance
        returned by ``get_settings()``.
    """
    current_settings = get_settings()
    return current_settings.global_config

127 

128 

def initialize_config(
    yaml_path: Path, env_path: Path | None = None, skip_validation: bool = False
) -> None:
    """Load settings from a YAML file and store them as the global settings.

    Args:
        yaml_path: Path to the YAML configuration file.
        env_path: Optional path to the .env file.
        skip_validation: If True, skip directory validation and creation.

    Raises:
        Exception: Any error raised while loading is logged and re-raised
            unchanged.
    """
    global _global_settings
    try:
        # Gather the log fields first; the dict keeps the evaluation order.
        log_fields = {
            "yaml_path": str(yaml_path),
            "env_path": str(env_path) if env_path else None,
        }
        logger.debug("Initializing configuration", **log_fields)

        loaded_settings = Settings.from_yaml(
            yaml_path, env_path=env_path, skip_validation=skip_validation
        )
        _global_settings = loaded_settings
        logger.debug("Successfully initialized configuration")
    except Exception as e:
        logger.error(
            "Failed to initialize configuration", error=str(e), yaml_path=str(yaml_path)
        )
        raise

157 

158 

def initialize_config_with_workspace(
    workspace_config: WorkspaceConfig, skip_validation: bool = False
) -> None:
    """Load settings from the workspace's config file and pin the state DB path.

    After loading, the state-management database path is always replaced by
    the workspace's database path; a warning is logged when that discards a
    different, non-empty, non-":memory:" path from the loaded configuration.

    Args:
        workspace_config: Workspace configuration with paths and settings
        skip_validation: If True, skip directory validation and creation

    Raises:
        Exception: Any error raised along the way is logged and re-raised
            unchanged.
    """
    global _global_settings
    try:
        env_path_for_log = (
            str(workspace_config.env_path) if workspace_config.env_path else None
        )
        logger.debug(
            "Initializing configuration with workspace",
            workspace=str(workspace_config.workspace_path),
            config_path=str(workspace_config.config_path),
            env_path=env_path_for_log,
        )

        # Load configuration using workspace paths
        _global_settings = Settings.from_yaml(
            workspace_config.config_path,
            env_path=workspace_config.env_path,
            skip_validation=skip_validation,
        )

        state_management = _global_settings.global_config.state_management
        configured_db_path = state_management.database_path
        workspace_db_path = str(workspace_config.database_path)

        # Warn only when a real, differing path from the config is discarded.
        path_is_overridden = bool(configured_db_path) and configured_db_path not in (
            ":memory:",
            workspace_db_path,
        )
        if path_is_overridden:
            logger.warning(
                "Database path in config.yaml is ignored in workspace mode",
                config_database_path=configured_db_path,
                workspace_database_path=workspace_db_path,
            )

        # The workspace path always wins.
        state_management.database_path = workspace_db_path

        logger.debug("Set workspace database path", database_path=workspace_db_path)
        logger.debug(
            "Successfully initialized configuration with workspace",
            workspace=str(workspace_config.workspace_path),
        )
    except Exception as e:
        logger.error(
            "Failed to initialize configuration with workspace",
            error=str(e),
            workspace=str(workspace_config.workspace_path),
        )
        raise

224 

225 

class Settings(BaseSettings):
    """Main configuration class combining global and source-specific settings.

    Instances are built by :meth:`from_yaml`, which loads environment
    variables, reads the YAML file, substitutes ``${VAR}`` placeholders and
    hands the result to the multi-project parser.
    """

    # Configuration objects - these are the only fields we need
    global_config: GlobalConfig = Field(
        default_factory=GlobalConfig, description="Global configuration settings"
    )
    projects_config: ProjectsConfig = Field(
        default_factory=ProjectsConfig, description="Multi-project configurations"
    )

    model_config = SettingsConfigDict(
        env_file=None,  # Disable automatic .env loading - we handle this manually
        env_file_encoding="utf-8",
        extra="allow",
    )

    @model_validator(mode="after")  # type: ignore
    def validate_source_configs(self) -> "Settings":
        """Validate that required configuration is present for configured sources.

        Returns:
            Settings: The validated instance itself.

        Raises:
            ValueError: If the Qdrant section, its URL, or its collection
                name is missing or empty.
        """
        logger.debug("Validating source configurations")

        # Validate that qdrant configuration is present in global config
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is required in global config")

        # Validate that required fields are not empty after variable substitution
        if not self.global_config.qdrant.url:
            raise ValueError(
                "Qdrant URL is required but was not provided or substituted"
            )

        if not self.global_config.qdrant.collection_name:
            raise ValueError(
                "Qdrant collection name is required but was not provided or substituted"
            )

        # Note: Source validation is now handled at the project level
        # Each project's sources are validated when the project is processed

        logger.debug("Source configuration validation successful")
        return self

    @property
    def qdrant_url(self) -> str:
        """Get the Qdrant URL from global configuration.

        Raises:
            ValueError: If no Qdrant configuration is available.
        """
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is not available")
        return self.global_config.qdrant.url

    @property
    def qdrant_api_key(self) -> str | None:
        """Get the Qdrant API key, or None when Qdrant is not configured."""
        if not self.global_config.qdrant:
            return None
        return self.global_config.qdrant.api_key

    @property
    def qdrant_collection_name(self) -> str:
        """Get the Qdrant collection name from global configuration.

        Raises:
            ValueError: If no Qdrant configuration is available.
        """
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is not available")
        return self.global_config.qdrant.collection_name

    @property
    def openai_api_key(self) -> str:
        """Get the OpenAI API key from embedding configuration.

        Raises:
            ValueError: If the embedding API key is missing or empty.
        """
        api_key = self.global_config.embedding.api_key
        if not api_key:
            raise ValueError(
                "OpenAI API key is required but was not provided or substituted in embedding configuration"
            )
        return api_key

    @property
    def state_db_path(self) -> str:
        """Get the state database path from global configuration."""
        return self.global_config.state_management.database_path

    @property
    def llm_settings(self):
        """Provider-agnostic LLM settings derived from global configuration.

        Uses `global.llm` when present; otherwise maps legacy fields.
        """
        # Import lazily to avoid hard dependency issues in environments without core installed
        from importlib import import_module

        settings_mod = import_module("qdrant_loader_core.llm.settings")
        LLMSettings = settings_mod.LLMSettings
        return LLMSettings.from_global_config(self.global_config.to_dict())

    @staticmethod
    def _substitute_env_vars(data: Any) -> Any:
        """Recursively substitute environment variables in configuration data.

        Strings have every literal ``$HOME`` replaced by the user's home
        directory and every ``${VAR_NAME}`` placeholder replaced by the value
        of that environment variable. Placeholders whose variable is unset
        are left in place (a warning is logged, except for STATE_DB_PATH).
        Dicts and lists are processed element by element; any other value is
        returned unchanged.

        Substitution is a single pass over the string: text injected from an
        environment variable is not scanned again for further placeholders,
        so the result does not depend on the order in which placeholders
        appear.

        Args:
            data: Configuration data to process

        Returns:
            Processed data with environment variables substituted
        """
        if isinstance(data, str):
            home_dir = os.path.expanduser("~")

            def _resolve(match: "re.Match[str]") -> str:
                var_name = match.group(1)
                env_value = os.getenv(var_name)
                if env_value is None:
                    # Only warn about missing variables that are commonly required
                    # Skip STATE_DB_PATH as it's often overridden in workspace mode
                    if var_name != "STATE_DB_PATH":
                        logger.warning(
                            "Environment variable not found", variable=var_name
                        )
                    # Leave the unresolved placeholder untouched.
                    return match.group(0)
                # If the environment variable contains $HOME, expand it
                return env_value.replace("$HOME", home_dir)

            # First expand $HOME, then handle the ${VAR_NAME} pattern.
            expanded = data.replace("$HOME", home_dir)
            return re.sub(r"\$\{([^}]+)\}", _resolve, expanded)
        elif isinstance(data, dict):
            return {k: Settings._substitute_env_vars(v) for k, v in data.items()}
        elif isinstance(data, list):
            return [Settings._substitute_env_vars(item) for item in data]
        return data

    @classmethod
    def from_yaml(
        cls,
        config_path: Path,
        env_path: Path | None = None,
        skip_validation: bool = False,
    ) -> "Settings":
        """Load configuration from a YAML file.

        Args:
            config_path: Path to the YAML configuration file.
            env_path: Optional path to the .env file. If provided, only this file is loaded.
            skip_validation: If True, skip directory validation and creation.

        Returns:
            Settings: Loaded configuration.

        Raises:
            FileNotFoundError: If ``env_path`` is given but does not exist.
            yaml.YAMLError: If the YAML file cannot be parsed.
            ValidationError: If the parsed configuration fails validation.
            Exception: Any other error is logged and re-raised unchanged.
        """
        logger.debug("Loading configuration from YAML", path=str(config_path))
        try:
            # Step 1: Load environment variables first
            if env_path is not None:
                # Custom env file specified - load only this file
                logger.debug("Loading custom environment file", path=str(env_path))
                if not env_path.exists():
                    raise FileNotFoundError(f"Environment file not found: {env_path}")
                load_dotenv(env_path, override=True)
            else:
                # Load default .env file if it exists
                logger.debug("Loading default environment variables")
                load_dotenv(override=False)

            # Step 2: Load YAML config
            # Read as UTF-8 explicitly so the result does not depend on the
            # platform's locale default encoding.
            with open(config_path, encoding="utf-8") as f:
                config_data = yaml.safe_load(f)

            # Step 3: Process all environment variables in config using substitution
            logger.debug("Processing environment variables in configuration")
            config_data = cls._substitute_env_vars(config_data)

            # Step 4: Use multi-project parser to parse configuration
            validator = ConfigValidator()
            parser = MultiProjectConfigParser(validator)
            parsed_config = parser.parse(config_data, skip_validation=skip_validation)

            # Step 5: Create settings instance with parsed configuration
            settings = cls(
                global_config=parsed_config.global_config,
                projects_config=parsed_config.projects_config,
            )

            logger.debug("Successfully created Settings instance")
            return settings

        except yaml.YAMLError as e:
            logger.error("Failed to parse YAML configuration", error=str(e))
            raise
        except ValidationError as e:
            logger.error("Configuration validation failed", error=str(e))
            raise
        except Exception as e:
            logger.error("Unexpected error loading configuration", error=str(e))
            raise

    def to_dict(self) -> dict:
        """Convert the configuration to a dictionary.

        Returns:
            dict: Configuration as a dictionary with the keys ``"global"``
            and ``"projects"``.
        """
        return {
            "global": self.global_config.to_dict(),
            "projects": self.projects_config.to_dict(),
        }