Coverage for src/qdrant_loader/config/__init__.py: 83%

155 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-04 05:50 +0000

1"""Configuration module. 

2 

3This module provides the main configuration interface for the application. 

4It combines global settings with source-specific configurations. 

5""" 

6 

7import os 

8import re 

9from pathlib import Path 

10from typing import Any, Optional 

11 

12import yaml 

13from dotenv import load_dotenv 

14from pydantic import ( 

15 Field, 

16 ValidationError, 

17 field_validator, 

18 model_validator, 

19) 

20from pydantic_settings import BaseSettings, SettingsConfigDict 

21 

22from ..utils.logging import LoggingConfig 

23from .chunking import ChunkingConfig 

24 

25# Import consolidated configs 

26from .global_config import GlobalConfig, SemanticAnalysisConfig 

27from .sources import SourcesConfig 

28from .state import StateManagementConfig 

29from .workspace import WorkspaceConfig, get_workspace_env_override 

30 

31# Import multi-project support 

32from .models import ( 

33 ProjectContext, 

34 ProjectConfig, 

35 ProjectsConfig, 

36 ParsedConfig, 

37 ProjectStats, 

38 ProjectInfo, 

39 ProjectDetail, 

40) 

41from .parser import MultiProjectConfigParser 

42from .validator import ConfigValidator 

43 

# Read variables from a .env file into the process environment at import time.
# override=False is passed so values already set in the environment are kept.
load_dotenv(override=False)

# Module-level logger, fetched through LoggingConfig without initializing logging.
logger = LoggingConfig.get_logger(__name__)

49 

50 

51# Lazy import function for connector configs 

def _get_connector_configs():
    """Import the connector config classes on demand and return them by name.

    The imports live inside the function body (rather than at module top
    level) to avoid circular dependencies between this package and the
    connector packages.

    Returns:
        dict: Mapping of exported class name to the connector config class.
    """
    from ..connectors.confluence.config import ConfluenceSpaceConfig
    from ..connectors.git.config import GitAuthConfig, GitRepoConfig
    from ..connectors.jira.config import JiraProjectConfig
    from ..connectors.publicdocs.config import PublicDocsSourceConfig, SelectorsConfig

    # Keyword form keeps each exported name next to the class it resolves to.
    return dict(
        ConfluenceSpaceConfig=ConfluenceSpaceConfig,
        GitAuthConfig=GitAuthConfig,
        GitRepoConfig=GitRepoConfig,
        JiraProjectConfig=JiraProjectConfig,
        PublicDocsSourceConfig=PublicDocsSourceConfig,
        SelectorsConfig=SelectorsConfig,
    )

67 

68 

# Public API of the package. The connector config names listed here are not
# bound at import time; they are resolved lazily by the module-level
# __getattr__ hook.
__all__ = [
    # Configuration classes
    "ChunkingConfig",
    "ConfluenceSpaceConfig",
    "GitAuthConfig",
    "GitRepoConfig",
    "GlobalConfig",
    "JiraProjectConfig",
    "PublicDocsSourceConfig",
    "SelectorsConfig",
    "SemanticAnalysisConfig",
    "Settings",
    "SourcesConfig",
    "StateManagementConfig",
    # Multi-project support
    "ProjectContext",
    "ProjectConfig",
    "ProjectsConfig",
    "ParsedConfig",
    "ProjectStats",
    "ProjectInfo",
    "ProjectDetail",
    "MultiProjectConfigParser",
    "ConfigValidator",
    # Functions
    "get_global_config",
    "get_settings",
    "initialize_config",
    "initialize_config_with_workspace",
]

98 

99 

100# Add lazy loading for connector configs 

def __getattr__(name):
    """Resolve connector config classes lazily on module attribute access.

    Python calls this hook only when normal module attribute lookup fails.
    The connector configs are imported at that point to avoid circular
    dependencies.

    Args:
        name: Attribute name that was not found on the module.

    Returns:
        The connector config class registered under ``name``.

    Raises:
        AttributeError: If ``name`` is not one of the lazily loaded configs.
    """
    lazily_loaded = _get_connector_configs()
    if name not in lazily_loaded:
        raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
    return lazily_loaded[name]

107 

108 

# Process-wide Settings instance. It stays None until initialize_config() or
# initialize_config_with_workspace() stores one.
_global_settings: Optional["Settings"] = None


def get_settings() -> "Settings":
    """Return the global settings instance.

    Returns:
        Settings: The instance stored by an earlier initialization call.

    Raises:
        RuntimeError: If no settings instance has been stored yet.
    """
    current = _global_settings
    if current is not None:
        return current
    raise RuntimeError(
        "Settings not initialized. Call initialize_config() or initialize_config_with_workspace() first."
    )

123 

124 

def get_global_config() -> GlobalConfig:
    """Return the global configuration held by the current settings.

    Returns:
        GlobalConfig: The ``global_config`` attribute of the settings instance
        returned by ``get_settings()``.
    """
    settings = get_settings()
    return settings.global_config

132 

133 

def initialize_config(
    yaml_path: Path, env_path: Path | None = None, skip_validation: bool = False
) -> None:
    """Load settings from a YAML file and store them as the global instance.

    Args:
        yaml_path: Path to the YAML configuration file.
        env_path: Optional path to the .env file.
        skip_validation: If True, skip directory validation and creation.

    Raises:
        Exception: Any error raised while loading is logged and re-raised
            unchanged.
    """
    global _global_settings
    try:
        yaml_path_text = str(yaml_path)
        env_path_text = str(env_path) if env_path else None
        logger.debug(
            "Initializing configuration",
            yaml_path=yaml_path_text,
            env_path=env_path_text,
        )

        loaded = Settings.from_yaml(
            yaml_path, env_path=env_path, skip_validation=skip_validation
        )
        _global_settings = loaded
        logger.debug("Successfully initialized configuration")

    except Exception as exc:
        # Record the failure with context, then let the caller see the error.
        logger.error(
            "Failed to initialize configuration",
            error=str(exc),
            yaml_path=str(yaml_path),
        )
        raise

162 

163 

def initialize_config_with_workspace(
    workspace_config: WorkspaceConfig, skip_validation: bool = False
) -> None:
    """Load settings from the workspace's config file and store them globally.

    After loading, the state database path in the loaded settings is replaced
    by the workspace's database path. A warning is logged when the config
    file specified a different, non-empty, non-``:memory:`` path.

    Args:
        workspace_config: Workspace configuration with paths and settings
        skip_validation: If True, skip directory validation and creation

    Raises:
        Exception: Any error raised while loading is logged and re-raised
            unchanged.
    """
    global _global_settings
    try:
        log_fields = {
            "workspace": str(workspace_config.workspace_path),
            "config_path": str(workspace_config.config_path),
            "env_path": (
                str(workspace_config.env_path) if workspace_config.env_path else None
            ),
        }
        logger.debug("Initializing configuration with workspace", **log_fields)

        # Load configuration using workspace paths
        _global_settings = Settings.from_yaml(
            workspace_config.config_path,
            env_path=workspace_config.env_path,
            skip_validation=skip_validation,
        )

        state_cfg = _global_settings.global_config.state_management
        configured_db_path = state_cfg.database_path
        workspace_db_path = str(workspace_config.database_path)

        # Warn only when config.yaml carried a real path that is about to be
        # replaced: not empty, not in-memory, not already the workspace path.
        if configured_db_path and configured_db_path not in (
            ":memory:",
            workspace_db_path,
        ):
            logger.warning(
                "Database path in config.yaml is ignored in workspace mode",
                config_database_path=configured_db_path,
                workspace_database_path=workspace_db_path,
            )

        # The workspace-specific database path always wins.
        state_cfg.database_path = workspace_db_path
        logger.debug("Set workspace database path", database_path=workspace_db_path)

        logger.debug(
            "Successfully initialized configuration with workspace",
            workspace=str(workspace_config.workspace_path),
        )

    except Exception as exc:
        # Record the failure with context, then let the caller see the error.
        logger.error(
            "Failed to initialize configuration with workspace",
            error=str(exc),
            workspace=str(workspace_config.workspace_path),
        )
        raise

229 

230 

class Settings(BaseSettings):
    """Main configuration class combining global and source-specific settings.

    Instances are normally created through :meth:`from_yaml`, which loads
    environment variables, reads the YAML file, substitutes ``${VAR}``
    placeholders and hands the result to the multi-project parser.
    """

    # Configuration objects - these are the only fields we need
    global_config: GlobalConfig = Field(
        default_factory=GlobalConfig, description="Global configuration settings"
    )
    projects_config: ProjectsConfig = Field(
        default_factory=ProjectsConfig, description="Multi-project configurations"
    )

    model_config = SettingsConfigDict(
        env_file=None,  # Disable automatic .env loading - we handle this manually
        env_file_encoding="utf-8",
        extra="allow",
    )

    @model_validator(mode="after")  # type: ignore
    def validate_source_configs(self) -> "Settings":
        """Validate that the required Qdrant settings are present.

        Returns:
            Settings: This instance, unchanged.

        Raises:
            ValueError: If the Qdrant section, its URL or its collection name
                is missing or empty.
        """
        logger.debug("Validating source configurations")

        # Validate that qdrant configuration is present in global config
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is required in global config")

        # Validate that required fields are not empty after variable substitution
        if not self.global_config.qdrant.url:
            raise ValueError(
                "Qdrant URL is required but was not provided or substituted"
            )

        if not self.global_config.qdrant.collection_name:
            raise ValueError(
                "Qdrant collection name is required but was not provided or substituted"
            )

        # Note: Source validation is now handled at the project level
        # Each project's sources are validated when the project is processed

        logger.debug("Source configuration validation successful")
        return self

    @property
    def qdrant_url(self) -> str:
        """Get the Qdrant URL from global configuration.

        Raises:
            ValueError: If no Qdrant configuration is available.
        """
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is not available")
        return self.global_config.qdrant.url

    @property
    def qdrant_api_key(self) -> str | None:
        """Get the Qdrant API key, or None when Qdrant is not configured."""
        if not self.global_config.qdrant:
            return None
        return self.global_config.qdrant.api_key

    @property
    def qdrant_collection_name(self) -> str:
        """Get the Qdrant collection name from global configuration.

        Raises:
            ValueError: If no Qdrant configuration is available.
        """
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is not available")
        return self.global_config.qdrant.collection_name

    @property
    def openai_api_key(self) -> str:
        """Get the OpenAI API key from embedding configuration.

        Raises:
            ValueError: If the embedding configuration has no API key.
        """
        api_key = self.global_config.embedding.api_key
        if not api_key:
            raise ValueError(
                "OpenAI API key is required but was not provided or substituted in embedding configuration"
            )
        return api_key

    @property
    def state_db_path(self) -> str:
        """Get the state database path from global configuration."""
        return self.global_config.state_management.database_path

    @staticmethod
    def _substitute_env_vars(data: Any) -> Any:
        """Recursively substitute environment variables in configuration data.

        Strings have ``$HOME`` expanded to the user's home directory and each
        ``${VAR_NAME}`` placeholder replaced by the value of that environment
        variable. Placeholders whose variable is not set are left in place.
        Dicts and lists are processed element by element; every other value
        is returned unchanged.

        Args:
            data: Configuration data to process

        Returns:
            Processed data with environment variables substituted
        """
        if isinstance(data, str):
            # First expand $HOME if present
            if "$HOME" in data:
                data = data.replace("$HOME", os.path.expanduser("~"))

            # Then handle ${VAR_NAME} pattern
            pattern = r"\${([^}]+)}"
            result = data
            for match in re.finditer(pattern, data):
                var_name = match.group(1)
                env_value = os.getenv(var_name)
                if env_value is None:
                    # Only warn about missing variables that are commonly required
                    # Skip STATE_DB_PATH as it's often overridden in workspace mode
                    if var_name not in ["STATE_DB_PATH"]:
                        logger.warning(
                            "Environment variable not found", variable=var_name
                        )
                    continue
                # If the environment variable contains $HOME, expand it
                if "$HOME" in env_value:
                    env_value = env_value.replace("$HOME", os.path.expanduser("~"))
                # match.group(0) is the exact "${VAR_NAME}" text that was found,
                # so the placeholder is replaced without rebuilding it through
                # brace-escaped string formatting.
                result = result.replace(match.group(0), env_value)

            return result
        elif isinstance(data, dict):
            return {k: Settings._substitute_env_vars(v) for k, v in data.items()}
        elif isinstance(data, list):
            return [Settings._substitute_env_vars(item) for item in data]
        return data

    @classmethod
    def from_yaml(
        cls,
        config_path: Path,
        env_path: Path | None = None,
        skip_validation: bool = False,
    ) -> "Settings":
        """Load configuration from a YAML file.

        Args:
            config_path: Path to the YAML configuration file.
            env_path: Optional path to the .env file. If provided, only this file is loaded.
            skip_validation: If True, skip directory validation and creation.

        Returns:
            Settings: Loaded configuration.

        Raises:
            FileNotFoundError: If ``env_path`` is given but does not exist.
            yaml.YAMLError: If the YAML file cannot be parsed.
            ValidationError: If the parsed configuration fails validation.
            Exception: Any other error is logged and re-raised unchanged.
        """
        logger.debug("Loading configuration from YAML", path=str(config_path))
        try:
            # Step 1: Load environment variables first
            if env_path is not None:
                # Custom env file specified - load only this file
                logger.debug("Loading custom environment file", path=str(env_path))
                if not env_path.exists():
                    raise FileNotFoundError(f"Environment file not found: {env_path}")
                load_dotenv(env_path, override=True)
            else:
                # Load default .env file if it exists
                logger.debug("Loading default environment variables")
                load_dotenv(override=False)

            # Step 2: Load YAML config
            # Decode explicitly as UTF-8 so parsing does not depend on the
            # platform's locale default encoding.
            with open(config_path, encoding="utf-8") as f:
                config_data = yaml.safe_load(f)

            # Step 3: Process all environment variables in config using substitution
            logger.debug("Processing environment variables in configuration")
            config_data = cls._substitute_env_vars(config_data)

            # Step 4: Use multi-project parser to parse configuration
            validator = ConfigValidator()
            parser = MultiProjectConfigParser(validator)
            parsed_config = parser.parse(config_data, skip_validation=skip_validation)

            # Step 5: Create settings instance with parsed configuration
            settings = cls(
                global_config=parsed_config.global_config,
                projects_config=parsed_config.projects_config,
            )

            logger.debug("Successfully created Settings instance")
            return settings

        except yaml.YAMLError as e:
            logger.error("Failed to parse YAML configuration", error=str(e))
            raise
        except ValidationError as e:
            logger.error("Configuration validation failed", error=str(e))
            raise
        except Exception as e:
            logger.error("Unexpected error loading configuration", error=str(e))
            raise

    def to_dict(self) -> dict:
        """Convert the configuration to a dictionary.

        Returns:
            dict: Configuration as a dictionary with ``"global"`` and
            ``"projects"`` keys.
        """
        return {
            "global": self.global_config.to_dict(),
            "projects": self.projects_config.to_dict(),
        }

424 }