Coverage for src/qdrant_loader/config/__init__.py: 83%
155 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:50 +0000
1"""Configuration module.
3This module provides the main configuration interface for the application.
4It combines global settings with source-specific configurations.
5"""
7import os
8import re
9from pathlib import Path
10from typing import Any, Optional
12import yaml
13from dotenv import load_dotenv
14from pydantic import (
15 Field,
16 ValidationError,
17 field_validator,
18 model_validator,
19)
20from pydantic_settings import BaseSettings, SettingsConfigDict
22from ..utils.logging import LoggingConfig
23from .chunking import ChunkingConfig
25# Import consolidated configs
26from .global_config import GlobalConfig, SemanticAnalysisConfig
27from .sources import SourcesConfig
28from .state import StateManagementConfig
29from .workspace import WorkspaceConfig, get_workspace_env_override
31# Import multi-project support
32from .models import (
33 ProjectContext,
34 ProjectConfig,
35 ProjectsConfig,
36 ParsedConfig,
37 ProjectStats,
38 ProjectInfo,
39 ProjectDetail,
40)
41from .parser import MultiProjectConfigParser
42from .validator import ConfigValidator
# Pull variables from a .env file into the process environment at import
# time; override=False leaves variables that are already set untouched.
load_dotenv(override=False)

# Module-level logger, obtained via LoggingConfig without initializing logging.
logger = LoggingConfig.get_logger(__name__)
51# Lazy import function for connector configs
def _get_connector_configs():
    """Import the connector config classes on demand and return them by name.

    The imports live inside the function body (rather than at module top
    level) to avoid circular dependencies between this package and the
    connector packages.

    Returns:
        dict: Mapping of exported class name to the connector config class.
    """
    from ..connectors.confluence.config import ConfluenceSpaceConfig
    from ..connectors.git.config import GitAuthConfig, GitRepoConfig
    from ..connectors.jira.config import JiraProjectConfig
    from ..connectors.publicdocs.config import PublicDocsSourceConfig, SelectorsConfig

    # Keyword form keeps each exported name next to the class it maps to.
    return dict(
        ConfluenceSpaceConfig=ConfluenceSpaceConfig,
        GitAuthConfig=GitAuthConfig,
        GitRepoConfig=GitRepoConfig,
        JiraProjectConfig=JiraProjectConfig,
        PublicDocsSourceConfig=PublicDocsSourceConfig,
        SelectorsConfig=SelectorsConfig,
    )
# Public API of the configuration package. The connector config names
# (ConfluenceSpaceConfig, GitAuthConfig, GitRepoConfig, JiraProjectConfig,
# PublicDocsSourceConfig, SelectorsConfig) are resolved lazily through the
# module-level __getattr__ hook rather than imported at the top of the file.
__all__ = [
    # Configuration models
    "ChunkingConfig",
    "ConfluenceSpaceConfig",
    "GitAuthConfig",
    "GitRepoConfig",
    "GlobalConfig",
    "JiraProjectConfig",
    "PublicDocsSourceConfig",
    "SelectorsConfig",
    "SemanticAnalysisConfig",
    "Settings",
    "SourcesConfig",
    "StateManagementConfig",
    # Multi-project support
    "ProjectContext",
    "ProjectConfig",
    "ProjectsConfig",
    "ParsedConfig",
    "ProjectStats",
    "ProjectInfo",
    "ProjectDetail",
    "MultiProjectConfigParser",
    "ConfigValidator",
    # Functions
    "get_global_config",
    "get_settings",
    "initialize_config",
    "initialize_config_with_workspace",
]
100# Add lazy loading for connector configs
def __getattr__(name):
    """Resolve connector config classes lazily on module attribute access.

    Python invokes this module-level hook (PEP 562) only for attributes that
    the normal module lookup did not find. Connector configs are imported on
    first use to avoid circular dependencies.

    Args:
        name: Attribute name being looked up on this module.

    Returns:
        The connector config class registered under ``name``.

    Raises:
        AttributeError: If ``name`` is not a lazily provided connector config.
    """
    # Underscore-prefixed names (``__path__``, ``__wrapped__``, ``__all__``...)
    # are probed by the import system and by helpers such as ``hasattr``.
    # No lazily exported name starts with an underscore, so answer those
    # probes directly: importing every connector package for them is wasted
    # work and could surface an ImportError where callers expect
    # AttributeError.
    if not name.startswith("_"):
        connector_configs = _get_connector_configs()
        if name in connector_configs:
            return connector_configs[name]
    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
# Process-wide Settings instance; stays None until one of the
# initialize_config* functions assigns it.
_global_settings: Optional["Settings"] = None


def get_settings() -> "Settings":
    """Return the process-wide settings instance.

    Returns:
        Settings: The instance stored by a previous initialization call.

    Raises:
        RuntimeError: If no initialization function has been called yet.
    """
    current = _global_settings
    if current is not None:
        return current
    raise RuntimeError(
        "Settings not initialized. Call initialize_config() or initialize_config_with_workspace() first."
    )
def get_global_config() -> GlobalConfig:
    """Return the global configuration held by the current settings.

    Returns:
        GlobalConfig: The ``global_config`` attribute of the settings instance.

    Raises:
        RuntimeError: Propagated from ``get_settings()`` when the settings
            have not been initialized.
    """
    settings = get_settings()
    return settings.global_config
def initialize_config(
    yaml_path: Path, env_path: Path | None = None, skip_validation: bool = False
) -> None:
    """Load the YAML configuration and store it as the global settings.

    Args:
        yaml_path: Path to the YAML configuration file.
        env_path: Optional path to the .env file.
        skip_validation: If True, skip directory validation and creation.

    Raises:
        Exception: Any error raised while loading is logged and re-raised
            unchanged.
    """
    global _global_settings
    try:
        yaml_label = str(yaml_path)
        env_label = str(env_path) if env_path else None
        logger.debug(
            "Initializing configuration",
            yaml_path=yaml_label,
            env_path=env_label,
        )

        loaded = Settings.from_yaml(
            yaml_path, env_path=env_path, skip_validation=skip_validation
        )
        _global_settings = loaded
        logger.debug("Successfully initialized configuration")
    except Exception as exc:
        # Log with context, then let the caller see the original exception.
        logger.error(
            "Failed to initialize configuration",
            error=str(exc),
            yaml_path=str(yaml_path),
        )
        raise
def initialize_config_with_workspace(
    workspace_config: WorkspaceConfig, skip_validation: bool = False
) -> None:
    """Load configuration from a workspace and store it as the global settings.

    The configuration and .env paths are taken from ``workspace_config``.
    After loading, the state database path is always replaced with the
    workspace's database path; a warning is logged when the loaded
    configuration named a different, non-empty, non-``:memory:`` path.

    Args:
        workspace_config: Workspace configuration with paths and settings
        skip_validation: If True, skip directory validation and creation

    Raises:
        Exception: Any error raised during initialization is logged and
            re-raised unchanged.
    """
    global _global_settings
    try:
        workspace_label = str(workspace_config.workspace_path)
        config_label = str(workspace_config.config_path)
        env_label = (
            str(workspace_config.env_path) if workspace_config.env_path else None
        )
        logger.debug(
            "Initializing configuration with workspace",
            workspace=workspace_label,
            config_path=config_label,
            env_path=env_label,
        )

        # Load configuration using workspace paths
        _global_settings = Settings.from_yaml(
            workspace_config.config_path,
            env_path=workspace_config.env_path,
            skip_validation=skip_validation,
        )

        state_cfg = _global_settings.global_config.state_management
        configured_db_path = state_cfg.database_path
        workspace_db_path = str(workspace_config.database_path)

        # Warn only when the config named a real path that differs from the
        # workspace one; empty and ":memory:" values are replaced silently.
        is_overridden = bool(configured_db_path) and configured_db_path not in (
            ":memory:",
            workspace_db_path,
        )
        if is_overridden:
            logger.warning(
                "Database path in config.yaml is ignored in workspace mode",
                config_database_path=configured_db_path,
                workspace_database_path=workspace_db_path,
            )

        # The workspace database path always wins in workspace mode.
        state_cfg.database_path = workspace_db_path
        logger.debug(
            "Set workspace database path",
            database_path=workspace_db_path,
        )

        logger.debug(
            "Successfully initialized configuration with workspace",
            workspace=str(workspace_config.workspace_path),
        )
    except Exception as exc:
        # Log with context, then let the caller see the original exception.
        logger.error(
            "Failed to initialize configuration with workspace",
            error=str(exc),
            workspace=str(workspace_config.workspace_path),
        )
        raise
class Settings(BaseSettings):
    """Main configuration class combining global and source-specific settings.

    Instances are normally built with :meth:`from_yaml`, which loads
    environment variables, reads the YAML file, substitutes ``${VAR}``
    placeholders and hands the result to the multi-project parser.
    """

    # Configuration objects - these are the only fields we need
    global_config: GlobalConfig = Field(
        default_factory=GlobalConfig, description="Global configuration settings"
    )
    projects_config: ProjectsConfig = Field(
        default_factory=ProjectsConfig, description="Multi-project configurations"
    )

    model_config = SettingsConfigDict(
        env_file=None,  # Disable automatic .env loading - we handle this manually
        env_file_encoding="utf-8",
        extra="allow",
    )

    @model_validator(mode="after")  # type: ignore
    def validate_source_configs(self) -> "Settings":
        """Validate that required configuration is present for configured sources.

        Returns:
            Settings: This instance, unchanged, when validation passes.

        Raises:
            ValueError: If the Qdrant configuration, its URL or its
                collection name is missing or empty.
        """
        logger.debug("Validating source configurations")

        # Validate that qdrant configuration is present in global config
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is required in global config")

        # Validate that required fields are not empty after variable substitution
        if not self.global_config.qdrant.url:
            raise ValueError(
                "Qdrant URL is required but was not provided or substituted"
            )

        if not self.global_config.qdrant.collection_name:
            raise ValueError(
                "Qdrant collection name is required but was not provided or substituted"
            )

        # Note: Source validation is now handled at the project level
        # Each project's sources are validated when the project is processed

        logger.debug("Source configuration validation successful")
        return self

    @property
    def qdrant_url(self) -> str:
        """Get the Qdrant URL from global configuration.

        Raises:
            ValueError: If no Qdrant configuration is available.
        """
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is not available")
        return self.global_config.qdrant.url

    @property
    def qdrant_api_key(self) -> str | None:
        """Get the Qdrant API key, or None when Qdrant is not configured."""
        if not self.global_config.qdrant:
            return None
        return self.global_config.qdrant.api_key

    @property
    def qdrant_collection_name(self) -> str:
        """Get the Qdrant collection name from global configuration.

        Raises:
            ValueError: If no Qdrant configuration is available.
        """
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is not available")
        return self.global_config.qdrant.collection_name

    @property
    def openai_api_key(self) -> str:
        """Get the OpenAI API key from embedding configuration.

        Raises:
            ValueError: If the embedding configuration has no API key.
        """
        api_key = self.global_config.embedding.api_key
        if not api_key:
            raise ValueError(
                "OpenAI API key is required but was not provided or substituted in embedding configuration"
            )
        return api_key

    @property
    def state_db_path(self) -> str:
        """Get the state database path from global configuration."""
        return self.global_config.state_management.database_path

    @staticmethod
    def _substitute_env_vars(data: Any) -> Any:
        """Recursively substitute environment variables in configuration data.

        In strings, ``$HOME`` is expanded to the user's home directory and
        every ``${VAR_NAME}`` placeholder whose variable is set is replaced
        by its value (with ``$HOME`` expanded inside that value as well).
        Placeholders for unset variables are left in place. Dicts and lists
        are processed recursively; any other value is returned unchanged.

        Args:
            data: Configuration data to process

        Returns:
            Processed data with environment variables substituted
        """
        if isinstance(data, str):
            # First expand $HOME if present
            if "$HOME" in data:
                data = data.replace("$HOME", os.path.expanduser("~"))

            # Then handle ${VAR_NAME} pattern
            pattern = r"\${([^}]+)}"
            result = data
            for match in re.finditer(pattern, data):
                var_name = match.group(1)
                env_value = os.getenv(var_name)
                if env_value is None:
                    # Only warn about missing variables that are commonly required
                    # Skip STATE_DB_PATH as it's often overridden in workspace mode
                    if var_name not in ["STATE_DB_PATH"]:
                        logger.warning(
                            "Environment variable not found", variable=var_name
                        )
                    continue
                # If the environment variable contains $HOME, expand it
                if "$HOME" in env_value:
                    env_value = env_value.replace("$HOME", os.path.expanduser("~"))
                # match.group(0) is the exact "${VAR_NAME}" text found in the
                # input, so the replacement targets the real placeholder and
                # needs no brace escaping inside an f-string.
                result = result.replace(match.group(0), env_value)

            return result
        elif isinstance(data, dict):
            return {k: Settings._substitute_env_vars(v) for k, v in data.items()}
        elif isinstance(data, list):
            return [Settings._substitute_env_vars(item) for item in data]
        return data

    @classmethod
    def from_yaml(
        cls,
        config_path: Path,
        env_path: Path | None = None,
        skip_validation: bool = False,
    ) -> "Settings":
        """Load configuration from a YAML file.

        Args:
            config_path: Path to the YAML configuration file.
            env_path: Optional path to the .env file. If provided, only this file is loaded.
            skip_validation: If True, skip directory validation and creation.
                The flag is forwarded to the multi-project parser.

        Returns:
            Settings: Loaded configuration.

        Raises:
            FileNotFoundError: If ``env_path`` is given but does not exist.
            yaml.YAMLError: If the YAML file cannot be parsed.
            ValidationError: If the parsed configuration fails validation.
        """
        logger.debug("Loading configuration from YAML", path=str(config_path))
        try:
            # Step 1: Load environment variables first
            if env_path is not None:
                # Custom env file specified - load only this file
                logger.debug("Loading custom environment file", path=str(env_path))
                if not env_path.exists():
                    raise FileNotFoundError(f"Environment file not found: {env_path}")
                load_dotenv(env_path, override=True)
            else:
                # Load default .env file if it exists
                logger.debug("Loading default environment variables")
                load_dotenv(override=False)

            # Step 2: Load YAML config. Read as UTF-8 explicitly so parsing
            # does not depend on the platform's default text encoding.
            with open(config_path, encoding="utf-8") as f:
                config_data = yaml.safe_load(f)

            # Step 3: Process all environment variables in config using substitution
            logger.debug("Processing environment variables in configuration")
            config_data = cls._substitute_env_vars(config_data)

            # Step 4: Use multi-project parser to parse configuration
            validator = ConfigValidator()
            parser = MultiProjectConfigParser(validator)
            parsed_config = parser.parse(config_data, skip_validation=skip_validation)

            # Step 5: Create settings instance with parsed configuration
            settings = cls(
                global_config=parsed_config.global_config,
                projects_config=parsed_config.projects_config,
            )

            logger.debug("Successfully created Settings instance")
            return settings

        except yaml.YAMLError as e:
            logger.error("Failed to parse YAML configuration", error=str(e))
            raise
        except ValidationError as e:
            logger.error("Configuration validation failed", error=str(e))
            raise
        except Exception as e:
            logger.error("Unexpected error loading configuration", error=str(e))
            raise

    def to_dict(self) -> dict:
        """Convert the configuration to a dictionary.

        Returns:
            dict: Configuration as a dictionary with ``"global"`` and
                ``"projects"`` keys.
        """
        return {
            "global": self.global_config.to_dict(),
            "projects": self.projects_config.to_dict(),
        }