Coverage for src/qdrant_loader/config/__init__.py: 84%
162 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-11 07:21 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-11 07:21 +0000
1"""Configuration module.
3This module provides the main configuration interface for the application.
4It combines global settings with source-specific configurations.
5"""
7import os
8import re
9from pathlib import Path
10from typing import Any, Optional
12import yaml
13from dotenv import load_dotenv
14from pydantic import Field, ValidationError, model_validator
15from pydantic_settings import BaseSettings, SettingsConfigDict
17from ..utils.logging import LoggingConfig
18from .chunking import ChunkingConfig
20# Import consolidated configs
21from .global_config import GlobalConfig, SemanticAnalysisConfig
23# Import multi-project support
24from .models import (
25 ParsedConfig,
26 ProjectConfig,
27 ProjectContext,
28 ProjectDetail,
29 ProjectInfo,
30 ProjectsConfig,
31 ProjectStats,
32)
33from .parser import MultiProjectConfigParser
34from .sources import SourcesConfig
35from .state import StateManagementConfig
36from .validator import ConfigValidator
37from .workspace import WorkspaceConfig
# Load environment variables from a default .env file when this package is
# imported. override=False keeps values that are already set in the process
# environment instead of replacing them with values from the file.
load_dotenv(override=False)
def _get_logger():
    """Return the logger registered under this module's name.

    The logger is looked up through ``LoggingConfig`` on every call rather
    than being stored in a module-level variable.
    """
    module_logger = LoggingConfig.get_logger(__name__)
    return module_logger
# Connector config classes are imported on demand rather than at module import
def _get_connector_configs():
    """Import the connector config classes and return them keyed by name.

    The imports are kept inside the function body so that importing this
    package does not import the connector packages, which avoids circular
    dependencies.

    Returns:
        dict: Mapping of public class name to connector config class.
    """
    from ..connectors.confluence.config import ConfluenceSpaceConfig
    from ..connectors.git.config import GitAuthConfig, GitRepoConfig
    from ..connectors.jira.config import JiraProjectConfig
    from ..connectors.publicdocs.config import PublicDocsSourceConfig, SelectorsConfig

    # Keyword form yields the same string keys, in the same insertion order.
    return dict(
        ConfluenceSpaceConfig=ConfluenceSpaceConfig,
        GitAuthConfig=GitAuthConfig,
        GitRepoConfig=GitRepoConfig,
        JiraProjectConfig=JiraProjectConfig,
        PublicDocsSourceConfig=PublicDocsSourceConfig,
        SelectorsConfig=SelectorsConfig,
    )
# Public API of the configuration package.
#
# The connector config names listed here (ConfluenceSpaceConfig, GitAuthConfig,
# GitRepoConfig, JiraProjectConfig, PublicDocsSourceConfig, SelectorsConfig)
# are not imported at the top of this module; they are resolved on access by
# the module-level __getattr__ hook defined below.
__all__ = [
    "ChunkingConfig",
    "ConfluenceSpaceConfig",
    "GitAuthConfig",
    "GitRepoConfig",
    "GlobalConfig",
    "JiraProjectConfig",
    "PublicDocsSourceConfig",
    "SelectorsConfig",
    "SemanticAnalysisConfig",
    "Settings",
    "SourcesConfig",
    "StateManagementConfig",
    # Multi-project support
    "ProjectContext",
    "ProjectConfig",
    "ProjectsConfig",
    "ParsedConfig",
    "ProjectStats",
    "ProjectInfo",
    "ProjectDetail",
    "MultiProjectConfigParser",
    "ConfigValidator",
    # Functions
    "get_global_config",
    "get_settings",
    "initialize_config",
    "initialize_config_with_workspace",
]
# Add lazy loading for connector configs
def __getattr__(name):
    """Resolve connector config classes lazily on attribute access (PEP 562).

    Python calls this hook only after normal module attribute lookup fails.

    Args:
        name: The attribute name that was not found on the module.

    Returns:
        The connector config class registered under ``name``.

    Raises:
        AttributeError: If ``name`` is not one of the lazily provided
            connector config classes.
    """
    # The import machinery and introspection helpers probe modules for dunder
    # and private attributes (``__wrapped__`` and friends). None of the lazily
    # provided classes has an underscore-prefixed name, so answer those probes
    # immediately instead of importing every connector package, which could
    # surface an ImportError or a circular import from a plain ``hasattr``.
    if name.startswith("_"):
        raise AttributeError(f"module '{__name__}' has no attribute '{name}'")

    connector_configs = _get_connector_configs()
    try:
        return connector_configs[name]
    except KeyError:
        # Suppress the KeyError context: callers only care that the attribute
        # does not exist.
        raise AttributeError(
            f"module '{__name__}' has no attribute '{name}'"
        ) from None
# Module-wide Settings instance. It stays None until initialize_config() or
# initialize_config_with_workspace() assigns it; get_settings() reads it.
_global_settings: Optional["Settings"] = None
def get_settings() -> "Settings":
    """Return the module-wide settings object.

    Returns:
        Settings: The instance stored by ``initialize_config()`` or
        ``initialize_config_with_workspace()``.

    Raises:
        RuntimeError: If no settings instance has been stored yet.
    """
    if _global_settings is not None:
        return _global_settings
    raise RuntimeError(
        "Settings not initialized. Call initialize_config() or initialize_config_with_workspace() first."
    )
def get_global_config() -> GlobalConfig:
    """Return the ``global_config`` section of the module-wide settings.

    Returns:
        GlobalConfig: The global configuration held by the current settings.

    Raises:
        RuntimeError: Propagated from ``get_settings()`` when the settings
            have not been initialized.
    """
    current_settings = get_settings()
    return current_settings.global_config
def initialize_config(
    yaml_path: Path, env_path: Path | None = None, skip_validation: bool = False
) -> None:
    """Load settings from a YAML file and store them as the global instance.

    Args:
        yaml_path: Path to the YAML configuration file.
        env_path: Optional path to a .env file, forwarded to
            ``Settings.from_yaml``.
        skip_validation: Forwarded unchanged to ``Settings.from_yaml``.

    Raises:
        Exception: Any error raised while loading is logged and re-raised
            unchanged; the global instance is not replaced in that case.
    """
    global _global_settings
    try:
        # Proceed with initialization
        env_path_text = None if not env_path else str(env_path)
        _get_logger().debug(
            "Initializing configuration",
            yaml_path=str(yaml_path),
            env_path=env_path_text,
        )
        loaded_settings = Settings.from_yaml(
            yaml_path, env_path=env_path, skip_validation=skip_validation
        )
        _global_settings = loaded_settings
        _get_logger().debug("Successfully initialized configuration")

    except Exception as exc:
        failure_details = {"error": str(exc), "yaml_path": str(yaml_path)}
        _get_logger().error("Failed to initialize configuration", **failure_details)
        raise
def initialize_config_with_workspace(
    workspace_config: WorkspaceConfig, skip_validation: bool = False
) -> None:
    """Load settings from a workspace and pin the state database to it.

    The YAML and .env locations are taken from ``workspace_config``. After
    loading, the state-management database path is replaced with the
    workspace's database path; a warning is logged when the YAML had set a
    different, non-empty, non-``:memory:`` path.

    Args:
        workspace_config: Workspace configuration with paths and settings.
        skip_validation: Forwarded unchanged to ``Settings.from_yaml``.

    Raises:
        Exception: Any error raised along the way is logged and re-raised
            unchanged.
    """
    global _global_settings
    try:
        if workspace_config.env_path:
            env_path_text = str(workspace_config.env_path)
        else:
            env_path_text = None
        _get_logger().debug(
            "Initializing configuration with workspace",
            workspace=str(workspace_config.workspace_path),
            config_path=str(workspace_config.config_path),
            env_path=env_path_text,
        )

        # Load configuration using workspace paths
        _global_settings = Settings.from_yaml(
            workspace_config.config_path,
            env_path=workspace_config.env_path,
            skip_validation=skip_validation,
        )

        # Remember what config.yaml asked for so the user can be warned
        state_section = _global_settings.global_config.state_management
        configured_db_path = state_section.database_path
        workspace_db_path = str(workspace_config.database_path)

        # Only warn for a real, user-specified path that differs from the workspace one
        has_custom_path = bool(configured_db_path) and configured_db_path != ":memory:"
        if has_custom_path and configured_db_path != workspace_db_path:
            _get_logger().warning(
                "Database path in config.yaml is ignored in workspace mode",
                config_database_path=configured_db_path,
                workspace_database_path=workspace_db_path,
            )

        # The workspace path always wins
        state_section.database_path = workspace_db_path

        _get_logger().debug(
            "Set workspace database path",
            database_path=workspace_db_path,
        )

        _get_logger().debug(
            "Successfully initialized configuration with workspace",
            workspace=str(workspace_config.workspace_path),
        )

    except Exception as exc:
        _get_logger().error(
            "Failed to initialize configuration with workspace",
            error=str(exc),
            workspace=str(workspace_config.workspace_path),
        )
        raise
class Settings(BaseSettings):
    """Main configuration class combining global and source-specific settings.

    Instances are normally created through :meth:`from_yaml`, which loads
    environment variables, reads the YAML file, substitutes ``${VAR}``
    placeholders and hands the result to the multi-project parser.
    """

    # Configuration objects - these are the only fields we need
    global_config: GlobalConfig = Field(
        default_factory=GlobalConfig, description="Global configuration settings"
    )
    projects_config: ProjectsConfig = Field(
        default_factory=ProjectsConfig, description="Multi-project configurations"
    )

    model_config = SettingsConfigDict(
        env_file=None,  # Disable automatic .env loading - we handle this manually
        env_file_encoding="utf-8",
        extra="allow",
    )

    @model_validator(mode="after")  # type: ignore
    def validate_source_configs(self) -> "Settings":
        """Validate that the required Qdrant settings are present.

        Returns:
            Settings: ``self``, as required of an "after" model validator.

        Raises:
            ValueError: If the Qdrant section, its URL, or its collection
                name is missing or empty.
        """
        _get_logger().debug("Validating source configurations")

        # Validate that qdrant configuration is present in global config
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is required in global config")

        # Validate that required fields are not empty after variable substitution
        if not self.global_config.qdrant.url:
            raise ValueError(
                "Qdrant URL is required but was not provided or substituted"
            )

        if not self.global_config.qdrant.collection_name:
            raise ValueError(
                "Qdrant collection name is required but was not provided or substituted"
            )

        # Note: Source validation is now handled at the project level
        # Each project's sources are validated when the project is processed

        _get_logger().debug("Source configuration validation successful")
        return self

    @property
    def qdrant_url(self) -> str:
        """Get the Qdrant URL from global configuration.

        Raises:
            ValueError: If the Qdrant section is not available.
        """
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is not available")
        return self.global_config.qdrant.url

    @property
    def qdrant_api_key(self) -> str | None:
        """Get the Qdrant API key, or ``None`` when there is no Qdrant section."""
        if not self.global_config.qdrant:
            return None
        return self.global_config.qdrant.api_key

    @property
    def qdrant_collection_name(self) -> str:
        """Get the Qdrant collection name from global configuration.

        Raises:
            ValueError: If the Qdrant section is not available.
        """
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is not available")
        return self.global_config.qdrant.collection_name

    @property
    def openai_api_key(self) -> str:
        """Get the OpenAI API key from embedding configuration.

        Raises:
            ValueError: If the embedding configuration has no API key.
        """
        api_key = self.global_config.embedding.api_key
        if not api_key:
            raise ValueError(
                "OpenAI API key is required but was not provided or substituted in embedding configuration"
            )
        return api_key

    @property
    def state_db_path(self) -> str:
        """Get the state database path from global configuration."""
        return self.global_config.state_management.database_path

    @property
    def llm_settings(self):
        """Provider-agnostic LLM settings derived from global configuration.

        Built by ``LLMSettings.from_global_config`` from the dictionary form
        of the global configuration.
        """
        # Import lazily to avoid hard dependency issues in environments without core installed
        from importlib import import_module

        settings_mod = import_module("qdrant_loader_core.llm.settings")
        LLMSettings = settings_mod.LLMSettings
        return LLMSettings.from_global_config(self.global_config.to_dict())

    @staticmethod
    def _substitute_env_vars(data: Any) -> Any:
        """Recursively substitute environment variables in configuration data.

        Strings have ``$HOME`` expanded to the user's home directory and each
        ``${VAR_NAME}`` placeholder replaced by the value of that environment
        variable. Placeholders whose variable is unset are left as-is. Dict
        values and list items are processed recursively; dict keys and
        non-string scalars are returned unchanged.

        Args:
            data: Configuration data to process

        Returns:
            Processed data with environment variables substituted
        """
        if isinstance(data, str):
            home_dir = os.path.expanduser("~")

            # First expand $HOME if present
            text = data.replace("$HOME", home_dir)

            def _resolve(match: re.Match) -> str:
                var_name = match.group(1)
                env_value = os.getenv(var_name)
                if env_value is None:
                    # Only warn about missing variables that are commonly required
                    # Skip STATE_DB_PATH as it's often overridden in workspace mode
                    if var_name not in ["STATE_DB_PATH"]:
                        _get_logger().warning(
                            "Environment variable not found", variable=var_name
                        )
                    # Keep the placeholder so later validation can report it
                    return match.group(0)
                # If the environment variable contains $HOME, expand it
                return env_value.replace("$HOME", home_dir)

            # Then handle ${VAR_NAME} pattern in a single pass. A callback is
            # used (rather than repeated str.replace on the growing result) so
            # that placeholder-looking text inside a substituted value is
            # never substituted again and the outcome does not depend on the
            # order in which placeholders appear.
            return re.sub(r"\${([^}]+)}", _resolve, text)
        elif isinstance(data, dict):
            return {k: Settings._substitute_env_vars(v) for k, v in data.items()}
        elif isinstance(data, list):
            return [Settings._substitute_env_vars(item) for item in data]
        return data

    @classmethod
    def from_yaml(
        cls,
        config_path: Path,
        env_path: Path | None = None,
        skip_validation: bool = False,
    ) -> "Settings":
        """Load configuration from a YAML file.

        Args:
            config_path: Path to the YAML configuration file.
            env_path: Optional path to the .env file. When given, the file
                must exist and is loaded with ``override=True``; otherwise the
                default .env lookup runs with ``override=False``.
            skip_validation: Forwarded to the multi-project parser.

        Returns:
            Settings: Loaded configuration.

        Raises:
            FileNotFoundError: If ``env_path`` is given but does not exist.
            yaml.YAMLError: If the YAML file cannot be parsed.
            ValidationError: If the parsed configuration fails validation.
            Exception: Any other error is logged and re-raised unchanged.
        """
        _get_logger().debug("Loading configuration from YAML", path=str(config_path))
        try:
            # Step 1: Load environment variables first
            if env_path is not None:
                # Custom env file specified - load only this file
                _get_logger().debug(
                    "Loading custom environment file", path=str(env_path)
                )
                if not env_path.exists():
                    raise FileNotFoundError(f"Environment file not found: {env_path}")
                load_dotenv(env_path, override=True)
            else:
                # Load default .env file if it exists
                _get_logger().debug("Loading default environment variables")
                load_dotenv(override=False)

            # Step 2: Load YAML config
            # Read as UTF-8 explicitly so parsing does not depend on the
            # platform's default locale encoding.
            with open(config_path, encoding="utf-8") as f:
                config_data = yaml.safe_load(f)

            # Step 3: Process all environment variables in config using substitution
            _get_logger().debug("Processing environment variables in configuration")
            config_data = cls._substitute_env_vars(config_data)

            # Step 4: Use multi-project parser to parse configuration
            validator = ConfigValidator()
            parser = MultiProjectConfigParser(validator)
            parsed_config = parser.parse(config_data, skip_validation=skip_validation)

            # Step 5: Create settings instance with parsed configuration
            settings = cls(
                global_config=parsed_config.global_config,
                projects_config=parsed_config.projects_config,
            )

            _get_logger().debug("Successfully created Settings instance")
            return settings

        except yaml.YAMLError as e:
            _get_logger().error("Failed to parse YAML configuration", error=str(e))
            raise
        except ValidationError as e:
            _get_logger().error("Configuration validation failed", error=str(e))
            raise
        except Exception as e:
            _get_logger().error("Unexpected error loading configuration", error=str(e))
            raise

    def to_dict(self) -> dict:
        """Convert the configuration to a dictionary.

        Returns:
            dict: Configuration with the keys ``"global"`` and ``"projects"``.
        """
        return {
            "global": self.global_config.to_dict(),
            "projects": self.projects_config.to_dict(),
        }