# Coverage for src/qdrant_loader/config/__init__.py: 84% (161 statements)
# Report generated by coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
"""Configuration module.

This module provides the main configuration interface for the application.
It combines global settings with source-specific configurations.
"""
7import os
8import re
9from pathlib import Path
10from typing import Any, Optional
12import yaml
13from dotenv import load_dotenv
14from pydantic import Field, ValidationError, model_validator
15from pydantic_settings import BaseSettings, SettingsConfigDict
17from ..utils.logging import LoggingConfig
18from .chunking import ChunkingConfig
20# Import consolidated configs
21from .global_config import GlobalConfig, SemanticAnalysisConfig
23# Import multi-project support
24from .models import (
25 ParsedConfig,
26 ProjectConfig,
27 ProjectContext,
28 ProjectDetail,
29 ProjectInfo,
30 ProjectsConfig,
31 ProjectStats,
32)
33from .parser import MultiProjectConfigParser
34from .sources import SourcesConfig
35from .state import StateManagementConfig
36from .validator import ConfigValidator
37from .workspace import WorkspaceConfig
# Populate os.environ from a .env file at import time. override=False means
# variables that are already set in the process environment are kept.
load_dotenv(override=False)

# Module-level logger; obtained through LoggingConfig without initializing it.
logger = LoggingConfig.get_logger(__name__)
# Connector configuration classes are imported on demand (see below).
def _get_connector_configs():
    """Import the connector config classes on demand and index them by name.

    The imports are performed inside the function body, rather than at module
    level, to avoid circular dependencies with the connector packages.

    Returns:
        dict: Mapping of exported class name to the connector config class.
    """
    from ..connectors.confluence.config import ConfluenceSpaceConfig
    from ..connectors.git.config import GitAuthConfig, GitRepoConfig
    from ..connectors.jira.config import JiraProjectConfig
    from ..connectors.publicdocs.config import PublicDocsSourceConfig, SelectorsConfig

    registry = dict(
        ConfluenceSpaceConfig=ConfluenceSpaceConfig,
        GitAuthConfig=GitAuthConfig,
        GitRepoConfig=GitRepoConfig,
        JiraProjectConfig=JiraProjectConfig,
        PublicDocsSourceConfig=PublicDocsSourceConfig,
        SelectorsConfig=SelectorsConfig,
    )
    return registry
# Public API of the configuration package.
__all__ = [
    # Core and connector configuration classes (the connector classes are
    # resolved lazily through the module-level __getattr__)
    "ChunkingConfig",
    "ConfluenceSpaceConfig",
    "GitAuthConfig",
    "GitRepoConfig",
    "GlobalConfig",
    "JiraProjectConfig",
    "PublicDocsSourceConfig",
    "SelectorsConfig",
    "SemanticAnalysisConfig",
    "Settings",
    "SourcesConfig",
    "StateManagementConfig",
    # Multi-project support
    "ProjectContext",
    "ProjectConfig",
    "ProjectsConfig",
    "ParsedConfig",
    "ProjectStats",
    "ProjectInfo",
    "ProjectDetail",
    "MultiProjectConfigParser",
    "ConfigValidator",
    # Functions
    "get_global_config",
    "get_settings",
    "initialize_config",
    "initialize_config_with_workspace",
]
# Names that the module-level __getattr__ resolves lazily. This must mirror
# the keys of the mapping returned by _get_connector_configs().
_LAZY_CONNECTOR_CONFIG_NAMES = frozenset(
    {
        "ConfluenceSpaceConfig",
        "GitAuthConfig",
        "GitRepoConfig",
        "JiraProjectConfig",
        "PublicDocsSourceConfig",
        "SelectorsConfig",
    }
)


def __getattr__(name):
    """Resolve connector config classes lazily to avoid circular dependencies.

    Python calls this hook only when normal module attribute lookup fails.
    Unknown names are rejected *before* any connector package is imported, so
    unrelated probes (``hasattr``, ``getattr`` with a default, introspection
    of dunder attributes) neither trigger the connector imports nor surface
    their import errors.

    Args:
        name: Attribute name being looked up on this module.

    Returns:
        The connector configuration class registered under ``name``.

    Raises:
        AttributeError: If ``name`` is not one of the lazily provided
            connector configuration classes.
    """
    if name not in _LAZY_CONNECTOR_CONFIG_NAMES:
        raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
    try:
        return _get_connector_configs()[name]
    except KeyError:
        # The name set above drifted from the lazy-import mapping; keep the
        # attribute-lookup contract instead of leaking a KeyError.
        raise AttributeError(
            f"module '{__name__}' has no attribute '{name}'"
        ) from None
# Process-wide settings instance; stays None until one of the
# initialize_config* functions stores a loaded Settings object here.
_global_settings: Optional["Settings"] = None


def get_settings() -> "Settings":
    """Return the process-wide settings object.

    Returns:
        Settings: The instance currently stored in the module-level global.

    Raises:
        RuntimeError: If no settings instance has been stored yet.
    """
    current = _global_settings
    if current is not None:
        return current
    raise RuntimeError(
        "Settings not initialized. Call initialize_config() or initialize_config_with_workspace() first."
    )
def get_global_config() -> GlobalConfig:
    """Return the ``global_config`` section of the initialized settings.

    Returns:
        GlobalConfig: The global configuration held by the settings instance.

    Raises:
        RuntimeError: Propagated from ``get_settings()`` when the settings
            have not been initialized.
    """
    settings = get_settings()
    return settings.global_config
def initialize_config(
    yaml_path: Path, env_path: Path | None = None, skip_validation: bool = False
) -> None:
    """Load settings from a YAML file and store them as the global settings.

    Args:
        yaml_path: Path to the YAML configuration file.
        env_path: Optional path to the .env file.
        skip_validation: If True, skip directory validation and creation.

    Raises:
        Exception: Any error raised while loading is logged and re-raised
            unchanged.
    """
    global _global_settings

    try:
        env_path_text = str(env_path) if env_path else None
        logger.debug(
            "Initializing configuration",
            yaml_path=str(yaml_path),
            env_path=env_path_text,
        )

        loaded = Settings.from_yaml(
            yaml_path, env_path=env_path, skip_validation=skip_validation
        )
        _global_settings = loaded

        logger.debug("Successfully initialized configuration")
    except Exception as exc:
        logger.error(
            "Failed to initialize configuration",
            error=str(exc),
            yaml_path=str(yaml_path),
        )
        raise
def initialize_config_with_workspace(
    workspace_config: WorkspaceConfig, skip_validation: bool = False
) -> None:
    """Load settings from the workspace's files and store them globally.

    The state database path found in the loaded configuration is always
    replaced by the workspace's own database path; a warning is logged when
    the configuration specified a different, non-``:memory:`` path.

    Args:
        workspace_config: Workspace configuration with paths and settings
        skip_validation: If True, skip directory validation and creation

    Raises:
        Exception: Any error raised during initialization is logged and
            re-raised unchanged.
    """
    global _global_settings

    try:
        env_file = workspace_config.env_path
        logger.debug(
            "Initializing configuration with workspace",
            workspace=str(workspace_config.workspace_path),
            config_path=str(workspace_config.config_path),
            env_path=str(env_file) if env_file else None,
        )

        # Load configuration using workspace paths
        _global_settings = Settings.from_yaml(
            workspace_config.config_path,
            env_path=env_file,
            skip_validation=skip_validation,
        )

        state_cfg = _global_settings.global_config.state_management
        configured_db_path = state_cfg.database_path
        workspace_db_path = str(workspace_config.database_path)

        # Warn only when config.yaml carried an explicit path (not empty and
        # not ":memory:") that differs from the workspace path.
        has_explicit_path = bool(configured_db_path) and configured_db_path != ":memory:"
        if has_explicit_path and configured_db_path != workspace_db_path:
            logger.warning(
                "Database path in config.yaml is ignored in workspace mode",
                config_database_path=configured_db_path,
                workspace_database_path=workspace_db_path,
            )

        # The workspace-specific path always wins.
        state_cfg.database_path = workspace_db_path
        logger.debug(
            "Set workspace database path",
            database_path=workspace_db_path,
        )

        logger.debug(
            "Successfully initialized configuration with workspace",
            workspace=str(workspace_config.workspace_path),
        )
    except Exception as exc:
        logger.error(
            "Failed to initialize configuration with workspace",
            error=str(exc),
            workspace=str(workspace_config.workspace_path),
        )
        raise
class Settings(BaseSettings):
    """Main configuration class combining global and source-specific settings.

    Instances hold two configuration objects: ``global_config`` and
    ``projects_config``. The usual way to build one is ``Settings.from_yaml``,
    which loads environment variables, reads the YAML file, substitutes
    ``${VAR}`` placeholders and runs the multi-project parser.
    """

    # Configuration objects - these are the only fields we need
    global_config: GlobalConfig = Field(
        default_factory=GlobalConfig, description="Global configuration settings"
    )
    projects_config: ProjectsConfig = Field(
        default_factory=ProjectsConfig, description="Multi-project configurations"
    )

    model_config = SettingsConfigDict(
        env_file=None,  # Disable automatic .env loading - we handle this manually
        env_file_encoding="utf-8",
        extra="allow",
    )

    @model_validator(mode="after")  # type: ignore
    def validate_source_configs(self) -> "Settings":
        """Validate that required configuration is present for configured sources.

        Returns:
            Settings: The validated instance itself.

        Raises:
            ValueError: If the Qdrant section, its URL, or its collection
                name is missing or empty.
        """
        logger.debug("Validating source configurations")

        # Validate that qdrant configuration is present in global config
        qdrant = self.global_config.qdrant
        if not qdrant:
            raise ValueError("Qdrant configuration is required in global config")

        # Validate that required fields are not empty after variable substitution
        if not qdrant.url:
            raise ValueError(
                "Qdrant URL is required but was not provided or substituted"
            )

        if not qdrant.collection_name:
            raise ValueError(
                "Qdrant collection name is required but was not provided or substituted"
            )

        # Note: Source validation is now handled at the project level
        # Each project's sources are validated when the project is processed

        logger.debug("Source configuration validation successful")
        return self

    @property
    def qdrant_url(self) -> str:
        """Get the Qdrant URL from global configuration.

        Raises:
            ValueError: If no Qdrant configuration is available.
        """
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is not available")
        return self.global_config.qdrant.url

    @property
    def qdrant_api_key(self) -> str | None:
        """Get the Qdrant API key, or None when Qdrant is not configured."""
        if not self.global_config.qdrant:
            return None
        return self.global_config.qdrant.api_key

    @property
    def qdrant_collection_name(self) -> str:
        """Get the Qdrant collection name from global configuration.

        Raises:
            ValueError: If no Qdrant configuration is available.
        """
        if not self.global_config.qdrant:
            raise ValueError("Qdrant configuration is not available")
        return self.global_config.qdrant.collection_name

    @property
    def openai_api_key(self) -> str:
        """Get the OpenAI API key from embedding configuration.

        Raises:
            ValueError: If the embedding configuration has no API key.
        """
        api_key = self.global_config.embedding.api_key
        if not api_key:
            raise ValueError(
                "OpenAI API key is required but was not provided or substituted in embedding configuration"
            )
        return api_key

    @property
    def state_db_path(self) -> str:
        """Get the state database path from global configuration."""
        return self.global_config.state_management.database_path

    @property
    def llm_settings(self):
        """Provider-agnostic LLM settings derived from global configuration.

        Uses `global.llm` when present; otherwise maps legacy fields.
        """
        # Import lazily to avoid hard dependency issues in environments without core installed
        from importlib import import_module

        settings_mod = import_module("qdrant_loader_core.llm.settings")
        LLMSettings = settings_mod.LLMSettings
        return LLMSettings.from_global_config(self.global_config.to_dict())

    @staticmethod
    def _substitute_env_vars(data: Any) -> Any:
        """Recursively substitute environment variables in configuration data.

        Strings have ``$HOME`` expanded to the user's home directory and every
        ``${VAR_NAME}`` placeholder replaced by the value of that environment
        variable. Placeholders whose variable is unset are left untouched in
        the output. Dicts (values only) and lists are processed recursively;
        any other value is returned unchanged.

        Args:
            data: Configuration data to process

        Returns:
            Processed data with environment variables substituted
        """
        if isinstance(data, str):
            # First expand $HOME if present
            if "$HOME" in data:
                data = data.replace("$HOME", os.path.expanduser("~"))

            # Then handle ${VAR_NAME} pattern
            pattern = r"\${([^}]+)}"
            result = data
            for match in re.finditer(pattern, data):
                var_name = match.group(1)
                env_value = os.getenv(var_name)
                if env_value is None:
                    # Only warn about missing variables that are commonly required
                    # Skip STATE_DB_PATH as it's often overridden in workspace mode
                    if var_name not in ["STATE_DB_PATH"]:
                        logger.warning(
                            "Environment variable not found", variable=var_name
                        )
                    continue
                # If the environment variable contains $HOME, expand it
                if "$HOME" in env_value:
                    env_value = env_value.replace("$HOME", os.path.expanduser("~"))
                result = result.replace(f"${{{var_name}}}", env_value)

            return result
        elif isinstance(data, dict):
            return {k: Settings._substitute_env_vars(v) for k, v in data.items()}
        elif isinstance(data, list):
            return [Settings._substitute_env_vars(item) for item in data]
        return data

    @classmethod
    def from_yaml(
        cls,
        config_path: Path,
        env_path: Path | None = None,
        skip_validation: bool = False,
    ) -> "Settings":
        """Load configuration from a YAML file.

        Args:
            config_path: Path to the YAML configuration file.
            env_path: Optional path to the .env file. If provided, only this file is loaded.
            skip_validation: If True, skip directory validation and creation.

        Returns:
            Settings: Loaded configuration.

        Raises:
            FileNotFoundError: If ``env_path`` is given but does not exist.
            yaml.YAMLError: If the YAML file cannot be parsed.
            ValidationError: If the parsed configuration fails validation.
            Exception: Any other error is logged and re-raised unchanged.
        """
        logger.debug("Loading configuration from YAML", path=str(config_path))
        try:
            # Step 1: Load environment variables first
            if env_path is not None:
                # Custom env file specified - load only this file
                logger.debug("Loading custom environment file", path=str(env_path))
                if not env_path.exists():
                    raise FileNotFoundError(f"Environment file not found: {env_path}")
                load_dotenv(env_path, override=True)
            else:
                # Load default .env file if it exists
                logger.debug("Loading default environment variables")
                load_dotenv(override=False)

            # Step 2: Load YAML config. Read explicitly as UTF-8 so the result
            # does not depend on the platform's locale encoding.
            with open(config_path, encoding="utf-8") as f:
                config_data = yaml.safe_load(f)

            # Step 3: Process all environment variables in config using substitution
            logger.debug("Processing environment variables in configuration")
            config_data = cls._substitute_env_vars(config_data)

            # Step 4: Use multi-project parser to parse configuration
            validator = ConfigValidator()
            parser = MultiProjectConfigParser(validator)
            parsed_config = parser.parse(config_data, skip_validation=skip_validation)

            # Step 5: Create settings instance with parsed configuration
            settings = cls(
                global_config=parsed_config.global_config,
                projects_config=parsed_config.projects_config,
            )

            logger.debug("Successfully created Settings instance")
            return settings

        except yaml.YAMLError as e:
            logger.error("Failed to parse YAML configuration", error=str(e))
            raise
        except ValidationError as e:
            logger.error("Configuration validation failed", error=str(e))
            raise
        except Exception as e:
            logger.error("Unexpected error loading configuration", error=str(e))
            raise

    def to_dict(self) -> dict:
        """Convert the configuration to a dictionary.

        Returns:
            dict: Configuration as a dictionary with ``"global"`` and
            ``"projects"`` keys.
        """
        return {
            "global": self.global_config.to_dict(),
            "projects": self.projects_config.to_dict(),
        }