Coverage for src / qdrant_loader / config / __init__.py: 86%
175 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-18 04:48 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-18 04:48 +0000
1"""Configuration module.
3This module provides the main configuration interface for the application.
4It combines global settings with source-specific configurations.
5"""
7import os
8import re
9from pathlib import Path
10from typing import Any, Optional
12import yaml
13from dotenv import load_dotenv
14from pydantic import Field, ValidationError, model_validator
15from pydantic_settings import BaseSettings, SettingsConfigDict
17from ..utils.logging import LoggingConfig
18from .chunking import ChunkingConfig
20# Import consolidated configs
21from .global_config import GlobalConfig, SemanticAnalysisConfig
23# Import multi-project support
24from .models import (
25 ParsedConfig,
26 ProjectConfig,
27 ProjectContext,
28 ProjectDetail,
29 ProjectInfo,
30 ProjectsConfig,
31 ProjectStats,
32)
33from .parser import MultiProjectConfigParser
34from .sources import SourcesConfig
35from .state import StateManagementConfig
36from .validator import ConfigValidator
37from .workspace import WorkspaceConfig
39# Load environment variables from .env file
40load_dotenv(override=False)
def _get_logger():
    """Return the logger registered under this module's name.

    The logger is looked up through ``LoggingConfig`` on every call instead
    of being stored in a module-level variable at import time.
    """
    module_logger = LoggingConfig.get_logger(__name__)
    return module_logger
# Connector config classes are imported inside the function (not at module
# import time) so that importing this package does not pull in the connectors.
def _get_connector_configs():
    """Import the connector config classes and return them keyed by name.

    Returns:
        dict: Maps each connector config class name to the class object.
    """
    from ..connectors.confluence.config import ConfluenceSpaceConfig
    from ..connectors.git.config import GitAuthConfig, GitRepoConfig
    from ..connectors.jira.config import JiraProjectConfig
    from ..connectors.publicdocs.config import PublicDocsSourceConfig, SelectorsConfig

    lazily_loaded = (
        ("ConfluenceSpaceConfig", ConfluenceSpaceConfig),
        ("GitAuthConfig", GitAuthConfig),
        ("GitRepoConfig", GitRepoConfig),
        ("JiraProjectConfig", JiraProjectConfig),
        ("PublicDocsSourceConfig", PublicDocsSourceConfig),
        ("SelectorsConfig", SelectorsConfig),
    )
    return dict(lazily_loaded)
# Public API of the configuration package.
#
# The connector config names (ConfluenceSpaceConfig, GitAuthConfig,
# GitRepoConfig, JiraProjectConfig, PublicDocsSourceConfig, SelectorsConfig)
# are not imported at the top of this module; they are resolved on demand by
# the module-level ``__getattr__``.
__all__ = [
    "ChunkingConfig",
    "ConfluenceSpaceConfig",
    "GitAuthConfig",
    "GitRepoConfig",
    "GlobalConfig",
    "JiraProjectConfig",
    "PublicDocsSourceConfig",
    "SelectorsConfig",
    "SemanticAnalysisConfig",
    "Settings",
    "SourcesConfig",
    "StateManagementConfig",
    # Multi-project support
    "ProjectContext",
    "ProjectConfig",
    "ProjectsConfig",
    "ParsedConfig",
    "ProjectStats",
    "ProjectInfo",
    "ProjectDetail",
    "MultiProjectConfigParser",
    "ConfigValidator",
    # Functions
    "get_global_config",
    "get_settings",
    "initialize_config",
    "initialize_config_with_workspace",
]
# Names that the module-level ``__getattr__`` below resolves lazily.
# Keep in sync with the keys returned by ``_get_connector_configs()``.
_LAZY_CONNECTOR_CONFIG_NAMES = frozenset(
    {
        "ConfluenceSpaceConfig",
        "GitAuthConfig",
        "GitRepoConfig",
        "JiraProjectConfig",
        "PublicDocsSourceConfig",
        "SelectorsConfig",
    }
)


# Add lazy loading for connector configs
def __getattr__(name):
    """Lazy import connector configs to avoid circular dependencies.

    Called by Python only when ``name`` is not found in the module namespace.
    The connector packages are imported only when ``name`` is one of the
    known connector config names. Any other lookup (``hasattr`` probes,
    dunder lookups, typos) fails immediately with ``AttributeError`` instead
    of triggering the connector imports, whose failure would otherwise
    escape as a different exception type.

    Args:
        name: Attribute name being looked up on this module.

    Returns:
        The connector config class registered under ``name``.

    Raises:
        AttributeError: If ``name`` is not a lazily provided connector config.
    """
    if name in _LAZY_CONNECTOR_CONFIG_NAMES:
        return _get_connector_configs()[name]
    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
# Process-wide Settings instance; stays None until one of the
# initialize_config*() functions has populated it.
_global_settings: Optional["Settings"] = None


def get_settings() -> "Settings":
    """Return the process-wide settings instance.

    Returns:
        Settings: The instance stored by a previous initialization call.

    Raises:
        RuntimeError: If no settings instance has been stored yet.
    """
    current = _global_settings
    if current is not None:
        return current
    raise RuntimeError(
        "Settings not initialized. Call initialize_config() or initialize_config_with_workspace() first."
    )
def get_global_config() -> GlobalConfig:
    """Return the ``global_config`` of the current settings instance.

    Returns:
        GlobalConfig: The global configuration held by the settings returned
        from ``get_settings()``; any error raised there propagates unchanged.
    """
    active_settings = get_settings()
    return active_settings.global_config
def initialize_config(
    yaml_path: Path, env_path: Path | None = None, skip_validation: bool = False
) -> None:
    """Load settings from a YAML file and store them as the global instance.

    Args:
        yaml_path: Path to the YAML configuration file.
        env_path: Optional path to the .env file.
        skip_validation: Forwarded to ``Settings.from_yaml``.

    Raises:
        Exception: Any error raised while loading is logged and re-raised.
    """
    global _global_settings
    try:
        env_path_text = str(env_path) if env_path else None
        _get_logger().debug(
            "Initializing configuration",
            yaml_path=str(yaml_path),
            env_path=env_path_text,
        )
        loaded = Settings.from_yaml(
            yaml_path, env_path=env_path, skip_validation=skip_validation
        )
        _global_settings = loaded
        _get_logger().debug("Successfully initialized configuration")
    except Exception as e:
        # Log with the offending path for context, then let the caller decide.
        _get_logger().error(
            "Failed to initialize configuration", error=str(e), yaml_path=str(yaml_path)
        )
        raise
def initialize_config_with_workspace(
    workspace_config: WorkspaceConfig, skip_validation: bool = False
) -> None:
    """Initialize configuration using workspace settings.

    The settings are loaded from the workspace's config file, the state
    database path is replaced by the workspace database path, and only then
    is the result stored as the global instance. If any step fails, the
    previously stored global settings (or ``None``) are left untouched.

    Args:
        workspace_config: Workspace configuration with paths and settings
        skip_validation: If True, skip directory validation and creation

    Raises:
        Exception: Any error raised while loading or configuring is logged
            and re-raised.
    """
    global _global_settings
    try:
        _get_logger().debug(
            "Initializing configuration with workspace",
            workspace=str(workspace_config.workspace_path),
            config_path=str(workspace_config.config_path),
            env_path=(
                str(workspace_config.env_path) if workspace_config.env_path else None
            ),
        )

        # Load configuration using workspace paths. Keep the result local so
        # a failure below never exposes a half-configured instance.
        settings = Settings.from_yaml(
            workspace_config.config_path,
            env_path=workspace_config.env_path,
            skip_validation=skip_validation,
        )

        # Check if database_path was specified in config.yaml and warn user
        state_management = settings.global_config.state_management
        original_db_path = state_management.database_path
        workspace_db_path = str(workspace_config.database_path)

        # Only warn if the original path is different from the workspace path
        # and not empty/in-memory
        if (
            original_db_path
            and original_db_path != ":memory:"
            and original_db_path != workspace_db_path
        ):
            _get_logger().warning(
                "Database path in config.yaml is ignored in workspace mode",
                config_database_path=original_db_path,
                workspace_database_path=workspace_db_path,
            )

        # Override the database path with workspace-specific path
        state_management.database_path = workspace_db_path

        _get_logger().debug(
            "Set workspace database path",
            database_path=workspace_db_path,
        )

        # Publish only once the instance is fully configured.
        _global_settings = settings

        _get_logger().debug(
            "Successfully initialized configuration with workspace",
            workspace=str(workspace_config.workspace_path),
        )

    except Exception as e:
        _get_logger().error(
            "Failed to initialize configuration with workspace",
            error=str(e),
            workspace=str(workspace_config.workspace_path),
        )
        raise
class Settings(BaseSettings):
    """Main configuration class combining global and source-specific settings.

    Holds the global configuration and the multi-project configuration.
    Instances are normally created through :meth:`from_yaml`, which loads
    environment variables, reads the YAML file, substitutes ``${VAR}``
    placeholders and delegates parsing to ``MultiProjectConfigParser``.
    """

    # Configuration objects - these are the only fields we need
    global_config: GlobalConfig = Field(
        default_factory=GlobalConfig, description="Global configuration settings"
    )
    projects_config: ProjectsConfig = Field(
        default_factory=ProjectsConfig, description="Multi-project configurations"
    )

    model_config = SettingsConfigDict(
        env_file=None,  # Disable automatic .env loading - we handle this manually
        env_file_encoding="utf-8",
        extra="allow",
    )

    @model_validator(mode="after")  # type: ignore
    def validate_source_configs(self) -> "Settings":
        """Validate that required configuration is present for configured sources.

        Fills in environment-variable fallbacks first, then checks that the
        Qdrant URL and collection name are non-empty.

        Returns:
            Settings: This instance, as required by an "after" model validator.

        Raises:
            ValueError: If the Qdrant URL or collection name is empty.
        """
        _get_logger().debug("Validating source configurations")

        # Auto-resolve environment variables as fallbacks
        self._auto_resolve_env_vars()

        # Validate that required fields are not empty after variable substitution
        if not self.global_config.qdrant.url:
            raise ValueError(
                "Qdrant URL is required but was not provided or substituted"
            )

        if not self.global_config.qdrant.collection_name:
            raise ValueError(
                "Qdrant collection name is required but was not provided or substituted"
            )

        # Note: Source validation is now handled at the project level
        # Each project's sources are validated when the project is processed

        _get_logger().debug("Source configuration validation successful")
        return self

    def _auto_resolve_env_vars(self) -> None:
        """Auto-resolve well-known environment variables as fallbacks.

        Priority: config file value > environment variable > default.
        Only fills in values that were not explicitly set in config.

        Note: detection uses default-value sentinels, so explicitly setting a
        config value equal to the default (e.g. url: http://localhost:6333)
        will still be overridden by the environment variable.
        """
        # OPENAI_API_KEY → embedding.api_key and llm.api_key
        openai_key = os.getenv("OPENAI_API_KEY")
        if openai_key:
            if not self.global_config.embedding.api_key:
                self.global_config.embedding.api_key = openai_key
            # The llm section is only touched when it is a non-empty dict.
            if self.global_config.llm and isinstance(self.global_config.llm, dict):
                if not self.global_config.llm.get("api_key"):
                    self.global_config.llm["api_key"] = openai_key

        # QDRANT_URL → qdrant.url (override only if still default)
        qdrant_url = os.getenv("QDRANT_URL")
        if qdrant_url and self.global_config.qdrant.url == "http://localhost:6333":
            self.global_config.qdrant.url = qdrant_url

        # QDRANT_API_KEY → qdrant.api_key
        qdrant_api_key = os.getenv("QDRANT_API_KEY")
        if qdrant_api_key and not self.global_config.qdrant.api_key:
            self.global_config.qdrant.api_key = qdrant_api_key

        # QDRANT_COLLECTION_NAME → qdrant.collection_name
        collection = os.getenv("QDRANT_COLLECTION_NAME")
        if collection and self.global_config.qdrant.collection_name == "documents":
            self.global_config.qdrant.collection_name = collection

        # STATE_DB_PATH → state_management.database_path
        # Note: In workspace mode this is overridden by workspace_config.database_path
        state_db = os.getenv("STATE_DB_PATH")
        if (
            state_db
            and self.global_config.state_management.database_path == "./state.db"
        ):
            self.global_config.state_management.database_path = state_db

    @property
    def qdrant_url(self) -> str:
        """Get the Qdrant URL from global configuration."""
        return self.global_config.qdrant.url

    @property
    def qdrant_api_key(self) -> str | None:
        """Get the Qdrant API key from global configuration."""
        return self.global_config.qdrant.api_key

    @property
    def qdrant_collection_name(self) -> str:
        """Get the Qdrant collection name from global configuration."""
        return self.global_config.qdrant.collection_name

    @property
    def openai_api_key(self) -> str:
        """Get the OpenAI API key from embedding configuration.

        Raises:
            ValueError: If the embedding configuration has no API key.
        """
        api_key = self.global_config.embedding.api_key
        if not api_key:
            raise ValueError(
                "OpenAI API key is required but was not provided or substituted in embedding configuration"
            )
        return api_key

    @property
    def state_db_path(self) -> str:
        """Get the state database path from global configuration."""
        return self.global_config.state_management.database_path

    @property
    def llm_settings(self):
        """Provider-agnostic LLM settings derived from global configuration.

        Uses `global.llm` when present; otherwise maps legacy fields.
        """
        # Import lazily to avoid hard dependency issues in environments without core installed
        from importlib import import_module

        settings_mod = import_module("qdrant_loader_core.llm.settings")
        LLMSettings = settings_mod.LLMSettings
        return LLMSettings.from_global_config(self.global_config.to_dict())

    @staticmethod
    def _substitute_env_vars(data: Any) -> Any:
        """Recursively substitute environment variables in configuration data.

        Strings have every literal ``$HOME`` replaced by the user's home
        directory and every ``${VAR_NAME}`` placeholder replaced by the value
        of that environment variable. Placeholders whose variable is unset
        are left in place. Substitution is a single pass over the string:
        text that comes from a substituted value is not scanned again for
        further placeholders, so the result does not depend on placeholder
        order.

        Args:
            data: Configuration data to process (str, dict, list or any
                other value, which is returned unchanged).

        Returns:
            Processed data with environment variables substituted
        """
        if isinstance(data, str):
            home_dir = os.path.expanduser("~")

            # First expand $HOME if present
            text = data.replace("$HOME", home_dir)

            def _resolve(match: "re.Match[str]") -> str:
                var_name = match.group(1)
                env_value = os.getenv(var_name)
                if env_value is None:
                    # Only warn about missing variables that are commonly required
                    # Skip STATE_DB_PATH as it's often overridden in workspace mode
                    if var_name not in ["STATE_DB_PATH"]:
                        _get_logger().warning(
                            "Environment variable not found", variable=var_name
                        )
                    # Leave the placeholder untouched.
                    return match.group(0)
                # If the environment variable contains $HOME, expand it
                return env_value.replace("$HOME", home_dir)

            # Then handle ${VAR_NAME} pattern
            return re.sub(r"\$\{([^}]+)\}", _resolve, text)
        elif isinstance(data, dict):
            return {k: Settings._substitute_env_vars(v) for k, v in data.items()}
        elif isinstance(data, list):
            return [Settings._substitute_env_vars(item) for item in data]
        return data

    @classmethod
    def from_yaml(
        cls,
        config_path: Path,
        env_path: Path | None = None,
        skip_validation: bool = False,
    ) -> "Settings":
        """Load configuration from a YAML file.

        Args:
            config_path: Path to the YAML configuration file.
            env_path: Optional path to the .env file. If provided, it is
                loaded with ``override=True``, so its values replace
                variables already present in the environment. Otherwise the
                default .env file is loaded without overriding.
            skip_validation: Forwarded to the multi-project parser.

        Returns:
            Settings: Loaded configuration.

        Raises:
            FileNotFoundError: If ``env_path`` is given but does not exist.
            yaml.YAMLError: If the YAML file cannot be parsed.
            ValidationError: If the parsed configuration fails validation.
        """
        _get_logger().debug("Loading configuration from YAML", path=str(config_path))
        try:
            # Step 1: Load environment variables first
            if env_path is not None:
                # Custom env file specified - load only this file
                _get_logger().debug(
                    "Loading custom environment file", path=str(env_path)
                )
                if not env_path.exists():
                    raise FileNotFoundError(f"Environment file not found: {env_path}")
                load_dotenv(env_path, override=True)
            else:
                # Load default .env file if it exists
                _get_logger().debug("Loading default environment variables")
                load_dotenv(override=False)

            # Step 2: Load YAML config
            # Read as UTF-8 explicitly so the result does not depend on the
            # platform's default locale encoding.
            with open(config_path, encoding="utf-8") as f:
                config_data = yaml.safe_load(f)

            # Step 3: Process all environment variables in config using substitution
            _get_logger().debug("Processing environment variables in configuration")
            config_data = cls._substitute_env_vars(config_data)

            # Step 4: Use multi-project parser to parse configuration
            validator = ConfigValidator()
            parser = MultiProjectConfigParser(validator)
            parsed_config = parser.parse(config_data, skip_validation=skip_validation)

            # Step 5: Create settings instance with parsed configuration
            settings = cls(
                global_config=parsed_config.global_config,
                projects_config=parsed_config.projects_config,
            )

            _get_logger().debug("Successfully created Settings instance")
            return settings

        except yaml.YAMLError as e:
            _get_logger().error("Failed to parse YAML configuration", error=str(e))
            raise
        except ValidationError as e:
            _get_logger().error("Configuration validation failed", error=str(e))
            raise
        except Exception as e:
            _get_logger().error("Unexpected error loading configuration", error=str(e))
            raise

    def to_dict(self) -> dict:
        """Convert the configuration to a dictionary.

        Returns:
            dict: Configuration as a dictionary with the keys ``"global"``
            and ``"projects"``.
        """
        return {
            "global": self.global_config.to_dict(),
            "projects": self.projects_config.to_dict(),
        }