Coverage for src/qdrant_loader/config/parser.py: 100%
89 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-10 09:40 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-10 09:40 +0000
1"""Multi-project configuration parser.
3This module provides parsing functionality for multi-project configurations.
4"""
6import re
7from typing import Any
9from pydantic import ValidationError
11from ..utils.logging import LoggingConfig
12from ..utils.sensitive import sanitize_exception_message
13from .global_config import GlobalConfig
14from .models import ParsedConfig, ProjectConfig, ProjectsConfig
15from .sources import SourcesConfig
16from .validator import ConfigValidator
def _get_logger():
    """Return the logger registered under this module's name.

    The lookup goes through ``LoggingConfig.get_logger`` on every call
    rather than being bound once at import time.
    """
    module_logger = LoggingConfig.get_logger(__name__)
    return module_logger
class MultiProjectConfigParser:
    """Parser for multi-project configurations.

    Converts the raw configuration mapping (as loaded from YAML) into a
    ``ParsedConfig`` holding one ``GlobalConfig`` and one ``ProjectConfig``
    per entry of the ``projects`` section.
    """

    # A project ID starts with an ASCII letter and continues with ASCII
    # letters, digits, underscores or hyphens. The pattern carries no anchors
    # because it is always applied with ``fullmatch``; unlike ``^...$`` with
    # ``match``, that also rejects an ID that ends in a newline.
    _PROJECT_ID_PATTERN = re.compile(r"[a-zA-Z][a-zA-Z0-9_-]*")

    def __init__(self, validator: ConfigValidator):
        """Initialize the parser with a validator.

        Args:
            validator: Configuration validator instance; its
                ``validate_structure`` method is called by :meth:`parse`.
        """
        self.validator = validator

    @staticmethod
    def _section(data: dict[str, Any], key: str) -> Any:
        """Return ``data[key]``, substituting ``{}`` when it is missing or None.

        A YAML key written without a value (``overrides:``) loads as ``None``,
        which a plain ``data.get(key, {})`` would hand through unchanged.

        Args:
            data: Mapping to read from.
            key: Name of the section to fetch.

        Returns:
            The stored value, or a new empty dict if the key is absent or
            its value is ``None``.
        """
        value = data.get(key)
        return {} if value is None else value

    def parse(
        self, config_data: dict[str, Any], skip_validation: bool = False
    ) -> ParsedConfig:
        """Parse configuration with multi-project support.

        Supports two formats:
        1. Standard: config with 'projects' section
        2. Simplified: config with top-level 'sources' (auto-wrapped into
           a project named 'default')

        Args:
            config_data: Raw configuration data from YAML
            skip_validation: Forwarded to ``GlobalConfig`` when the global
                section is built

        Returns:
            ParsedConfig: Parsed global and per-project configuration

        Raises:
            ValidationError: If the global section is rejected by
                ``GlobalConfig``
            ValueError: If a project ID has an invalid format
        """
        logger = _get_logger()
        logger.debug("Starting configuration parsing")

        # Auto-wrap simplified format: top-level 'sources' -> projects.default.
        # Non-dict input is passed to the validator untouched.
        if isinstance(config_data, dict):
            config_data = self._normalize_config(config_data)

        # Structure validation runs on the normalized data, before any parsing.
        self.validator.validate_structure(config_data)

        global_config = self._parse_global_config(
            self._section(config_data, "global"), skip_validation
        )
        projects_config = self._parse_projects(config_data, global_config)

        logger.debug(
            "Configuration parsing completed",
            project_count=len(projects_config.projects),
        )

        return ParsedConfig(
            global_config=global_config,
            projects_config=projects_config,
        )

    def _normalize_config(self, config_data: dict[str, Any]) -> dict[str, Any]:
        """Normalize simplified config format to standard format.

        If the config has top-level 'sources' but no 'projects', the sources
        are wrapped into a project named 'default'. If both keys are present
        a warning is logged and the data is returned as is (only 'projects'
        is read when projects are parsed).

        Args:
            config_data: Raw configuration data

        Returns:
            A new dict with a 'projects' section and without 'sources' when
            wrapping took place; otherwise the ``config_data`` object itself.
        """
        has_projects = "projects" in config_data
        has_sources = "sources" in config_data

        if not has_sources:
            return config_data

        if has_projects:
            _get_logger().warning(
                "Config has both 'projects' and top-level 'sources'; "
                "top-level 'sources' will be ignored"
            )
            return config_data

        _get_logger().debug(
            "Simplified config detected: wrapping top-level 'sources' "
            "into default project"
        )
        # Build a new top-level dict so the caller's mapping is not mutated.
        result = {k: v for k, v in config_data.items() if k != "sources"}
        result["projects"] = {
            "default": {
                "display_name": "Default Project",
                "sources": config_data["sources"],
            }
        }
        return result

    def _parse_global_config(
        self, global_data: dict[str, Any], skip_validation: bool = False
    ) -> GlobalConfig:
        """Parse global configuration section.

        Args:
            global_data: Global configuration data, expanded as keyword
                arguments to ``GlobalConfig``
            skip_validation: Passed to ``GlobalConfig`` as ``skip_validation``

        Returns:
            GlobalConfig: Parsed global configuration

        Raises:
            ValidationError: Re-raised unchanged after a sanitized copy of
                the message has been logged
        """
        try:
            return GlobalConfig(**global_data, skip_validation=skip_validation)
        except ValidationError as e:
            # Log through the sanitizer rather than logging the raw exception.
            _get_logger().error(
                "Failed to parse global configuration",
                error=sanitize_exception_message(e),
            )
            raise

    def _parse_projects(
        self, config_data: dict[str, Any], global_config: GlobalConfig
    ) -> ProjectsConfig:
        """Parse project configurations.

        Args:
            config_data: Raw configuration data; only its 'projects' section
                is read. A missing or empty section yields no projects.
            global_config: Parsed global configuration

        Returns:
            ProjectsConfig: Container with every parsed project added to it
        """
        projects_config = ProjectsConfig()
        logger = _get_logger()

        projects_data = self._section(config_data, "projects")
        for project_id, project_data in projects_data.items():
            project_config = self._parse_project_config(
                project_id, project_data, global_config
            )
            projects_config.add_project(project_config)
            logger.debug("Parsed project configuration", project_id=project_id)

        return projects_config

    def _parse_project_config(
        self, project_id: str, project_data: dict[str, Any], global_config: GlobalConfig
    ) -> ProjectConfig:
        """Parse individual project configuration.

        Args:
            project_id: Project identifier
            project_data: Project configuration data
            global_config: Global configuration

        Returns:
            ProjectConfig: Parsed project configuration

        Raises:
            ValueError: If ``project_id`` fails :meth:`_is_valid_project_id`
        """
        if not self._is_valid_project_id(project_id):
            raise ValueError(
                f"Invalid project ID '{project_id}'. "
                "Project IDs must start with a letter and contain only "
                "letters, digits, underscores, and hyphens."
            )

        # The display name falls back to the project ID itself.
        display_name = project_data.get("display_name", project_id)
        description = project_data.get("description")

        # Each source entry is tagged with its type and name before
        # SourcesConfig is built.
        sources_data = self._section(project_data, "sources")
        enhanced_sources_data = self._inject_source_metadata(sources_data)
        sources_config = SourcesConfig(**enhanced_sources_data)

        # Project overrides are layered on top of the global configuration.
        overrides = self._section(project_data, "overrides")
        merged_overrides = self._merge_configs(global_config, overrides)

        return ProjectConfig(
            project_id=project_id,
            display_name=display_name,
            description=description,
            sources=sources_config,
            overrides=merged_overrides,
        )

    def _inject_source_metadata(self, sources_data: dict[str, Any]) -> dict[str, Any]:
        """Inject source_type and source fields into source configurations.

        The input is read as ``{source_type: {source_name: config}}``. Every
        ``config`` that is a dict is shallow-copied and gets
        ``"source_type"`` and ``"source"`` set (replacing existing values
        for those keys). Values that are not dicts, at either level, are
        passed through unchanged.

        Args:
            sources_data: Raw sources configuration data

        Returns:
            Dict[str, Any]: Enhanced sources data with injected metadata
        """
        enhanced_data: dict[str, Any] = {}

        for source_type, source_configs in sources_data.items():
            if not isinstance(source_configs, dict):
                enhanced_data[source_type] = source_configs
                continue

            enhanced_source_configs = {}
            for source_name, source_config in source_configs.items():
                if not isinstance(source_config, dict):
                    enhanced_source_configs[source_name] = source_config
                    continue

                # Work on a copy so the caller's raw config is not modified.
                enhanced_config = source_config.copy()
                enhanced_config["source_type"] = source_type
                enhanced_config["source"] = source_name
                enhanced_source_configs[source_name] = enhanced_config

            enhanced_data[source_type] = enhanced_source_configs

        return enhanced_data

    def _is_valid_project_id(self, project_id: str) -> bool:
        """Validate project ID format.

        A valid ID is a string that starts with an ASCII letter, followed by
        any number of ASCII letters, digits, underscores or hyphens.

        Args:
            project_id: Project identifier to validate

        Returns:
            bool: True if valid, False otherwise (including for values that
            are not strings, such as an integer YAML key)
        """
        if not isinstance(project_id, str):
            return False
        return self._PROJECT_ID_PATTERN.fullmatch(project_id) is not None

    def _merge_configs(
        self, global_config: GlobalConfig, project_overrides: dict[str, Any]
    ) -> dict[str, Any]:
        """Merge project-specific overrides with global configuration.

        Args:
            global_config: Global configuration; converted with ``to_dict()``
            project_overrides: Project-specific overrides, which win over
                the global values

        Returns:
            Dict[str, Any]: Merged configuration
        """
        global_dict = global_config.to_dict()
        return self._deep_merge_dicts(global_dict, project_overrides)

    def _deep_merge_dicts(
        self, base: dict[str, Any], override: dict[str, Any]
    ) -> dict[str, Any]:
        """Deep merge two dictionaries.

        When a key holds a dict on both sides the two dicts are merged
        recursively; in every other case the override value replaces the
        base value. Neither input is modified, but only the dicts that get
        merged are copied (shallowly): all other values are placed in the
        result by reference.

        Args:
            base: Base dictionary
            override: Override dictionary

        Returns:
            Dict[str, Any]: Merged dictionary
        """
        result = base.copy()

        for key, value in override.items():
            current = result.get(key)
            if isinstance(current, dict) and isinstance(value, dict):
                result[key] = self._deep_merge_dicts(current, value)
            else:
                result[key] = value

        return result