Coverage for src / qdrant_loader / config / parser.py: 100%
88 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-18 04:48 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-18 04:48 +0000
1"""Multi-project configuration parser.
3This module provides parsing functionality for multi-project configurations.
4"""
6import re
7from typing import Any
9from pydantic import ValidationError
11from ..utils.logging import LoggingConfig
12from .global_config import GlobalConfig
13from .models import ParsedConfig, ProjectConfig, ProjectsConfig
14from .sources import SourcesConfig
15from .validator import ConfigValidator
def _get_logger():
    """Return the logger for this module.

    The logger is looked up through ``LoggingConfig`` on every call instead of
    being bound once at import time.
    NOTE(review): presumably so logging can be configured after this module is
    imported -- confirm against ``LoggingConfig``.
    """
    module_logger = LoggingConfig.get_logger(__name__)
    return module_logger
class MultiProjectConfigParser:
    """Parser for multi-project configurations.

    Converts the raw mapping loaded from YAML into a ``ParsedConfig`` made of
    one ``GlobalConfig`` and a ``ProjectsConfig`` holding every project.
    """

    # A project ID starts with an ASCII letter and continues with ASCII
    # letters, digits, underscores or hyphens. Used with ``fullmatch`` so the
    # whole string must conform (a trailing newline is rejected).
    _PROJECT_ID_PATTERN = re.compile(r"[a-zA-Z][a-zA-Z0-9_-]*")

    def __init__(self, validator: ConfigValidator):
        """Initialize the parser with a validator.

        Args:
            validator: Configuration validator instance
        """
        self.validator = validator

    @staticmethod
    def _section(data: dict[str, Any], key: str) -> Any:
        """Return ``data[key]``, treating a missing or null entry as ``{}``.

        A YAML key written with no value (``sources:``) loads as ``None``;
        mapping such a section to an empty dict keeps the parsing code from
        failing on ``None.items()`` / ``**None``.

        Args:
            data: Mapping to read from
            key: Section name

        Returns:
            The stored value, or a new empty dict when it is absent or None
        """
        value = data.get(key)
        return {} if value is None else value

    def parse(
        self, config_data: dict[str, Any], skip_validation: bool = False
    ) -> ParsedConfig:
        """Parse configuration with multi-project support.

        Supports two formats:
        1. Standard: config with 'projects' section
        2. Simplified: config with top-level 'sources' (auto-wrapped into
           default project)

        Args:
            config_data: Raw configuration data from YAML
            skip_validation: Whether to skip validation during parsing
                (forwarded to ``GlobalConfig``)

        Returns:
            ParsedConfig: Parsed configuration with project information

        Raises:
            ValidationError: If the global configuration is invalid
            ValueError: If a project ID is malformed
        """
        _get_logger().debug("Starting configuration parsing")

        # Auto-wrap simplified format: top-level 'sources' -> projects.default.
        # Non-dict input is passed to the validator untouched.
        if isinstance(config_data, dict):
            config_data = self._normalize_config(config_data)

        # Validate configuration structure
        self.validator.validate_structure(config_data)

        # Parse global configuration (an empty 'global:' section counts as {})
        global_config = self._parse_global_config(
            self._section(config_data, "global"), skip_validation
        )

        # Parse projects
        projects_config = self._parse_projects(config_data, global_config)

        _get_logger().debug(
            "Configuration parsing completed",
            project_count=len(projects_config.projects),
        )

        return ParsedConfig(
            global_config=global_config,
            projects_config=projects_config,
        )

    def _normalize_config(self, config_data: dict[str, Any]) -> dict[str, Any]:
        """Normalize simplified config format to standard format.

        If config has top-level 'sources' but no 'projects', wrap sources
        into a default project automatically. When both keys are present a
        warning is logged and the input is returned unchanged.

        Args:
            config_data: Raw configuration data

        Returns:
            Normalized configuration data. The input object itself is returned
            when no wrapping is needed; otherwise a new dict is built and the
            input is left unmodified.
        """
        has_projects = "projects" in config_data
        has_sources = "sources" in config_data

        if has_sources and has_projects:
            _get_logger().warning(
                "Config has both 'projects' and top-level 'sources'; "
                "top-level 'sources' will be ignored"
            )

        if has_sources and not has_projects:
            _get_logger().debug(
                "Simplified config detected: wrapping top-level 'sources' "
                "into default project"
            )
            # Copy every key except 'sources', then re-home the sources
            # under projects.default.
            result = {k: v for k, v in config_data.items() if k != "sources"}
            result["projects"] = {
                "default": {
                    "display_name": "Default Project",
                    "sources": config_data["sources"],
                }
            }
            return result

        return config_data

    def _parse_global_config(
        self, global_data: dict[str, Any], skip_validation: bool = False
    ) -> GlobalConfig:
        """Parse global configuration section.

        Args:
            global_data: Global configuration data
            skip_validation: Whether to skip validation during parsing

        Returns:
            GlobalConfig: Parsed global configuration

        Raises:
            ValidationError: Logged and re-raised when ``GlobalConfig``
                rejects the data
        """
        try:
            return GlobalConfig(**global_data, skip_validation=skip_validation)
        except ValidationError as e:
            _get_logger().error("Failed to parse global configuration", error=str(e))
            raise

    def _parse_projects(
        self, config_data: dict[str, Any], global_config: GlobalConfig
    ) -> ProjectsConfig:
        """Parse project configurations.

        Args:
            config_data: Raw configuration data
            global_config: Parsed global configuration

        Returns:
            ProjectsConfig: Parsed projects configuration (empty when the
            'projects' section is missing or null)
        """
        projects_config = ProjectsConfig()

        # Handle multi-project format
        projects_data = self._section(config_data, "projects")
        for project_id, project_data in projects_data.items():
            project_config = self._parse_project_config(
                project_id, project_data, global_config
            )
            projects_config.add_project(project_config)
            _get_logger().debug("Parsed project configuration", project_id=project_id)

        return projects_config

    def _parse_project_config(
        self, project_id: str, project_data: dict[str, Any], global_config: GlobalConfig
    ) -> ProjectConfig:
        """Parse individual project configuration.

        Args:
            project_id: Project identifier
            project_data: Project configuration data; ``None`` (a project key
                with an empty body) is treated as an empty mapping
            global_config: Global configuration

        Returns:
            ProjectConfig: Parsed project configuration

        Raises:
            ValueError: If ``project_id`` is not a well-formed project ID
        """
        # Validate project ID
        if not self._is_valid_project_id(project_id):
            raise ValueError(
                f"Invalid project ID '{project_id}'. "
                "Project IDs must start with a letter and contain only "
                "letters, digits, underscores, and hyphens."
            )

        if project_data is None:
            project_data = {}

        # Extract basic project information
        display_name = project_data.get("display_name", project_id)
        description = project_data.get("description")

        # Parse project-specific sources with automatic field injection
        sources_data = self._section(project_data, "sources")
        enhanced_sources_data = self._inject_source_metadata(sources_data)
        sources_config = SourcesConfig(**enhanced_sources_data)

        # Extract configuration overrides
        overrides = self._section(project_data, "overrides")

        # Merge project-specific overrides with global config
        merged_overrides = self._merge_configs(global_config, overrides)

        return ProjectConfig(
            project_id=project_id,
            display_name=display_name,
            description=description,
            sources=sources_config,
            overrides=merged_overrides,
        )

    def _inject_source_metadata(self, sources_data: dict[str, Any]) -> dict[str, Any]:
        """Inject source_type and source fields into source configurations.

        ``sources_data`` is read as ``{source_type: {source_name: config}}``.
        Every dict-valued ``config`` is shallow-copied and given
        ``source_type`` and ``source`` keys (overwriting any existing values);
        non-dict values at either level are passed through unchanged.

        Args:
            sources_data: Raw sources configuration data

        Returns:
            Dict[str, Any]: Enhanced sources data with injected metadata
        """
        enhanced_data = {}

        for source_type, source_configs in sources_data.items():
            if not isinstance(source_configs, dict):
                enhanced_data[source_type] = source_configs
                continue

            enhanced_source_configs = {}
            for source_name, source_config in source_configs.items():
                if isinstance(source_config, dict):
                    # Build a new dict so the caller's data is not modified;
                    # the injected keys always win over user-supplied ones.
                    enhanced_source_configs[source_name] = {
                        **source_config,
                        "source_type": source_type,
                        "source": source_name,
                    }
                else:
                    enhanced_source_configs[source_name] = source_config

            enhanced_data[source_type] = enhanced_source_configs

        return enhanced_data

    def _is_valid_project_id(self, project_id: str) -> bool:
        """Validate project ID format.

        A valid ID is a string that starts with an ASCII letter and otherwise
        contains only ASCII letters, digits, underscores and hyphens.

        Args:
            project_id: Project identifier to validate

        Returns:
            bool: True if valid, False otherwise (including for non-string
            values such as integer YAML keys)
        """
        if not isinstance(project_id, str):
            return False
        # fullmatch, not match + '$': '$' would also accept "name\n".
        return self._PROJECT_ID_PATTERN.fullmatch(project_id) is not None

    def _merge_configs(
        self, global_config: GlobalConfig, project_overrides: dict[str, Any]
    ) -> dict[str, Any]:
        """Merge project-specific overrides with global configuration.

        Args:
            global_config: Global configuration
            project_overrides: Project-specific overrides

        Returns:
            Dict[str, Any]: Global configuration (as a dict) with the
            overrides deep-merged on top
        """
        # Convert global config to dict
        global_dict = global_config.to_dict()

        # Deep merge project overrides
        return self._deep_merge_dicts(global_dict, project_overrides)

    def _deep_merge_dicts(
        self, base: dict[str, Any], override: dict[str, Any]
    ) -> dict[str, Any]:
        """Deep merge two dictionaries.

        Keys whose values are dicts on both sides are merged recursively;
        for every other key the override value replaces the base value.
        Neither input is modified, but values that are not merged are shared
        by reference with the result (the copy is shallow).

        Args:
            base: Base dictionary
            override: Override dictionary

        Returns:
            Dict[str, Any]: Merged dictionary
        """
        result = base.copy()

        for key, value in override.items():
            if (
                key in result
                and isinstance(result[key], dict)
                and isinstance(value, dict)
            ):
                result[key] = self._deep_merge_dicts(result[key], value)
            else:
                result[key] = value

        return result