Coverage for src/qdrant_loader/config/parser.py: 88%
84 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:50 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:50 +0000
1"""Multi-project configuration parser.
3This module provides parsing functionality for multi-project configurations.
4"""
6import re
7from typing import Any, Dict, List
9from pydantic import ValidationError
11from .global_config import GlobalConfig
12from .models import ParsedConfig, ProjectConfig, ProjectsConfig
13from .sources import SourcesConfig
14from .validator import ConfigValidator
15from ..utils.logging import LoggingConfig
17logger = LoggingConfig.get_logger(__name__)
class MultiProjectConfigParser:
    """Parser for multi-project configurations.

    Turns the raw mapping loaded from the configuration file into a
    ``ParsedConfig`` made of one ``GlobalConfig`` and a ``ProjectsConfig``
    holding one ``ProjectConfig`` per entry of the ``projects`` section.
    """

    # A project ID starts with an ASCII letter and continues with letters,
    # digits, underscores or hyphens. It is applied with ``fullmatch`` so that
    # nothing (not even a trailing newline) may follow the last valid character.
    _PROJECT_ID_PATTERN = re.compile(r"[a-zA-Z][a-zA-Z0-9_-]*")

    def __init__(self, validator: ConfigValidator):
        """Initialize the parser with a validator.

        Args:
            validator: Configuration validator instance; its
                ``validate_structure`` method is called by :meth:`parse`.
        """
        self.validator = validator

    @staticmethod
    def _section(container: Dict[str, Any], key: str) -> Any:
        """Return ``container[key]``, or an empty dict if it is absent or ``None``.

        An empty YAML section (``overrides:`` with nothing after it) loads as
        ``None``; treating it like a missing section avoids crashing on
        ``None.items()`` / ``**None`` further down.

        Args:
            container: Mapping to read from
            key: Section name to look up

        Returns:
            The stored value, or ``{}`` when the key is missing or ``None``
        """
        value = container.get(key)
        return {} if value is None else value

    def parse(
        self, config_data: Dict[str, Any], skip_validation: bool = False
    ) -> ParsedConfig:
        """Parse configuration with multi-project support.

        Args:
            config_data: Raw configuration data from YAML
            skip_validation: Forwarded to ``GlobalConfig`` when the global
                section is parsed

        Returns:
            ParsedConfig: Parsed configuration with project information

        Raises:
            ValueError: If the legacy single-project format is detected or a
                project ID is invalid
            ValidationError: If the global configuration cannot be built
        """
        logger.debug("Starting configuration parsing")

        # Reject the legacy layout first so the user gets the migration guide
        # rather than a structural validation failure.
        if self._is_legacy_config(config_data):
            self._raise_legacy_format_error()

        # NOTE(review): validate_structure presumably raises on a malformed
        # configuration - confirm against ConfigValidator.
        self.validator.validate_structure(config_data)

        global_config = self._parse_global_config(
            self._section(config_data, "global"), skip_validation
        )
        projects_config = self._parse_projects(config_data, global_config)

        logger.debug(
            "Configuration parsing completed",
            project_count=len(projects_config.projects),
        )

        return ParsedConfig(
            global_config=global_config,
            projects_config=projects_config,
        )

    def _parse_global_config(
        self, global_data: Dict[str, Any], skip_validation: bool = False
    ) -> GlobalConfig:
        """Parse global configuration section.

        Args:
            global_data: Global configuration data, expanded as keyword
                arguments to ``GlobalConfig``
            skip_validation: Passed through to ``GlobalConfig``

        Returns:
            GlobalConfig: Parsed global configuration

        Raises:
            ValidationError: Logged and re-raised unchanged when
                ``GlobalConfig`` rejects the data
        """
        try:
            return GlobalConfig(**global_data, skip_validation=skip_validation)
        except ValidationError as e:
            logger.error("Failed to parse global configuration", error=str(e))
            raise

    def _is_legacy_config(self, config_data: Dict[str, Any]) -> bool:
        """Determine if configuration uses legacy single-project format.

        The legacy layout has a root-level ``sources`` key and no ``projects``
        key.

        Args:
            config_data: Raw configuration data

        Returns:
            bool: True if legacy format, False if multi-project format
        """
        return "sources" in config_data and "projects" not in config_data

    def _raise_legacy_format_error(self) -> None:
        """Raise a helpful error message for legacy configuration format.

        Raises:
            ValueError: Always; the message contains a migration guide.
        """
        # NOTE(review): the indentation of the YAML samples below was
        # reconstructed (2-space nesting) - verify against the shipped message.
        error_message = """
Legacy configuration format detected. Please update your config.yaml file to use the new multi-project format.

MIGRATION GUIDE:
================

OLD FORMAT (legacy):
```yaml
global:
  # ... global settings ...

sources:
  git:
    my-repo:
      # ... git config ...
  confluence:
    my-space:
      # ... confluence config ...
```

NEW FORMAT (multi-project):
```yaml
global:
  # ... global settings ...

projects:
  default: # or any project name you prefer
    display_name: "My Project"
    description: "Project description"
    collection_name: "my_collection" # optional, defaults to global collection + project name
    sources:
      git:
        my-repo:
          # ... git config ...
      confluence:
        my-space:
          # ... confluence config ...
    overrides: {} # optional project-specific config overrides
```

BENEFITS OF NEW FORMAT:
- Support for multiple projects in a single configuration
- Better organization and isolation of different data sources
- Project-specific collection names and configuration overrides
- Clearer structure and easier maintenance

To migrate your configuration:
1. Move your 'sources' section under 'projects.default.sources'
2. Add required project fields: display_name, description
3. Optionally specify a custom collection_name
4. Add any project-specific overrides if needed

For more information, see the documentation on multi-project configuration.
"""
        raise ValueError(error_message.strip())

    def _parse_projects(
        self, config_data: Dict[str, Any], global_config: GlobalConfig
    ) -> ProjectsConfig:
        """Parse project configurations.

        Args:
            config_data: Raw configuration data
            global_config: Parsed global configuration

        Returns:
            ProjectsConfig: Parsed projects configuration; empty when the
                ``projects`` section is missing or empty

        Raises:
            ValueError: If a project ID is invalid
        """
        projects_config = ProjectsConfig()

        for project_id, project_data in self._section(
            config_data, "projects"
        ).items():
            project_config = self._parse_project_config(
                project_id, project_data, global_config
            )
            projects_config.add_project(project_config)
            logger.debug("Parsed project configuration", project_id=project_id)

        return projects_config

    def _parse_project_config(
        self, project_id: str, project_data: Dict[str, Any], global_config: GlobalConfig
    ) -> ProjectConfig:
        """Parse individual project configuration.

        Args:
            project_id: Project identifier
            project_data: Project configuration data
            global_config: Global configuration

        Returns:
            ProjectConfig: Parsed project configuration

        Raises:
            ValueError: If ``project_id`` does not satisfy
                :meth:`_is_valid_project_id`
        """
        if not self._is_valid_project_id(project_id):
            raise ValueError(
                f"Invalid project ID '{project_id}'. "
                "Project IDs must start with a letter and contain only "
                "letters, digits, underscores, and hyphens."
            )

        # The display name falls back to the project ID; description may be None.
        display_name = project_data.get("display_name", project_id)
        description = project_data.get("description")
        # NOTE(review): the migration guide advertises an optional
        # ``collection_name`` key, but it is not forwarded to ProjectConfig
        # here - confirm whether ProjectConfig should receive it.

        # Each source entry is tagged with its type and name before validation.
        sources_config = SourcesConfig(
            **self._inject_source_metadata(self._section(project_data, "sources"))
        )

        # Project overrides are layered on top of the global configuration.
        merged_overrides = self._merge_configs(
            global_config, self._section(project_data, "overrides")
        )

        return ProjectConfig(
            project_id=project_id,
            display_name=display_name,
            description=description,
            sources=sources_config,
            overrides=merged_overrides,
        )

    def _inject_source_metadata(self, sources_data: Dict[str, Any]) -> Dict[str, Any]:
        """Inject source_type and source fields into source configurations.

        For a layout ``{source_type: {source_name: {...}}}`` every innermost
        dict is shallow-copied and given ``"source_type"`` and ``"source"``
        keys (overwriting any existing values). Non-dict values at either
        level are passed through untouched. The input is not modified.

        Args:
            sources_data: Raw sources configuration data

        Returns:
            Dict[str, Any]: Enhanced sources data with injected metadata
        """
        enhanced_data: Dict[str, Any] = {}

        for source_type, source_configs in sources_data.items():
            if not isinstance(source_configs, dict):
                enhanced_data[source_type] = source_configs
                continue

            enhanced_data[source_type] = {
                source_name: (
                    {
                        **source_config,
                        "source_type": source_type,
                        "source": source_name,
                    }
                    if isinstance(source_config, dict)
                    else source_config
                )
                for source_name, source_config in source_configs.items()
            }

        return enhanced_data

    def _is_valid_project_id(self, project_id: str) -> bool:
        """Validate project ID format.

        A valid ID is a string that starts with an ASCII letter followed by
        any number of ASCII letters, digits, underscores or hyphens.

        Args:
            project_id: Project identifier to validate

        Returns:
            bool: True if valid, False otherwise (including non-string keys
                such as the integer a bare ``123:`` YAML key produces)
        """
        if not isinstance(project_id, str):
            return False
        # fullmatch, unlike match() with "$", also rejects a trailing newline.
        return self._PROJECT_ID_PATTERN.fullmatch(project_id) is not None

    def _merge_configs(
        self, global_config: GlobalConfig, project_overrides: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Merge project-specific overrides with global configuration.

        Args:
            global_config: Global configuration; its ``to_dict()`` output is
                the merge base
            project_overrides: Project-specific overrides

        Returns:
            Dict[str, Any]: Merged configuration
        """
        return self._deep_merge_dicts(global_config.to_dict(), project_overrides)

    def _deep_merge_dicts(
        self, base: Dict[str, Any], override: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Deep merge two dictionaries.

        Keys present in both inputs are merged recursively when both values
        are dicts; otherwise the override value wins. Neither input is
        mutated, but values that are not merged are shared by reference with
        the inputs (only the dict levels that get merged are copied).

        Args:
            base: Base dictionary
            override: Override dictionary

        Returns:
            Dict[str, Any]: Merged dictionary
        """
        result = base.copy()

        for key, value in override.items():
            current = result.get(key)
            if isinstance(current, dict) and isinstance(value, dict):
                result[key] = self._deep_merge_dicts(current, value)
            else:
                result[key] = value

        return result