Coverage for src/qdrant_loader/config/parser.py: 100%
75 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
1"""Multi-project configuration parser.
3This module provides parsing functionality for multi-project configurations.
4"""
6import re
7from typing import Any
9from pydantic import ValidationError
11from ..utils.logging import LoggingConfig
12from .global_config import GlobalConfig
13from .models import ParsedConfig, ProjectConfig, ProjectsConfig
14from .sources import SourcesConfig
15from .validator import ConfigValidator
17logger = LoggingConfig.get_logger(__name__)
class MultiProjectConfigParser:
    """Parser for multi-project configurations.

    Converts the raw configuration mapping into a ``ParsedConfig`` that holds
    the global configuration plus one ``ProjectConfig`` for every entry found
    under the top-level ``projects`` key.
    """

    # A project ID starts with an ASCII letter and continues with ASCII
    # letters, digits, underscores or hyphens. The pattern is applied with
    # ``fullmatch`` so that a trailing newline is rejected (``re.match`` with
    # a ``$`` anchor would accept ``"abc\n"``).
    _PROJECT_ID_PATTERN = re.compile(r"[a-zA-Z][a-zA-Z0-9_-]*")

    def __init__(self, validator: ConfigValidator):
        """Initialize the parser with a validator.

        Args:
            validator: Configuration validator instance
        """
        self.validator = validator

    @staticmethod
    def _get_section(data: dict[str, Any], key: str) -> Any:
        """Return ``data[key]``, substituting ``{}`` when it is missing or ``None``.

        ``dict.get(key, {})`` only covers the missing-key case; a key that is
        present with an explicit ``None`` value (e.g. a YAML key written
        without a value) would otherwise be passed through and break the
        ``.items()`` / ``**`` calls made on the section.

        Args:
            data: Mapping to read the section from
            key: Name of the section

        Returns:
            The stored value, or an empty dict if it is absent or ``None``
        """
        value = data.get(key)
        return {} if value is None else value

    def parse(
        self, config_data: dict[str, Any], skip_validation: bool = False
    ) -> ParsedConfig:
        """Parse configuration with multi-project support.

        Args:
            config_data: Raw configuration data from YAML
            skip_validation: Whether to skip validation during parsing
                (forwarded to ``GlobalConfig``; structure validation by the
                validator always runs)

        Returns:
            ParsedConfig: Parsed configuration with project information

        Raises:
            ValidationError: If configuration is invalid
            ValueError: If a project ID has an invalid format
        """
        logger.debug("Starting configuration parsing")

        # Validate configuration structure
        self.validator.validate_structure(config_data)

        # Parse global configuration
        global_config = self._parse_global_config(
            self._get_section(config_data, "global"), skip_validation
        )

        # Parse projects
        projects_config = self._parse_projects(config_data, global_config)

        logger.debug(
            "Configuration parsing completed",
            project_count=len(projects_config.projects),
        )

        return ParsedConfig(
            global_config=global_config,
            projects_config=projects_config,
        )

    def _parse_global_config(
        self, global_data: dict[str, Any], skip_validation: bool = False
    ) -> GlobalConfig:
        """Parse global configuration section.

        Args:
            global_data: Global configuration data
            skip_validation: Whether to skip validation during parsing

        Returns:
            GlobalConfig: Parsed global configuration

        Raises:
            ValidationError: Logged and re-raised unchanged when
                ``GlobalConfig`` rejects the data
        """
        try:
            return GlobalConfig(**global_data, skip_validation=skip_validation)
        except ValidationError as e:
            logger.error("Failed to parse global configuration", error=str(e))
            raise

    def _parse_projects(
        self, config_data: dict[str, Any], global_config: GlobalConfig
    ) -> ProjectsConfig:
        """Parse project configurations.

        Args:
            config_data: Raw configuration data
            global_config: Parsed global configuration

        Returns:
            ProjectsConfig: Parsed projects configuration
        """
        projects_config = ProjectsConfig()

        # Handle multi-project format; a missing or null ``projects`` section
        # simply yields an empty ProjectsConfig.
        projects_data = self._get_section(config_data, "projects")
        for project_id, project_data in projects_data.items():
            project_config = self._parse_project_config(
                project_id, project_data, global_config
            )
            projects_config.add_project(project_config)
            logger.debug("Parsed project configuration", project_id=project_id)

        return projects_config

    def _parse_project_config(
        self, project_id: str, project_data: dict[str, Any], global_config: GlobalConfig
    ) -> ProjectConfig:
        """Parse individual project configuration.

        Args:
            project_id: Project identifier
            project_data: Project configuration data
            global_config: Global configuration

        Returns:
            ProjectConfig: Parsed project configuration

        Raises:
            ValueError: If ``project_id`` does not have a valid format
        """
        # Validate project ID before touching any of the project data
        if not self._is_valid_project_id(project_id):
            raise ValueError(
                f"Invalid project ID '{project_id}'. "
                "Project IDs must start with a letter and contain only "
                "letters, digits, underscores, and hyphens."
            )

        # Extract basic project information; the ID doubles as display name
        # when none is given.
        display_name = project_data.get("display_name", project_id)
        description = project_data.get("description")

        # Parse project-specific sources with automatic field injection
        sources_data = self._get_section(project_data, "sources")
        enhanced_sources_data = self._inject_source_metadata(sources_data)
        sources_config = SourcesConfig(**enhanced_sources_data)

        # Merge project-specific overrides on top of the global config
        overrides = self._get_section(project_data, "overrides")
        merged_overrides = self._merge_configs(global_config, overrides)

        return ProjectConfig(
            project_id=project_id,
            display_name=display_name,
            description=description,
            sources=sources_config,
            overrides=merged_overrides,
        )

    def _inject_source_metadata(self, sources_data: dict[str, Any]) -> dict[str, Any]:
        """Inject source_type and source fields into source configurations.

        For every ``{source_type: {source_name: config}}`` entry whose
        ``config`` is a dict, a copy of ``config`` is produced with
        ``"source_type"`` and ``"source"`` set (overwriting any existing
        values). Non-dict values at either level are passed through as-is.
        The input mapping and its nested dicts are not modified.

        Args:
            sources_data: Raw sources configuration data

        Returns:
            Dict[str, Any]: Enhanced sources data with injected metadata
        """
        enhanced_data: dict[str, Any] = {}

        for source_type, source_configs in sources_data.items():
            if not isinstance(source_configs, dict):
                enhanced_data[source_type] = source_configs
                continue

            enhanced_data[source_type] = {
                source_name: (
                    # New dict, so the caller's config is left untouched
                    {**source_config, "source_type": source_type, "source": source_name}
                    if isinstance(source_config, dict)
                    else source_config
                )
                for source_name, source_config in source_configs.items()
            }

        return enhanced_data

    def _is_valid_project_id(self, project_id: str) -> bool:
        """Validate project ID format.

        A valid ID is a string that starts with an ASCII letter and otherwise
        contains only ASCII letters, digits, underscores, and hyphens.

        Args:
            project_id: Project identifier to validate

        Returns:
            bool: True if valid, False otherwise (including non-string input)
        """
        # Mapping keys are not guaranteed to be strings (e.g. a numeric YAML
        # key); report those as invalid instead of raising TypeError.
        if not isinstance(project_id, str):
            return False
        return self._PROJECT_ID_PATTERN.fullmatch(project_id) is not None

    def _merge_configs(
        self, global_config: GlobalConfig, project_overrides: dict[str, Any]
    ) -> dict[str, Any]:
        """Merge project-specific overrides with global configuration.

        Args:
            global_config: Global configuration
            project_overrides: Project-specific overrides

        Returns:
            Dict[str, Any]: Global configuration (as a dict) with the project
            overrides deep-merged on top
        """
        return self._deep_merge_dicts(global_config.to_dict(), project_overrides)

    def _deep_merge_dicts(
        self, base: dict[str, Any], override: dict[str, Any]
    ) -> dict[str, Any]:
        """Deep merge two dictionaries.

        Keys present in both inputs are merged recursively when both values
        are dicts; otherwise the value from ``override`` wins. Neither input
        is modified, but values that are not merged are carried over by
        reference (the top-level copy of ``base`` is shallow).

        Args:
            base: Base dictionary
            override: Override dictionary

        Returns:
            Dict[str, Any]: Merged dictionary
        """
        result = base.copy()

        for key, value in override.items():
            existing = result.get(key)
            if isinstance(existing, dict) and isinstance(value, dict):
                result[key] = self._deep_merge_dicts(existing, value)
            else:
                result[key] = value

        return result