Coverage for src/qdrant_loader/config/validator.py: 100%
114 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
1"""Configuration validation for multi-project support.
3This module provides validation functionality for multi-project
4configurations, ensuring data integrity and catching common configuration errors.
5"""
7import re
8from typing import Any
10from ..utils.logging import LoggingConfig
12logger = LoggingConfig.get_logger(__name__)
class ConfigValidator:
    """Validates configuration data for multi-project support.

    The validator performs structural checks only (types, required keys,
    identifier formats, duplicate detection). Every failed check raises
    ``ValueError`` with a message naming the offending section or value.
    """

    # Shared identifier shape for project IDs, source names and collection
    # names: a leading ASCII letter followed by letters, digits, underscores
    # or hyphens. It is always applied with ``fullmatch`` because a
    # ``^...$`` pattern used with ``re.match`` also accepts a value that ends
    # in a newline (``$`` matches just before a trailing "\n").
    _IDENTIFIER_PATTERN = re.compile(r"[a-zA-Z][a-zA-Z0-9_-]*")

    # Project IDs that are accepted but trigger a warning.
    _RESERVED_PROJECT_IDS = frozenset({"default", "global", "admin", "system"})

    def __init__(self):
        """Initialize the validator."""
        pass

    def validate_structure(self, config_data: dict[str, Any]) -> None:
        """Validate the overall configuration structure.

        Checks that the configuration is a dictionary containing a
        ``projects`` section, validates that section, and validates the
        ``global`` section when it is present.

        Args:
            config_data: Raw configuration data

        Raises:
            ValueError: If configuration structure is invalid
        """
        logger.debug("Validating configuration structure")

        # Check for required sections
        if not isinstance(config_data, dict):
            raise ValueError("Configuration must be a dictionary")

        # Validate that we have projects section
        if "projects" not in config_data:
            raise ValueError("Configuration must contain 'projects' section")

        # Validate projects section
        self._validate_projects_section(config_data["projects"])

        # Validate global section if present
        if "global" in config_data:
            self._validate_global_section(config_data["global"])

        logger.debug("Configuration structure validation completed")

    def _validate_projects_section(self, projects_data: Any) -> None:
        """Validate the projects section.

        Each project ID and project configuration is validated, and
        collection names must be unique across projects.

        Args:
            projects_data: Projects configuration data

        Raises:
            ValueError: If projects section is invalid
        """
        if not isinstance(projects_data, dict):
            raise ValueError("'projects' section must be a dictionary")

        if not projects_data:
            raise ValueError("'projects' section cannot be empty")

        project_ids = set()
        collection_names = set()

        for project_id, project_config in projects_data.items():
            # Validate project ID
            self._validate_project_id(project_id)

            if project_id in project_ids:
                raise ValueError(f"Duplicate project ID: '{project_id}'")
            project_ids.add(project_id)

            # Validate individual project configuration
            self._validate_project_config(project_id, project_config)

            # Check for duplicate collection names
            if "collection_name" in project_config:
                collection_name = project_config["collection_name"]
                if collection_name in collection_names:
                    raise ValueError(
                        f"Duplicate collection name '{collection_name}' "
                        f"found in project '{project_id}'"
                    )
                collection_names.add(collection_name)

    def _validate_project_config(self, project_id: str, project_config: Any) -> None:
        """Validate individual project configuration.

        ``display_name`` is required; ``description``, ``collection_name``,
        ``sources`` and ``overrides`` are checked only when present.

        Args:
            project_id: Project identifier
            project_config: Project configuration data

        Raises:
            ValueError: If project configuration is invalid
        """
        if not isinstance(project_config, dict):
            raise ValueError(
                f"Project '{project_id}' configuration must be a dictionary"
            )

        # Validate required fields
        if "display_name" not in project_config:
            raise ValueError(f"Project '{project_id}' must have a 'display_name'")

        display_name = project_config["display_name"]
        if not isinstance(display_name, str) or not display_name.strip():
            raise ValueError(
                f"Project '{project_id}' display_name must be a non-empty string"
            )

        # Validate optional fields
        if "description" in project_config:
            description = project_config["description"]
            if description is not None and not isinstance(description, str):
                raise ValueError(
                    f"Project '{project_id}' description must be a string or null"
                )

        if "collection_name" in project_config:
            collection_name = project_config["collection_name"]
            if not isinstance(collection_name, str) or not collection_name.strip():
                raise ValueError(
                    f"Project '{project_id}' collection_name must be a non-empty string"
                )
            self._validate_collection_name(collection_name)

        # Validate sources section if present
        if "sources" in project_config:
            self._validate_sources_section(project_config["sources"])

        # Validate overrides section if present
        if "overrides" in project_config:
            overrides = project_config["overrides"]
            if not isinstance(overrides, dict):
                raise ValueError(
                    f"Project '{project_id}' overrides must be a dictionary"
                )

    def _validate_sources_section(self, sources_data: Any) -> None:
        """Validate sources configuration.

        The section must be a dictionary mapping each source type to a
        non-empty dictionary of named source configurations, each of which
        must itself be a dictionary. An empty section is accepted.

        Args:
            sources_data: Sources configuration data

        Raises:
            ValueError: If sources configuration is invalid
        """
        if not isinstance(sources_data, dict):
            raise ValueError("'sources' section must be a dictionary")

        # Allow empty sources section for testing purposes
        # In production, users would typically have at least one source configured
        if not sources_data:
            logger.debug(
                "Sources section is empty - this is allowed but no data will be ingested"
            )
            return

        # Validate each source type
        for source_type, source_configs in sources_data.items():
            if not isinstance(source_configs, dict):
                raise ValueError(f"Source type '{source_type}' must be a dictionary")

            if not source_configs:
                raise ValueError(f"Source type '{source_type}' cannot be empty")

            # Validate each source configuration
            for source_name, source_config in source_configs.items():
                if not isinstance(source_config, dict):
                    raise ValueError(
                        f"Source '{source_name}' in '{source_type}' must be a dictionary"
                    )

                # Note: source_type and source fields are automatically injected by the parser
                # so we don't need to validate their presence here

    def _validate_global_section(self, global_data: Any) -> None:
        """Validate global configuration section.

        Args:
            global_data: Global configuration data

        Raises:
            ValueError: If global configuration is invalid
        """
        if not isinstance(global_data, dict):
            raise ValueError("'global' section must be a dictionary")

        # The actual validation of global config fields will be handled
        # by the GlobalConfig pydantic model, so we just do basic structure checks here

        # Validate qdrant section if present
        if "qdrant" in global_data:
            qdrant_config = global_data["qdrant"]
            if not isinstance(qdrant_config, dict):
                raise ValueError("'global.qdrant' must be a dictionary")

            if "collection_name" in qdrant_config:
                collection_name = qdrant_config["collection_name"]
                if not isinstance(collection_name, str) or not collection_name.strip():
                    raise ValueError(
                        "'global.qdrant.collection_name' must be a non-empty string"
                    )
                self._validate_collection_name(collection_name)

    def _validate_project_id(self, project_id: str) -> None:
        """Validate project ID format.

        Reserved IDs (compared case-insensitively) are accepted but logged
        as a warning.

        Args:
            project_id: Project identifier to validate

        Raises:
            ValueError: If project ID is invalid
        """
        if not isinstance(project_id, str):
            raise ValueError("Project ID must be a string")

        if not project_id.strip():
            raise ValueError("Project ID cannot be empty")

        # Project IDs must be valid identifiers (alphanumeric + underscores + hyphens).
        # fullmatch also rejects a trailing newline that ``^...$`` would let through.
        if not self._IDENTIFIER_PATTERN.fullmatch(project_id):
            raise ValueError(
                f"Invalid project ID '{project_id}'. "
                "Project IDs must start with a letter and contain only "
                "letters, numbers, underscores, and hyphens."
            )

        # Check for reserved project IDs
        if project_id.lower() in self._RESERVED_PROJECT_IDS:
            logger.warning(
                f"Project ID '{project_id}' is reserved and may cause conflicts"
            )

    def _validate_source_name(self, source_name: str) -> None:
        """Validate source name format.

        Args:
            source_name: Source name to validate

        Raises:
            ValueError: If source name is invalid
        """
        if not isinstance(source_name, str):
            raise ValueError("Source name must be a string")

        if not source_name.strip():
            raise ValueError("Source name cannot be empty")

        # Source names should be valid identifiers (whole string must match)
        if not self._IDENTIFIER_PATTERN.fullmatch(source_name):
            raise ValueError(
                f"Invalid source name '{source_name}'. "
                "Source names must start with a letter and contain only "
                "letters, numbers, underscores, and hyphens."
            )

    def _validate_source_config(
        self, source_type: str, source_name: str, source_config: Any
    ) -> None:
        """Validate individual source configuration.

        Args:
            source_type: Type of the source
            source_name: Name of the source
            source_config: Source configuration data

        Raises:
            ValueError: If source configuration is invalid
        """
        if not isinstance(source_config, dict):
            raise ValueError(
                f"Source '{source_name}' of type '{source_type}' "
                "configuration must be a dictionary"
            )

        # Basic validation - specific source validation will be handled
        # by the individual source config classes
        if not source_config:
            raise ValueError(
                f"Source '{source_name}' of type '{source_type}' "
                "configuration cannot be empty"
            )

    def _validate_collection_name(self, collection_name: str) -> None:
        """Validate QDrant collection name format.

        Callers are expected to have already checked that the value is a
        non-empty string; this method checks length and character set.

        Args:
            collection_name: Collection name to validate

        Raises:
            ValueError: If collection name is invalid
        """
        # QDrant collection names have specific requirements
        # They should be valid identifiers and not too long
        if len(collection_name) > 255:
            raise ValueError(
                f"Collection name '{collection_name}' is too long (max 255 characters)"
            )

        # Collection names should be valid identifiers (whole string must match,
        # so a trailing newline is rejected as well)
        if not self._IDENTIFIER_PATTERN.fullmatch(collection_name):
            raise ValueError(
                f"Invalid collection name '{collection_name}'. "
                "Collection names must start with a letter and contain only "
                "letters, numbers, underscores, and hyphens."
            )