Coverage for src/qdrant_loader/config/parser.py: 100%

75 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:05 +0000

1"""Multi-project configuration parser. 

2 

3This module provides parsing functionality for multi-project configurations. 

4""" 

5 

6import re 

7from typing import Any 

8 

9from pydantic import ValidationError 

10 

11from ..utils.logging import LoggingConfig 

12from .global_config import GlobalConfig 

13from .models import ParsedConfig, ProjectConfig, ProjectsConfig 

14from .sources import SourcesConfig 

15from .validator import ConfigValidator 

16 

17logger = LoggingConfig.get_logger(__name__) 

18 

19 

class MultiProjectConfigParser:
    """Parser for multi-project configurations.

    Converts the raw configuration mapping (a ``global`` section plus a
    ``projects`` mapping keyed by project ID) into a ``ParsedConfig`` that
    bundles the parsed ``GlobalConfig`` with a ``ProjectsConfig``.
    """

    # Accepted project ID format: one ASCII letter followed by any number of
    # ASCII letters, digits, underscores or hyphens. The pattern is applied
    # with ``fullmatch`` so the *entire* string must conform; a ``$`` anchor
    # would also accept an ID that ends with a newline.
    _PROJECT_ID_PATTERN = re.compile(r"[a-zA-Z][a-zA-Z0-9_-]*")

    def __init__(self, validator: ConfigValidator):
        """Initialize the parser with a validator.

        Args:
            validator: Configuration validator instance; its
                ``validate_structure`` method is called at the start of
                :meth:`parse`.
        """
        self.validator = validator

    def parse(
        self, config_data: dict[str, Any], skip_validation: bool = False
    ) -> ParsedConfig:
        """Parse configuration with multi-project support.

        Args:
            config_data: Raw configuration data from YAML
            skip_validation: Whether to skip validation during parsing; the
                flag is forwarded to ``GlobalConfig``

        Returns:
            ParsedConfig: Parsed configuration with project information

        Raises:
            ValidationError: If the global configuration is invalid
            ValueError: If a project ID has an invalid format
        """
        logger.debug("Starting configuration parsing")

        # The structural check runs first and always, regardless of
        # ``skip_validation``.
        self.validator.validate_structure(config_data)

        # Global settings are parsed before the projects because every
        # project merges its overrides on top of them.
        global_config = self._parse_global_config(
            config_data.get("global", {}), skip_validation
        )

        projects_config = self._parse_projects(config_data, global_config)

        logger.debug(
            "Configuration parsing completed",
            project_count=len(projects_config.projects),
        )

        return ParsedConfig(
            global_config=global_config,
            projects_config=projects_config,
        )

    def _parse_global_config(
        self, global_data: dict[str, Any], skip_validation: bool = False
    ) -> GlobalConfig:
        """Parse global configuration section.

        Args:
            global_data: Global configuration data
            skip_validation: Whether to skip validation during parsing

        Returns:
            GlobalConfig: Parsed global configuration

        Raises:
            ValidationError: Logged and re-raised unchanged when
                ``GlobalConfig`` rejects the data
        """
        try:
            return GlobalConfig(**global_data, skip_validation=skip_validation)
        except ValidationError as e:
            logger.error("Failed to parse global configuration", error=str(e))
            raise

    def _parse_projects(
        self, config_data: dict[str, Any], global_config: GlobalConfig
    ) -> ProjectsConfig:
        """Parse project configurations.

        Args:
            config_data: Raw configuration data
            global_config: Parsed global configuration

        Returns:
            ProjectsConfig: Parsed projects configuration; contains no
                projects when the ``projects`` key is absent
        """
        projects_config = ProjectsConfig()

        # Multi-project format: a mapping of project ID -> project settings.
        projects_data = config_data.get("projects", {})
        for project_id, project_data in projects_data.items():
            project_config = self._parse_project_config(
                project_id, project_data, global_config
            )
            projects_config.add_project(project_config)
            logger.debug("Parsed project configuration", project_id=project_id)

        return projects_config

    def _parse_project_config(
        self, project_id: str, project_data: dict[str, Any], global_config: GlobalConfig
    ) -> ProjectConfig:
        """Parse individual project configuration.

        Args:
            project_id: Project identifier
            project_data: Project configuration data
            global_config: Global configuration

        Returns:
            ProjectConfig: Parsed project configuration

        Raises:
            ValueError: If ``project_id`` does not have the accepted format
        """
        if not self._is_valid_project_id(project_id):
            # The message text is kept verbatim for callers that match on
            # it; note that the accepted format also permits hyphens.
            raise ValueError(
                f"Invalid project ID '{project_id}'. "
                "Project IDs must be valid Python identifiers (alphanumeric + underscores)."
            )

        # Basic project information; the display name falls back to the ID.
        # NOTE(review): a ``collection_name`` key was previously read here
        # and discarded; it is not passed on to ProjectConfig.
        display_name = project_data.get("display_name", project_id)
        description = project_data.get("description")

        # Project-specific sources, with source_type/source injected into
        # every source entry before the model is built.
        sources_data = project_data.get("sources", {})
        enhanced_sources_data = self._inject_source_metadata(sources_data)
        sources_config = SourcesConfig(**enhanced_sources_data)

        # Project overrides are layered on top of the global configuration.
        overrides = project_data.get("overrides", {})
        merged_overrides = self._merge_configs(global_config, overrides)

        return ProjectConfig(
            project_id=project_id,
            display_name=display_name,
            description=description,
            sources=sources_config,
            overrides=merged_overrides,
        )

    def _inject_source_metadata(self, sources_data: dict[str, Any]) -> dict[str, Any]:
        """Inject source_type and source fields into source configurations.

        ``sources_data`` maps a source type to a mapping of source name ->
        source configuration. Every configuration that is a dict is copied
        and gets ``source_type`` (the outer key) and ``source`` (the inner
        key) set, overwriting any existing values. Non-dict values at either
        level are passed through unchanged.

        Args:
            sources_data: Raw sources configuration data

        Returns:
            Dict[str, Any]: Enhanced sources data with injected metadata;
                the input mapping and its nested dicts are not modified
        """
        enhanced_data: dict[str, Any] = {}

        for source_type, source_configs in sources_data.items():
            if not isinstance(source_configs, dict):
                enhanced_data[source_type] = source_configs
                continue

            enhanced_data[source_type] = {
                source_name: (
                    # Build a new dict so the caller's data stays untouched.
                    {**source_config, "source_type": source_type, "source": source_name}
                    if isinstance(source_config, dict)
                    else source_config
                )
                for source_name, source_config in source_configs.items()
            }

        return enhanced_data

    def _is_valid_project_id(self, project_id: str) -> bool:
        """Validate project ID format.

        A valid ID starts with an ASCII letter and continues with ASCII
        letters, digits, underscores or hyphens only.

        Args:
            project_id: Project identifier to validate

        Returns:
            bool: True if valid, False otherwise (including when the value
                is not a string, e.g. a numeric YAML key)
        """
        if not isinstance(project_id, str):
            return False
        return self._PROJECT_ID_PATTERN.fullmatch(project_id) is not None

    def _merge_configs(
        self, global_config: GlobalConfig, project_overrides: dict[str, Any]
    ) -> dict[str, Any]:
        """Merge project-specific overrides with global configuration.

        Args:
            global_config: Global configuration
            project_overrides: Project-specific overrides

        Returns:
            Dict[str, Any]: Global configuration (as a dict) with the
                project overrides deep-merged on top
        """
        return self._deep_merge_dicts(global_config.to_dict(), project_overrides)

    def _deep_merge_dicts(
        self, base: dict[str, Any], override: dict[str, Any]
    ) -> dict[str, Any]:
        """Deep merge two dictionaries.

        Keys present in both inputs are merged recursively when both values
        are dicts; in every other case the value from ``override`` wins.

        Args:
            base: Base dictionary
            override: Override dictionary

        Returns:
            Dict[str, Any]: Merged dictionary; a new top-level dict, so
                ``base`` itself is not modified
        """
        result = base.copy()

        for key, value in override.items():
            existing = result.get(key)
            if key in result and isinstance(existing, dict) and isinstance(value, dict):
                result[key] = self._deep_merge_dicts(existing, value)
            else:
                result[key] = value

        return result