Coverage for src/qdrant_loader_mcp_server/search/enhanced/intent/strategy.py: 92%

108 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:06 +0000

1""" 

2Adaptive Search Strategy for Intent-Based Configuration. 

3 

4This module implements the AdaptiveSearchStrategy that configures search 

5parameters based on classified intents to optimize search results. 

6""" 

7 

8from __future__ import annotations 

9 

10from typing import TYPE_CHECKING, Any 

11 

12from ....utils.logging import LoggingConfig 

13from .models import AdaptiveSearchConfig, IntentType, SearchIntent 

14 

15if TYPE_CHECKING: 

16 from ...models import SearchResult 

17 from ..knowledge_graph import DocumentKnowledgeGraph, TraversalStrategy 

18else: 

19 # Runtime imports to avoid circular dependencies 

20 try: 

21 from ...models import SearchResult 

22 from ..knowledge_graph import DocumentKnowledgeGraph, TraversalStrategy 

23 except ImportError: 

24 DocumentKnowledgeGraph = None 

25 TraversalStrategy = None 

26 SearchResult = None 

27 

28logger = LoggingConfig.get_logger(__name__) 

29 

30 

class AdaptiveSearchStrategy:
    """Adaptive search strategy that configures search based on classified intent.

    One template ``AdaptiveSearchConfig`` per ``IntentType`` is stored in
    ``self.intent_configs``.  ``adapt_search`` works on a private copy of the
    matching template, so per-request tuning never alters the templates.
    """

    def __init__(self, knowledge_graph: DocumentKnowledgeGraph | None = None) -> None:
        """Initialize the adaptive search strategy.

        Args:
            knowledge_graph: Optional knowledge graph; it is only stored on the
                instance and reported by ``get_strategy_stats``.
        """
        self.knowledge_graph = knowledge_graph
        self.logger = LoggingConfig.get_logger(__name__)

        # Intent-specific template configurations.  ``kg_traversal_strategy``
        # starts as None and is filled in by ``_set_traversal_strategies()``
        # when the TraversalStrategy enum could be imported.
        self.intent_configs = {
            IntentType.TECHNICAL_LOOKUP: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.8,  # Higher vector weight for semantic similarity
                keyword_weight=0.2,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=2,
                kg_expansion_weight=0.3,
                result_filters={"content_type": ["code", "documentation", "technical"]},
                ranking_boosts={"source_type": {"git": 1.4, "confluence": 1.2}},
                source_type_preferences={"git": 1.5, "documentation": 1.3},
                expand_query=True,
                expansion_aggressiveness=0.4,
                semantic_expansion=True,
                entity_expansion=True,
                max_results=25,
                min_score_threshold=0.15,
                authority_bias=0.3,
            ),
            IntentType.BUSINESS_CONTEXT: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.6,  # Balanced approach
                keyword_weight=0.4,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=3,
                kg_expansion_weight=0.2,
                result_filters={
                    "content_type": ["requirements", "business", "strategy"]
                },
                ranking_boosts={
                    "section_type": {"requirements": 1.5, "objectives": 1.4}
                },
                source_type_preferences={"confluence": 1.4, "documentation": 1.2},
                expand_query=True,
                expansion_aggressiveness=0.3,
                semantic_expansion=True,
                entity_expansion=False,
                max_results=20,
                min_score_threshold=0.1,
                authority_bias=0.4,
            ),
            IntentType.VENDOR_EVALUATION: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.5,  # Equal weight for structured comparison
                keyword_weight=0.5,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=2,
                kg_expansion_weight=0.25,
                result_filters={
                    "content_type": ["proposal", "evaluation", "comparison"]
                },
                ranking_boosts={"has_money_entities": 1.3, "has_org_entities": 1.2},
                source_type_preferences={"confluence": 1.3, "documentation": 1.1},
                expand_query=True,
                expansion_aggressiveness=0.35,
                semantic_expansion=True,
                entity_expansion=True,
                max_results=15,
                min_score_threshold=0.12,
                diversity_factor=0.3,  # Encourage diverse vendor options
                authority_bias=0.2,
            ),
            IntentType.PROCEDURAL: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.7,  # Higher semantic matching for procedures
                keyword_weight=0.3,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=2,
                kg_expansion_weight=0.2,
                result_filters={"content_type": ["guide", "tutorial", "procedure"]},
                ranking_boosts={
                    "section_type": {"steps": 1.5, "procedure": 1.4, "guide": 1.3}
                },
                source_type_preferences={"documentation": 1.4, "git": 1.2},
                expand_query=True,
                expansion_aggressiveness=0.25,
                semantic_expansion=True,
                entity_expansion=False,
                max_results=15,
                min_score_threshold=0.15,
                temporal_bias=0.2,  # Prefer recent procedures
            ),
            IntentType.INFORMATIONAL: AdaptiveSearchConfig(
                search_strategy="vector",  # Vector-first for conceptual understanding
                vector_weight=0.9,
                keyword_weight=0.1,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=3,
                kg_expansion_weight=0.4,  # More expansion for discovery
                result_filters={},
                ranking_boosts={"section_type": {"overview": 1.4, "introduction": 1.3}},
                source_type_preferences={"documentation": 1.3, "confluence": 1.1},
                expand_query=True,
                expansion_aggressiveness=0.5,  # Aggressive expansion for discovery
                semantic_expansion=True,
                entity_expansion=True,
                max_results=30,
                min_score_threshold=0.05,
                diversity_factor=0.4,  # Encourage diverse perspectives
                authority_bias=0.3,
            ),
            IntentType.TROUBLESHOOTING: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.6,
                keyword_weight=0.4,  # Higher keyword weight for specific errors
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=2,
                kg_expansion_weight=0.15,
                result_filters={"content_type": ["troubleshooting", "fix", "solution"]},
                ranking_boosts={
                    "has_problem_indicators": 1.4,
                    "section_type": {"solution": 1.5},
                },
                source_type_preferences={"git": 1.3, "documentation": 1.2},
                expand_query=False,  # Don't expand error-specific queries
                expansion_aggressiveness=0.1,
                semantic_expansion=False,
                entity_expansion=False,
                max_results=10,
                min_score_threshold=0.2,
                temporal_bias=0.3,  # Prefer recent solutions
            ),
            IntentType.EXPLORATORY: AdaptiveSearchConfig(
                search_strategy="vector",  # Vector-first for exploration
                vector_weight=0.85,
                keyword_weight=0.15,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=4,  # Deeper exploration
                kg_expansion_weight=0.5,  # Maximum expansion
                result_filters={},
                ranking_boosts={},
                source_type_preferences={},
                expand_query=True,
                expansion_aggressiveness=0.6,  # Very aggressive expansion
                semantic_expansion=True,
                entity_expansion=True,
                max_results=40,  # More results for exploration
                min_score_threshold=0.03,  # Lower threshold
                diversity_factor=0.6,  # Maximum diversity
                authority_bias=0.1,
            ),
            # Fallback configuration
            IntentType.GENERAL: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.7,
                keyword_weight=0.3,
                use_knowledge_graph=False,
                expand_query=True,
                expansion_aggressiveness=0.3,
                semantic_expansion=True,
                entity_expansion=True,
                max_results=20,
                min_score_threshold=0.1,
            ),
        }

        # Set TraversalStrategy defaults if available
        self._set_traversal_strategies()

        logger.info(
            "Initialized adaptive search strategy with intent-specific configurations"
        )

    def _set_traversal_strategies(self) -> None:
        """Assign a default traversal strategy to each knowledge-graph template.

        Does nothing when ``TraversalStrategy`` is None, which is the value the
        module assigns when the runtime import of the knowledge-graph package
        fails.
        """
        if TraversalStrategy is None:
            return

        traversal_map = {
            IntentType.TECHNICAL_LOOKUP: TraversalStrategy.SEMANTIC,
            IntentType.BUSINESS_CONTEXT: TraversalStrategy.WEIGHTED,
            IntentType.VENDOR_EVALUATION: TraversalStrategy.CENTRALITY,
            IntentType.PROCEDURAL: TraversalStrategy.BREADTH_FIRST,
            IntentType.INFORMATIONAL: TraversalStrategy.SEMANTIC,
            IntentType.TROUBLESHOOTING: TraversalStrategy.WEIGHTED,
            IntentType.EXPLORATORY: TraversalStrategy.BREADTH_FIRST,
        }

        for intent_type, traversal_strategy in traversal_map.items():
            if intent_type in self.intent_configs:
                self.intent_configs[intent_type].kg_traversal_strategy = (
                    traversal_strategy
                )

    def adapt_search(
        self,
        search_intent: SearchIntent,
        query: str,
        _base_results=None,
    ) -> AdaptiveSearchConfig:
        """Adapt search configuration based on classified intent.

        Starting from a copy of the template for ``search_intent.intent_type``,
        applies confidence, secondary-intent, query and session adjustments in
        that order.

        Args:
            search_intent: Classified intent driving the adaptation.
            query: Raw query text; only its whitespace-separated word count is
                inspected.
            _base_results: Accepted for interface compatibility; not used here.

        Returns:
            A configuration owned by the caller.  If any step raises, the error
            is logged and a copy of the GENERAL template is returned instead.
        """
        try:
            # Start from a private copy of the primary intent's template.
            config = self._get_base_config(search_intent.intent_type)

            config = self._apply_confidence_adjustments(config, search_intent)

            if search_intent.secondary_intents:
                config = self._blend_secondary_intents(
                    config, search_intent.secondary_intents
                )

            config = self._apply_query_adaptations(config, search_intent, query)

            if search_intent.session_context:
                config = self._apply_session_adaptations(
                    config, search_intent.session_context
                )

            logger.debug(
                f"Adapted search configuration for {search_intent.intent_type.value}",
                confidence=search_intent.confidence,
                vector_weight=config.vector_weight,
                use_kg=config.use_knowledge_graph,
                max_results=config.max_results,
            )

            return config

        except Exception as e:
            # Deliberate best-effort boundary: search should still run with the
            # fallback configuration when adaptation fails.
            logger.error(f"Failed to adapt search configuration: {e}")
            return self._get_base_config(IntentType.GENERAL)

    def _get_base_config(self, intent_type: IntentType) -> AdaptiveSearchConfig:
        """Return an independent copy of the template for ``intent_type``.

        Falls back to the GENERAL template when ``intent_type`` has no entry.
        A deep copy is returned because every adaptation step mutates the
        configuration (including its nested dicts) in place; handing out the
        stored template would let one request's tuning leak into, and compound
        across, all later requests.
        """
        import copy  # local import: this method is the only user

        try:
            template = self.intent_configs[intent_type]
        except KeyError:
            template = self.intent_configs[IntentType.GENERAL]
        return copy.deepcopy(template)

    def _apply_confidence_adjustments(
        self, config: AdaptiveSearchConfig, search_intent: SearchIntent
    ) -> AdaptiveSearchConfig:
        """Scale expansion, diversity and score threshold by intent confidence.

        ``config`` is modified in place and returned.  Confidence in the closed
        range [0.5, 0.8] leaves the configuration untouched.
        """
        confidence = search_intent.confidence

        if confidence < 0.5:
            # Low confidence: less aggressive expansion, more diverse results,
            # lower score threshold.
            config.expansion_aggressiveness *= 0.7
            config.diversity_factor = min(1.0, config.diversity_factor + 0.2)
            config.min_score_threshold *= 0.8
        elif confidence > 0.8:
            # High confidence: more expansion, less diversity, higher threshold.
            config.expansion_aggressiveness *= 1.3
            config.diversity_factor *= 0.7
            config.min_score_threshold *= 1.2

        return config

    def _blend_secondary_intents(
        self,
        config: AdaptiveSearchConfig,
        secondary_intents: list[tuple[IntentType, float]],
    ) -> AdaptiveSearchConfig:
        """Blend secondary intent configurations with primary.

        Only secondary intents with confidence above 0.3 and a known template
        contribute.  Each contributes with weight ``confidence * 0.3`` to
        ``vector_weight`` and ``expansion_aggressiveness``; ``diversity_factor``
        becomes the larger of the current value and the weighted secondary
        value.  ``config`` is modified in place and returned.
        """
        for intent_type, confidence in secondary_intents:
            if confidence <= 0.3:
                continue  # Only blend significant secondary intents

            secondary_config = self.intent_configs.get(intent_type)
            if not secondary_config:
                continue

            blend_factor = confidence * 0.3  # Max 30% blending

            config.vector_weight = (
                config.vector_weight * (1 - blend_factor)
                + secondary_config.vector_weight * blend_factor
            )
            config.expansion_aggressiveness = (
                config.expansion_aggressiveness * (1 - blend_factor)
                + secondary_config.expansion_aggressiveness * blend_factor
            )

            # Treat a missing (None) diversity_factor as 0 on either side.
            left = config.diversity_factor if config.diversity_factor is not None else 0
            right_base = (
                secondary_config.diversity_factor
                if secondary_config.diversity_factor is not None
                else 0
            )
            config.diversity_factor = max(left, right_base * blend_factor)

        return config

    def _apply_query_adaptations(
        self, config: AdaptiveSearchConfig, search_intent: SearchIntent, query: str
    ) -> AdaptiveSearchConfig:
        """Apply query-specific adaptations.

        Uses the whitespace-separated word count of ``query`` together with the
        ``query_complexity``, ``is_question`` and ``is_technical`` attributes of
        ``search_intent``.  ``config`` is modified in place and returned.
        """
        word_count = len(query.split())

        if word_count <= 3:
            # Short queries: increase expansion
            config.expansion_aggressiveness *= 1.4
            config.semantic_expansion = True
        elif word_count >= 8:
            # Long queries: reduce expansion, increase precision
            config.expansion_aggressiveness *= 0.7
            config.min_score_threshold *= 1.2

        # Very complex queries: use knowledge graph more aggressively
        if search_intent.query_complexity > 0.7:
            config.use_knowledge_graph = True
            config.kg_expansion_weight *= 1.3
            config.max_graph_hops = min(4, config.max_graph_hops + 1)

        # Question queries: increase semantic weight (capped at 0.9)
        if search_intent.is_question:
            config.vector_weight = min(0.9, config.vector_weight + 0.1)
            config.semantic_expansion = True

        # Technical queries: boost technical sources
        if search_intent.is_technical:
            config.source_type_preferences["git"] = (
                config.source_type_preferences.get("git", 1.0) * 1.2
            )
            config.authority_bias *= 1.2

        return config

    def _apply_session_adaptations(
        self, config: AdaptiveSearchConfig, session_context: dict[str, Any]
    ) -> AdaptiveSearchConfig:
        """Apply session context adaptations.

        Reads the optional keys ``"urgency"``, ``"session_type"`` and
        ``"experience_level"`` from ``session_context``.  ``config`` is modified
        in place and returned.
        """
        # Time-sensitive sessions: increase temporal bias, cap result count
        if session_context.get("urgency") == "high":
            config.temporal_bias = min(1.0, config.temporal_bias + 0.3)
            config.max_results = min(15, config.max_results)

        session_type = session_context.get("session_type", "")
        if session_type == "learning":
            # Learning sessions: increase diversity and expansion
            config.diversity_factor = min(1.0, config.diversity_factor + 0.2)
            config.expansion_aggressiveness *= 1.2
            config.max_results = min(30, config.max_results + 5)
        elif session_type == "focused":
            # Focused sessions: increase precision
            config.min_score_threshold *= 1.3
            config.expansion_aggressiveness *= 0.8
            config.max_results = max(10, config.max_results - 5)

        # User experience level; anything other than beginner/expert is a no-op
        experience_level = session_context.get("experience_level", "intermediate")
        if experience_level == "beginner":
            config.source_type_preferences["documentation"] = 1.4
            config.ranking_boosts["section_type"] = {
                "introduction": 1.5,
                "overview": 1.4,
            }
        elif experience_level == "expert":
            config.source_type_preferences["git"] = 1.3
            config.ranking_boosts["section_type"] = {
                "implementation": 1.4,
                "advanced": 1.3,
            }

        return config

    def get_strategy_stats(self) -> dict[str, Any]:
        """Get adaptive search strategy statistics.

        Returns:
            A dict with the number of templates, whether a knowledge graph was
            supplied, the distinct ``search_strategy`` values and, when
            ``TraversalStrategy`` is available, the distinct traversal strategy
            values of templates that use the knowledge graph.  The two lists are
            built from sets, so their order is not defined.
        """
        stats: dict[str, Any] = {
            "intent_types_supported": len(self.intent_configs),
            "has_knowledge_graph": self.knowledge_graph is not None,
            "strategy_types": list(
                {config.search_strategy for config in self.intent_configs.values()}
            ),
        }

        # Add traversal strategies if TraversalStrategy is available
        if TraversalStrategy is not None:
            stats["traversal_strategies"] = list(
                {
                    config.kg_traversal_strategy.value
                    for config in self.intent_configs.values()
                    if config.use_knowledge_graph and config.kg_traversal_strategy
                }
            )

        return stats