Coverage for src/qdrant_loader_mcp_server/search/enhanced/intent/strategy.py: 92%
108 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
"""
Adaptive Search Strategy for Intent-Based Configuration.

This module implements the AdaptiveSearchStrategy that configures search
parameters based on classified intents to optimize search results.
"""
from __future__ import annotations

import copy
from typing import TYPE_CHECKING, Any

from ....utils.logging import LoggingConfig
from .models import AdaptiveSearchConfig, IntentType, SearchIntent
# Optional dependencies: type checkers always see the real names, while at
# runtime the same imports are attempted and replaced by ``None`` placeholders
# when they fail, so the rest of the module can test ``TraversalStrategy is
# not None`` instead of failing at import time.
if TYPE_CHECKING:
    from ...models import SearchResult
    from ..knowledge_graph import DocumentKnowledgeGraph, TraversalStrategy
else:
    # Runtime imports to avoid circular dependencies
    try:
        from ...models import SearchResult
        from ..knowledge_graph import DocumentKnowledgeGraph, TraversalStrategy
    except ImportError:
        # Any of the imports failing disables all three names together.
        DocumentKnowledgeGraph = None
        TraversalStrategy = None
        SearchResult = None

# Module-level logger used by the methods of AdaptiveSearchStrategy.
logger = LoggingConfig.get_logger(__name__)
class AdaptiveSearchStrategy:
    """Adaptive search strategy that configures search based on classified intent.

    The instance owns one template ``AdaptiveSearchConfig`` per ``IntentType``
    in ``self.intent_configs``. ``adapt_search`` takes a private copy of the
    matching template and tunes that copy using the intent's confidence,
    secondary intents, query characteristics and session context, so the
    templates themselves are never modified by a search request.
    """

    def __init__(self, knowledge_graph=None):
        """Initialize the adaptive search strategy.

        Args:
            knowledge_graph: Optional knowledge-graph object. It is only stored
                on the instance and reported by ``get_strategy_stats``.
        """
        self.knowledge_graph = knowledge_graph
        self.logger = LoggingConfig.get_logger(__name__)

        # Define intent-specific search configurations. ``kg_traversal_strategy``
        # is left as None here and filled in by ``_set_traversal_strategies``
        # when the TraversalStrategy enum could be imported.
        self.intent_configs = {
            IntentType.TECHNICAL_LOOKUP: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.8,  # Higher vector weight for semantic similarity
                keyword_weight=0.2,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies
                max_graph_hops=2,
                kg_expansion_weight=0.3,
                result_filters={"content_type": ["code", "documentation", "technical"]},
                ranking_boosts={"source_type": {"git": 1.4, "confluence": 1.2}},
                source_type_preferences={"git": 1.5, "documentation": 1.3},
                expand_query=True,
                expansion_aggressiveness=0.4,
                semantic_expansion=True,
                entity_expansion=True,
                max_results=25,
                min_score_threshold=0.15,
                authority_bias=0.3,
            ),
            IntentType.BUSINESS_CONTEXT: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.6,  # Balanced approach
                keyword_weight=0.4,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies
                max_graph_hops=3,
                kg_expansion_weight=0.2,
                result_filters={
                    "content_type": ["requirements", "business", "strategy"]
                },
                ranking_boosts={
                    "section_type": {"requirements": 1.5, "objectives": 1.4}
                },
                source_type_preferences={"confluence": 1.4, "documentation": 1.2},
                expand_query=True,
                expansion_aggressiveness=0.3,
                semantic_expansion=True,
                entity_expansion=False,
                max_results=20,
                min_score_threshold=0.1,
                authority_bias=0.4,
            ),
            IntentType.VENDOR_EVALUATION: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.5,  # Equal weight for structured comparison
                keyword_weight=0.5,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies
                max_graph_hops=2,
                kg_expansion_weight=0.25,
                result_filters={
                    "content_type": ["proposal", "evaluation", "comparison"]
                },
                ranking_boosts={"has_money_entities": 1.3, "has_org_entities": 1.2},
                source_type_preferences={"confluence": 1.3, "documentation": 1.1},
                expand_query=True,
                expansion_aggressiveness=0.35,
                semantic_expansion=True,
                entity_expansion=True,
                max_results=15,
                min_score_threshold=0.12,
                diversity_factor=0.3,  # Encourage diverse vendor options
                authority_bias=0.2,
            ),
            IntentType.PROCEDURAL: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.7,  # Higher semantic matching for procedures
                keyword_weight=0.3,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies
                max_graph_hops=2,
                kg_expansion_weight=0.2,
                result_filters={"content_type": ["guide", "tutorial", "procedure"]},
                ranking_boosts={
                    "section_type": {"steps": 1.5, "procedure": 1.4, "guide": 1.3}
                },
                source_type_preferences={"documentation": 1.4, "git": 1.2},
                expand_query=True,
                expansion_aggressiveness=0.25,
                semantic_expansion=True,
                entity_expansion=False,
                max_results=15,
                min_score_threshold=0.15,
                temporal_bias=0.2,  # Prefer recent procedures
            ),
            IntentType.INFORMATIONAL: AdaptiveSearchConfig(
                search_strategy="vector",  # Vector-first for conceptual understanding
                vector_weight=0.9,
                keyword_weight=0.1,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies
                max_graph_hops=3,
                kg_expansion_weight=0.4,  # More expansion for discovery
                result_filters={},
                ranking_boosts={"section_type": {"overview": 1.4, "introduction": 1.3}},
                source_type_preferences={"documentation": 1.3, "confluence": 1.1},
                expand_query=True,
                expansion_aggressiveness=0.5,  # Aggressive expansion for discovery
                semantic_expansion=True,
                entity_expansion=True,
                max_results=30,
                min_score_threshold=0.05,
                diversity_factor=0.4,  # Encourage diverse perspectives
                authority_bias=0.3,
            ),
            IntentType.TROUBLESHOOTING: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.6,
                keyword_weight=0.4,  # Higher keyword weight for specific errors
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies
                max_graph_hops=2,
                kg_expansion_weight=0.15,
                result_filters={"content_type": ["troubleshooting", "fix", "solution"]},
                ranking_boosts={
                    "has_problem_indicators": 1.4,
                    "section_type": {"solution": 1.5},
                },
                source_type_preferences={"git": 1.3, "documentation": 1.2},
                expand_query=False,  # Don't expand error-specific queries
                expansion_aggressiveness=0.1,
                semantic_expansion=False,
                entity_expansion=False,
                max_results=10,
                min_score_threshold=0.2,
                temporal_bias=0.3,  # Prefer recent solutions
            ),
            IntentType.EXPLORATORY: AdaptiveSearchConfig(
                search_strategy="vector",  # Vector-first for exploration
                vector_weight=0.85,
                keyword_weight=0.15,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies
                max_graph_hops=4,  # Deeper exploration
                kg_expansion_weight=0.5,  # Maximum expansion
                result_filters={},
                ranking_boosts={},
                source_type_preferences={},
                expand_query=True,
                expansion_aggressiveness=0.6,  # Very aggressive expansion
                semantic_expansion=True,
                entity_expansion=True,
                max_results=40,  # More results for exploration
                min_score_threshold=0.03,  # Lower threshold
                diversity_factor=0.6,  # Maximum diversity
                authority_bias=0.1,
            ),
            # Fallback configuration
            IntentType.GENERAL: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.7,
                keyword_weight=0.3,
                use_knowledge_graph=False,
                expand_query=True,
                expansion_aggressiveness=0.3,
                semantic_expansion=True,
                entity_expansion=True,
                max_results=20,
                min_score_threshold=0.1,
            ),
        }

        # Set TraversalStrategy defaults if available
        self._set_traversal_strategies()

        logger.info(
            "Initialized adaptive search strategy with intent-specific configurations"
        )

    def _set_traversal_strategies(self):
        """Set default TraversalStrategy values if available.

        Does nothing when ``TraversalStrategy`` is ``None`` (its import
        failed); the templates then keep ``kg_traversal_strategy=None``.
        """
        if TraversalStrategy is None:
            return

        # Set specific traversal strategies for each intent type
        traversal_map = {
            IntentType.TECHNICAL_LOOKUP: TraversalStrategy.SEMANTIC,
            IntentType.BUSINESS_CONTEXT: TraversalStrategy.WEIGHTED,
            IntentType.VENDOR_EVALUATION: TraversalStrategy.CENTRALITY,
            IntentType.PROCEDURAL: TraversalStrategy.BREADTH_FIRST,
            IntentType.INFORMATIONAL: TraversalStrategy.SEMANTIC,
            IntentType.TROUBLESHOOTING: TraversalStrategy.WEIGHTED,
            IntentType.EXPLORATORY: TraversalStrategy.BREADTH_FIRST,
        }

        for intent_type, traversal_strategy in traversal_map.items():
            if intent_type in self.intent_configs:
                self.intent_configs[intent_type].kg_traversal_strategy = (
                    traversal_strategy
                )

    def adapt_search(
        self,
        search_intent: SearchIntent,
        query: str,
        _base_results=None,
    ) -> AdaptiveSearchConfig:
        """Adapt search configuration based on classified intent.

        Args:
            search_intent: Classified intent. This method reads its
                ``intent_type``, ``confidence``, ``secondary_intents`` and
                ``session_context``; the helpers also read
                ``query_complexity``, ``is_question`` and ``is_technical``.
            query: Raw query text; only its whitespace-separated word count
                is used.
            _base_results: Accepted for interface compatibility; not used by
                this method.

        Returns:
            A configuration object private to the caller. If any adaptation
            step raises, the error is logged and a copy of the GENERAL
            template is returned instead.
        """
        try:
            # Get base configuration for the primary intent (a private copy,
            # so the in-place adjustments below never touch the templates)
            config = self._get_base_config(search_intent.intent_type)

            # Apply confidence-based adjustments
            config = self._apply_confidence_adjustments(config, search_intent)

            # Apply secondary intent blending
            if search_intent.secondary_intents:
                config = self._blend_secondary_intents(
                    config, search_intent.secondary_intents
                )

            # Apply query-specific adaptations
            config = self._apply_query_adaptations(config, search_intent, query)

            # Apply session context adaptations
            if search_intent.session_context:
                config = self._apply_session_adaptations(
                    config, search_intent.session_context
                )

            logger.debug(
                f"Adapted search configuration for {search_intent.intent_type.value}",
                confidence=search_intent.confidence,
                vector_weight=config.vector_weight,
                use_kg=config.use_knowledge_graph,
                max_results=config.max_results,
            )

            return config

        except Exception as e:
            # Deliberate best-effort: never fail a search because adaptation
            # failed. Return a copy so callers cannot alter the template.
            logger.error(f"Failed to adapt search configuration: {e}")
            return copy.deepcopy(self.intent_configs[IntentType.GENERAL])

    def _get_base_config(self, intent_type: IntentType) -> AdaptiveSearchConfig:
        """Get a private copy of the base configuration for an intent type.

        Falls back to the GENERAL template when ``intent_type`` has no entry.
        A deep copy is returned because the adaptation helpers modify the
        configuration (including its nested dicts) in place; handing out the
        stored template would let those changes accumulate across calls.
        """
        template = self.intent_configs.get(intent_type)
        if template is None:
            template = self.intent_configs[IntentType.GENERAL]
        return copy.deepcopy(template)

    def _apply_confidence_adjustments(
        self, config: AdaptiveSearchConfig, search_intent: SearchIntent
    ) -> AdaptiveSearchConfig:
        """Apply confidence-based adjustments to the configuration.

        Modifies ``config`` in place and returns it. Confidence values in the
        closed range 0.5 to 0.8 leave the configuration untouched.
        """
        confidence = search_intent.confidence

        if confidence < 0.5:
            # Low confidence: reduce aggressiveness, increase diversity
            config.expansion_aggressiveness *= 0.7
            config.diversity_factor = min(1.0, config.diversity_factor + 0.2)
            config.min_score_threshold *= 0.8
        elif confidence > 0.8:
            # High confidence: increase precision, reduce diversity
            config.expansion_aggressiveness *= 1.3
            config.diversity_factor *= 0.7
            config.min_score_threshold *= 1.2

        return config

    def _blend_secondary_intents(
        self,
        config: AdaptiveSearchConfig,
        secondary_intents: list[tuple[IntentType, float]],
    ) -> AdaptiveSearchConfig:
        """Blend secondary intent configurations with primary.

        Each secondary intent with confidence above 0.3 and a known template
        pulls ``vector_weight`` and ``expansion_aggressiveness`` towards that
        template by ``confidence * 0.3``, and may raise ``diversity_factor``.
        Modifies ``config`` in place and returns it.
        """
        for intent_type, confidence in secondary_intents:
            # Only blend significant secondary intents
            if not confidence > 0.3:
                continue

            secondary_config = self.intent_configs.get(intent_type)
            if not secondary_config:
                continue

            blend_factor = confidence * 0.3  # Max 30% blending
            keep_factor = 1 - blend_factor

            # Blend key parameters
            config.vector_weight = (
                config.vector_weight * keep_factor
                + secondary_config.vector_weight * blend_factor
            )
            config.expansion_aggressiveness = (
                config.expansion_aggressiveness * keep_factor
                + secondary_config.expansion_aggressiveness * blend_factor
            )

            # Safely handle potential None values for diversity_factor
            primary_diversity = (
                config.diversity_factor if config.diversity_factor is not None else 0
            )
            secondary_diversity = (
                secondary_config.diversity_factor
                if secondary_config.diversity_factor is not None
                else 0
            )
            # Diversity is never lowered: keep the larger of the current value
            # and the scaled secondary value.
            config.diversity_factor = max(
                primary_diversity, secondary_diversity * blend_factor
            )

        return config

    def _apply_query_adaptations(
        self, config: AdaptiveSearchConfig, search_intent: SearchIntent, query: str
    ) -> AdaptiveSearchConfig:
        """Apply query-specific adaptations.

        Uses the query's word count plus the intent's ``query_complexity``,
        ``is_question`` and ``is_technical`` flags. Modifies ``config`` in
        place and returns it.
        """
        word_count = len(query.split())

        if word_count <= 3:
            # Short queries: increase expansion
            config.expansion_aggressiveness *= 1.4
            config.semantic_expansion = True
        elif word_count >= 8:
            # Long queries: reduce expansion, increase precision
            config.expansion_aggressiveness *= 0.7
            config.min_score_threshold *= 1.2

        # Very complex queries: use knowledge graph more aggressively
        if search_intent.query_complexity > 0.7:
            config.use_knowledge_graph = True
            config.kg_expansion_weight *= 1.3
            config.max_graph_hops = min(4, config.max_graph_hops + 1)

        # Question queries: increase semantic weight
        if search_intent.is_question:
            config.vector_weight = min(0.9, config.vector_weight + 0.1)
            config.semantic_expansion = True

        # Technical queries: boost technical sources
        if search_intent.is_technical:
            config.source_type_preferences["git"] = (
                config.source_type_preferences.get("git", 1.0) * 1.2
            )
            config.authority_bias *= 1.2

        return config

    def _apply_session_adaptations(
        self, config: AdaptiveSearchConfig, session_context: dict[str, Any]
    ) -> AdaptiveSearchConfig:
        """Apply session context adaptations.

        Recognised keys are ``urgency``, ``session_type`` and
        ``experience_level``; other keys are ignored. Modifies ``config`` in
        place and returns it.
        """
        # Time-sensitive sessions: increase temporal bias
        if session_context.get("urgency") == "high":
            config.temporal_bias = min(1.0, config.temporal_bias + 0.3)
            config.max_results = min(15, config.max_results)

        session_type = session_context.get("session_type", "")
        if session_type == "learning":
            # Learning sessions: increase diversity and expansion
            config.diversity_factor = min(1.0, config.diversity_factor + 0.2)
            config.expansion_aggressiveness *= 1.2
            config.max_results = min(30, config.max_results + 5)
        elif session_type == "focused":
            # Focused sessions: increase precision
            config.min_score_threshold *= 1.3
            config.expansion_aggressiveness *= 0.8
            config.max_results = max(10, config.max_results - 5)

        # User experience level. Note that the "section_type" boosts are
        # replaced outright, not merged with the intent's own boosts.
        experience_level = session_context.get("experience_level", "intermediate")
        if experience_level == "beginner":
            config.source_type_preferences["documentation"] = 1.4
            config.ranking_boosts["section_type"] = {
                "introduction": 1.5,
                "overview": 1.4,
            }
        elif experience_level == "expert":
            config.source_type_preferences["git"] = 1.3
            config.ranking_boosts["section_type"] = {
                "implementation": 1.4,
                "advanced": 1.3,
            }

        return config

    def get_strategy_stats(self) -> dict[str, Any]:
        """Get adaptive search strategy statistics.

        Returns:
            A dict with the number of configured intent types, whether a
            knowledge graph was supplied, the distinct search strategies in
            use and, when ``TraversalStrategy`` is available, the distinct
            traversal strategy values of the knowledge-graph-enabled
            templates. Lists are sorted so the output is stable across runs.
        """
        templates = list(self.intent_configs.values())

        stats = {
            "intent_types_supported": len(self.intent_configs),
            "has_knowledge_graph": self.knowledge_graph is not None,
            "strategy_types": sorted(
                {template.search_strategy for template in templates}
            ),
        }

        # Add traversal strategies if TraversalStrategy is available
        if TraversalStrategy is not None:
            stats["traversal_strategies"] = sorted(
                {
                    template.kg_traversal_strategy.value
                    for template in templates
                    if template.use_knowledge_graph
                    and template.kg_traversal_strategy
                },
                key=str,
            )

        return stats