Coverage for src/qdrant_loader_mcp_server/search/enhanced/intent/strategy.py: 92%

1"""

2Adaptive Search Strategy for Intent-Based Configuration.

4This module implements the AdaptiveSearchStrategy that configures search

5parameters based on classified intents to optimize search results.

6"""

8from __future__ import annotations

10from typing import TYPE_CHECKING, Any

12from ....utils.logging import LoggingConfig

13from .models import AdaptiveSearchConfig, IntentType, SearchIntent

15if TYPE_CHECKING:

16 from ...models import SearchResult

17 from ..knowledge_graph import DocumentKnowledgeGraph, TraversalStrategy

18else:

19 # Runtime imports to avoid circular dependencies

20 try:

21 from ...models import SearchResult

22 from ..knowledge_graph import DocumentKnowledgeGraph, TraversalStrategy

23 except ImportError:

24 DocumentKnowledgeGraph = None

25 TraversalStrategy = None

26 SearchResult = None

28logger = LoggingConfig.get_logger(__name__)

class AdaptiveSearchStrategy:
    """Adaptive search strategy that configures search based on classified intent.

    One ``AdaptiveSearchConfig`` template is kept per ``IntentType`` in
    ``self.intent_configs``. ``adapt_search`` works on a private deep copy of
    the matching template, so the per-query adjustments (which mutate the
    config in place) never leak back into the templates or into later calls.
    """

    def __init__(self, knowledge_graph=None):
        """Initialize the adaptive search strategy.

        Args:
            knowledge_graph: Optional knowledge graph object. It is only stored
                on the instance and reported by ``get_strategy_stats``.
        """
        self.knowledge_graph = knowledge_graph
        self.logger = LoggingConfig.get_logger(__name__)

        # Intent-specific configuration templates. ``kg_traversal_strategy`` is
        # left as None here and filled in by ``_set_traversal_strategies()``
        # when ``TraversalStrategy`` could be imported.
        self.intent_configs = {
            IntentType.TECHNICAL_LOOKUP: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.8,  # Higher vector weight for semantic similarity
                keyword_weight=0.2,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=2,
                kg_expansion_weight=0.3,
                result_filters={"content_type": ["code", "documentation", "technical"]},
                ranking_boosts={"source_type": {"git": 1.4, "confluence": 1.2}},
                source_type_preferences={"git": 1.5, "documentation": 1.3},
                expand_query=True,
                expansion_aggressiveness=0.4,
                semantic_expansion=True,
                entity_expansion=True,
                max_results=25,
                min_score_threshold=0.15,
                authority_bias=0.3,
            ),
            IntentType.BUSINESS_CONTEXT: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.6,  # Balanced approach
                keyword_weight=0.4,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=3,
                kg_expansion_weight=0.2,
                result_filters={
                    "content_type": ["requirements", "business", "strategy"]
                },
                ranking_boosts={
                    "section_type": {"requirements": 1.5, "objectives": 1.4}
                },
                source_type_preferences={"confluence": 1.4, "documentation": 1.2},
                expand_query=True,
                expansion_aggressiveness=0.3,
                semantic_expansion=True,
                entity_expansion=False,
                max_results=20,
                min_score_threshold=0.1,
                authority_bias=0.4,
            ),
            IntentType.VENDOR_EVALUATION: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.5,  # Equal weight for structured comparison
                keyword_weight=0.5,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=2,
                kg_expansion_weight=0.25,
                result_filters={
                    "content_type": ["proposal", "evaluation", "comparison"]
                },
                ranking_boosts={"has_money_entities": 1.3, "has_org_entities": 1.2},
                source_type_preferences={"confluence": 1.3, "documentation": 1.1},
                expand_query=True,
                expansion_aggressiveness=0.35,
                semantic_expansion=True,
                entity_expansion=True,
                max_results=15,
                min_score_threshold=0.12,
                diversity_factor=0.3,  # Encourage diverse vendor options
                authority_bias=0.2,
            ),
            IntentType.PROCEDURAL: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.7,  # Higher semantic matching for procedures
                keyword_weight=0.3,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=2,
                kg_expansion_weight=0.2,
                result_filters={"content_type": ["guide", "tutorial", "procedure"]},
                ranking_boosts={
                    "section_type": {"steps": 1.5, "procedure": 1.4, "guide": 1.3}
                },
                source_type_preferences={"documentation": 1.4, "git": 1.2},
                expand_query=True,
                expansion_aggressiveness=0.25,
                semantic_expansion=True,
                entity_expansion=False,
                max_results=15,
                min_score_threshold=0.15,
                temporal_bias=0.2,  # Prefer recent procedures
            ),
            IntentType.INFORMATIONAL: AdaptiveSearchConfig(
                search_strategy="vector",  # Vector-first for conceptual understanding
                vector_weight=0.9,
                keyword_weight=0.1,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=3,
                kg_expansion_weight=0.4,  # More expansion for discovery
                result_filters={},
                ranking_boosts={"section_type": {"overview": 1.4, "introduction": 1.3}},
                source_type_preferences={"documentation": 1.3, "confluence": 1.1},
                expand_query=True,
                expansion_aggressiveness=0.5,  # Aggressive expansion for discovery
                semantic_expansion=True,
                entity_expansion=True,
                max_results=30,
                min_score_threshold=0.05,
                diversity_factor=0.4,  # Encourage diverse perspectives
                authority_bias=0.3,
            ),
            IntentType.TROUBLESHOOTING: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.6,
                keyword_weight=0.4,  # Higher keyword weight for specific errors
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=2,
                kg_expansion_weight=0.15,
                result_filters={"content_type": ["troubleshooting", "fix", "solution"]},
                ranking_boosts={
                    "has_problem_indicators": 1.4,
                    "section_type": {"solution": 1.5},
                },
                source_type_preferences={"git": 1.3, "documentation": 1.2},
                expand_query=False,  # Don't expand error-specific queries
                expansion_aggressiveness=0.1,
                semantic_expansion=False,
                entity_expansion=False,
                max_results=10,
                min_score_threshold=0.2,
                temporal_bias=0.3,  # Prefer recent solutions
            ),
            IntentType.EXPLORATORY: AdaptiveSearchConfig(
                search_strategy="vector",  # Vector-first for exploration
                vector_weight=0.85,
                keyword_weight=0.15,
                use_knowledge_graph=True,
                kg_traversal_strategy=None,  # Set by _set_traversal_strategies()
                max_graph_hops=4,  # Deeper exploration
                kg_expansion_weight=0.5,  # Maximum expansion
                result_filters={},
                ranking_boosts={},
                source_type_preferences={},
                expand_query=True,
                expansion_aggressiveness=0.6,  # Very aggressive expansion
                semantic_expansion=True,
                entity_expansion=True,
                max_results=40,  # More results for exploration
                min_score_threshold=0.03,  # Lower threshold
                diversity_factor=0.6,  # Maximum diversity
                authority_bias=0.1,
            ),
            # Fallback configuration
            IntentType.GENERAL: AdaptiveSearchConfig(
                search_strategy="hybrid",
                vector_weight=0.7,
                keyword_weight=0.3,
                use_knowledge_graph=False,
                expand_query=True,
                expansion_aggressiveness=0.3,
                semantic_expansion=True,
                entity_expansion=True,
                max_results=20,
                min_score_threshold=0.1,
            ),
        }

        # Set TraversalStrategy defaults if available
        self._set_traversal_strategies()

        logger.info(
            "Initialized adaptive search strategy with intent-specific configurations"
        )

    def _set_traversal_strategies(self):
        """Set default TraversalStrategy values if available.

        Does nothing when ``TraversalStrategy`` is None (the runtime import
        fallback); the templates then keep ``kg_traversal_strategy=None``.
        """
        if TraversalStrategy is None:
            return

        # Specific traversal strategy for each knowledge-graph-enabled intent.
        traversal_map = {
            IntentType.TECHNICAL_LOOKUP: TraversalStrategy.SEMANTIC,
            IntentType.BUSINESS_CONTEXT: TraversalStrategy.WEIGHTED,
            IntentType.VENDOR_EVALUATION: TraversalStrategy.CENTRALITY,
            IntentType.PROCEDURAL: TraversalStrategy.BREADTH_FIRST,
            IntentType.INFORMATIONAL: TraversalStrategy.SEMANTIC,
            IntentType.TROUBLESHOOTING: TraversalStrategy.WEIGHTED,
            IntentType.EXPLORATORY: TraversalStrategy.BREADTH_FIRST,
        }

        for intent_type, traversal_strategy in traversal_map.items():
            if intent_type in self.intent_configs:
                self.intent_configs[intent_type].kg_traversal_strategy = (
                    traversal_strategy
                )

    def adapt_search(
        self,
        search_intent: SearchIntent,
        query: str,
        _base_results=None,
    ) -> AdaptiveSearchConfig:
        """Adapt search configuration based on classified intent.

        Starts from a private copy of the template for the primary intent and
        applies, in order: confidence adjustments, secondary-intent blending
        (when secondary intents are present), query-specific adaptations and
        session adaptations (when a session context is present).

        Args:
            search_intent: Classified intent for the query.
            query: Raw query text; only its whitespace-separated word count is
                used here.
            _base_results: Accepted for interface compatibility; unused.

        Returns:
            The adapted configuration. If any step raises, the error is logged
            and a fresh copy of the GENERAL configuration is returned instead.
        """
        try:
            # Private copy of the template for the primary intent
            config = self._get_base_config(search_intent.intent_type)

            # Apply confidence-based adjustments
            config = self._apply_confidence_adjustments(config, search_intent)

            # Apply secondary intent blending
            if search_intent.secondary_intents:
                config = self._blend_secondary_intents(
                    config, search_intent.secondary_intents
                )

            # Apply query-specific adaptations
            config = self._apply_query_adaptations(config, search_intent, query)

            # Apply session context adaptations
            if search_intent.session_context:
                config = self._apply_session_adaptations(
                    config, search_intent.session_context
                )

            logger.debug(
                f"Adapted search configuration for {search_intent.intent_type.value}",
                confidence=search_intent.confidence,
                vector_weight=config.vector_weight,
                use_kg=config.use_knowledge_graph,
                max_results=config.max_results,
            )

            return config

        except Exception as e:
            # Deliberate best-effort boundary: a failed adaptation must not
            # break the search, so fall back to an unmodified GENERAL config.
            logger.error(f"Failed to adapt search configuration: {e}")
            return self._get_base_config(IntentType.GENERAL)

    def _get_base_config(self, intent_type: IntentType) -> AdaptiveSearchConfig:
        """Get an independent copy of the base configuration for intent type.

        Falls back to the GENERAL template when ``intent_type`` has no entry.
        A deep copy is returned because the adaptation steps mutate the config
        in place, including its nested dicts; handing out the template itself
        would make every query permanently alter the configuration seen by
        all later queries.
        """
        import copy  # local import keeps this block self-contained

        template = self.intent_configs.get(intent_type)
        if template is None:
            template = self.intent_configs[IntentType.GENERAL]
        return copy.deepcopy(template)

    def _apply_confidence_adjustments(
        self, config: AdaptiveSearchConfig, search_intent: SearchIntent
    ) -> AdaptiveSearchConfig:
        """Apply confidence-based adjustments to the configuration.

        Mutates and returns ``config``. Confidence in [0.5, 0.8] leaves the
        configuration unchanged.
        """
        # Low confidence: reduce aggressiveness, increase diversity
        if search_intent.confidence < 0.5:
            config.expansion_aggressiveness *= 0.7
            config.diversity_factor = min(1.0, config.diversity_factor + 0.2)
            config.min_score_threshold *= 0.8

        # High confidence: increase precision, reduce diversity
        elif search_intent.confidence > 0.8:
            config.expansion_aggressiveness *= 1.3
            config.diversity_factor *= 0.7
            config.min_score_threshold *= 1.2

        return config

    def _blend_secondary_intents(
        self,
        config: AdaptiveSearchConfig,
        secondary_intents: list[tuple[IntentType, float]],
    ) -> AdaptiveSearchConfig:
        """Blend secondary intent configurations with primary.

        Mutates and returns ``config``. Each secondary intent with confidence
        above 0.3 and a known template is blended in turn, using a blend
        factor of ``confidence * 0.3``.
        """
        for intent_type, confidence in secondary_intents:
            if confidence > 0.3:  # Only blend significant secondary intents
                secondary_config = self.intent_configs.get(intent_type)
                if not secondary_config:
                    continue

                blend_factor = confidence * 0.3  # Max 30% blending

                # Linear interpolation of the key numeric parameters
                config.vector_weight = (
                    config.vector_weight * (1 - blend_factor)
                    + secondary_config.vector_weight * blend_factor
                )
                config.expansion_aggressiveness = (
                    config.expansion_aggressiveness * (1 - blend_factor)
                    + secondary_config.expansion_aggressiveness * blend_factor
                )

                # Diversity only ever increases; None is treated as 0.
                left = (
                    config.diversity_factor
                    if config.diversity_factor is not None
                    else 0
                )
                right_base = (
                    secondary_config.diversity_factor
                    if secondary_config.diversity_factor is not None
                    else 0
                )
                config.diversity_factor = max(left, right_base * blend_factor)

        return config

    def _apply_query_adaptations(
        self, config: AdaptiveSearchConfig, search_intent: SearchIntent, query: str
    ) -> AdaptiveSearchConfig:
        """Apply query-specific adaptations.

        Mutates and returns ``config`` based on the query's word count and on
        the ``query_complexity``, ``is_question`` and ``is_technical`` fields
        of ``search_intent``.
        """
        word_count = len(query.split())

        # Short queries: increase expansion
        if word_count <= 3:
            config.expansion_aggressiveness *= 1.4
            config.semantic_expansion = True

        # Long queries: reduce expansion, increase precision
        elif word_count >= 8:
            config.expansion_aggressiveness *= 0.7
            config.min_score_threshold *= 1.2

        # Very complex queries: use knowledge graph more aggressively
        if search_intent.query_complexity > 0.7:
            config.use_knowledge_graph = True
            config.kg_expansion_weight *= 1.3
            config.max_graph_hops = min(4, config.max_graph_hops + 1)

        # Question queries: increase semantic weight
        if search_intent.is_question:
            config.vector_weight = min(0.9, config.vector_weight + 0.1)
            config.semantic_expansion = True

        # Technical queries: boost technical sources
        if search_intent.is_technical:
            config.source_type_preferences["git"] = (
                config.source_type_preferences.get("git", 1.0) * 1.2
            )
            config.authority_bias *= 1.2

        return config

    def _apply_session_adaptations(
        self, config: AdaptiveSearchConfig, session_context: dict[str, Any]
    ) -> AdaptiveSearchConfig:
        """Apply session context adaptations.

        Mutates and returns ``config``. Reads the ``"urgency"``,
        ``"session_type"`` and ``"experience_level"`` keys of
        ``session_context``; missing keys leave the related settings untouched.
        """
        # Time-sensitive sessions: increase temporal bias
        if session_context.get("urgency") == "high":
            config.temporal_bias = min(1.0, config.temporal_bias + 0.3)
            config.max_results = min(15, config.max_results)

        # Learning sessions: increase diversity and expansion
        session_type = session_context.get("session_type", "")
        if session_type == "learning":
            config.diversity_factor = min(1.0, config.diversity_factor + 0.2)
            config.expansion_aggressiveness *= 1.2
            config.max_results = min(30, config.max_results + 5)

        # Focused sessions: increase precision
        elif session_type == "focused":
            config.min_score_threshold *= 1.3
            config.expansion_aggressiveness *= 0.8
            config.max_results = max(10, config.max_results - 5)

        # User experience level (replaces any existing "section_type" boosts)
        experience_level = session_context.get("experience_level", "intermediate")
        if experience_level == "beginner":
            config.source_type_preferences["documentation"] = 1.4
            config.ranking_boosts["section_type"] = {
                "introduction": 1.5,
                "overview": 1.4,
            }
        elif experience_level == "expert":
            config.source_type_preferences["git"] = 1.3
            config.ranking_boosts["section_type"] = {
                "implementation": 1.4,
                "advanced": 1.3,
            }

        return config

    def get_strategy_stats(self) -> dict[str, Any]:
        """Get adaptive search strategy statistics.

        Returns:
            A dict with the number of configured intent types, whether a
            knowledge graph was supplied, the distinct search strategies and,
            when ``TraversalStrategy`` is available, the distinct traversal
            strategy values of knowledge-graph-enabled configs. Both lists are
            sorted so the output is deterministic.
        """
        stats = {
            "intent_types_supported": len(self.intent_configs),
            "has_knowledge_graph": self.knowledge_graph is not None,
            "strategy_types": sorted(
                {config.search_strategy for config in self.intent_configs.values()}
            ),
        }

        # Add traversal strategies if TraversalStrategy is available
        if TraversalStrategy is not None:
            stats["traversal_strategies"] = sorted(
                {
                    config.kg_traversal_strategy.value
                    for config in self.intent_configs.values()
                    if config.use_knowledge_graph and config.kg_traversal_strategy
                },
                key=str,  # order by text form, whatever type the enum values are
            )

        return stats