Coverage for src/qdrant_loader_mcp_server/search/enhanced/intent/classifier.py: 91%
211 statements
coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
1"""
2Intent Classification Engine for Search Enhancement.
4This module implements the main IntentClassifier that uses spaCy analysis and
5behavioral patterns to classify search intents with high accuracy.
6"""
8from __future__ import annotations
10import time
11from typing import TYPE_CHECKING, Any
13from ....utils.logging import LoggingConfig
14from .models import IntentType, SearchIntent
16_SPACY_IMPORT_ERROR: BaseException | None = None
18if TYPE_CHECKING:
19 from ...nlp.spacy_analyzer import QueryAnalysis, SpaCyQueryAnalyzer
20else:
21 try:
22 from ...nlp.spacy_analyzer import QueryAnalysis, SpaCyQueryAnalyzer
23 except (
24 ImportError,
25 ModuleNotFoundError,
26 ) as _exc: # pragma: no cover - optional dep
27 # Provide safe sentinels for runtime to avoid NameErrors in annotations
28 QueryAnalysis = Any # type: ignore[assignment]
29 SpaCyQueryAnalyzer = Any # type: ignore[assignment]
30 _SPACY_IMPORT_ERROR = _exc
32logger = LoggingConfig.get_logger(__name__)


class IntentClassifier:
    """Advanced intent classification using spaCy analysis and behavioral patterns."""

    def __init__(self, spacy_analyzer):
        """Initialize the intent classifier.

        The constructor validates that the spaCy analyzer dependency is available.
        If a valid analyzer instance is not provided, it raises an ImportError with
        actionable guidance so callers fail fast rather than encountering
        None-attribute errors later.
        """
        if spacy_analyzer is None:
            # Do not perform ad-hoc imports here; require explicit injection
            if _SPACY_IMPORT_ERROR is not None:
                raise ImportError(
                    "SpaCyQueryAnalyzer is not available. Install optional NLP deps (spacy and model) "
                    "and provide an initialized analyzer instance to IntentClassifier."
                ) from _SPACY_IMPORT_ERROR
            raise ImportError(
                "A spaCy analyzer instance must be provided to IntentClassifier. "
                "Use SpaCyQueryAnalyzer() and pass it explicitly."
            )
        self.spacy_analyzer = spacy_analyzer

        # Final sanity check to fail fast if analyzer is misconfigured
        if not hasattr(self.spacy_analyzer, "analyze_query_semantic"):
            raise ImportError(
                "Provided spaCy analyzer does not implement 'analyze_query_semantic'. "
                "Pass a compatible analyzer or install the default SpaCyQueryAnalyzer."
            )
        self.logger = LoggingConfig.get_logger(__name__)

        # Intent classification patterns using spaCy linguistic features
        self.intent_patterns = {
            IntentType.TECHNICAL_LOOKUP: {
                "keywords": {
                    "api", "apis", "endpoint", "endpoints", "function", "functions",
                    "method", "methods", "class", "classes", "library", "libraries",
                    "framework", "frameworks", "code", "implementation", "syntax",
                    "documentation", "docs", "reference", "specification", "protocol",
                },
                "pos_patterns": [
                    ["NOUN", "NOUN"],  # "API documentation"
                    ["ADJ", "NOUN"],  # "REST API"
                    ["VERB", "NOUN"],  # "implement authentication"
                    ["NOUN", "VERB"],  # "code example"
                ],
                "entity_types": {"PRODUCT", "ORG", "LANGUAGE"},
                "question_words": {"how", "what"},
                "linguistic_indicators": {
                    "has_code_terms": True,
                    "technical_complexity": 0.6,
                    "verb_imperative": True,
                },
                "weight": 1.0,
            },
            IntentType.BUSINESS_CONTEXT: {
                "keywords": {
                    "requirements", "requirement", "objectives", "objective", "goals",
                    "goal", "strategy", "strategies", "business", "scope",
                    "stakeholder", "stakeholders", "budget", "timeline", "deliverable",
                    "deliverables", "milestone", "criteria", "specification",
                    "specifications", "priority", "priorities",
                },
                "pos_patterns": [
                    ["NOUN", "NOUN"],  # "business requirements"
                    ["ADJ", "NOUN"],  # "functional requirements"
                    ["MODAL", "VERB"],  # "should implement"
                    ["DET", "NOUN", "VERB"],  # "the system should"
                ],
                "entity_types": {"ORG", "MONEY", "PERCENT", "CARDINAL"},
                "question_words": {"what", "why", "which"},
                "linguistic_indicators": {
                    "has_business_terms": True,
                    "formal_language": True,
                    "future_tense": True,
                },
                "weight": 1.0,
            },
            IntentType.VENDOR_EVALUATION: {
                "keywords": {
                    "vendor", "vendors", "supplier", "suppliers", "proposal",
                    "proposals", "bid", "bids", "quote", "quotes", "cost", "costs",
                    "price", "pricing", "comparison", "compare", "evaluate",
                    "evaluation", "criteria", "selection", "recommendation",
                    "assessment", "analysis",
                },
                "pos_patterns": [
                    ["NOUN", "NOUN"],  # "vendor proposal"
                    ["VERB", "NOUN"],  # "compare vendors"
                    ["ADJ", "NOUN"],  # "best vendor"
                    ["NOUN", "VERB", "ADJ"],  # "vendor is better"
                ],
                "entity_types": {"ORG", "MONEY", "PERSON"},
                "question_words": {"which", "who", "what", "how much"},
                "linguistic_indicators": {
                    "has_comparison": True,
                    "has_evaluation_terms": True,
                    "superlative_forms": True,
                },
                "weight": 1.0,
            },
            IntentType.PROCEDURAL: {
                "keywords": {
                    "how", "steps", "step", "process", "procedure", "guide",
                    "tutorial", "walkthrough", "instructions", "setup", "configure",
                    "install", "deploy", "implement", "create", "build", "make", "do",
                },
                "pos_patterns": [
                    ["VERB", "NOUN"],  # "install package"
                    ["VERB", "DET", "NOUN"],  # "setup the system"
                    ["ADV", "VERB"],  # "how configure"
                    ["NOUN", "VERB"],  # "steps install"
                ],
                "entity_types": set(),
                "question_words": {"how", "when", "where"},
                "linguistic_indicators": {
                    "imperative_mood": True,
                    "action_oriented": True,
                    "sequential_indicators": True,
                },
                "weight": 1.0,
            },
            IntentType.INFORMATIONAL: {
                "keywords": {
                    "what", "definition", "meaning", "explain", "overview", "about",
                    "introduction", "basics", "fundamentals", "concept", "concepts",
                    "understand", "learn", "know", "information", "details",
                },
                "pos_patterns": [
                    ["NOUN"],  # "authentication"
                    ["ADJ", "NOUN"],  # "basic concept"
                    ["VERB", "NOUN"],  # "understand API"
                    ["NOUN", "VERB"],  # "concept explains"
                ],
                "entity_types": set(),
                "question_words": {"what", "who", "when", "where"},
                "linguistic_indicators": {
                    "knowledge_seeking": True,
                    "present_tense": True,
                    "general_terms": True,
                },
                "weight": 1.0,
            },
            IntentType.TROUBLESHOOTING: {
                "keywords": {
                    "error", "errors", "problem", "problems", "issue", "issues",
                    "bug", "bugs", "fix", "fixes", "solve", "solution", "solutions",
                    "troubleshoot", "debug", "debugging", "failed", "failing",
                    "broken", "not working", "doesn't work",
                },
                "pos_patterns": [
                    ["NOUN", "VERB"],  # "error occurs"
                    ["VERB", "NOUN"],  # "fix error"
                    ["ADJ", "NOUN"],  # "broken system"
                    ["NOUN", "ADJ"],  # "system broken"
                ],
                "entity_types": set(),
                "question_words": {"why", "how", "what"},
                "linguistic_indicators": {
                    "negative_sentiment": True,
                    "problem_indicators": True,
                    "past_tense": True,
                },
                "weight": 1.0,
            },
            IntentType.EXPLORATORY: {
                "keywords": {
                    "explore", "discover", "find", "search", "browse", "look", "see",
                    "show", "list", "available", "options", "alternatives", "similar",
                    "related", "examples", "samples",
                },
                "pos_patterns": [
                    ["VERB"],  # "explore"
                    ["VERB", "NOUN"],  # "find examples"
                    ["ADJ", "NOUN"],  # "similar tools"
                    ["DET", "NOUN"],  # "some options"
                ],
                "entity_types": set(),
                "question_words": {"what", "which"},
                "linguistic_indicators": {
                    "open_ended": True,
                    "discovery_oriented": True,
                    "broad_scope": True,
                },
                "weight": 0.8,
            },
        }

        # Behavioral pattern recognition
        self.session_patterns = {
            "technical_session": [IntentType.TECHNICAL_LOOKUP, IntentType.PROCEDURAL],
            "business_session": [
                IntentType.BUSINESS_CONTEXT,
                IntentType.VENDOR_EVALUATION,
            ],
            "learning_session": [
                IntentType.INFORMATIONAL,
                IntentType.EXPLORATORY,
                IntentType.PROCEDURAL,
            ],
            "problem_solving": [
                IntentType.TROUBLESHOOTING,
                IntentType.PROCEDURAL,
                IntentType.TECHNICAL_LOOKUP,
            ],
        }

        # Cache for intent classification results
        self._intent_cache: dict[str, SearchIntent] = {}

        logger.info("Initialized intent classifier with spaCy integration")
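
    # Illustrative construction (a sketch, not part of this module; assumes
    # the optional spaCy dependency and its model are installed):
    #
    #     from ...nlp.spacy_analyzer import SpaCyQueryAnalyzer
    #     classifier = IntentClassifier(SpaCyQueryAnalyzer())
    #
    # Passing None, or an object without 'analyze_query_semantic', raises
    # ImportError immediately per the fail-fast contract enforced in __init__.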

    def classify_intent(
        self,
        query: str,
        session_context: dict[str, Any] | None = None,
        behavioral_context: list[str] | None = None,
    ) -> SearchIntent:
        """Classify search intent using comprehensive spaCy analysis."""

        start_time = time.time()

        # Check cache first
        cache_key = f"{query}:{str(session_context)}:{str(behavioral_context)}"
        if cache_key in self._intent_cache:
            cached = self._intent_cache[cache_key]
            logger.debug(f"Using cached intent classification for: {query[:50]}...")
            return cached

        # Ensure analyzer is available and valid (extra safety beyond __init__)
        if not hasattr(self.spacy_analyzer, "analyze_query_semantic"):
            raise ImportError(
                "SpaCy analyzer is not initialized correctly. Missing 'analyze_query_semantic'."
            )

        try:
            # Step 1: Perform spaCy semantic analysis
            spacy_analysis = self.spacy_analyzer.analyze_query_semantic(query)

            # Step 2: Extract linguistic features for intent classification
            linguistic_features = self._extract_linguistic_features(
                spacy_analysis, query
            )

            # Step 3: Score each intent type using pattern matching
            intent_scores = self._score_intent_patterns(
                spacy_analysis, linguistic_features, query
            )

            # Step 4: Apply behavioral context weighting
            if behavioral_context:
                intent_scores = self._apply_behavioral_weighting(
                    intent_scores, behavioral_context
                )

            # Step 5: Apply session context boosting
            if session_context:
                intent_scores = self._apply_session_context(
                    intent_scores, session_context
                )

            # Step 6: Determine primary and secondary intents
            primary_intent, confidence = self._select_primary_intent(intent_scores)
            secondary_intents = self._select_secondary_intents(
                intent_scores, primary_intent
            )

            # Step 7: Build supporting evidence
            supporting_evidence = self._build_evidence(
                spacy_analysis, linguistic_features, intent_scores
            )

            # Step 8: Create intent result
            classification_time = (time.time() - start_time) * 1000

            search_intent = SearchIntent(
                intent_type=primary_intent,
                confidence=confidence,
                secondary_intents=secondary_intents,
                supporting_evidence=supporting_evidence,
                linguistic_features=linguistic_features,
                query_complexity=spacy_analysis.complexity_score,
                is_question=spacy_analysis.is_question,
                is_technical=spacy_analysis.is_technical,
                session_context=session_context or {},
                previous_intents=behavioral_context or [],
                classification_time_ms=classification_time,
            )

            # Cache the result
            self._intent_cache[cache_key] = search_intent

            logger.debug(
                f"Classified intent in {classification_time:.2f}ms",
                query_length=len(query),
                primary_intent=primary_intent.value,
                confidence=confidence,
                secondary_count=len(secondary_intents),
            )

            return search_intent

        except Exception as e:
            logger.error(f"Intent classification failed: {e}")
            # Return fallback intent
            classification_time = (time.time() - start_time) * 1000
            return SearchIntent(
                intent_type=IntentType.GENERAL,
                confidence=0.5,
                classification_time_ms=classification_time,
            )
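
    # Example call (hypothetical inputs; assumes IntentType values serialize
    # to lowercase strings such as "informational"):
    #
    #     intent = classifier.classify_intent(
    #         "how do I fix this authentication error?",
    #         session_context={"domain": "technical", "urgency": "high"},
    #         behavioral_context=["informational", "technical_lookup"],
    #     )
    #
    # A troubleshooting-flavored query like this would typically resolve to
    # IntentType.TROUBLESHOOTING, with confidence normalized against the sum
    # of all intent scores (see _select_primary_intent below).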

    def _extract_linguistic_features(
        self, spacy_analysis, query: str
    ) -> dict[str, Any]:
        """Extract comprehensive linguistic features for intent classification."""

        features = {
            # Basic query characteristics
            "query_length": len(query.split()),
            "has_question_mark": "?" in query,
            "starts_with_question_word": False,
            "starts_with_verb": False,
            "has_imperative_verbs": False,
            "has_modal_verbs": False,
            # spaCy-derived features
            "entity_count": len(spacy_analysis.entities),
            "concept_count": len(spacy_analysis.main_concepts),
            "keyword_count": len(spacy_analysis.semantic_keywords),
            "pos_diversity": len(set(spacy_analysis.pos_patterns)),
            # Semantic features
            "technical_indicators": 0,
            "business_indicators": 0,
            "procedural_indicators": 0,
            "problem_indicators": 0,
            # Entity type analysis
            "entity_types": [ent[1] for ent in spacy_analysis.entities],
            "has_org_entities": any(ent[1] == "ORG" for ent in spacy_analysis.entities),
            "has_product_entities": any(
                ent[1] == "PRODUCT" for ent in spacy_analysis.entities
            ),
            "has_person_entities": any(
                ent[1] == "PERSON" for ent in spacy_analysis.entities
            ),
            "has_money_entities": any(
                ent[1] == "MONEY" for ent in spacy_analysis.entities
            ),
        }

        # Analyze question word patterns
        question_words = {"what", "how", "why", "when", "who", "where", "which", "whose"}
        query_lower = query.lower()
        first_word = query_lower.split()[0] if query_lower.split() else ""
        features["starts_with_question_word"] = first_word in question_words

        # Count technical, business, and procedural indicators
        technical_terms = {
            "api", "code", "function", "method", "library", "framework", "implementation",
        }
        business_terms = {
            "requirements", "objectives", "strategy", "business", "scope", "criteria",
        }
        procedural_terms = {
            "how", "steps", "process", "guide", "setup", "install", "configure",
        }
        problem_terms = {
            "error", "problem", "issue", "bug", "fix", "solve", "broken", "failed",
        }

        keywords_lower = [kw.lower() for kw in spacy_analysis.semantic_keywords]
        features["technical_indicators"] = sum(
            1 for term in technical_terms if term in keywords_lower
        )
        features["business_indicators"] = sum(
            1 for term in business_terms if term in keywords_lower
        )
        features["procedural_indicators"] = sum(
            1 for term in procedural_terms if term in keywords_lower
        )
        features["problem_indicators"] = sum(
            1 for term in problem_terms if term in keywords_lower
        )

        # POS pattern analysis
        pos_patterns = spacy_analysis.pos_patterns
        features["starts_with_verb"] = bool(pos_patterns) and pos_patterns[0] == "VERB"
        # Imperative: sentence starts with a verb and does not start with a question word
        features["has_imperative_verbs"] = (
            ("VERB" in pos_patterns)
            and features["starts_with_verb"]
            and not features.get("starts_with_question_word", False)
        )
        features["has_modal_verbs"] = any(pos in ["MD", "MODAL"] for pos in pos_patterns)

        return features
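
    # Worked example: if semantic_keywords were ["api", "error", "fix"], the
    # counters above would be technical_indicators=1 ("api"),
    # problem_indicators=2 ("error", "fix"), and zero for the business and
    # procedural buckets.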

    def _score_intent_patterns(
        self,
        spacy_analysis,
        linguistic_features: dict[str, Any],
        query: str,
    ) -> dict[IntentType, float]:
        """Score each intent type using pattern matching."""

        intent_scores = {}
        keywords_set = {kw.lower() for kw in spacy_analysis.semantic_keywords}

        for intent_type, pattern in self.intent_patterns.items():
            score = 0.0

            # 1. Keyword matching (40% weight)
            keyword_matches = len(keywords_set.intersection(pattern["keywords"]))
            keyword_score = keyword_matches / max(len(pattern["keywords"]), 1)
            score += keyword_score * 0.4

            # 2. POS pattern matching (25% weight)
            pos_score = self._match_pos_patterns(
                spacy_analysis.pos_patterns, pattern["pos_patterns"]
            )
            score += pos_score * 0.25

            # 3. Entity type matching (20% weight)
            entity_score = self._match_entity_types(
                spacy_analysis.entities, pattern["entity_types"]
            )
            score += entity_score * 0.20

            # 4. Question word matching (10% weight)
            question_score = self._match_question_words(query, pattern["question_words"])
            score += question_score * 0.10

            # 5. Linguistic indicator bonus (5% weight)
            indicator_score = self._match_linguistic_indicators(
                linguistic_features, pattern.get("linguistic_indicators", {})
            )
            score += indicator_score * 0.05

            # Apply pattern weight
            score *= pattern.get("weight", 1.0)

            intent_scores[intent_type] = score

        return intent_scores
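
    # Worked example with hypothetical match counts: 3 of 22 TECHNICAL_LOOKUP
    # keywords (~0.136), 2 of 4 POS patterns (0.5), 1 of 3 entity types
    # (~0.333), 1 of 2 question words (0.5), and an indicator score of 0.5
    # combine as:
    #     0.136 * 0.40 + 0.5 * 0.25 + 0.333 * 0.20 + 0.5 * 0.10 + 0.5 * 0.05
    #     ≈ 0.054 + 0.125 + 0.067 + 0.050 + 0.025 ≈ 0.32 (before the 1.0 weight)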

    def _match_pos_patterns(
        self, query_pos: list[str], target_patterns: list[list[str]]
    ) -> float:
        """Match POS tag patterns in the query."""
        if not target_patterns or not query_pos:
            return 0.0

        matches = 0
        total_patterns = len(target_patterns)

        for pattern in target_patterns:
            if self._contains_pos_sequence(query_pos, pattern):
                matches += 1

        return matches / total_patterns

    def _contains_pos_sequence(self, pos_tags: list[str], sequence: list[str]) -> bool:
        """Check if POS sequence exists in the query."""
        if len(sequence) > len(pos_tags):
            return False

        for i in range(len(pos_tags) - len(sequence) + 1):
            if pos_tags[i : i + len(sequence)] == sequence:
                return True

        return False
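
    # For example, _contains_pos_sequence(["ADV", "VERB", "DET", "NOUN"],
    # ["VERB", "DET"]) is True (the window starting at index 1 matches), while
    # ["DET", "VERB"] is False because order matters in the sliding window.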

    def _match_entity_types(
        self, query_entities: list[tuple[str, str]], target_types: set[str]
    ) -> float:
        """Match entity types in the query."""
        if not target_types:
            return 0.0

        query_entity_types = {ent[1] for ent in query_entities}
        matches = len(query_entity_types.intersection(target_types))

        return matches / len(target_types)
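
    # Example (hypothetical entities): [("Qdrant", "ORG")] scored against
    # target types {"ORG", "MONEY", "PERSON"} yields 1/3 ≈ 0.33.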

    def _match_question_words(self, query: str, target_words: set[str]) -> float:
        """Match question words in the query."""
        if not target_words:
            return 0.0

        query_words = set(query.lower().split())
        matches = len(query_words.intersection(target_words))

        return matches / len(target_words)

    def _match_linguistic_indicators(
        self, features: dict[str, Any], target_indicators: dict[str, Any]
    ) -> float:
        """Match linguistic indicators."""
        if not target_indicators:
            return 0.0

        score = 0.0
        total_indicators = len(target_indicators)

        for indicator, expected_value in target_indicators.items():
            if indicator in features:
                if isinstance(expected_value, bool):
                    if features[indicator] == expected_value:
                        score += 1.0
                elif isinstance(expected_value, int | float):
                    # For numeric indicators, use magnitude-aware similarity
                    actual_value = features.get(indicator, 0)
                    if isinstance(actual_value, int | float):
                        denom = max(abs(expected_value), abs(actual_value), 1.0)
                        similarity = 1.0 - abs(actual_value - expected_value) / denom
                        score += max(0.0, similarity)

        return score / max(total_indicators, 1)
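
    # Numeric similarity example: with expected technical_complexity=0.6 and
    # an actual feature value of 0.9, denom = max(0.6, 0.9, 1.0) = 1.0, so
    # this indicator contributes 1.0 - |0.9 - 0.6| / 1.0 = 0.7 before the
    # final average over total_indicators.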

    def _apply_behavioral_weighting(
        self, intent_scores: dict[IntentType, float], behavioral_context: list[str]
    ) -> dict[IntentType, float]:
        """Apply behavioral context weighting to intent scores."""

        if not behavioral_context:
            return intent_scores

        # Convert string intents to IntentType
        previous_intents = []
        for intent_str in behavioral_context[-5:]:  # Last 5 intents
            try:
                previous_intents.append(IntentType(intent_str))
            except ValueError:
                continue

        if not previous_intents:
            return intent_scores

        weighted_scores = intent_scores.copy()

        # Boost scores for intents that commonly follow previous intents
        intent_transitions = {
            IntentType.INFORMATIONAL: [
                IntentType.PROCEDURAL,
                IntentType.TECHNICAL_LOOKUP,
            ],
            IntentType.TECHNICAL_LOOKUP: [
                IntentType.PROCEDURAL,
                IntentType.TROUBLESHOOTING,
            ],
            IntentType.BUSINESS_CONTEXT: [
                IntentType.VENDOR_EVALUATION,
                IntentType.TECHNICAL_LOOKUP,
            ],
            IntentType.VENDOR_EVALUATION: [
                IntentType.BUSINESS_CONTEXT,
                IntentType.TECHNICAL_LOOKUP,
            ],
            IntentType.PROCEDURAL: [
                IntentType.TROUBLESHOOTING,
                IntentType.TECHNICAL_LOOKUP,
            ],
            IntentType.TROUBLESHOOTING: [
                IntentType.PROCEDURAL,
                IntentType.TECHNICAL_LOOKUP,
            ],
        }

        most_recent_intent = previous_intents[-1]
        likely_next_intents = intent_transitions.get(most_recent_intent, [])

        for intent_type in likely_next_intents:
            if intent_type in weighted_scores:
                weighted_scores[intent_type] *= 1.2  # 20% boost

        # Apply session pattern recognition
        for _pattern_name, pattern_intents in self.session_patterns.items():
            pattern_match_score = sum(
                1 for intent in previous_intents if intent in pattern_intents
            ) / len(pattern_intents)

            if pattern_match_score > 0.5:  # More than half of pattern matched
                for intent_type in pattern_intents:
                    if intent_type in weighted_scores:
                        weighted_scores[intent_type] *= 1.0 + pattern_match_score * 0.3

        return weighted_scores
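
    # Example: if the most recent previous intent was TROUBLESHOOTING, then
    # PROCEDURAL and TECHNICAL_LOOKUP each get the 1.2x transition boost; if,
    # in addition, 2 of the 3 "problem_solving" pattern intents appeared
    # recently (match score 2/3 > 0.5), those three intents are further
    # multiplied by 1.0 + (2/3) * 0.3 = 1.2.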

    def _apply_session_context(
        self, intent_scores: dict[IntentType, float], session_context: dict[str, Any]
    ) -> dict[IntentType, float]:
        """Apply session context to intent scores."""

        weighted_scores = intent_scores.copy()

        # Apply domain context boosting
        domain = session_context.get("domain", "")
        if domain == "technical":
            weighted_scores[IntentType.TECHNICAL_LOOKUP] *= 1.3
            weighted_scores[IntentType.PROCEDURAL] *= 1.2
        elif domain == "business":
            weighted_scores[IntentType.BUSINESS_CONTEXT] *= 1.3
            weighted_scores[IntentType.VENDOR_EVALUATION] *= 1.2

        # Apply user role context
        user_role = session_context.get("user_role", "")
        if user_role in ["developer", "engineer", "architect"]:
            weighted_scores[IntentType.TECHNICAL_LOOKUP] *= 1.2
            weighted_scores[IntentType.PROCEDURAL] *= 1.1
        elif user_role in ["manager", "analyst", "consultant"]:
            weighted_scores[IntentType.BUSINESS_CONTEXT] *= 1.2
            weighted_scores[IntentType.VENDOR_EVALUATION] *= 1.1

        # Apply urgency context
        urgency = session_context.get("urgency", "normal")
        if urgency == "high":
            weighted_scores[IntentType.TROUBLESHOOTING] *= 1.4
            weighted_scores[IntentType.PROCEDURAL] *= 1.2

        return weighted_scores
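
    # Example: session_context={"domain": "technical", "user_role":
    # "developer", "urgency": "high"} compounds to 1.3 * 1.2 = 1.56x on
    # TECHNICAL_LOOKUP, 1.2 * 1.1 * 1.2 ≈ 1.58x on PROCEDURAL, and 1.4x on
    # TROUBLESHOOTING.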

    def _select_primary_intent(
        self, intent_scores: dict[IntentType, float]
    ) -> tuple[IntentType, float]:
        """Select the primary intent with highest confidence."""

        if not intent_scores:
            return IntentType.GENERAL, 0.5

        # Find the highest scoring intent
        primary_intent = max(intent_scores, key=intent_scores.get)
        raw_score = intent_scores[primary_intent]

        # Normalize confidence score
        total_score = sum(intent_scores.values())
        confidence = raw_score / max(total_score, 1.0)

        # Apply confidence threshold
        if confidence < 0.3:
            return IntentType.GENERAL, confidence

        return primary_intent, confidence
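
    # Example: with scores {TECHNICAL_LOOKUP: 0.6, PROCEDURAL: 0.4,
    # INFORMATIONAL: 0.2}, the divisor is max(1.2, 1.0) = 1.2, so confidence
    # = 0.6 / 1.2 = 0.5 and TECHNICAL_LOOKUP wins; any result below the 0.3
    # confidence threshold falls back to IntentType.GENERAL instead.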

    def _select_secondary_intents(
        self, intent_scores: dict[IntentType, float], primary_intent: IntentType
    ) -> list[tuple[IntentType, float]]:
        """Select secondary intents with meaningful confidence."""

        secondary_intents = []

        # If primary intent is GENERAL (fallback), don't calculate secondary intents
        if primary_intent == IntentType.GENERAL or primary_intent not in intent_scores:
            return secondary_intents

        # Sort intents by score, excluding primary
        sorted_intents = sorted(
            [
                (intent, score)
                for intent, score in intent_scores.items()
                if intent != primary_intent
            ],
            key=lambda x: x[1],
            reverse=True,
        )

        # Include intents with score > 30% of primary intent score
        primary_score = intent_scores[primary_intent]
        threshold = primary_score * 0.3

        for intent, score in sorted_intents[:3]:  # Max 3 secondary intents
            if score >= threshold:
                confidence = score / max(sum(intent_scores.values()), 1.0)
                secondary_intents.append((intent, confidence))

        return secondary_intents
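
    # Example: with a primary score of 0.6, the inclusion threshold is
    # 0.6 * 0.3 = 0.18; up to three remaining intents scoring at or above
    # 0.18 are kept, each with confidence = score / max(sum(all scores), 1.0).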

    def _build_evidence(
        self,
        spacy_analysis,
        linguistic_features: dict[str, Any],
        intent_scores: dict[IntentType, float],
    ) -> dict[str, Any]:
        """Build supporting evidence for the intent classification."""

        return {
            "spacy_processing_time": spacy_analysis.processing_time_ms,
            "query_complexity": spacy_analysis.complexity_score,
            "semantic_keywords": spacy_analysis.semantic_keywords[:5],  # Top 5
            "extracted_entities": [ent[0] for ent in spacy_analysis.entities[:3]],  # Top 3
            "main_concepts": spacy_analysis.main_concepts[:3],  # Top 3
            "intent_signals": spacy_analysis.intent_signals,
            "linguistic_features": {
                "technical_indicators": linguistic_features.get("technical_indicators", 0),
                "business_indicators": linguistic_features.get("business_indicators", 0),
                "procedural_indicators": linguistic_features.get("procedural_indicators", 0),
                "problem_indicators": linguistic_features.get("problem_indicators", 0),
            },
            "top_intent_scores": dict(
                sorted(intent_scores.items(), key=lambda x: x[1], reverse=True)[:3]
            ),
        }

    def clear_cache(self):
        """Clear intent classification cache."""
        self._intent_cache.clear()
        logger.debug("Cleared intent classification cache")

    def get_cache_stats(self) -> dict[str, int]:
        """Get cache statistics."""
        return {
            "intent_cache_size": len(self._intent_cache),
        }