Coverage for src/qdrant_loader_mcp_server/search/processor.py: 86%
44 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:45 +0000
1"""Query processor for handling search queries."""
3import re
4from typing import Any
6from openai import AsyncOpenAI
8from ..config import OpenAIConfig
9from ..utils.logging import LoggingConfig
class QueryProcessor:
    """Query processor for handling search queries.

    Cleans incoming queries, classifies their intent with OpenAI, and
    extracts an optional source-type filter from the query text.
    """

    # Closed set of intents the classifier prompt asks for. Anything else the
    # model replies with is coerced to "general" so downstream consumers can
    # rely on exactly these four values.
    _VALID_INTENTS = frozenset({"code", "documentation", "issue", "general"})

    # Source-type keyword table, built once at class level instead of on every
    # _extract_source_type call. NOTE: matching is substring-based (see
    # _extract_source_type), so e.g. "doc" also matches "docs".
    _SOURCE_KEYWORDS = {
        "git": ["git", "code", "repository", "repo"],
        "confluence": ["confluence", "doc", "documentation", "wiki"],
        "jira": ["jira", "issue", "ticket", "bug"],
        "localfile": ["localfile", "local", "file", "files", "filesystem", "disk"],
    }

    def __init__(self, openai_config: OpenAIConfig):
        """Initialize the query processor.

        Args:
            openai_config: Configuration providing the OpenAI API key.
        """
        self.openai_client: AsyncOpenAI | None = AsyncOpenAI(
            api_key=openai_config.api_key
        )
        self.logger = LoggingConfig.get_logger(__name__)

    async def process_query(self, query: str) -> dict[str, Any]:
        """Process a search query.

        Args:
            query: The search query string

        Returns:
            Processed query information with keys:
            - "query": the cleaned query string
            - "intent": one of "code", "documentation", "issue", "general"
            - "source_type": detected source type or None (search all)
            - "processed": False when the query was empty or intent
              inference failed; True otherwise
        """
        try:
            # Clean and normalize query
            cleaned_query = self._clean_query(query)

            # Handle empty queries
            if not cleaned_query:
                return {
                    "query": cleaned_query,
                    "intent": "general",
                    "source_type": None,
                    "processed": False,
                }

            # Infer query intent
            intent, inference_failed = await self._infer_intent(cleaned_query)

            # Extract source type if present
            source_type = self._extract_source_type(cleaned_query, intent)

            return {
                "query": cleaned_query,
                "intent": intent,
                "source_type": source_type,
                "processed": not inference_failed,
            }
        except Exception as e:
            self.logger.error("Query processing failed", error=str(e), query=query)
            # Return fallback response instead of raising exception
            return {
                "query": query,
                "intent": "general",
                "source_type": None,
                "processed": False,
            }

    def _clean_query(self, query: str) -> str:
        """Clean and normalize the query.

        Collapses internal whitespace runs to single spaces and strips
        leading/trailing whitespace.

        Args:
            query: The raw query string

        Returns:
            Cleaned query string
        """
        return re.sub(r"\s+", " ", query.strip())

    async def _infer_intent(self, query: str) -> tuple[str, bool]:
        """Infer the intent of the query using OpenAI.

        Args:
            query: The cleaned query string

        Returns:
            Tuple of (inferred intent, whether inference failed). The intent
            is always one of _VALID_INTENTS; inference failure (exception
            path) yields ("general", True).
        """
        try:
            if self.openai_client is None:
                raise RuntimeError("OpenAI client not initialized")

            response = await self.openai_client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {
                        "role": "system",
                        "content": "You are a query intent classifier. Classify the query into one of these categories: code, documentation, issue, or general. Respond with just the category name.",
                    },
                    {"role": "user", "content": query},
                ],
                temperature=0,
            )

            if not response.choices or not response.choices[0].message:
                return "general", False  # Default to general if no response

            content = response.choices[0].message.content
            if not content:
                return "general", False  # Default to general if empty content

            intent = content.strip().lower()
            # Guard against the model replying with free-form text instead of
            # one of the four requested category names: coerce anything
            # unexpected to "general" so callers see a closed value set.
            if intent not in self._VALID_INTENTS:
                self.logger.warning(
                    "Unexpected intent classification",
                    intent=intent,
                    query=query,
                )
                return "general", False

            return intent, False
        except Exception as e:
            self.logger.error("Intent inference failed", error=str(e), query=query)
            return (
                "general",
                True,
            )  # Default to general if inference fails, mark as failed

    def _extract_source_type(self, query: str, intent: str) -> str | None:
        """Extract source type from query and intent.

        Args:
            query: The cleaned query string
            intent: The inferred intent (currently unused; kept for interface
                stability and future intent-aware extraction)

        Returns:
            Source type if found, None otherwise (None means search across
            all source types).
        """
        # Check for explicit source type mentions. Matching is deliberately
        # substring-based ("doc" matches "docs"), which can also false-positive
        # on embedded occurrences (e.g. "git" in "digital").
        query_lower = query.lower()
        for source_type, keywords in self._SOURCE_KEYWORDS.items():
            if any(keyword in query_lower for keyword in keywords):
                return source_type

        # Return None to search across all source types
        return None