Coverage for src/qdrant_loader_mcp_server/search/processor.py: 86%

44 statements  

coverage.py v7.8.2, created at 2025-06-04 05:45 +0000

1"""Query processor for handling search queries.""" 

2 

3import re 

4from typing import Any 

5 

6from openai import AsyncOpenAI 

7 

8from ..config import OpenAIConfig 

9from ..utils.logging import LoggingConfig 

10 

11 

12class QueryProcessor: 

13 """Query processor for handling search queries.""" 

14 

15 def __init__(self, openai_config: OpenAIConfig): 

16 """Initialize the query processor.""" 

17 self.openai_client: AsyncOpenAI | None = AsyncOpenAI( 

18 api_key=openai_config.api_key 

19 ) 

20 self.logger = LoggingConfig.get_logger(__name__) 

21 

    async def process_query(self, query: str) -> dict[str, Any]:
        """Process a search query.

        Args:
            query: The search query string

        Returns:
            Processed query information including intent and filters
        """
        try:
            # Clean and normalize query
            cleaned_query = self._clean_query(query)

            # Handle empty queries
            if not cleaned_query:
                return {
                    "query": cleaned_query,
                    "intent": "general",
                    "source_type": None,
                    "processed": False,
                }

            # Infer query intent
            intent, inference_failed = await self._infer_intent(cleaned_query)

            # Extract source type if present
            source_type = self._extract_source_type(cleaned_query, intent)

            return {
                "query": cleaned_query,
                "intent": intent,
                "source_type": source_type,
                "processed": not inference_failed,
            }
        except Exception as e:
            self.logger.error("Query processing failed", error=str(e), query=query)
            # Return fallback response instead of raising exception
            return {
                "query": query,
                "intent": "general",
                "source_type": None,
                "processed": False,
            }
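    # Illustrative result (hypothetical, assuming the intent classifier answers
    # "documentation") for the query "how do I set up the confluence wiki?":
    #   {"query": "how do I set up the confluence wiki?", "intent": "documentation",
    #    "source_type": "confluence", "processed": True}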

    def _clean_query(self, query: str) -> str:
        """Clean and normalize the query.

        Args:
            query: The raw query string

        Returns:
            Cleaned query string
        """
        # Remove extra whitespace
        query = re.sub(r"\s+", " ", query.strip())
        return query
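    # Example: _clean_query("  find   auth   docs  ") returns "find auth docs".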

    async def _infer_intent(self, query: str) -> tuple[str, bool]:
        """Infer the intent of the query using OpenAI.

        Args:
            query: The cleaned query string

        Returns:
            Tuple of (inferred intent, whether inference failed)
        """
        try:
            if self.openai_client is None:
                raise RuntimeError("OpenAI client not initialized")

            response = await self.openai_client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {
                        "role": "system",
                        "content": "You are a query intent classifier. Classify the query into one of these categories: code, documentation, issue, or general. Respond with just the category name.",
                    },
                    {"role": "user", "content": query},
                ],
                temperature=0,
            )

            if not response.choices or not response.choices[0].message:
                return "general", False  # Default to general if no response

            content = response.choices[0].message.content
            if not content:
                return "general", False  # Default to general if empty content

            return content.strip().lower(), False
        except Exception as e:
            self.logger.error("Intent inference failed", error=str(e), query=query)
            return (
                "general",
                True,
            )  # Default to general if inference fails, mark as failed

    def _extract_source_type(self, query: str, intent: str) -> str | None:
        """Extract source type from query and intent.

        Args:
            query: The cleaned query string
            intent: The inferred intent

        Returns:
            Source type if found, None otherwise
        """
        # Keywords that signal an explicit source type mention
        source_keywords = {
            "git": ["git", "code", "repository", "repo"],
            "confluence": ["confluence", "doc", "documentation", "wiki"],
            "jira": ["jira", "issue", "ticket", "bug"],
            "localfile": ["localfile", "local", "file", "files", "filesystem", "disk"],
        }

        # Check for explicit source type mentions in the query
        query_lower = query.lower()
        for source_type, keywords in source_keywords.items():
            if any(keyword in query_lower for keyword in keywords):
                return source_type

        # Return None to search across all source types
        return None
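# ---------------------------------------------------------------------------
# Hypothetical usage sketch (a separate script, not part of processor.py).
# The absolute import paths and the OpenAIConfig constructor signature are
# assumptions inferred from this module's own relative imports; adjust them
# to the real package layout and config model.
# ---------------------------------------------------------------------------
import asyncio

from qdrant_loader_mcp_server.config import OpenAIConfig  # assumed import path
from qdrant_loader_mcp_server.search.processor import QueryProcessor  # assumed import path


async def main() -> None:
    # Assumed: OpenAIConfig exposes an api_key field settable via its constructor.
    config = OpenAIConfig(api_key="sk-...")
    processor = QueryProcessor(config)

    result = await processor.process_query("show me the jira ticket about the login bug")
    # e.g. {"query": "...", "intent": "issue", "source_type": "jira", "processed": True}
    # when the OpenAI call succeeds; falls back to intent "general" otherwise.
    print(result)


if __name__ == "__main__":
    asyncio.run(main())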