Coverage for src/qdrant_loader_mcp_server/search/engine.py: 92%

49 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-04 05:45 +0000

1"""Search engine implementation for the MCP server.""" 

2 

3from typing import Any, Dict, List, Optional 

4 

5from openai import AsyncOpenAI 

6from qdrant_client import QdrantClient 

7from qdrant_client.http import models 

8from qdrant_client.models import Filter 

9 

10from .hybrid_search import HybridSearchEngine 

11from .models import SearchResult 

12from .processor import QueryProcessor 

13from ..config import OpenAIConfig, QdrantConfig 

14from ..utils.logging import LoggingConfig 

15 

# Module-level logger for this module, obtained from the project's LoggingConfig.
# SearchEngine instances create their own `self.logger` the same way.
16logger = LoggingConfig.get_logger(__name__) 

17 

18 

class SearchEngine:
    """Main search engine that orchestrates query processing and search.

    Lifecycle: construct the engine, ``await initialize(...)``, call
    ``search`` as often as needed, then ``await cleanup()``. Before
    ``initialize`` and after ``cleanup`` the engine is uninitialized and
    ``search`` raises ``RuntimeError``.
    """

    def __init__(self):
        """Create an uninitialized engine; call ``initialize`` before searching."""
        self.client: QdrantClient | None = None
        self.config: QdrantConfig | None = None
        self.openai_client: AsyncOpenAI | None = None
        self.hybrid_search: HybridSearchEngine | None = None
        self.logger = LoggingConfig.get_logger(__name__)

    def _release_clients(self) -> None:
        """Close the Qdrant client (if any) and drop every client reference.

        The references are cleared *before* ``close()`` is called so the engine
        reads as uninitialized even if closing raises. Clearing
        ``hybrid_search`` matters most: ``search`` uses it as its
        "initialized" guard, and it holds the Qdrant client being closed.
        """
        client = self.client
        self.client = None
        self.openai_client = None
        self.hybrid_search = None
        if client is not None:
            client.close()

    async def initialize(
        self, config: QdrantConfig, openai_config: OpenAIConfig
    ) -> None:
        """Initialize the search engine with configuration.

        Creates the Qdrant and OpenAI clients, creates the configured
        collection if it does not exist yet, and builds the hybrid search
        engine on top of both clients.

        Args:
            config: Qdrant settings; ``url``, ``api_key`` and
                ``collection_name`` are read.
            openai_config: OpenAI settings; ``api_key`` is read.

        Raises:
            RuntimeError: If any step fails. The original exception is logged
                and deliberately suppressed from the traceback. The engine is
                left uninitialized (clients released) in that case.
        """
        self.config = config
        try:
            self.client = QdrantClient(url=config.url, api_key=config.api_key)
            self.openai_client = AsyncOpenAI(api_key=openai_config.api_key)

            # Defensive guard; it also narrows the Optional attribute type.
            if self.client is None:
                raise RuntimeError("Failed to initialize Qdrant client")

            # Ensure collection exists
            collections = self.client.get_collections().collections
            if not any(c.name == config.collection_name for c in collections):
                self.client.create_collection(
                    collection_name=config.collection_name,
                    vectors_config=models.VectorParams(
                        size=1536,  # Default size for OpenAI embeddings
                        distance=models.Distance.COSINE,
                    ),
                )

            # Initialize hybrid search
            if self.client and self.openai_client:
                self.hybrid_search = HybridSearchEngine(
                    qdrant_client=self.client,
                    openai_client=self.openai_client,
                    collection_name=config.collection_name,
                )

            self.logger.info("Successfully connected to Qdrant", url=config.url)
        except Exception as e:
            self.logger.error(
                "Failed to connect to Qdrant server",
                error=str(e),
                url=config.url,
                hint="Make sure Qdrant is running and accessible at the configured URL",
            )
            # Do not leave a half-initialized engine (or an open client) behind.
            # A failure while closing must not mask the RuntimeError below.
            try:
                self._release_clients()
            except Exception as close_error:
                self.logger.warning(
                    "Failed to close Qdrant client after initialization error",
                    error=str(close_error),
                )
            raise RuntimeError(
                f"Failed to connect to Qdrant server at {config.url}. "
                "Please ensure Qdrant is running and accessible."
            ) from None  # Suppress the original exception

    async def cleanup(self) -> None:
        """Release resources and return the engine to the uninitialized state.

        Closes the Qdrant client and clears the OpenAI client and hybrid
        search references, so a later ``search`` raises ``RuntimeError``
        instead of querying through a closed client. Safe to call repeatedly
        or on an engine that was never initialized.
        """
        self._release_clients()

    async def search(
        self,
        query: str,
        source_types: list[str] | None = None,
        limit: int = 5,
        project_ids: list[str] | None = None,
    ) -> list[SearchResult]:
        """Search for documents using hybrid search.

        Args:
            query: Search query text
            source_types: Optional list of source types to filter by
            limit: Maximum number of results to return
            project_ids: Optional list of project IDs to filter by

        Returns:
            The results produced by the hybrid search engine.

        Raises:
            RuntimeError: If the engine has not been initialized (or has been
                cleaned up).
            Exception: Any error raised by the hybrid search is logged and
                re-raised unchanged.
        """
        if not self.hybrid_search:
            raise RuntimeError("Search engine not initialized")

        self.logger.debug(
            "Performing search",
            query=query,
            source_types=source_types,
            limit=limit,
            project_ids=project_ids,
        )

        try:
            results = await self.hybrid_search.search(
                query=query,
                source_types=source_types,
                limit=limit,
                project_ids=project_ids,
            )

            self.logger.info(
                "Search completed",
                query=query,
                result_count=len(results),
                project_ids=project_ids,
            )

            return results
        except Exception as e:
            self.logger.error("Search failed", error=str(e), query=query)
            raise

122 except Exception as e: 

123 self.logger.error("Search failed", error=str(e), query=query) 

124 raise