Coverage for src/qdrant_loader_mcp_server/search/enhanced/cdi/extractors/similarity.py: 93%

14 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:06 +0000

1from __future__ import annotations 

2 

3from .....utils.logging import LoggingConfig 

4from ....models import SearchResult 

5from ....nlp.spacy_analyzer import SpaCyQueryAnalyzer 

6from ..interfaces import SimilarityComputer 

7from ..models import ( 

8 DocumentSimilarity, 

9 SimilarityMetric, 

10) 

11 

12 

class DefaultSimilarityComputer(SimilarityComputer):
    """Similarity computer that forwards to the legacy CDI calculator.

    The instance owns a ``DocumentSimilarityCalculator`` and exposes it
    through the ``SimilarityComputer`` interface; ``compute`` adds no logic
    of its own beyond choosing which metrics to request.
    """

    def __init__(self, spacy_analyzer: SpaCyQueryAnalyzer):
        """Create the wrapped legacy calculator and a module logger.

        Args:
            spacy_analyzer: Analyzer passed to the legacy calculator's
                constructor and also stored on ``self.spacy_analyzer``.
        """
        # Deferred import taken straight from the CDI calculators module:
        # importing through the cross_document_intelligence re-export
        # module would introduce a cyclic import.
        from ..calculators import (
            DocumentSimilarityCalculator,  # type: ignore[misc]
        )

        self._legacy = DocumentSimilarityCalculator(spacy_analyzer)
        self.spacy_analyzer = spacy_analyzer
        self.logger = LoggingConfig.get_logger(__name__)

    def compute(self, doc1: SearchResult, doc2: SearchResult) -> DocumentSimilarity:
        """Return the similarity of two search results.

        The call is handed unchanged to the legacy calculator's
        ``calculate_similarity`` so that behavior stays the same as before
        this adapter existed.

        Args:
            doc1: First document of the pair.
            doc2: Second document of the pair.

        Returns:
            Whatever the legacy calculator produces for the pair when asked
            for the entity-overlap, topic-overlap, metadata-similarity and
            content-features metrics.
        """
        # A fresh list per call, exactly as the legacy call site received.
        requested_metrics = [
            SimilarityMetric.ENTITY_OVERLAP,
            SimilarityMetric.TOPIC_OVERLAP,
            SimilarityMetric.METADATA_SIMILARITY,
            SimilarityMetric.CONTENT_FEATURES,
        ]
        return self._legacy.calculate_similarity(
            doc1, doc2, metrics=requested_metrics
        )