Coverage for src/qdrant_loader_mcp_server/search/enhanced/cdi/extractors/similarity.py: 93%
14 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
1from __future__ import annotations
3from .....utils.logging import LoggingConfig
4from ....models import SearchResult
5from ....nlp.spacy_analyzer import SpaCyQueryAnalyzer
6from ..interfaces import SimilarityComputer
7from ..models import (
8 DocumentSimilarity,
9 SimilarityMetric,
10)
class DefaultSimilarityComputer(SimilarityComputer):
    """Similarity computer that forwards to the legacy CDI calculator.

    The instance owns a ``DocumentSimilarityCalculator`` built from the
    supplied spaCy analyzer and hands every ``compute`` call over to it.
    """

    def __init__(self, spacy_analyzer: SpaCyQueryAnalyzer):
        """Build the wrapped legacy calculator and keep the analyzer and logger.

        Args:
            spacy_analyzer: Analyzer passed to the legacy calculator's
                constructor and also stored on this instance.
        """
        # Deferred import taken straight from the CDI calculators module;
        # this avoids a cyclic import via the cross_document_intelligence
        # re-export module.
        from ..calculators import (
            DocumentSimilarityCalculator as _Calculator,  # type: ignore[misc]
        )

        self._legacy = _Calculator(spacy_analyzer)
        self.spacy_analyzer = spacy_analyzer
        self.logger = LoggingConfig.get_logger(__name__)

    def compute(
        self,
        doc1: SearchResult,
        doc2: SearchResult,
    ) -> DocumentSimilarity:
        """Return the legacy calculator's similarity for ``doc1`` and ``doc2``.

        The call is forwarded unchanged to ``calculate_similarity`` on the
        wrapped calculator, always requesting the same four metrics, so
        behavior stays that of the legacy implementation.
        """
        # A new list is created on every call, exactly as the inline
        # literal did, so the callee never sees a shared object.
        requested_metrics = [
            SimilarityMetric.ENTITY_OVERLAP,
            SimilarityMetric.TOPIC_OVERLAP,
            SimilarityMetric.METADATA_SIMILARITY,
            SimilarityMetric.CONTENT_FEATURES,
        ]
        return self._legacy.calculate_similarity(
            doc1,
            doc2,
            metrics=requested_metrics,
        )