Coverage for src/qdrant_loader_mcp_server/search/enhanced/cdi/extractors/clustering.py: 86%
14 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
1from __future__ import annotations
3from ....models import SearchResult
4from ..interfaces import Clusterer
5from ..models import ClusteringStrategy, DocumentCluster
class DefaultClusterer(Clusterer):
    """Clusterer that delegates to the legacy ``DocumentClusterAnalyzer``.

    The class is a thin adapter: it builds the legacy similarity calculator
    and cluster analyzer once at construction time and forwards every
    ``cluster`` call to the analyzer's ``create_clusters`` method.
    """

    def __init__(self, similarity_calculator):
        """Build the legacy similarity calculator and cluster analyzer.

        Args:
            similarity_calculator: Object exposing a ``spacy_analyzer``
                attribute. That attribute is passed to the legacy
                ``DocumentSimilarityCalculator`` constructor.

        Raises:
            ValueError: If ``similarity_calculator`` has no
                ``spacy_analyzer`` attribute. Raised up front so callers get
                a descriptive message instead of a later ``AttributeError``.
        """
        has_analyzer = hasattr(similarity_calculator, "spacy_analyzer")
        if not has_analyzer:
            raise ValueError(
                "similarity_calculator must provide a 'spacy_analyzer' attribute compatible "
                "with the legacy DocumentSimilarityCalculator."
            )

        # Deferred, direct imports from the CDI modules avoid import cycles
        # that would otherwise go through the re-export module.
        from ..analyzers import (
            DocumentClusterAnalyzer as _LegacyAnalyzer,  # type: ignore[misc]
        )
        from ..calculators import (
            DocumentSimilarityCalculator as _LegacySimilarity,  # type: ignore[misc]
        )

        spacy_analyzer = similarity_calculator.spacy_analyzer  # type: ignore[attr-defined]
        legacy_similarity = _LegacySimilarity(spacy_analyzer)

        self._legacy_similarity = legacy_similarity
        self._legacy = _LegacyAnalyzer(legacy_similarity)

    def cluster(
        self,
        results: list[SearchResult],
        strategy: ClusteringStrategy | None = None,
        max_clusters: int | None = None,
        min_cluster_size: int | None = None,
    ) -> list[DocumentCluster]:
        """Cluster ``results`` through the legacy analyzer.

        Any falsy option (``None``, and also ``0`` for the integer options)
        is replaced by its default before delegation:
        ``ClusteringStrategy.MIXED_FEATURES`` for ``strategy``, ``10`` for
        ``max_clusters`` and ``2`` for ``min_cluster_size``.

        Args:
            results: Search results to group.
            strategy: Clustering strategy to apply, or ``None`` for the default.
            max_clusters: Upper bound handed to the legacy analyzer.
            min_cluster_size: Minimum size handed to the legacy analyzer.

        Returns:
            Whatever the legacy analyzer's ``create_clusters`` returns.
        """
        chosen_strategy = strategy if strategy else ClusteringStrategy.MIXED_FEATURES
        cluster_limit = max_clusters if max_clusters else 10
        smallest_cluster = min_cluster_size if min_cluster_size else 2

        return self._legacy.create_clusters(
            results,
            strategy=chosen_strategy,
            max_clusters=cluster_limit,
            min_cluster_size=smallest_cluster,
        )