Coverage for src/qdrant_loader_mcp_server/search/enhanced/cdi/extractors/clustering.py: 86%

14 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:06 +0000

1from __future__ import annotations 

2 

3from ....models import SearchResult 

4from ..interfaces import Clusterer 

5from ..models import ClusteringStrategy, DocumentCluster 

6 

7 

class DefaultClusterer(Clusterer):
    """Clusterer that forwards all work to the legacy DocumentClusterAnalyzer.

    The class holds a legacy similarity calculator and a legacy cluster
    analyzer, and `cluster` simply delegates to the analyzer's
    `create_clusters` so results match the legacy code path.
    """

    def __init__(self, similarity_calculator):
        """Build the wrapped legacy similarity calculator and cluster analyzer.

        Args:
            similarity_calculator: Any object exposing a `spacy_analyzer`
                attribute. Only that attribute is read; it is handed to the
                legacy `DocumentSimilarityCalculator` constructor.

        Raises:
            ValueError: If `similarity_calculator` has no `spacy_analyzer`
                attribute. Raised up front so the caller gets an explicit
                message instead of a later AttributeError.
        """
        if not hasattr(similarity_calculator, "spacy_analyzer"):
            message = (
                "similarity_calculator must provide a 'spacy_analyzer' attribute compatible "
                "with the legacy DocumentSimilarityCalculator."
            )
            raise ValueError(message)

        # Deferred imports straight from the CDI modules: going through the
        # re-export module would create an import cycle.
        from ..analyzers import DocumentClusterAnalyzer as _LegacyAnalyzer  # type: ignore[misc]
        from ..calculators import DocumentSimilarityCalculator as _LegacyCalculator  # type: ignore[misc]

        spacy_analyzer = similarity_calculator.spacy_analyzer  # type: ignore[attr-defined]
        self._legacy_similarity = _LegacyCalculator(spacy_analyzer)
        self._legacy = _LegacyAnalyzer(self._legacy_similarity)

    def cluster(
        self,
        results: list[SearchResult],
        strategy: ClusteringStrategy | None = None,
        max_clusters: int | None = None,
        min_cluster_size: int | None = None,
    ) -> list[DocumentCluster]:
        """Cluster `results` by delegating to the legacy analyzer.

        Any falsy argument (``None``, and also ``0`` for the integer
        parameters) is replaced by its fallback before delegation:
        `ClusteringStrategy.MIXED_FEATURES` for `strategy`, ``10`` for
        `max_clusters` and ``2`` for `min_cluster_size`.

        Returns:
            Whatever the legacy analyzer's `create_clusters` returns.
        """
        effective_strategy = strategy if strategy else ClusteringStrategy.MIXED_FEATURES
        cluster_limit = max_clusters if max_clusters else 10
        smallest_cluster = min_cluster_size if min_cluster_size else 2

        return self._legacy.create_clusters(
            results,
            strategy=effective_strategy,
            max_clusters=cluster_limit,
            min_cluster_size=smallest_cluster,
        )