Coverage for src/qdrant_loader_mcp_server/search/hybrid/components/deduplication.py: 72%

29 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:06 +0000

1from __future__ import annotations 

2 

3import json 

4from collections.abc import Hashable, Iterable, Mapping 

5from typing import Any 

6 

7 

class ResultDeduplicator:
    """Remove duplicate results, keeping the first item seen for each key.

    The key of an item is read from ``key_attr``: through ``.get()`` when the
    item is a ``Mapping``, otherwise through ``getattr``. Items whose key is
    missing or ``None`` are never treated as duplicates and are always kept.
    """

    # Private tag paired with every serialized fallback key. Without it, the
    # serialized form of an unhashable key (e.g. the list ["a"] -> '["a"]')
    # would compare equal to a genuine string key with the same text and a
    # distinct result would be dropped.
    _FALLBACK_TAG = object()

    def __init__(self, key_attr: str = "id"):
        """Remember which mapping key / attribute identifies a result.

        Args:
            key_attr: Mapping key or attribute name looked up on every item.
        """
        self.key_attr = key_attr

    def deduplicate(self, results: Iterable[Any]) -> list[Any]:
        """Return the items of ``results`` with later duplicates dropped.

        Args:
            results: Items to filter. Mappings and plain objects may be mixed.

        Returns:
            A new list, in input order, holding the first item for each
            distinct key plus every item that has no key (or a ``None`` key).
        """
        seen: set[Hashable] = set()
        unique: list[Any] = []
        for item in results:
            # Support both Mapping (dict-like) and object attributes.
            if isinstance(item, Mapping):
                key_obj = item.get(self.key_attr, None)
            else:
                key_obj = getattr(item, self.key_attr, None)

            # No usable key: there is nothing to compare on, so keep the item.
            if key_obj is None:
                unique.append(item)
                continue

            key = self._membership_key(key_obj)
            if key not in seen:
                seen.add(key)
                unique.append(item)
        return unique

    @classmethod
    def _membership_key(cls, key_obj: Any) -> Hashable:
        """Return a hashable stand-in for ``key_obj`` usable in a ``set``.

        Hashable keys are returned unchanged. Unhashable keys are serialized
        and wrapped in a tuple tagged with ``_FALLBACK_TAG`` so they live in
        a namespace separate from genuine keys.
        """
        try:
            hash(key_obj)
        except TypeError:
            pass
        else:
            return key_obj

        # Deterministic serialization for unhashable keys (dict key order is
        # normalized by sort_keys; non-JSON values are stringified).
        try:
            serialized = json.dumps(key_obj, sort_keys=True, default=str)
        except Exception:
            # Deliberate best-effort: json.dumps can fail in several ways
            # (circular references, unsortable dict keys, a failing __str__).
            # repr() is only as stable as the object's own __repr__.
            serialized = repr(key_obj)
        return (cls._FALLBACK_TAG, serialized)