Coverage for src/qdrant_loader_mcp_server/search/hybrid/components/deduplication.py: 72%
29 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
1from __future__ import annotations
3import json
4from collections.abc import Hashable, Iterable, Mapping
5from typing import Any
class ResultDeduplicator:
    """Remove duplicate results, keeping the first occurrence of each key.

    The key of a result is read from ``key_attr``: via ``.get()`` when the
    result is a ``Mapping``, via ``getattr()`` otherwise. Results whose key
    is missing or ``None`` are never treated as duplicates and are always
    kept.
    """

    # Private markers prepended to fallback keys derived from unhashable
    # values. They guarantee that a serialized fallback (e.g. the JSON text
    # of the list ``[1, 2]``) can never compare equal to a genuine hashable
    # key (e.g. the string ``"[1, 2]"``), nor a JSON fallback to a repr one.
    _JSON_TAG = object()
    _REPR_TAG = object()

    def __init__(self, key_attr: str = "id"):
        """Store the name of the mapping key / attribute used for identity."""
        self.key_attr = key_attr

    def deduplicate(self, results: Iterable[Any]) -> list[Any]:
        """Return ``results`` in original order with repeated keys removed.

        Args:
            results: Items to filter; each may be a ``Mapping`` or an
                arbitrary object exposing ``key_attr`` as an attribute.

        Returns:
            A new list containing the first item seen for every distinct
            key, plus every item whose key is missing or ``None``.
        """
        seen: set[Hashable] = set()
        unique: list[Any] = []
        for item in results:
            # Support both Mapping (dict-like) and object attributes.
            if isinstance(item, Mapping):
                key_obj = item.get(self.key_attr, None)
            else:
                key_obj = getattr(item, self.key_attr, None)

            # Without a key there is nothing to compare on: keep the item.
            if key_obj is None:
                unique.append(item)
                continue

            key = self._hashable_key(key_obj)
            if key not in seen:
                seen.add(key)
                unique.append(item)
        return unique

    @classmethod
    def _hashable_key(cls, key_obj: Any) -> Hashable:
        """Return a hashable stand-in for ``key_obj`` usable in a set.

        Hashable values are returned unchanged. Unhashable values are
        replaced by a tagged tuple holding a deterministic JSON rendering
        or, if JSON serialization fails, their ``repr()``.
        """
        try:
            hash(key_obj)
        except TypeError:
            pass
        else:
            return key_obj

        try:
            # sort_keys makes dicts with equal content but different
            # insertion order serialize identically.
            serialized = json.dumps(key_obj, sort_keys=True, default=str)
        except Exception:
            # Best-effort: json.dumps can fail in several ways (circular
            # references, unsortable keys, a raising __str__); fall back
            # to repr() rather than aborting the whole deduplication.
            return (cls._REPR_TAG, repr(key_obj))
        return (cls._JSON_TAG, serialized)