Coverage for src/qdrant_loader_mcp_server/search/hybrid/orchestration/relationships.py: 92%

1from __future__ import annotations

3from typing import Any

5from ...components.search_result_models import HybridSearchResult

6from ...enhanced.cross_document_intelligence import ( # noqa: F401 - type hint usage

7 ClusteringStrategy,

8 DocumentCluster,

10from ...hybrid.components.relationships import (

11 analyze_content_similarity,

12 analyze_entity_overlap,

13 analyze_hierarchy_relationship,

14 analyze_source_similarity,

15 analyze_topic_overlap,

16)

def analyze_cluster_relationships(
    engine: Any, clusters: list[DocumentCluster], documents: list[HybridSearchResult]
) -> list[dict[str, Any]]:
    """Rank the strongest pairwise relationships between clusters.

    Every unordered pair of clusters is handed to ``analyze_cluster_pair``;
    a pair is kept only when a relationship comes back and its ``strength``
    is greater than 0.1.

    Args:
        engine: Object exposing ``_build_document_lookup``; it is also
            forwarded unchanged to ``analyze_cluster_pair``.
        clusters: Clusters to compare with one another.
        documents: Search results used to build the document lookup.

    Returns:
        At most ten relationship dicts ordered by descending ``strength``.
        An empty list when fewer than two clusters are supplied.
    """
    # A relationship needs two distinct clusters.
    if len(clusters) < 2:
        return []

    lookup = engine._build_document_lookup(documents, robust=True)
    found: list[dict[str, Any]] = []

    for index, first in enumerate(clusters):
        # Only look at clusters after ``first`` so each pair is visited once.
        for second in clusters[index + 1 :]:
            pair_info = analyze_cluster_pair(engine, first, second, lookup)
            if not pair_info:
                continue
            if not (pair_info["strength"] > 0.1):
                continue

            entry: dict[str, Any] = {
                "cluster_a_id": first.cluster_id,
                "cluster_b_id": second.cluster_id,
                "cluster_a_name": first.name,
                "cluster_b_name": second.name,
                "relationship_type": pair_info["type"],
                "strength": pair_info["strength"],
                "description": pair_info["description"],
                "shared_elements": pair_info["shared_elements"],
            }
            found.append(entry)

    # Strongest first; cap the report at ten entries.
    ranked = sorted(found, key=lambda item: item["strength"], reverse=True)
    return ranked[:10]

def analyze_cluster_pair(
    engine: Any,
    cluster_a: DocumentCluster,
    cluster_b: DocumentCluster,
    doc_lookup: dict,
) -> dict[str, Any] | None:
    """Return the strongest relationship detected between two clusters.

    The document ids of each cluster are resolved through
    ``engine._find_document_by_id``; ids that do not resolve to a truthy
    document are skipped. Five analyzers are then run (entity overlap,
    topic overlap, source similarity, hierarchy, content similarity) and
    the truthy result with the highest ``strength`` wins.

    Args:
        engine: Object exposing ``_find_document_by_id(doc_id, doc_lookup)``.
        cluster_a: First cluster; its ``documents`` attribute is iterated
            as document ids.
        cluster_b: Second cluster, treated the same way.
        doc_lookup: Lookup structure passed through to the engine.

    Returns:
        The winning relationship dict, or ``None`` when either cluster has
        no resolvable documents or no analyzer reports a relationship.
    """

    def _members(cluster: DocumentCluster) -> list[HybridSearchResult]:
        # Resolve each id in turn, dropping the ones the engine cannot find.
        resolved = (
            engine._find_document_by_id(doc_id, doc_lookup)
            for doc_id in cluster.documents
        )
        return [document for document in resolved if document]

    members_a = _members(cluster_a)
    members_b = _members(cluster_b)
    if not (members_a and members_b):
        return None

    # Tuple elements are evaluated left to right, so the analyzers run in a
    # fixed order and every one of them is invoked.
    findings = (
        analyze_entity_overlap(cluster_a, cluster_b),
        analyze_topic_overlap(cluster_a, cluster_b),
        analyze_source_similarity(members_a, members_b),
        analyze_hierarchy_relationship(members_a, members_b),
        analyze_content_similarity(members_a, members_b),
    )
    candidates: list[dict[str, Any]] = [found for found in findings if found]

    # ``default`` covers the case where no analyzer produced a relationship.
    return max(candidates, key=lambda found: found["strength"], default=None)