Coverage for src/qdrant_loader_mcp_server/search/hybrid/orchestration/relationships.py: 92%

49 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:06 +0000

1from __future__ import annotations 

2 

3from typing import Any 

4 

5from ...components.search_result_models import HybridSearchResult 

6from ...enhanced.cross_document_intelligence import ( # noqa: F401 - type hint usage 

7 ClusteringStrategy, 

8 DocumentCluster, 

9) 

10from ...hybrid.components.relationships import ( 

11 analyze_content_similarity, 

12 analyze_entity_overlap, 

13 analyze_hierarchy_relationship, 

14 analyze_source_similarity, 

15 analyze_topic_overlap, 

16) 

17 

18 

def analyze_cluster_relationships(
    engine: Any,
    clusters: list[DocumentCluster],
    documents: list[HybridSearchResult],
    *,
    min_strength: float = 0.1,
    max_results: int | None = 10,
) -> list[dict[str, Any]]:
    """Summarise the strongest pairwise relationships between clusters.

    Every unordered pair of clusters is passed to ``analyze_cluster_pair``;
    pairs whose relationship strength exceeds ``min_strength`` are reported.

    Args:
        engine: Object providing ``_build_document_lookup(documents,
            robust=True)``; it is also forwarded to ``analyze_cluster_pair``.
        clusters: Clusters to compare. Fewer than two yields an empty list.
        documents: Search results used to build the document lookup.
        min_strength: Exclusive lower bound on the relationship strength a
            pair must reach to be reported.
        max_results: Maximum number of relationships returned; ``None``
            returns all of them.

    Returns:
        Relationship records sorted by descending ``"strength"``, each with
        the keys ``cluster_a_id``, ``cluster_b_id``, ``cluster_a_name``,
        ``cluster_b_name``, ``relationship_type``, ``strength``,
        ``description`` and ``shared_elements``.
    """
    if len(clusters) < 2:
        return []

    doc_lookup = engine._build_document_lookup(documents, robust=True)

    relationships: list[dict[str, Any]] = []
    for i, cluster_a in enumerate(clusters):
        # Only pair with later clusters so each unordered pair is seen once.
        for cluster_b in clusters[i + 1 :]:
            relationship = analyze_cluster_pair(
                engine, cluster_a, cluster_b, doc_lookup
            )
            # Keep the comparison in its positive form: a NaN strength
            # compares False and is therefore skipped.
            if relationship and relationship["strength"] > min_strength:
                relationships.append(
                    {
                        "cluster_a_id": cluster_a.cluster_id,
                        "cluster_b_id": cluster_b.cluster_id,
                        "cluster_a_name": cluster_a.name,
                        "cluster_b_name": cluster_b.name,
                        "relationship_type": relationship["type"],
                        "strength": relationship["strength"],
                        "description": relationship["description"],
                        "shared_elements": relationship["shared_elements"],
                    }
                )

    # list.sort is stable, so equally strong pairs keep discovery order.
    relationships.sort(key=lambda record: record["strength"], reverse=True)
    return relationships[:max_results]

49 

50 

def _resolve_cluster_documents(
    engine: Any, cluster: DocumentCluster, doc_lookup: dict
) -> list[HybridSearchResult]:
    """Return the documents of ``cluster`` that the engine can resolve."""
    resolved = (
        engine._find_document_by_id(doc_id, doc_lookup)
        for doc_id in cluster.documents
    )
    # Unresolvable ids come back falsy and are dropped.
    return [doc for doc in resolved if doc]


def analyze_cluster_pair(
    engine: Any,
    cluster_a: DocumentCluster,
    cluster_b: DocumentCluster,
    doc_lookup: dict,
) -> dict[str, Any] | None:
    """Return the strongest relationship found between two clusters.

    Args:
        engine: Object providing ``_find_document_by_id(doc_id, doc_lookup)``.
        cluster_a: First cluster; its ``documents`` attribute is iterated
            as document ids.
        cluster_b: Second cluster, treated the same way.
        doc_lookup: Lookup structure handed to ``_find_document_by_id``.

    Returns:
        The candidate relationship with the highest ``"strength"``, or
        ``None`` when either cluster has no resolvable documents or no
        analyzer produced a relationship.
    """
    docs_a = _resolve_cluster_documents(engine, cluster_a, doc_lookup)
    docs_b = _resolve_cluster_documents(engine, cluster_b, doc_lookup)
    if not docs_a or not docs_b:
        return None

    # Analyzer order matters: ``max`` returns the first of equally strong
    # candidates, so this sequence is the tie-break priority.
    results = (
        analyze_entity_overlap(cluster_a, cluster_b),
        analyze_topic_overlap(cluster_a, cluster_b),
        analyze_source_similarity(docs_a, docs_b),
        analyze_hierarchy_relationship(docs_a, docs_b),
        analyze_content_similarity(docs_a, docs_b),
    )
    candidates = [rel for rel in results if rel]
    return max(candidates, key=lambda rel: rel["strength"], default=None)