Coverage for src/qdrant_loader_mcp_server/search/hybrid/orchestration/relationships.py: 92%
49 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
1from __future__ import annotations
3from typing import Any
5from ...components.search_result_models import HybridSearchResult
6from ...enhanced.cross_document_intelligence import ( # noqa: F401 - type hint usage
7 ClusteringStrategy,
8 DocumentCluster,
9)
10from ...hybrid.components.relationships import (
11 analyze_content_similarity,
12 analyze_entity_overlap,
13 analyze_hierarchy_relationship,
14 analyze_source_similarity,
15 analyze_topic_overlap,
16)
def analyze_cluster_relationships(
    engine: Any, clusters: list[DocumentCluster], documents: list[HybridSearchResult]
) -> list[dict[str, Any]]:
    """Describe the strongest pairwise relationships between clusters.

    Every unordered pair of clusters is passed to ``analyze_cluster_pair``.
    A pair is reported only when that call yields a result whose
    ``"strength"`` is strictly greater than 0.1.

    Args:
        engine: Object providing ``_build_document_lookup``; it is also
            forwarded unchanged to ``analyze_cluster_pair``.
        clusters: Clusters to compare with one another.
        documents: Search results used to build the document lookup.

    Returns:
        At most ten relationship dictionaries, ordered from strongest to
        weakest. An empty list when fewer than two clusters are given.
    """
    if len(clusters) < 2:
        return []

    lookup = engine._build_document_lookup(documents, robust=True)
    found: list[dict[str, Any]] = []

    for position, first in enumerate(clusters):
        # Only look at clusters after `first` so each pair is visited once.
        for second in clusters[position + 1 :]:
            pair_info = analyze_cluster_pair(engine, first, second, lookup)
            if not pair_info or not pair_info["strength"] > 0.1:
                continue

            entry = {
                "cluster_a_id": first.cluster_id,
                "cluster_b_id": second.cluster_id,
                "cluster_a_name": first.name,
                "cluster_b_name": second.name,
                "relationship_type": pair_info["type"],
                "strength": pair_info["strength"],
                "description": pair_info["description"],
                "shared_elements": pair_info["shared_elements"],
            }
            found.append(entry)

    # Stable sort: equally strong pairs keep their discovery order.
    ranked = sorted(found, key=lambda item: item["strength"], reverse=True)
    return ranked[:10]
def analyze_cluster_pair(
    engine: Any,
    cluster_a: DocumentCluster,
    cluster_b: DocumentCluster,
    doc_lookup: dict,
) -> dict[str, Any] | None:
    """Pick the strongest relationship detected between two clusters.

    Each cluster's document ids are resolved through
    ``engine._find_document_by_id``; ids that resolve to a falsy value are
    dropped. The entity, topic, source, hierarchy and content analyzers are
    then run in that order, and the truthy result with the highest
    ``"strength"`` wins (the earliest analyzer wins a tie).

    Args:
        engine: Object providing ``_find_document_by_id(doc_id, doc_lookup)``.
        cluster_a: First cluster of the pair.
        cluster_b: Second cluster of the pair.
        doc_lookup: Lookup structure handed to ``_find_document_by_id``.

    Returns:
        The winning relationship dictionary, or ``None`` when either cluster
        has no resolvable documents or no analyzer reports a relationship.
    """

    def resolve(cluster: DocumentCluster) -> list[HybridSearchResult]:
        # Keep only the ids the engine can map to a (truthy) document.
        return [
            hit
            for doc_id in cluster.documents
            if (hit := engine._find_document_by_id(doc_id, doc_lookup))
        ]

    docs_a = resolve(cluster_a)
    docs_b = resolve(cluster_b)
    if not (docs_a and docs_b):
        return None

    # All analyzers run, in this fixed order; the order also decides ties.
    outcomes = (
        analyze_entity_overlap(cluster_a, cluster_b),
        analyze_topic_overlap(cluster_a, cluster_b),
        analyze_source_similarity(docs_a, docs_b),
        analyze_hierarchy_relationship(docs_a, docs_b),
        analyze_content_similarity(docs_a, docs_b),
    )
    detected: list[dict[str, Any]] = [outcome for outcome in outcomes if outcome]

    if not detected:
        return None
    return max(detected, key=lambda item: item["strength"])