Coverage for src/qdrant_loader_mcp_server/search/enhanced/kg/extractors.py: 98%
52 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
1from __future__ import annotations
3from typing import Any
def extract_entities_from_result(result: Any) -> list[str]:
    """Collect entity names from the title-like attributes of ``result``.

    The attributes ``source_title``, ``parent_title``, ``section_title`` and
    ``project_name`` are read with ``getattr``; any that are missing, are not
    strings, or are blank after stripping are skipped.

    Args:
        result: Any object; its attributes are read defensively.

    Returns:
        The unique, whitespace-stripped values, in the order the attributes
        are inspected.
    """
    # Dict keys de-duplicate while keeping first-seen order, so the output is
    # reproducible across interpreter runs (iteration order of a set of
    # strings is not, because of hash randomisation).
    entities: dict[str, None] = {}
    attr_names = (
        "source_title",
        "parent_title",
        "section_title",
        "project_name",
    )
    for attr_name in attr_names:
        value = getattr(result, attr_name, None)
        if not isinstance(value, str):
            continue
        stripped = value.strip()
        if stripped:
            entities[stripped] = None
    return list(entities)
def extract_topics_from_result(result: Any) -> list[str]:
    """Derive topic labels from the breadcrumb and type attributes of ``result``.

    ``breadcrumb_text`` is stripped and split on the literal separator
    ``" > "``; each non-blank section becomes a topic. The stripped values of
    ``section_type`` and ``source_type`` are appended afterwards. Attributes
    that are missing, are not strings, or are blank are ignored.

    Args:
        result: Any object; its attributes are read defensively.

    Returns:
        The unique topics in first-seen order: breadcrumb sections first,
        then ``section_type``, then ``source_type``.
    """
    topics: list[str] = []

    # breadcrumb_text may be missing or non-string.
    breadcrumb_text = getattr(result, "breadcrumb_text", None)
    if isinstance(breadcrumb_text, str):
        sections = (
            section.strip() for section in breadcrumb_text.strip().split(" > ")
        )
        topics.extend(section for section in sections if section)

    # section_type and source_type may be missing.
    for attr_name in ("section_type", "source_type"):
        value = getattr(result, attr_name, None)
        if isinstance(value, str) and value.strip():
            topics.append(value.strip())

    # dict.fromkeys removes duplicates but, unlike set(), keeps the order in
    # which topics were discovered, so the result is deterministic.
    return list(dict.fromkeys(topics))
def extract_concepts_from_result(result: Any) -> list[str]:
    """Collect concept strings from ``section_title`` and ``hierarchy_context``.

    Each attribute is read with ``getattr``; values that are missing, are not
    strings, or are blank after stripping are ignored.

    Args:
        result: Any object; its attributes are read defensively.

    Returns:
        The unique, whitespace-stripped values, ``section_title`` first and
        ``hierarchy_context`` second.
    """
    concepts: list[str] = []
    for attr_name in ("section_title", "hierarchy_context"):
        value = getattr(result, attr_name, None)
        if not isinstance(value, str):
            continue
        stripped = value.strip()
        if stripped:
            concepts.append(stripped)

    # Order-preserving de-duplication keeps the output deterministic; a set
    # round-trip would shuffle it between interpreter runs.
    return list(dict.fromkeys(concepts))
def extract_keywords_from_result(result: Any) -> list[str]:
    """Extract lowercase keywords from the ``text`` and ``source_title`` of ``result``.

    A keyword is a whitespace-separated token that, once lowercased, is longer
    than three characters and purely alphabetic. Only the first ten tokens of
    ``text`` are considered; every token of ``source_title`` is considered.
    Attributes that are missing or are not strings contribute nothing.

    Args:
        result: Any object; its attributes are read defensively.

    Returns:
        The unique keywords in first-seen order: those from ``text`` first,
        then those from ``source_title``.
    """

    def filtered_words(text: Any, limit: int | None = 10) -> list[str]:
        """Return the qualifying tokens among the first ``limit`` words of ``text``."""
        if not isinstance(text, str):
            return []
        words = text.lower().split()
        if limit is not None:
            # The cap applies to raw tokens, before filtering.
            words = words[:limit]
        return [w for w in words if len(w) > 3 and w.isalpha()]

    # Dict keys de-duplicate while preserving discovery order, which keeps the
    # result deterministic (a set of strings iterates in a per-run order).
    keywords: dict[str, None] = dict.fromkeys(
        filtered_words(getattr(result, "text", None), 10)
    )
    keywords.update(
        dict.fromkeys(filtered_words(getattr(result, "source_title", None), None))
    )
    return list(keywords)