Coverage for src/qdrant_loader_mcp_server/search/enhanced/kg/extractors.py: 98%

52 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:06 +0000

1from __future__ import annotations 

2 

3from typing import Any 

4 

5 

def extract_entities_from_result(result: Any) -> list[str]:
    """Collect entity names from the title-like attributes of a search result.

    Reads ``source_title``, ``parent_title``, ``section_title`` and
    ``project_name`` from *result*. Attributes that are missing, are not
    strings, or are blank after stripping are skipped.

    Args:
        result: Any object; attributes are looked up with ``getattr`` and a
            ``None`` default, so no particular type is required.

    Returns:
        The unique, whitespace-stripped values in the order the attributes
        are listed above (first occurrence wins).
    """
    # A dict is used as an ordered set: iterating a ``set`` of strings yields
    # an order that changes between interpreter runs (hash randomisation),
    # which made the returned list nondeterministic.
    entities: dict[str, None] = {}

    for attr_name in ("source_title", "parent_title", "section_title", "project_name"):
        value = getattr(result, attr_name, None)
        if not isinstance(value, str):
            continue
        stripped = value.strip()
        if stripped:
            entities[stripped] = None

    return list(entities)

17 

18 

def extract_topics_from_result(result: Any) -> list[str]:
    """Derive topic labels from a result's breadcrumb and type attributes.

    Topics come from three attributes of *result*, each optional:

    * ``breadcrumb_text`` - stripped, split on the literal ``" > "``
      separator, and every non-blank piece is stripped and kept;
    * ``section_type`` and ``source_type`` - kept (stripped) when they are
      non-blank strings.

    Args:
        result: Any object; attributes are looked up with ``getattr`` and a
            ``None`` default. Non-string values are ignored.

    Returns:
        The unique topics in first-seen order: breadcrumb sections, then
        ``section_type``, then ``source_type``.
    """
    topics: list[str] = []

    # breadcrumb_text may be missing or non-string
    breadcrumb_text = getattr(result, "breadcrumb_text", None)
    if isinstance(breadcrumb_text, str):
        # Strip the whole breadcrumb before splitting so a dangling
        # separator at either end is handled exactly as before.
        for section in breadcrumb_text.strip().split(" > "):
            cleaned = section.strip()
            if cleaned:
                topics.append(cleaned)

    # section_type and source_type may be missing
    for attr_name in ("section_type", "source_type"):
        value = getattr(result, attr_name, None)
        if isinstance(value, str) and value.strip():
            topics.append(value.strip())

    # dict.fromkeys de-duplicates while keeping insertion order; going through
    # a ``set`` returned the topics in an order that varied between runs.
    return list(dict.fromkeys(topics))

44 

45 

def extract_concepts_from_result(result: Any) -> list[str]:
    """Collect concept strings from a result's section and hierarchy fields.

    Reads ``section_title`` and ``hierarchy_context`` from *result*. A value
    is kept, whitespace-stripped, only when it is a string that is non-blank
    after stripping.

    Args:
        result: Any object; attributes are looked up with ``getattr`` and a
            ``None`` default.

    Returns:
        The unique concepts, ``section_title`` first and then
        ``hierarchy_context``.
    """
    concepts: list[str] = []

    for attr_name in ("section_title", "hierarchy_context"):
        value = getattr(result, attr_name, None)
        if not isinstance(value, str):
            continue
        stripped = value.strip()
        if stripped:
            concepts.append(stripped)

    # Ordered de-duplication: a ``set`` round-trip made the output order
    # depend on per-process string hashing.
    return list(dict.fromkeys(concepts))

62 

63 

64def extract_keywords_from_result(result: Any) -> list[str]: 

65 keywords_set: set[str] = set() 

66 

67 def filtered_words(text: Any, limit: int | None = 10) -> list[str]: 

68 if not isinstance(text, str): 

69 return [] 

70 words = text.lower().split() 

71 if limit is not None: 

72 words = words[:limit] 

73 return [w for w in words if len(w) > 3 and w.isalpha()] 

74 

75 text = getattr(result, "text", None) 

76 keywords_set.update(filtered_words(text, 10)) 

77 

78 source_title = getattr(result, "source_title", None) 

79 keywords_set.update(filtered_words(source_title, None)) 

80 

81 return list(keywords_set)