Coverage for src/qdrant_loader_mcp_server/mcp/handlers/search/filters.py: 92%

101 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:06 +0000

1from __future__ import annotations 

2 

3import os 

4from typing import Any 

5 

6from ....mcp.formatters.utils import FormatterUtils 

7from ....search.components.search_result_models import HybridSearchResult 

8 

9 

def apply_hierarchy_filters(
    results: list[HybridSearchResult], hierarchy_filter: dict[str, Any]
) -> list[HybridSearchResult]:
    """Return the results that satisfy every criterion in ``hierarchy_filter``.

    Only results whose ``source_type`` is ``"confluence"`` or ``"localfile"``
    can pass. Recognised filter keys:

    * ``"depth"`` - for a local file with a non-empty ``file_path`` the depth
      is ``max(0, number_of_path_segments - 2)``; otherwise the result's own
      ``depth`` attribute is compared, when it has one.
    * ``"parent_title"`` - for local files the second-to-last path segment
      (``""`` when there is only one segment); a local file without a
      ``file_path`` is rejected. Other sources compare ``parent_title``.
    * ``"root_only"`` - when truthy, keep only results for which
      ``is_root_document()`` is true.
    * ``"has_children"`` - compared against ``has_children()``; not applied
      to local files.

    Args:
        results: Search results to filter; input order is preserved.
        hierarchy_filter: Mapping of the criteria described above.

    Returns:
        A new list with the matching results.
    """

    def _segments(path: str) -> list[str]:
        # Split on "/" and drop empty pieces (leading/trailing/double slashes).
        return [segment for segment in path.split("/") if segment]

    def _keep(item: HybridSearchResult) -> bool:
        kind = item.source_type
        if kind not in ("confluence", "localfile"):
            return False
        is_local = kind == "localfile"

        if "depth" in hierarchy_filter:
            wanted_depth = hierarchy_filter["depth"]
            local_path = getattr(item, "file_path", None)
            if is_local and local_path:
                # Folder depth is derived from the path, not a stored field.
                if max(0, len(_segments(local_path)) - 2) != wanted_depth:
                    return False
            elif hasattr(item, "depth") and item.depth != wanted_depth:
                return False

        if "parent_title" in hierarchy_filter:
            wanted_parent = hierarchy_filter["parent_title"]
            if is_local:
                local_path = getattr(item, "file_path", None)
                if not local_path:
                    return False
                parts = _segments(local_path)
                folder = parts[-2] if len(parts) > 1 else ""
                if folder != wanted_parent:
                    return False
            elif getattr(item, "parent_title", None) != wanted_parent:
                return False

        if hierarchy_filter.get("root_only", False) and not item.is_root_document():
            return False

        # The has_children criterion is skipped for local files.
        if "has_children" in hierarchy_filter and not is_local:
            if item.has_children() != hierarchy_filter["has_children"]:
                return False

        return True

    return [item for item in results if _keep(item)]

49 

50 

def apply_attachment_filters(
    results: list[HybridSearchResult], attachment_filter: dict[str, Any]
) -> list[HybridSearchResult]:
    """Return the Confluence results that satisfy ``attachment_filter``.

    Results whose ``source_type`` is not ``"confluence"`` are always dropped.
    Recognised filter keys:

    * ``"attachments_only"`` - when truthy, keep only results whose
      ``is_attachment`` is true. A falsy value (e.g. ``False``) disables the
      criterion, consistent with ``apply_lightweight_attachment_filters``.
    * ``"parent_document_title"`` - must equal ``parent_document_title``.
    * ``"file_type"`` - must equal ``get_file_type()``.
    * ``"file_size_min"`` / ``"file_size_max"`` - inclusive bounds on
      ``file_size``; ignored when the bound or the result's size is ``None``.
    * ``"author"`` - must equal ``attachment_author``.

    Apart from ``"attachments_only"`` and the size bounds, a criterion is
    applied whenever its key is present, whatever its value.

    Args:
        results: Search results to filter; input order is preserved.
        attachment_filter: Mapping of the criteria described above.

    Returns:
        A new list with the matching results.
    """
    filtered_results = []
    for result in results:
        if result.source_type != "confluence":
            continue

        # Test the flag's value, not mere key presence: an explicit
        # ``attachments_only=False`` must not exclude regular pages.
        if attachment_filter.get("attachments_only") and not result.is_attachment:
            continue

        if "parent_document_title" in attachment_filter:
            if (
                result.parent_document_title
                != attachment_filter["parent_document_title"]
            ):
                continue

        if "file_type" in attachment_filter:
            if result.get_file_type() != attachment_filter["file_type"]:
                continue

        # Size bounds only apply when both the bound and the size are known.
        size = result.file_size
        min_size = attachment_filter.get("file_size_min")
        if min_size is not None and size is not None and size < min_size:
            continue
        max_size = attachment_filter.get("file_size_max")
        if max_size is not None and size is not None and size > max_size:
            continue

        if "author" in attachment_filter:
            if result.attachment_author != attachment_filter["author"]:
                continue

        filtered_results.append(result)
    return filtered_results

89 

90 

def apply_lightweight_attachment_filters(
    results: list[HybridSearchResult],
    attachment_filter: dict[str, Any],
    file_type_extractor: Any | None = None,
) -> list[HybridSearchResult]:
    """Filter attachment-like results using only defensively-read attributes.

    A result is considered attachment-like when any of these holds: its
    ``is_attachment`` attribute is truthy, it has a non-empty
    ``original_filename``, or its ``file_path`` is a string that does not end
    in ``"/"`` and whose basename contains a dot. Everything else is dropped.

    Each criterion below is applied only when its filter value is truthy
    (size bounds: when not ``None``):

    * ``"attachments_only"`` - require a truthy ``is_attachment`` attribute.
    * ``"file_type"`` - compare against ``file_type_extractor(result)`` or,
      when no extractor is given,
      ``FormatterUtils.extract_file_type_minimal(result)``.
    * ``"file_size_min"`` / ``"file_size_max"`` - inclusive bounds on
      ``file_size``; skipped when the result has no size.
    * ``"parent_document_title"`` - compare against ``parent_document_title``,
      falling back to ``parent_title`` when the former is falsy.
    * ``"author"`` - compare against ``attachment_author``, falling back to
      ``author`` when the former is falsy.

    Args:
        results: Search results to filter; input order is preserved.
        attachment_filter: Mapping of the criteria described above.
        file_type_extractor: Optional callable taking a result and returning
            its file type.

    Returns:
        A new list with the matching results.
    """
    kept = []
    for candidate in results:
        flagged = bool(getattr(candidate, "is_attachment", False))
        upload_name = getattr(candidate, "original_filename", None)
        location = getattr(candidate, "file_path", None)

        # Path heuristic: not a directory-style path, and the last segment
        # contains a dot.
        names_a_file = (
            isinstance(location, str)
            and not location.endswith("/")
            and "." in os.path.basename(location)
        )
        if not (flagged or upload_name or names_a_file):
            continue

        # The stricter criterion relies on the explicit flag alone.
        if attachment_filter.get("attachments_only") and not flagged:
            continue

        wanted_type = attachment_filter.get("file_type")
        if wanted_type:
            extract = (
                file_type_extractor
                if file_type_extractor is not None
                else FormatterUtils.extract_file_type_minimal
            )
            if extract(candidate) != wanted_type:
                continue

        size = getattr(candidate, "file_size", None)
        if size is not None:
            lower = attachment_filter.get("file_size_min")
            if lower is not None and size < lower:
                continue
            upper = attachment_filter.get("file_size_max")
            if upper is not None and size > upper:
                continue

        wanted_parent = attachment_filter.get("parent_document_title")
        if wanted_parent:
            parent = getattr(candidate, "parent_document_title", None) or getattr(
                candidate, "parent_title", None
            )
            if parent != wanted_parent:
                continue

        wanted_author = attachment_filter.get("author")
        if wanted_author:
            writer = getattr(candidate, "attachment_author", None) or getattr(
                candidate, "author", None
            )
            if writer != wanted_author:
                continue

        kept.append(candidate)
    return kept