Coverage for src/qdrant_loader_mcp_server/mcp/handlers/search/filters.py: 92%
101 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
1from __future__ import annotations
3import os
4from typing import Any
6from ....mcp.formatters.utils import FormatterUtils
7from ....search.components.search_result_models import HybridSearchResult
def apply_hierarchy_filters(
    results: list[HybridSearchResult], hierarchy_filter: dict[str, Any]
) -> list[HybridSearchResult]:
    """Return the results that satisfy every criterion in *hierarchy_filter*.

    Only results whose ``source_type`` is ``"confluence"`` or ``"localfile"``
    are ever kept. Recognised filter keys:

    * ``depth`` -- for local files with a ``file_path`` the depth is derived
      from the path (number of non-empty ``/``-separated segments minus two,
      floored at zero); otherwise the result's own ``depth`` attribute is
      compared when it exists.
    * ``parent_title`` -- for local files the second-to-last path segment
      (``""`` when there is none) must match, and a local file without a
      ``file_path`` is rejected; for other results the ``parent_title``
      attribute must match.
    * ``root_only`` -- when truthy, ``is_root_document()`` must be true.
    * ``has_children`` -- ``has_children()`` must equal the given value; this
      check is not applied to local files.

    Args:
        results: Candidate search results, in ranking order.
        hierarchy_filter: Mapping of the criteria described above.

    Returns:
        A new list with the matching results in their original order.
    """

    def _segments(path):
        # Non-empty path components; tolerates leading/duplicate slashes.
        return [piece for piece in path.split("/") if piece]

    def _keep(item) -> bool:
        kind = item.source_type
        if kind not in ("confluence", "localfile"):
            return False
        is_local = kind == "localfile"

        if "depth" in hierarchy_filter:
            wanted_depth = hierarchy_filter["depth"]
            path = getattr(item, "file_path", None)
            if is_local and path:
                if max(0, len(_segments(path)) - 2) != wanted_depth:
                    return False
            elif hasattr(item, "depth") and item.depth != wanted_depth:
                return False

        if "parent_title" in hierarchy_filter:
            wanted_parent = hierarchy_filter["parent_title"]
            if is_local:
                path = getattr(item, "file_path", None)
                if not path:
                    # A local file without a path cannot match any parent.
                    return False
                segments = _segments(path)
                folder = segments[-2] if len(segments) > 1 else ""
                if folder != wanted_parent:
                    return False
            elif getattr(item, "parent_title", None) != wanted_parent:
                return False

        if hierarchy_filter.get("root_only", False) and not item.is_root_document():
            return False

        if (
            "has_children" in hierarchy_filter
            and not is_local
            and item.has_children() != hierarchy_filter["has_children"]
        ):
            return False

        return True

    return [item for item in results if _keep(item)]
def apply_attachment_filters(
    results: list[HybridSearchResult], attachment_filter: dict[str, Any]
) -> list[HybridSearchResult]:
    """Return the Confluence results that satisfy *attachment_filter*.

    Results whose ``source_type`` is not ``"confluence"`` are always dropped.
    Recognised filter keys:

    * ``attachments_only`` -- when truthy, keep only results whose
      ``is_attachment`` is true. An explicit ``False`` leaves non-attachments
      in place (previously the mere presence of the key removed them).
    * ``parent_document_title`` -- must equal ``result.parent_document_title``.
    * ``file_type`` -- must equal ``result.get_file_type()``.
    * ``file_size_min`` / ``file_size_max`` -- inclusive bounds on
      ``result.file_size``; a bound is ignored when it is ``None`` or when
      the result has no known size.
    * ``author`` -- must equal ``result.attachment_author``.

    Args:
        results: Candidate search results, in ranking order.
        attachment_filter: Mapping of the criteria described above.

    Returns:
        A new list with the matching results in their original order.
    """
    filtered_results = []
    for result in results:
        if result.source_type != "confluence":
            continue

        # Check the flag's value, not just its presence, so that
        # {"attachments_only": False} does not behave like True.
        if attachment_filter.get("attachments_only") and not result.is_attachment:
            continue

        if "parent_document_title" in attachment_filter:
            if (
                result.parent_document_title
                != attachment_filter["parent_document_title"]
            ):
                continue

        if "file_type" in attachment_filter:
            if result.get_file_type() != attachment_filter["file_type"]:
                continue

        # Size bounds only apply when both the bound and the size are known.
        min_size = attachment_filter.get("file_size_min")
        if (
            min_size is not None
            and result.file_size is not None
            and result.file_size < min_size
        ):
            continue

        max_size = attachment_filter.get("file_size_max")
        if (
            max_size is not None
            and result.file_size is not None
            and result.file_size > max_size
        ):
            continue

        if "author" in attachment_filter:
            if result.attachment_author != attachment_filter["author"]:
                continue

        filtered_results.append(result)
    return filtered_results
def apply_lightweight_attachment_filters(
    results: list[HybridSearchResult],
    attachment_filter: dict[str, Any],
    file_type_extractor: Any | None = None,
) -> list[HybridSearchResult]:
    """Filter attachment-like results using only attribute lookups.

    A result counts as attachment-like when its ``is_attachment`` attribute is
    truthy, when it has a non-empty ``original_filename``, or when its
    ``file_path`` is a string that does not end with ``/`` and whose basename
    contains a dot. Everything else is dropped before any filter is applied.

    Recognised filter keys (each ignored when missing or falsy, except the
    size bounds, which are ignored only when ``None``):

    * ``attachments_only`` -- require a truthy ``is_attachment`` attribute.
    * ``file_type`` -- must equal the type returned by *file_type_extractor*,
      or by ``FormatterUtils.extract_file_type_minimal`` when no extractor is
      supplied.
    * ``file_size_min`` / ``file_size_max`` -- inclusive bounds on
      ``file_size``; skipped when the result has no size.
    * ``parent_document_title`` -- compared with ``parent_document_title``,
      falling back to ``parent_title``.
    * ``author`` -- compared with ``attachment_author``, falling back to
      ``author``.

    Args:
        results: Candidate search results, in ranking order.
        attachment_filter: Mapping of the criteria described above.
        file_type_extractor: Optional callable taking a result and returning
            its file type.

    Returns:
        A new list with the matching results in their original order.
    """

    def _looks_like_file(path) -> bool:
        # A path names a file when it is not a directory-style path and its
        # last component carries a dot.
        if not isinstance(path, str) or path.endswith("/"):
            return False
        return "." in os.path.basename(path)

    def _keep(item) -> bool:
        flagged = bool(getattr(item, "is_attachment", False))
        filename = getattr(item, "original_filename", None)
        path = getattr(item, "file_path", None)
        if not (flagged or bool(filename) or _looks_like_file(path)):
            return False

        if attachment_filter.get("attachments_only") and not flagged:
            return False

        if attachment_filter.get("file_type"):
            if file_type_extractor is None:
                detected = FormatterUtils.extract_file_type_minimal(item)
            else:
                detected = file_type_extractor(item)
            if detected != attachment_filter["file_type"]:
                return False

        size = getattr(item, "file_size", None)
        if size is not None:
            lower = attachment_filter.get("file_size_min")
            if lower is not None and size < lower:
                return False
            upper = attachment_filter.get("file_size_max")
            if upper is not None and size > upper:
                return False

        if attachment_filter.get("parent_document_title"):
            parent = getattr(item, "parent_document_title", None) or getattr(
                item, "parent_title", None
            )
            if parent != attachment_filter["parent_document_title"]:
                return False

        if attachment_filter.get("author"):
            writer = getattr(item, "attachment_author", None) or getattr(
                item, "author", None
            )
            if writer != attachment_filter["author"]:
                return False

        return True

    return [item for item in results if _keep(item)]