Coverage for src/qdrant_loader_mcp_server/search/components/combining/filters.py: 74%

34 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:06 +0000

1from __future__ import annotations 

2 

3from typing import Any 

4 

5 

def should_skip_result(
    metadata: dict[str, Any],
    result_filters: dict[str, Any],
    query_context: dict[str, Any],
) -> bool:
    """Decide whether a result should be dropped by the content-type filter.

    Only the ``"content_type"`` entry of ``result_filters`` is consulted. A
    result is kept as soon as one of the requested content types matches:

    * ``"code"`` -- ``metadata["content_type_analysis"]["has_code_blocks"]``
      is truthy.
    * ``"documentation"`` -- that same flag is falsy or missing.
    * ``"technical"`` -- ``query_context["is_technical"]`` is truthy.
    * ``"requirements"`` / ``"business"`` / ``"strategy"`` --
      ``count_business_indicators(metadata)`` is greater than zero.
    * ``"guide"`` / ``"tutorial"`` / ``"procedure"`` -- the lower-cased
      ``metadata["section_type"]`` contains one of ``step``, ``guide``,
      ``procedure`` or ``tutorial``.

    Any other content type never matches.

    Args:
        metadata: Metadata of the candidate result.
        result_filters: Active filters; may or may not contain
            ``"content_type"``.
        query_context: Context of the current query.

    Returns:
        ``True`` when a ``"content_type"`` filter is present and none of its
        entries match (including an empty list); ``False`` otherwise.
    """
    if "content_type" not in result_filters:
        return False

    allowed_content_types = result_filters["content_type"]
    # A bare string would otherwise be iterated character by character and
    # therefore never match anything; treat it as a one-element filter.
    if isinstance(allowed_content_types, str):
        allowed_content_types = [allowed_content_types]

    # ``.get(key, default)`` only falls back when the key is absent; a key
    # that is present with a ``None`` value must not crash the filter.
    content_analysis = metadata.get("content_type_analysis") or {}
    has_code_blocks = bool(content_analysis.get("has_code_blocks"))

    for content_type in allowed_content_types:
        if content_type == "code":
            if has_code_blocks:
                return False
        elif content_type == "documentation":
            if not has_code_blocks:
                return False
        elif content_type == "technical":
            if query_context.get("is_technical"):
                return False
        elif content_type in ("requirements", "business", "strategy"):
            if count_business_indicators(metadata) > 0:
                return False
        elif content_type in ("guide", "tutorial", "procedure"):
            section_type = str(metadata.get("section_type") or "").lower()
            if any(
                proc_word in section_type
                for proc_word in ("step", "guide", "procedure", "tutorial")
            ):
                return False

    # A content-type filter was requested and nothing satisfied it.
    return True

46 

47 

def count_business_indicators(metadata: dict) -> int:
    """Count how many business terms appear in a result's title or content.

    The check is a case-insensitive substring match against
    ``metadata["title"]`` and ``metadata["content"]``. Each term contributes
    at most 1 to the total no matter how often, or in which of the two
    fields, it occurs, so the result ranges from 0 to the number of terms.

    Args:
        metadata: Result metadata; ``"title"`` and ``"content"`` are optional
            and may be ``None``.

    Returns:
        Number of distinct business terms found.
    """
    business_terms = (
        "requirement",
        "business",
        "strategy",
        "goal",
        "objective",
        "process",
    )
    # ``.get(key, "")`` does not protect against a key stored as ``None``;
    # normalise to a string before lowering so such records count as empty.
    title = str(metadata.get("title") or "").lower()
    content = str(metadata.get("content") or "").lower()

    return sum(1 for term in business_terms if term in title or term in content)