Coverage for src/qdrant_loader/core/pipeline/source_filter.py: 45%

33 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-04 05:50 +0000

1"""Utility for filtering sources based on type and name.""" 

2 

3from qdrant_loader.config import SourcesConfig 

4 

5 

class SourceFilter:
    """Utility for filtering sources based on type and name."""

    # Attribute names on the sources configuration that hold the per-type
    # source mappings, listed in the order they are examined.
    _SOURCE_TYPES = ("git", "confluence", "jira", "publicdocs", "localfile")

    def filter_sources(
        self,
        sources_config: SourcesConfig,
        source_type: str | None = None,
        source: str | None = None,
    ) -> SourcesConfig:
        """Filter sources based on criteria.

        Args:
            sources_config: The original sources configuration.
            source_type: Filter by source type (e.g., 'git', 'confluence').
                Matched case-insensitively against the supported type names.
            source: Filter by specific source name (exact, case-sensitive
                match against the mapping keys).

        Returns:
            ``sources_config`` itself when neither filter is given.
            Otherwise a newly constructed ``SourcesConfig`` on which only the
            selected, non-empty source types have been assigned. A
            ``source_type`` that is not one of the supported names selects
            nothing, so no attribute is assigned on the new configuration.
        """
        # No filters at all: hand back the caller's object untouched.
        if not source_type and not source:
            return sources_config

        filtered_config = SourcesConfig()

        if source_type:
            requested = source_type.lower()
            # Restrict to the single requested type; unknown names select
            # nothing rather than raising.
            selected = (requested,) if requested in self._SOURCE_TYPES else ()
        else:
            # Only a name was given: look for it across every source type.
            selected = self._SOURCE_TYPES

        for type_name in selected:
            configs = getattr(sources_config, type_name)
            # Skip types with nothing configured so the attribute on the new
            # configuration keeps whatever value the constructor gave it.
            if configs:
                setattr(
                    filtered_config,
                    type_name,
                    self._filter_by_name(configs, source),
                )

        return filtered_config

    def _filter_by_name(self, source_configs: dict, source_name: str | None) -> dict:
        """Filter source configs by name.

        Args:
            source_configs: Mapping of source name to its configuration.
            source_name: Name to keep; ``None`` or an empty string disables
                name filtering.

        Returns:
            ``source_configs`` itself (not a copy) when no name is given;
            otherwise a new dict containing only the entry whose key equals
            ``source_name``, which is empty if there is no such key.
        """
        if not source_name:
            return source_configs

        # Return only the source with the matching name.
        return {
            name: config
            for name, config in source_configs.items()
            if name == source_name
        }