Coverage for src/qdrant_loader/core/chunking/progress_tracker.py: 81%

37 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-04 05:50 +0000

1"""Progress tracking utility for chunking operations.""" 

2 

3import logging 

4from typing import Dict, Optional 

5import structlog 

6 

7 

class ChunkingProgressTracker:
    """Tracks and reports progress for chunking operations.

    One entry per in-flight document is kept in ``self._progress``, keyed by
    document id. Entries are created by :meth:`start_chunking` and removed by
    :meth:`finish_chunking` or :meth:`log_error`.

    Two logging styles are used: when debug mode is on, events are logged at
    debug level with a structured ``extra`` payload; otherwise a single concise
    human-readable line is logged at info level.
    """

    def __init__(self, logger: structlog.BoundLogger):
        """Create a tracker that reports through ``logger``.

        Args:
            logger: Logger used for every progress, fallback and error message.
        """
        self.logger = logger
        # document_id -> metadata recorded by start_chunking().
        self._progress: Dict[str, Dict] = {}
        # Snapshot of the root stdlib logger's effective level, taken once at
        # construction; later changes to the log level are not picked up.
        self._is_debug_mode = logging.getLogger().getEffectiveLevel() <= logging.DEBUG

    def start_chunking(
        self,
        document_id: str,
        source: str,
        source_type: str,
        content_length: int,
        file_name: str,
    ) -> None:
        """Start tracking chunking progress for a document.

        Any existing entry for ``document_id`` is overwritten.

        Args:
            document_id: Key under which the document is tracked.
            source: Source identifier, stored and echoed in log payloads.
            source_type: Source type, stored and echoed in log payloads.
            content_length: Content length; rendered as "chars" in info mode.
            file_name: Name used in the human-readable log lines.
        """
        self._progress[document_id] = {
            "source": source,
            "source_type": source_type,
            "content_length": content_length,
            "file_name": file_name,
            "chunks_created": 0,
            "started": True,
        }

        if self._is_debug_mode:
            # Detailed logging for debug mode
            self.logger.debug(
                "Starting chunking",
                extra={
                    "source": source,
                    "source_type": source_type,
                    "content_length": content_length,
                    "file_name": file_name,
                },
            )
        else:
            # Concise logging for info mode
            self.logger.info(f"Chunking {file_name} ({content_length:,} chars)")

    def update_progress(self, document_id: str, chunks_created: int) -> None:
        """Update the number of chunks created for a document.

        Unknown document ids are ignored.
        """
        if document_id in self._progress:
            self._progress[document_id]["chunks_created"] = chunks_created

    def finish_chunking(
        self, document_id: str, total_chunks: int, strategy_name: Optional[str] = None
    ) -> None:
        """Finish tracking chunking progress for a document.

        Logs a completion message and drops the document's entry. Unknown
        document ids are ignored without logging.

        Args:
            document_id: Key of the document passed to :meth:`start_chunking`.
            total_chunks: Number of chunks reported in the completion message.
            strategy_name: Optional strategy name appended to the info-mode
                line and included in the debug payload.
        """
        progress = self._progress.get(document_id)
        if progress is None:
            return

        if self._is_debug_mode:
            # Detailed logging for debug mode
            self.logger.debug(
                "Finished chunking",
                extra={
                    "source": progress["source"],
                    "source_type": progress["source_type"],
                    "file_name": progress["file_name"],
                    "total_chunks": total_chunks,
                    "strategy": strategy_name,
                    "content_length": progress["content_length"],
                },
            )
        else:
            # Concise logging for info mode. This must be emitted at info
            # level: this branch only runs when debug mode is off, so a
            # debug-level call here would never be shown. It also matches the
            # info-mode branches of start_chunking() and log_fallback().
            strategy_info = f" using {strategy_name}" if strategy_name else ""
            self.logger.info(
                f"✓ Created {total_chunks} chunks from {progress['file_name']}{strategy_info}"
            )

        # Clean up
        del self._progress[document_id]

    def log_error(self, document_id: str, error: str) -> None:
        """Log an error during chunking.

        For a tracked document the message includes its file name and a
        structured payload, and the entry is removed. For an unknown document
        only the error text is logged.
        """
        progress = self._progress.get(document_id)
        if progress is None:
            self.logger.error(f"Chunking failed: {error}")
            return

        self.logger.error(
            f"Chunking failed for {progress['file_name']}: {error}",
            extra={
                "source": progress["source"],
                "source_type": progress["source_type"],
                "file_name": progress["file_name"],
                "error": error,
            },
        )
        # Clean up
        del self._progress[document_id]

    def log_fallback(self, document_id: str, reason: str) -> None:
        """Log when falling back to default chunking.

        The document stays tracked. Unknown document ids are ignored without
        logging.
        """
        progress = self._progress.get(document_id)
        if progress is None:
            return

        if self._is_debug_mode:
            self.logger.debug(
                "Falling back to default chunking",
                extra={
                    "source": progress["source"],
                    "source_type": progress["source_type"],
                    "file_name": progress["file_name"],
                    "reason": reason,
                },
            )
        else:
            self.logger.info(
                f"⚠ Falling back to default chunking for {progress['file_name']}: {reason}"
            )