Coverage for src/qdrant_loader/core/chunking/progress_tracker.py: 81%
37 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:50 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:50 +0000
1"""Progress tracking utility for chunking operations."""
3import logging
4from typing import Dict, Optional
5import structlog
class ChunkingProgressTracker:
    """Tracks and reports progress for chunking operations.

    Per-document state is held in an internal dict keyed by document id, from
    ``start_chunking`` until ``finish_chunking`` or ``log_error`` removes it.

    Verbosity is decided once, at construction time: if the root logger's
    effective level is DEBUG or lower, progress is reported at debug level
    with a structured ``extra`` payload; otherwise progress is reported as
    short one-line info messages. Errors are always logged at error level.
    """

    def __init__(self, logger: "structlog.BoundLogger"):
        """Create a tracker that reports through *logger*.

        Args:
            logger: Logger used for every message; the tracker calls its
                ``debug``, ``info`` and ``error`` methods.
        """
        self.logger = logger
        # document_id -> metadata recorded by start_chunking
        self._progress: Dict[str, Dict] = {}
        # Check if debug mode is enabled by checking the root logger level
        self._is_debug_mode = logging.getLogger().getEffectiveLevel() <= logging.DEBUG

    def start_chunking(
        self,
        document_id: str,
        source: str,
        source_type: str,
        content_length: int,
        file_name: str,
    ) -> None:
        """Start tracking chunking progress for a document.

        Any state already stored under *document_id* is overwritten.

        Args:
            document_id: Key under which the document's state is stored.
            source: Source value recorded and included in debug output.
            source_type: Source type recorded and included in debug output.
            content_length: Length of the content, shown with thousands
                separators in the concise message.
            file_name: File name used in the concise log messages.
        """
        self._progress[document_id] = {
            "source": source,
            "source_type": source_type,
            "content_length": content_length,
            "file_name": file_name,
            "chunks_created": 0,
            "started": True,
        }

        if self._is_debug_mode:
            # Detailed logging for debug mode
            self.logger.debug(
                "Starting chunking",
                extra={
                    "source": source,
                    "source_type": source_type,
                    "content_length": content_length,
                    "file_name": file_name,
                },
            )
        else:
            # Concise logging for info mode
            self.logger.info(f"Chunking {file_name} ({content_length:,} chars)")

    def update_progress(self, document_id: str, chunks_created: int) -> None:
        """Update the number of chunks created for a document.

        Does nothing when *document_id* is not currently tracked.
        """
        entry = self._progress.get(document_id)
        if entry is not None:
            entry["chunks_created"] = chunks_created

    def finish_chunking(
        self, document_id: str, total_chunks: int, strategy_name: Optional[str] = None
    ) -> None:
        """Finish tracking chunking progress for a document.

        Logs a completion message and drops the document's state. Does
        nothing when *document_id* is not currently tracked.

        Args:
            document_id: Key passed to ``start_chunking``.
            total_chunks: Number of chunks reported in the message.
            strategy_name: Optional strategy name; appended to the concise
                message only when truthy.
        """
        # Remove the entry up front so the state is cleaned up even if the
        # logger raises.
        progress = self._progress.pop(document_id, None)
        if progress is None:
            return

        if self._is_debug_mode:
            # Detailed logging for debug mode
            self.logger.debug(
                "Finished chunking",
                extra={
                    "source": progress["source"],
                    "source_type": progress["source_type"],
                    "file_name": progress["file_name"],
                    "total_chunks": total_chunks,
                    "strategy": strategy_name,
                    "content_length": progress["content_length"],
                },
            )
        else:
            # Concise logging for info mode. This branch only runs when debug
            # output is off, so the message must be emitted at info level to
            # be visible (matching start_chunking and log_fallback).
            strategy_info = f" using {strategy_name}" if strategy_name else ""
            self.logger.info(
                f"✓ Created {total_chunks} chunks from {progress['file_name']}{strategy_info}"
            )

    def log_error(self, document_id: str, error: str) -> None:
        """Log an error during chunking.

        When the document is tracked, the message names its file, carries a
        structured ``extra`` payload, and the document's state is dropped.
        Otherwise a generic failure message is logged.
        """
        progress = self._progress.pop(document_id, None)
        if progress is None:
            self.logger.error(f"Chunking failed: {error}")
            return

        self.logger.error(
            f"Chunking failed for {progress['file_name']}: {error}",
            extra={
                "source": progress["source"],
                "source_type": progress["source_type"],
                "file_name": progress["file_name"],
                "error": error,
            },
        )

    def log_fallback(self, document_id: str, reason: str) -> None:
        """Log when falling back to default chunking.

        The document stays tracked afterwards. Nothing is logged when
        *document_id* is not currently tracked.
        """
        progress = self._progress.get(document_id)
        if progress is None:
            return

        if self._is_debug_mode:
            self.logger.debug(
                "Falling back to default chunking",
                extra={
                    "source": progress["source"],
                    "source_type": progress["source_type"],
                    "file_name": progress["file_name"],
                    "reason": reason,
                },
            )
        else:
            self.logger.info(
                f"⚠ Falling back to default chunking for {progress['file_name']}: {reason}"
            )