Coverage for src/qdrant_loader/utils/logging.py: 65%
135 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:50 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:50 +0000
1"""Centralized logging configuration for the application."""
3import logging
4import structlog
5import sys
6import re
7from typing import Any, Dict
class QdrantVersionFilter(logging.Filter):
    """Drop log records whose message mentions a "version check"."""

    def filter(self, record):
        """Return False when the rendered message contains "version check".

        The comparison is case-insensitive because the message is lowercased
        before the substring test.
        """
        lowered = record.getMessage().lower()
        if "version check" in lowered:
            return False
        return True
class ApplicationFilter(logging.Filter):
    """Hide records emitted by HTTP client library loggers."""

    def filter(self, record):
        """Return True unless the logger name starts with a known HTTP library prefix.

        This is a plain prefix match on ``record.name``, so child loggers such
        as ``httpx.client`` are rejected as well.
        """
        noisy_prefixes = ("httpx", "httpcore", "urllib3")
        is_noisy = record.name.startswith(noisy_prefixes)
        return not is_noisy
class VerbosityFilter(logging.Filter):
    """Suppress records that carry chatty HTTP request/response details."""

    def filter(self, record):
        """Return False when the message contains any of the verbose markers."""
        text = record.getMessage()
        for marker in (
            "HTTP Request:",
            "Response status:",
            "Request headers:",
            "Response headers:",
        ):
            # The first marker found is enough to reject the record.
            if marker in text:
                return False
        return True
class CleanFileHandler(logging.FileHandler):
    """File handler that strips ANSI escape sequences before writing.

    Records are formatted with the handler's formatter, every match of
    ``ANSI_ESCAPE`` is removed, and the result is written followed by the
    handler's terminator.
    """

    # ANSI escape sequence pattern
    ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")

    def emit(self, record):
        """Write ``record`` to the log file with ANSI codes removed.

        Any exception raised while formatting or writing is routed to
        ``handleError`` instead of propagating, except ``RecursionError``,
        which is re-raised.
        """
        try:
            # logging.FileHandler opens its stream lazily when constructed with
            # delay=True and sets it back to None on close(). Because this
            # override replaces FileHandler.emit, it has to perform the same
            # lazy open itself; otherwise every record would fail on a None
            # stream.
            if self.stream is None:
                if self.mode != "w" or not getattr(self, "_closed", False):
                    self.stream = self._open()
            if self.stream is None:
                # Closed handler in truncate mode: reopening would wipe the
                # file, so the record is skipped (same policy as the stdlib).
                return

            # Get the formatted message and strip ANSI escape sequences
            clean_msg = self.ANSI_ESCAPE.sub("", self.format(record))
            # Write the clean message
            self.stream.write(clean_msg + self.terminator)
            self.flush()
        except RecursionError:
            # Mirrors logging.StreamHandler.emit: let recursion errors surface
            # rather than handling them from an already exhausted stack.
            raise
        except Exception:
            self.handleError(record)
class CleanFormatter(logging.Formatter):
    """Console formatter that prints INFO records bare and tags other levels.

    When ``use_custom_renderer`` is true the message is returned untouched.
    Otherwise INFO messages are returned as-is and every other level gets a
    ``[LEVEL]`` tag, placed after a leading ``HH:MM:SS`` timestamp if the
    message starts with one.
    """

    # Optional ANSI color code, an HH:MM:SS timestamp, optional ANSI code,
    # whitespace, then the remainder of the message. DOTALL lets the remainder
    # span several lines, so multi-line messages are not cut at the first
    # newline.
    _TIMESTAMP_PREFIX = re.compile(
        r"^(?:\x1b\[[0-9;]*m)?(\d{2}:\d{2}:\d{2})(?:\x1b\[[0-9;]*m)?\s+(.*)",
        re.DOTALL,
    )

    def __init__(self, use_custom_renderer=False):
        """Create the formatter.

        Args:
            use_custom_renderer: When true, ``format`` returns the record's
                message without adding anything to it.
        """
        super().__init__()
        self.use_custom_renderer = use_custom_renderer

    def format(self, record):
        """Return the console text for ``record``.

        Returns:
            The message itself for INFO records or when the custom renderer is
            in use; ``"HH:MM:SS [LEVEL] rest"`` when the message starts with a
            timestamp; ``"[LEVEL] message"`` otherwise.
        """
        message = record.getMessage()

        # Pre-rendered messages and INFO records are shown without decoration.
        if self.use_custom_renderer or record.levelno == logging.INFO:
            return message

        # For other levels, move the level tag behind the leading timestamp.
        match = self._TIMESTAMP_PREFIX.match(message)
        if match is None:
            # No timestamp found, just add level name
            return f"[{record.levelname}] {message}"

        timestamp, rest_of_message = match.groups()
        return f"{timestamp} [{record.levelname}] {rest_of_message}"
class FileRenderer:
    """Render an event dict as ``event key=value ...`` with no timestamp added."""

    def __call__(self, logger, method_name, event_dict):
        """Return the event text followed by any remaining key/value pairs.

        The ``"event"`` entry is removed from ``event_dict`` (defaulting to an
        empty string). If nothing else is left, that value is returned
        unchanged; otherwise the remaining items are appended as
        space-separated ``key=value`` pairs and surrounding whitespace is
        trimmed. ``logger`` and ``method_name`` are not used.
        """
        headline = event_dict.pop("event", "")
        if not event_dict:
            return headline

        pairs = [f"{key}={value}" for key, value in event_dict.items()]
        joined = " ".join(pairs)
        return f"{headline} {joined}".strip()
class FileFormatter(logging.Formatter):
    """Formatter for log files: full timestamp, optional level tag, no ANSI codes."""

    # ANSI escape sequence pattern
    ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")

    def format(self, record):
        """Return ``"<timestamp> | <message>"`` for INFO, with ``[LEVEL]`` added otherwise.

        ANSI escape sequences are stripped first, and a leading ``HH:MM:SS``
        timestamp already present in the message is dropped because this
        formatter writes its own ``YYYY-MM-DD HH:MM:SS`` prefix.
        """
        stamp = self.formatTime(record, "%Y-%m-%d %H:%M:%S")

        body = self.ANSI_ESCAPE.sub("", record.getMessage())
        # The pattern is anchored at the start of the string, so at most one
        # leading timestamp is removed and unmatched text is left untouched.
        body = re.sub(r"^\d{2}:\d{2}:\d{2}\s+", "", body)

        # INFO lines carry no level tag; every other level is labelled.
        level_tag = "" if record.levelno == logging.INFO else f"[{record.levelname}] "
        return f"{stamp} | {level_tag}{body}"
class CustomConsoleRenderer:
    """Console renderer that emits ``HH:MM:SS [LEVEL] message`` with optional colors."""

    def __init__(self, colors=True):
        """Create the renderer.

        Args:
            colors: When false, every color attribute is an empty string and
                the wrapped ``structlog.dev.ConsoleRenderer`` is built with
                ``colors=False``.
        """
        self.colors = colors
        self._console_renderer = structlog.dev.ConsoleRenderer(colors=colors)

        def ansi(code):
            # Collapse the escape code to "" when colors are disabled.
            return code if colors else ""

        # ANSI color codes
        self.gray = ansi("\033[90m")
        self.green = ansi("\033[92m")  # Bright green for INFO
        self.yellow = ansi("\033[93m")  # Bright yellow for WARNING
        self.red = ansi("\033[91m")  # Bright red for ERROR
        self.magenta = ansi("\033[95m")  # Bright magenta for CRITICAL
        self.cyan = ansi("\033[96m")  # Bright cyan for DEBUG
        self.reset = ansi("\033[0m")

    def _get_level_color(self, level):
        """Return the color code for an upper-case level name, or "" if unknown."""
        palette = {
            "DEBUG": self.cyan,
            "INFO": self.green,
            "WARNING": self.yellow,
            "ERROR": self.red,
            "CRITICAL": self.magenta,
        }
        return palette.get(level, "")

    def __call__(self, logger, method_name, event_dict):
        """Render ``event_dict`` as a single console line.

        The ``"timestamp"`` entry is removed from ``event_dict`` before the
        rest is handed to the wrapped console renderer. The level tag is the
        upper-cased ``method_name``. When the timestamp is a string of at
        least eight characters, its first eight characters are shown in front
        of the level tag; otherwise the line starts with the level tag.
        """
        stamp = event_dict.pop("timestamp", None)
        level_name = method_name.upper()

        # Let the stock console renderer format everything except the timestamp.
        rendered = self._console_renderer(logger, method_name, event_dict)

        # The level tag is built the same way whether or not a timestamp exists.
        tint = self._get_level_color(level_name)
        level_tag = f"{tint}[{level_name}]{self.reset}" if tint else f"[{level_name}]"

        if not (stamp and isinstance(stamp, str) and len(stamp) >= 8):
            # Fallback if no usable timestamp
            return f"{level_tag} {rendered}"

        clock = stamp[:8]  # Get HH:MM:SS part
        # Avoid printing the time twice if the rendered text already leads with it.
        if rendered.startswith(clock):
            rendered = rendered[len(clock):].lstrip()

        return f"{self.gray}{clock}{self.reset} {level_tag} {rendered}"
class LoggingConfig:
    """Centralized logging configuration.

    ``setup`` installs handlers on the root logger and configures structlog;
    ``get_logger`` returns a structlog logger, calling ``setup`` with default
    arguments first if it has not been called yet.
    """

    _initialized = False
    _current_config = None
    # Handlers created by the most recent setup() call. They are remembered so
    # the next setup() can close them (releasing the log file) instead of just
    # dropping the references.
    _installed_handlers = ()

    @classmethod
    def setup(
        cls,
        level: str = "INFO",
        format: str = "console",
        file: str | None = None,
        suppress_qdrant_warnings: bool = True,
        clean_output: bool = True,
    ) -> None:
        """Setup logging configuration.

        Calling this more than once replaces the previous configuration.

        Args:
            level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
            format: Log format (json or console)
            file: Path to log file (optional)
            suppress_qdrant_warnings: Whether to suppress Qdrant version check warnings
            clean_output: Whether to use clean, less verbose output

        Raises:
            ValueError: If ``level`` does not name a logging level.
        """
        try:
            # Convert string level to logging level
            numeric_level = getattr(logging, level.upper())
        except AttributeError:
            raise ValueError(f"Invalid log level: {level}") from None
        # The logging module also exposes upper-case names that are not levels
        # (e.g. BASIC_FORMAT); reject anything that did not resolve to an int.
        if not isinstance(numeric_level, int):
            raise ValueError(f"Invalid log level: {level}")

        # Reset logging configuration
        logging.getLogger().handlers = []
        # Close only the handlers this class created earlier; handlers that
        # other code attached to the root logger are detached but not closed.
        for stale_handler in cls._installed_handlers:
            stale_handler.close()
        cls._installed_handlers = ()
        structlog.reset_defaults()

        # Create a list of handlers
        handlers = []

        # Add console handler
        console_handler = logging.StreamHandler()

        if clean_output and format == "console":
            # Use clean formatter for console output
            console_handler.setFormatter(CleanFormatter(use_custom_renderer=True))
        else:
            console_handler.setFormatter(logging.Formatter("%(message)s"))

        console_handler.addFilter(ApplicationFilter())  # Only show our application logs

        if clean_output:
            console_handler.addFilter(VerbosityFilter())  # Reduce verbosity

        handlers.append(console_handler)

        # Add file handler if file is configured
        if file:
            file_handler = CleanFileHandler(file)
            file_handler.setFormatter(FileFormatter())
            # Don't apply verbosity filter to file logs - keep everything for debugging
            handlers.append(file_handler)

        # Configure standard logging
        logging.basicConfig(
            level=numeric_level,
            format="%(message)s",
            handlers=handlers,
        )
        cls._installed_handlers = tuple(handlers)

        # Manage the filter that suppresses Qdrant version check warnings.
        # Filters left by earlier setup() calls are removed first so they do
        # not pile up, and so suppress_qdrant_warnings=False really disables
        # the suppression on reconfiguration.
        qdrant_logger = logging.getLogger("qdrant_client")
        for existing in list(qdrant_logger.filters):
            if isinstance(existing, QdrantVersionFilter):
                qdrant_logger.removeFilter(existing)
        if suppress_qdrant_warnings:
            qdrant_logger.addFilter(QdrantVersionFilter())

        # Configure structlog processors based on format and clean_output
        if clean_output and format == "console":
            # Minimal processors for clean output
            processors = [
                structlog.stdlib.filter_by_level,
                structlog.processors.TimeStamper(fmt="%H:%M:%S"),
                CustomConsoleRenderer(colors=True),
            ]
        else:
            # Full processors for detailed output
            processors = [
                structlog.stdlib.filter_by_level,
                structlog.stdlib.add_logger_name,
                structlog.stdlib.add_log_level,
                structlog.processors.TimeStamper(fmt="iso"),
                structlog.processors.StackInfoRenderer(),
                structlog.processors.UnicodeDecoder(),
                structlog.processors.CallsiteParameterAdder(
                    [
                        structlog.processors.CallsiteParameter.FILENAME,
                        structlog.processors.CallsiteParameter.FUNC_NAME,
                        structlog.processors.CallsiteParameter.LINENO,
                    ]
                ),
            ]

            if format == "json":
                processors.append(structlog.processors.JSONRenderer())
            else:
                processors.append(structlog.dev.ConsoleRenderer(colors=True))

        # Configure structlog
        structlog.configure(
            processors=processors,
            wrapper_class=structlog.make_filtering_bound_logger(numeric_level),
            logger_factory=structlog.stdlib.LoggerFactory(),
            cache_logger_on_first_use=False,  # Disable caching to ensure new configuration is used
        )

        cls._initialized = True
        cls._current_config = (
            level,
            format,
            file,
            suppress_qdrant_warnings,
            clean_output,
        )

    @classmethod
    def get_logger(cls, name: str | None = None) -> structlog.BoundLogger:
        """Get a logger instance.

        Args:
            name: Logger name, passed unchanged to ``structlog.get_logger``.

        Returns:
            structlog.BoundLogger: Logger instance
        """
        if not cls._initialized:
            # Initialize with default settings if not already initialized
            cls.setup()
        return structlog.get_logger(name)