Coverage for src/qdrant_loader/utils/logging.py: 65%

135 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-04 05:50 +0000

1"""Centralized logging configuration for the application.""" 

2 

3import logging 

4import structlog 

5import sys 

6import re 

7from typing import Any, Dict 

8 

9 

class QdrantVersionFilter(logging.Filter):
    """Drop log records whose message mentions a Qdrant "version check"."""

    def filter(self, record):
        """Return False when the rendered message contains "version check".

        The comparison is case-insensitive; every other record passes.
        """
        lowered = record.getMessage().lower()
        mentions_version_check = "version check" in lowered
        return not mentions_version_check

15 

16 

class ApplicationFilter(logging.Filter):
    """Hide records emitted by the HTTP client libraries we depend on."""

    def filter(self, record):
        """Return False for loggers named httpx*, httpcore* or urllib3*."""
        for noisy_prefix in ("httpx", "httpcore", "urllib3"):
            if record.name.startswith(noisy_prefix):
                return False
        return True

23 

24 

class VerbosityFilter(logging.Filter):
    """Suppress chatty HTTP request/response trace messages."""

    def filter(self, record):
        """Return False when the message contains one of the noisy markers."""
        text = record.getMessage()
        noisy_markers = (
            "HTTP Request:",
            "Response status:",
            "Request headers:",
            "Response headers:",
        )
        for marker in noisy_markers:
            if marker in text:
                return False
        return True

38 

39 

class CleanFileHandler(logging.FileHandler):
    """File handler that strips ANSI color codes from log messages.

    Behaves like ``logging.FileHandler`` except that escape sequences are
    removed from the formatted record before it is written.
    """

    # ANSI escape sequence pattern
    ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")

    def emit(self, record):
        """Write *record* to the file with ANSI escape sequences removed.

        The stream is opened lazily when the handler was created with
        ``delay=True``, mirroring ``logging.FileHandler.emit``. Any failure
        other than ``RecursionError`` is reported through ``handleError``.
        """
        try:
            if self.stream is None:
                # Same rule as the stdlib handler: do not reopen (and thereby
                # truncate) a file opened in "w" mode after close().
                if self.mode != "w" or not getattr(self, "_closed", False):
                    self.stream = self._open()
            if self.stream is None:
                return

            clean_msg = self.ANSI_ESCAPE.sub("", self.format(record))
            self.stream.write(clean_msg + self.terminator)
            self.flush()
        except RecursionError:
            # Re-raised as in logging.StreamHandler.emit; handling it here
            # could recurse further.
            raise
        except Exception:
            self.handleError(record)

59 

60 

class CleanFormatter(logging.Formatter):
    """Custom formatter that shows only the message for INFO level logs.

    Other levels are rendered as ``HH:MM:SS [LEVEL] message`` when the message
    starts with a timestamp, or ``[LEVEL] message`` otherwise. Exception and
    stack information attached to the record is appended, as the base
    ``logging.Formatter`` does.
    """

    # Optional leading "HH:MM:SS" timestamp, possibly wrapped in ANSI color
    # codes. DOTALL keeps the remaining lines of a multi-line message in
    # group 2 instead of cutting the message at the first newline.
    _TIMESTAMP_PREFIX = re.compile(
        r"^(?:\x1b\[[0-9;]*m)?(\d{2}:\d{2}:\d{2})(?:\x1b\[[0-9;]*m)?\s+(.*)",
        re.DOTALL,
    )

    def __init__(self, use_custom_renderer=False):
        """Create the formatter.

        Args:
            use_custom_renderer: When true, messages are assumed to be fully
                formatted already and are passed through unchanged.
        """
        super().__init__()
        self.use_custom_renderer = use_custom_renderer

    def format(self, record):
        """Return the display text for *record*."""
        message = record.getMessage()

        if self.use_custom_renderer or record.levelno == logging.INFO:
            # Pre-rendered messages and INFO records are shown as-is.
            text = message
        else:
            match = self._TIMESTAMP_PREFIX.match(message)
            if match:
                # Reorder to "timestamp [LEVEL] rest".
                timestamp, rest_of_message = match.groups()
                text = f"{timestamp} [{record.levelname}] {rest_of_message}"
            else:
                # No timestamp found, just add level name
                text = f"[{record.levelname}] {message}"

        return self._append_exception_text(record, text)

    def _append_exception_text(self, record, text):
        """Append traceback and stack info the way logging.Formatter.format does."""
        if record.exc_info and not record.exc_text:
            # Cache the rendered traceback on the record, as the stdlib does.
            record.exc_text = self.formatException(record.exc_info)
        if record.exc_text:
            if text[-1:] != "\n":
                text += "\n"
            text += record.exc_text
        if record.stack_info:
            if text[-1:] != "\n":
                text += "\n"
            text += self.formatStack(record.stack_info)
        return text

95 

96 

class FileRenderer:
    """Custom renderer for file output without timestamps (FileFormatter will add them)."""

    def __call__(self, logger, method_name, event_dict):
        """Render *event_dict* as ``"<event> key=value ..."``.

        Args:
            logger: Unused; part of the structlog processor signature.
            method_name: Unused; part of the structlog processor signature.
            event_dict: Event dictionary; its ``"event"`` key is removed.

        Returns:
            str: The event text followed by any remaining key=value pairs.
        """
        # Coerce to text so a non-string event (number, exception, ...) is
        # rendered the same way with or without extra fields.
        event = str(event_dict.pop("event", ""))

        if not event_dict:
            return event

        extras = " ".join(f"{key}={value}" for key, value in event_dict.items())
        return f"{event} {extras}".strip()

110 

111 

class FileFormatter(logging.Formatter):
    """Custom formatter for file output that provides clean, readable logs without ANSI codes.

    INFO records are written as ``YYYY-MM-DD HH:MM:SS | message``; all other
    levels as ``YYYY-MM-DD HH:MM:SS | [LEVEL] message``. Exception and stack
    information attached to the record is appended, as the base
    ``logging.Formatter`` does.
    """

    # ANSI escape sequence pattern
    ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")

    # Leading "HH:MM:SS " prefix found on already-rendered console messages.
    _TIME_PREFIX = re.compile(r"^\d{2}:\d{2}:\d{2}\s+")

    def format(self, record):
        """Return the file log line for *record*."""
        timestamp = self.formatTime(record, "%Y-%m-%d %H:%M:%S")

        # The message may already be rendered for the console: strip colors,
        # then drop its short timestamp since the full one is added here.
        clean_message = self.ANSI_ESCAPE.sub("", record.getMessage())
        clean_message = self._TIME_PREFIX.sub("", clean_message)

        if record.levelno == logging.INFO:
            # For INFO level, use a clean format: timestamp | message
            line = f"{timestamp} | {clean_message}"
        else:
            # For other levels, include the level: timestamp | [LEVEL] message
            line = f"{timestamp} | [{record.levelname}] {clean_message}"

        return self._append_exception_text(record, line)

    def _append_exception_text(self, record, text):
        """Append traceback and stack info the way logging.Formatter.format does."""
        if record.exc_info and not record.exc_text:
            # Cache the rendered traceback on the record, as the stdlib does.
            record.exc_text = self.formatException(record.exc_info)
        if record.exc_text:
            if text[-1:] != "\n":
                text += "\n"
            text += record.exc_text
        if record.stack_info:
            if text[-1:] != "\n":
                text += "\n"
            text += self.formatStack(record.stack_info)
        return text

145 

146 

class CustomConsoleRenderer:
    """Console renderer producing ``HH:MM:SS [LEVEL] message`` lines.

    The message body is delegated to structlog's ``ConsoleRenderer``; this
    class only adds the (optionally colored) timestamp and level tag.
    """

    def __init__(self, colors=True):
        """Create the renderer; with ``colors`` false all color codes are empty."""
        self.colors = colors
        self._console_renderer = structlog.dev.ConsoleRenderer(colors=colors)
        # ANSI color codes, blanked out when colors are disabled.
        palette = {
            "gray": "\033[90m",
            "green": "\033[92m",  # INFO
            "yellow": "\033[93m",  # WARNING
            "red": "\033[91m",  # ERROR
            "magenta": "\033[95m",  # CRITICAL
            "cyan": "\033[96m",  # DEBUG
            "reset": "\033[0m",
        }
        for attribute, code in palette.items():
            setattr(self, attribute, code if colors else "")

    def _get_level_color(self, level):
        """Return the color code for *level*, or "" for an unknown level."""
        level_names = ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
        level_codes = (self.cyan, self.green, self.yellow, self.red, self.magenta)
        return dict(zip(level_names, level_codes)).get(level, "")

    def __call__(self, logger, method_name, event_dict):
        """Render *event_dict* as a single console line.

        The ``"timestamp"`` key is removed from *event_dict*; the level tag is
        derived from *method_name*.
        """
        stamp = event_dict.pop("timestamp", None)
        level = method_name.upper()

        # Everything except timestamp and level is rendered by structlog.
        body = self._console_renderer(logger, method_name, event_dict)

        tint = self._get_level_color(level)
        level_tag = f"{tint}[{level}]{self.reset}" if tint else f"[{level}]"

        has_clock = bool(stamp) and isinstance(stamp, str) and len(stamp) >= 8
        if not has_clock:
            # No usable timestamp: level tag and message only.
            return f"{level_tag} {body}"

        clock = stamp[:8]  # HH:MM:SS part
        if body.startswith(clock):
            # Avoid printing the timestamp twice.
            body = body[len(clock) :].lstrip()

        return f"{self.gray}{clock}{self.reset} {level_tag} {body}"

209 

210 

class LoggingConfig:
    """Centralized logging configuration."""

    _initialized = False
    _current_config = None
    # Handlers and Qdrant filter installed by the most recent setup() call,
    # remembered so the next call can release them instead of leaking them.
    _installed_handlers = ()
    _qdrant_filter = None

    @classmethod
    def setup(
        cls,
        level: str = "INFO",
        format: str = "console",
        file: str | None = None,
        suppress_qdrant_warnings: bool = True,
        clean_output: bool = True,
    ) -> None:
        """Setup logging configuration.

        Replaces the root logger's handlers and reconfigures structlog. It may
        be called repeatedly; handlers and the Qdrant filter installed by an
        earlier call are released first.

        Args:
            level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
            format: Log format (json or console)
            file: Path to log file (optional)
            suppress_qdrant_warnings: Whether to suppress Qdrant version check warnings
            clean_output: Whether to use clean, less verbose output

        Raises:
            ValueError: If ``level`` is not a valid logging level name.
        """
        try:
            # Convert string level to logging level
            numeric_level = getattr(logging, level.upper())
        except AttributeError:
            raise ValueError(f"Invalid log level: {level}") from None
        if not isinstance(numeric_level, int):
            # Upper-case non-level attributes such as logging.BASIC_FORMAT
            # would otherwise slip through as a "level".
            raise ValueError(f"Invalid log level: {level}")

        # Reset logging configuration
        logging.getLogger().handlers = []
        for stale_handler in cls._installed_handlers:
            # Close only handlers this class created, so a previously opened
            # log file is not left dangling after reconfiguration.
            try:
                stale_handler.close()
            except OSError:
                # Best effort: a failing flush/close must not block setup.
                pass
        cls._installed_handlers = ()
        structlog.reset_defaults()

        # Create a list of handlers
        handlers = []

        # Add console handler
        console_handler = logging.StreamHandler()

        if clean_output and format == "console":
            # Use clean formatter for console output
            console_handler.setFormatter(CleanFormatter(use_custom_renderer=True))
        else:
            console_handler.setFormatter(logging.Formatter("%(message)s"))

        console_handler.addFilter(ApplicationFilter())  # Only show our application logs

        if clean_output:
            console_handler.addFilter(VerbosityFilter())  # Reduce verbosity

        handlers.append(console_handler)

        # Add file handler if file is configured
        if file:
            file_handler = CleanFileHandler(file)
            file_handler.setFormatter(FileFormatter())
            # Don't apply verbosity filter to file logs - keep everything for debugging
            handlers.append(file_handler)

        # Configure standard logging
        logging.basicConfig(
            level=numeric_level,
            format="%(message)s",
            handlers=handlers,
        )
        cls._installed_handlers = tuple(handlers)

        # Drop the filter from an earlier setup() so filters do not pile up
        # and so suppress_qdrant_warnings=False actually re-enables warnings.
        if cls._qdrant_filter is not None:
            logging.getLogger("qdrant_client").removeFilter(cls._qdrant_filter)
            cls._qdrant_filter = None

        # Add filter to suppress Qdrant version check warnings
        if suppress_qdrant_warnings:
            qdrant_logger = logging.getLogger("qdrant_client")
            cls._qdrant_filter = QdrantVersionFilter()
            qdrant_logger.addFilter(cls._qdrant_filter)

        # Configure structlog processors based on format and clean_output
        if clean_output and format == "console":
            # Minimal processors for clean output
            processors = [
                structlog.stdlib.filter_by_level,
                structlog.processors.TimeStamper(fmt="%H:%M:%S"),
                CustomConsoleRenderer(colors=True),
            ]
        else:
            # Full processors for detailed output
            processors = [
                structlog.stdlib.filter_by_level,
                structlog.stdlib.add_logger_name,
                structlog.stdlib.add_log_level,
                structlog.processors.TimeStamper(fmt="iso"),
                structlog.processors.StackInfoRenderer(),
                structlog.processors.UnicodeDecoder(),
                structlog.processors.CallsiteParameterAdder(
                    [
                        structlog.processors.CallsiteParameter.FILENAME,
                        structlog.processors.CallsiteParameter.FUNC_NAME,
                        structlog.processors.CallsiteParameter.LINENO,
                    ]
                ),
            ]

            if format == "json":
                processors.append(structlog.processors.JSONRenderer())
            else:
                processors.append(structlog.dev.ConsoleRenderer(colors=True))

        # Configure structlog
        structlog.configure(
            processors=processors,
            wrapper_class=structlog.make_filtering_bound_logger(numeric_level),
            logger_factory=structlog.stdlib.LoggerFactory(),
            cache_logger_on_first_use=False,  # Disable caching to ensure new configuration is used
        )

        cls._initialized = True
        cls._current_config = (
            level,
            format,
            file,
            suppress_qdrant_warnings,
            clean_output,
        )

    @classmethod
    def get_logger(cls, name: str | None = None) -> structlog.BoundLogger:
        """Get a logger instance.

        Calls ``setup()`` with default settings first if logging has not been
        configured yet.

        Args:
            name: Logger name, forwarded unchanged to ``structlog.get_logger``.
                NOTE(review): this docstring previously said that None uses
                the calling module's name; with ``structlog.stdlib.LoggerFactory``
                an explicit None appears to select the root logger instead -
                confirm against the structlog documentation.

        Returns:
            structlog.BoundLogger: Logger instance
        """
        if not cls._initialized:
            # Initialize with default settings if not already initialized
            cls.setup()
        return structlog.get_logger(name)
342 # Initialize with default settings if not already initialized 

343 cls.setup() 

344 return structlog.get_logger(name)