Coverage for src/qdrant_loader/cli/cli.py: 55%

278 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-04 05:50 +0000

1"""CLI module for QDrant Loader.""" 

2 

3import asyncio 

4import json 

5import os 

6import signal 

7from pathlib import Path 

8 

9import click 

10import tomli 

11from click.decorators import group, option 

12from click.exceptions import ClickException 

13from click.types import Choice 

14from click.types import Path as ClickPath 

15from click.utils import echo 

16 

17from qdrant_loader.cli.asyncio import async_command 

18 

# Keep start-up imports minimal: heavier modules are imported on first use.
logger = None  # Populated lazily by _get_logger() / _setup_logging()


def _get_logger():
    """Return the module-level logger, creating it on first use.

    The logging module is imported inside the function rather than at the top
    of the file so that importing this module stays cheap.
    """
    global logger
    if logger is not None:
        return logger

    from qdrant_loader.utils.logging import LoggingConfig

    logger = LoggingConfig.get_logger(__name__)
    return logger

31 

32 

def _get_version() -> str:
    """Return the installed ``qdrant-loader`` version string.

    Returns:
        The version reported by ``importlib.metadata``, or ``"unknown"`` when
        the lookup fails for any reason.
    """
    try:
        from importlib.metadata import version as _distribution_version

        return _distribution_version("qdrant-loader")
    except Exception:
        # One handler covers both a missing importlib.metadata (ImportError is
        # an Exception subclass) and a distribution that cannot be found.
        return "unknown"

45 

46 

def _check_for_updates():
    """Trigger the version-update check, ignoring any failure.

    The current version is passed to ``check_version_async`` with
    ``silent=False``. Any exception (including a failed import of the
    version-check module) is swallowed so the CLI keeps working.
    """
    try:
        # Imported lazily to keep CLI start-up fast.
        from qdrant_loader.utils.version_check import check_version_async

        check_version_async(_get_version(), silent=False)
    except Exception:
        # Deliberately best effort: an update check must never break a command.
        pass

58 

59 

@click.group(name="qdrant-loader")
@click.option(
    "--log-level",
    type=click.Choice(
        ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        case_sensitive=False,
    ),
    default="INFO",
    help="Set the logging level.",
)
@click.version_option(
    version=_get_version(),
    message="qDrant Loader v.%(version)s",
)
def cli(log_level: str = "INFO") -> None:
    """QDrant Loader CLI."""
    # The docstring above is the text click shows for --help; keep it short.
    # Logging is configured before anything else so later steps can log.
    _setup_logging(log_level)

    # Then trigger the update check; failures inside it are swallowed.
    _check_for_updates()

80 

81 

def _setup_logging(log_level: str, workspace_config=None) -> None:
    """Configure logging and refresh the module-level ``logger``.

    Args:
        log_level: Logging level passed through to ``LoggingConfig.setup``.
        workspace_config: Optional workspace configuration; when truthy its
            ``logs_path`` is used as the log file, otherwise
            ``"qdrant-loader.log"`` is used.

    Raises:
        ClickException: If any step of the logging setup fails.
    """
    global logger
    try:
        # Imported lazily to keep CLI start-up fast.
        from qdrant_loader.utils.logging import LoggingConfig

        log_file = (
            str(workspace_config.logs_path)
            if workspace_config
            else "qdrant-loader.log"
        )

        # The output format is fixed to "console" here.
        LoggingConfig.setup(level=log_level, format="console", file=log_file)

        # Re-fetch the logger so it reflects the configuration just applied.
        logger = LoggingConfig.get_logger(__name__)
    except Exception as e:
        raise ClickException(f"Failed to setup logging: {str(e)!s}") from e

115 

116 

def _setup_workspace(workspace_path: Path):
    """Create (if needed) and validate the workspace at ``workspace_path``.

    Args:
        workspace_path: Path to the workspace directory.

    Returns:
        The workspace configuration object returned by ``setup_workspace``.

    Raises:
        ClickException: If workspace setup fails. A ``ValueError`` is reported
            with its own message; any other error is prefixed with
            "Failed to setup workspace".
    """
    try:
        # Imported lazily to keep CLI start-up fast.
        from qdrant_loader.config.workspace import (
            WorkspaceConfig,
            create_workspace_structure,
            setup_workspace,
        )

        create_workspace_structure(workspace_path)
        workspace_config = setup_workspace(workspace_path)

        log = _get_logger()
        log.info("Using workspace", workspace=str(workspace_config.workspace_path))

        env_file = workspace_config.env_path
        if env_file:
            log.info("Environment file found", env_path=str(env_file))

        config_file = workspace_config.config_path
        if config_file:
            log.info("Config file found", config_path=str(config_file))

        return workspace_config
    except ValueError as e:
        # Validation errors already carry a user-facing message.
        raise ClickException(str(e)) from e
    except Exception as e:
        raise ClickException(f"Failed to setup workspace: {str(e)!s}") from e

162 

163 

def _load_config_with_workspace(
    workspace_config=None,
    config_path: Path | None = None,
    env_path: Path | None = None,
    skip_validation: bool = False,
) -> None:
    """Load configuration in workspace mode or in traditional mode.

    Args:
        workspace_config: Optional workspace configuration; when truthy the
            workspace initializer is used and the two paths are ignored.
        config_path: Optional path to a config file (traditional mode).
        env_path: Optional path to a .env file (traditional mode).
        skip_validation: If True, skip directory validation and creation.

    Raises:
        ClickException: If loading fails in either mode.
    """
    try:
        # Imported lazily to keep CLI start-up fast.
        from qdrant_loader.config import (
            initialize_config,
            initialize_config_with_workspace,
        )

        if not workspace_config:
            _get_logger().debug("Loading configuration in traditional mode")
            _load_config(config_path, env_path, skip_validation)
            return

        _get_logger().debug("Loading configuration in workspace mode")
        initialize_config_with_workspace(
            workspace_config, skip_validation=skip_validation
        )
    except Exception as e:
        _get_logger().error("config_load_failed", error=str(e))
        raise ClickException(f"Failed to load configuration: {str(e)!s}") from e

199 

200 

def _create_database_directory(path: Path) -> bool:
    """Ask the user whether to create ``path`` and create it on approval.

    Args:
        path: Path to the database directory.

    Returns:
        True if the directory was created, False if the user declined.

    Raises:
        ClickException: If logging, prompting or directory creation fails.
    """
    try:
        log = _get_logger()
        log.info("The database directory does not exist", path=str(path.absolute()))

        approved = click.confirm(
            "Would you like to create this directory?", default=True
        )
        if not approved:
            return False

        path.mkdir(parents=True, mode=0o755)
        log.info(f"Created directory: {path.absolute()}")
        return True
    except Exception as e:
        raise ClickException(f"Failed to create directory: {str(e)!s}") from e

221 

222 

def _load_config(
    config_path: Path | None = None,
    env_path: Path | None = None,
    skip_validation: bool = False,
) -> None:
    """Load configuration from an explicit file or from ``config.yaml``.

    Args:
        config_path: Optional path to a config file.
        env_path: Optional path to a .env file.
        skip_validation: If True, skip directory validation and creation.

    Raises:
        ClickException: If no config file can be found, if the user declines
            to create a missing database directory, or if loading fails.
    """
    try:
        # Imported lazily to keep CLI start-up fast.
        from qdrant_loader.config import initialize_config

        if config_path is not None:
            # Step 1: an explicit config path always wins.
            if not config_path.exists():
                _get_logger().error("config_not_found", path=str(config_path))
                raise ClickException(f"Config file not found: {str(config_path)!s}")
            initialize_config(config_path, env_path, skip_validation=skip_validation)
        else:
            # Step 2: otherwise fall back to config.yaml in the current folder.
            default_config = Path("config.yaml")
            if not default_config.exists():
                # Step 3: nothing usable was found.
                raise ClickException(
                    f"No config file found. Please specify a config file or create config.yaml in the current directory: {str(default_config)!s}"
                )
            initialize_config(default_config, env_path, skip_validation=skip_validation)

    except Exception as exc:
        # Imported here so the error type is only loaded when something failed.
        from qdrant_loader.config.state import DatabaseDirectoryError

        if not isinstance(exc, DatabaseDirectoryError):
            if isinstance(exc, ClickException):
                # Already user-facing: pass it on without chaining context.
                raise exc from None
            _get_logger().error("config_load_failed", error=str(exc))
            raise ClickException(
                f"Failed to load configuration: {str(exc)!s}"
            ) from exc

        if skip_validation:
            # Displaying the config does not need the directory to exist.
            return

        # Expand "~" in the path reported by the error before creating it.
        missing_dir = Path(os.path.expanduser(str(exc.path)))
        if not _create_database_directory(missing_dir):
            raise ClickException(
                "Database directory creation declined. Exiting."
            ) from exc

        # The directory exists now, so initialize once more with the same
        # file that was selected above.
        retry_target = config_path if config_path is not None else Path("config.yaml")
        initialize_config(retry_target, env_path, skip_validation=skip_validation)

289 

290 

def _check_settings():
    """Return the loaded settings object.

    Raises:
        ClickException: If ``get_settings()`` returns ``None``.
    """
    # Imported lazily to keep CLI start-up fast.
    from qdrant_loader.config import get_settings

    settings = get_settings()
    if settings is not None:
        return settings

    _get_logger().error("settings_not_available")
    raise ClickException("Settings not available")

301 

302 

async def _run_init(settings, force: bool) -> None:
    """Initialize the collection and log the outcome.

    Args:
        settings: Settings object passed to ``init_collection``; its
            ``qdrant_collection_name`` is included in the success log.
        force: Passed to ``init_collection``; also selects the success message
            ("recreated" when True, "initialized" otherwise).

    Raises:
        ClickException: If ``init_collection`` returns a falsy result or
            raises.
    """
    try:
        # Imported lazily to keep CLI start-up fast.
        from qdrant_loader.core.init_collection import init_collection

        result = await init_collection(settings, force)
        if not result:
            raise ClickException("Failed to initialize collection")

        message = (
            "Collection recreated successfully"
            if force
            else "Collection initialized successfully"
        )
        _get_logger().info(message, collection=settings.qdrant_collection_name)

    except ClickException as e:
        # Already user-facing. Re-raise unchanged: wrapping it again produced
        # "Failed to initialize collection: Failed to initialize collection".
        _get_logger().error("init_failed", error=str(e))
        raise
    except Exception as e:
        _get_logger().error("init_failed", error=str(e))
        raise ClickException(f"Failed to initialize collection: {e}") from e

328 

329 

@cli.command()
@option(
    "--workspace",
    type=ClickPath(path_type=Path),
    help="Workspace directory containing config.yaml and .env files. All output will be stored here.",
)
@option(
    "--config",
    type=ClickPath(exists=True, path_type=Path),
    help="Path to config file.",
)
@option(
    "--env",
    type=ClickPath(exists=True, path_type=Path),
    help="Path to .env file.",
)
@option("--force", is_flag=True, help="Force reinitialization of collection.")
@option(
    "--log-level",
    type=Choice(
        ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        case_sensitive=False,
    ),
    default="INFO",
    help="Set the logging level.",
)
@async_command
async def init(
    workspace: Path | None,
    config: Path | None,
    env: Path | None,
    force: bool,
    log_level: str,
):
    """Initialize QDrant collection."""
    # The docstring above is the text click shows for --help.
    # Every failure leaves this command as a ClickException, after being
    # logged as "init_failed".
    try:
        # Imported lazily to keep CLI start-up fast.
        from qdrant_loader.config.workspace import validate_workspace_flags

        validate_workspace_flags(workspace, config, env)

        # Workspace first (if any), then logging, then configuration.
        workspace_config = _setup_workspace(workspace) if workspace else None
        _setup_logging(log_level, workspace_config)
        _load_config_with_workspace(workspace_config, config, env)
        settings = _check_settings()

        db_path = settings.global_config.state_management.database_path
        if db_path != ":memory:":
            # Make sure the parent directory exists, asking the user if not.
            db_dir = Path(db_path).parent
            if not db_dir.exists() and not _create_database_directory(db_dir):
                raise ClickException("Database directory creation declined. Exiting.")

            # With --force an existing state database file is removed.
            db_exists = os.path.exists(db_path)
            if force and db_exists:
                _get_logger().info("Resetting state database", database_path=db_path)
                os.remove(db_path)
                _get_logger().info(
                    "State database reset completed", database_path=db_path
                )
            elif force:
                _get_logger().info(
                    "State database reset skipped (no existing database)",
                    database_path=db_path,
                )

        await _run_init(settings, force)

    except Exception as e:
        from qdrant_loader.utils.logging import LoggingConfig

        LoggingConfig.get_logger(__name__).error("init_failed", error=str(e))
        if isinstance(e, ClickException):
            # Already user-facing: pass it on without chaining context.
            raise e from None
        raise ClickException(f"Failed to initialize collection: {str(e)!s}") from e

413 

414 

async def _cancel_all_tasks():
    """Cancel every other unfinished task on the running loop and await them.

    The calling task is excluded. ``asyncio.all_tasks()`` includes the task
    that is running this coroutine; cancelling it and then awaiting a gather
    that contains it would throw ``CancelledError`` into the caller instead of
    letting it finish its shutdown.
    """
    current = asyncio.current_task()
    tasks = [
        task
        for task in asyncio.all_tasks()
        if task is not current and not task.done()
    ]
    for task in tasks:
        task.cancel()
    # return_exceptions=True collects each task's CancelledError (or other
    # error) as a result instead of raising it here.
    await asyncio.gather(*tasks, return_exceptions=True)

420 

421 

@cli.command()
@option(
    "--workspace",
    type=ClickPath(path_type=Path),
    help="Workspace directory containing config.yaml and .env files. All output will be stored here.",
)
@option(
    "--config", type=ClickPath(exists=True, path_type=Path), help="Path to config file."
)
@option("--env", type=ClickPath(exists=True, path_type=Path), help="Path to .env file.")
@option(
    "--project",
    type=str,
    help="Project ID to process. If specified, --source-type and --source will filter within this project.",
)
@option(
    "--source-type",
    type=str,
    help="Source type to process (e.g., confluence, jira, git). If --project is specified, filters within that project; otherwise applies to all projects.",
)
@option(
    "--source",
    type=str,
    help="Source name to process. If --project is specified, filters within that project; otherwise applies to all projects.",
)
@option(
    "--log-level",
    type=Choice(
        ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], case_sensitive=False
    ),
    default="INFO",
    help="Set the logging level.",
)
@option(
    "--profile/--no-profile",
    default=False,
    help="Run the ingestion under cProfile and save output to 'profile.out' (for performance analysis).",
)
@async_command
async def ingest(
    workspace: Path | None,
    config: Path | None,
    env: Path | None,
    project: str | None,
    source_type: str | None,
    source: str | None,
    log_level: str,
    profile: bool,
):
    """Ingest documents from configured sources.

    Examples:
      # Ingest all projects
      qdrant-loader ingest

      # Ingest specific project
      qdrant-loader ingest --project my-project

      # Ingest specific source type from all projects
      qdrant-loader ingest --source-type git

      # Ingest specific source type from specific project
      qdrant-loader ingest --project my-project --source-type git

      # Ingest specific source from specific project
      qdrant-loader ingest --project my-project --source-type git --source my-repo
    """
    # The docstring above is the text click shows for --help, so parameter
    # notes live here instead:
    #   workspace/config/env - checked together by validate_workspace_flags.
    #   project/source_type/source - forwarded to pipeline.process_documents.
    #   profile - when True the run is wrapped in cProfile and the stats are
    #             dumped to "profile.out".
    # Every failure leaves this command as a ClickException, after being
    # logged as "ingest_failed".
    try:
        # Imported lazily to keep CLI start-up fast.
        from qdrant_loader.config.workspace import validate_workspace_flags
        from qdrant_loader.utils.logging import LoggingConfig

        validate_workspace_flags(workspace, config, env)

        # Workspace first (if any), then logging, then configuration.
        workspace_config = None
        if workspace:
            workspace_config = _setup_workspace(workspace)

        _setup_logging(log_level, workspace_config)

        _load_config_with_workspace(workspace_config, config, env)
        settings = _check_settings()

        # Imported lazily to keep CLI start-up fast.
        from qdrant_loader.core.qdrant_manager import QdrantManager

        qdrant_manager = QdrantManager(settings)

        async def run_ingest():
            # Imported lazily to keep CLI start-up fast.
            from qdrant_loader.core.async_ingestion_pipeline import (
                AsyncIngestionPipeline,
            )

            # In workspace mode the pipeline gets the workspace metrics path.
            if workspace_config:
                pipeline = AsyncIngestionPipeline(
                    settings, qdrant_manager, metrics_dir=workspace_config.metrics_path
                )
            else:
                pipeline = AsyncIngestionPipeline(settings, qdrant_manager)

            try:
                await pipeline.process_documents(
                    project_id=project,
                    source_type=source_type,
                    source=source,
                )
            finally:
                # Always release pipeline resources, even on failure.
                await pipeline.cleanup()

        loop = asyncio.get_running_loop()
        stop_event = asyncio.Event()

        def _handle_sigint():
            # Only records the request; the cancellation itself happens in the
            # finally block below.
            logger = LoggingConfig.get_logger(__name__)
            logger.debug(" SIGINT received, cancelling all tasks...")
            stop_event.set()

        try:
            loop.add_signal_handler(signal.SIGINT, _handle_sigint)
        except NotImplementedError:
            # Event loops without signal support (e.g. on Windows) raise here.
            # Continue without the custom handler rather than failing the run.
            LoggingConfig.get_logger(__name__).debug(
                "Event loop does not support signal handlers; SIGINT uses default handling"
            )

        try:
            if profile:
                import cProfile

                profiler = cProfile.Profile()
                profiler.enable()
                try:
                    await run_ingest()
                finally:
                    # Dump the stats even if ingestion failed.
                    profiler.disable()
                    profiler.dump_stats("profile.out")
                    LoggingConfig.get_logger(__name__).info(
                        "Profile saved to profile.out"
                    )
            else:
                await run_ingest()
            logger = LoggingConfig.get_logger(__name__)
            logger.info("Pipeline finished, awaiting cleanup.")
            # Wait for every other task that is still running.
            pending = [
                t
                for t in asyncio.all_tasks()
                if t is not asyncio.current_task() and not t.done()
            ]
            if pending:
                logger.debug(f" Awaiting {len(pending)} pending tasks before exit...")
                await asyncio.gather(*pending, return_exceptions=True)
            await asyncio.sleep(0.1)
        except Exception as e:
            logger = LoggingConfig.get_logger(__name__)
            logger.error(f" Exception in ingest: {e}")
            raise
        finally:
            if stop_event.is_set():
                await _cancel_all_tasks()
                logger = LoggingConfig.get_logger(__name__)
                logger.debug(" All tasks cancelled, exiting after SIGINT.")

    except ClickException as e:
        # Re-import: if the import at the top of the try block is what failed,
        # the local name would be unbound here and mask the real error.
        from qdrant_loader.utils.logging import LoggingConfig

        LoggingConfig.get_logger(__name__).error("ingest_failed", error=str(e))
        raise e from None
    except Exception as e:
        # Same re-import as above, for the same reason.
        from qdrant_loader.utils.logging import LoggingConfig

        LoggingConfig.get_logger(__name__).error("ingest_failed", error=str(e))
        raise ClickException(f"Failed to run ingestion: {e}") from e

592 

593 

@cli.command()
@option(
    "--workspace",
    type=ClickPath(path_type=Path),
    help="Workspace directory containing config.yaml and .env files. All output will be stored here.",
)
@option(
    "--log-level",
    type=Choice(
        ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], case_sensitive=False
    ),
    default="INFO",
    help="Set the logging level.",
)
@option(
    "--config", type=ClickPath(exists=True, path_type=Path), help="Path to config file."
)
@option("--env", type=ClickPath(exists=True, path_type=Path), help="Path to .env file.")
def config(
    workspace: Path | None, log_level: str, config: Path | None, env: Path | None
):
    """Display current configuration."""
    # The docstring above is the text click shows for --help.
    # Configuration is loaded with skip_validation=True and printed as JSON.
    # Any failure, including a ClickException raised by a helper, is logged as
    # "config_failed" and wrapped in a new ClickException.
    try:
        # Imported lazily to keep CLI start-up fast.
        from qdrant_loader.config.workspace import validate_workspace_flags

        validate_workspace_flags(workspace, config, env)

        # Workspace first (if any), then logging, then configuration.
        workspace_config = None
        if workspace:
            workspace_config = _setup_workspace(workspace)

        _setup_logging(log_level, workspace_config)

        _load_config_with_workspace(workspace_config, config, env, skip_validation=True)
        settings = _check_settings()

        echo("Current Configuration:")
        echo(json.dumps(settings.model_dump(mode="json"), indent=2))

    except Exception as e:
        # Imported inside the handler: importing it in the try block left the
        # name unbound whenever an earlier import failed, which masked the
        # real error with an UnboundLocalError.
        from qdrant_loader.utils.logging import LoggingConfig

        LoggingConfig.get_logger(__name__).error("config_failed", error=str(e))
        raise ClickException(f"Failed to display configuration: {e}") from e

643 

644 

# Project management commands are attached through a helper so that their
# module is only imported when the helper runs.
def _add_project_commands():
    """Import the project command group and register it on ``cli``."""
    from qdrant_loader.cli.project_commands import project_cli as project_group

    cli.add_command(project_group)

651 

652 

# Project commands are attached immediately only when this file is run as a
# script; on import their registration is handed to atexit.
if __name__ != "__main__":
    # NOTE(review): atexit callbacks run at interpreter shutdown, so when this
    # module is imported the project commands are registered only as the
    # process exits, not "on first access". Confirm this is intended.
    import atexit

    atexit.register(_add_project_commands)
else:
    _add_project_commands()
    cli()