Coverage for src/qdrant_loader/cli/cli.py: 55%
278 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:50 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:50 +0000
1"""CLI module for QDrant Loader."""
3import asyncio
4import json
5import os
6import signal
7from pathlib import Path
9import click
10import tomli
11from click.decorators import group, option
12from click.exceptions import ClickException
13from click.types import Choice
14from click.types import Path as ClickPath
15from click.utils import echo
17from qdrant_loader.cli.asyncio import async_command
19# Minimal imports at startup - everything else is lazy loaded
20logger = None # Will be initialized when needed
def _get_logger():
    """Return the module-level logger, creating it on first use.

    The logging helper is imported inside the function so that importing this
    module does not pull it in; the created logger is cached in the module
    global ``logger`` and reused by later calls.
    """
    global logger
    if logger is not None:
        return logger

    from qdrant_loader.utils.logging import LoggingConfig

    logger = LoggingConfig.get_logger(__name__)
    return logger
def _get_version() -> str:
    """Return the installed ``qdrant-loader`` version, or ``"unknown"``.

    Returns:
        The version string reported by ``importlib.metadata`` for the
        ``qdrant-loader`` distribution, or the literal ``"unknown"`` when the
        lookup fails for any reason.
    """
    try:
        from importlib.metadata import version as installed_version

        return installed_version("qdrant-loader")
    except Exception:
        # Covers both an unavailable importlib.metadata (ImportError) and a
        # distribution that cannot be found or read.
        return "unknown"
def _check_for_updates():
    """Trigger the version check; every failure is deliberately ignored.

    The current version is obtained from ``_get_version()`` and handed to
    ``check_version_async`` with ``silent=False``.
    """
    try:
        # Imported here rather than at module level to keep startup cheap.
        from qdrant_loader.utils.version_check import check_version_async

        check_version_async(_get_version(), silent=False)
    except Exception:
        # Best effort: a failing update check must never break the CLI.
        pass
# Root command group. The sub-commands in this module attach to it through
# ``@cli.command()``.
# NOTE: ``_get_version()`` below is evaluated once, when the decorator runs at
# import time, not on each invocation.
@group(name="qdrant-loader")
@option(
    "--log-level",
    type=Choice(
        ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], case_sensitive=False
    ),
    default="INFO",
    help="Set the logging level.",
)
@click.version_option(
    version=_get_version(),
    message="qDrant Loader v.%(version)s",
)
def cli(log_level: str = "INFO") -> None:
    """QDrant Loader CLI."""
    # The docstring above is the text click shows as this group's help, so it
    # is kept short on purpose.

    # Initialize basic logging first (no workspace yet, so the default log
    # file chosen by _setup_logging is used).
    _setup_logging(log_level)

    # Check for updates; _check_for_updates swallows any error it hits, so
    # this call cannot abort the command.
    _check_for_updates()
def _setup_logging(log_level: str, workspace_config=None) -> None:
    """Configure logging, optionally writing to a workspace log file.

    Args:
        log_level: Logging level forwarded to ``LoggingConfig.setup``.
        workspace_config: Optional workspace configuration. When truthy, its
            ``logs_path`` is used as the log file; otherwise the log file is
            ``qdrant-loader.log``.

    Raises:
        ClickException: If importing or configuring the logging helper fails.
    """
    global logger
    try:
        # Imported here rather than at module level to keep startup cheap.
        from qdrant_loader.utils.logging import LoggingConfig

        target_file = (
            str(workspace_config.logs_path) if workspace_config else "qdrant-loader.log"
        )

        # The output format is always "console" here.
        LoggingConfig.setup(level=log_level, format="console", file=target_file)

        # Refresh the cached module logger so it reflects the new setup.
        logger = LoggingConfig.get_logger(__name__)
    except Exception as e:
        raise ClickException(f"Failed to setup logging: {str(e)!s}") from e
def _setup_workspace(workspace_path: Path):
    """Create (if needed) and validate the workspace at ``workspace_path``.

    Args:
        workspace_path: Path to the workspace directory.

    Returns:
        The workspace configuration object returned by ``setup_workspace``.

    Raises:
        ClickException: If workspace setup fails. A ``ValueError`` is reported
            with its own message; any other error is prefixed with
            "Failed to setup workspace".
    """
    try:
        # Imported here rather than at module level to keep startup cheap.
        from qdrant_loader.config.workspace import (
            WorkspaceConfig,
            create_workspace_structure,
            setup_workspace,
        )

        create_workspace_structure(workspace_path)
        validated = setup_workspace(workspace_path)

        log = _get_logger()
        log.info("Using workspace", workspace=str(validated.workspace_path))

        # Report the optional files only when the workspace provides them.
        env_file = validated.env_path
        if env_file:
            log.info("Environment file found", env_path=str(env_file))

        config_file = validated.config_path
        if config_file:
            log.info("Config file found", config_path=str(config_file))

        return validated
    except ValueError as e:
        raise ClickException(str(e)) from e
    except Exception as e:
        raise ClickException(f"Failed to setup workspace: {str(e)!s}") from e
def _load_config_with_workspace(
    workspace_config=None,
    config_path: Path | None = None,
    env_path: Path | None = None,
    skip_validation: bool = False,
) -> None:
    """Load configuration in workspace mode or in traditional mode.

    Args:
        workspace_config: Optional workspace configuration. When truthy the
            configuration is initialized from it and the two paths are unused.
        config_path: Optional path to the config file (traditional mode).
        env_path: Optional path to the .env file (traditional mode).
        skip_validation: Forwarded to the initializer that is used.

    Raises:
        ClickException: If loading fails for any reason; the error is logged
            as ``config_load_failed`` first.
    """
    try:
        # Imported here rather than at module level to keep startup cheap.
        from qdrant_loader.config import (
            initialize_config,
            initialize_config_with_workspace,
        )

        if not workspace_config:
            # Traditional mode: explicit paths or config.yaml in the cwd.
            _get_logger().debug("Loading configuration in traditional mode")
            _load_config(config_path, env_path, skip_validation)
            return

        # Workspace mode.
        _get_logger().debug("Loading configuration in workspace mode")
        initialize_config_with_workspace(
            workspace_config, skip_validation=skip_validation
        )
    except Exception as e:
        _get_logger().error("config_load_failed", error=str(e))
        raise ClickException(f"Failed to load configuration: {str(e)!s}") from e
def _create_database_directory(path: Path) -> bool:
    """Ask the user whether to create ``path`` and create it on approval.

    Args:
        path: Path to the database directory.

    Returns:
        bool: True if the directory was created, False if the user declined.

    Raises:
        ClickException: If prompting or creating the directory fails.
    """
    try:
        _get_logger().info(
            "The database directory does not exist", path=str(path.absolute())
        )

        approved = click.confirm("Would you like to create this directory?", default=True)
        if not approved:
            return False

        path.mkdir(parents=True, mode=0o755)
        _get_logger().info(f"Created directory: {path.absolute()}")
        return True
    except Exception as e:
        raise ClickException(f"Failed to create directory: {str(e)!s}") from e
def _load_config(
    config_path: Path | None = None,
    env_path: Path | None = None,
    skip_validation: bool = False,
) -> None:
    """Load configuration from an explicit file or from ``config.yaml``.

    Resolution order: the given ``config_path`` if any, otherwise
    ``config.yaml`` in the current directory. If initialization reports a
    missing database directory, the user is offered to create it and the
    configuration is initialized once more.

    Args:
        config_path: Optional path to config file.
        env_path: Optional path to .env file.
        skip_validation: If True, a missing database directory is ignored
            instead of triggering the creation prompt.

    Raises:
        ClickException: If no config file can be found, the user declines to
            create the database directory, or loading fails otherwise.
    """
    try:
        # Imported here rather than at module level to keep startup cheap.
        from qdrant_loader.config import initialize_config

        if config_path is not None:
            # An explicit path must exist.
            if not config_path.exists():
                _get_logger().error("config_not_found", path=str(config_path))
                raise ClickException(f"Config file not found: {str(config_path)!s}")
            initialize_config(config_path, env_path, skip_validation=skip_validation)
            return

        # No explicit path: fall back to config.yaml in the current folder.
        default_config = Path("config.yaml")
        if not default_config.exists():
            raise ClickException(
                f"No config file found. Please specify a config file or create config.yaml in the current directory: {str(default_config)!s}"
            )
        initialize_config(default_config, env_path, skip_validation=skip_validation)

    except Exception as e:
        # Imported only on the error path; needed to recognise the
        # "database directory missing" condition.
        from qdrant_loader.config.state import DatabaseDirectoryError

        if not isinstance(e, DatabaseDirectoryError):
            if isinstance(e, ClickException):
                # Already user-facing: pass it through without extra context.
                raise e from None
            _get_logger().error("config_load_failed", error=str(e))
            raise ClickException(f"Failed to load configuration: {str(e)!s}") from e

        if skip_validation:
            # For config display, we don't need to create the directory.
            return

        # Take the path from the error and expand a leading "~".
        missing_dir = Path(os.path.expanduser(str(e.path)))
        if not _create_database_directory(missing_dir):
            raise ClickException(
                "Database directory creation declined. Exiting."
            ) from e

        # The directory exists now, so initialize directly instead of
        # re-entering _load_config.
        retry_target = config_path if config_path is not None else Path("config.yaml")
        initialize_config(retry_target, env_path, skip_validation=skip_validation)
def _check_settings():
    """Return the loaded settings or fail if none are available.

    Returns:
        The object returned by ``get_settings()``.

    Raises:
        ClickException: If ``get_settings()`` returns ``None``.
    """
    # Imported here rather than at module level to keep startup cheap.
    from qdrant_loader.config import get_settings

    loaded = get_settings()
    if loaded is not None:
        return loaded

    _get_logger().error("settings_not_available")
    raise ClickException("Settings not available")
async def _run_init(settings, force: bool) -> None:
    """Initialize the collection and log the outcome.

    Args:
        settings: Loaded settings; passed to ``init_collection`` and read for
            ``qdrant_collection_name`` in the success message.
        force: Passed to ``init_collection``; also selects whether the success
            message says "recreated" or "initialized".

    Raises:
        ClickException: If ``init_collection`` returns a falsy result or
            raises. The error is logged as ``init_failed`` first.
    """
    try:
        # Imported here rather than at module level to keep startup cheap.
        from qdrant_loader.core.init_collection import init_collection

        result = await init_collection(settings, force)
        if not result:
            raise ClickException("Failed to initialize collection")

        # Provide user-friendly feedback
        outcome = (
            "Collection recreated successfully"
            if force
            else "Collection initialized successfully"
        )
        _get_logger().info(outcome, collection=settings.qdrant_collection_name)

    except ClickException as e:
        # Already user-facing. Re-raise unchanged so the message is not
        # wrapped a second time ("Failed to initialize collection: Failed to
        # initialize collection").
        _get_logger().error("init_failed", error=str(e))
        raise
    except Exception as e:
        _get_logger().error("init_failed", error=str(e))
        raise ClickException(f"Failed to initialize collection: {str(e)!s}") from e
@cli.command()
@option(
    "--workspace",
    type=ClickPath(path_type=Path),
    help="Workspace directory containing config.yaml and .env files. All output will be stored here.",
)
@option(
    "--config", type=ClickPath(exists=True, path_type=Path), help="Path to config file."
)
@option("--env", type=ClickPath(exists=True, path_type=Path), help="Path to .env file.")
@option("--force", is_flag=True, help="Force reinitialization of collection.")
@option(
    "--log-level",
    type=Choice(
        ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], case_sensitive=False
    ),
    default="INFO",
    help="Set the logging level.",
)
@async_command
async def init(
    workspace: Path | None,
    config: Path | None,
    env: Path | None,
    force: bool,
    log_level: str,
):
    """Initialize QDrant collection."""
    # The docstring above doubles as the command's help text, so the details
    # live in comments: validate flags, set up workspace and logging, load the
    # configuration, prepare (and with --force reset) the state database, then
    # initialize the collection.
    try:
        # Imported here rather than at module level to keep startup cheap.
        from qdrant_loader.config.workspace import validate_workspace_flags

        validate_workspace_flags(workspace, config, env)

        # A workspace is optional; without one the explicit paths are used.
        workspace_config = _setup_workspace(workspace) if workspace else None
        _setup_logging(log_level, workspace_config)

        _load_config_with_workspace(workspace_config, config, env)
        settings = _check_settings()

        db_path = settings.global_config.state_management.database_path
        if db_path != ":memory:":
            # A file-backed database needs its parent directory.
            db_dir = Path(db_path).parent
            if not db_dir.exists() and not _create_database_directory(db_dir):
                raise ClickException(
                    "Database directory creation declined. Exiting."
                )

            # With --force an existing database file is removed.
            db_file_present = os.path.exists(db_path)
            if db_file_present and force:
                _get_logger().info("Resetting state database", database_path=db_path)
                os.remove(db_path)
                _get_logger().info(
                    "State database reset completed", database_path=db_path
                )
            elif force:
                _get_logger().info(
                    "State database reset skipped (no existing database)",
                    database_path=db_path,
                )

        await _run_init(settings, force)

    except ClickException as e:
        from qdrant_loader.utils.logging import LoggingConfig

        LoggingConfig.get_logger(__name__).error("init_failed", error=str(e))
        raise e from None
    except Exception as e:
        from qdrant_loader.utils.logging import LoggingConfig

        LoggingConfig.get_logger(__name__).error("init_failed", error=str(e))
        raise ClickException(f"Failed to initialize collection: {str(e)!s}") from e
async def _cancel_all_tasks():
    """Cancel every other unfinished task on the running loop and await them.

    The calling task is excluded on purpose: ``asyncio.all_tasks()`` contains
    the task that is executing this coroutine, and cancelling it makes the
    ``gather`` below raise ``CancelledError`` in the caller instead of
    returning once the other tasks have been wound down.
    """
    current = asyncio.current_task()
    others = [
        task
        for task in asyncio.all_tasks()
        if task is not current and not task.done()
    ]
    for task in others:
        task.cancel()
    # return_exceptions=True keeps the CancelledError of each task from
    # propagating out of the gather.
    await asyncio.gather(*others, return_exceptions=True)
@cli.command()
@option(
    "--workspace",
    type=ClickPath(path_type=Path),
    help="Workspace directory containing config.yaml and .env files. All output will be stored here.",
)
@option(
    "--config", type=ClickPath(exists=True, path_type=Path), help="Path to config file."
)
@option("--env", type=ClickPath(exists=True, path_type=Path), help="Path to .env file.")
@option(
    "--project",
    type=str,
    help="Project ID to process. If specified, --source-type and --source will filter within this project.",
)
@option(
    "--source-type",
    type=str,
    help="Source type to process (e.g., confluence, jira, git). If --project is specified, filters within that project; otherwise applies to all projects.",
)
@option(
    "--source",
    type=str,
    help="Source name to process. If --project is specified, filters within that project; otherwise applies to all projects.",
)
@option(
    "--log-level",
    type=Choice(
        ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], case_sensitive=False
    ),
    default="INFO",
    help="Set the logging level.",
)
@option(
    "--profile/--no-profile",
    default=False,
    help="Run the ingestion under cProfile and save output to 'profile.out' (for performance analysis).",
)
@async_command
async def ingest(
    workspace: Path | None,
    config: Path | None,
    env: Path | None,
    project: str | None,
    source_type: str | None,
    source: str | None,
    log_level: str,
    profile: bool,
):
    """Ingest documents from configured sources.

    Examples:
        # Ingest all projects
        qdrant-loader ingest

        # Ingest specific project
        qdrant-loader ingest --project my-project

        # Ingest specific source type from all projects
        qdrant-loader ingest --source-type git

        # Ingest specific source type from specific project
        qdrant-loader ingest --project my-project --source-type git

        # Ingest specific source from specific project
        qdrant-loader ingest --project my-project --source-type git --source my-repo
    """
    # The docstring above doubles as the command's help text and is therefore
    # left untouched. Errors of any kind surface as ClickException after being
    # logged as "ingest_failed".
    try:
        # Lazy import to avoid slow startup
        from qdrant_loader.config.workspace import validate_workspace_flags
        from qdrant_loader.utils.logging import LoggingConfig

        # Validate flag combinations
        validate_workspace_flags(workspace, config, env)

        # Setup workspace if provided
        workspace_config = None
        if workspace:
            workspace_config = _setup_workspace(workspace)

        # Setup logging with workspace support
        _setup_logging(log_level, workspace_config)

        # Load configuration
        _load_config_with_workspace(workspace_config, config, env)
        settings = _check_settings()

        # Lazy import to avoid slow startup
        from qdrant_loader.core.qdrant_manager import QdrantManager

        qdrant_manager = QdrantManager(settings)

        async def run_ingest():
            # Lazy import to avoid slow startup
            from qdrant_loader.core.async_ingestion_pipeline import (
                AsyncIngestionPipeline,
            )

            # Create pipeline with workspace-aware metrics path
            if workspace_config:
                pipeline = AsyncIngestionPipeline(
                    settings, qdrant_manager, metrics_dir=workspace_config.metrics_path
                )
            else:
                pipeline = AsyncIngestionPipeline(settings, qdrant_manager)

            try:
                await pipeline.process_documents(
                    project_id=project,
                    source_type=source_type,
                    source=source,
                )
            finally:
                # Ensure proper cleanup of the async pipeline
                await pipeline.cleanup()

        loop = asyncio.get_running_loop()
        stop_event = asyncio.Event()

        def _handle_sigint():
            # NOTE(review): this only records that SIGINT arrived. Nothing
            # awaits stop_event, so the running pipeline is not interrupted;
            # the event is only inspected in the finally block below.
            logger = LoggingConfig.get_logger(__name__)
            logger.debug(" SIGINT received, cancelling all tasks...")
            stop_event.set()

        # add_signal_handler is only implemented by Unix event loops and can
        # also fail when the handler cannot be set up. Ingestion must still
        # work in those cases, just without the custom SIGINT handling.
        try:
            loop.add_signal_handler(signal.SIGINT, _handle_sigint)
            sigint_handler_installed = True
        except (NotImplementedError, RuntimeError) as handler_error:
            sigint_handler_installed = False
            LoggingConfig.get_logger(__name__).debug(
                "SIGINT handler not installed", error=str(handler_error)
            )

        try:
            if profile:
                import cProfile

                profiler = cProfile.Profile()
                profiler.enable()
                try:
                    await run_ingest()
                finally:
                    # Always persist the profile, even if ingestion failed.
                    profiler.disable()
                    profiler.dump_stats("profile.out")
                    LoggingConfig.get_logger(__name__).info(
                        "Profile saved to profile.out"
                    )
            else:
                await run_ingest()
            logger = LoggingConfig.get_logger(__name__)
            logger.info("Pipeline finished, awaiting cleanup.")
            # Wait for all pending tasks
            pending = [
                t
                for t in asyncio.all_tasks()
                if t is not asyncio.current_task() and not t.done()
            ]
            if pending:
                logger.debug(f" Awaiting {len(pending)} pending tasks before exit...")
                await asyncio.gather(*pending, return_exceptions=True)
            await asyncio.sleep(0.1)
        except Exception as e:
            logger = LoggingConfig.get_logger(__name__)
            logger.error(f" Exception in ingest: {e}")
            raise
        finally:
            try:
                if stop_event.is_set():
                    await _cancel_all_tasks()
                    logger = LoggingConfig.get_logger(__name__)
                    logger.debug(" All tasks cancelled, exiting after SIGINT.")
            finally:
                # Do not leave our handler registered on the loop once the
                # command is done, whatever happened above.
                if sigint_handler_installed:
                    loop.remove_signal_handler(signal.SIGINT)

    except ClickException as e:
        LoggingConfig.get_logger(__name__).error("ingest_failed", error=str(e))
        raise e from None
    except Exception as e:
        LoggingConfig.get_logger(__name__).error("ingest_failed", error=str(e))
        raise ClickException(f"Failed to run ingestion: {str(e)!s}") from e
@cli.command()
@option(
    "--workspace",
    type=ClickPath(path_type=Path),
    help="Workspace directory containing config.yaml and .env files. All output will be stored here.",
)
@option(
    "--log-level",
    type=Choice(
        ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], case_sensitive=False
    ),
    default="INFO",
    help="Set the logging level.",
)
@option(
    "--config", type=ClickPath(exists=True, path_type=Path), help="Path to config file."
)
@option("--env", type=ClickPath(exists=True, path_type=Path), help="Path to .env file.")
def config(
    workspace: Path | None, log_level: str, config: Path | None, env: Path | None
):
    """Display current configuration."""
    # The docstring above doubles as the command's help text. Any failure,
    # including a ClickException from a helper, is logged as "config_failed"
    # and re-raised as ClickException("Failed to display configuration: ...").
    try:
        # Imported here rather than at module level to keep startup cheap.
        from qdrant_loader.config.workspace import validate_workspace_flags
        from qdrant_loader.utils.logging import LoggingConfig

        validate_workspace_flags(workspace, config, env)

        # A workspace is optional; without one the explicit paths are used.
        workspace_config = _setup_workspace(workspace) if workspace else None
        _setup_logging(log_level, workspace_config)

        # skip_validation=True is passed through to the configuration loaders.
        _load_config_with_workspace(workspace_config, config, env, skip_validation=True)
        settings = _check_settings()

        echo("Current Configuration:")
        rendered = json.dumps(settings.model_dump(mode="json"), indent=2)
        echo(rendered)

    except Exception as e:
        LoggingConfig.get_logger(__name__).error("config_failed", error=str(e))
        raise ClickException(f"Failed to display configuration: {str(e)!s}") from e
645# Add project management commands with lazy import
def _add_project_commands():
    """Register the project command group on ``cli``.

    ``project_cli`` is imported inside the function, so the
    ``project_commands`` module is only loaded when this function is called.
    """
    from qdrant_loader.cli.project_commands import project_cli

    # Attach the imported command (group) to the root CLI group.
    cli.add_command(project_cli)
def _install_lazy_project_commands() -> None:
    """Make ``cli`` load the project commands on demand.

    Wraps the group's ``get_command`` and ``list_commands`` so that
    ``_add_project_commands()`` runs the first time a command name cannot be
    resolved or the command list is requested. Commands that are already
    registered resolve without importing the project commands module.
    """
    original_get_command = cli.get_command
    original_list_commands = cli.list_commands
    state = {"loaded": False}

    def _ensure_loaded() -> None:
        if not state["loaded"]:
            _add_project_commands()
            # Only mark as loaded after a successful registration.
            state["loaded"] = True

    def get_command(ctx, cmd_name):
        command = original_get_command(ctx, cmd_name)
        if command is None:
            # Unknown so far: it may be one of the lazily added commands.
            _ensure_loaded()
            command = original_get_command(ctx, cmd_name)
        return command

    def list_commands(ctx):
        # Help output must show the project commands as well.
        _ensure_loaded()
        return original_list_commands(ctx)

    cli.get_command = get_command
    cli.list_commands = list_commands


# Only add project commands when CLI is actually used
if __name__ == "__main__":
    _add_project_commands()
    cli()
else:
    # When imported as a module, add the commands on first access. An atexit
    # hook is not suitable for this: it runs at interpreter shutdown, after
    # the CLI has already finished, so the commands would never be reachable.
    _install_lazy_project_commands()