Coverage for src / qdrant_loader / cli / commands / setup_cmd.py: 88%
317 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-10 09:40 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-10 09:40 +0000
1"""Interactive setup wizard for qdrant-loader configuration."""
3from __future__ import annotations
5from pathlib import Path
6from typing import Any
8import click
10# Heavy modules (questionary ~1.4s, rich ~0.4s) are lazy-imported via helpers
11# to keep CLI startup fast.
_console = None


def _get_console():
    """Return the module-wide rich ``Console``, building it on first use.

    ``rich`` is imported inside the function so that importing this module
    does not pay for it; the instance is cached in ``_console`` afterwards.
    """
    global _console
    if _console is not None:
        return _console

    from rich.console import Console

    _console = Console()
    return _console
# Supported source types: config key -> label shown in the source selector.
SOURCE_TYPES: dict[str, str] = {
    "git": "Git Repository",
    "confluence": "Confluence Wiki",
    "jira": "Jira Issues",
    "publicdocs": "Public Documentation (website)",
    "localfile": "Local Files",
}

# Returned by every _collect_*_config helper: (yaml-ready dict, extra env vars dict)
_SourceResult = tuple[dict[str, Any], dict[str, str]]


# Setup modes: mode key -> one-line description shown in the mode selector.
# The "default" description names <workspace>/docs because that is the folder
# run_setup_default actually creates and points the localfile source at.
SETUP_MODES: dict[str, str] = {
    "default": "Quick start with localfile source pointing to <workspace>/docs",
    "normal": "Interactive wizard with simplified config format",
    "advanced": "Full control over global settings, multi-project format",
}
def run_setup(output_dir: Path | None = None, mode: str | None = None) -> None:
    """Entry point for the setup command.

    When *mode* is ``None`` a TUI mode selector is shown first. The workspace
    is then resolved and the handler for the chosen mode is run.

    Args:
        output_dir: Directory in which the generated files are placed.
            If ``None``, ``./workspace`` is used without prompting in
            ``"default"`` mode, and the user is prompted in every other mode.
        mode: One of ``"default"``, ``"normal"``, ``"advanced"`` or ``None``.

    Raises:
        KeyError: If *mode* is not a supported mode. Raised before any
            workspace prompt is shown or any directory is created.
        click.BadParameter: Propagated from workspace resolution when the
            chosen path exists but is not a directory.
    """
    # ------------------------------------------------------------------
    # Step 0: Mode selection (before workspace, so default can skip prompt)
    # ------------------------------------------------------------------
    if mode is None:
        mode = _select_setup_mode()
        if mode is None:
            _get_console().print("[yellow]Setup cancelled.[/yellow]")
            return

    dispatch = {
        "default": run_setup_default,
        "normal": run_setup_wizard,
        "advanced": run_setup_advanced,
    }
    # Resolve the handler first: an unsupported mode must fail before the
    # workspace prompt runs and before anything is created on disk.
    handler = dispatch[mode]

    try:
        # --------------------------------------------------------------
        # Step 1: Workspace folder
        # --------------------------------------------------------------
        if mode == "default" and output_dir is None:
            # Default mode always uses ./workspace, no prompt
            output_dir = Path("workspace")
        # Kept inside the try so that aborting the workspace prompt is
        # reported the same way as aborting any later prompt.
        workspace = _resolve_workspace(output_dir)

        handler(workspace)
    except (click.Abort, KeyboardInterrupt):
        _get_console().print("\n[yellow]Setup cancelled.[/yellow]")
def _resolve_workspace(output_dir: Path | None) -> Path:
    """Resolve and prepare the workspace directory.

    When *output_dir* is explicitly provided, uses it directly.
    Otherwise prompts the user with ``./workspace`` as the default. A leading
    ``~`` in the typed answer is expanded, since no shell does it for us.

    Args:
        output_dir: The value passed via ``--output-dir``, or ``None``.

    Returns:
        Resolved :class:`Path` to the workspace directory (created if needed).

    Raises:
        click.BadParameter: If the target exists but is not a directory, or
            if a ``~user`` prefix in the typed answer cannot be expanded.
    """
    # If explicitly provided via --output-dir, use it as-is.
    if output_dir is not None:
        resolved = Path(output_dir).resolve()
        if resolved.exists() and not resolved.is_dir():
            raise click.BadParameter(
                f"'{resolved}' exists but is not a directory.",
                param_hint="output_dir",
            )
        resolved.mkdir(parents=True, exist_ok=True)
        return resolved

    # Interactive: prompt with default ./workspace
    default_ws = "workspace"
    raw: str = click.prompt("Workspace folder", default=default_ws)
    # Drop anything that cannot be encoded as UTF-8 (e.g. lone surrogates).
    chosen = raw.encode("utf-8", errors="ignore").decode("utf-8").strip()
    if not chosen:
        chosen = default_ws

    # Without expansion "~/ws" would create a literal "~" directory under
    # the current working directory.
    try:
        expanded = Path(chosen).expanduser()
    except RuntimeError as exc:
        raise click.BadParameter(
            f"Cannot expand '{chosen}': {exc}",
            param_hint="workspace",
        ) from exc

    # Joining an absolute path onto cwd yields the absolute path unchanged.
    ws_path = (Path.cwd() / expanded).resolve()
    if ws_path.exists() and not ws_path.is_dir():
        raise click.BadParameter(
            f"'{ws_path}' exists but is not a directory.",
            param_hint="workspace",
        )
    if not ws_path.exists():
        ws_path.mkdir(parents=True, exist_ok=True)
        _get_console().print(f"[green]Created workspace: {ws_path}[/green]")
    else:
        _get_console().print(f"[cyan]Using workspace: {ws_path}[/cyan]")

    return ws_path
def _select_setup_mode() -> str | None:
    """Show the setup banner and let the user pick a mode with questionary.

    Returns:
        One of the keys in :data:`SETUP_MODES`, or ``None`` when the user
        picks "Cancel" or the prompt yields no answer.
    """
    import questionary
    from rich.panel import Panel

    banner = Panel(
        "[bold]qdrant-loader Setup[/bold]\nChoose a setup mode to get started.",
        style="blue",
    )
    _get_console().print(banner)

    cancel_value = "__cancel__"
    menu = []
    for mode_key, mode_desc in SETUP_MODES.items():
        label = f"{mode_key.capitalize():<10} - {mode_desc}"
        menu.append(questionary.Choice(title=label, value=mode_key))
    menu.append(questionary.Choice(title="Cancel", value=cancel_value))

    try:
        picked = questionary.select(
            "Select setup mode:",
            choices=menu,
            default="default",
        ).ask()
    except (EOFError, KeyboardInterrupt):
        return None

    if picked is None or picked == cancel_value:
        return None
    return picked
def run_setup_default(output_dir: Path) -> None:
    """Generate a minimal default config with a localfile source at ``<output_dir>/docs``.

    Shows which files will be written and asks for a single confirmation; no
    other questions are asked. Writes ``config.yaml`` and ``.env`` with
    placeholder/default values and creates the ``docs`` folder, so the user
    can continue with ``qdrant-loader init && qdrant-loader ingest``.

    Args:
        output_dir: Directory in which the generated files are placed.
    """
    from rich.panel import Panel

    workspace = Path(output_dir).resolve()
    config_path = workspace / "config.yaml"
    env_path = workspace / ".env"

    # Preview first, then let the user back out before anything is written.
    _show_file_preview(workspace, config_path, env_path)
    if not _confirm_overwrite(config_path, env_path):
        return

    workspace.mkdir(parents=True, exist_ok=True)

    # The localfile source points at <workspace>/docs, created up front.
    docs_dir = workspace / "docs"
    docs_dir.mkdir(parents=True, exist_ok=True)

    localfile_source: dict[str, Any] = {
        "base_url": docs_dir.as_uri(),
        "file_types": ["*.md", "*.txt", "*.py"],
        "enable_file_conversion": True,
    }
    sources: dict[str, dict[str, Any]] = {"localfile": {"my-docs": localfile_source}}

    _write_env_file(
        env_path,
        openai_key="your_openai_api_key_here",
        qdrant_url="http://localhost:6333",
        qdrant_api_key="",
        collection_name="documents",
    )
    _write_config_file_multi(config_path, sources=sources)

    summary = "\n".join(
        [
            "[green]Created:[/green]",
            f" - {config_path}",
            f" - {env_path}",
            f" - {docs_dir}/ (place your documents here)",
            "",
            "[bold]Next steps:[/bold]",
            f" 1. Set your OPENAI_API_KEY in {env_path}",
            f" 2. Place your documents in {docs_dir}/",
            f" 3. Run: qdrant-loader init --workspace {workspace}",
            f" 4. Run: qdrant-loader ingest --workspace {workspace}",
        ]
    )
    _get_console().print(Panel(summary, title="Default Setup Complete", style="green"))
def run_setup_wizard(output_dir: Path) -> None:
    """Run the interactive setup wizard (Normal mode).

    Asks for the core connection settings, collects one or more sources, and
    after a confirmation writes ``config.yaml`` and ``.env`` to *output_dir*.

    Args:
        output_dir: Directory in which the generated files are placed.
    """
    from rich.panel import Panel

    workspace = Path(output_dir).resolve()
    console = _get_console()

    console.print(
        Panel(
            "[bold]qdrant-loader Setup Wizard[/bold] [dim](Normal mode)[/dim]\n"
            "Generate config.yaml and .env for your project.",
            style="blue",
        )
    )

    # --- Step 1: core settings -----------------------------------------
    _get_console().print("\n[bold cyan]Step 1: Core Settings[/bold cyan]")

    openai_key: str = click.prompt("OpenAI API Key", hide_input=True)
    qdrant_url: str = click.prompt("Qdrant URL", default="http://localhost:6333")
    qdrant_api_key: str = click.prompt(
        "Qdrant API Key (leave empty for local)", default="", hide_input=True
    )
    collection_name: str = click.prompt("Collection name", default="documents")

    # --- Steps 2+3: sources (the helper loops until the user is done) ---
    collected_sources: dict[str, dict[str, Any]] = {}
    collected_env: dict[str, str] = {}
    _collect_sources_loop(collected_sources, collected_env, workspace_dir=workspace)

    # --- Step 4: confirm and write --------------------------------------
    config_path = workspace / "config.yaml"
    env_path = workspace / ".env"

    _show_file_preview(workspace, config_path, env_path)
    if not _confirm_overwrite(config_path, env_path):
        return

    workspace.mkdir(parents=True, exist_ok=True)

    _write_env_file(
        env_path,
        openai_key=openai_key,
        qdrant_url=qdrant_url,
        qdrant_api_key=qdrant_api_key,
        collection_name=collection_name,
        extra_vars=collected_env,
    )
    _write_config_file_multi(config_path, sources=collected_sources)

    # One "type(count)" entry per source type, e.g. "git(2), jira(1)".
    per_type = [f"{kind}({len(entries)})" for kind, entries in collected_sources.items()]
    source_summary = ", ".join(per_type)

    report = "\n".join(
        [
            "[green]Created:[/green]",
            f" - {config_path}",
            f" - {env_path}",
            f" - Sources: {source_summary}",
            "",
            "[bold]Next steps:[/bold]",
            " 1. Review the generated files",
            f" 2. Run: qdrant-loader init --workspace {workspace}",
            f" 3. Run: qdrant-loader ingest --workspace {workspace}",
        ]
    )
    _get_console().print(Panel(report, title="Setup Complete", style="green"))
def run_setup_advanced(output_dir: Path) -> None:
    """Run the advanced setup wizard with full global settings and multi-project format.

    Prompts for core, embedding, chunking and reranking settings, then for one
    or more projects (each with its own sources), and after a confirmation
    writes ``config.yaml`` and ``.env`` to *output_dir*.

    Args:
        output_dir: Directory in which the generated files are placed.

    Raises:
        click.BadParameter: If the vector size or chunk size is not positive,
            or if the chunk overlap is negative or not smaller than the
            chunk size.
    """
    from rich.panel import Panel

    output_dir = Path(output_dir).resolve()

    _get_console().print(
        Panel(
            "[bold]qdrant-loader Setup Wizard[/bold] [dim](Advanced mode)[/dim]\n"
            "Full control over global settings and multi-project configuration.",
            style="blue",
        )
    )

    # ------------------------------------------------------------------
    # Step 1: Core settings
    # ------------------------------------------------------------------
    _get_console().print("\n[bold cyan]Step 1: Core Settings[/bold cyan]")

    openai_key: str = click.prompt("OpenAI API Key", hide_input=True)
    qdrant_url: str = click.prompt("Qdrant URL", default="http://localhost:6333")
    qdrant_api_key: str = click.prompt(
        "Qdrant API Key (leave empty for local)", default="", hide_input=True
    )
    collection_name: str = click.prompt("Collection name", default="documents")

    # ------------------------------------------------------------------
    # Step 2: Embedding settings
    # ------------------------------------------------------------------
    _get_console().print("\n[bold cyan]Step 2: Embedding Configuration[/bold cyan]")

    embedding_model: str = click.prompt(
        "Embedding model", default="argus-ai/pplx-embed-v1-0.6b:fp32"
    )
    embedding_endpoint: str = click.prompt(
        "Embedding endpoint (Ollama local default)",
        default="http://localhost:11434/v1",
    )
    vector_size: int = click.prompt("Vector size", default=1024, type=int)
    if vector_size <= 0:
        raise click.BadParameter("Vector size must be a positive integer.")

    # ------------------------------------------------------------------
    # Step 3: Chunking settings
    # ------------------------------------------------------------------
    _get_console().print("\n[bold cyan]Step 3: Chunking Configuration[/bold cyan]")

    chunk_size: int = click.prompt("Chunk size (characters)", default=1500, type=int)
    chunk_overlap: int = click.prompt(
        "Chunk overlap (characters)", default=200, type=int
    )
    # Validate like vector_size above, so an unusable chunking setup is
    # rejected here instead of being written into config.yaml.
    if chunk_size <= 0:
        raise click.BadParameter("Chunk size must be a positive integer.")
    if not 0 <= chunk_overlap < chunk_size:
        raise click.BadParameter(
            "Chunk overlap must be non-negative and smaller than the chunk size."
        )

    # ------------------------------------------------------------------
    # Step 4: Reranking settings
    # ------------------------------------------------------------------
    _get_console().print("\n[bold cyan]Step 4: Reranking Configuration[/bold cyan]")

    enable_reranking: bool = click.confirm(
        "Enable cross-encoder reranking?", default=True
    )

    # ------------------------------------------------------------------
    # Step 5: Projects with sources
    # ------------------------------------------------------------------
    projects: dict[str, dict[str, Any]] = {}
    # Shared across all projects so env var names stay unique file-wide.
    all_extra_env: dict[str, str] = {}

    while True:
        _get_console().print("\n[bold cyan]Step 5: Project Configuration[/bold cyan]")

        # Re-prompt until the ID is not already taken by an earlier project.
        while True:
            project_id: str = click.prompt("Project ID", default="my-project")
            if project_id in projects:
                _get_console().print(
                    f"[red]Project '{project_id}' already exists. "
                    f"Pick a different ID.[/red]"
                )
                continue
            break
        display_name: str = click.prompt("Display name", default=project_id)
        description: str = click.prompt("Description", default="")

        project_sources: dict[str, dict[str, Any]] = {}
        _collect_sources_loop(project_sources, all_extra_env, workspace_dir=output_dir)

        projects[project_id] = {
            "project_id": project_id,
            "display_name": display_name,
            "description": description,
            "sources": project_sources,
        }

        _get_console().print(f"[green]Added project: {project_id}[/green]")

        if not click.confirm("Add another project?", default=False):
            break

    # ------------------------------------------------------------------
    # Step 6: Write files
    # ------------------------------------------------------------------
    config_path = output_dir / "config.yaml"
    env_path = output_dir / ".env"

    _show_file_preview(output_dir, config_path, env_path)

    if not _confirm_overwrite(config_path, env_path):
        return

    output_dir.mkdir(parents=True, exist_ok=True)

    _write_env_file(
        env_path,
        openai_key=openai_key,
        qdrant_url=qdrant_url,
        qdrant_api_key=qdrant_api_key,
        collection_name=collection_name,
        extra_vars=all_extra_env,
    )

    # Build global config. Secrets are referenced through ${VAR}
    # placeholders; their values are written to .env only.
    global_config: dict[str, Any] = {
        "qdrant": {
            "url": qdrant_url,
            "api_key": "${QDRANT_API_KEY}" if qdrant_api_key else None,
            "collection_name": collection_name,
        },
        "embedding": {
            "model": embedding_model,
            "api_key": "${OPENAI_API_KEY}",
            "vector_size": vector_size,
        },
        "chunking": {
            "chunk_size": chunk_size,
            "chunk_overlap": chunk_overlap,
        },
    }

    if embedding_endpoint:
        global_config["embedding"]["endpoint"] = embedding_endpoint

    global_config["reranking"] = {
        "enabled": enable_reranking,
    }

    _write_config_file_advanced(
        config_path,
        global_config=global_config,
        projects=projects,
    )

    # Per project: total number of sources across all source types.
    project_summary = ", ".join(
        f"{pid}({sum(len(srcs) for srcs in p['sources'].values())} sources)"
        for pid, p in projects.items()
    )

    _get_console().print(
        Panel(
            f"[green]Created:[/green]\n"
            f" - {config_path}\n"
            f" - {env_path}\n"
            f" - Projects: {project_summary}\n\n"
            f"[bold]Next steps:[/bold]\n"
            f" 1. Review the generated files\n"
            f" 2. Run: qdrant-loader init --workspace {output_dir}\n"
            f" 3. Run: qdrant-loader ingest --workspace {output_dir}",
            title="Advanced Setup Complete",
            style="green",
        )
    )
495# ---------------------------------------------------------------------------
496# Shared helpers
497# ---------------------------------------------------------------------------
def _show_file_preview(output_dir: Path, *paths: Path) -> None:
    """Print a panel listing the workspace and each file about to be written.

    Every file is tagged ``(overwrite)`` when it already exists on disk and
    ``(new)`` otherwise.

    Args:
        output_dir: The workspace directory.
        paths: File paths that will be created or overwritten.
    """
    from rich.panel import Panel

    overwrite_tag = "[yellow](overwrite)[/yellow]"
    new_tag = "[green](new)[/green]"

    rows = [f"[bold]Workspace:[/bold] {output_dir}"]
    rows.extend(
        f" {target.name} {overwrite_tag if target.exists() else new_tag}"
        for target in paths
    )

    panel = Panel("\n".join(rows), title="Files to write", style="cyan")
    _get_console().print(panel)
def _confirm_overwrite(*paths: Path) -> bool:
    """Ask the user for a go-ahead before any file is written.

    A confirmation is always requested. When some of *paths* already exist
    the question names them and has no preselected answer; otherwise a plain
    "Write files?" question defaulting to yes is asked.

    Returns:
        ``True`` if the user agreed to proceed, ``False`` if they declined
        (in which case a cancellation notice has been printed).
    """
    clobbered = [candidate.name for candidate in paths if candidate.exists()]

    if clobbered:
        names = ", ".join(clobbered)
        go_ahead = click.confirm(
            f"{names} already exist(s) and will be overwritten. Proceed?"
        )
    else:
        go_ahead = click.confirm("Write files?", default=True)

    if go_ahead:
        return True

    _get_console().print("[yellow]Setup cancelled.[/yellow]")
    return False
def _select_source_type() -> str | None:
    """Present an interactive source type selector with arrow-key navigation.

    The list ends with an explicit "Back" entry, mirroring the "Cancel" entry
    of the setup mode selector, so leaving the menu does not require an
    interrupt.

    Returns:
        One of the keys in :data:`SOURCE_TYPES`, or ``None`` if the user
        selects Back or the prompt yields no answer.
    """
    import questionary

    # Sentinel value for the "Back" entry; translated to None before returning.
    _BACK = "__back__"
    choices = [
        questionary.Choice(title=f"{key:<12} - {desc}", value=key)
        for key, desc in SOURCE_TYPES.items()
    ]
    choices.append(questionary.Choice(title="Back", value=_BACK))

    try:
        result = questionary.select(
            "Select source type:",
            choices=choices,
            default="localfile",
        ).ask()
    except (EOFError, KeyboardInterrupt):
        result = None

    if result is None or result == _BACK:
        return None
    return result
def _collect_sources_loop(
    all_sources: dict[str, dict[str, Any]],
    all_extra_env: dict[str, str],
    workspace_dir: Path | None = None,
) -> None:
    """Interactively gather data sources until the user stops.

    Each round asks for a source type, a unique source name, and the
    type-specific settings. The loop ends when no source type is selected or
    the user declines to add another source. Results are merged into
    *all_sources* and *all_extra_env* in-place.

    Args:
        all_sources: Accumulated source configs (mutated in-place).
        all_extra_env: Accumulated extra env vars (mutated in-place).
        workspace_dir: Workspace directory, used to derive defaults (e.g. docs path).
    """
    say = _get_console().print

    keep_going = True
    while keep_going:
        say("\n[bold cyan]Data Source[/bold cyan]")

        source_type = _select_source_type()
        if source_type is None:
            return

        say(f"\n[bold cyan]Configure {SOURCE_TYPES[source_type]}[/bold cyan]")

        taken_names = all_sources.get(source_type, {})
        # Snapshot of every env var registered so far (all source types and,
        # when the caller shares the dict, all projects): a name whose suffix
        # is already in use is refused to prevent silent overwrites.
        taken_env_keys = set(all_extra_env)

        while True:
            source_name = click.prompt(
                "Source name (identifier)", default=f"my-{source_type}"
            )
            suffix = _source_name_to_env_suffix(source_name)
            if source_name in taken_names:
                say(
                    f"[red]{source_type}/{source_name} already exists. "
                    f"Pick a different name.[/red]"
                )
            elif any(key.endswith(f"_{suffix}") for key in taken_env_keys):
                say(
                    f"[red]'{source_name}' collides with an existing "
                    f"env var suffix across projects. "
                    f"Pick a different name.[/red]"
                )
            else:
                break

        source_config, extra_env = _collect_source_config(
            source_type, source_name, workspace_dir=workspace_dir
        )

        all_sources.setdefault(source_type, {})[source_name] = source_config
        all_extra_env.update(extra_env)

        say(f"[green]Added {source_type}/{source_name}[/green]")

        keep_going = click.confirm("Add another source?", default=False)
638# ---------------------------------------------------------------------------
639# Source-specific config collectors
640# ---------------------------------------------------------------------------
643def _source_name_to_env_suffix(source_name: str) -> str:
644 """Convert a source name like 'my-repo' to an env-var-safe suffix like 'MY_REPO'."""
645 import re
647 suffix = re.sub(r"[^A-Za-z0-9]", "_", source_name).strip("_").upper()
648 return suffix if suffix else "DEFAULT"
def _collect_source_config(
    source_type: str,
    source_name: str,
    *,
    workspace_dir: Path | None = None,
) -> _SourceResult:
    """Run the collector that matches *source_type*.

    Only the localfile collector receives *workspace_dir*; the others take
    the source name alone. An unrecognised *source_type* yields two empty
    dicts.

    Args:
        source_type: One of the keys in ``SOURCE_TYPES``.
        source_name: User-chosen identifier, used to create unique env var names.
        workspace_dir: Workspace directory, passed to collectors that need it.

    Returns:
        A tuple of (source yaml dict, extra env-var dict).
    """
    if source_type == "localfile":
        return _collect_localfile_config(source_name, workspace_dir=workspace_dir)
    if source_type == "git":
        return _collect_git_config(source_name)
    if source_type == "confluence":
        return _collect_confluence_config(source_name)
    if source_type == "jira":
        return _collect_jira_config(source_name)
    if source_type == "publicdocs":
        return _collect_publicdocs_config(source_name)
    return {}, {}
def _collect_git_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Git repository source.

    When an access token is entered it is stored as an extra env var named
    ``GIT_TOKEN_<SUFFIX>`` and the config refers to it via a ``${...}``
    placeholder; with no token the config has no ``token`` key.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    repo_url: str = click.prompt(
        "Repository URL (e.g., https://github.com/org/repo.git)"
    )
    branch_name: str = click.prompt("Branch", default="main")
    access_token: str = click.prompt(
        "Access token (leave empty for public repos)", default="", hide_input=True
    )
    patterns_raw: str = click.prompt(
        "File types (comma-separated)", default="*.md,*.txt,*.py"
    )

    # Split on commas and drop blank entries.
    patterns = []
    for piece in patterns_raw.split(","):
        piece = piece.strip()
        if piece:
            patterns.append(piece)

    config: dict[str, Any] = {
        "base_url": repo_url,
        "branch": branch_name,
        "file_types": patterns,
        "enable_file_conversion": True,
    }
    if not access_token:
        return config, {}

    env_key = "GIT_TOKEN_" + _source_name_to_env_suffix(source_name)
    config["token"] = "${" + env_key + "}"
    return config, {env_key: access_token}
def _collect_confluence_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Confluence source.

    The token and email are returned as extra env vars named after the
    source; the config only carries ``${...}`` placeholders for them.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    wiki_url: str = click.prompt(
        "Confluence URL (e.g., https://mycompany.atlassian.net/wiki)"
    )
    space: str = click.prompt("Space key")
    account_email: str = click.prompt("Email")
    api_token: str = click.prompt("API token", hide_input=True)

    env_suffix = _source_name_to_env_suffix(source_name)
    token_var = "CONFLUENCE_TOKEN_" + env_suffix
    email_var = "CONFLUENCE_EMAIL_" + env_suffix

    secrets: dict[str, str] = {token_var: api_token, email_var: account_email}
    settings: dict[str, Any] = {
        "base_url": wiki_url,
        "space_key": space,
        "token": "${" + token_var + "}",
        "email": "${" + email_var + "}",
        "enable_file_conversion": True,
    }
    return settings, secrets
def _collect_jira_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Jira source.

    The token and email are returned as extra env vars named after the
    source; the config only carries ``${...}`` placeholders for them.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    jira_url: str = click.prompt("Jira URL (e.g., https://mycompany.atlassian.net)")
    project: str = click.prompt("Project key")
    account_email: str = click.prompt("Email")
    api_token: str = click.prompt("API token", hide_input=True)

    env_suffix = _source_name_to_env_suffix(source_name)
    token_var = "JIRA_TOKEN_" + env_suffix
    email_var = "JIRA_EMAIL_" + env_suffix

    secrets: dict[str, str] = {token_var: api_token, email_var: account_email}
    settings: dict[str, Any] = {
        "base_url": jira_url,
        "project_key": project,
        "token": "${" + token_var + "}",
        "email": "${" + email_var + "}",
        "enable_file_conversion": True,
    }
    return settings, secrets
def _collect_publicdocs_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Public Documentation source.

    *source_name* is accepted for signature parity with the other collectors
    but is not used: this source type produces no env vars.

    Returns:
        Tuple of (source config dict, extra env vars); the env dict is empty.
    """
    docs_url: str = click.prompt("Documentation URL (e.g., https://docs.example.com/)")
    docs_version: str = click.prompt("Version", default="latest")
    allowed_formats = click.Choice(["html", "markdown"])
    docs_format: str = click.prompt(
        "Content type",
        default="html",
        type=allowed_formats,
    )

    settings: dict[str, Any] = {}
    settings["base_url"] = docs_url
    settings["version"] = docs_version
    settings["content_type"] = docs_format
    return settings, {}
def _collect_localfile_config(
    source_name: str, *, workspace_dir: Path | None = None
) -> _SourceResult:
    """Prompt for the settings of a Local Files source.

    An answer that already starts with ``file://`` is kept verbatim; any
    other answer is expanded, resolved and converted to a ``file://`` URI.

    Args:
        source_name: User-chosen identifier for this source.
        workspace_dir: Workspace directory. When provided, defaults to ``<workspace>/docs``.

    Returns:
        Tuple of (source config dict, extra env vars); the env dict is empty.
    """
    suggested = str(workspace_dir / "docs") if workspace_dir is not None else ""

    answer: str = click.prompt(
        "Directory path (e.g., /path/to/files or file:///path)",
        # No workspace means no suggestion: pass None so click shows no default.
        default=suggested or None,
    )
    if answer.startswith("file://"):
        location = answer
    else:
        location = Path(answer).expanduser().resolve().as_uri()

    patterns_raw: str = click.prompt(
        "File types (comma-separated)", default="*.md,*.txt,*.py"
    )
    stripped = (piece.strip() for piece in patterns_raw.split(","))
    patterns = [piece for piece in stripped if piece]

    settings: dict[str, Any] = {
        "base_url": location,
        "file_types": patterns,
        "enable_file_conversion": True,
    }
    return settings, {}
835# ---------------------------------------------------------------------------
836# File writers
837# ---------------------------------------------------------------------------
840def _escape_env_value(value: str) -> str:
841 """Escape a value for .env file if it contains special characters."""
842 if any(c in value for c in ("=", "\n", '"', "'", " ", "\t", "#", "$", "`")):
843 return '"' + value.replace("\\", "\\\\").replace('"', '\\"') + '"'
844 return value
def _write_env_file(
    path: Path,
    *,
    openai_key: str,
    qdrant_url: str,
    qdrant_api_key: str,
    collection_name: str,
    extra_vars: dict[str, str] | None = None,
) -> None:
    """Write the ``.env`` file.

    Only non-default values are emitted so the file stays minimal. The file
    is restricted to mode ``0o600`` where the platform allows it, including
    when an existing file is overwritten.

    Args:
        path: Destination path for the ``.env`` file.
        openai_key: OpenAI API key (always written).
        qdrant_url: Qdrant URL (written only when not the default localhost).
        qdrant_api_key: Qdrant API key (written only when non-empty).
        collection_name: Collection name (written only when not "documents").
        extra_vars: Additional environment variables from source-specific config.

    Raises:
        OSError: If the file cannot be written by either the primary or the
            fallback path.
    """
    lines: list[str] = [f"OPENAI_API_KEY={_escape_env_value(openai_key)}"]

    if qdrant_url and qdrant_url != "http://localhost:6333":
        lines.append(f"QDRANT_URL={_escape_env_value(qdrant_url)}")

    if qdrant_api_key:
        lines.append(f"QDRANT_API_KEY={_escape_env_value(qdrant_api_key)}")

    if collection_name and collection_name != "documents":
        lines.append(f"QDRANT_COLLECTION_NAME={_escape_env_value(collection_name)}")

    if extra_vars:
        lines.append("")
        for key, value in extra_vars.items():
            lines.append(f"{key}={_escape_env_value(value)}")

    lines.append("")  # trailing newline
    content = "\n".join(lines)

    # Write with restrictive permissions from the start
    import os

    try:
        fd = os.open(str(path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        try:
            # The mode given to os.open only applies when the file is
            # created. Tighten a pre-existing file before any secret is
            # written to it; best-effort, as not every platform has fchmod
            # and the call may be refused.
            if hasattr(os, "fchmod"):
                try:
                    os.fchmod(fd, 0o600)
                except OSError:
                    pass
            # os.write may write fewer bytes than requested; loop until done.
            remaining = memoryview(content.encode("utf-8"))
            while remaining:
                written = os.write(fd, remaining)
                remaining = remaining[written:]
            os.fsync(fd)
        finally:
            os.close(fd)
    except OSError:
        # Fallback: retry with a plain text write, then restrict the mode
        # on a best-effort basis. A second failure propagates.
        path.write_text(content, encoding="utf-8")
        try:
            path.chmod(0o600)
        except OSError:
            pass
def _write_config_file_multi(
    path: Path,
    *,
    sources: dict[str, dict[str, Any]],
) -> None:
    """Write ``config.yaml`` in the simplified (sources-only) format.

    The file starts with a short comment header followed by a YAML document
    whose single top-level key is ``sources``.

    Args:
        path: Destination path for ``config.yaml``.
        sources: Dict of source_type -> {source_name: source_config}.
    """
    import yaml

    header = (
        "# Generated by qdrant-loader setup\n"
        "# Simplified configuration format\n"
        "# See config.template.yaml for the full multi-project format.\n\n"
    )
    document: dict[str, Any] = {"sources": sources}

    with path.open("w", encoding="utf-8") as out:
        out.write(header)
        # sort_keys=False keeps the keys in insertion order.
        yaml.dump(document, out, default_flow_style=False, sort_keys=False)
def _write_config_file_advanced(
    path: Path,
    *,
    global_config: dict[str, Any],
    projects: dict[str, dict[str, Any]],
) -> None:
    """Write ``config.yaml`` in the advanced multi-project format.

    The file starts with a short comment header followed by a YAML document
    with the top-level keys ``global`` and ``projects``, in that order.

    Args:
        path: Destination path for ``config.yaml``.
        global_config: Global configuration dict (qdrant, embedding, chunking).
        projects: Dict of project_id -> project config dict.
    """
    import yaml

    header = (
        "# Generated by qdrant-loader setup (advanced mode)\n"
        "# Multi-project configuration format\n"
        "# See config.template.yaml for all available options.\n\n"
    )
    document: dict[str, Any] = {}
    document["global"] = global_config
    document["projects"] = projects

    with path.open("w", encoding="utf-8") as out:
        out.write(header)
        # sort_keys=False keeps the keys in insertion order.
        yaml.dump(document, out, default_flow_style=False, sort_keys=False)