Coverage for src / qdrant_loader / cli / commands / setup_cmd.py: 88%
315 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-18 04:48 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-18 04:48 +0000
1"""Interactive setup wizard for qdrant-loader configuration."""
3from __future__ import annotations
5from pathlib import Path
6from typing import Any
8import click
10# Heavy modules (questionary ~1.4s, rich ~0.4s) are lazy-imported via helpers
11# to keep CLI startup fast.
# Process-wide rich Console; stays None until the first call to _get_console().
_console = None


def _get_console():
    """Return the shared rich ``Console``, creating it on first use.

    ``rich`` is imported inside the function so that importing this module
    does not pay for it; the instance is cached in the module-level
    ``_console`` and reused by every later call.
    """
    global _console
    if _console is not None:
        return _console

    from rich.console import Console

    _console = Console()
    return _console
# Source kinds offered by the wizard: config key -> label shown in the menu.
# Insertion order is the order in which the choices are presented.
SOURCE_TYPES: dict[str, str] = {
    "git": "Git Repository",
    "confluence": "Confluence Wiki",
    "jira": "Jira Issues",
    "publicdocs": "Public Documentation (website)",
    "localfile": "Local Files",
}

# Shape returned by each _collect_*_config helper:
# (dict that goes into config.yaml, extra variables that go into .env).
_SourceResult = tuple[dict[str, Any], dict[str, str]]


# Setup modes: mode key -> one-line description shown in the mode selector.
SETUP_MODES: dict[str, str] = {
    "default": "Quick start with localfile source pointing to current directory",
    "normal": "Interactive wizard with simplified config format",
    "advanced": "Full control over global settings, multi-project format",
}
def run_setup(output_dir: Path | None = None, mode: str | None = None) -> None:
    """Entry point for the setup command.

    Picks a setup mode (asking the user when *mode* is ``None``), resolves the
    workspace directory and hands over to the mode-specific routine.

    Args:
        output_dir: Directory in which the generated files are placed.
            If ``None``, "default" mode uses ``./workspace`` without
            prompting; the other modes ask the user for a folder.
        mode: One of ``"default"``, ``"normal"``, ``"advanced"``, or ``None``
            to show the interactive mode selector.

    Raises:
        click.BadParameter: If *mode* is not a known setup mode, or if the
            workspace path exists but is not a directory.
    """
    # ------------------------------------------------------------------
    # Step 0: Mode selection (before workspace, so default can skip prompt)
    # ------------------------------------------------------------------
    if mode is None:
        mode = _select_setup_mode()
        if mode is None:
            _get_console().print("[yellow]Setup cancelled.[/yellow]")
            return

    dispatch = {
        "default": run_setup_default,
        "normal": run_setup_wizard,
        "advanced": run_setup_advanced,
    }
    # Reject an unknown mode before anything touches the filesystem; a plain
    # dispatch[mode] lookup would only fail after the workspace was created.
    handler = dispatch.get(mode)
    if handler is None:
        raise click.BadParameter(
            f"Unknown setup mode '{mode}'. Choose one of: {', '.join(dispatch)}.",
            param_hint="mode",
        )

    # ------------------------------------------------------------------
    # Step 1: Workspace folder, then run the selected mode
    # ------------------------------------------------------------------
    # The workspace prompt lives inside the try block so that aborting it is
    # reported the same way as aborting any later prompt.
    try:
        if mode == "default" and output_dir is None:
            # Default mode never prompts: fall back to ./workspace.
            output_dir = Path("workspace")
        handler(_resolve_workspace(output_dir))
    except (click.Abort, KeyboardInterrupt):
        _get_console().print("\n[yellow]Setup cancelled.[/yellow]")
def _resolve_workspace(output_dir: Path | None) -> Path:
    """Resolve and prepare the workspace directory.

    When *output_dir* is provided it is used directly. Otherwise the user is
    prompted, with ``workspace`` (relative to the current directory) as the
    default answer. A leading ``~`` in the typed answer is expanded to the
    user's home directory.

    Args:
        output_dir: The explicitly requested directory, or ``None`` to prompt.

    Returns:
        Absolute :class:`Path` to the workspace directory (created if needed).

    Raises:
        click.BadParameter: If the chosen path exists but is not a directory.
    """
    # Explicit directory: no prompt and no status message.
    if output_dir is not None:
        resolved = Path(output_dir).resolve()
        if resolved.exists() and not resolved.is_dir():
            raise click.BadParameter(
                f"'{resolved}' exists but is not a directory.",
                param_hint="output_dir",
            )
        resolved.mkdir(parents=True, exist_ok=True)
        return resolved

    # Interactive: prompt with default ./workspace
    default_ws = "workspace"
    raw: str = click.prompt("Workspace folder", default=default_ws)
    # Drop characters that cannot be encoded as UTF-8 (e.g. lone surrogates)
    # so they never end up in a directory name.
    chosen = raw.encode("utf-8", errors="ignore").decode("utf-8").strip()
    if not chosen:
        chosen = default_ws

    # The shell does not expand "~" for text typed at a prompt, so do it here;
    # otherwise "~/ws" would create a literal "~" folder under the cwd.
    try:
        chosen_path = Path(chosen).expanduser()
    except RuntimeError:
        # Home directory cannot be determined: keep the text as typed.
        chosen_path = Path(chosen)

    # Joining with an absolute path yields that absolute path unchanged.
    ws_path = (Path.cwd() / chosen_path).resolve()
    if ws_path.exists() and not ws_path.is_dir():
        raise click.BadParameter(
            f"'{ws_path}' exists but is not a directory.",
            param_hint="workspace",
        )
    if not ws_path.exists():
        ws_path.mkdir(parents=True, exist_ok=True)
        _get_console().print(f"[green]Created workspace: {ws_path}[/green]")
    else:
        _get_console().print(f"[cyan]Using workspace: {ws_path}[/cyan]")

    return ws_path
def _select_setup_mode() -> str | None:
    """Show the setup-mode menu and return the user's pick.

    Returns:
        A key of :data:`SETUP_MODES`, or ``None`` when the user picks
        "Cancel" or the prompt yields no answer.
    """
    import questionary
    from rich.panel import Panel

    banner = Panel(
        "[bold]qdrant-loader Setup[/bold]\nChoose a setup mode to get started.",
        style="blue",
    )
    _get_console().print(banner)

    # Sentinel value carried by the extra "Cancel" entry.
    cancel_value = "__cancel__"
    menu = []
    for mode_key, summary in SETUP_MODES.items():
        menu.append(
            questionary.Choice(
                title=f"{mode_key.capitalize():<10} - {summary}", value=mode_key
            )
        )
    menu.append(questionary.Choice(title="Cancel", value=cancel_value))

    try:
        picked = questionary.select(
            "Select setup mode:",
            choices=menu,
            default="default",
        ).ask()
    except (EOFError, KeyboardInterrupt):
        return None

    if picked is None or picked == cancel_value:
        return None
    return picked
def run_setup_default(output_dir: Path) -> None:
    """Write a minimal config with one localfile source at ``<output_dir>/docs``.

    Apart from the final write confirmation nothing is asked: ``config.yaml``
    and ``.env`` are filled with fixed defaults and a ``docs`` folder is
    created inside the workspace for the user's documents.

    Args:
        output_dir: Directory in which the generated files are placed.
    """
    from rich.panel import Panel

    output_dir = Path(output_dir).resolve()
    config_path = output_dir / "config.yaml"
    env_path = output_dir / ".env"

    # Preview first, then let the user back out before anything is written.
    _show_file_preview(output_dir, config_path, env_path)
    if not _confirm_overwrite(config_path, env_path):
        return

    output_dir.mkdir(parents=True, exist_ok=True)

    # The localfile source points at <workspace>/docs, expressed as a file:// URI.
    docs_dir = output_dir / "docs"
    docs_dir.mkdir(parents=True, exist_ok=True)
    localfile_source: dict[str, Any] = {
        "base_url": docs_dir.as_uri(),
        "file_types": ["*.md", "*.txt", "*.py"],
    }
    sources: dict[str, dict[str, Any]] = {"localfile": {"my-docs": localfile_source}}

    _write_env_file(
        env_path,
        openai_key="your_openai_api_key_here",
        qdrant_url="http://localhost:6333",
        qdrant_api_key="",
        collection_name="documents",
    )
    _write_config_file_multi(config_path, sources=sources)

    report = "\n".join(
        [
            "[green]Created:[/green]",
            f" - {config_path}",
            f" - {env_path}",
            f" - {docs_dir}/ (place your documents here)",
            "",
            "[bold]Next steps:[/bold]",
            f" 1. Set your OPENAI_API_KEY in {env_path}",
            f" 2. Place your documents in {docs_dir}/",
            f" 3. Run: qdrant-loader init --workspace {output_dir}",
            f" 4. Run: qdrant-loader ingest --workspace {output_dir}",
        ]
    )
    _get_console().print(Panel(report, title="Default Setup Complete", style="green"))
def run_setup_wizard(output_dir: Path) -> None:
    """Run the interactive setup wizard (Normal mode).

    Asks for the core connection settings, collects one or more data sources,
    and, after confirmation, writes ``config.yaml`` and ``.env`` into
    *output_dir*.

    Args:
        output_dir: Directory in which the generated files are placed.
    """
    from rich.panel import Panel

    output_dir = Path(output_dir).resolve()

    intro = Panel(
        "[bold]qdrant-loader Setup Wizard[/bold] [dim](Normal mode)[/dim]\n"
        "Generate config.yaml and .env for your project.",
        style="blue",
    )
    _get_console().print(intro)

    # --- Step 1: core settings --------------------------------------------
    _get_console().print("\n[bold cyan]Step 1: Core Settings[/bold cyan]")
    openai_key: str = click.prompt("OpenAI API Key", hide_input=True)
    qdrant_url: str = click.prompt("Qdrant URL", default="http://localhost:6333")
    qdrant_api_key: str = click.prompt(
        "Qdrant API Key (leave empty for local)", default="", hide_input=True
    )
    collection_name: str = click.prompt("Collection name", default="documents")

    # --- Steps 2+3: sources (the helper loops until the user is done) ------
    all_sources: dict[str, dict[str, Any]] = {}
    all_extra_env: dict[str, str] = {}
    _collect_sources_loop(all_sources, all_extra_env, workspace_dir=output_dir)

    # --- Step 4: preview, confirm, write ----------------------------------
    config_path = output_dir / "config.yaml"
    env_path = output_dir / ".env"
    _show_file_preview(output_dir, config_path, env_path)
    if not _confirm_overwrite(config_path, env_path):
        return

    output_dir.mkdir(parents=True, exist_ok=True)
    _write_env_file(
        env_path,
        openai_key=openai_key,
        qdrant_url=qdrant_url,
        qdrant_api_key=qdrant_api_key,
        collection_name=collection_name,
        extra_vars=all_extra_env,
    )
    _write_config_file_multi(config_path, sources=all_sources)

    # One "<type>(<count>)" entry per source type for the closing panel.
    summary_parts = [
        f"{type_key}({len(named)})" for type_key, named in all_sources.items()
    ]
    source_summary = ", ".join(summary_parts)

    report = "\n".join(
        [
            "[green]Created:[/green]",
            f" - {config_path}",
            f" - {env_path}",
            f" - Sources: {source_summary}",
            "",
            "[bold]Next steps:[/bold]",
            " 1. Review the generated files",
            f" 2. Run: qdrant-loader init --workspace {output_dir}",
            f" 3. Run: qdrant-loader ingest --workspace {output_dir}",
        ]
    )
    _get_console().print(Panel(report, title="Setup Complete", style="green"))
def run_setup_advanced(output_dir: Path) -> None:
    """Run the advanced wizard: global settings plus one or more projects.

    Prompts for core, embedding, chunking and reranking settings, then for
    any number of projects (each with its own sources), and finally writes
    ``config.yaml`` in the multi-project format together with ``.env``.

    Args:
        output_dir: Directory in which the generated files are placed.
    """
    from rich.panel import Panel

    output_dir = Path(output_dir).resolve()

    intro = Panel(
        "[bold]qdrant-loader Setup Wizard[/bold] [dim](Advanced mode)[/dim]\n"
        "Full control over global settings and multi-project configuration.",
        style="blue",
    )
    _get_console().print(intro)

    # --- Step 1: core settings --------------------------------------------
    _get_console().print("\n[bold cyan]Step 1: Core Settings[/bold cyan]")
    openai_key: str = click.prompt("OpenAI API Key", hide_input=True)
    qdrant_url: str = click.prompt("Qdrant URL", default="http://localhost:6333")
    qdrant_api_key: str = click.prompt(
        "Qdrant API Key (leave empty for local)", default="", hide_input=True
    )
    collection_name: str = click.prompt("Collection name", default="documents")

    # --- Step 2: embedding ------------------------------------------------
    _get_console().print("\n[bold cyan]Step 2: Embedding Configuration[/bold cyan]")
    embedding_model: str = click.prompt(
        "Embedding model", default="text-embedding-3-small"
    )
    embedding_endpoint: str = click.prompt(
        "Embedding endpoint (leave empty for OpenAI default)",
        default="",
    )
    vector_size: int = click.prompt("Vector size", default=1536, type=int)

    # --- Step 3: chunking -------------------------------------------------
    _get_console().print("\n[bold cyan]Step 3: Chunking Configuration[/bold cyan]")
    chunk_size: int = click.prompt("Chunk size (characters)", default=1500, type=int)
    chunk_overlap: int = click.prompt(
        "Chunk overlap (characters)", default=200, type=int
    )

    # --- Step 4: reranking ------------------------------------------------
    _get_console().print("\n[bold cyan]Step 4: Reranking Configuration[/bold cyan]")
    enable_reranking: bool = click.confirm(
        "Enable cross-encoder reranking?", default=True
    )

    # --- Step 5: projects, each with its own sources ----------------------
    projects: dict[str, dict[str, Any]] = {}
    # Shared across projects so source env-var names stay unique file-wide.
    all_extra_env: dict[str, str] = {}

    adding_projects = True
    while adding_projects:
        _get_console().print("\n[bold cyan]Step 5: Project Configuration[/bold cyan]")

        # Keep asking until the ID is not already taken.
        project_id: str = click.prompt("Project ID", default="my-project")
        while project_id in projects:
            _get_console().print(
                f"[red]Project '{project_id}' already exists. "
                f"Pick a different ID.[/red]"
            )
            project_id = click.prompt("Project ID", default="my-project")

        display_name: str = click.prompt("Display name", default=project_id)
        description: str = click.prompt("Description", default="")

        project_sources: dict[str, dict[str, Any]] = {}
        _collect_sources_loop(project_sources, all_extra_env, workspace_dir=output_dir)

        projects[project_id] = {
            "project_id": project_id,
            "display_name": display_name,
            "description": description,
            "sources": project_sources,
        }
        _get_console().print(f"[green]Added project: {project_id}[/green]")

        adding_projects = click.confirm("Add another project?", default=False)

    # --- Step 6: preview, confirm, write ----------------------------------
    config_path = output_dir / "config.yaml"
    env_path = output_dir / ".env"
    _show_file_preview(output_dir, config_path, env_path)
    if not _confirm_overwrite(config_path, env_path):
        return

    output_dir.mkdir(parents=True, exist_ok=True)
    _write_env_file(
        env_path,
        openai_key=openai_key,
        qdrant_url=qdrant_url,
        qdrant_api_key=qdrant_api_key,
        collection_name=collection_name,
        extra_vars=all_extra_env,
    )

    # Secrets are referenced as ${VAR} placeholders; the values live in .env.
    embedding_section: dict[str, Any] = {
        "model": embedding_model,
        "api_key": "${OPENAI_API_KEY}",
        "vector_size": vector_size,
    }
    if embedding_endpoint:
        embedding_section["endpoint"] = embedding_endpoint

    # Key order is kept in the written YAML (the writer does not sort keys).
    global_config: dict[str, Any] = {
        "qdrant": {
            "url": qdrant_url,
            "api_key": "${QDRANT_API_KEY}" if qdrant_api_key else None,
            "collection_name": collection_name,
        },
        "embedding": embedding_section,
        "chunking": {
            "chunk_size": chunk_size,
            "chunk_overlap": chunk_overlap,
        },
        "reranking": {
            "enabled": enable_reranking,
        },
    }

    _write_config_file_advanced(
        config_path,
        global_config=global_config,
        projects=projects,
    )

    # "<project>(<n> sources)" per project, counting sources across all types.
    summary_parts = []
    for pid, project in projects.items():
        source_count = sum(len(named) for named in project["sources"].values())
        summary_parts.append(f"{pid}({source_count} sources)")
    project_summary = ", ".join(summary_parts)

    report = "\n".join(
        [
            "[green]Created:[/green]",
            f" - {config_path}",
            f" - {env_path}",
            f" - Projects: {project_summary}",
            "",
            "[bold]Next steps:[/bold]",
            " 1. Review the generated files",
            f" 2. Run: qdrant-loader init --workspace {output_dir}",
            f" 3. Run: qdrant-loader ingest --workspace {output_dir}",
        ]
    )
    _get_console().print(
        Panel(report, title="Advanced Setup Complete", style="green")
    )
492# ---------------------------------------------------------------------------
493# Shared helpers
494# ---------------------------------------------------------------------------
def _show_file_preview(output_dir: Path, *paths: Path) -> None:
    """Print a panel listing the workspace and the files about to be written.

    Each file is tagged "(overwrite)" when it already exists on disk and
    "(new)" otherwise.

    Args:
        output_dir: The workspace directory.
        paths: File paths that will be created or overwritten.
    """
    from rich.panel import Panel

    def _row(target: Path) -> str:
        # One line per file: bare file name plus its coloured status tag.
        if target.exists():
            tag = "[yellow](overwrite)[/yellow]"
        else:
            tag = "[green](new)[/green]"
        return f" {target.name} {tag}"

    rows = [f"[bold]Workspace:[/bold] {output_dir}"]
    rows.extend(_row(target) for target in paths)

    _get_console().print(
        Panel("\n".join(rows), title="Files to write", style="cyan")
    )
def _confirm_overwrite(*paths: Path) -> bool:
    """Ask for confirmation before any file is written.

    A confirmation is always requested. When at least one of *paths* already
    exists the question names those files and warns about overwriting;
    otherwise a plain "Write files?" (defaulting to yes) is asked.

    Returns:
        ``True`` if the user agreed to proceed, ``False`` if they declined
        (in which case a cancellation message has been printed).
    """
    clobbered = [candidate.name for candidate in paths if candidate.exists()]

    if clobbered:
        names = ", ".join(clobbered)
        approved = click.confirm(
            f"{names} already exist(s) and will be overwritten. Proceed?"
        )
    else:
        approved = click.confirm("Write files?", default=True)

    if approved:
        return True
    _get_console().print("[yellow]Setup cancelled.[/yellow]")
    return False
def _select_source_type() -> str | None:
    """Show the source-type menu and return the user's pick.

    The menu lists every entry of :data:`SOURCE_TYPES` with "localfile"
    preselected.

    Returns:
        A key of :data:`SOURCE_TYPES`, or ``None`` when the prompt yields no
        answer (``ask()`` returned ``None``, or EOF / keyboard interrupt).
    """
    import questionary

    options = []
    for type_key, label in SOURCE_TYPES.items():
        options.append(
            questionary.Choice(title=f"{type_key:<12} - {label}", value=type_key)
        )

    try:
        return questionary.select(
            "Select source type:",
            choices=options,
            default="localfile",
        ).ask()
    except (EOFError, KeyboardInterrupt):
        return None
def _collect_sources_loop(
    all_sources: dict[str, dict[str, Any]],
    all_extra_env: dict[str, str],
    workspace_dir: Path | None = None,
) -> None:
    """Interactively collect one or more data sources from the user.

    Each round asks for a source type, a unique source name and the
    type-specific settings, then merges the result into *all_sources* and
    *all_extra_env*. The loop ends when no source type is selected or the
    user declines to add another source.

    Args:
        all_sources: Accumulated source configs (mutated in-place).
        all_extra_env: Accumulated extra env vars (mutated in-place).
        workspace_dir: Workspace directory, used to derive defaults (e.g. docs path).
    """
    another = True
    while another:
        _get_console().print("\n[bold cyan]Data Source[/bold cyan]")

        source_type = _select_source_type()
        if source_type is None:
            return

        _get_console().print(
            f"\n[bold cyan]Configure {SOURCE_TYPES[source_type]}[/bold cyan]"
        )

        taken_names = all_sources.get(source_type, {})
        # Snapshot of every env var registered so far. The caller may share
        # all_extra_env across projects, so this check spans all of them.
        taken_env_keys = set(all_extra_env)

        while True:
            source_name = click.prompt(
                "Source name (identifier)", default=f"my-{source_type}"
            )
            env_tail = "_" + _source_name_to_env_suffix(source_name)
            if source_name in taken_names:
                _get_console().print(
                    f"[red]{source_type}/{source_name} already exists. "
                    f"Pick a different name.[/red]"
                )
            # NOTE(review): endswith() is broader than exact key equality,
            # e.g. suffix "REPO" also matches "GIT_TOKEN_MY_REPO". Confirm
            # that rejecting such names is intended.
            elif any(key.endswith(env_tail) for key in taken_env_keys):
                _get_console().print(
                    f"[red]'{source_name}' collides with an existing "
                    f"env var suffix across projects. "
                    f"Pick a different name.[/red]"
                )
            else:
                break

        source_config, extra_env = _collect_source_config(
            source_type, source_name, workspace_dir=workspace_dir
        )

        all_sources.setdefault(source_type, {})[source_name] = source_config
        all_extra_env.update(extra_env)

        _get_console().print(f"[green]Added {source_type}/{source_name}[/green]")

        another = click.confirm("Add another source?", default=False)
635# ---------------------------------------------------------------------------
636# Source-specific config collectors
637# ---------------------------------------------------------------------------
def _source_name_to_env_suffix(source_name: str) -> str:
    """Turn a source name into an upper-case env-var suffix.

    Every character other than an ASCII letter or digit becomes ``_``;
    leading and trailing underscores are removed and the rest is upper-cased
    (``'my-repo'`` -> ``'MY_REPO'``). A name with nothing left after that
    yields ``'DEFAULT'``.
    """
    import re

    cleaned = re.sub(r"[^A-Za-z0-9]", "_", source_name)
    trimmed = cleaned.strip("_")
    if not trimmed:
        return "DEFAULT"
    return trimmed.upper()
def _collect_source_config(
    source_type: str,
    source_name: str,
    *,
    workspace_dir: Path | None = None,
) -> _SourceResult:
    """Run the collector that matches *source_type*.

    Args:
        source_type: One of the keys in ``SOURCE_TYPES``.
        source_name: User-chosen identifier, used to create unique env var names.
        workspace_dir: Workspace directory; only the localfile collector
            receives it.

    Returns:
        A tuple of (source yaml dict, extra env-var dict). An unrecognised
        *source_type* yields two empty dicts.
    """
    # localfile is the only collector that takes the workspace directory.
    if source_type == "localfile":
        return _collect_localfile_config(source_name, workspace_dir=workspace_dir)
    if source_type == "git":
        return _collect_git_config(source_name)
    if source_type == "confluence":
        return _collect_confluence_config(source_name)
    if source_type == "jira":
        return _collect_jira_config(source_name)
    if source_type == "publicdocs":
        return _collect_publicdocs_config(source_name)
    return {}, {}
def _collect_git_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Git repository source.

    When an access token is entered it is stored under an env var named
    ``GIT_TOKEN_<SUFFIX>`` and the config refers to it as ``${...}``;
    without a token no env var is produced.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    url: str = click.prompt("Repository URL (e.g., https://github.com/org/repo.git)")
    branch: str = click.prompt("Branch", default="main")
    token: str = click.prompt(
        "Access token (leave empty for public repos)", default="", hide_input=True
    )
    file_types_raw: str = click.prompt(
        "File types (comma-separated)", default="*.md,*.txt,*.py"
    )

    # Split on commas, trim each piece and drop the empty ones.
    trimmed = (piece.strip() for piece in file_types_raw.split(","))
    patterns = [piece for piece in trimmed if piece]

    settings: dict[str, Any] = {
        "base_url": url,
        "branch": branch,
        "file_types": patterns,
    }
    if not token:
        return settings, {}

    env_key = "GIT_TOKEN_" + _source_name_to_env_suffix(source_name)
    settings["token"] = "${" + env_key + "}"
    return settings, {env_key: token}
def _collect_confluence_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Confluence source.

    The e-mail and API token are stored under ``CONFLUENCE_EMAIL_<SUFFIX>``
    and ``CONFLUENCE_TOKEN_<SUFFIX>``; the config only holds ``${...}``
    references to them.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    # Dict entries are evaluated top to bottom, which fixes the prompt order.
    settings: dict[str, Any] = {
        "base_url": click.prompt(
            "Confluence URL (e.g., https://mycompany.atlassian.net/wiki)"
        ),
        "space_key": click.prompt("Space key"),
    }
    email: str = click.prompt("Email")
    token: str = click.prompt("API token", hide_input=True)

    env_suffix = _source_name_to_env_suffix(source_name)
    token_var = "CONFLUENCE_TOKEN_" + env_suffix
    email_var = "CONFLUENCE_EMAIL_" + env_suffix

    settings["token"] = "${" + token_var + "}"
    settings["email"] = "${" + email_var + "}"
    return settings, {token_var: token, email_var: email}
def _collect_jira_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Jira source.

    The e-mail and API token are stored under ``JIRA_EMAIL_<SUFFIX>`` and
    ``JIRA_TOKEN_<SUFFIX>``; the config only holds ``${...}`` references to
    them.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    # Dict entries are evaluated top to bottom, which fixes the prompt order.
    settings: dict[str, Any] = {
        "base_url": click.prompt("Jira URL (e.g., https://mycompany.atlassian.net)"),
        "project_key": click.prompt("Project key"),
    }
    email: str = click.prompt("Email")
    token: str = click.prompt("API token", hide_input=True)

    env_suffix = _source_name_to_env_suffix(source_name)
    token_var = "JIRA_TOKEN_" + env_suffix
    email_var = "JIRA_EMAIL_" + env_suffix

    settings["token"] = "${" + token_var + "}"
    settings["email"] = "${" + email_var + "}"
    return settings, {token_var: token, email_var: email}
def _collect_publicdocs_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a public documentation source.

    *source_name* is accepted for signature parity with the other collectors
    but is not used: this source type produces no env vars.

    Returns:
        Tuple of (source config dict, extra env vars); the latter is empty.
    """
    allowed_content_types = click.Choice(["html", "markdown"])

    # Dict entries are evaluated top to bottom, which fixes the prompt order.
    settings: dict[str, Any] = {
        "base_url": click.prompt(
            "Documentation URL (e.g., https://docs.example.com/)"
        ),
        "version": click.prompt("Version", default="latest"),
        "content_type": click.prompt(
            "Content type",
            default="html",
            type=allowed_content_types,
        ),
    }
    return settings, {}
def _collect_localfile_config(
    source_name: str, *, workspace_dir: Path | None = None
) -> _SourceResult:
    """Prompt for the settings of a local-files source.

    Args:
        source_name: User-chosen identifier for this source (not used here).
        workspace_dir: Workspace directory. When provided, ``<workspace>/docs``
            is offered as the default directory.

    Returns:
        Tuple of (source config dict, extra env vars); the latter is empty.
    """
    suggested = str(workspace_dir / "docs") if workspace_dir is not None else ""

    # An empty suggestion is passed as None so the prompt has no default.
    raw_path: str = click.prompt(
        "Directory path (e.g., /path/to/files or file:///path)",
        default=suggested or None,
    )

    # Answers already in file:// form are kept verbatim; plain paths are
    # expanded, made absolute and converted to a file:// URI.
    if raw_path.startswith("file://"):
        location = raw_path
    else:
        location = Path(raw_path).expanduser().resolve().as_uri()

    file_types_raw: str = click.prompt(
        "File types (comma-separated)", default="*.md,*.txt,*.py"
    )
    trimmed = (piece.strip() for piece in file_types_raw.split(","))
    patterns = [piece for piece in trimmed if piece]

    settings: dict[str, Any] = {"base_url": location, "file_types": patterns}
    return settings, {}
828# ---------------------------------------------------------------------------
829# File writers
830# ---------------------------------------------------------------------------
def _escape_env_value(value: str) -> str:
    """Return *value* in a form suitable for the right-hand side of a .env line.

    Values containing ``=``, a newline, a quote, a space, a tab, ``#``, ``$``
    or a backtick are wrapped in double quotes, with backslashes and double
    quotes escaped. Anything else is returned unchanged.
    """
    needs_quoting = set('=\n"\' \t#$`')
    if needs_quoting.isdisjoint(value):
        return value

    # Backslashes first, so the ones added for the quotes are not doubled.
    escaped = value.replace("\\", "\\\\")
    escaped = escaped.replace('"', '\\"')
    return f'"{escaped}"'
def _write_env_file(
    path: Path,
    *,
    openai_key: str,
    qdrant_url: str,
    qdrant_api_key: str,
    collection_name: str,
    extra_vars: dict[str, str] | None = None,
) -> None:
    """Write the ``.env`` file with owner-only permissions.

    Only non-default values are emitted so the file stays minimal.

    Args:
        path: Destination path for the ``.env`` file.
        openai_key: OpenAI API key (always written).
        qdrant_url: Qdrant URL (written only when not the default localhost).
        qdrant_api_key: Qdrant API key (written only when non-empty).
        collection_name: Collection name (written only when not "documents").
        extra_vars: Additional environment variables from source-specific config.

    Raises:
        OSError: If the file cannot be written by either the low-level path
            or the ``Path.write_text`` fallback.
    """
    import os

    lines: list[str] = [f"OPENAI_API_KEY={_escape_env_value(openai_key)}"]

    if qdrant_url and qdrant_url != "http://localhost:6333":
        lines.append(f"QDRANT_URL={_escape_env_value(qdrant_url)}")

    if qdrant_api_key:
        lines.append(f"QDRANT_API_KEY={_escape_env_value(qdrant_api_key)}")

    if collection_name and collection_name != "documents":
        lines.append(f"QDRANT_COLLECTION_NAME={_escape_env_value(collection_name)}")

    if extra_vars:
        lines.append("")  # blank line before the source-specific variables
        lines.extend(
            f"{key}={_escape_env_value(value)}" for key, value in extra_vars.items()
        )

    lines.append("")  # trailing newline
    content = "\n".join(lines)
    payload = content.encode("utf-8")

    try:
        fd = os.open(str(path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        try:
            # The mode given to os.open() only applies when the file is
            # created. An existing .env keeps its old permissions, so tighten
            # them explicitly before any secret is written.
            if hasattr(os, "fchmod"):
                os.fchmod(fd, 0o600)
            # os.write() may write fewer bytes than requested; loop until done.
            remaining = memoryview(payload)
            while len(remaining) > 0:
                written = os.write(fd, remaining)
                remaining = remaining[written:]
            os.fsync(fd)
        finally:
            os.close(fd)
    except OSError:
        # Fallback when the low-level path fails (e.g. platform quirks):
        # write through pathlib, then restrict permissions on a best-effort basis.
        path.write_text(content, encoding="utf-8")
        try:
            path.chmod(0o600)
        except OSError:
            pass
def _write_config_file_multi(
    path: Path,
    *,
    sources: dict[str, dict[str, Any]],
) -> None:
    """Write ``config.yaml`` in the simplified, sources-only format.

    The file starts with a short comment header followed by a YAML document
    whose single top-level key is ``sources``. Keys keep their insertion
    order.

    Args:
        path: Destination path for ``config.yaml``.
        sources: Dict of source_type -> {source_name: source_config}.
    """
    import yaml

    header = (
        "# Generated by qdrant-loader setup\n"
        "# Simplified configuration format\n"
        "# See config.template.yaml for the full multi-project format.\n\n"
    )
    document: dict[str, Any] = {"sources": sources}

    with path.open("w", encoding="utf-8") as handle:
        handle.write(header)
        yaml.dump(document, handle, default_flow_style=False, sort_keys=False)
def _write_config_file_advanced(
    path: Path,
    *,
    global_config: dict[str, Any],
    projects: dict[str, dict[str, Any]],
) -> None:
    """Write ``config.yaml`` in the advanced multi-project format.

    The file starts with a short comment header followed by a YAML document
    with the top-level keys ``global`` and ``projects``, in that order. Keys
    keep their insertion order.

    Args:
        path: Destination path for ``config.yaml``.
        global_config: Global configuration dict, stored under ``global``.
        projects: Dict of project_id -> project config dict.
    """
    import yaml

    header = (
        "# Generated by qdrant-loader setup (advanced mode)\n"
        "# Multi-project configuration format\n"
        "# See config.template.yaml for all available options.\n\n"
    )
    document: dict[str, Any] = {"global": global_config, "projects": projects}

    with path.open("w", encoding="utf-8") as handle:
        handle.write(header)
        yaml.dump(document, handle, default_flow_style=False, sort_keys=False)