Coverage for src/qdrant_loader/cli/commands/setup

1"""Interactive setup wizard for qdrant-loader configuration."""

3from __future__ import annotations

5from pathlib import Path

6from typing import Any

8import click

10# Heavy modules (questionary ~1.4s, rich ~0.4s) are lazy-imported via helpers

11# to keep CLI startup fast.

12_console = None

def _get_console():
    """Return the module-wide rich ``Console``, building it on first use.

    ``rich`` is imported inside the function so that merely importing this
    module does not pay for it; the instance is cached in ``_console``.
    """
    global _console
    if _console is not None:
        return _console

    from rich.console import Console

    _console = Console()
    return _console

# Source type key (as written to config.yaml) -> label shown in the selector.
# Insertion order is the order in which the choices are listed.
SOURCE_TYPES: dict[str, str] = {
    "git": "Git Repository",
    "confluence": "Confluence Wiki",
    "jira": "Jira Issues",
    "publicdocs": "Public Documentation (website)",
    "localfile": "Local Files",
}

# Return type shared by the _collect_*_config helpers:
# (yaml-ready source dict, extra env vars to append to .env).
_SourceResult = tuple[dict[str, Any], dict[str, str]]

# Setup mode key -> one-line description shown in the mode selector.
SETUP_MODES: dict[str, str] = {
    "default": "Quick start with localfile source pointing to current directory",
    "normal": "Interactive wizard with simplified config format",
    "advanced": "Full control over global settings, multi-project format",
}

def run_setup(output_dir: Path | None = None, mode: str | None = None) -> None:
    """Entry point for the setup command.

    When *mode* is ``None`` an interactive mode selector is shown first.
    The workspace is then resolved: in ``"default"`` mode a missing
    *output_dir* silently becomes ``./workspace``; in the other modes a
    missing *output_dir* makes :func:`_resolve_workspace` prompt for one.
    An explicitly supplied *output_dir* is always used as-is.

    Args:
        output_dir: Directory in which the generated files are placed, or
            ``None`` to fall back to the behaviour described above.
        mode: One of ``"default"``, ``"normal"``, ``"advanced"`` or ``None``.

    Raises:
        click.BadParameter: If *mode* is not a known setup mode, or (from
            :func:`_resolve_workspace`) if the workspace path exists but is
            not a directory.
    """
    handlers = {
        "default": run_setup_default,
        "normal": run_setup_wizard,
        "advanced": run_setup_advanced,
    }

    # ------------------------------------------------------------------
    # Step 0: Mode selection (before workspace, so default can skip prompt)
    # ------------------------------------------------------------------
    if mode is None:
        mode = _select_setup_mode()
        if mode is None:
            _get_console().print("[yellow]Setup cancelled.[/yellow]")
            return

    # Reject unknown modes *before* prompting for / creating the workspace,
    # instead of failing later with a bare KeyError after side effects.
    if mode not in handlers:
        raise click.BadParameter(
            f"Unknown setup mode '{mode}'. Choose one of: {', '.join(handlers)}.",
            param_hint="mode",
        )

    # ------------------------------------------------------------------
    # Step 1: Workspace folder
    # ------------------------------------------------------------------
    if mode == "default" and output_dir is None:
        # Default mode never prompts: fall back to ./workspace.
        output_dir = Path("workspace")
    workspace = _resolve_workspace(output_dir)

    try:
        handlers[mode](workspace)
    except (click.Abort, KeyboardInterrupt):
        _get_console().print("\n[yellow]Setup cancelled.[/yellow]")

def _resolve_workspace(output_dir: Path | None) -> Path:
    """Resolve and prepare the workspace directory.

    When *output_dir* is provided it is resolved and used directly, without
    any console output.  Otherwise the user is prompted, with ``workspace``
    as the default; the answer is taken relative to the current directory
    and a leading ``~`` is expanded (the prompt is not processed by a shell,
    so nothing else would expand it).

    Args:
        output_dir: The value passed via ``--output-dir``, or ``None``.

    Returns:
        Resolved :class:`Path` to the workspace directory (created if needed).

    Raises:
        click.BadParameter: If the chosen path exists but is not a directory,
            or if a ``~user`` prefix cannot be expanded.
    """
    # If explicitly provided via --output-dir, use it as-is.
    if output_dir is not None:
        resolved = Path(output_dir).resolve()
        if resolved.exists() and not resolved.is_dir():
            raise click.BadParameter(
                f"'{resolved}' exists but is not a directory.",
                param_hint="output_dir",
            )
        resolved.mkdir(parents=True, exist_ok=True)
        return resolved

    # Interactive: prompt with default ./workspace
    default_ws = "workspace"
    raw: str = click.prompt("Workspace folder", default=default_ws)
    # Drop anything that cannot be encoded as UTF-8 (e.g. lone surrogates
    # coming from a misconfigured terminal) before using it as a path.
    chosen = raw.encode("utf-8", errors="ignore").decode("utf-8").strip()
    if not chosen:
        chosen = default_ws

    try:
        # Same expansion _collect_localfile_config applies to typed paths.
        expanded = Path(chosen).expanduser()
    except RuntimeError as exc:
        # Raised by pathlib when the home directory cannot be determined.
        raise click.BadParameter(
            f"Cannot expand '{chosen}': {exc}",
            param_hint="workspace",
        ) from exc

    # Joining with an absolute path discards the cwd part, so absolute and
    # ~-expanded answers work as expected.
    ws_path = (Path.cwd() / expanded).resolve()
    if ws_path.exists() and not ws_path.is_dir():
        raise click.BadParameter(
            f"'{ws_path}' exists but is not a directory.",
            param_hint="workspace",
        )
    if not ws_path.exists():
        ws_path.mkdir(parents=True, exist_ok=True)
        _get_console().print(f"[green]Created workspace: {ws_path}[/green]")
    else:
        _get_console().print(f"[cyan]Using workspace: {ws_path}[/cyan]")

    return ws_path

128

129

def _select_setup_mode() -> str | None:
    """Show the setup-mode menu and return the user's pick.

    Returns:
        A key of :data:`SETUP_MODES`, or ``None`` when the user picks
        "Cancel", the prompt returns nothing, or input ends / is interrupted.
    """
    import questionary
    from rich.panel import Panel

    banner = Panel(
        "[bold]qdrant-loader Setup[/bold]\nChoose a setup mode to get started.",
        style="blue",
    )
    _get_console().print(banner)

    # Sentinel value for the extra "Cancel" entry; never a real mode key.
    cancel_value = "__cancel__"
    menu = []
    for key, desc in SETUP_MODES.items():
        menu.append(
            questionary.Choice(title=f"{key.capitalize():<10} - {desc}", value=key)
        )
    menu.append(questionary.Choice(title="Cancel", value=cancel_value))

    try:
        picked = questionary.select(
            "Select setup mode:",
            choices=menu,
            default="default",
        ).ask()
    except (EOFError, KeyboardInterrupt):
        return None

    if picked is None or picked == cancel_value:
        return None
    return picked

165

166

def run_setup_default(output_dir: Path) -> None:
    """Write a ready-to-edit default workspace without configuration questions.

    Produces ``config.yaml`` (a single ``localfile`` source named ``my-docs``
    pointing at ``<output_dir>/docs``), a ``.env`` containing a placeholder
    OpenAI key, and the ``docs`` directory itself.  The only interaction is
    the write/overwrite confirmation shown after the file preview.

    Args:
        output_dir: Directory in which the generated files are placed.
    """
    from rich.panel import Panel

    workspace = Path(output_dir).resolve()
    config_path = workspace / "config.yaml"
    env_path = workspace / ".env"

    # Preview first, then let the user back out before anything is written.
    _show_file_preview(workspace, config_path, env_path)
    if not _confirm_overwrite(config_path, env_path):
        return

    workspace.mkdir(parents=True, exist_ok=True)

    # The localfile source reads from <workspace>/docs, stored as a file:// URI.
    docs_dir = workspace / "docs"
    docs_dir.mkdir(parents=True, exist_ok=True)
    my_docs: dict[str, Any] = {
        "base_url": docs_dir.as_uri(),
        "file_types": ["*.md", "*.txt", "*.py"],
        "enable_file_conversion": True,
    }
    sources: dict[str, dict[str, Any]] = {"localfile": {"my-docs": my_docs}}

    _write_env_file(
        env_path,
        openai_key="your_openai_api_key_here",
        qdrant_url="http://localhost:6333",
        qdrant_api_key="",
        collection_name="documents",
    )
    _write_config_file_multi(config_path, sources=sources)

    report = "\n".join(
        [
            "[green]Created:[/green]",
            f" - {config_path}",
            f" - {env_path}",
            f" - {docs_dir}/ (place your documents here)",
            "",
            "[bold]Next steps:[/bold]",
            f" 1. Set your OPENAI_API_KEY in {env_path}",
            f" 2. Place your documents in {docs_dir}/",
            f" 3. Run: qdrant-loader init --workspace {workspace}",
            f" 4. Run: qdrant-loader ingest --workspace {workspace}",
        ]
    )
    _get_console().print(
        Panel(report, title="Default Setup Complete", style="green")
    )

230

231

def run_setup_wizard(output_dir: Path) -> None:
    """Run the interactive setup wizard (Normal mode).

    Asks for the core connection settings, collects one or more data
    sources, then - after a preview and confirmation - writes ``config.yaml``
    (simplified format) and ``.env`` into *output_dir*.

    Args:
        output_dir: Directory in which the generated files are placed.
    """
    from rich.panel import Panel

    workspace = Path(output_dir).resolve()

    intro = Panel(
        "[bold]qdrant-loader Setup Wizard[/bold] [dim](Normal mode)[/dim]\n"
        "Generate config.yaml and .env for your project.",
        style="blue",
    )
    _get_console().print(intro)

    # --- Step 1: core settings -----------------------------------------
    _get_console().print("\n[bold cyan]Step 1: Core Settings[/bold cyan]")
    core_settings = {
        "openai_key": click.prompt("OpenAI API Key", hide_input=True),
        "qdrant_url": click.prompt("Qdrant URL", default="http://localhost:6333"),
        "qdrant_api_key": click.prompt(
            "Qdrant API Key (leave empty for local)", default="", hide_input=True
        ),
        "collection_name": click.prompt("Collection name", default="documents"),
    }

    # --- Step 2+3: sources (the helper fills both dicts in place) -------
    collected_sources: dict[str, dict[str, Any]] = {}
    collected_env: dict[str, str] = {}
    _collect_sources_loop(collected_sources, collected_env, workspace_dir=workspace)

    # --- Step 4: preview, confirm, write --------------------------------
    config_path = workspace / "config.yaml"
    env_path = workspace / ".env"

    _show_file_preview(workspace, config_path, env_path)
    if not _confirm_overwrite(config_path, env_path):
        return

    workspace.mkdir(parents=True, exist_ok=True)
    _write_env_file(env_path, extra_vars=collected_env, **core_settings)
    _write_config_file_multi(config_path, sources=collected_sources)

    # e.g. "git(2), localfile(1)": source type followed by its source count.
    per_type = [f"{kind}({len(named)})" for kind, named in collected_sources.items()]
    source_summary = ", ".join(per_type)

    report = "\n".join(
        [
            "[green]Created:[/green]",
            f" - {config_path}",
            f" - {env_path}",
            f" - Sources: {source_summary}",
            "",
            "[bold]Next steps:[/bold]",
            " 1. Review the generated files",
            f" 2. Run: qdrant-loader init --workspace {workspace}",
            f" 3. Run: qdrant-loader ingest --workspace {workspace}",
        ]
    )
    _get_console().print(Panel(report, title="Setup Complete", style="green"))

318

319

def run_setup_advanced(output_dir: Path) -> None:
    """Run the advanced setup wizard (global settings + multi-project format).

    Prompts for core, embedding, chunking and reranking settings, then for
    one or more projects (each with its own sources), and finally writes
    ``config.yaml`` and ``.env`` into *output_dir* after a confirmation.

    Args:
        output_dir: Directory in which the generated files are placed.

    Raises:
        click.BadParameter: If the vector size or chunk size is not a
            positive integer, or the chunk overlap is negative or not
            smaller than the chunk size.
    """
    from rich.panel import Panel

    output_dir = Path(output_dir).resolve()

    _get_console().print(
        Panel(
            "[bold]qdrant-loader Setup Wizard[/bold] [dim](Advanced mode)[/dim]\n"
            "Full control over global settings and multi-project configuration.",
            style="blue",
        )
    )

    # ------------------------------------------------------------------
    # Step 1: Core settings
    # ------------------------------------------------------------------
    _get_console().print("\n[bold cyan]Step 1: Core Settings[/bold cyan]")

    openai_key: str = click.prompt("OpenAI API Key", hide_input=True)
    qdrant_url: str = click.prompt("Qdrant URL", default="http://localhost:6333")
    qdrant_api_key: str = click.prompt(
        "Qdrant API Key (leave empty for local)", default="", hide_input=True
    )
    collection_name: str = click.prompt("Collection name", default="documents")

    # ------------------------------------------------------------------
    # Step 2: Embedding settings
    # ------------------------------------------------------------------
    _get_console().print("\n[bold cyan]Step 2: Embedding Configuration[/bold cyan]")

    embedding_model: str = click.prompt(
        "Embedding model", default="argus-ai/pplx-embed-v1-0.6b:fp32"
    )
    embedding_endpoint: str = click.prompt(
        "Embedding endpoint (Ollama local default)",
        default="http://localhost:11434/v1",
    )
    vector_size: int = click.prompt("Vector size", default=1024, type=int)
    if vector_size <= 0:
        raise click.BadParameter("Vector size must be a positive integer.")

    # ------------------------------------------------------------------
    # Step 3: Chunking settings
    # ------------------------------------------------------------------
    _get_console().print("\n[bold cyan]Step 3: Chunking Configuration[/bold cyan]")

    chunk_size: int = click.prompt("Chunk size (characters)", default=1500, type=int)
    # Validate like vector_size so a nonsensical value is rejected here
    # rather than written to config.yaml.
    if chunk_size <= 0:
        raise click.BadParameter("Chunk size must be a positive integer.")
    chunk_overlap: int = click.prompt(
        "Chunk overlap (characters)", default=200, type=int
    )
    # NOTE(review): assumes the loader requires 0 <= overlap < chunk_size
    # (an overlap >= chunk size cannot make progress) - confirm against the
    # chunking config validation.
    if chunk_overlap < 0 or chunk_overlap >= chunk_size:
        raise click.BadParameter(
            "Chunk overlap must be non-negative and smaller than the chunk size."
        )

    # ------------------------------------------------------------------
    # Step 4: Reranking settings
    # ------------------------------------------------------------------
    _get_console().print("\n[bold cyan]Step 4: Reranking Configuration[/bold cyan]")

    enable_reranking: bool = click.confirm(
        "Enable cross-encoder reranking?", default=True
    )

    # ------------------------------------------------------------------
    # Step 5: Projects with sources
    # ------------------------------------------------------------------
    projects: dict[str, dict[str, Any]] = {}
    # Shared across all projects so env var names stay unique file-wide.
    all_extra_env: dict[str, str] = {}

    while True:
        _get_console().print("\n[bold cyan]Step 5: Project Configuration[/bold cyan]")

        # Re-prompt until the ID is not already taken by an earlier project.
        while True:
            project_id: str = click.prompt("Project ID", default="my-project")
            if project_id in projects:
                _get_console().print(
                    f"[red]Project '{project_id}' already exists. "
                    f"Pick a different ID.[/red]"
                )
                continue
            break
        display_name: str = click.prompt("Display name", default=project_id)
        description: str = click.prompt("Description", default="")

        project_sources: dict[str, dict[str, Any]] = {}
        _collect_sources_loop(project_sources, all_extra_env, workspace_dir=output_dir)

        projects[project_id] = {
            "project_id": project_id,
            "display_name": display_name,
            "description": description,
            "sources": project_sources,
        }

        _get_console().print(f"[green]Added project: {project_id}[/green]")

        if not click.confirm("Add another project?", default=False):
            break

    # ------------------------------------------------------------------
    # Step 6: Write files
    # ------------------------------------------------------------------
    config_path = output_dir / "config.yaml"
    env_path = output_dir / ".env"

    _show_file_preview(output_dir, config_path, env_path)

    if not _confirm_overwrite(config_path, env_path):
        return

    output_dir.mkdir(parents=True, exist_ok=True)

    _write_env_file(
        env_path,
        openai_key=openai_key,
        qdrant_url=qdrant_url,
        qdrant_api_key=qdrant_api_key,
        collection_name=collection_name,
        extra_vars=all_extra_env,
    )

    # Build global config.  Secrets are referenced through ${VAR}
    # placeholders; the actual values live only in .env.
    global_config: dict[str, Any] = {
        "qdrant": {
            "url": qdrant_url,
            "api_key": "${QDRANT_API_KEY}" if qdrant_api_key else None,
            "collection_name": collection_name,
        },
        "embedding": {
            "model": embedding_model,
            "api_key": "${OPENAI_API_KEY}",
            "vector_size": vector_size,
        },
        "chunking": {
            "chunk_size": chunk_size,
            "chunk_overlap": chunk_overlap,
        },
    }

    if embedding_endpoint:
        global_config["embedding"]["endpoint"] = embedding_endpoint

    global_config["reranking"] = {
        "enabled": enable_reranking,
    }

    _write_config_file_advanced(
        config_path,
        global_config=global_config,
        projects=projects,
    )

    # e.g. "docs(3 sources)": project id plus its total source count.
    project_summary = ", ".join(
        f"{pid}({sum(len(srcs) for srcs in p['sources'].values())} sources)"
        for pid, p in projects.items()
    )

    _get_console().print(
        Panel(
            f"[green]Created:[/green]\n"
            f" - {config_path}\n"
            f" - {env_path}\n"
            f" - Projects: {project_summary}\n\n"
            f"[bold]Next steps:[/bold]\n"
            f" 1. Review the generated files\n"
            f" 2. Run: qdrant-loader init --workspace {output_dir}\n"
            f" 3. Run: qdrant-loader ingest --workspace {output_dir}",
            title="Advanced Setup Complete",
            style="green",
        )
    )

493

494

495# ---------------------------------------------------------------------------

496# Shared helpers

497# ---------------------------------------------------------------------------

498

499

def _show_file_preview(output_dir: Path, *paths: Path) -> None:
    """Print a panel listing the workspace and each file about to be written.

    Every file is tagged ``(overwrite)`` when it already exists on disk and
    ``(new)`` otherwise.

    Args:
        output_dir: The workspace directory.
        paths: File paths that will be created or overwritten.
    """
    from rich.panel import Panel

    overwrite_tag = "[yellow](overwrite)[/yellow]"
    new_tag = "[green](new)[/green]"

    rows = [f"[bold]Workspace:[/bold] {output_dir}"]
    rows.extend(
        f" {target.name} {overwrite_tag if target.exists() else new_tag}"
        for target in paths
    )

    panel = Panel("\n".join(rows), title="Files to write", style="cyan")
    _get_console().print(panel)

523

524

def _confirm_overwrite(*paths: Path) -> bool:
    """Ask for confirmation before any file is written.

    A prompt is always shown.  When at least one of *paths* already exists
    the prompt names those files and has no pre-selected answer; otherwise a
    plain "Write files?" prompt defaulting to yes is used.

    Returns:
        ``True`` if it is safe to proceed, ``False`` if the user declined
        (a "Setup cancelled." notice is printed in that case).
    """
    already_there = [p for p in paths if p.exists()]

    if already_there:
        listing = ", ".join(p.name for p in already_there)
        approved = click.confirm(
            f"{listing} already exist(s) and will be overwritten. Proceed?"
        )
    else:
        approved = click.confirm("Write files?", default=True)

    if approved:
        return True

    _get_console().print("[yellow]Setup cancelled.[/yellow]")
    return False

547

548

def _select_source_type() -> str | None:
    """Present an interactive source type selector with arrow-key navigation.

    Besides the entries of :data:`SOURCE_TYPES` the menu contains a final
    "Back" entry, mirroring the "Cancel" entry of the setup-mode selector,
    so the user can leave without picking a source type.

    Returns:
        One of the keys in :data:`SOURCE_TYPES`, or ``None`` if the user
        selects Back, the prompt returns nothing, or input ends / is
        interrupted.
    """
    import questionary

    # Sentinel for the "Back" entry; never a real source type key.
    back_value = "__back__"

    choices = [
        questionary.Choice(title=f"{key:<12} - {desc}", value=key)
        for key, desc in SOURCE_TYPES.items()
    ]
    choices.append(questionary.Choice(title="Back", value=back_value))

    try:
        result = questionary.select(
            "Select source type:",
            choices=choices,
            default="localfile",
        ).ask()
    except (EOFError, KeyboardInterrupt):
        return None

    if result is None or result == back_value:
        return None
    return result

573

574

def _collect_sources_loop(
    all_sources: dict[str, dict[str, Any]],
    all_extra_env: dict[str, str],
    workspace_dir: Path | None = None,
) -> None:
    """Keep asking for data sources until the user is done.

    Each round picks a source type, asks for a unique source name, collects
    the type-specific settings and merges the result into *all_sources* and
    *all_extra_env* in place.  The loop ends when no source type is selected
    or the user declines to add another source.

    Args:
        all_sources: Accumulated source configs (mutated in-place).
        all_extra_env: Accumulated extra env vars (mutated in-place).
        workspace_dir: Workspace directory, used to derive defaults (e.g. docs path).
    """
    console = _get_console()

    keep_going = True
    while keep_going:
        console.print("\n[bold cyan]Data Source[/bold cyan]")

        source_type = _select_source_type()
        if source_type is None:
            return

        console.print(
            f"\n[bold cyan]Configure {SOURCE_TYPES[source_type]}[/bold cyan]"
        )

        taken_names = all_sources.get(source_type, {})
        source_name = None
        while source_name is None:
            candidate = click.prompt(
                "Source name (identifier)", default=f"my-{source_type}"
            )
            if candidate in taken_names:
                console.print(
                    f"[red]{source_type}/{candidate} already exists. "
                    f"Pick a different name.[/red]"
                )
                continue

            # Env var names end in _<SUFFIX>; refuse a name whose suffix
            # matches the tail of ANY env var registered so far (all source
            # types, all projects) so no value is silently overwritten.
            tail = f"_{_source_name_to_env_suffix(candidate)}"
            if any(key.endswith(tail) for key in all_extra_env):
                console.print(
                    f"[red]'{candidate}' collides with an existing "
                    f"env var suffix across projects. "
                    f"Pick a different name.[/red]"
                )
                continue

            source_name = candidate

        source_config, extra_env = _collect_source_config(
            source_type, source_name, workspace_dir=workspace_dir
        )

        all_sources.setdefault(source_type, {})[source_name] = source_config
        all_extra_env.update(extra_env)

        console.print(f"[green]Added {source_type}/{source_name}[/green]")

        keep_going = click.confirm("Add another source?", default=False)

636

637

638# ---------------------------------------------------------------------------

639# Source-specific config collectors

640# ---------------------------------------------------------------------------

641

642

643def _source_name_to_env_suffix(source_name: str) -> str:

644 """Convert a source name like 'my-repo' to an env-var-safe suffix like 'MY_REPO'."""

645 import re

646

647 suffix = re.sub(r"[^A-Za-z0-9]", "_", source_name).strip("_").upper()

648 return suffix if suffix else "DEFAULT"

649

650

def _collect_source_config(
    source_type: str,
    source_name: str,
    *,
    workspace_dir: Path | None = None,
) -> _SourceResult:
    """Run the collector that matches *source_type*.

    Args:
        source_type: One of the keys in ``SOURCE_TYPES``.
        source_name: User-chosen identifier, used to create unique env var names.
        workspace_dir: Workspace directory; only the ``localfile`` collector
            receives it.

    Returns:
        A tuple of (source yaml dict, extra env-var dict).  An unrecognised
        *source_type* yields two empty dicts.
    """
    handlers = {
        "localfile": lambda name: _collect_localfile_config(
            name, workspace_dir=workspace_dir
        ),
        "git": _collect_git_config,
        "confluence": _collect_confluence_config,
        "jira": _collect_jira_config,
        "publicdocs": _collect_publicdocs_config,
    }

    handler = handlers.get(source_type)
    return handler(source_name) if handler is not None else ({}, {})

680

681

def _collect_git_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Git repository source.

    A non-empty access token is stored in the extra env vars under
    ``GIT_TOKEN_<SUFFIX>`` and referenced from the config as ``${...}``;
    without a token no env var and no ``token`` key are produced.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    repo_url: str = click.prompt(
        "Repository URL (e.g., https://github.com/org/repo.git)"
    )
    branch_name: str = click.prompt("Branch", default="main")
    access_token: str = click.prompt(
        "Access token (leave empty for public repos)", default="", hide_input=True
    )
    patterns_raw: str = click.prompt(
        "File types (comma-separated)", default="*.md,*.txt,*.py"
    )
    # Split on commas, trim whitespace, drop empty entries.
    patterns = [p for p in map(str.strip, patterns_raw.split(",")) if p]

    config: dict[str, Any] = {
        "base_url": repo_url,
        "branch": branch_name,
        "file_types": patterns,
        "enable_file_conversion": True,
    }

    if not access_token:
        return config, {}

    env_key = "GIT_TOKEN_" + _source_name_to_env_suffix(source_name)
    config["token"] = "${" + env_key + "}"
    return config, {env_key: access_token}

713

714

def _collect_confluence_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Confluence source.

    The email and API token are never written to the config directly: they
    go into the extra env vars (``CONFLUENCE_EMAIL_<SUFFIX>`` and
    ``CONFLUENCE_TOKEN_<SUFFIX>``) and the config holds ``${...}`` references.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    wiki_url: str = click.prompt(
        "Confluence URL (e.g., https://mycompany.atlassian.net/wiki)"
    )
    space: str = click.prompt("Space key")
    user_email: str = click.prompt("Email")
    api_token: str = click.prompt("API token", hide_input=True)

    env_suffix = _source_name_to_env_suffix(source_name)
    token_var = "CONFLUENCE_TOKEN_" + env_suffix
    email_var = "CONFLUENCE_EMAIL_" + env_suffix

    secrets: dict[str, str] = {token_var: api_token, email_var: user_email}
    settings: dict[str, Any] = {
        "base_url": wiki_url,
        "space_key": space,
        "token": "${" + token_var + "}",
        "email": "${" + email_var + "}",
        "enable_file_conversion": True,
    }
    return settings, secrets

744

745

def _collect_jira_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Jira source.

    The email and API token are placed in the extra env vars
    (``JIRA_EMAIL_<SUFFIX>`` and ``JIRA_TOKEN_<SUFFIX>``); the config only
    contains ``${...}`` references to them.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    jira_url: str = click.prompt("Jira URL (e.g., https://mycompany.atlassian.net)")
    project: str = click.prompt("Project key")
    user_email: str = click.prompt("Email")
    api_token: str = click.prompt("API token", hide_input=True)

    env_suffix = _source_name_to_env_suffix(source_name)
    token_var = "JIRA_TOKEN_" + env_suffix
    email_var = "JIRA_EMAIL_" + env_suffix

    secrets: dict[str, str] = {token_var: api_token, email_var: user_email}
    settings: dict[str, Any] = {
        "base_url": jira_url,
        "project_key": project,
        "token": "${" + token_var + "}",
        "email": "${" + email_var + "}",
        "enable_file_conversion": True,
    }
    return settings, secrets

773

774

def _collect_publicdocs_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a public documentation (website) source.

    *source_name* is accepted for signature parity with the other collectors
    but is not used, since this source type needs no env vars.

    Returns:
        Tuple of (source config dict, extra env vars); the env var dict is
        always empty.
    """
    docs_url: str = click.prompt(
        "Documentation URL (e.g., https://docs.example.com/)"
    )
    docs_version: str = click.prompt("Version", default="latest")
    # click re-prompts until the answer is one of the listed choices.
    fmt: str = click.prompt(
        "Content type",
        default="html",
        type=click.Choice(["html", "markdown"]),
    )

    settings: dict[str, Any] = {}
    settings["base_url"] = docs_url
    settings["version"] = docs_version
    settings["content_type"] = fmt
    return settings, {}

795

796

def _collect_localfile_config(
    source_name: str, *, workspace_dir: Path | None = None
) -> _SourceResult:
    """Prompt for the settings of a local-files source.

    An answer that already starts with ``file://`` is stored unchanged; any
    other answer is treated as a filesystem path, ``~``-expanded, resolved
    and converted to a ``file://`` URI.

    Args:
        source_name: User-chosen identifier for this source (not used here).
        workspace_dir: Workspace directory. When provided, the prompt
            defaults to ``<workspace>/docs``; otherwise it has no default.

    Returns:
        Tuple of (source config dict, extra env vars); the env var dict is
        always empty.
    """
    suggested = None if workspace_dir is None else (str(workspace_dir / "docs") or None)

    entered: str = click.prompt(
        "Directory path (e.g., /path/to/files or file:///path)",
        default=suggested,
    )
    location = (
        entered
        if entered.startswith("file://")
        else Path(entered).expanduser().resolve().as_uri()
    )

    patterns_raw: str = click.prompt(
        "File types (comma-separated)", default="*.md,*.txt,*.py"
    )
    # Split on commas, trim whitespace, drop empty entries.
    patterns = [p for p in map(str.strip, patterns_raw.split(",")) if p]

    settings: dict[str, Any] = {
        "base_url": location,
        "file_types": patterns,
        "enable_file_conversion": True,
    }
    return settings, {}

833

834

835# ---------------------------------------------------------------------------

836# File writers

837# ---------------------------------------------------------------------------

838

839

840def _escape_env_value(value: str) -> str:

841 """Escape a value for .env file if it contains special characters."""

842 if any(c in value for c in ("=", "\n", '"', "'", " ", "\t", "#", "$", "`")):

843 return '"' + value.replace("\\", "\\\\").replace('"', '\\"') + '"'

844 return value

845

846

def _write_env_file(
    path: Path,
    *,
    openai_key: str,
    qdrant_url: str,
    qdrant_api_key: str,
    collection_name: str,
    extra_vars: dict[str, str] | None = None,
) -> None:
    """Write the ``.env`` file with owner-only permissions.

    Only non-default values are emitted so the file stays minimal.  The file
    is created with mode ``0o600``; an already existing file is tightened to
    ``0o600`` as well (best effort), because the creation mode alone would
    leave its previous permissions untouched.

    Args:
        path: Destination path for the ``.env`` file.
        openai_key: OpenAI API key (always written).
        qdrant_url: Qdrant URL (written only when not the default localhost).
        qdrant_api_key: Qdrant API key (written only when non-empty).
        collection_name: Collection name (written only when not "documents").
        extra_vars: Additional environment variables from source-specific config.

    Raises:
        OSError: If the file cannot be written by either the low-level or
            the fallback code path.
    """
    import os

    lines: list[str] = [f"OPENAI_API_KEY={_escape_env_value(openai_key)}"]

    if qdrant_url and qdrant_url != "http://localhost:6333":
        lines.append(f"QDRANT_URL={_escape_env_value(qdrant_url)}")

    if qdrant_api_key:
        lines.append(f"QDRANT_API_KEY={_escape_env_value(qdrant_api_key)}")

    if collection_name and collection_name != "documents":
        lines.append(f"QDRANT_COLLECTION_NAME={_escape_env_value(collection_name)}")

    if extra_vars:
        lines.append("")  # blank line separating core and source-specific vars
        for key, value in extra_vars.items():
            lines.append(f"{key}={_escape_env_value(value)}")

    lines.append("")  # trailing newline
    content = "\n".join(lines)
    payload = content.encode("utf-8")

    try:
        fd = os.open(str(path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        try:
            # The mode given to os.open() is only applied when the file is
            # created.  Tighten a pre-existing file *before* secrets are
            # written into it.  fchmod is not available on every platform.
            if hasattr(os, "fchmod"):
                try:
                    os.fchmod(fd, 0o600)
                except OSError:
                    pass  # best effort; the chmod below is tried as well
            # os.write() may write fewer bytes than requested.
            written = 0
            while written < len(payload):
                written += os.write(fd, payload[written:])
            os.fsync(fd)
        finally:
            os.close(fd)
    except OSError:
        # Fallback for platforms that don't support os.open mode (e.g., Windows)
        path.write_text(content, encoding="utf-8")

    # Covers the fallback path and platforms without fchmod.  Deliberately
    # best effort: some filesystems do not support POSIX permissions.
    try:
        path.chmod(0o600)
    except OSError:
        pass

904

905

def _write_config_file_multi(
    path: Path,
    *,
    sources: dict[str, dict[str, Any]],
) -> None:
    """Write ``config.yaml`` in the simplified (sources-only) format.

    The file starts with a three-line comment header followed by a blank
    line, then the YAML document ``{"sources": sources}`` in block style
    with the insertion order of the keys preserved.

    Args:
        path: Destination path for ``config.yaml``.
        sources: Dict of source_type -> {source_name: source_config}.
    """
    import yaml

    header = (
        "# Generated by qdrant-loader setup\n",
        "# Simplified configuration format\n",
        "# See config.template.yaml for the full multi-project format.\n\n",
    )
    document: dict[str, Any] = {"sources": sources}

    with path.open("w", encoding="utf-8") as out:
        out.writelines(header)
        yaml.dump(document, out, default_flow_style=False, sort_keys=False)

926

927

def _write_config_file_advanced(
    path: Path,
    *,
    global_config: dict[str, Any],
    projects: dict[str, dict[str, Any]],
) -> None:
    """Write ``config.yaml`` in the advanced multi-project format.

    The file starts with a three-line comment header followed by a blank
    line, then a YAML document with the top-level keys ``global`` and
    ``projects`` (block style, insertion order of the keys preserved).

    Args:
        path: Destination path for ``config.yaml``.
        global_config: Global configuration dict (qdrant, embedding, chunking).
        projects: Dict of project_id -> project config dict.
    """
    import yaml

    header = (
        "# Generated by qdrant-loader setup (advanced mode)\n",
        "# Multi-project configuration format\n",
        "# See config.template.yaml for all available options.\n\n",
    )
    document: dict[str, Any] = {}
    document["global"] = global_config
    document["projects"] = projects

    with path.open("w", encoding="utf-8") as out:
        out.writelines(header)
        yaml.dump(document, out, default_flow_style=False, sort_keys=False)

Coverage for src / qdrant_loader / cli / commands / setup_cmd.py: 88%

317 statements