Coverage for src / qdrant_loader / cli / commands / setup_cmd.py: 88%

315 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-18 04:48 +0000

1"""Interactive setup wizard for qdrant-loader configuration.""" 

2 

3from __future__ import annotations 

4 

5from pathlib import Path 

6from typing import Any 

7 

8import click 

9 

# questionary (~1.4s) and rich (~0.4s) are expensive to import, so they are
# pulled in on demand through helpers instead of at module import time.
_console = None  # shared rich Console, built by the first _get_console() call


def _get_console():
    """Return the module-wide ``rich`` console, creating it on first use.

    The ``rich.console`` import happens inside the function so that importing
    this module does not pay for it.
    """
    global _console
    if _console is not None:
        return _console

    from rich.console import Console

    _console = Console()
    return _console

22 

23 

# Supported data-source kinds. Keys are the identifiers stored under
# ``sources`` in the generated config; values are the human-readable labels
# shown in the interactive source-type selector.
SOURCE_TYPES: dict[str, str] = {
    "git": "Git Repository",
    "confluence": "Confluence Wiki",
    "jira": "Jira Issues",
    "publicdocs": "Public Documentation (website)",
    "localfile": "Local Files",
}

# Returned by every _collect_*_config helper: (yaml-ready dict, extra env vars dict)
_SourceResult = tuple[dict[str, Any], dict[str, str]]


# Setup modes offered by the mode selector. Keys are the values accepted as
# ``mode`` by run_setup(); values are the one-line descriptions shown to the user.
SETUP_MODES: dict[str, str] = {
    "default": "Quick start with localfile source pointing to current directory",
    "normal": "Interactive wizard with simplified config format",
    "advanced": "Full control over global settings, multi-project format",
}

41 

42 

def run_setup(output_dir: Path | None = None, mode: str | None = None) -> None:
    """Entry point for the setup command.

    When *mode* is ``None`` a TUI mode selector is shown. When *output_dir* is
    ``None`` the user is prompted for a workspace folder, except in
    ``"default"`` mode, where ``./workspace`` is used without prompting.

    Args:
        output_dir: Directory in which the generated files are placed.
            If ``None``, it is chosen as described above.
        mode: One of ``"default"``, ``"normal"``, ``"advanced"`` or ``None``.

    Raises:
        click.BadParameter: If *mode* is not a known setup mode, or if the
            workspace path exists but is not a directory.
    """
    handlers = {
        "default": run_setup_default,
        "normal": run_setup_wizard,
        "advanced": run_setup_advanced,
    }

    # ------------------------------------------------------------------
    # Step 0: Mode selection (before workspace, so default can skip prompt)
    # ------------------------------------------------------------------
    if mode is None:
        mode = _select_setup_mode()
        if mode is None:
            _get_console().print("[yellow]Setup cancelled.[/yellow]")
            return

    # Reject an unknown mode before prompting or creating any directory;
    # otherwise the failure would only surface later as a bare KeyError.
    if mode not in handlers:
        raise click.BadParameter(
            f"Unknown setup mode '{mode}'. Choose one of: {', '.join(handlers)}.",
            param_hint="mode",
        )

    # ------------------------------------------------------------------
    # Step 1: Workspace folder
    # ------------------------------------------------------------------
    # Default mode falls back to ./workspace instead of prompting; an
    # explicitly supplied output_dir is still honoured.
    if mode == "default" and output_dir is None:
        output_dir = Path("workspace")

    try:
        # Resolved inside the try so that Ctrl-C at the workspace prompt is
        # reported the same way as a cancel in any later step.
        workspace = _resolve_workspace(output_dir)
        handlers[mode](workspace)
    except (click.Abort, KeyboardInterrupt):
        _get_console().print("\n[yellow]Setup cancelled.[/yellow]")

83 

84 

def _resolve_workspace(output_dir: Path | None) -> Path:
    """Resolve and prepare the workspace directory.

    When *output_dir* is explicitly provided, uses it directly.
    Otherwise prompts the user with ``workspace`` as the default; the answer
    is interpreted relative to the current directory and a leading ``~`` is
    expanded to the user's home directory.

    Args:
        output_dir: The value passed via ``--output-dir``, or ``None``.

    Returns:
        Resolved :class:`Path` to the workspace directory (created if needed).

    Raises:
        click.BadParameter: If the chosen path exists but is not a directory.
    """
    # If explicitly provided via --output-dir, use it as-is.
    if output_dir is not None:
        resolved = Path(output_dir).resolve()
        if resolved.exists() and not resolved.is_dir():
            raise click.BadParameter(
                f"'{resolved}' exists but is not a directory.",
                param_hint="output_dir",
            )
        resolved.mkdir(parents=True, exist_ok=True)
        return resolved

    # Interactive: prompt with default ./workspace
    default_ws = "workspace"
    raw: str = click.prompt("Workspace folder", default=default_ws)
    # Drop characters that cannot be encoded as UTF-8 (such as lone
    # surrogates), then fall back to the default if nothing is left.
    chosen = raw.encode("utf-8", errors="ignore").decode("utf-8").strip()
    if not chosen:
        chosen = default_ws

    candidate = Path(chosen)
    try:
        # Without this, "~/ws" would create a directory literally named "~"
        # under the current directory.
        candidate = candidate.expanduser()
    except RuntimeError:
        # Home directory cannot be determined; treat the input literally.
        pass

    # Joining an absolute path onto cwd yields the absolute path itself.
    ws_path = (Path.cwd() / candidate).resolve()
    if ws_path.exists() and not ws_path.is_dir():
        raise click.BadParameter(
            f"'{ws_path}' exists but is not a directory.",
            param_hint="workspace",
        )
    if not ws_path.exists():
        ws_path.mkdir(parents=True, exist_ok=True)
        _get_console().print(f"[green]Created workspace: {ws_path}[/green]")
    else:
        _get_console().print(f"[cyan]Using workspace: {ws_path}[/cyan]")

    return ws_path

128 

129 

def _select_setup_mode() -> str | None:
    """Show the setup banner and let the user pick a setup mode.

    The menu lists every entry of :data:`SETUP_MODES` followed by a
    ``Cancel`` item.

    Returns:
        The selected key of :data:`SETUP_MODES`, or ``None`` when the user
        picks ``Cancel``, the prompt yields no answer, or EOF / Ctrl-C is
        raised while asking.
    """
    import questionary
    from rich.panel import Panel

    banner = Panel(
        "[bold]qdrant-loader Setup[/bold]\n" "Choose a setup mode to get started.",
        style="blue",
    )
    _get_console().print(banner)

    # Sentinel value for the extra "Cancel" menu entry.
    cancel_value = "__cancel__"

    menu = []
    for mode_key, summary in SETUP_MODES.items():
        menu.append(
            questionary.Choice(
                title=f"{mode_key.capitalize():<10} - {summary}", value=mode_key
            )
        )
    menu.append(questionary.Choice(title="Cancel", value=cancel_value))

    try:
        picked = questionary.select(
            "Select setup mode:",
            choices=menu,
            default="default",
        ).ask()
    except (EOFError, KeyboardInterrupt):
        return None

    return None if picked in (None, cancel_value) else picked

165 

166 

def run_setup_default(output_dir: Path) -> None:
    """Write a minimal config with one localfile source at ``<output_dir>/docs``.

    No configuration values are asked for: ``config.yaml`` and ``.env`` are
    written with fixed defaults (placeholder OpenAI key, local Qdrant URL,
    ``documents`` collection). The only interaction is the file preview and
    the write/overwrite confirmation; declining it leaves the disk untouched.

    Args:
        output_dir: Directory in which the generated files are placed.
    """
    from rich.panel import Panel

    output_dir = Path(output_dir).resolve()
    config_path = output_dir / "config.yaml"
    env_path = output_dir / ".env"

    # Preview first, then ask; nothing is created before the user agrees.
    _show_file_preview(output_dir, config_path, env_path)
    if not _confirm_overwrite(config_path, env_path):
        return

    output_dir.mkdir(parents=True, exist_ok=True)

    # The localfile source points at <workspace>/docs, created up front so
    # the user has somewhere to drop documents.
    docs_dir = output_dir / "docs"
    docs_dir.mkdir(parents=True, exist_ok=True)

    localfile_entry: dict[str, Any] = {
        "base_url": docs_dir.as_uri(),
        "file_types": ["*.md", "*.txt", "*.py"],
    }
    sources: dict[str, dict[str, Any]] = {"localfile": {"my-docs": localfile_entry}}

    _write_env_file(
        env_path,
        openai_key="your_openai_api_key_here",
        qdrant_url="http://localhost:6333",
        qdrant_api_key="",
        collection_name="documents",
    )
    _write_config_file_multi(config_path, sources=sources)

    summary = (
        f"[green]Created:[/green]\n"
        f" - {config_path}\n"
        f" - {env_path}\n"
        f" - {docs_dir}/ (place your documents here)\n\n"
        f"[bold]Next steps:[/bold]\n"
        f" 1. Set your OPENAI_API_KEY in {env_path}\n"
        f" 2. Place your documents in {docs_dir}/\n"
        f" 3. Run: qdrant-loader init --workspace {output_dir}\n"
        f" 4. Run: qdrant-loader ingest --workspace {output_dir}"
    )
    _get_console().print(
        Panel(summary, title="Default Setup Complete", style="green")
    )

229 

230 

def run_setup_wizard(output_dir: Path) -> None:
    """Run the Normal-mode wizard and write ``config.yaml`` and ``.env``.

    Asks for the core connection settings, collects one or more data sources,
    shows a preview of the files, and writes them once the user confirms.
    Declining the confirmation writes nothing.

    Args:
        output_dir: Directory in which the generated files are placed.
    """
    from rich.panel import Panel

    output_dir = Path(output_dir).resolve()

    intro = Panel(
        "[bold]qdrant-loader Setup Wizard[/bold] [dim](Normal mode)[/dim]\n"
        "Generate config.yaml and .env for your project.",
        style="blue",
    )
    _get_console().print(intro)

    # --- Step 1: core settings -------------------------------------------
    _get_console().print("\n[bold cyan]Step 1: Core Settings[/bold cyan]")

    openai_key: str = click.prompt("OpenAI API Key", hide_input=True)
    qdrant_url: str = click.prompt("Qdrant URL", default="http://localhost:6333")
    qdrant_api_key: str = click.prompt(
        "Qdrant API Key (leave empty for local)", default="", hide_input=True
    )
    collection_name: str = click.prompt("Collection name", default="documents")

    # --- Steps 2+3: sources (the helper loops until the user is done) -----
    sources: dict[str, dict[str, Any]] = {}
    source_env: dict[str, str] = {}
    _collect_sources_loop(sources, source_env, workspace_dir=output_dir)

    # --- Step 4: preview, confirm, write ----------------------------------
    config_path = output_dir / "config.yaml"
    env_path = output_dir / ".env"

    _show_file_preview(output_dir, config_path, env_path)
    if not _confirm_overwrite(config_path, env_path):
        return

    output_dir.mkdir(parents=True, exist_ok=True)

    _write_env_file(
        env_path,
        openai_key=openai_key,
        qdrant_url=qdrant_url,
        qdrant_api_key=qdrant_api_key,
        collection_name=collection_name,
        extra_vars=source_env,
    )
    _write_config_file_multi(config_path, sources=sources)

    # One "type(count)" entry per configured source type.
    per_type = [f"{kind}({len(entries)})" for kind, entries in sources.items()]
    source_summary = ", ".join(per_type)

    report = (
        f"[green]Created:[/green]\n"
        f" - {config_path}\n"
        f" - {env_path}\n"
        f" - Sources: {source_summary}\n\n"
        f"[bold]Next steps:[/bold]\n"
        f" 1. Review the generated files\n"
        f" 2. Run: qdrant-loader init --workspace {output_dir}\n"
        f" 3. Run: qdrant-loader ingest --workspace {output_dir}"
    )
    _get_console().print(Panel(report, title="Setup Complete", style="green"))

317 

318 

def run_setup_advanced(output_dir: Path) -> None:
    """Run the Advanced-mode wizard and write a multi-project configuration.

    Besides the core connection settings this asks for embedding, chunking
    and reranking options, then collects one or more projects, each with its
    own sources. Files are written only after the user confirms the preview.

    Args:
        output_dir: Directory in which the generated files are placed.
    """
    from rich.panel import Panel

    output_dir = Path(output_dir).resolve()

    intro = Panel(
        "[bold]qdrant-loader Setup Wizard[/bold] [dim](Advanced mode)[/dim]\n"
        "Full control over global settings and multi-project configuration.",
        style="blue",
    )
    _get_console().print(intro)

    # --- Step 1: core settings -------------------------------------------
    _get_console().print("\n[bold cyan]Step 1: Core Settings[/bold cyan]")

    openai_key: str = click.prompt("OpenAI API Key", hide_input=True)
    qdrant_url: str = click.prompt("Qdrant URL", default="http://localhost:6333")
    qdrant_api_key: str = click.prompt(
        "Qdrant API Key (leave empty for local)", default="", hide_input=True
    )
    collection_name: str = click.prompt("Collection name", default="documents")

    # --- Step 2: embedding -----------------------------------------------
    _get_console().print("\n[bold cyan]Step 2: Embedding Configuration[/bold cyan]")

    embedding_model: str = click.prompt(
        "Embedding model", default="text-embedding-3-small"
    )
    embedding_endpoint: str = click.prompt(
        "Embedding endpoint (leave empty for OpenAI default)",
        default="",
    )
    vector_size: int = click.prompt("Vector size", default=1536, type=int)

    # --- Step 3: chunking ------------------------------------------------
    _get_console().print("\n[bold cyan]Step 3: Chunking Configuration[/bold cyan]")

    chunk_size: int = click.prompt("Chunk size (characters)", default=1500, type=int)
    chunk_overlap: int = click.prompt(
        "Chunk overlap (characters)", default=200, type=int
    )

    # --- Step 4: reranking -----------------------------------------------
    _get_console().print("\n[bold cyan]Step 4: Reranking Configuration[/bold cyan]")

    enable_reranking: bool = click.confirm(
        "Enable cross-encoder reranking?", default=True
    )

    # --- Step 5: projects, each with its own sources ----------------------
    projects: dict[str, dict[str, Any]] = {}
    # Shared across projects so env-var name clashes are caught globally.
    shared_env: dict[str, str] = {}

    add_more = True
    while add_more:
        _get_console().print("\n[bold cyan]Step 5: Project Configuration[/bold cyan]")

        # Keep asking until the ID is not already taken.
        while True:
            project_id: str = click.prompt("Project ID", default="my-project")
            if project_id not in projects:
                break
            _get_console().print(
                f"[red]Project '{project_id}' already exists. "
                f"Pick a different ID.[/red]"
            )

        display_name: str = click.prompt("Display name", default=project_id)
        description: str = click.prompt("Description", default="")

        project_sources: dict[str, dict[str, Any]] = {}
        _collect_sources_loop(project_sources, shared_env, workspace_dir=output_dir)

        projects[project_id] = {
            "project_id": project_id,
            "display_name": display_name,
            "description": description,
            "sources": project_sources,
        }
        _get_console().print(f"[green]Added project: {project_id}[/green]")

        add_more = click.confirm("Add another project?", default=False)

    # --- Step 6: preview, confirm, write ----------------------------------
    config_path = output_dir / "config.yaml"
    env_path = output_dir / ".env"

    _show_file_preview(output_dir, config_path, env_path)
    if not _confirm_overwrite(config_path, env_path):
        return

    output_dir.mkdir(parents=True, exist_ok=True)

    _write_env_file(
        env_path,
        openai_key=openai_key,
        qdrant_url=qdrant_url,
        qdrant_api_key=qdrant_api_key,
        collection_name=collection_name,
        extra_vars=shared_env,
    )

    # Secrets are referenced through ${VAR} placeholders; the values
    # themselves only go into .env.
    embedding_section: dict[str, Any] = {
        "model": embedding_model,
        "api_key": "${OPENAI_API_KEY}",
        "vector_size": vector_size,
    }
    if embedding_endpoint:
        embedding_section["endpoint"] = embedding_endpoint

    # Key order is the order the sections appear in the written YAML.
    global_config: dict[str, Any] = {
        "qdrant": {
            "url": qdrant_url,
            "api_key": "${QDRANT_API_KEY}" if qdrant_api_key else None,
            "collection_name": collection_name,
        },
        "embedding": embedding_section,
        "chunking": {
            "chunk_size": chunk_size,
            "chunk_overlap": chunk_overlap,
        },
        "reranking": {
            "enabled": enable_reranking,
        },
    }

    _write_config_file_advanced(
        config_path,
        global_config=global_config,
        projects=projects,
    )

    # "<project>(<total source count> sources)" for each project.
    project_summary = ", ".join(
        f"{pid}({sum(len(srcs) for srcs in p['sources'].values())} sources)"
        for pid, p in projects.items()
    )

    report = (
        f"[green]Created:[/green]\n"
        f" - {config_path}\n"
        f" - {env_path}\n"
        f" - Projects: {project_summary}\n\n"
        f"[bold]Next steps:[/bold]\n"
        f" 1. Review the generated files\n"
        f" 2. Run: qdrant-loader init --workspace {output_dir}\n"
        f" 3. Run: qdrant-loader ingest --workspace {output_dir}"
    )
    _get_console().print(
        Panel(report, title="Advanced Setup Complete", style="green")
    )

490 

491 

492# --------------------------------------------------------------------------- 

493# Shared helpers 

494# --------------------------------------------------------------------------- 

495 

496 

def _show_file_preview(output_dir: Path, *paths: Path) -> None:
    """Print a panel listing the workspace and each file about to be written.

    Every file is tagged ``(overwrite)`` if it already exists on disk and
    ``(new)`` otherwise.

    Args:
        output_dir: The workspace directory.
        paths: File paths that will be created or overwritten.
    """
    from rich.panel import Panel

    def _describe(target: Path) -> str:
        # One preview row: file name plus its new/overwrite marker.
        if target.exists():
            status = "[yellow](overwrite)[/yellow]"
        else:
            status = "[green](new)[/green]"
        return f" {target.name} {status}"

    rows = [f"[bold]Workspace:[/bold] {output_dir}"]
    rows.extend(_describe(target) for target in paths)

    _get_console().print(
        Panel("\n".join(rows), title="Files to write", style="cyan")
    )

520 

521 

def _confirm_overwrite(*paths: Path) -> bool:
    """Ask the user for permission to write the given files.

    A prompt is always shown. If any of *paths* already exists the prompt
    names those files and has no preselected answer; otherwise a plain
    "Write files?" prompt defaulting to yes is used. A cancellation notice is
    printed when the user declines.

    Returns:
        ``True`` if it is safe to proceed, ``False`` if the user cancelled.
    """
    already_there = [p for p in paths if p.exists()]

    if already_there:
        names = ", ".join(p.name for p in already_there)
        go_ahead = click.confirm(
            f"{names} already exist(s) and will be overwritten. Proceed?"
        )
    else:
        go_ahead = click.confirm("Write files?", default=True)

    if not go_ahead:
        _get_console().print("[yellow]Setup cancelled.[/yellow]")
    return go_ahead

544 

545 

def _select_source_type() -> str | None:
    """Let the user pick a source type from an arrow-key menu.

    The menu has one entry per item of :data:`SOURCE_TYPES`, with
    ``localfile`` preselected.

    Returns:
        The chosen key of :data:`SOURCE_TYPES`, or ``None`` when the prompt
        yields no answer or EOF / Ctrl-C is raised while asking.
    """
    import questionary

    menu = []
    for type_key, label in SOURCE_TYPES.items():
        menu.append(questionary.Choice(title=f"{type_key:<12} - {label}", value=type_key))

    try:
        return questionary.select(
            "Select source type:",
            choices=menu,
            default="localfile",
        ).ask()
    except (EOFError, KeyboardInterrupt):
        return None

570 

571 

def _collect_sources_loop(
    all_sources: dict[str, dict[str, Any]],
    all_extra_env: dict[str, str],
    workspace_dir: Path | None = None,
) -> None:
    """Prompt for data sources until the user stops, filling the given dicts.

    Each round asks for a source type, a unique source name and the
    type-specific settings. The loop ends when no source type is selected or
    the user declines to add another source.

    Args:
        all_sources: Accumulated source configs (mutated in-place).
        all_extra_env: Accumulated extra env vars (mutated in-place).
        workspace_dir: Workspace directory, used to derive defaults (e.g. docs path).
    """
    keep_going = True
    while keep_going:
        _get_console().print("\n[bold cyan]Data Source[/bold cyan]")

        source_type = _select_source_type()
        if source_type is None:
            return

        _get_console().print(
            f"\n[bold cyan]Configure {SOURCE_TYPES[source_type]}[/bold cyan]"
        )

        taken_names = all_sources.get(source_type, {})
        # Snapshot of every env var registered so far (all source types, and
        # all projects when the caller shares the dict), so a new name cannot
        # silently overwrite an existing variable.
        known_env_keys = set(all_extra_env)

        while True:
            source_name = click.prompt(
                "Source name (identifier)", default=f"my-{source_type}"
            )
            if source_name in taken_names:
                _get_console().print(
                    f"[red]{source_type}/{source_name} already exists. "
                    f"Pick a different name.[/red]"
                )
                continue

            # Reject the name if any known env var already ends in its suffix.
            tail = "_" + _source_name_to_env_suffix(source_name)
            if any(key.endswith(tail) for key in known_env_keys):
                _get_console().print(
                    f"[red]'{source_name}' collides with an existing "
                    f"env var suffix across projects. "
                    f"Pick a different name.[/red]"
                )
                continue
            break

        source_config, extra_env = _collect_source_config(
            source_type, source_name, workspace_dir=workspace_dir
        )

        all_sources.setdefault(source_type, {})[source_name] = source_config
        all_extra_env.update(extra_env)

        _get_console().print(f"[green]Added {source_type}/{source_name}[/green]")

        keep_going = click.confirm("Add another source?", default=False)

633 

634 

635# --------------------------------------------------------------------------- 

636# Source-specific config collectors 

637# --------------------------------------------------------------------------- 

638 

639 

640def _source_name_to_env_suffix(source_name: str) -> str: 

641 """Convert a source name like 'my-repo' to an env-var-safe suffix like 'MY_REPO'.""" 

642 import re 

643 

644 suffix = re.sub(r"[^A-Za-z0-9]", "_", source_name).strip("_").upper() 

645 return suffix if suffix else "DEFAULT" 

646 

647 

def _collect_source_config(
    source_type: str,
    source_name: str,
    *,
    workspace_dir: Path | None = None,
) -> _SourceResult:
    """Run the interactive collector that matches *source_type*.

    Args:
        source_type: One of the keys in ``SOURCE_TYPES``.
        source_name: User-chosen identifier, used to create unique env var names.
        workspace_dir: Workspace directory; only the localfile collector receives it.

    Returns:
        A tuple of (source yaml dict, extra env-var dict). An unrecognised
        *source_type* yields two empty dicts.
    """
    # localfile is the only collector that takes the workspace directory.
    if source_type == "localfile":
        return _collect_localfile_config(source_name, workspace_dir=workspace_dir)
    if source_type == "git":
        return _collect_git_config(source_name)
    if source_type == "confluence":
        return _collect_confluence_config(source_name)
    if source_type == "jira":
        return _collect_jira_config(source_name)
    if source_type == "publicdocs":
        return _collect_publicdocs_config(source_name)
    return {}, {}

677 

678 

def _collect_git_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Git repository source.

    When an access token is entered it is stored under an env var named
    ``GIT_TOKEN_<SUFFIX>`` and the config refers to it as ``${...}``.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    repo_url: str = click.prompt(
        "Repository URL (e.g., https://github.com/org/repo.git)"
    )
    branch: str = click.prompt("Branch", default="main")
    token: str = click.prompt(
        "Access token (leave empty for public repos)", default="", hide_input=True
    )
    patterns_raw: str = click.prompt(
        "File types (comma-separated)", default="*.md,*.txt,*.py"
    )

    # Split on commas, trim each piece, and drop the empty ones.
    trimmed = (piece.strip() for piece in patterns_raw.split(","))
    file_types = [piece for piece in trimmed if piece]

    config: dict[str, Any] = {
        "base_url": repo_url,
        "branch": branch,
        "file_types": file_types,
    }
    if not token:
        return config, {}

    env_key = f"GIT_TOKEN_{_source_name_to_env_suffix(source_name)}"
    config["token"] = f"${{{env_key}}}"
    return config, {env_key: token}

709 

710 

def _collect_confluence_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Confluence source.

    The e-mail address and API token go into env vars named after the
    source; the config only holds ``${...}`` references to them.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    wiki_url: str = click.prompt(
        "Confluence URL (e.g., https://mycompany.atlassian.net/wiki)"
    )
    space_key: str = click.prompt("Space key")
    email: str = click.prompt("Email")
    token: str = click.prompt("API token", hide_input=True)

    env_suffix = _source_name_to_env_suffix(source_name)
    token_var = f"CONFLUENCE_TOKEN_{env_suffix}"
    email_var = f"CONFLUENCE_EMAIL_{env_suffix}"

    config: dict[str, Any] = {
        "base_url": wiki_url,
        "space_key": space_key,
        "token": f"${{{token_var}}}",
        "email": f"${{{email_var}}}",
    }
    secrets: dict[str, str] = {token_var: token, email_var: email}
    return config, secrets

739 

740 

def _collect_jira_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Jira source.

    The e-mail address and API token go into env vars named after the
    source; the config only holds ``${...}`` references to them.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    jira_url: str = click.prompt("Jira URL (e.g., https://mycompany.atlassian.net)")
    project_key: str = click.prompt("Project key")
    email: str = click.prompt("Email")
    token: str = click.prompt("API token", hide_input=True)

    env_suffix = _source_name_to_env_suffix(source_name)
    token_var = f"JIRA_TOKEN_{env_suffix}"
    email_var = f"JIRA_EMAIL_{env_suffix}"

    config: dict[str, Any] = {
        "base_url": jira_url,
        "project_key": project_key,
        "token": f"${{{token_var}}}",
        "email": f"${{{email_var}}}",
    }
    secrets: dict[str, str] = {token_var: token, email_var: email}
    return config, secrets

767 

768 

def _collect_publicdocs_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a public documentation website source.

    This source type needs no secrets, so the env-var dict is always empty
    and *source_name* is not used.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    docs_url: str = click.prompt("Documentation URL (e.g., https://docs.example.com/)")
    version: str = click.prompt("Version", default="latest")
    content_type: str = click.prompt(
        "Content type",
        default="html",
        type=click.Choice(["html", "markdown"]),
    )

    config: dict[str, Any] = dict(
        base_url=docs_url,
        version=version,
        content_type=content_type,
    )
    return config, {}

789 

790 

def _collect_localfile_config(
    source_name: str, *, workspace_dir: Path | None = None
) -> _SourceResult:
    """Prompt for the settings of a local-files source.

    The directory is stored as a ``file://`` URI: input that already starts
    with ``file://`` is kept verbatim, anything else is expanded (``~``),
    made absolute and converted.

    Args:
        source_name: User-chosen identifier for this source.
        workspace_dir: Workspace directory. When provided, defaults to ``<workspace>/docs``.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    # Without a workspace there is no suggested directory, so the prompt
    # requires an answer.
    suggested = None if workspace_dir is None else str(workspace_dir / "docs")

    entered: str = click.prompt(
        "Directory path (e.g., /path/to/files or file:///path)",
        default=suggested,
    )
    if entered.startswith("file://"):
        base_url = entered
    else:
        base_url = Path(entered).expanduser().resolve().as_uri()

    patterns_raw: str = click.prompt(
        "File types (comma-separated)", default="*.md,*.txt,*.py"
    )
    # Split on commas, trim each piece, and drop the empty ones.
    trimmed = (piece.strip() for piece in patterns_raw.split(","))
    file_types = [piece for piece in trimmed if piece]

    config: dict[str, Any] = {"base_url": base_url, "file_types": file_types}
    return config, {}

826 

827 

828# --------------------------------------------------------------------------- 

829# File writers 

830# --------------------------------------------------------------------------- 

831 

832 

833def _escape_env_value(value: str) -> str: 

834 """Escape a value for .env file if it contains special characters.""" 

835 if any(c in value for c in ("=", "\n", '"', "'", " ", "\t", "#", "$", "`")): 

836 return '"' + value.replace("\\", "\\\\").replace('"', '\\"') + '"' 

837 return value 

838 

839 

def _write_env_file(
    path: Path,
    *,
    openai_key: str,
    qdrant_url: str,
    qdrant_api_key: str,
    collection_name: str,
    extra_vars: dict[str, str] | None = None,
) -> None:
    """Write the ``.env`` file with owner-only permissions where possible.

    Only non-default values are emitted so the file stays minimal. The file
    is opened with mode ``0o600``; because that mode only takes effect when
    the file is newly created, an existing file is additionally tightened to
    ``0o600`` (best effort) before any secret is written to it.

    Args:
        path: Destination path for the ``.env`` file.
        openai_key: OpenAI API key (always written).
        qdrant_url: Qdrant URL (written only when not the default localhost).
        qdrant_api_key: Qdrant API key (written only when non-empty).
        collection_name: Collection name (written only when not "documents").
        extra_vars: Additional environment variables from source-specific config.
    """
    import os

    lines: list[str] = [f"OPENAI_API_KEY={_escape_env_value(openai_key)}"]

    if qdrant_url and qdrant_url != "http://localhost:6333":
        lines.append(f"QDRANT_URL={_escape_env_value(qdrant_url)}")

    if qdrant_api_key:
        lines.append(f"QDRANT_API_KEY={_escape_env_value(qdrant_api_key)}")

    if collection_name and collection_name != "documents":
        lines.append(f"QDRANT_COLLECTION_NAME={_escape_env_value(collection_name)}")

    if extra_vars:
        lines.append("")  # blank line between core and source-specific vars
        lines.extend(
            f"{key}={_escape_env_value(value)}" for key, value in extra_vars.items()
        )

    lines.append("")  # trailing newline
    content = "\n".join(lines)
    payload = content.encode("utf-8")

    try:
        fd = os.open(str(path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        try:
            # The mode given to os.open is ignored for a file that already
            # exists, so an old world-readable .env would stay readable.
            # Restrict it explicitly before writing; fchmod is not available
            # on every platform, and a failure here must not block the write.
            if hasattr(os, "fchmod"):
                try:
                    os.fchmod(fd, 0o600)
                except OSError:
                    pass

            # os.write may write fewer bytes than requested; loop until done.
            remaining = memoryview(payload)
            while remaining:
                written = os.write(fd, remaining)
                remaining = remaining[written:]
            os.fsync(fd)
        finally:
            os.close(fd)
    except OSError:
        # Fallback for platforms that don't support os.open mode (e.g., Windows)
        path.write_text(content, encoding="utf-8")
        try:
            path.chmod(0o600)
        except OSError:
            # Best effort only: the file is written even if it cannot be restricted.
            pass

897 

898 

def _write_config_file_multi(
    path: Path,
    *,
    sources: dict[str, dict[str, Any]],
) -> None:
    """Write ``config.yaml`` in the simplified format.

    The file starts with a short comment header followed by a single
    top-level ``sources`` mapping, dumped in block style with insertion
    order preserved.

    Args:
        path: Destination path for ``config.yaml``.
        sources: Dict of source_type -> {source_name: source_config}.
    """
    import yaml

    header_lines = (
        "# Generated by qdrant-loader setup\n",
        "# Simplified configuration format\n",
        "# See config.template.yaml for the full multi-project format.\n\n",
    )
    document: dict[str, Any] = {"sources": sources}

    with path.open("w", encoding="utf-8") as out:
        out.writelines(header_lines)
        yaml.dump(document, out, default_flow_style=False, sort_keys=False)

919 

920 

def _write_config_file_advanced(
    path: Path,
    *,
    global_config: dict[str, Any],
    projects: dict[str, dict[str, Any]],
) -> None:
    """Write ``config.yaml`` in the advanced multi-project format.

    The file starts with a short comment header followed by two top-level
    mappings, ``global`` and ``projects``, dumped in block style with
    insertion order preserved.

    Args:
        path: Destination path for ``config.yaml``.
        global_config: Global configuration dict (qdrant, embedding, chunking).
        projects: Dict of project_id -> project config dict.
    """
    import yaml

    header_lines = (
        "# Generated by qdrant-loader setup (advanced mode)\n",
        "# Multi-project configuration format\n",
        "# See config.template.yaml for all available options.\n\n",
    )
    document: dict[str, Any] = {"global": global_config, "projects": projects}

    with path.open("w", encoding="utf-8") as out:
        out.writelines(header_lines)
        yaml.dump(document, out, default_flow_style=False, sort_keys=False)