Coverage for src / qdrant_loader / cli / commands / setup_cmd.py: 88%

317 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-10 09:40 +0000

1"""Interactive setup wizard for qdrant-loader configuration.""" 

2 

3from __future__ import annotations 

4 

5from pathlib import Path 

6from typing import Any 

7 

8import click 

9 

# questionary (~1.4s) and rich (~0.4s) are expensive to import, so they are
# only pulled in lazily through helper functions; this keeps CLI startup fast.
_console = None


def _get_console():
    """Return the module-wide rich ``Console``, building it on first use."""
    global _console
    if _console is not None:
        return _console

    # Deferred import: rich is only loaded once something is actually printed.
    from rich.console import Console

    _console = Console()
    return _console

22 

23 

# Supported source kinds mapped to the human-readable label displayed for each.
# Insertion order is preserved and is the order in which the entries are iterated.
SOURCE_TYPES: dict[str, str] = dict(
    git="Git Repository",
    confluence="Confluence Wiki",
    jira="Jira Issues",
    publicdocs="Public Documentation (website)",
    localfile="Local Files",
)

31 

32# Returned by every _collect_*_config helper: (yaml-ready dict, extra env vars dict) 

33_SourceResult = tuple[dict[str, Any], dict[str, str]] 

34 

35 

# Available setup modes mapped to the one-line description shown next to each.
# Insertion order is preserved and is the order in which the entries are iterated.
SETUP_MODES: dict[str, str] = dict(
    default="Quick start with localfile source pointing to current directory",
    normal="Interactive wizard with simplified config format",
    advanced="Full control over global settings, multi-project format",
)

41 

42 

def run_setup(output_dir: Path | None = None, mode: str | None = None) -> None:
    """Run the setup command: choose a mode, resolve the workspace, dispatch.

    A missing *mode* triggers the interactive mode selector; cancelling it ends
    the setup. In ``"default"`` mode a missing *output_dir* is replaced by
    ``workspace`` so no workspace prompt is shown; in the other modes
    :func:`_resolve_workspace` prompts when *output_dir* is ``None``.

    Args:
        output_dir: Directory in which the generated files are placed, or
            ``None`` to let the mode decide (see above).
        mode: One of ``"default"``, ``"normal"``, ``"advanced"`` or ``None``
            to ask the user.
    """
    # Mode is settled first so that "default" mode can skip the workspace prompt.
    chosen_mode = mode if mode is not None else _select_setup_mode()
    if chosen_mode is None:
        _get_console().print("[yellow]Setup cancelled.[/yellow]")
        return

    # Only "default" mode substitutes ./workspace for a missing directory; an
    # explicitly supplied directory is still honoured in every mode.
    if chosen_mode == "default" and output_dir is None:
        output_dir = Path("workspace")
    workspace = _resolve_workspace(output_dir)

    handlers = {
        "default": run_setup_default,
        "normal": run_setup_wizard,
        "advanced": run_setup_advanced,
    }
    try:
        handlers[chosen_mode](workspace)
    except (click.Abort, KeyboardInterrupt):
        # Ctrl-C / aborted prompt inside a wizard ends the setup quietly.
        _get_console().print("\n[yellow]Setup cancelled.[/yellow]")

83 

84 

85def _resolve_workspace(output_dir: Path | None) -> Path: 

86 """Resolve and prepare the workspace directory. 

87 

88 When *output_dir* is explicitly provided, uses it directly. 

89 Otherwise prompts the user with ``./workspace`` as the default. 

90 

91 Args: 

92 output_dir: The value passed via ``--output-dir``, or ``None``. 

93 

94 Returns: 

95 Resolved :class:`Path` to the workspace directory (created if needed). 

96 """ 

97 # If explicitly provided via --output-dir, use it as-is. 

98 if output_dir is not None: 

99 resolved = Path(output_dir).resolve() 

100 if resolved.exists() and not resolved.is_dir(): 

101 raise click.BadParameter( 

102 f"'{resolved}' exists but is not a directory.", 

103 param_hint="output_dir", 

104 ) 

105 resolved.mkdir(parents=True, exist_ok=True) 

106 return resolved 

107 

108 # Interactive: prompt with default ./workspace 

109 default_ws = "workspace" 

110 raw: str = click.prompt("Workspace folder", default=default_ws) 

111 chosen = raw.encode("utf-8", errors="ignore").decode("utf-8").strip() 

112 if not chosen: 

113 chosen = default_ws 

114 

115 ws_path = (Path.cwd() / chosen).resolve() 

116 if ws_path.exists() and not ws_path.is_dir(): 

117 raise click.BadParameter( 

118 f"'{ws_path}' exists but is not a directory.", 

119 param_hint="workspace", 

120 ) 

121 if not ws_path.exists(): 

122 ws_path.mkdir(parents=True, exist_ok=True) 

123 _get_console().print(f"[green]Created workspace: {ws_path}[/green]") 

124 else: 

125 _get_console().print(f"[cyan]Using workspace: {ws_path}[/cyan]") 

126 

127 return ws_path 

128 

129 

def _select_setup_mode() -> str | None:
    """Show the setup-mode selector (questionary) and return the chosen mode.

    Returns:
        A key of :data:`SETUP_MODES`, or ``None`` when the user picks
        "Cancel", the prompt yields no answer, or it raises
        ``EOFError`` / ``KeyboardInterrupt``.
    """
    import questionary
    from rich.panel import Panel

    banner = Panel(
        "[bold]qdrant-loader Setup[/bold]\nChoose a setup mode to get started.",
        style="blue",
    )
    _get_console().print(banner)

    # Sentinel value carried by the extra "Cancel" entry.
    cancel_value = "__cancel__"
    menu = []
    for mode_key, mode_desc in SETUP_MODES.items():
        label = f"{mode_key.capitalize():<10} - {mode_desc}"
        menu.append(questionary.Choice(title=label, value=mode_key))
    menu.append(questionary.Choice(title="Cancel", value=cancel_value))

    try:
        prompt = questionary.select(
            "Select setup mode:",
            choices=menu,
            default="default",
        )
        answer = prompt.ask()
    except (EOFError, KeyboardInterrupt):
        return None

    # Both "no answer" and the explicit Cancel entry mean: no mode selected.
    if answer is None or answer == cancel_value:
        return None
    return answer

165 

166 

def run_setup_default(output_dir: Path) -> None:
    """Write a minimal default configuration into *output_dir*.

    Creates ``config.yaml`` and ``.env`` with fixed defaults plus a ``docs``
    sub-directory, which is registered as the single ``localfile`` source.
    The only interaction is the confirmation asked before writing.

    Args:
        output_dir: Directory in which the generated files are placed.
    """
    from rich.panel import Panel

    workspace = Path(output_dir).resolve()
    config_path = workspace / "config.yaml"
    env_path = workspace / ".env"

    # Let the user see what is about to be created or overwritten, then confirm.
    _show_file_preview(workspace, config_path, env_path)
    if not _confirm_overwrite(config_path, env_path):
        return

    workspace.mkdir(parents=True, exist_ok=True)

    # <workspace>/docs is where the default localfile source looks for files.
    docs_dir = workspace / "docs"
    docs_dir.mkdir(parents=True, exist_ok=True)

    localfile_source: dict[str, Any] = {
        "base_url": docs_dir.as_uri(),
        "file_types": ["*.md", "*.txt", "*.py"],
        "enable_file_conversion": True,
    }
    sources: dict[str, dict[str, Any]] = {"localfile": {"my-docs": localfile_source}}

    _write_env_file(
        env_path,
        openai_key="your_openai_api_key_here",
        qdrant_url="http://localhost:6333",
        qdrant_api_key="",
        collection_name="documents",
    )
    _write_config_file_multi(config_path, sources=sources)

    summary = "\n".join(
        [
            "[green]Created:[/green]",
            f" - {config_path}",
            f" - {env_path}",
            f" - {docs_dir}/ (place your documents here)",
            "",
            "[bold]Next steps:[/bold]",
            f" 1. Set your OPENAI_API_KEY in {env_path}",
            f" 2. Place your documents in {docs_dir}/",
            f" 3. Run: qdrant-loader init --workspace {workspace}",
            f" 4. Run: qdrant-loader ingest --workspace {workspace}",
        ]
    )
    _get_console().print(
        Panel(summary, title="Default Setup Complete", style="green")
    )

230 

231 

def run_setup_wizard(output_dir: Path) -> None:
    """Run the interactive setup wizard (Normal mode).

    Asks for the core connection settings, collects one or more data sources,
    and after confirmation writes ``config.yaml`` and ``.env`` to *output_dir*.

    Args:
        output_dir: Directory in which the generated files are placed.
    """
    from rich.panel import Panel

    workspace = Path(output_dir).resolve()
    console = _get_console()

    console.print(
        Panel(
            "[bold]qdrant-loader Setup Wizard[/bold] [dim](Normal mode)[/dim]\n"
            "Generate config.yaml and .env for your project.",
            style="blue",
        )
    )

    # --- Step 1: core settings -------------------------------------------
    console.print("\n[bold cyan]Step 1: Core Settings[/bold cyan]")
    openai_key: str = click.prompt("OpenAI API Key", hide_input=True)
    qdrant_url: str = click.prompt("Qdrant URL", default="http://localhost:6333")
    qdrant_api_key: str = click.prompt(
        "Qdrant API Key (leave empty for local)", default="", hide_input=True
    )
    collection_name: str = click.prompt("Collection name", default="documents")

    # --- Step 2+3: pick source types and configure them (repeatable) -----
    collected_sources: dict[str, dict[str, Any]] = {}
    collected_env: dict[str, str] = {}
    _collect_sources_loop(collected_sources, collected_env, workspace_dir=workspace)

    # --- Step 4: preview, confirm, write ---------------------------------
    config_path = workspace / "config.yaml"
    env_path = workspace / ".env"

    _show_file_preview(workspace, config_path, env_path)
    if not _confirm_overwrite(config_path, env_path):
        return

    workspace.mkdir(parents=True, exist_ok=True)

    _write_env_file(
        env_path,
        openai_key=openai_key,
        qdrant_url=qdrant_url,
        qdrant_api_key=qdrant_api_key,
        collection_name=collection_name,
        extra_vars=collected_env,
    )
    _write_config_file_multi(config_path, sources=collected_sources)

    # One "<type>(<count>)" entry per configured source type.
    per_type = [f"{kind}({len(entries)})" for kind, entries in collected_sources.items()]
    source_summary = ", ".join(per_type)

    report = "\n".join(
        [
            "[green]Created:[/green]",
            f" - {config_path}",
            f" - {env_path}",
            f" - Sources: {source_summary}",
            "",
            "[bold]Next steps:[/bold]",
            " 1. Review the generated files",
            f" 2. Run: qdrant-loader init --workspace {workspace}",
            f" 3. Run: qdrant-loader ingest --workspace {workspace}",
        ]
    )
    console.print(Panel(report, title="Setup Complete", style="green"))

318 

319 

def run_setup_advanced(output_dir: Path) -> None:
    """Run the advanced setup wizard with full global settings and multi-project format.

    Prompts for core, embedding, chunking and reranking settings, then for one
    or more projects (each with its own sources), and after confirmation
    writes ``config.yaml`` and ``.env`` to *output_dir*.

    Args:
        output_dir: Directory in which the generated files are placed.

    Raises:
        click.BadParameter: If the vector size or chunk size is not positive,
            or the chunk overlap is negative or not smaller than the chunk size.
    """
    from rich.panel import Panel

    output_dir = Path(output_dir).resolve()

    _get_console().print(
        Panel(
            "[bold]qdrant-loader Setup Wizard[/bold] [dim](Advanced mode)[/dim]\n"
            "Full control over global settings and multi-project configuration.",
            style="blue",
        )
    )

    # ------------------------------------------------------------------
    # Step 1: Core settings
    # ------------------------------------------------------------------
    _get_console().print("\n[bold cyan]Step 1: Core Settings[/bold cyan]")

    openai_key: str = click.prompt("OpenAI API Key", hide_input=True)
    qdrant_url: str = click.prompt("Qdrant URL", default="http://localhost:6333")
    qdrant_api_key: str = click.prompt(
        "Qdrant API Key (leave empty for local)", default="", hide_input=True
    )
    collection_name: str = click.prompt("Collection name", default="documents")

    # ------------------------------------------------------------------
    # Step 2: Embedding settings
    # ------------------------------------------------------------------
    _get_console().print("\n[bold cyan]Step 2: Embedding Configuration[/bold cyan]")

    embedding_model: str = click.prompt(
        "Embedding model", default="argus-ai/pplx-embed-v1-0.6b:fp32"
    )
    embedding_endpoint: str = click.prompt(
        "Embedding endpoint (Ollama local default)",
        default="http://localhost:11434/v1",
    )
    vector_size: int = click.prompt("Vector size", default=1024, type=int)
    if vector_size <= 0:
        raise click.BadParameter("Vector size must be a positive integer.")

    # ------------------------------------------------------------------
    # Step 3: Chunking settings
    # ------------------------------------------------------------------
    _get_console().print("\n[bold cyan]Step 3: Chunking Configuration[/bold cyan]")

    # Validated the same way as the vector size above, so that nonsensical
    # values are rejected here instead of being written to config.yaml.
    chunk_size: int = click.prompt("Chunk size (characters)", default=1500, type=int)
    if chunk_size <= 0:
        raise click.BadParameter("Chunk size must be a positive integer.")
    chunk_overlap: int = click.prompt(
        "Chunk overlap (characters)", default=200, type=int
    )
    if not 0 <= chunk_overlap < chunk_size:
        raise click.BadParameter(
            "Chunk overlap must be non-negative and smaller than the chunk size."
        )

    # ------------------------------------------------------------------
    # Step 4: Reranking settings
    # ------------------------------------------------------------------
    _get_console().print("\n[bold cyan]Step 4: Reranking Configuration[/bold cyan]")

    enable_reranking: bool = click.confirm(
        "Enable cross-encoder reranking?", default=True
    )

    # ------------------------------------------------------------------
    # Step 5: Projects with sources
    # ------------------------------------------------------------------
    projects: dict[str, dict[str, Any]] = {}
    # Shared across all projects so env var names stay unique file-wide.
    all_extra_env: dict[str, str] = {}

    while True:
        _get_console().print("\n[bold cyan]Step 5: Project Configuration[/bold cyan]")

        # Keep asking until the ID does not clash with an already added project.
        while True:
            project_id: str = click.prompt("Project ID", default="my-project")
            if project_id in projects:
                _get_console().print(
                    f"[red]Project '{project_id}' already exists. "
                    f"Pick a different ID.[/red]"
                )
                continue
            break
        display_name: str = click.prompt("Display name", default=project_id)
        description: str = click.prompt("Description", default="")

        project_sources: dict[str, dict[str, Any]] = {}
        _collect_sources_loop(project_sources, all_extra_env, workspace_dir=output_dir)

        projects[project_id] = {
            "project_id": project_id,
            "display_name": display_name,
            "description": description,
            "sources": project_sources,
        }

        _get_console().print(f"[green]Added project: {project_id}[/green]")

        if not click.confirm("Add another project?", default=False):
            break

    # ------------------------------------------------------------------
    # Step 6: Write files
    # ------------------------------------------------------------------
    config_path = output_dir / "config.yaml"
    env_path = output_dir / ".env"

    _show_file_preview(output_dir, config_path, env_path)

    if not _confirm_overwrite(config_path, env_path):
        return

    output_dir.mkdir(parents=True, exist_ok=True)

    _write_env_file(
        env_path,
        openai_key=openai_key,
        qdrant_url=qdrant_url,
        qdrant_api_key=qdrant_api_key,
        collection_name=collection_name,
        extra_vars=all_extra_env,
    )

    # Build global config. Secrets are referenced as ${VAR} placeholders; the
    # actual values only live in the .env file written above.
    global_config: dict[str, Any] = {
        "qdrant": {
            "url": qdrant_url,
            "api_key": "${QDRANT_API_KEY}" if qdrant_api_key else None,
            "collection_name": collection_name,
        },
        "embedding": {
            "model": embedding_model,
            "api_key": "${OPENAI_API_KEY}",
            "vector_size": vector_size,
        },
        "chunking": {
            "chunk_size": chunk_size,
            "chunk_overlap": chunk_overlap,
        },
    }

    if embedding_endpoint:
        global_config["embedding"]["endpoint"] = embedding_endpoint

    global_config["reranking"] = {
        "enabled": enable_reranking,
    }

    _write_config_file_advanced(
        config_path,
        global_config=global_config,
        projects=projects,
    )

    # "<project>(<n> sources)" where n counts sources across all source types.
    project_summary = ", ".join(
        f"{pid}({sum(len(srcs) for srcs in p['sources'].values())} sources)"
        for pid, p in projects.items()
    )

    _get_console().print(
        Panel(
            f"[green]Created:[/green]\n"
            f" - {config_path}\n"
            f" - {env_path}\n"
            f" - Projects: {project_summary}\n\n"
            f"[bold]Next steps:[/bold]\n"
            f" 1. Review the generated files\n"
            f" 2. Run: qdrant-loader init --workspace {output_dir}\n"
            f" 3. Run: qdrant-loader ingest --workspace {output_dir}",
            title="Advanced Setup Complete",
            style="green",
        )
    )

493 

494 

495# --------------------------------------------------------------------------- 

496# Shared helpers 

497# --------------------------------------------------------------------------- 

498 

499 

def _show_file_preview(output_dir: Path, *paths: Path) -> None:
    """Print a panel listing the workspace and each file about to be written.

    Every file is tagged as ``(overwrite)`` when it already exists on disk and
    as ``(new)`` otherwise.

    Args:
        output_dir: The workspace directory.
        paths: File paths that will be created or overwritten.
    """
    from rich.panel import Panel

    def _tag(target: Path) -> str:
        # Existing files get a warning colour, fresh ones a positive one.
        if target.exists():
            return "[yellow](overwrite)[/yellow]"
        return "[green](new)[/green]"

    rows = [f"[bold]Workspace:[/bold] {output_dir}"]
    rows.extend(f" {target.name} {_tag(target)}" for target in paths)

    panel = Panel("\n".join(rows), title="Files to write", style="cyan")
    _get_console().print(panel)

523 

524 

def _confirm_overwrite(*paths: Path) -> bool:
    """Ask for confirmation before any file is written.

    A prompt is always shown. When at least one of *paths* already exists the
    question names those files and has no default answer; otherwise a plain
    "Write files?" question defaulting to yes is asked.

    Returns:
        ``True`` if the user agreed to proceed, ``False`` if they declined
        (in which case a cancellation notice has been printed).
    """
    already_there = [candidate.name for candidate in paths if candidate.exists()]

    if already_there:
        names = ", ".join(already_there)
        go_ahead = click.confirm(
            f"{names} already exist(s) and will be overwritten. Proceed?"
        )
    else:
        go_ahead = click.confirm("Write files?", default=True)

    if go_ahead:
        return True

    _get_console().print("[yellow]Setup cancelled.[/yellow]")
    return False

547 

548 

def _select_source_type() -> str | None:
    """Present an interactive source type selector with arrow-key navigation.

    Besides one entry per key of :data:`SOURCE_TYPES`, the list ends with a
    "Back" entry so the user can leave the selector without choosing a source.

    Returns:
        One of the keys in :data:`SOURCE_TYPES`, or ``None`` if the user
        selects "Back", the prompt yields no answer, or it raises
        ``EOFError`` / ``KeyboardInterrupt``.
    """
    import questionary

    # Sentinel carried by the "Back" entry; never returned to the caller.
    _BACK = "__back__"
    choices = [
        questionary.Choice(title=f"{key:<12} - {desc}", value=key)
        for key, desc in SOURCE_TYPES.items()
    ]
    choices.append(questionary.Choice(title="Back", value=_BACK))

    try:
        result = questionary.select(
            "Select source type:",
            choices=choices,
            default="localfile",
        ).ask()
    except (EOFError, KeyboardInterrupt):
        result = None

    if result is None or result == _BACK:
        return None
    return result

573 

574 

def _collect_sources_loop(
    all_sources: dict[str, dict[str, Any]],
    all_extra_env: dict[str, str],
    workspace_dir: Path | None = None,
) -> None:
    """Interactively gather data sources until the user stops.

    Each round asks for a source type, a unique source name and the
    type-specific settings. Nothing is returned: the results are stored
    directly in *all_sources* and *all_extra_env*.

    Args:
        all_sources: Accumulated source configs (mutated in-place).
        all_extra_env: Accumulated extra env vars (mutated in-place).
        workspace_dir: Workspace directory, used to derive defaults (e.g. docs path).
    """
    console = _get_console()

    while True:
        console.print("\n[bold cyan]Data Source[/bold cyan]")

        source_type = _select_source_type()
        if source_type is None:
            return

        console.print(
            f"\n[bold cyan]Configure {SOURCE_TYPES[source_type]}[/bold cyan]"
        )

        taken_names = all_sources.get(source_type, {})
        # Snapshot of every env var registered so far (all source types and
        # projects); used to refuse names that could silently overwrite one.
        known_env_keys = set(all_extra_env)

        source_name = None
        while source_name is None:
            candidate = click.prompt(
                "Source name (identifier)", default=f"my-{source_type}"
            )
            suffix = _source_name_to_env_suffix(candidate)

            if candidate in taken_names:
                console.print(
                    f"[red]{source_type}/{candidate} already exists. "
                    "Pick a different name.[/red]"
                )
                continue

            # Reject the name when an existing env var already ends in its suffix.
            tail = f"_{suffix}"
            if any(env_key.endswith(tail) for env_key in known_env_keys):
                console.print(
                    f"[red]'{candidate}' collides with an existing "
                    "env var suffix across projects. "
                    "Pick a different name.[/red]"
                )
                continue

            source_name = candidate

        source_config, extra_env = _collect_source_config(
            source_type, source_name, workspace_dir=workspace_dir
        )

        all_sources.setdefault(source_type, {})[source_name] = source_config
        all_extra_env.update(extra_env)

        console.print(f"[green]Added {source_type}/{source_name}[/green]")

        if not click.confirm("Add another source?", default=False):
            return

636 

637 

638# --------------------------------------------------------------------------- 

639# Source-specific config collectors 

640# --------------------------------------------------------------------------- 

641 

642 

643def _source_name_to_env_suffix(source_name: str) -> str: 

644 """Convert a source name like 'my-repo' to an env-var-safe suffix like 'MY_REPO'.""" 

645 import re 

646 

647 suffix = re.sub(r"[^A-Za-z0-9]", "_", source_name).strip("_").upper() 

648 return suffix if suffix else "DEFAULT" 

649 

650 

def _collect_source_config(
    source_type: str,
    source_name: str,
    *,
    workspace_dir: Path | None = None,
) -> _SourceResult:
    """Run the collector that belongs to *source_type*.

    Args:
        source_type: One of the keys in ``SOURCE_TYPES``.
        source_name: User-chosen identifier, used to create unique env var names.
        workspace_dir: Workspace directory; only the localfile collector receives it.

    Returns:
        A tuple of (source yaml dict, extra env-var dict). An unrecognised
        *source_type* yields two empty dicts.
    """
    # localfile is the only collector that takes the workspace directory.
    if source_type == "localfile":
        return _collect_localfile_config(source_name, workspace_dir=workspace_dir)
    if source_type == "git":
        return _collect_git_config(source_name)
    if source_type == "confluence":
        return _collect_confluence_config(source_name)
    if source_type == "jira":
        return _collect_jira_config(source_name)
    if source_type == "publicdocs":
        return _collect_publicdocs_config(source_name)
    return {}, {}

680 

681 

def _collect_git_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Git repository source.

    When an access token is entered it is not stored in the config itself:
    the config receives a ``${GIT_TOKEN_<SUFFIX>}`` placeholder and the real
    value is returned as an extra env var under that name.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    repo_url: str = click.prompt(
        "Repository URL (e.g., https://github.com/org/repo.git)"
    )
    branch_name: str = click.prompt("Branch", default="main")
    access_token: str = click.prompt(
        "Access token (leave empty for public repos)", default="", hide_input=True
    )
    patterns_raw: str = click.prompt(
        "File types (comma-separated)", default="*.md,*.txt,*.py"
    )

    # Split on commas, trim each piece and drop the empty ones.
    stripped = (piece.strip() for piece in patterns_raw.split(","))
    patterns = [piece for piece in stripped if piece]

    config: dict[str, Any] = {
        "base_url": repo_url,
        "branch": branch_name,
        "file_types": patterns,
        "enable_file_conversion": True,
    }

    if not access_token:
        return config, {}

    env_key = "GIT_TOKEN_" + _source_name_to_env_suffix(source_name)
    config["token"] = "${" + env_key + "}"
    return config, {env_key: access_token}

713 

714 

def _collect_confluence_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Confluence source.

    The e-mail and API token are returned as extra env vars named
    ``CONFLUENCE_EMAIL_<SUFFIX>`` / ``CONFLUENCE_TOKEN_<SUFFIX>``; the config
    only references them through ``${...}`` placeholders.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    wiki_url: str = click.prompt(
        "Confluence URL (e.g., https://mycompany.atlassian.net/wiki)"
    )
    space: str = click.prompt("Space key")
    user_email: str = click.prompt("Email")
    api_token: str = click.prompt("API token", hide_input=True)

    env_suffix = _source_name_to_env_suffix(source_name)
    token_var = "CONFLUENCE_TOKEN_" + env_suffix
    email_var = "CONFLUENCE_EMAIL_" + env_suffix

    secrets: dict[str, str] = {token_var: api_token, email_var: user_email}
    settings: dict[str, Any] = {
        "base_url": wiki_url,
        "space_key": space,
        "token": "${" + token_var + "}",
        "email": "${" + email_var + "}",
        "enable_file_conversion": True,
    }
    return settings, secrets

744 

745 

def _collect_jira_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Jira source.

    The e-mail and API token are returned as extra env vars named
    ``JIRA_EMAIL_<SUFFIX>`` / ``JIRA_TOKEN_<SUFFIX>``; the config only
    references them through ``${...}`` placeholders.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    jira_url: str = click.prompt("Jira URL (e.g., https://mycompany.atlassian.net)")
    project: str = click.prompt("Project key")
    user_email: str = click.prompt("Email")
    api_token: str = click.prompt("API token", hide_input=True)

    env_suffix = _source_name_to_env_suffix(source_name)
    token_var = "JIRA_TOKEN_" + env_suffix
    email_var = "JIRA_EMAIL_" + env_suffix

    secrets: dict[str, str] = {token_var: api_token, email_var: user_email}
    settings: dict[str, Any] = {
        "base_url": jira_url,
        "project_key": project,
        "token": "${" + token_var + "}",
        "email": "${" + email_var + "}",
        "enable_file_conversion": True,
    }
    return settings, secrets

773 

774 

def _collect_publicdocs_config(source_name: str) -> _SourceResult:
    """Prompt for the settings of a Public Documentation source.

    No secrets are involved, so the extra env var dict is always empty and
    *source_name* is not used.

    Returns:
        Tuple of (source config dict, extra env vars).
    """
    docs_url: str = click.prompt(
        "Documentation URL (e.g., https://docs.example.com/)"
    )
    docs_version: str = click.prompt("Version", default="latest")
    # Restricted to the two listed formats by click.Choice.
    allowed_formats = click.Choice(["html", "markdown"])
    docs_format: str = click.prompt(
        "Content type", default="html", type=allowed_formats
    )

    settings: dict[str, Any] = {}
    settings["base_url"] = docs_url
    settings["version"] = docs_version
    settings["content_type"] = docs_format
    return settings, {}

795 

796 

def _collect_localfile_config(
    source_name: str, *, workspace_dir: Path | None = None
) -> _SourceResult:
    """Prompt for the settings of a Local Files source.

    Args:
        source_name: User-chosen identifier for this source (not used here).
        workspace_dir: Workspace directory. When provided, ``<workspace>/docs``
            is offered as the default path; otherwise the prompt has no default.

    Returns:
        Tuple of (source config dict, extra env vars). The env var dict is
        always empty.
    """
    suggested = str(workspace_dir / "docs") if workspace_dir is not None else None

    entered: str = click.prompt(
        "Directory path (e.g., /path/to/files or file:///path)",
        default=suggested,
    )
    # Anything already given as a file:// URI is kept verbatim; plain paths are
    # expanded, made absolute and converted to a file:// URI.
    if entered.startswith("file://"):
        location = entered
    else:
        location = Path(entered).expanduser().resolve().as_uri()

    patterns_raw: str = click.prompt(
        "File types (comma-separated)", default="*.md,*.txt,*.py"
    )
    stripped = (piece.strip() for piece in patterns_raw.split(","))
    patterns = [piece for piece in stripped if piece]

    settings: dict[str, Any] = {
        "base_url": location,
        "file_types": patterns,
        "enable_file_conversion": True,
    }
    return settings, {}

833 

834 

835# --------------------------------------------------------------------------- 

836# File writers 

837# --------------------------------------------------------------------------- 

838 

839 

840def _escape_env_value(value: str) -> str: 

841 """Escape a value for .env file if it contains special characters.""" 

842 if any(c in value for c in ("=", "\n", '"', "'", " ", "\t", "#", "$", "`")): 

843 return '"' + value.replace("\\", "\\\\").replace('"', '\\"') + '"' 

844 return value 

845 

846 

def _write_env_file(
    path: Path,
    *,
    openai_key: str,
    qdrant_url: str,
    qdrant_api_key: str,
    collection_name: str,
    extra_vars: dict[str, str] | None = None,
) -> None:
    """Write the ``.env`` file.

    Only non-default values are emitted so the file stays minimal. The file is
    created with mode ``0o600``; an already existing file is tightened to
    ``0o600`` on a best-effort basis before the content is written.

    Args:
        path: Destination path for the ``.env`` file.
        openai_key: OpenAI API key (always written).
        qdrant_url: Qdrant URL (written only when not the default localhost).
        qdrant_api_key: Qdrant API key (written only when non-empty).
        collection_name: Collection name (written only when not "documents").
        extra_vars: Additional environment variables from source-specific config.
    """
    lines: list[str] = [f"OPENAI_API_KEY={_escape_env_value(openai_key)}"]

    if qdrant_url and qdrant_url != "http://localhost:6333":
        lines.append(f"QDRANT_URL={_escape_env_value(qdrant_url)}")

    if qdrant_api_key:
        lines.append(f"QDRANT_API_KEY={_escape_env_value(qdrant_api_key)}")

    if collection_name and collection_name != "documents":
        lines.append(f"QDRANT_COLLECTION_NAME={_escape_env_value(collection_name)}")

    if extra_vars:
        lines.append("")  # blank line separating core and source-specific vars
        lines.extend(
            f"{key}={_escape_env_value(value)}" for key, value in extra_vars.items()
        )

    lines.append("")  # trailing newline
    content = "\n".join(lines)

    # Write with restrictive permissions from the start
    import os

    try:
        fd = os.open(str(path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        try:
            # The mode passed to os.open() only applies when the file is
            # created. An existing .env keeps its old permissions, so tighten
            # it while it is still empty (just truncated). Deliberately
            # best-effort: a failing chmod must not prevent writing.
            try:
                if hasattr(os, "fchmod"):
                    os.fchmod(fd, 0o600)
                else:
                    path.chmod(0o600)
            except OSError:
                pass

            # os.write() may write fewer bytes than requested; loop until done.
            remaining = memoryview(content.encode("utf-8"))
            while remaining:
                written = os.write(fd, remaining)
                remaining = remaining[written:]
            os.fsync(fd)
        finally:
            os.close(fd)
    except OSError:
        # Fallback for platforms that don't support os.open mode (e.g., Windows)
        path.write_text(content, encoding="utf-8")
        try:
            path.chmod(0o600)
        except OSError:
            pass

904 

905 

def _write_config_file_multi(
    path: Path,
    *,
    sources: dict[str, dict[str, Any]],
) -> None:
    """Write ``config.yaml`` in the simplified (sources-only) format.

    The file starts with a three-line comment header followed by a blank line,
    then the YAML document ``{"sources": sources}`` in block style with the
    key order left as given.

    Args:
        path: Destination path for ``config.yaml``.
        sources: Dict of source_type -> {source_name: source_config}.
    """
    import yaml

    header = (
        "# Generated by qdrant-loader setup\n"
        "# Simplified configuration format\n"
        "# See config.template.yaml for the full multi-project format.\n\n"
    )
    document: dict[str, Any] = {"sources": sources}

    with path.open("w", encoding="utf-8") as out:
        out.write(header)
        yaml.dump(document, out, default_flow_style=False, sort_keys=False)

926 

927 

def _write_config_file_advanced(
    path: Path,
    *,
    global_config: dict[str, Any],
    projects: dict[str, dict[str, Any]],
) -> None:
    """Write ``config.yaml`` in the advanced multi-project format.

    The file starts with a three-line comment header followed by a blank line,
    then a YAML document with the top-level keys ``global`` and ``projects``
    (in that order), dumped in block style without key sorting.

    Args:
        path: Destination path for ``config.yaml``.
        global_config: Global configuration dict (qdrant, embedding, chunking).
        projects: Dict of project_id -> project config dict.
    """
    import yaml

    header = (
        "# Generated by qdrant-loader setup (advanced mode)\n"
        "# Multi-project configuration format\n"
        "# See config.template.yaml for all available options.\n\n"
    )
    document: dict[str, Any] = {}
    document["global"] = global_config
    document["projects"] = projects

    with path.open("w", encoding="utf-8") as out:
        out.write(header)
        yaml.dump(document, out, default_flow_style=False, sort_keys=False)