Coverage for website/build.py: 90%

494 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-04 05:45 +0000

1#!/usr/bin/env python3 

2""" 

3Website builder for QDrant Loader documentation site. 

4Uses templates with replaceable content to generate static HTML pages. 

5""" 

6 

7import os 

8import json 

9import shutil 

10from pathlib import Path 

11from typing import Dict, Any, Optional 

12import argparse 

13import re 

14 

15 

16class WebsiteBuilder: 

17 """Builds the QDrant Loader documentation website from templates.""" 

18 

def __init__(
    self, templates_dir: str = "website/templates", output_dir: str = "site"
):
    """Record where templates are read from and where the site is written.

    Args:
        templates_dir: Directory containing the HTML templates.
        output_dir: Directory that receives the generated files.
    """
    # Both locations are stored as Path objects so they can be joined with "/".
    self.templates_dir, self.output_dir = Path(templates_dir), Path(output_dir)
    # The base URL starts out empty; other methods read it from this attribute.
    self.base_url = ""

26 

def load_template(self, template_name: str) -> str:
    """Return the text of a template stored under ``self.templates_dir``.

    Args:
        template_name: Name of the template file, relative to the
            templates directory.

    Returns:
        The file contents decoded as UTF-8.

    Raises:
        FileNotFoundError: If the template file does not exist.
    """
    location = self.templates_dir / template_name
    if location.exists():
        return location.read_text(encoding="utf-8")
    raise FileNotFoundError(f"Template not found: {location}")

35 

def replace_placeholders(self, content: str, replacements: Dict[str, str]) -> str:
    """Substitute ``{{ name }}`` tokens in ``content`` with their values.

    Args:
        content: Template text containing placeholder tokens.
        replacements: Mapping of placeholder name to replacement value.
            Values are converted with ``str()`` before insertion.

    Returns:
        The text with every occurrence of each known token replaced.
        Tokens whose name is not in ``replacements`` are left untouched.
    """
    for placeholder, value in replacements.items():
        # The token is built by concatenation so no brace escaping is needed.
        # NOTE(review): the "{{ name }}" shape (one space inside the braces)
        # is reconstructed from a garbled literal - confirm against the
        # templates.
        token = "{{ " + placeholder + " }}"
        content = content.replace(token, str(value))
    return content

41 

def markdown_to_html(
    self, markdown_content: str, source_file: str = "", output_file: str = ""
) -> str:
    """Convert markdown to HTML with Bootstrap styling.

    The ``markdown`` package is used when it can be imported; otherwise the
    regex-based ``basic_markdown_to_html`` fallback is used. In both cases
    the result is passed through ``convert_markdown_links_to_html`` so that
    links are rewritten consistently.

    Args:
        markdown_content: Markdown source text.
        source_file: Path of the markdown file, forwarded to the link rewriter.
        output_file: Path of the generated page, forwarded to the link rewriter.

    Returns:
        The rendered HTML fragment.
    """
    try:
        import markdown

        md = markdown.Markdown(
            extensions=[
                "codehilite",
                "toc",
                "tables",
                "fenced_code",
                "attr_list",
                "def_list",
                "footnotes",
                "md_in_html",
            ],
            extension_configs={
                "codehilite": {"css_class": "highlight", "use_pygments": True},
                "toc": {
                    "permalink": False,  # Disable the ¶ characters
                    "permalink_class": "text-decoration-none",
                    "permalink_title": "Link to this section",
                },
            },
        )

        # Add Bootstrap classes to the elements the library produced.
        html_content = self.add_bootstrap_classes(md.convert(markdown_content))
    except ImportError:
        # The fallback converter emits its own Bootstrap classes.
        print("⚠️ Markdown library not available, falling back to basic conversion")
        html_content = self.basic_markdown_to_html(markdown_content)

    # Rewrite .md / LICENSE links for whichever converter produced the HTML.
    return self.convert_markdown_links_to_html(html_content, source_file, output_file)

86 

def convert_markdown_links_to_html(
    self, html_content: str, source_file: str = "", output_file: str = ""
) -> str:
    """Rewrite links to markdown/LICENSE files so they target generated HTML.

    Only the values of ``href="..."`` attributes are touched. External URLs
    (anything with a scheme such as ``https:`` or ``mailto:``, and
    protocol-relative ``//host`` URLs) are left unchanged. A ``#fragment``
    after the file name is preserved.

    Args:
        html_content: HTML fragment whose links should be rewritten.
        source_file: Path of the markdown source. Together with
            ``output_file`` it enables the README relocation fixes below.
        output_file: Path of the generated page relative to the site root.

    Returns:
        The HTML with rewritten link targets.
    """

    def _is_external(url: str) -> bool:
        # "scheme:" prefix or protocol-relative URL.
        return url.startswith("//") or bool(
            re.match(r"[A-Za-z][A-Za-z0-9+.\-]*:", url)
        )

    def _retarget(match):
        url = match.group(1)
        if _is_external(url):
            return match.group(0)
        # Keep any "#fragment" so "guide.md#install" becomes "guide.html#install".
        path, hash_sign, fragment = url.partition("#")
        if path.endswith(".md"):
            path = path[: -len(".md")] + ".html"
        elif path.endswith("LICENSE"):
            path += ".html"
        return f'href="{path}{hash_sign}{fragment}"'

    html_content = re.sub(r'href="([^"]*)"', _retarget, html_content)

    # The remaining fixes only apply when a README is moved to another directory.
    if not (source_file and output_file):
        return html_content

    source_path = Path(source_file)
    output_path = Path(output_file)
    if source_path.name != "README.md":
        return html_content

    # as_posix() keeps the "/"-based prefix checks valid on every platform.
    source_parent = source_path.parent.as_posix()
    output_parent = output_path.parent.as_posix()

    if source_parent == "." and output_parent.startswith("docs"):
        # Case 1: main README.md moved to docs/README.html. The page already
        # lives in docs/, so a leading "docs/" segment is dropped.
        html_content = re.sub(r'href="\./docs/', r'href="./', html_content)
        html_content = re.sub(r'href="docs/', r'href="./', html_content)
    elif source_parent.startswith("packages/") and output_parent.startswith(
        "docs/packages/"
    ):
        # Case 2: packages/*/README.md moved to docs/packages/*/README.html.
        # Subtract 2: one for the filename, one for docs/.
        package_depth = len(output_path.parts) - 2
        up_levels = "../" * package_depth

        def _relocate(match):
            url = match.group(1)
            if _is_external(url):
                return match.group(0)
            # Point "docs/..." links at the docs root relative to the new page.
            url = re.sub(r"^(?:(?:\.\./)*|\./)?docs/", up_levels, url)
            # Tidy only the link itself so "//" in code samples is preserved.
            url = re.sub(r"/{2,}", "/", url)
            url = url.replace("docs/docs/", "docs/")
            return f'href="{url}"'

        html_content = re.sub(r'href="([^"]*)"', _relocate, html_content)

    return html_content

152 

def basic_markdown_to_html(self, markdown_content: str) -> str:
    """Convert a small markdown subset to HTML without external dependencies.

    Supported constructs: ``#`` to ``####`` headings, fenced code blocks,
    inline code, links, bold, italic, and ``- `` bullet lists. Every other
    non-empty line becomes a paragraph.

    Fenced code blocks are emitted verbatim (indentation and blank lines
    kept) and are excluded from heading/inline/paragraph processing.
    Consecutive bullet items share a single ``<ul>``.

    Args:
        markdown_content: Markdown source text.

    Returns:
        The generated HTML, one element per line (code blocks may span
        several lines).
    """
    heading_rules = (
        (r"^# (.*?)$", r'<h1 class="display-4 fw-bold text-primary mb-4">\1</h1>'),
        (r"^## (.*?)$", r'<h2 class="h3 fw-bold text-primary mt-5 mb-3">\1</h2>'),
        (r"^### (.*?)$", r'<h3 class="h4 fw-bold mt-4 mb-3">\1</h3>'),
        (r"^#### (.*?)$", r'<h4 class="h5 fw-bold mt-3 mb-2">\1</h4>'),
    )
    inline_rules = (
        # Inline code
        (r"`([^`]+)`", r'<code class="bg-light text-dark px-2 py-1 rounded">\1</code>'),
        # Links
        (r"\[([^\]]+)\]\(([^)]+)\)", r'<a href="\2" class="text-decoration-none">\1</a>'),
        # Bold must run before italic so "**x**" is not read as nested "*".
        (r"\*\*([^*]+)\*\*", r"<strong>\1</strong>"),
        (r"\*([^*]+)\*", r"<em>\1</em>"),
    )
    # Lines that already start with one of these are not wrapped in <p>.
    block_prefixes = ("<h", "<pre", "<ul", "<li")

    def render_text(text):
        """Render a markdown segment that contains no fenced code."""
        for pattern, replacement in heading_rules:
            text = re.sub(pattern, replacement, text, flags=re.MULTILINE)
        for pattern, replacement in inline_rules:
            text = re.sub(pattern, replacement, text)

        rendered = []
        pending_items = []

        def flush_list():
            # Close the current run of bullet items as one list.
            if pending_items:
                rendered.append('<ul class="list-group list-group-flush">')
                rendered.extend(pending_items)
                rendered.append("</ul>")
                del pending_items[:]

        for raw_line in text.split("\n"):
            bullet = re.match(r"- (.*)$", raw_line)
            if bullet:
                pending_items.append(f"<li>{bullet.group(1)}</li>")
                continue
            line = raw_line.strip()
            if not line:
                # Blank lines neither produce output nor end a list.
                continue
            flush_list()
            if line.startswith(block_prefixes):
                rendered.append(line)
            else:
                rendered.append(f'<p class="mb-3">{line}</p>')
        flush_list()
        return rendered

    fence = re.compile(r"```(\w+)?\n(.*?)\n```", re.DOTALL)
    output = []
    cursor = 0
    for code_block in fence.finditer(markdown_content):
        output.extend(render_text(markdown_content[cursor:code_block.start()]))
        output.append(
            '<pre class="bg-dark text-light p-3 rounded"><code>'
            + code_block.group(2)
            + "</code></pre>"
        )
        cursor = code_block.end()
    output.extend(render_text(markdown_content[cursor:]))

    return "\n".join(output)

236 

def add_bootstrap_classes(self, html_content: str) -> str:
    """Add Bootstrap classes to HTML elements.

    Tag names are matched exactly, so the ``<p>`` rule does not touch
    ``<pre>`` and the ``<th>`` rule does not touch ``<thead>``. When an
    element already has a ``class`` attribute the Bootstrap classes are
    appended to it instead of emitting a second ``class`` attribute.

    Args:
        html_content: HTML fragment to decorate.

    Returns:
        The fragment with Bootstrap classes added. Tables are additionally
        wrapped in a ``table-responsive`` div and external (``http``) links
        get ``target="_blank"``.
    """

    def decorate(markup, tag, classes, attrs=r"[^>]*", prefix="", suffix=""):
        """Add ``classes`` to every opening ``tag`` whose attributes match."""

        def rebuild(match):
            attributes = match.group(1)
            # The lookbehind keeps attributes such as data-class from matching.
            existing = re.search(r'(?<![\w-])class="([^"]*)"', attributes)
            if existing:
                combined = f"{existing.group(1)} {classes}".strip()
                attributes = (
                    attributes[: existing.start()]
                    + f'class="{combined}"'
                    + attributes[existing.end():]
                )
            else:
                attributes = f'{attributes} class="{classes}"'
            return f"{prefix}<{tag}{attributes}{suffix}>"

        # (?![\w-]) ends the tag name so "<p" cannot match "<pre" or "<p-x".
        return re.sub(rf"<{tag}(?![\w-])({attrs})>", rebuild, markup)

    # Headers, paragraphs, lists
    simple_rules = (
        ("h1", "display-4 fw-bold text-primary mb-4"),
        ("h2", "h3 fw-bold text-primary mt-5 mb-3"),
        ("h3", "h4 fw-bold mt-4 mb-3"),
        ("h4", "h5 fw-bold mt-3 mb-2"),
        ("h5", "h6 fw-bold mt-3 mb-2"),
        ("h6", "fw-bold mt-2 mb-2"),
        ("p", "mb-3"),
        ("ul", "list-group list-group-flush mb-4"),
        ("ol", "list-group list-group-numbered mb-4"),
        ("li", "list-group-item border-0 px-0"),
    )
    for tag, classes in simple_rules:
        html_content = decorate(html_content, tag, classes)

    # Tables: wrap in a responsive container and style header cells.
    html_content = decorate(
        html_content,
        "table",
        "table table-striped table-hover",
        prefix='<div class="table-responsive mb-4">',
    )
    html_content = html_content.replace("</table>", "</table></div>")
    html_content = decorate(html_content, "th", "bg-primary text-white")

    # Code blocks
    html_content = decorate(html_content, "pre", "bg-dark text-light p-3 rounded mb-4")
    html_content = decorate(html_content, "code", "bg-light text-dark px-2 py-1 rounded")

    # Links: external ones (href starting with "http") open in a new tab.
    html_content = decorate(
        html_content,
        "a",
        "text-decoration-none",
        attrs=r'[^>]*href="http[^"]*"[^>]*',
        suffix=' target="_blank"',
    )
    html_content = decorate(
        html_content,
        "a",
        "text-decoration-none",
        attrs=r'[^>]*href="(?!http)[^"]*"[^>]*',
    )

    # Blockquotes
    html_content = decorate(
        html_content,
        "blockquote",
        "blockquote border-start border-primary border-4 ps-3 mb-4",
    )

    return html_content

326 

def extract_title_from_markdown(self, markdown_content: str) -> str:
    """Return the text of the first level-1 heading in the markdown.

    A line counts as a level-1 heading when, after stripping surrounding
    whitespace, it starts with ``"# "``.

    Args:
        markdown_content: Markdown source text.

    Returns:
        The heading text without the leading marker, or ``"Documentation"``
        when no such heading exists.
    """
    stripped_lines = (raw.strip() for raw in markdown_content.split("\n"))
    titles = (text[2:].strip() for text in stripped_lines if text.startswith("# "))
    return next(titles, "Documentation")

335 

def build_page(
    self,
    template_name: str,
    content_template: str,
    page_title: str,
    page_description: str,
    output_file: str,
    additional_replacements: Optional[Dict[str, str]] = None,
) -> None:
    """Build a complete page using the base template and a content template.

    Args:
        template_name: Not read by this method; ``base.html`` is always
            used as the outer template.
        content_template: Template whose text fills the ``content`` placeholder.
        page_title: Value for the ``page_title`` placeholder.
        page_description: Value for the ``page_description`` placeholder.
        output_file: Destination path relative to ``self.output_dir``.
        additional_replacements: Extra placeholder values; these override
            the defaults computed here.

    Raises:
        FileNotFoundError: Propagated from ``load_template`` when a template
            file is missing.
    """
    # Load templates
    base_template = self.load_template("base.html")
    content = self.load_template(content_template)

    # Number of directories between the page and the site root.
    depth = len(Path(output_file).parts) - 1

    # Relative prefix that leads from the page back to the site root.
    if depth == 0:
        page_base_url = self.base_url
    else:
        page_base_url = (
            "../" * depth + self.base_url if self.base_url else "../" * depth
        )

    # Canonical URL of this page (a directory URL for index.html pages),
    # computed the same way as in the markdown and license page builders.
    page_path = output_file.replace("index.html", "")
    if self.base_url.startswith("http"):
        canonical_url = self.base_url.rstrip("/") + "/" + page_path
    else:
        canonical_url = f"/{page_path}"

    # Read the version from pyproject.toml when possible.
    version = "0.4.0b1"  # Default version
    try:
        import tomli

        with open("pyproject.toml", "rb") as f:
            pyproject = tomli.load(f)
        version = pyproject.get("project", {}).get("version", version)
    except (ImportError, OSError, ValueError, AttributeError):
        # Best effort: tomli missing, file unreadable or malformed -> default.
        pass

    replacements = {
        "page_title": page_title,
        "page_description": page_description,
        "content": content,
        "base_url": page_base_url,
        "canonical_url": canonical_url,
        "version": version,
        "additional_head": "",
        "additional_scripts": "",
    }
    if additional_replacements:
        replacements.update(additional_replacements)

    final_content = self.replace_placeholders(base_template, replacements)

    # Write output file
    output_path = self.output_dir / output_file
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_content)

    print(f"✅ Built: {output_file}")

407 

def build_markdown_page(
    self,
    markdown_file: str,
    output_file: str,
    page_title: Optional[str] = None,
    page_description: Optional[str] = None,
    breadcrumb: Optional[str] = None,
) -> None:
    """Build a page from a markdown file using the documentation layout.

    Args:
        markdown_file: Path of the markdown source. When it does not exist
            a warning is printed and nothing is written.
        output_file: Destination path relative to ``self.output_dir``.
        page_title: Page title; defaults to the first ``# `` heading.
        page_description: Meta description; defaults to the first line that
            is neither blank, a heading, nor a code fence (truncated to 150
            characters), or to a generic sentence when none is found.
        breadcrumb: Label of the current page in the breadcrumb trail. No
            breadcrumb is rendered when this is empty.

    Raises:
        FileNotFoundError: Propagated from ``load_template`` when
            ``base.html`` is missing.
    """
    # Read markdown file
    markdown_path = Path(markdown_file)
    if not markdown_path.exists():
        print(f"⚠️ Markdown file not found: {markdown_file}")
        return

    with open(markdown_path, "r", encoding="utf-8") as f:
        markdown_content = f.read()

    # Extract title if not provided
    if not page_title:
        page_title = self.extract_title_from_markdown(markdown_content)

    # Generate description if not provided
    if not page_description:
        for line in markdown_content.split("\n"):
            line = line.strip()
            if line and not line.startswith("#") and not line.startswith("```"):
                page_description = line[:150] + "..." if len(line) > 150 else line
                break
        if not page_description:
            page_description = f"Documentation for {page_title}"

    # Convert markdown to HTML
    html_content = self.markdown_to_html(markdown_content, markdown_file, output_file)

    # Number of directories between the page and the site root.
    path_parts = Path(output_file).parts
    depth = len(path_parts) - 1

    if depth == 0:
        home_url = self.base_url
        docs_url = f"{self.base_url}docs/"
        page_base_url = self.base_url
    else:
        home_url = "../" * depth + self.base_url if self.base_url else "../" * depth

        # docs_url always points to the docs index page.
        if self.base_url:
            docs_url = f"{self.base_url}docs/"
        elif output_file.startswith("docs/"):
            # Subtract 2: one for the filename, one for docs/.
            docs_depth = len(path_parts) - 2
            # Directly in docs/ -> same directory; deeper -> climb back up.
            docs_url = "../" * docs_depth if docs_depth > 0 else "./"
        else:
            # Nested page outside docs/: climb to the root, then enter docs/.
            docs_url = "../" * depth + "docs/"

        page_base_url = (
            "../" * depth + self.base_url if self.base_url else "../" * depth
        )

    # Create breadcrumb navigation
    breadcrumb_html = ""
    if breadcrumb:
        breadcrumb_html = f"""
            <nav aria-label="breadcrumb" class="mb-4">
                <ol class="breadcrumb">
                    <li class="breadcrumb-item">
                        <a href="{home_url}" class="text-decoration-none">
                            <i class="bi bi-house me-1"></i>Home
                        </a>
                    </li>
                    <li class="breadcrumb-item">
                        <a href="{docs_url}" class="text-decoration-none">Documentation</a>
                    </li>
                    <li class="breadcrumb-item active" aria-current="page">{breadcrumb}</li>
                </ol>
            </nav>
            """

    # Create the documentation content template
    doc_content = f"""
        <section class="py-5">
            <div class="container">
                <div class="row justify-content-center">
                    <div class="col-lg-10">
                        {breadcrumb_html}
                        <div class="card border-0 shadow">
                            <div class="card-body p-5">
                                {html_content}
                            </div>
                        </div>

                        <!-- Navigation footer -->
                        <div class="d-flex justify-content-between align-items-center mt-4">
                            <a href="{docs_url}" class="btn btn-outline-primary">
                                <i class="bi bi-arrow-left me-2"></i>Back to Documentation
                            </a>
                            <div class="text-muted small">
                                <i class="bi bi-file-text me-1"></i>
                                Generated from {markdown_path.name}
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </section>
        """

    # Build the page using base template
    base_template = self.load_template("base.html")

    # Canonical URL (a directory URL for index.html pages)
    if self.base_url.startswith("http"):
        canonical_url = (
            self.base_url.rstrip("/") + "/" + output_file.replace("index.html", "")
        )
    else:
        # Relative URL or GitHub Pages - use relative path for local testing
        canonical_url = f"/{output_file.replace('index.html', '')}"

    # Read the version from pyproject.toml when possible.
    version = "0.4.0b1"  # Default version
    try:
        import tomli

        with open("pyproject.toml", "rb") as f:
            pyproject = tomli.load(f)
        version = pyproject.get("project", {}).get("version", version)
    except (ImportError, OSError, ValueError, AttributeError):
        # Best effort: tomli missing, file unreadable or malformed -> default.
        pass

    replacements = {
        "page_title": page_title,
        "page_description": page_description,
        "content": doc_content,
        "base_url": page_base_url,
        "canonical_url": canonical_url,
        "version": version,
        "additional_head": """
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github.min.css">
        """,
        "additional_scripts": """
        <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
        <script>hljs.highlightAll();</script>
        """,
    }

    final_content = self.replace_placeholders(base_template, replacements)

    # Write output file
    output_path = self.output_dir / output_file
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_content)

    print(f"📄 Built markdown page: {output_file}")

586 

def copy_static_files(self, source_dirs: list) -> None:
    """Copy static files (docs, coverage, etc.) to the output directory.

    Each existing source is copied to ``self.output_dir / <source name>``.
    A directory replaces whatever already exists at the destination; a file
    is copied with ``shutil.copy2``. Missing sources only produce a warning.

    Args:
        source_dirs: Paths of the files or directories to copy.
    """
    for source_dir in source_dirs:
        source_path = Path(source_dir)
        if not source_path.exists():
            print(f"⚠️ Source not found: {source_dir}")
            continue

        dest_path = self.output_dir / source_path.name
        if source_path.is_dir():
            if dest_path.is_dir():
                shutil.rmtree(dest_path)
            elif dest_path.exists():
                # A stale file with the same name would make copytree fail.
                dest_path.unlink()
            shutil.copytree(source_path, dest_path)
            print(f"📁 Copied directory: {source_dir} -> {dest_path}")
        else:
            # copy2 does not create missing parent directories.
            self.output_dir.mkdir(parents=True, exist_ok=True)
            shutil.copy2(source_path, dest_path)
            print(f"📄 Copied file: {source_dir} -> {dest_path}")

604 

def generate_project_info(
    self,
    version: Optional[str] = None,
    commit_sha: Optional[str] = None,
    commit_date: Optional[str] = None,
) -> None:
    """Write ``project-info.json`` into the output directory.

    Args:
        version: Project version. When falsy it is read from
            ``pyproject.toml``; ``"unknown"`` is used if that fails.
        commit_sha: Commit hash. When falsy it is obtained from
            ``git rev-parse HEAD``; ``"unknown"`` is used if that fails.
        commit_date: Commit date. When falsy it is obtained from
            ``git log``; the current time is used if that fails.
    """
    import subprocess
    from datetime import datetime

    # Get version from pyproject.toml if not provided
    if not version:
        try:
            import tomli

            with open("pyproject.toml", "rb") as f:
                data = tomli.load(f)
            version = data["project"]["version"]
        except Exception:
            version = "unknown"

    # Get git info if not provided
    if not commit_sha:
        try:
            commit_sha = (
                subprocess.check_output(["git", "rev-parse", "HEAD"]).decode().strip()
            )
        except Exception:
            commit_sha = "unknown"

    if not commit_date:
        try:
            commit_date = (
                subprocess.check_output(
                    ["git", "log", "-1", "--format=%cd", "--date=iso"]
                )
                .decode()
                .strip()
            )
        except Exception:
            commit_date = datetime.now().isoformat()

    project_info = {
        "name": "QDrant Loader",
        "version": version,
        "description": "Enterprise-ready vector database toolkit for building searchable knowledge bases from multiple data sources",
        "commit": {
            "sha": commit_sha,
            "short": commit_sha[:7] if commit_sha != "unknown" else "unknown",
            "date": commit_date,
        },
        "build": {
            "timestamp": datetime.now().isoformat(),
            "workflow_run_id": os.getenv("GITHUB_RUN_ID", "local"),
        },
    }

    # The output directory may not exist yet when this runs first.
    self.output_dir.mkdir(parents=True, exist_ok=True)
    project_info_path = self.output_dir / "project-info.json"
    with open(project_info_path, "w", encoding="utf-8") as f:
        json.dump(project_info, f, indent=2)

    print("📊 Generated: project-info.json")

669 

def build_license_page(
    self,
    license_file: str,
    output_file: str,
    page_title: str,
    page_description: str,
) -> None:
    """Build a page from a plain text LICENSE file.

    The license text is HTML-escaped before it is placed in a ``<pre>``
    element, so text such as ``<https://...>`` is displayed literally.

    Args:
        license_file: Path of the license text file. When it does not exist
            a warning is printed and nothing is written.
        output_file: Destination path relative to ``self.output_dir``.
        page_title: Title used in the heading, breadcrumb and placeholders.
        page_description: Lead text and meta description.

    Raises:
        FileNotFoundError: Propagated from ``load_template`` when
            ``base.html`` is missing.
    """
    from html import escape

    # Read license file
    license_path = Path(license_file)
    if not license_path.exists():
        print(f"⚠️ License file not found: {license_file}")
        return

    with open(license_path, "r", encoding="utf-8") as f:
        license_content = f.read()

    # quote=False: only &, < and > need escaping inside element content.
    escaped_license = escape(license_content, quote=False)

    # Wrap license content in a preformatted block for proper display
    html_content = f"""
        <div class="alert alert-info mb-4">
            <h4 class="alert-heading">
                <i class="bi bi-shield-check me-2"></i>License Information
            </h4>
            <p class="mb-0">
                This project is licensed under the GNU General Public License v3.0.
                The full license text is provided below.
            </p>
        </div>

        <div class="card border-0 shadow-sm">
            <div class="card-body">
                <pre class="bg-light p-4 rounded" style="white-space: pre-wrap; font-size: 0.9em; line-height: 1.4;">{escaped_license}</pre>
            </div>
        </div>

        <div class="mt-4">
            <p class="text-muted">
                <i class="bi bi-info-circle me-1"></i>
                For more information about the GNU GPLv3 license, visit
                <a href="https://www.gnu.org/licenses/gpl-3.0.html" target="_blank" class="text-decoration-none">
                    https://www.gnu.org/licenses/gpl-3.0.html
                </a>
            </p>
        </div>
        """

    # Number of directories between the page and the site root.
    path_parts = Path(output_file).parts
    depth = len(path_parts) - 1

    if depth == 0:
        home_url = self.base_url
        docs_url = f"{self.base_url}docs/"
        page_base_url = self.base_url
    else:
        home_url = "../" * depth + self.base_url if self.base_url else "../" * depth

        # docs_url must resolve to the docs index from the page's own directory.
        if self.base_url:
            docs_url = f"{self.base_url}docs/"
        elif output_file.startswith("docs/"):
            # Subtract 2: one for the filename, one for docs/.
            docs_depth = len(path_parts) - 2
            docs_url = "../" * docs_depth if docs_depth > 0 else "./"
        else:
            # Nested page outside docs/: climb to the root, then enter docs/.
            docs_url = "../" * depth + "docs/"

        page_base_url = (
            "../" * depth + self.base_url if self.base_url else "../" * depth
        )

    # Create the documentation content template
    doc_content = f"""
        <section class="py-5">
            <div class="container">
                <div class="row justify-content-center">
                    <div class="col-lg-10">
                        <nav aria-label="breadcrumb" class="mb-4">
                            <ol class="breadcrumb">
                                <li class="breadcrumb-item">
                                    <a href="{home_url}" class="text-decoration-none">
                                        <i class="bi bi-house me-1"></i>Home
                                    </a>
                                </li>
                                <li class="breadcrumb-item">
                                    <a href="{docs_url}" class="text-decoration-none">Documentation</a>
                                </li>
                                <li class="breadcrumb-item active" aria-current="page">{page_title}</li>
                            </ol>
                        </nav>

                        <div class="mb-4">
                            <h1 class="display-5 fw-bold text-primary">
                                <i class="bi bi-shield-check me-3"></i>{page_title}
                            </h1>
                            <p class="lead text-muted">{page_description}</p>
                        </div>

                        {html_content}

                        <!-- Navigation footer -->
                        <div class="d-flex justify-content-between align-items-center mt-4">
                            <a href="{docs_url}" class="btn btn-outline-primary">
                                <i class="bi bi-arrow-left me-2"></i>Back to Documentation
                            </a>
                            <div class="text-muted small">
                                <i class="bi bi-file-text me-1"></i>
                                Generated from {license_path.name}
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </section>
        """

    # Build the page using base template
    base_template = self.load_template("base.html")

    # Canonical URL (a directory URL for index.html pages)
    if self.base_url.startswith("http"):
        canonical_url = (
            self.base_url.rstrip("/") + "/" + output_file.replace("index.html", "")
        )
    else:
        canonical_url = f"/{output_file.replace('index.html', '')}"

    # Read the version from pyproject.toml when possible.
    version = "0.4.0b1"  # Default version
    try:
        import tomli

        with open("pyproject.toml", "rb") as f:
            pyproject = tomli.load(f)
        version = pyproject.get("project", {}).get("version", version)
    except (ImportError, OSError, ValueError, AttributeError):
        # Best effort: tomli missing, file unreadable or malformed -> default.
        pass

    replacements = {
        "page_title": page_title,
        "page_description": page_description,
        "content": doc_content,
        "base_url": page_base_url,
        "canonical_url": canonical_url,
        "version": version,
        "additional_head": "",
        "additional_scripts": "",
    }

    final_content = self.replace_placeholders(base_template, replacements)

    # Write output file
    output_path = self.output_dir / output_file
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_content)

    print(f"📄 Built license page: {output_file}")

821 

def build_docs_structure(self) -> None:
    """Generate the HTML documentation pages under ``<output_dir>/docs``.

    Three groups of sources are processed, in this order: fixed top-level
    project files, every ``*.md`` file found recursively in ``docs/``, and
    the package READMEs. Fixed sources that do not exist are skipped.
    """
    (self.output_dir / "docs").mkdir(parents=True, exist_ok=True)

    # (source, output, title, description) for the top-level project files.
    top_level_pages = [
        ("README.md", "docs/README.html", "QDrant Loader", "Main documentation"),
        (
            "RELEASE_NOTES.md",
            "docs/RELEASE_NOTES.html",
            "Release Notes",
            "Version history and changes",
        ),
        (
            "CONTRIBUTING.md",
            "docs/CONTRIBUTING.html",
            "Contributing Guide",
            "How to contribute to the project",
        ),
        ("LICENSE", "docs/LICENSE.html", "License", "GNU GPLv3 License"),
    ]
    for source, output, title, description in top_level_pages:
        if not Path(source).exists():
            continue
        if source == "LICENSE":
            # The license is plain text, so it gets its own page builder.
            self.build_license_page(source, output, title, description)
        else:
            self.build_markdown_page(source, output, title, description, title)

    # Every markdown file below docs/ keeps its relative location.
    docs_root = Path("docs")
    if docs_root.exists():
        for md_file in docs_root.rglob("*.md"):
            relative_path = md_file.relative_to("docs")
            html_target = f"docs/{relative_path.with_suffix('.html')}"
            # "getting-started" -> "Getting Started"
            label = relative_path.stem.replace("-", " ").replace("_", " ").title()
            self.build_markdown_page(str(md_file), html_target, breadcrumb=label)

    # (source, output, title, description) for the package READMEs.
    package_pages = [
        (
            "packages/qdrant-loader/README.md",
            "docs/packages/qdrant-loader/README.html",
            "QDrant Loader Package",
            "Core package documentation",
        ),
        (
            "packages/qdrant-loader-mcp-server/README.md",
            "docs/packages/mcp-server/README.html",
            "MCP Server Package",
            "Model Context Protocol server documentation",
        ),
    ]
    for source, output, title, description in package_pages:
        if Path(source).exists():
            self.build_markdown_page(source, output, title, description, title)

890 

def build_coverage_structure(
    self, coverage_artifacts_dir: Optional[str] = None
) -> None:
    """Copy HTML coverage reports into ``<output_dir>/coverage``.

    Reports are directories named ``htmlcov-<package>``. They are looked up
    directly inside the artifacts directory first, and then inside each of
    its ``coverage-*`` subdirectories. Each report is copied to
    ``coverage/<package>``, replacing an existing copy.

    Args:
        coverage_artifacts_dir: Directory to search. When it is falsy or
            does not exist, a warning is printed and nothing is copied.
    """
    coverage_output = self.output_dir / "coverage"
    coverage_output.mkdir(parents=True, exist_ok=True)

    if not (coverage_artifacts_dir and Path(coverage_artifacts_dir).exists()):
        print("⚠️ No coverage artifacts found")
        return

    artifacts_root = Path(coverage_artifacts_dir)

    def install_report(report_dir):
        """Copy one htmlcov-* directory and return its package name."""
        package = report_dir.name.replace("htmlcov-", "")
        target = coverage_output / package
        if target.exists():
            shutil.rmtree(target)
        shutil.copytree(report_dir, target)
        return package

    # Reports placed directly in the artifacts directory.
    for report_dir in artifacts_root.glob("htmlcov-*"):
        if report_dir.is_dir():
            package = install_report(report_dir)
            print(f"📊 Copied coverage: {package}")

    # Reports nested one level down: coverage-*/htmlcov-*/
    for bundle in artifacts_root.glob("coverage-*"):
        if not bundle.is_dir():
            continue
        for report_dir in bundle.glob("htmlcov-*"):
            if report_dir.is_dir():
                package = install_report(report_dir)
                print(f"📊 Copied coverage: {package} (from {bundle.name})")

932 

def copy_assets(self) -> None:
    """Copy the ``assets`` directory next to the templates into the output.

    The source is ``<templates_dir>/../assets``. An existing
    ``<output_dir>/assets`` is removed first. Entries whose name ends in
    ``.py`` are not copied. A warning is printed when the source directory
    does not exist.
    """
    assets_src = self.templates_dir.parent / "assets"
    if not assets_src.exists():
        print("⚠️ Assets directory not found")
        return

    assets_dest = self.output_dir / "assets"
    if assets_dest.exists():
        shutil.rmtree(assets_dest)

    # copytree skips every name returned by the ignore callback.
    shutil.copytree(
        assets_src,
        assets_dest,
        ignore=lambda _folder, names: [n for n in names if n.endswith(".py")],
    )
    print(f"📁 Copied assets to {assets_dest} (excluding Python files)")

950 

def generate_seo_files(self) -> None:
    """Write the sitemap, ``robots.txt`` and ``.nojekyll`` into the output.

    The sitemap is produced by ``generate_dynamic_sitemap`` using today's
    date; ``robots.txt`` is a verbatim copy of the ``robots.txt`` template.

    Raises:
        FileNotFoundError: Propagated from ``load_template`` when the
            ``robots.txt`` template is missing.
    """
    from datetime import datetime

    # Sitemap first, stamped with the build date as YYYY-MM-DD.
    self.generate_dynamic_sitemap(datetime.now().strftime("%Y-%m-%d"))

    # robots.txt is copied from the template unchanged.
    robots_text = self.load_template("robots.txt")
    (self.output_dir / "robots.txt").write_text(robots_text, encoding="utf-8")
    print("📄 Generated: robots.txt")

    # Empty marker file for GitHub Pages.
    (self.output_dir / ".nojekyll").touch()
    print("📄 Generated: .nojekyll")

971 

def generate_dynamic_sitemap(self, build_date: str) -> None:
    """Generate ``sitemap.xml`` from the HTML files in the output directory.

    Every ``*.html`` file found recursively under ``self.output_dir`` becomes
    one ``<url>`` entry, emitted in sorted path order so the output is
    reproducible. Priority and change frequency are looked up from a table
    keyed by the file's path relative to the output directory: an exact
    match wins, then the first directory prefix (key ending in ``/``) that
    matches, then a default of priority 0.5 / monthly.

    When ``self.base_url`` starts with ``http`` the locations are absolute
    URLs under that base; otherwise they are root-relative paths, which is
    only meant for local testing.

    Args:
        build_date: Text written verbatim into every ``<lastmod>`` element.
    """
    from xml.sax.saxutils import escape

    base_url = self.base_url.rstrip("/") if self.base_url.startswith("http") else ""

    # Keys without a trailing slash are exact relative paths; keys ending in
    # "/" are directory prefixes.
    url_config = {
        # Main pages
        "index.html": {"priority": "1.0", "changefreq": "weekly"},
        "docs/index.html": {"priority": "0.9", "changefreq": "weekly"},
        "coverage/index.html": {"priority": "0.7", "changefreq": "daily"},
        "privacy-policy.html": {"priority": "0.5", "changefreq": "monthly"},
        # Documentation patterns
        "docs/README.html": {"priority": "0.8", "changefreq": "weekly"},
        "docs/RELEASE_NOTES.html": {"priority": "0.6", "changefreq": "monthly"},
        "docs/packages/": {"priority": "0.8", "changefreq": "weekly"},
        "docs/getting-started/": {"priority": "0.8", "changefreq": "weekly"},
        "docs/users/": {"priority": "0.7", "changefreq": "weekly"},
        "docs/developers/": {"priority": "0.6", "changefreq": "monthly"},
    }

    def get_url_config(rel_path: str) -> dict:
        """Return priority/changefreq for a '/'-separated relative path."""
        if rel_path in url_config:
            return url_config[rel_path]
        for pattern, config in url_config.items():
            if pattern.endswith("/") and rel_path.startswith(pattern):
                return config
        return {"priority": "0.5", "changefreq": "monthly"}

    sitemap_content = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    ]

    # Sort files for consistent output.
    sorted_files = sorted(self.output_dir.rglob("*.html"), key=str)

    for html_file in sorted_files:
        # A single '/'-separated relative path drives both the URL and the
        # config lookup. Stripping the output directory textually from the
        # full path would also remove later occurrences of the same text
        # and would never match Windows separators.
        url_path = html_file.relative_to(self.output_dir).as_posix()
        config = get_url_config(url_path)

        full_url = f"{base_url}/{url_path}" if base_url else f"/{url_path}"

        sitemap_content.extend(
            [
                "  <url>",
                # Escape &, < and > so unusual file names cannot produce
                # malformed XML.
                f"    <loc>{escape(full_url)}</loc>",
                f"    <lastmod>{build_date}</lastmod>",
                f"    <changefreq>{config['changefreq']}</changefreq>",
                f"    <priority>{config['priority']}</priority>",
                "  </url>",
            ]
        )

    sitemap_content.append("</urlset>")

    sitemap_path = self.output_dir / "sitemap.xml"
    sitemap_path.write_text("\n".join(sitemap_content), encoding="utf-8")

    print(f"📄 Generated: sitemap.xml ({len(sorted_files)} URLs)")

1063 

def generate_dynamic_docs_index(self) -> str:
    """Build the documentation index content from the files present on disk.

    All source paths (``README.md``, ``packages/...``, ``docs/...``) are
    resolved relative to the current working directory. Only documents and
    sections that actually exist are listed. The collected entries are
    grouped under the keys ``getting_started``, ``user_guides``,
    ``developer_docs``, ``packages`` and ``release_info`` and passed to
    ``self._generate_docs_cards_html``, whose result is returned.

    Each entry is a dict with the keys ``url`` (relative to the docs index
    page), ``title``, ``icon``, ``badge`` and ``badge_color``.
    """
    docs_structure = {
        "getting_started": [],
        "user_guides": [],
        "developer_docs": [],
        "packages": [],
        "release_info": [],
    }

    def entry(url: str, title: str, icon: str, badge: str, badge_color: str) -> dict:
        """Build one link entry in the shape the card renderer reads."""
        return {
            "url": url,
            "title": title,
            "icon": icon,
            "badge": badge,
            "badge_color": badge_color,
        }

    def section_entries(base: Path, url_prefix: str, sections, badge: str, badge_color: str) -> list:
        """List the sub-directories of *base* that contain at least one .md file."""
        return [
            entry(f"{url_prefix}/{name}/", title, icon, badge, badge_color)
            for name, title, icon in sections
            if (base / name).exists() and any((base / name).glob("*.md"))
        ]

    # (source file, URL relative to docs/, title, icon, badge text, badge colour)
    main_docs = [
        ("README.md", "README.html", "Main README", "bi-file-text", "Essential", "primary"),
        (
            "RELEASE_NOTES.md",
            "RELEASE_NOTES.html",
            "Release Notes",
            "bi-clock-history",
            "Updates",
            "secondary",
        ),
        ("CONTRIBUTING.md", "CONTRIBUTING.html", "Contributing", "bi-people", "Community", "dark"),
        ("LICENSE", "LICENSE.html", "License", "bi-shield-check", "Legal", "warning"),
    ]
    docs_structure["release_info"] = [
        entry(url, title, icon, badge, badge_color)
        for source_file, url, title, icon, badge, badge_color in main_docs
        if Path(source_file).exists()
    ]

    # Package READMEs. The MCP server's source directory and its published
    # URL intentionally differ, so both are spelled out.
    package_docs = [
        (
            "packages/qdrant-loader/README.md",
            "packages/qdrant-loader/README.html",
            "QDrant Loader",
            "bi-arrow-repeat",
            "Core",
            "info",
        ),
        (
            "packages/qdrant-loader-mcp-server/README.md",
            "packages/mcp-server/README.html",
            "MCP Server",
            "bi-plug",
            "Integration",
            "info",
        ),
    ]
    docs_structure["packages"] = [
        entry(url, title, icon, badge, badge_color)
        for source_file, url, title, icon, badge, badge_color in package_docs
        if Path(source_file).exists()
    ]

    # Getting-started guides: one entry per markdown file. Sorted so the
    # generated page does not depend on filesystem enumeration order.
    getting_started_path = Path("docs/getting-started")
    if getting_started_path.exists():
        for md_file in sorted(getting_started_path.glob("*.md")):
            if md_file.name == "README.md":
                # Skip README as it's handled separately.
                continue
            title = md_file.stem.replace("-", " ").replace("_", " ").title()
            docs_structure["getting_started"].append(
                entry(
                    f"getting-started/{md_file.stem}.html",
                    title,
                    "bi-play-circle",
                    "Guide",
                    "primary",
                )
            )

    # User guides: fixed list of known sections, in display order.
    user_sections = [
        ("configuration", "Configuration", "bi-gear"),
        ("detailed-guides", "Detailed Guides", "bi-book"),
        ("cli-reference", "CLI Reference", "bi-terminal"),
        ("workflows", "Workflows", "bi-diagram-3"),
        ("troubleshooting", "Troubleshooting", "bi-question-circle"),
    ]
    docs_structure["user_guides"] = section_entries(
        Path("docs/users"), "users", user_sections, "Users", "info"
    )

    # Developer documentation: fixed list of known sections, in display order.
    dev_sections = [
        ("architecture", "Architecture", "bi-diagram-2"),
        ("testing", "Testing", "bi-check-circle"),
        ("deployment", "Deployment", "bi-cloud-upload"),
        ("extending", "Extending", "bi-puzzle"),
        ("documentation", "Documentation", "bi-file-text"),
    ]
    docs_structure["developer_docs"] = section_entries(
        Path("docs/developers"), "developers", dev_sections, "Dev", "dark"
    )

    return self._generate_docs_cards_html(docs_structure)

1228 

1229 def _generate_docs_cards_html(self, docs_structure: dict) -> str: 

1230 """Generate HTML cards for documentation sections.""" 

1231 

1232 def generate_card(title, color, icon, items, card_id=""): 

1233 if not items: 

1234 return "" 

1235 

1236 items_html = "" 

1237 for item in items: 

1238 items_html += f""" 

1239 <li class="list-group-item d-flex justify-content-between align-items-center"> 

1240 <a href="{item['url']}" class="text-decoration-none"> 

1241 <i class="{item['icon']} me-2 text-{color}"></i>{item['title']} 

1242 </a> 

1243 <span class="badge bg-{item['badge_color']} rounded-pill">{item['badge']}</span> 

1244 </li>""" 

1245 

1246 return f""" 

1247 <div class="col-lg-6"> 

1248 <div class="card h-100 border-0 shadow"> 

1249 <div class="card-header bg-{color} text-white"> 

1250 <h4 class="mb-0"> 

1251 <i class="{icon} me-2"></i>{title} 

1252 </h4> 

1253 </div> 

1254 <div class="card-body"> 

1255 <ul class="list-group list-group-flush">{items_html} 

1256 </ul> 

1257 </div> 

1258 </div> 

1259 </div>""" 

1260 

1261 # Generate cards for each section 

1262 cards_html = "" 

1263 

1264 # Getting Started (combine main docs and getting started guides) 

1265 getting_started_items = ( 

1266 docs_structure["release_info"] + docs_structure["getting_started"] 

1267 ) 

1268 cards_html += generate_card( 

1269 "Getting Started", "primary", "bi-play-circle", getting_started_items 

1270 ) 

1271 

1272 # Packages 

1273 if docs_structure["packages"]: 

1274 cards_html += generate_card( 

1275 "Packages", "info", "bi-box", docs_structure["packages"] 

1276 ) 

1277 

1278 # User Guides 

1279 if docs_structure["user_guides"]: 

1280 cards_html += generate_card( 

1281 "User Guides", 

1282 "success", 

1283 "bi-person-check", 

1284 docs_structure["user_guides"], 

1285 ) 

1286 

1287 # Developer Documentation 

1288 if docs_structure["developer_docs"]: 

1289 cards_html += generate_card( 

1290 "Development", "dark", "bi-code-slash", docs_structure["developer_docs"] 

1291 ) 

1292 

1293 # Wrap in the main structure 

1294 full_html = f""" 

1295<!-- Documentation Header --> 

1296<section class="py-5 bg-light"> 

1297 <div class="container"> 

1298 <div class="row justify-content-center text-center"> 

1299 <div class="col-lg-8"> 

1300 <h1 class="display-4 fw-bold text-primary"> 

1301 <i class="bi bi-book me-3"></i>Documentation 

1302 </h1> 

1303 <p class="lead text-muted"> 

1304 Comprehensive documentation for QDrant Loader and MCP Server 

1305 </p> 

1306 </div> 

1307 </div> 

1308 </div> 

1309</section> 

1310 

1311<!-- Documentation Grid --> 

1312<section class="py-5"> 

1313 <div class="container"> 

1314 <div class="row g-4"> 

1315{cards_html} 

1316 </div> 

1317 </div> 

1318</section> 

1319 

1320<!-- Quick Actions --> 

1321<section class="py-5 bg-light"> 

1322 <div class="container"> 

1323 <div class="row justify-content-center"> 

1324 <div class="col-lg-8 text-center"> 

1325 <h3 class="mb-4">Quick Actions</h3> 

1326 <div class="d-flex justify-content-center gap-3 flex-wrap"> 

1327 <a href="../coverage/" class="btn btn-outline-primary"> 

1328 <i class="bi bi-graph-up me-2"></i>View Coverage Reports 

1329 </a> 

1330 <a href="https://github.com/martin-papy/qdrant-loader" class="btn btn-outline-secondary" 

1331 target="_blank"> 

1332 <i class="bi bi-github me-2"></i>GitHub Repository 

1333 </a> 

1334 <a href="https://pypi.org/project/qdrant-loader/" class="btn btn-outline-info" target="_blank"> 

1335 <i class="bi bi-box me-2"></i>QDrant Loader PyPI 

1336 </a> 

1337 <a href="https://pypi.org/project/qdrant-loader-mcp-server/" class="btn btn-outline-info" 

1338 target="_blank"> 

1339 <i class="bi bi-plug me-2"></i>MCP Server PyPI 

1340 </a> 

1341 </div> 

1342 </div> 

1343 </div> 

1344 </div> 

1345</section>""" 

1346 

1347 return full_html 

1348 

def build_site(
    self,
    coverage_artifacts_dir: Optional[str] = None,
    test_results_dir: Optional[str] = None,
) -> None:
    """Build the complete website into ``self.output_dir``.

    Steps, in order: create the output directory, copy assets, generate
    project info, build the home page, build ``docs/index.html`` from the
    dynamically generated documentation index, build the coverage index and
    privacy policy pages, build the documentation structure, generate
    directory index pages, build the coverage structure, optionally copy
    test results, and finally generate the SEO files (last, so the sitemap
    sees every page).

    Args:
        coverage_artifacts_dir: Passed through to
            ``self.build_coverage_structure``.
        test_results_dir: When given and existing, its contents replace
            ``<output_dir>/test-results``.
    """
    from datetime import datetime

    print("🏗️ Building QDrant Loader website...")

    self.output_dir.mkdir(parents=True, exist_ok=True)

    # Copy assets first
    self.copy_assets()

    self.generate_project_info()

    # Home page
    self.build_page(
        "base.html",
        "index.html",
        "Home",
        "Enterprise-ready vector database toolkit for building searchable knowledge bases from multiple data sources including Confluence, Jira, and local files.",
        "index.html",
    )

    # docs/index.html is assembled here rather than through build_page
    # because its content is generated, not loaded from a template.
    dynamic_docs_content = self.generate_dynamic_docs_index()
    base_template = self.load_template("base.html")

    if self.base_url.startswith("http"):
        canonical_url = self.base_url.rstrip("/") + "/docs/"
    else:
        canonical_url = "/docs/"

    # Version lookup is best-effort: a missing tomli, a missing or malformed
    # pyproject.toml all fall back to the default. Catch Exception rather
    # than everything so Ctrl-C and SystemExit still propagate.
    version = "0.4.0b1"  # Default version
    try:
        import tomli

        with open("pyproject.toml", "rb") as f:
            pyproject = tomli.load(f)
        version = pyproject.get("project", {}).get("version", version)
    except Exception:
        pass

    # docs/index.html sits one level below the site root, so without a
    # configured base URL links must climb one directory.
    docs_index_base_url = self.base_url if self.base_url else "../"

    replacements = {
        "page_title": "Documentation",
        "page_description": "Comprehensive documentation for QDrant Loader - learn how to load data into Qdrant vector database from various sources.",
        "content": dynamic_docs_content,
        "base_url": docs_index_base_url,
        "canonical_url": canonical_url,
        "version": version,
        "additional_head": "",
        "additional_scripts": "",
    }

    final_content = self.replace_placeholders(base_template, replacements)

    docs_index_path = self.output_dir / "docs" / "index.html"
    docs_index_path.parent.mkdir(parents=True, exist_ok=True)
    docs_index_path.write_text(final_content, encoding="utf-8")

    print("✅ Built: docs/index.html (dynamic)")

    self.build_page(
        "base.html",
        "coverage-index.html",
        "Test Coverage",
        "Test coverage analysis and reports for QDrant Loader packages - ensuring code quality and reliability.",
        "coverage/index.html",
    )

    # Privacy policy page, stamped with today's date.
    last_updated = datetime.now().strftime("%B %d, %Y")

    self.build_page(
        "base.html",
        "privacy-policy.html",
        "Privacy Policy",
        "Privacy policy for QDrant Loader website - learn how we collect, use, and protect your information when you visit our documentation and use our services.",
        "privacy-policy.html",
        additional_replacements={"last_updated": last_updated},
    )

    # Build documentation structure (converts MD to HTML)
    self.build_docs_structure()

    # Generate directory index pages to prevent directory listings
    self.generate_directory_indexes()

    self.build_coverage_structure(coverage_artifacts_dir)

    # Copy test results if available, replacing any previous copy.
    if test_results_dir and Path(test_results_dir).exists():
        dest_path = self.output_dir / "test-results"
        if dest_path.exists():
            shutil.rmtree(dest_path)
        shutil.copytree(test_results_dir, dest_path)
        print("📊 Copied: test results")

    # Generate SEO files after all pages are built
    self.generate_seo_files()

    html_count = sum(1 for _ in self.output_dir.rglob("*.html"))
    total_count = sum(1 for _ in self.output_dir.rglob("*"))
    print(f"✅ Website built successfully in {self.output_dir}")
    print(f"📊 Generated {html_count} HTML pages")
    print(f"📁 Total files: {total_count}")

1468 

def generate_directory_indexes(self) -> None:
    """Duplicate each README.html under docs/ as index.html in the same directory.

    This gives every documentation sub-directory an index page so that
    directory listings are not shown. The README.html directly inside
    ``docs/`` is skipped because ``docs/index.html`` is built separately.
    A failure on one file is reported and does not stop the others.
    """
    docs_root = self.output_dir / "docs"
    if not docs_root.exists():
        return

    # Collect the matches up front, before any index.html is written.
    for readme in list(docs_root.rglob("README.html")):
        if readme.parent == docs_root:
            print("⏭️ Skipping main docs/README.html (custom index exists)")
            continue

        target = readme.with_name("index.html")
        try:
            # The README is read in full before the target is opened.
            target.write_text(readme.read_text(encoding="utf-8"), encoding="utf-8")
            print(
                f"📄 Generated index.html: {target.relative_to(self.output_dir)}"
            )
        except Exception as e:
            print(f"⚠️ Failed to generate index for {readme}: {e}")

1502 

1503 

def main():
    """Parse the command line, build the site, and return a process exit code.

    Returns 0 when the build succeeds and 1 when it raises; the failure
    message is printed rather than propagated.
    """
    cli = argparse.ArgumentParser(
        description="Build QDrant Loader documentation website"
    )
    cli.add_argument("--output", "-o", default="site", help="Output directory")
    cli.add_argument(
        "--templates", "-t", default="website/templates", help="Templates directory"
    )
    cli.add_argument("--coverage-artifacts", help="Coverage artifacts directory")
    cli.add_argument("--test-results", help="Test results directory")
    cli.add_argument("--base-url", default="", help="Base URL for the website")
    options = cli.parse_args()

    site_builder = WebsiteBuilder(options.templates, options.output)
    site_builder.base_url = options.base_url

    try:
        site_builder.build_site(options.coverage_artifacts, options.test_results)
    except Exception as e:
        print(f"❌ Build failed: {e}")
        return 1
    else:
        return 0

1529 

1530 

if __name__ == "__main__":
    # Use SystemExit directly: the ``exit`` builtin is installed by the
    # ``site`` module for interactive sessions and is not guaranteed to
    # exist (e.g. when Python runs with -S).
    raise SystemExit(main())