Coverage for website / builder / core.py: 89%

407 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-06-11 09:34 +0000

1""" 

2Core Website Builder - Main Orchestration and Lifecycle Management. 

3 

4This module implements the main WebsiteBuilder class that orchestrates 

5all build operations and manages the overall build lifecycle. 

6""" 

7 

8import json 

9import re 

10import subprocess 

11from datetime import UTC 

12from pathlib import Path 

13 

14from .assets import AssetManager 

15from .markdown import MarkdownProcessor 

16from .templates import TemplateProcessor 

17 

18 

19class WebsiteBuilder: 

20 """Builds the QDrant Loader documentation website from templates.""" 

21 

22 def __init__( 

23 self, templates_dir: str = "website/templates", output_dir: str = "site" 

24 ): 

25 """Initialize the website builder.""" 

26 self.templates_dir = Path(templates_dir) 

27 self.output_dir = Path(output_dir) 

28 self.base_url = "" 

29 # Cached docs navigation data (built once per run) 

30 self.docs_nav_data: dict | None = None 

31 

32 # Initialize component processors 

33 self.template_processor = TemplateProcessor(templates_dir) 

34 self.markdown_processor = MarkdownProcessor() 

35 self.asset_manager = AssetManager(output_dir) 

36 

37 # Delegate core operations to specialized processors 

38 def load_template(self, template_name: str) -> str: 

39 """Load a template file.""" 

40 return self.template_processor.load_template(template_name) 

41 

42 def replace_placeholders(self, content: str, replacements: dict[str, str]) -> str: 

43 """Replace placeholders in content with actual values.""" 

44 return self.template_processor.replace_placeholders(content, replacements) 

45 

46 def markdown_to_html( 

47 self, markdown_content: str, source_file: str = "", output_file: str = "" 

48 ) -> str: 

49 """Convert markdown to HTML with Bootstrap styling.""" 

50 return self.markdown_processor.markdown_to_html( 

51 markdown_content, source_file, output_file 

52 ) 

53 

54 def copy_assets(self) -> None: 

55 """Copy all website assets to output directory.""" 

56 return self.asset_manager.copy_assets() 

57 

58 def extract_title_from_markdown(self, markdown_content: str) -> str: 

59 """Extract title from markdown content.""" 

60 return self.markdown_processor.extract_title_from_markdown(markdown_content) 

61 

62 # Additional markdown processing methods 

63 def basic_markdown_to_html(self, markdown_content: str) -> str: 

64 """Basic markdown to HTML conversion.""" 

65 return self.markdown_processor.basic_markdown_to_html(markdown_content) 

66 

67 def convert_markdown_links_to_html( 

68 self, markdown_content: str, source_file: str = "", target_dir: str = "" 

69 ) -> str: 

70 """Convert markdown links to HTML format.""" 

71 return self.markdown_processor.convert_markdown_links_to_html( 

72 markdown_content, source_file, target_dir 

73 ) 

74 

75 def add_bootstrap_classes(self, html_content: str) -> str: 

76 """Add Bootstrap classes to HTML elements.""" 

77 return self.markdown_processor.add_bootstrap_classes(html_content) 

78 

79 def render_toc(self, html_content: str) -> str: 

80 """Generate table of contents from HTML headings.""" 

81 return self.markdown_processor.render_toc(html_content) 

82 

83 # Additional asset management methods 

84 def copy_static_files(self, static_files: list[str]) -> None: 

85 """Copy multiple static files.""" 

86 return self.asset_manager.copy_static_files(static_files) 

87 

88 def get_git_timestamp(self, source_path: str) -> str: 

89 """Get the last modified timestamp from Git.""" 

90 try: 

91 result = subprocess.run( 

92 ["git", "log", "-1", "--format=%cd", "--date=iso-strict", source_path], 

93 capture_output=True, 

94 text=True, 

95 cwd=".", 

96 ) 

97 if result.returncode == 0 and result.stdout.strip(): 

98 return result.stdout.strip() 

99 except (subprocess.CalledProcessError, FileNotFoundError): 

100 pass 

101 return "" 

102 

103 def _humanize_title(self, name: str) -> str: 

104 """Convert filename to human-readable title.""" 

105 # Remove file extension and common prefixes 

106 title = ( 

107 name.replace(".md", "") 

108 .replace("README", "") 

109 .replace("_", " ") 

110 .replace("-", " ") 

111 ) 

112 

113 # Handle common patterns 

114 title_mappings = { 

115 "cli reference": "CLI Reference", 

116 "api": "API", 

117 "faq": "FAQ", 

118 "toc": "Table of Contents", 

119 "readme": "Overview", 

120 } 

121 

122 title_lower = title.lower().strip() 

123 if title_lower in title_mappings: 

124 return title_mappings[title_lower] 

125 

126 # Capitalize words 

127 return " ".join(word.capitalize() for word in title.split()) 

128 

129 def generate_project_info(self, **kwargs) -> dict: 

130 """Generate project information for templates.""" 

131 project_info = { 

132 "name": "QDrant Loader", 

133 "version": "0.4.0b1", 

134 "description": "Enterprise-ready vector database toolkit", 

135 "github_url": "https://github.com/martin-papy/qdrant-loader", 

136 } 

137 

138 # Override with any provided kwargs 

139 project_info.update(kwargs) 

140 

141 # Try to load from pyproject.toml 

142 try: 

143 import tomli 

144 

145 with open("pyproject.toml", "rb") as f: 

146 pyproject = tomli.load(f) 

147 project_section = pyproject.get("project", {}) 

148 project_info.update( 

149 { 

150 "name": project_section.get("name", project_info["name"]), 

151 "version": project_section.get( 

152 "version", project_info["version"] 

153 ), 

154 "description": project_section.get( 

155 "description", project_info["description"] 

156 ), 

157 } 

158 ) 

159 # Normalize workspace naming to product name 

160 if isinstance(project_info.get("name"), str) and project_info[ 

161 "name" 

162 ].endswith("-workspace"): 

163 project_info["name"] = "QDrant Loader" 

164 

165 # Try to get homepage/repository from pyproject urls 

166 urls = ( 

167 project_section.get("urls", {}) 

168 if isinstance(project_section, dict) 

169 else {} 

170 ) 

171 homepage = urls.get("Homepage") 

172 if ( 

173 homepage 

174 and not getattr(self, "base_url_user_set", False) 

175 and not self.base_url 

176 ): 

177 # Set base_url from pyproject if not provided externally 

178 self.base_url = homepage.rstrip("/") 

179 repo_url = urls.get("Repository") or urls.get("Source") 

180 if repo_url: 

181 project_info["github_url"] = repo_url 

182 except Exception: 

183 # Ignore malformed project section entries 

184 pass 

185 

186 # Try to get git information 

187 try: 

188 import subprocess 

189 

190 # Get git commit hash 

191 result = subprocess.run( 

192 ["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True 

193 ) 

194 project_info["commit_hash"] = result.stdout.strip() 

195 

196 # Get git commit date 

197 result = subprocess.run( 

198 ["git", "log", "-1", "--format=%ci"], 

199 capture_output=True, 

200 text=True, 

201 check=True, 

202 ) 

203 project_info["commit_date"] = result.stdout.strip() 

204 

205 except (subprocess.CalledProcessError, FileNotFoundError): 

206 # Git not available or not a git repository 

207 pass 

208 

209 # Add build metadata 

210 from datetime import datetime 

211 

212 commit_hash = project_info.get("commit_hash", "") 

213 project_info["commit"] = { 

214 "hash": commit_hash, 

215 "short": commit_hash[:7] if isinstance(commit_hash, str) else "", 

216 "date": project_info.get("commit_date", ""), 

217 } 

218 project_info["build"] = { 

219 "timestamp": datetime.utcnow().isoformat(timespec="seconds") + "Z" 

220 } 

221 

222 # Write project info JSON file 

223 project_info_path = self.output_dir / "project-info.json" 

224 project_info_path.parent.mkdir(parents=True, exist_ok=True) 

225 with open(project_info_path, "w", encoding="utf-8") as f: 

226 json.dump(project_info, f, indent=2) 

227 

228 return project_info 

229 

230 def build_page( 

231 self, 

232 template_name: str, 

233 output_filename: str, 

234 title: str, 

235 description: str, 

236 canonical_path: str, 

237 content: str = "", 

238 **extra_replacements, 

239 ) -> None: 

240 """Build a single page from template.""" 

241 template_content = self.load_template(template_name) 

242 

243 # Load a content template if available when no explicit content is given. 

244 # For pages where output and canonical differ, missing content should raise. 

245 # For pages where they are the same (e.g., index.html), load content if 

246 # the template exists, otherwise fall back to empty content. 

247 if not content: 

248 try: 

249 content = self.load_template(output_filename) 

250 except FileNotFoundError: 

251 if output_filename != canonical_path: 

252 # Maintain behavior for explicit content templates 

253 raise 

254 # Otherwise, leave content empty 

255 

256 project_info = self.generate_project_info() 

257 

258 # Calculate base URL for relative paths 

259 if canonical_path.count("/") > 0: 

260 base_url = "../" * canonical_path.count("/") 

261 else: 

262 # Normalize root base URL 

263 if self.base_url: 

264 base_url = self.base_url.rstrip("/") + "/" 

265 else: 

266 base_url = "./" 

267 

268 # Merge extra replacements ensuring defaults for optional placeholders 

269 extras = dict(extra_replacements) 

270 extras.setdefault("additional_head", "") 

271 extras.setdefault("additional_scripts", "") 

272 

273 replacements = { 

274 "page_title": title, 

275 "page_description": description, 

276 "content": content, 

277 "base_url": base_url, 

278 "canonical_url": ( 

279 self.base_url.rstrip("/") + "/" + canonical_path 

280 if self.base_url 

281 else canonical_path 

282 ), 

283 "author": project_info.get("name", "QDrant Loader"), 

284 "version": project_info.get("version", "0.4.0b1"), 

285 "project_name": project_info["name"], 

286 "project_version": project_info["version"], 

287 "project_description": project_info["description"], 

288 **extras, 

289 } 

290 

291 final_content = self.replace_placeholders(template_content, replacements) 

292 

293 output_path = self.output_dir / output_filename 

294 output_path.parent.mkdir(parents=True, exist_ok=True) 

295 

296 with open(output_path, "w", encoding="utf-8") as f: 

297 f.write(final_content) 

298 

299 print(f"📄 Built {output_filename}") 

300 

    def build_site(
        self,
        coverage_artifacts_dir: str | None = None,
        test_results_dir: str | None = None,
    ) -> None:
        """Build the complete website into ``self.output_dir``.

        Steps, in order: copy assets, write project info, build the home and
        404 pages, build the docs pages and docs index, bridge top-level
        repository files (README, CHANGELOG, CONTRIBUTING, LICENSE, privacy
        policy) into the site, build package READMEs, publish or stub the
        coverage section, then generate directory indexes, sitemap,
        robots.txt and ``.nojekyll``.

        Most optional steps are wrapped in ``try``/``except`` and only print
        a warning on failure. The home page, docs structure, docs index and
        directory indexes are not guarded, so errors there propagate.

        Args:
            coverage_artifacts_dir: Directory with coverage report artifacts.
                When falsy, a placeholder coverage index page is built.
            test_results_dir: Not referenced anywhere in this method's body.
        """
        print("🏗️ Building QDrant Loader website...")

        # Create output directory
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Copy assets first
        self.copy_assets()

        # Generate project info
        # (return value unused here; the call writes project-info.json)
        self.generate_project_info()

        # Build main pages
        # No explicit content: build_page() loads an "index.html" content
        # template if one exists, otherwise leaves the body empty.
        self.build_page(
            "base.html",
            "index.html",
            "Home",
            "Enterprise-ready vector database toolkit for building searchable knowledge bases from multiple data sources including Confluence, Jira, and local files.",
            "index.html",
        )

        # Build a friendly 404 page
        # Optional: any failure (e.g. missing 404 template) is only reported.
        try:
            self.build_page(
                "base.html",
                "404.html",
                "Page Not Found",
                "The page you are looking for does not exist.",
                "404.html",
                content=self.load_template("404.html"),
            )
        except Exception as e:
            print(f"⚠️ Failed to build 404 page: {e}")

        # Build docs structure and pages
        self.build_docs_nav()
        # Return value is not used; the call is made for the pages it builds.
        _docs_structure = self.build_docs_structure()

        # Create docs directory and index
        docs_output_dir = self.output_dir / "docs"
        docs_output_dir.mkdir(exist_ok=True)

        # Build docs index page using dedicated template content
        self.build_page(
            "base.html",
            "docs/index.html",
            "Documentation",
            "QDrant Loader Documentation",
            "docs/index.html",
            content=self.load_template("docs-index.html"),
        )

        # Bridge root docs from repository top-level files
        # Paths are relative to the current working directory.
        try:
            if Path("README.md").exists():
                self.build_markdown_page("README.md", "docs/README.html")
            if Path("CHANGELOG.md").exists():
                self.build_markdown_page("CHANGELOG.md", "docs/CHANGELOG.html")
            if Path("CONTRIBUTING.md").exists():
                self.build_markdown_page("CONTRIBUTING.md", "docs/CONTRIBUTING.html")
            # License (plain text) rendered via helper
            if Path("LICENSE").exists():
                self.build_license_page(
                    "LICENSE", "docs/LICENSE.html", "License", "License"
                )
            # Privacy policy page from template
            try:
                privacy_template_path = self.templates_dir / "privacy-policy.html"
                privacy_last_updated = self.get_git_timestamp(
                    str(privacy_template_path)
                )
                if privacy_last_updated:
                    # Keep only the date part of the ISO timestamp.
                    privacy_last_updated = privacy_last_updated.split("T", 1)[0]
                else:
                    from datetime import datetime

                    # Use stable template mtime fallback instead of build date.
                    privacy_last_updated = (
                        datetime.fromtimestamp(
                            privacy_template_path.stat().st_mtime, tz=UTC
                        )
                        .date()
                        .isoformat()
                    )

                self.build_page(
                    "base.html",
                    "privacy-policy.html",
                    "Privacy Policy",
                    "Privacy policy for QDrant Loader",
                    "privacy-policy.html",
                    content=self.load_template("privacy-policy.html"),
                    last_updated=privacy_last_updated,
                )
            except FileNotFoundError:
                # No privacy policy template: the page is simply skipped.
                pass
        except Exception as e:
            print(f"⚠️ Failed to build root docs pages: {e}")

        # Build package README documentation into docs/packages
        try:
            self.build_package_docs()
        except Exception as e:
            print(f"⚠️ Failed to build package docs: {e}")

        # Always create coverage directory and ensure index.html exists
        coverage_output_dir = self.output_dir / "coverage"
        coverage_output_dir.mkdir(exist_ok=True)

        # Build coverage reports if provided
        if coverage_artifacts_dir:
            # Return value is not used; the call copies the reports and
            # builds the coverage index page.
            _coverage_structure = self.build_coverage_structure(coverage_artifacts_dir)

            # Copy coverage artifacts
            # This second pass copies every item under its original name,
            # in addition to whatever build_coverage_structure() copied.
            coverage_path = Path(coverage_artifacts_dir)
            if coverage_path.exists():
                import shutil

                for item in coverage_path.iterdir():
                    if item.is_file():
                        shutil.copy2(item, coverage_output_dir / item.name)
                    elif item.is_dir():
                        shutil.copytree(
                            item, coverage_output_dir / item.name, dirs_exist_ok=True
                        )
        else:
            # Create styled placeholder coverage index if no artifacts provided
            placeholder_html = (
                '<section class="py-5"><div class="container">'
                '<h1 class="display-5 fw-bold text-primary"><i class="bi bi-graph-up me-2"></i>Coverage Reports</h1>'
                '<div class="alert alert-info mt-4">No coverage artifacts available.</div>'
                "</div></section>"
            )
            self.build_page(
                "base.html",
                "coverage/index.html",
                "Coverage Reports",
                "Test coverage analysis",
                "coverage/index.html",
                content=placeholder_html,
            )

        # Generate directory indexes
        self.generate_directory_indexes()

        # Generate SEO files
        # Build a dynamic sitemap including all HTML pages
        try:
            self.generate_dynamic_sitemap()
        except Exception as e:
            print(f"⚠️ Failed to generate dynamic sitemap: {e}")

        # Always (re)write robots.txt pointing to the sitemap
        try:
            self.generate_robots_file()
        except Exception as e:
            print(f"⚠️ Failed to generate robots.txt: {e}")

        # Create .nojekyll file for GitHub Pages
        nojekyll_path = self.output_dir / ".nojekyll"
        nojekyll_path.touch()
        print("📄 Created .nojekyll file")

        print("✅ Website build completed successfully!")

470 

471 def build_docs_nav(self) -> dict: 

472 """Build documentation navigation structure.""" 

473 # Simplified navigation building 

474 docs_dir = Path("docs") 

475 if not docs_dir.exists(): 

476 return {} 

477 

478 nav_data = {"title": "Documentation", "children": []} 

479 

480 for item in sorted(docs_dir.iterdir()): 

481 if item.is_file() and item.suffix == ".md": 

482 nav_data["children"].append( 

483 { 

484 "title": self._humanize_title(item.stem), 

485 "url": f"docs/{item.name}", 

486 } 

487 ) 

488 elif item.is_dir(): 

489 nav_data["children"].append( 

490 { 

491 "title": self._humanize_title(item.name), 

492 "url": f"docs/{item.name}/", 

493 } 

494 ) 

495 

496 self.docs_nav_data = nav_data 

497 return nav_data 

498 

499 def generate_seo_files(self) -> None: 

500 """Generate SEO files like sitemap.xml and robots.txt.""" 

501 from datetime import datetime 

502 

503 # Determine base site URL 

504 site_base = ( 

505 self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net" 

506 ) 

507 

508 # Get current date for lastmod 

509 current_date = datetime.now().strftime("%Y-%m-%d") 

510 

511 # Generate simple sitemap.xml 

512 sitemap_content = f"""<?xml version="1.0" encoding="UTF-8"?> 

513<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> 

514 <url> 

515 <loc>{site_base}/</loc> 

516 <lastmod>{current_date}</lastmod> 

517 <changefreq>weekly</changefreq> 

518 <priority>1.0</priority> 

519 </url> 

520 <url> 

521 <loc>{site_base}/docs/</loc> 

522 <lastmod>{current_date}</lastmod> 

523 <changefreq>weekly</changefreq> 

524 <priority>0.8</priority> 

525 </url> 

526</urlset>""" 

527 

528 sitemap_path = self.output_dir / "sitemap.xml" 

529 with open(sitemap_path, "w", encoding="utf-8") as f: 

530 f.write(sitemap_content) 

531 print("📄 Generated sitemap.xml") 

532 

533 # Generate simple robots.txt 

534 robots_content = f"""User-agent: * 

535Allow: / 

536 

537Sitemap: {self.base_url.rstrip('/') if self.base_url else 'https://example.com'}/sitemap.xml 

538""" 

539 

540 robots_path = self.output_dir / "robots.txt" 

541 with open(robots_path, "w", encoding="utf-8") as f: 

542 f.write(robots_content.replace("https://example.com", site_base)) 

543 print("📄 Generated robots.txt") 

544 

545 def generate_robots_file(self) -> None: 

546 """Generate only robots.txt referencing the sitemap URL.""" 

547 site_base = ( 

548 self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net" 

549 ) 

550 robots_content = f"""User-agent: * 

551Allow: / 

552 

553Sitemap: {site_base}/sitemap.xml 

554""" 

555 robots_path = self.output_dir / "robots.txt" 

556 with open(robots_path, "w", encoding="utf-8") as f: 

557 f.write(robots_content) 

558 print("📄 Generated robots.txt") 

559 

560 def generate_dynamic_sitemap( 

561 self, date: str = None, pages: list[str] = None 

562 ) -> str: 

563 """Generate dynamic sitemap with custom pages.""" 

564 from datetime import datetime 

565 

566 base_url = ( 

567 self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net" 

568 ) 

569 

570 # Auto-discover pages if not provided 

571 if pages is None: 

572 pages = [] 

573 # Find HTML files in site directory 

574 if self.output_dir.exists(): 

575 for html_file in self.output_dir.rglob("*.html"): 

576 rel_path = str(html_file.relative_to(self.output_dir)) 

577 pages.append(rel_path) 

578 

579 # Use provided date or current date 

580 if date is None: 

581 date = datetime.now().strftime("%Y-%m-%d") 

582 

583 sitemap_content = '<?xml version="1.0" encoding="UTF-8"?>\n' 

584 sitemap_content += ( 

585 '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n' 

586 ) 

587 

588 for page in pages: 

589 sitemap_content += " <url>\n" 

590 sitemap_content += f" <loc>{base_url}/{page}</loc>\n" 

591 sitemap_content += f" <lastmod>{date}</lastmod>\n" 

592 sitemap_content += " <changefreq>weekly</changefreq>\n" 

593 sitemap_content += " <priority>0.8</priority>\n" 

594 sitemap_content += " </url>\n" 

595 

596 sitemap_content += "</urlset>" 

597 

598 # Write sitemap to file 

599 sitemap_path = self.output_dir / "sitemap.xml" 

600 sitemap_path.parent.mkdir(parents=True, exist_ok=True) 

601 with open(sitemap_path, "w", encoding="utf-8") as f: 

602 f.write(sitemap_content) 

603 print(f"📄 Generated dynamic sitemap.xml with {len(pages)} pages") 

604 

605 return sitemap_content 

606 

607 def build_markdown_page( 

608 self, 

609 markdown_file: str, 

610 output_path: str, 

611 title: str = "", 

612 breadcrumb: str = "", 

613 **kwargs, 

614 ) -> None: 

615 """Build a page from markdown file.""" 

616 markdown_path = Path(markdown_file) 

617 if not markdown_path.exists(): 

618 print( 

619 f"⚠️ Markdown file not found: {markdown_file}, skipping page generation" 

620 ) 

621 return 

622 

623 try: 

624 with open(markdown_path, encoding="utf-8") as f: 

625 markdown_content = f.read() 

626 except Exception as e: 

627 print(f"⚠️ Failed to read markdown file {markdown_file}: {e}") 

628 return 

629 

630 # Extract title if not provided 

631 if not title: 

632 title = self.extract_title_from_markdown(markdown_content) 

633 

634 # Normalize links in markdown before conversion 

635 markdown_content = self.markdown_processor.convert_markdown_links_to_html( 

636 markdown_content, str(markdown_path) 

637 ) 

638 

639 # Convert markdown to HTML 

640 html_content = self.markdown_to_html( 

641 markdown_content, str(markdown_path), output_path 

642 ) 

643 # Normalize any remaining HTML hrefs 

644 html_content = self.markdown_processor.convert_markdown_links_to_html( 

645 html_content, str(markdown_path) 

646 ) 

647 

648 # Build a Table of Contents and wrap in docs layout 

649 toc_html = self.render_toc(html_content) 

650 if toc_html: 

651 toc_html = self.add_bootstrap_classes(toc_html) 

652 

653 wrapped_content = f""" 

654<section> 

655 <div class=\"container-fluid\"> 

656 <div class=\"row toc-layout\"> 

657 <aside class=\"toc-sidebar d-none d-lg-block p-0\"> 

658 <div class=\"position-sticky\"> 

659 {toc_html or '<div class=\"text-muted small\">No sections</div>'} 

660 </div> 

661 </aside> 

662 <div class=\"container-content\"> 

663 {html_content} 

664 </div> 

665 </div> 

666</div> 

667</section> 

668""" 

669 

670 # Build the page 

671 self.build_page( 

672 "base.html", 

673 output_path, 

674 title, 

675 f"{title} - QDrant Loader", 

676 output_path, 

677 content=wrapped_content, 

678 breadcrumb=breadcrumb, 

679 **kwargs, 

680 ) 

681 

682 def build_docs_structure(self) -> dict: 

683 """Build documentation directory structure.""" 

684 docs_dir = Path("docs") 

685 structure = {"title": "Documentation", "children": []} 

686 

687 # Create docs output directory 

688 docs_output_dir = self.output_dir / "docs" 

689 docs_output_dir.mkdir(parents=True, exist_ok=True) 

690 

691 if not docs_dir.exists(): 

692 return structure 

693 

694 # Process all markdown files in docs 

695 for item in sorted(docs_dir.rglob("*.md")): 

696 relative_path = str(item.relative_to(docs_dir)) 

697 output_path = relative_path.replace(".md", ".html") 

698 

699 structure["children"].append( 

700 { 

701 "title": self._humanize_title(item.stem), 

702 "path": relative_path, 

703 "url": f"docs/{output_path}", 

704 } 

705 ) 

706 

707 # Build the page from markdown 

708 try: 

709 self.build_markdown_page( 

710 str(item), 

711 f"docs/{output_path}", 

712 title=self._humanize_title(item.stem), 

713 ) 

714 except Exception as e: 

715 print(f"⚠️ Failed to build docs page {item}: {e}") 

716 

717 return structure 

718 

    def build_coverage_structure(self, coverage_dir: str | None = None) -> dict:
        """Copy coverage reports into the site and build the coverage index.

        Every item in ``coverage_dir`` is copied to ``<output_dir>/coverage``.
        Directories whose name contains ``htmlcov`` are renamed to a short
        package name. When at least one copied directory has an
        ``index.html``, a coverage index page is rendered via ``build_page``.

        Args:
            coverage_dir: Directory holding the coverage artifacts.

        Returns:
            ``{"coverage_reports": [...]}``, one entry per sub-directory of
            the coverage output directory that has an ``index.html``. The
            list is empty when ``coverage_dir`` is falsy or does not exist.
        """
        # Always create coverage output directory
        coverage_output_dir = self.output_dir / "coverage"
        coverage_output_dir.mkdir(parents=True, exist_ok=True)

        if not coverage_dir:
            return {"coverage_reports": []}

        coverage_path = Path(coverage_dir)
        if not coverage_path.exists():
            return {"coverage_reports": []}

        # Copy all coverage files with proper naming
        import shutil

        for item in coverage_path.iterdir():
            # Map directory names to cleaner package names
            # The checks are substring matches and the first match wins.
            dest_name = item.name
            if item.is_dir():
                if "htmlcov-loader" in item.name:
                    dest_name = "loader"
                elif "htmlcov-mcp" in item.name:
                    dest_name = "mcp"
                elif "htmlcov-website" in item.name:
                    dest_name = "website"
                elif (
                    "htmlcov-core" in item.name
                    or "htmlcov-qdrant-loader-core" in item.name
                ):
                    dest_name = "core"
                elif "htmlcov" in item.name:
                    # Any other htmlcov directory: just drop the prefix.
                    dest_name = item.name.replace("htmlcov-", "").replace(
                        "htmlcov_", ""
                    )

            dest_path = coverage_output_dir / dest_name
            # A failed copy is reported and the remaining items still run.
            try:
                if item.is_file():
                    shutil.copy2(item, dest_path)
                elif item.is_dir():
                    # Replace any earlier copy so stale files do not linger.
                    if dest_path.exists():
                        shutil.rmtree(dest_path)
                    shutil.copytree(item, dest_path)
                print(f"📁 Copied coverage: {item.name} -> {dest_name}")
            except Exception as e:
                print(f"⚠️ Failed to copy coverage file {item}: {e}")

        # Build reports list using the renamed directories
        reports = []
        for subdir in coverage_output_dir.iterdir():
            if subdir.is_dir():
                index_file = subdir / "index.html"
                if index_file.exists():
                    reports.append(
                        {
                            "name": subdir.name,
                            "path": f"{subdir.name}/index.html",
                            "url": f"coverage/{subdir.name}/index.html",
                        }
                    )

        # Create main coverage index page using site template when reports exist
        if reports:
            # Build coverage index with Bootstrap styling
            index_content = """
<section class=\"py-5\">
  <div class=\"container\">
    <h1 class=\"display-5 fw-bold text-primary mb-4\"><i class=\"bi bi-graph-up me-2\"></i>Coverage Reports</h1>
    <div class=\"row g-4\">"""

            # One card per known package; reports with any other name are
            # counted in the return value but get no card.
            for report in reports:
                if report["name"] == "loader":
                    index_content += """
      <div class="col-lg-6">
        <div class="card">
          <div class="card-header">
            <h4>QDrant Loader Core</h4>
            <span id="loader-test-indicator" class="badge">Loading...</span>
          </div>
          <div class="card-body">
            <div id="loader-coverage">Loader coverage data</div>
            <a href="loader/" class="btn btn-primary">View Detailed Report</a>
          </div>
        </div>
      </div>"""
                elif report["name"] == "mcp":
                    index_content += """
      <div class="col-lg-6">
        <div class="card">
          <div class="card-header">
            <h4>MCP Server</h4>
            <span id="mcp-test-indicator" class="badge">Loading...</span>
          </div>
          <div class="card-body">
            <div id="mcp-coverage">MCP Server coverage data</div>
            <a href="mcp/" class="btn btn-success">View Detailed Report</a>
          </div>
        </div>
      </div>"""
                elif report["name"] == "website":
                    index_content += """
      <div class="col-lg-6">
        <div class="card">
          <div class="card-header">
            <h4>Website</h4>
            <span id="website-test-indicator" class="badge">Loading...</span>
          </div>
          <div class="card-body">
            <div id="website-coverage">Website coverage data</div>
            <a href="website/" class="btn btn-info">View Detailed Report</a>
          </div>
        </div>
      </div>"""
                elif report["name"] == "core":
                    index_content += """
      <div class="col-lg-6">
        <div class="card">
          <div class="card-header">
            <h4>Core Library</h4>
            <span id="core-test-indicator" class="badge">Loading...</span>
          </div>
          <div class="card-body">
            <div id="core-coverage">Core library coverage data</div>
            <a href="core/" class="btn btn-warning">View Detailed Report</a>
          </div>
        </div>
      </div>"""

            # Close the grid and append the client-side script. The script
            # fetches each package's status.json and renders a progress bar
            # into the matching "<name>-coverage" element. It always requests
            # all four packages; failed fetches are ignored.
            index_content += """
    </div>
  </div>
</section>

<script>
// Compute and render coverage summary from status.json
function coverageSummary(data){
  try{
    let total = 0, missing = 0;
    if (data && data.files){
      for (const k in data.files){
        const f = data.files[k];
        const nums = f && f.index && f.index.nums ? f.index.nums : (f.index && f.index.numbers ? f.index.numbers : null);
        if (nums && typeof nums.n_statements === 'number'){
          total += (nums.n_statements||0);
          missing += (nums.n_missing||0);
        }
      }
    }
    // Fallback if a totals object exists
    if (total === 0 && data && data.totals){
      if (typeof data.totals.n_statements === 'number'){
        total = data.totals.n_statements||0;
        missing = data.totals.n_missing||0;
      } else if (typeof data.totals.covered_lines === 'number' && typeof data.totals.num_statements === 'number'){
        total = data.totals.num_statements;
        missing = total - data.totals.covered_lines;
      }
    }
    if (total > 0){
      const covered = Math.max(0, total - missing);
      const pct = Math.round((covered/total)*1000)/10; // one decimal
      return {pct, covered, total};
    }
  } catch(e){}
  return null;
}

function renderCoverage(id, summary){
  const el = document.getElementById(id);
  if (!el) return;
  if (!summary){ el.textContent = 'Loaded'; return; }
  const {pct, covered, total} = summary;
  el.innerHTML = `
    <div class="d-flex align-items-center">
      <div class="progress flex-grow-1 me-2" style="height: 10px;">
        <div class="progress-bar bg-success" role="progressbar" style="width: ${pct}%" aria-valuenow="${pct}" aria-valuemin="0" aria-valuemax="100"></div>
      </div>
      <span class="small fw-semibold">${pct}% (${covered}/${total})</span>
    </div>`;
}

fetch('loader/status.json').then(r=>r.json()).then(d=>renderCoverage('loader-coverage', coverageSummary(d))).catch(()=>{});
fetch('mcp/status.json').then(r=>r.json()).then(d=>renderCoverage('mcp-coverage', coverageSummary(d))).catch(()=>{});
fetch('website/status.json').then(r=>r.json()).then(d=>renderCoverage('website-coverage', coverageSummary(d))).catch(()=>{});
fetch('core/status.json').then(r=>r.json()).then(d=>renderCoverage('core-coverage', coverageSummary(d))).catch(()=>{});
</script>
"""
            # Render through site template for full styling/navigation
            self.build_page(
                "base.html",
                "coverage/index.html",
                "Coverage Reports",
                "Test coverage analysis",
                "coverage/index.html",
                content=index_content,
            )
            print("📄 Generated coverage index.html")

        return {"coverage_reports": reports}

919 

def build_package_docs(self) -> None:
    """Build documentation pages from package README files into docs/packages.

    Maps package README.md files to site docs under:
    - packages/qdrant-loader -> docs/packages/qdrant-loader/README.html
    - packages/qdrant-loader-mcp-server -> docs/packages/mcp-server/README.html
    - packages/qdrant-loader-core -> docs/packages/core/README.html

    Packages whose README.md is missing are skipped. A failure while building
    one package is reported with a warning and does not stop the others.
    """
    package_mappings: list[tuple[str, str, str]] = [
        ("qdrant-loader", "qdrant-loader", "QDrant Loader"),
        ("qdrant-loader-mcp-server", "mcp-server", "MCP Server"),
        ("qdrant-loader-core", "core", "Core Library"),
    ]

    for pkg_name, alias, display_name in package_mappings:
        readme_path = Path("packages") / pkg_name / "README.md"
        if not readme_path.exists():
            continue

        output_path = f"docs/packages/{alias}/README.html"
        try:
            with open(readme_path, encoding="utf-8") as f:
                markdown_content = f.read()

            # Normalize links in markdown before conversion
            normalized_md = self.markdown_processor.convert_markdown_links_to_html(
                markdown_content
            )

            html_content = self.markdown_to_html(
                normalized_md,
                str(readme_path),
                output_path,
            )
            # Normalize any remaining HTML hrefs
            html_content = self.markdown_processor.convert_markdown_links_to_html(
                html_content, str(readme_path), output_path
            )

            # Final hardening for package README links. Best-effort: a failure
            # here is reported but must not prevent the page from being built.
            try:
                # Convert .md links under docs to .html FIRST. This pattern
                # needs the relative ../ prefix, so it has to run before the
                # generic collapse below strips that prefix.
                html_content = re.sub(
                    r'href="(?:\.{2}/)+docs/([^"#]+)\.md(#[^"]*)?"',
                    r'href="/docs/\1.html\2"',
                    html_content,
                )
                # Collapse any remaining relative ../../docs to /docs
                html_content = re.sub(
                    r'href="(?:\.{2}/)+docs/', 'href="/docs/', html_content
                )
                # Repository root files are published under /docs
                html_content = re.sub(
                    r'href="(?:\.{2}/)+CONTRIBUTING\.md"',
                    'href="/docs/CONTRIBUTING.html"',
                    html_content,
                )
                html_content = re.sub(
                    r'href="(?:\.{2}/)+LICENSE(\.html)?"',
                    'href="/docs/LICENSE.html"',
                    html_content,
                )
            except Exception as e:
                print(f"⚠️ Link normalization failed for package {pkg_name}: {e}")

            # Build a Table of Contents and wrap with standard docs layout for consistent look
            toc_html = self.render_toc(html_content)
            if toc_html:
                toc_html = self.add_bootstrap_classes(toc_html)

            # Hoisted out of the f-string: backslash escapes inside a
            # replacement field are a SyntaxError before Python 3.12.
            toc_block = toc_html or '<div class="text-muted small">No sections</div>'

            wrapped_content = f"""
<section>
  <div class="container-fluid">
    <div class="row toc-layout">
      <aside class="toc-sidebar d-none d-lg-block p-0">
        <div class="position-sticky">
          {toc_block}
        </div>
      </aside>
      <div class="container-content">
        {html_content}
      </div>
    </div>
  </div>
</section>
"""

            self.build_page(
                "base.html",
                output_path,
                f"{display_name} - README",
                f"{display_name} Documentation",
                output_path,
                content=wrapped_content,
            )
        except Exception as e:
            print(f"⚠️ Failed to build docs for package {pkg_name}: {e}")

1015 

def generate_directory_indexes(self) -> None:
    """Generate an index page for every documentation directory.

    Two trees are walked: the source ``docs`` directory and the ``docs``
    directory under ``self.output_dir``. For each sub-directory the first
    existing file among README.md, index.md, README.html and index.html is
    used as the source of the index page.

    - In the output tree, an HTML source is copied verbatim to ``index.html``
      in the same directory; markdown sources there are left alone.
    - In the source tree, HTML is rendered through ``build_page`` and markdown
      through ``build_markdown_page``, targeting ``docs/<relative>/index.html``.

    Failures for a single directory are reported with a warning and skipped.
    """
    site_tree = self.output_dir / "docs"

    # Walk the source docs first, then the generated site docs
    for tree in (Path("docs"), site_tree):
        if not tree.exists():
            continue

        for folder in tree.rglob("*"):
            if not folder.is_dir():
                continue

            # Candidates in priority order: markdown wins over HTML,
            # README wins over index
            candidates = (
                folder / "README.md",
                folder / "index.md",
                folder / "README.html",
                folder / "index.html",
            )
            chosen = next((path for path in candidates if path.exists()), None)
            if chosen is None:
                continue

            try:
                if tree == site_tree:
                    # Inside the site directory write index.html in place;
                    # always overwrite so stale links never survive
                    if chosen.suffix == ".html":
                        target = folder / "index.html"
                        markup = chosen.read_text(encoding="utf-8")
                        target.write_text(markup, encoding="utf-8")
                        print(f"📄 Generated index.html from {chosen.name}")
                    continue

                # Source files go through the regular build pipeline
                relative_dir = folder.relative_to(tree)
                output_path = f"docs/{relative_dir}/index.html"

                if chosen.suffix != ".html":
                    # Markdown source
                    self.build_markdown_page(
                        str(chosen),
                        output_path,
                        title=self._humanize_title(folder.name),
                    )
                    continue

                # HTML source: hand its content to the base template as-is
                markup = chosen.read_text(encoding="utf-8")
                page_title = self._humanize_title(folder.name)
                page_description = f"{self._humanize_title(folder.name)} Documentation"
                self.build_page(
                    "base.html",
                    output_path,
                    page_title,
                    page_description,
                    output_path,
                    content=markup,
                )
            except Exception as e:
                print(f"⚠️ Failed to generate index for {folder}: {e}")

1083 

def build_license_page(
    self,
    source_file: str = "LICENSE",
    output_file: str = "license.html",
    title: str = "License",
    description: str = "License",
) -> None:
    """Build the license page from a plain-text LICENSE file.

    The license text is HTML-escaped and placed inside a ``<pre>`` element,
    then rendered through the ``base.html`` template via ``build_page``.

    Args:
        source_file: Path of the license text file to read.
        output_file: Output path passed to ``build_page`` (used both as the
            output file and as the page path argument).
        title: Page title passed to ``build_page``.
        description: Page description passed to ``build_page``.

    If ``source_file`` does not exist a warning is printed and nothing is
    built. Any other failure is reported with a warning instead of raising.
    """
    # Local import keeps this block self-contained
    from html import escape

    license_path = Path(source_file)
    if not license_path.exists():
        print(f"⚠️ License file not found: {source_file}, skipping license page")
        return

    try:
        with open(license_path, encoding="utf-8") as f:
            license_content = f.read()

        # License texts routinely contain '<', '>' and '&' (e.g. "<year>",
        # "<https://...>"); unescaped they would be parsed as markup and
        # vanish from the rendered page. Quotes are safe in element content.
        safe_license = escape(license_content, quote=False)

        # Create license page with heading
        html_content = f"""
        <h1>License Information</h1>
        <div class="license-content">
            <pre>{safe_license}</pre>
        </div>
        """

        self.build_page(
            "base.html",
            output_file,
            title,
            description,
            output_file,
            content=html_content,
        )
    except Exception as e:
        print(f"⚠️ Failed to build license page: {e}")