Coverage for website / builder / core.py: 89%

406 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-12 03:27 +0000

1""" 

2Core Website Builder - Main Orchestration and Lifecycle Management. 

3 

4This module implements the main WebsiteBuilder class that orchestrates 

5all build operations and manages the overall build lifecycle. 

6""" 

7 

8import json 

9import re 

10import subprocess 

11from pathlib import Path 

12 

13from .assets import AssetManager 

14from .markdown import MarkdownProcessor 

15from .templates import TemplateProcessor 

16 

17 

18class WebsiteBuilder: 

19 """Builds the QDrant Loader documentation website from templates.""" 

20 

def __init__(
    self, templates_dir: str = "website/templates", output_dir: str = "site"
):
    """Store the build locations and create the component processors.

    Args:
        templates_dir: Path of the templates directory; kept as a ``Path``
            and also handed (as given) to ``TemplateProcessor``.
        output_dir: Path of the output directory; kept as a ``Path`` and
            also handed (as given) to ``AssetManager``.
    """
    # Plain state first: no base URL yet, and no navigation data cached.
    self.base_url = ""
    # Cached docs navigation data (built once per run)
    self.docs_nav_data: dict | None = None

    self.templates_dir = Path(templates_dir)
    self.output_dir = Path(output_dir)

    # Component processors that the delegating methods forward to.
    self.template_processor = TemplateProcessor(templates_dir)
    self.markdown_processor = MarkdownProcessor()
    self.asset_manager = AssetManager(output_dir)

35 

36 # Delegate core operations to specialized processors 

def load_template(self, template_name: str) -> str:
    """Return the template named ``template_name``.

    Forwards to ``self.template_processor.load_template`` and returns its
    result unchanged.
    """
    processor = self.template_processor
    return processor.load_template(template_name)

40 

def replace_placeholders(self, content: str, replacements: dict[str, str]) -> str:
    """Substitute placeholders in ``content`` using ``replacements``.

    Forwards both arguments to
    ``self.template_processor.replace_placeholders`` and returns its result.
    """
    processor = self.template_processor
    return processor.replace_placeholders(content, replacements)

44 

def markdown_to_html(
    self, markdown_content: str, source_file: str = "", output_file: str = ""
) -> str:
    """Render markdown text as HTML.

    Forwards all three arguments, positionally and in order, to
    ``self.markdown_processor.markdown_to_html`` and returns its result.
    """
    processor = self.markdown_processor
    rendered = processor.markdown_to_html(markdown_content, source_file, output_file)
    return rendered

52 

def copy_assets(self) -> None:
    """Copy the website assets by calling ``self.asset_manager.copy_assets``.

    Whatever the asset manager returns is passed back to the caller.
    """
    manager = self.asset_manager
    return manager.copy_assets()

56 

def extract_title_from_markdown(self, markdown_content: str) -> str:
    """Return the title found in ``markdown_content``.

    Forwards to ``self.markdown_processor.extract_title_from_markdown``.
    """
    processor = self.markdown_processor
    return processor.extract_title_from_markdown(markdown_content)

60 

61 # Additional markdown processing methods 

def basic_markdown_to_html(self, markdown_content: str) -> str:
    """Run the basic markdown conversion on ``markdown_content``.

    Forwards to ``self.markdown_processor.basic_markdown_to_html``.
    """
    processor = self.markdown_processor
    return processor.basic_markdown_to_html(markdown_content)

65 

def convert_markdown_links_to_html(
    self, markdown_content: str, source_file: str = "", target_dir: str = ""
) -> str:
    """Rewrite markdown links for the HTML site.

    Forwards all three arguments, positionally and in order, to
    ``self.markdown_processor.convert_markdown_links_to_html``.
    """
    processor = self.markdown_processor
    converted = processor.convert_markdown_links_to_html(
        markdown_content, source_file, target_dir
    )
    return converted

73 

def add_bootstrap_classes(self, html_content: str) -> str:
    """Decorate ``html_content`` with Bootstrap classes.

    Forwards to ``self.markdown_processor.add_bootstrap_classes``.
    """
    processor = self.markdown_processor
    return processor.add_bootstrap_classes(html_content)

77 

def render_toc(self, html_content: str) -> str:
    """Build a table of contents for ``html_content``.

    Forwards to ``self.markdown_processor.render_toc``.
    """
    processor = self.markdown_processor
    return processor.render_toc(html_content)

81 

82 # Additional asset management methods 

def copy_static_files(self, static_files: list[str]) -> None:
    """Copy the listed static files via ``self.asset_manager.copy_static_files``.

    Whatever the asset manager returns is passed back to the caller.
    """
    manager = self.asset_manager
    return manager.copy_static_files(static_files)

86 

def get_git_timestamp(self, source_path: str) -> str:
    """Return the strict-ISO commit date of the last commit touching a path.

    Runs ``git log -1 --format=%cd --date=iso-strict <source_path>`` in the
    current working directory.

    Returns:
        The stripped command output, or ``""`` when git exits non-zero,
        prints nothing, or the ``git`` executable cannot be found.
    """
    command = ["git", "log", "-1", "--format=%cd", "--date=iso-strict", source_path]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            cwd=".",
        )
        if completed.returncode != 0:
            return ""
        # An empty/whitespace-only answer is reported as "no timestamp".
        return completed.stdout.strip() or ""
    except (subprocess.CalledProcessError, FileNotFoundError):
        return ""

101 

def _humanize_title(self, name: str) -> str:
    """Turn a file or directory name into a display title.

    Every ``.md`` and ``README`` substring is removed and underscores and
    hyphens become spaces. A handful of well-known names (compared
    case-insensitively after stripping) map to fixed titles; anything else
    is returned with each word capitalized.
    """
    cleaned = name
    for needle, substitute in ((".md", ""), ("README", ""), ("_", " "), ("-", " ")):
        cleaned = cleaned.replace(needle, substitute)

    # Names whose title is not simply the capitalized words.
    special_titles = {
        "cli reference": "CLI Reference",
        "api": "API",
        "faq": "FAQ",
        "toc": "Table of Contents",
        "readme": "Overview",
    }
    special = special_titles.get(cleaned.lower().strip())
    if special is not None:
        return special

    return " ".join(map(str.capitalize, cleaned.split()))

127 

def generate_project_info(self, **kwargs) -> dict:
    """Collect project metadata, write it to ``project-info.json``, return it.

    The result starts from built-in defaults, is updated with ``kwargs``,
    then with ``name``/``version``/``description`` from the ``[project]``
    table of ``pyproject.toml`` in the current directory (when readable),
    and finally with git commit data and a UTC build timestamp.

    Side effects:
        * May set ``self.base_url`` from the pyproject ``Homepage`` URL when
          no base URL is set and ``base_url_user_set`` is not truthy.
        * Writes ``<output_dir>/project-info.json`` (creating the directory).

    Returns:
        The project information dictionary that was written.
    """
    from datetime import datetime, timezone

    project_info = {
        "name": "QDrant Loader",
        "version": "0.4.0b1",
        "description": "Enterprise-ready vector database toolkit",
        "github_url": "https://github.com/martin-papy/qdrant-loader",
    }

    # Override with any provided kwargs
    project_info.update(kwargs)

    # Try to load from pyproject.toml
    try:
        try:
            import tomli
        except ImportError:
            # tomllib is the stdlib equivalent (Python 3.11+); without this
            # fallback pyproject.toml was silently ignored when tomli was
            # not installed.
            import tomllib as tomli

        with open("pyproject.toml", "rb") as f:
            pyproject = tomli.load(f)

        project_section = pyproject.get("project", {})
        project_info.update(
            {
                "name": project_section.get("name", project_info["name"]),
                "version": project_section.get("version", project_info["version"]),
                "description": project_section.get(
                    "description", project_info["description"]
                ),
            }
        )
        # Normalize workspace naming to product name
        name = project_info.get("name")
        if isinstance(name, str) and name.endswith("-workspace"):
            project_info["name"] = "QDrant Loader"

        # Try to get homepage/repository from pyproject urls
        urls = (
            project_section.get("urls", {})
            if isinstance(project_section, dict)
            else {}
        )
        homepage = urls.get("Homepage")
        if (
            homepage
            and not getattr(self, "base_url_user_set", False)
            and not self.base_url
        ):
            # Set base_url from pyproject if not provided externally
            self.base_url = homepage.rstrip("/")
        repo_url = urls.get("Repository") or urls.get("Source")
        if repo_url:
            project_info["github_url"] = repo_url
    except Exception:
        # Deliberately best-effort: a missing or malformed pyproject.toml
        # must never break the site build, so the defaults are kept.
        pass

    # Try to get git information
    try:
        # Get git commit hash
        result = subprocess.run(
            ["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True
        )
        project_info["commit_hash"] = result.stdout.strip()

        # Get git commit date
        result = subprocess.run(
            ["git", "log", "-1", "--format=%ci"],
            capture_output=True,
            text=True,
            check=True,
        )
        project_info["commit_date"] = result.stdout.strip()
    except (subprocess.CalledProcessError, FileNotFoundError):
        # Git not available or not a git repository
        pass

    # Add build metadata
    commit_hash = project_info.get("commit_hash", "")
    project_info["commit"] = {
        "hash": commit_hash,
        "short": commit_hash[:7] if isinstance(commit_hash, str) else "",
        "date": project_info.get("commit_date", ""),
    }
    # datetime.utcnow() is deprecated; an aware UTC "now" formatted
    # explicitly yields the same "YYYY-MM-DDTHH:MM:SSZ" text.
    project_info["build"] = {
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    }

    # Write project info JSON file
    project_info_path = self.output_dir / "project-info.json"
    project_info_path.parent.mkdir(parents=True, exist_ok=True)
    with open(project_info_path, "w", encoding="utf-8") as f:
        json.dump(project_info, f, indent=2)

    return project_info

228 

def build_page(
    self,
    template_name: str,
    output_filename: str,
    title: str,
    description: str,
    canonical_path: str,
    content: str = "",
    **extra_replacements,
) -> None:
    """Render one page from a template and write it under the output dir.

    Args:
        template_name: Template loaded through ``self.load_template``.
        output_filename: Path of the written file, relative to
            ``self.output_dir``; also tried as a content template when
            ``content`` is empty.
        title: Value for the ``page_title`` placeholder.
        description: Value for the ``page_description`` placeholder.
        canonical_path: Site-relative path used for the canonical URL and
            for working out the relative ``base_url``.
        content: Page body; when empty, a template named
            ``output_filename`` is loaded instead.
        **extra_replacements: Additional placeholder values; they take
            precedence over the standard ones.

    Raises:
        FileNotFoundError: No content was given, no content template
            exists, and ``output_filename`` differs from ``canonical_path``.
    """
    page_template = self.load_template(template_name)

    # With no explicit content, fall back to a content template named after
    # the output file. A missing template is only tolerated (content stays
    # empty) when output and canonical paths are the same.
    if not content:
        try:
            content = self.load_template(output_filename)
        except FileNotFoundError:
            if output_filename != canonical_path:
                raise

    info = self.generate_project_info()

    # Root-level pages use the configured site URL (or "./"); nested pages
    # climb back up with one "../" per path separator.
    if self.base_url:
        root_prefix = self.base_url.rstrip("/") + "/"
        canonical_url = root_prefix + canonical_path
    else:
        root_prefix = "./"
        canonical_url = canonical_path
    depth = canonical_path.count("/")
    base_url = "../" * depth if depth > 0 else root_prefix

    # Optional placeholders default to empty strings.
    extras = dict(extra_replacements)
    for optional_key in ("additional_head", "additional_scripts"):
        extras.setdefault(optional_key, "")

    replacements = {
        "page_title": title,
        "page_description": description,
        "content": content,
        "base_url": base_url,
        "canonical_url": canonical_url,
        "author": info.get("name", "QDrant Loader"),
        "version": info.get("version", "0.4.0b1"),
        "project_name": info["name"],
        "project_version": info["version"],
        "project_description": info["description"],
    }
    replacements.update(extras)

    rendered = self.replace_placeholders(page_template, replacements)

    destination = self.output_dir / output_filename
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(rendered)

    print(f"📄 Built {output_filename}")

299 

def build_site(
    self,
    coverage_artifacts_dir: str | None = None,
    test_results_dir: str | None = None,
) -> None:
    """Build the whole website into ``self.output_dir``.

    Steps, in order: copy assets, write project info, build the home and
    404 pages, build docs navigation and docs pages, build the docs index,
    bridge top-level repository files (README, CHANGELOG, CONTRIBUTING,
    LICENSE) and the privacy policy, build package docs, build coverage
    pages (or a placeholder), generate directory indexes, the sitemap,
    ``robots.txt`` and ``.nojekyll``.

    Args:
        coverage_artifacts_dir: Directory with coverage artifacts; when
            falsy a placeholder coverage index is generated instead.
        test_results_dir: Accepted but not used by this method.
    """
    print("🏗️ Building QDrant Loader website...")

    # Output directory, assets and project metadata come first.
    self.output_dir.mkdir(parents=True, exist_ok=True)
    self.copy_assets()
    self.generate_project_info()

    # Home page
    self.build_page(
        "base.html",
        "index.html",
        "Home",
        "Enterprise-ready vector database toolkit for building searchable knowledge bases from multiple data sources including Confluence, Jira, and local files.",
        "index.html",
    )

    # Friendly 404 page; a failure here is reported but not fatal.
    try:
        self.build_page(
            "base.html",
            "404.html",
            "Page Not Found",
            "The page you are looking for does not exist.",
            "404.html",
            content=self.load_template("404.html"),
        )
    except Exception as e:
        print(f"⚠️ Failed to build 404 page: {e}")

    # Docs navigation and the pages generated from docs/.
    self.build_docs_nav()
    self.build_docs_structure()

    (self.output_dir / "docs").mkdir(exist_ok=True)

    # Docs index page using dedicated template content
    self.build_page(
        "base.html",
        "docs/index.html",
        "Documentation",
        "QDrant Loader Documentation",
        "docs/index.html",
        content=self.load_template("docs-index.html"),
    )

    # Bridge root docs from repository top-level files
    try:
        for root_markdown in ("README.md", "CHANGELOG.md", "CONTRIBUTING.md"):
            if Path(root_markdown).exists():
                self.build_markdown_page(
                    root_markdown, f"docs/{root_markdown[:-3]}.html"
                )
        # License (plain text) rendered via helper
        if Path("LICENSE").exists():
            self.build_license_page(
                "LICENSE", "docs/LICENSE.html", "License", "License"
            )
        # Privacy policy page from template; skipped when it does not exist.
        try:
            privacy_template_path = self.templates_dir / "privacy-policy.html"
            privacy_last_updated = self.get_git_timestamp(str(privacy_template_path))
            if privacy_last_updated:
                # Keep only the date part of the ISO timestamp.
                privacy_last_updated = privacy_last_updated.split("T", 1)[0]
            else:
                from datetime import datetime, timezone

                # Use stable template mtime fallback instead of build date.
                modified = datetime.fromtimestamp(
                    privacy_template_path.stat().st_mtime, tz=timezone.utc
                )
                privacy_last_updated = modified.date().isoformat()

            self.build_page(
                "base.html",
                "privacy-policy.html",
                "Privacy Policy",
                "Privacy policy for QDrant Loader",
                "privacy-policy.html",
                content=self.load_template("privacy-policy.html"),
                last_updated=privacy_last_updated,
            )
        except FileNotFoundError:
            pass
    except Exception as e:
        print(f"⚠️ Failed to build root docs pages: {e}")

    # Package README documentation into docs/packages
    try:
        self.build_package_docs()
    except Exception as e:
        print(f"⚠️ Failed to build package docs: {e}")

    # The coverage directory is always created.
    coverage_output_dir = self.output_dir / "coverage"
    coverage_output_dir.mkdir(exist_ok=True)

    if not coverage_artifacts_dir:
        # Styled placeholder coverage index when no artifacts are provided
        placeholder_html = (
            '<section class="py-5"><div class="container">'
            '<h1 class="display-5 fw-bold text-primary"><i class="bi bi-graph-up me-2"></i>Coverage Reports</h1>'
            '<div class="alert alert-info mt-4">No coverage artifacts available.</div>'
            "</div></section>"
        )
        self.build_page(
            "base.html",
            "coverage/index.html",
            "Coverage Reports",
            "Test coverage analysis",
            "coverage/index.html",
            content=placeholder_html,
        )
    else:
        self.build_coverage_structure(coverage_artifacts_dir)

        # Copy the raw coverage artifacts under their original names too.
        artifacts_root = Path(coverage_artifacts_dir)
        if artifacts_root.exists():
            import shutil

            for artifact in artifacts_root.iterdir():
                target = coverage_output_dir / artifact.name
                if artifact.is_file():
                    shutil.copy2(artifact, target)
                elif artifact.is_dir():
                    shutil.copytree(artifact, target, dirs_exist_ok=True)

    # Directory indexes
    self.generate_directory_indexes()

    # SEO files: a dynamic sitemap including all HTML pages ...
    try:
        self.generate_dynamic_sitemap()
    except Exception as e:
        print(f"⚠️ Failed to generate dynamic sitemap: {e}")

    # ... and robots.txt, always (re)written to point at the sitemap.
    try:
        self.generate_robots_file()
    except Exception as e:
        print(f"⚠️ Failed to generate robots.txt: {e}")

    # .nojekyll file for GitHub Pages
    (self.output_dir / ".nojekyll").touch()
    print("📄 Created .nojekyll file")

    print("✅ Website build completed successfully!")

465 

def build_docs_nav(self) -> dict:
    """Build and cache the top-level navigation for the ``docs`` directory.

    Only direct children of ``docs`` (relative to the current working
    directory) are listed, in sorted order: markdown files link to
    ``docs/<file name>`` and directories to ``docs/<dir name>/``. Other
    files are ignored.

    Returns:
        ``{}`` when ``docs`` does not exist (the cache is left untouched);
        otherwise the navigation dict, which is also stored in
        ``self.docs_nav_data``.
    """
    docs_root = Path("docs")
    if not docs_root.exists():
        return {}

    entries = []
    for entry in sorted(docs_root.iterdir()):
        if entry.is_file() and entry.suffix == ".md":
            link = {
                "title": self._humanize_title(entry.stem),
                "url": f"docs/{entry.name}",
            }
        elif entry.is_dir():
            link = {
                "title": self._humanize_title(entry.name),
                "url": f"docs/{entry.name}/",
            }
        else:
            continue
        entries.append(link)

    nav_data = {"title": "Documentation", "children": entries}
    self.docs_nav_data = nav_data
    return nav_data

493 

def generate_seo_files(self) -> None:
    """Write a minimal ``sitemap.xml`` and ``robots.txt`` to the output dir.

    The sitemap lists the site root and ``/docs/`` with today's date as
    ``lastmod``. Both files use ``self.base_url`` (trailing slashes
    removed) or ``https://qdrant-loader.net`` when no base URL is set.
    """
    from datetime import datetime

    # Determine base site URL
    site_base = (
        self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net"
    )

    # Get current date for lastmod
    current_date = datetime.now().strftime("%Y-%m-%d")

    # Generate simple sitemap.xml
    sitemap_content = f"""<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
    <url>
        <loc>{site_base}/</loc>
        <lastmod>{current_date}</lastmod>
        <changefreq>weekly</changefreq>
        <priority>1.0</priority>
    </url>
    <url>
        <loc>{site_base}/docs/</loc>
        <lastmod>{current_date}</lastmod>
        <changefreq>weekly</changefreq>
        <priority>0.8</priority>
    </url>
</urlset>"""

    sitemap_path = self.output_dir / "sitemap.xml"
    with open(sitemap_path, "w", encoding="utf-8") as f:
        f.write(sitemap_content)
    print("📄 Generated sitemap.xml")

    # Generate simple robots.txt. The sitemap URL is built directly from
    # site_base: the previous placeholder-and-replace approach duplicated
    # the path of any base URL containing "https://example.com".
    robots_content = f"""User-agent: *
Allow: /

Sitemap: {site_base}/sitemap.xml
"""

    robots_path = self.output_dir / "robots.txt"
    with open(robots_path, "w", encoding="utf-8") as f:
        f.write(robots_content)
    print("📄 Generated robots.txt")

539 

def generate_robots_file(self) -> None:
    """Write ``robots.txt`` allowing all agents and naming the sitemap URL.

    The sitemap URL is ``<base>/sitemap.xml`` where ``<base>`` is
    ``self.base_url`` without trailing slashes, or
    ``https://qdrant-loader.net`` when no base URL is set.
    """
    if self.base_url:
        site_base = self.base_url.rstrip("/")
    else:
        site_base = "https://qdrant-loader.net"

    robots_lines = (
        "User-agent: *",
        "Allow: /",
        "",
        f"Sitemap: {site_base}/sitemap.xml",
        "",  # produces the trailing newline
    )
    robots_text = "\n".join(robots_lines)

    with open(self.output_dir / "robots.txt", "w", encoding="utf-8") as handle:
        handle.write(robots_text)
    print("📄 Generated robots.txt")

554 

555 def generate_dynamic_sitemap( 

556 self, date: str = None, pages: list[str] = None 

557 ) -> str: 

558 """Generate dynamic sitemap with custom pages.""" 

559 from datetime import datetime 

560 

561 base_url = ( 

562 self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net" 

563 ) 

564 

565 # Auto-discover pages if not provided 

566 if pages is None: 

567 pages = [] 

568 # Find HTML files in site directory 

569 if self.output_dir.exists(): 

570 for html_file in self.output_dir.rglob("*.html"): 

571 rel_path = str(html_file.relative_to(self.output_dir)) 

572 pages.append(rel_path) 

573 

574 # Use provided date or current date 

575 if date is None: 

576 date = datetime.now().strftime("%Y-%m-%d") 

577 

578 sitemap_content = '<?xml version="1.0" encoding="UTF-8"?>\n' 

579 sitemap_content += ( 

580 '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n' 

581 ) 

582 

583 for page in pages: 

584 sitemap_content += " <url>\n" 

585 sitemap_content += f" <loc>{base_url}/{page}</loc>\n" 

586 sitemap_content += f" <lastmod>{date}</lastmod>\n" 

587 sitemap_content += " <changefreq>weekly</changefreq>\n" 

588 sitemap_content += " <priority>0.8</priority>\n" 

589 sitemap_content += " </url>\n" 

590 

591 sitemap_content += "</urlset>" 

592 

593 # Write sitemap to file 

594 sitemap_path = self.output_dir / "sitemap.xml" 

595 sitemap_path.parent.mkdir(parents=True, exist_ok=True) 

596 with open(sitemap_path, "w", encoding="utf-8") as f: 

597 f.write(sitemap_content) 

598 print(f"📄 Generated dynamic sitemap.xml with {len(pages)} pages") 

599 

600 return sitemap_content 

601 

def build_markdown_page(
    self,
    markdown_file: str,
    output_path: str,
    title: str = "",
    breadcrumb: str = "",
    **kwargs,
) -> None:
    """Convert one markdown file to HTML and build it as a site page.

    A missing or unreadable markdown file is reported with a warning and
    skipped. Otherwise the markdown is link-normalized, converted to HTML,
    link-normalized again, wrapped in the table-of-contents layout and
    passed to ``self.build_page`` with the ``base.html`` template.

    Args:
        markdown_file: Path of the markdown source.
        output_path: Site-relative output path, also used as the canonical
            path.
        title: Page title; extracted from the markdown when empty.
        breadcrumb: Forwarded to ``build_page`` as a placeholder value.
        **kwargs: Extra placeholder values forwarded to ``build_page``.
    """
    markdown_path = Path(markdown_file)
    if not markdown_path.exists():
        print(
            f"⚠️ Markdown file not found: {markdown_file}, skipping page generation"
        )
        return

    try:
        with open(markdown_path, encoding="utf-8") as f:
            markdown_content = f.read()
    except Exception as e:
        print(f"⚠️ Failed to read markdown file {markdown_file}: {e}")
        return

    # Extract title if not provided
    if not title:
        title = self.extract_title_from_markdown(markdown_content)

    # Normalize links in markdown before conversion
    markdown_content = self.markdown_processor.convert_markdown_links_to_html(
        markdown_content, str(markdown_path)
    )

    # Convert markdown to HTML
    html_content = self.markdown_to_html(
        markdown_content, str(markdown_path), output_path
    )
    # Normalize any remaining HTML hrefs
    html_content = self.markdown_processor.convert_markdown_links_to_html(
        html_content, str(markdown_path)
    )

    # Build a Table of Contents and wrap in docs layout
    toc_html = self.render_toc(html_content)
    if toc_html:
        toc_html = self.add_bootstrap_classes(toc_html)

    # Resolved outside the f-string: a backslash inside a replacement field
    # is a SyntaxError on Python versions before 3.12.
    toc_block = toc_html or '<div class="text-muted small">No sections</div>'

    wrapped_content = f"""
<section>
  <div class="container-fluid">
    <div class="row toc-layout">
      <aside class="toc-sidebar d-none d-lg-block p-0">
        <div class="position-sticky">
          {toc_block}
        </div>
      </aside>
      <div class="container-content">
        {html_content}
      </div>
    </div>
</div>
</section>
"""

    # Build the page
    self.build_page(
        "base.html",
        output_path,
        title,
        f"{title} - QDrant Loader",
        output_path,
        content=wrapped_content,
        breadcrumb=breadcrumb,
        **kwargs,
    )

676 

def build_docs_structure(self) -> dict:
    """Build an HTML page for every markdown file under ``docs``.

    ``<output_dir>/docs`` is always created. Each ``*.md`` file found
    recursively under ``docs`` (relative to the current working directory)
    is built through ``self.build_markdown_page``; a failure for one file
    is reported and does not stop the others.

    Returns:
        ``{"title": "Documentation", "children": [...]}`` with one entry
        (``title``, ``path``, ``url``) per markdown file, in sorted order.
        ``children`` is empty when ``docs`` does not exist.
    """
    docs_dir = Path("docs")
    structure = {"title": "Documentation", "children": []}

    # Create docs output directory
    docs_output_dir = self.output_dir / "docs"
    docs_output_dir.mkdir(parents=True, exist_ok=True)

    if not docs_dir.exists():
        return structure

    # Process all markdown files in docs
    for item in sorted(docs_dir.rglob("*.md")):
        relative = item.relative_to(docs_dir)
        # Swap only the file suffix: a textual replace of ".md" would also
        # rewrite directory names containing ".md". as_posix() keeps the
        # URL on forward slashes regardless of the OS separator.
        page_url = f"docs/{relative.with_suffix('.html').as_posix()}"
        page_title = self._humanize_title(item.stem)

        structure["children"].append(
            {
                "title": page_title,
                "path": str(relative),
                "url": page_url,
            }
        )

        # Build the page from markdown
        try:
            self.build_markdown_page(str(item), page_url, title=page_title)
        except Exception as e:
            print(f"⚠️ Failed to build docs page {item}: {e}")

    return structure

713 

714 def build_coverage_structure(self, coverage_dir: str | None = None) -> dict: 

715 """Build coverage report structure.""" 

716 # Always create coverage output directory 

717 coverage_output_dir = self.output_dir / "coverage" 

718 coverage_output_dir.mkdir(parents=True, exist_ok=True) 

719 

720 if not coverage_dir: 

721 return {"coverage_reports": []} 

722 

723 coverage_path = Path(coverage_dir) 

724 if not coverage_path.exists(): 

725 return {"coverage_reports": []} 

726 

727 # Copy all coverage files with proper naming 

728 import shutil 

729 

730 for item in coverage_path.iterdir(): 

731 # Map directory names to cleaner package names 

732 dest_name = item.name 

733 if item.is_dir(): 

734 if "htmlcov-loader" in item.name: 

735 dest_name = "loader" 

736 elif "htmlcov-mcp" in item.name: 

737 dest_name = "mcp" 

738 elif "htmlcov-website" in item.name: 

739 dest_name = "website" 

740 elif ( 

741 "htmlcov-core" in item.name 

742 or "htmlcov-qdrant-loader-core" in item.name 

743 ): 

744 dest_name = "core" 

745 elif "htmlcov" in item.name: 

746 dest_name = item.name.replace("htmlcov-", "").replace( 

747 "htmlcov_", "" 

748 ) 

749 

750 dest_path = coverage_output_dir / dest_name 

751 try: 

752 if item.is_file(): 

753 shutil.copy2(item, dest_path) 

754 elif item.is_dir(): 

755 if dest_path.exists(): 

756 shutil.rmtree(dest_path) 

757 shutil.copytree(item, dest_path) 

758 print(f"📁 Copied coverage: {item.name} -> {dest_name}") 

759 except Exception as e: 

760 print(f"⚠️ Failed to copy coverage file {item}: {e}") 

761 

762 # Build reports list using the renamed directories 

763 reports = [] 

764 for subdir in coverage_output_dir.iterdir(): 

765 if subdir.is_dir(): 

766 index_file = subdir / "index.html" 

767 if index_file.exists(): 

768 reports.append( 

769 { 

770 "name": subdir.name, 

771 "path": f"{subdir.name}/index.html", 

772 "url": f"coverage/{subdir.name}/index.html", 

773 } 

774 ) 

775 

776 # Create main coverage index page using site template when reports exist 

777 if reports: 

778 # Build coverage index with Bootstrap styling 

779 index_content = """ 

780<section class=\"py-5\"> 

781 <div class=\"container\"> 

782 <h1 class=\"display-5 fw-bold text-primary mb-4\"><i class=\"bi bi-graph-up me-2\"></i>Coverage Reports</h1> 

783 <div class=\"row g-4\">""" 

784 

785 for report in reports: 

786 if report["name"] == "loader": 

787 index_content += """ 

788 <div class="col-lg-6"> 

789 <div class="card"> 

790 <div class="card-header"> 

791 <h4>QDrant Loader Core</h4> 

792 <span id="loader-test-indicator" class="badge">Loading...</span> 

793 </div> 

794 <div class="card-body"> 

795 <div id="loader-coverage">Loader coverage data</div> 

796 <a href="loader/" class="btn btn-primary">View Detailed Report</a> 

797 </div> 

798 </div> 

799 </div>""" 

800 elif report["name"] == "mcp": 

801 index_content += """ 

802 <div class="col-lg-6"> 

803 <div class="card"> 

804 <div class="card-header"> 

805 <h4>MCP Server</h4> 

806 <span id="mcp-test-indicator" class="badge">Loading...</span> 

807 </div> 

808 <div class="card-body"> 

809 <div id="mcp-coverage">MCP Server coverage data</div> 

810 <a href="mcp/" class="btn btn-success">View Detailed Report</a> 

811 </div> 

812 </div> 

813 </div>""" 

814 elif report["name"] == "website": 

815 index_content += """ 

816 <div class="col-lg-6"> 

817 <div class="card"> 

818 <div class="card-header"> 

819 <h4>Website</h4> 

820 <span id="website-test-indicator" class="badge">Loading...</span> 

821 </div> 

822 <div class="card-body"> 

823 <div id="website-coverage">Website coverage data</div> 

824 <a href="website/" class="btn btn-info">View Detailed Report</a> 

825 </div> 

826 </div> 

827 </div>""" 

828 elif report["name"] == "core": 

829 index_content += """ 

830 <div class="col-lg-6"> 

831 <div class="card"> 

832 <div class="card-header"> 

833 <h4>Core Library</h4> 

834 <span id="core-test-indicator" class="badge">Loading...</span> 

835 </div> 

836 <div class="card-body"> 

837 <div id="core-coverage">Core library coverage data</div> 

838 <a href="core/" class="btn btn-warning">View Detailed Report</a> 

839 </div> 

840 </div> 

841 </div>""" 

842 

843 index_content += """ 

844 </div> 

845 </div> 

846</section> 

847 

848<script> 

849// Compute and render coverage summary from status.json 

850function coverageSummary(data){ 

851 try{ 

852 let total = 0, missing = 0; 

853 if (data && data.files){ 

854 for (const k in data.files){ 

855 const f = data.files[k]; 

856 const nums = f && f.index && f.index.nums ? f.index.nums : (f.index && f.index.numbers ? f.index.numbers : null); 

857 if (nums && typeof nums.n_statements === 'number'){ 

858 total += (nums.n_statements||0); 

859 missing += (nums.n_missing||0); 

860 } 

861 } 

862 } 

863 // Fallback if a totals object exists 

864 if (total === 0 && data && data.totals){ 

865 if (typeof data.totals.n_statements === 'number'){ 

866 total = data.totals.n_statements||0; 

867 missing = data.totals.n_missing||0; 

868 } else if (typeof data.totals.covered_lines === 'number' && typeof data.totals.num_statements === 'number'){ 

869 total = data.totals.num_statements; 

870 missing = total - data.totals.covered_lines; 

871 } 

872 } 

873 if (total > 0){ 

874 const covered = Math.max(0, total - missing); 

875 const pct = Math.round((covered/total)*1000)/10; // one decimal 

876 return {pct, covered, total}; 

877 } 

878 } catch(e){} 

879 return null; 

880} 

881 

882function renderCoverage(id, summary){ 

883 const el = document.getElementById(id); 

884 if (!el) return; 

885 if (!summary){ el.textContent = 'Loaded'; return; } 

886 const {pct, covered, total} = summary; 

887 el.innerHTML = ` 

888 <div class="d-flex align-items-center"> 

889 <div class="progress flex-grow-1 me-2" style="height: 10px;"> 

890 <div class="progress-bar bg-success" role="progressbar" style="width: ${pct}%" aria-valuenow="${pct}" aria-valuemin="0" aria-valuemax="100"></div> 

891 </div> 

892 <span class="small fw-semibold">${pct}% (${covered}/${total})</span> 

893 </div>`; 

894} 

895 

896fetch('loader/status.json').then(r=>r.json()).then(d=>renderCoverage('loader-coverage', coverageSummary(d))).catch(()=>{}); 

897fetch('mcp/status.json').then(r=>r.json()).then(d=>renderCoverage('mcp-coverage', coverageSummary(d))).catch(()=>{}); 

898fetch('website/status.json').then(r=>r.json()).then(d=>renderCoverage('website-coverage', coverageSummary(d))).catch(()=>{}); 

899fetch('core/status.json').then(r=>r.json()).then(d=>renderCoverage('core-coverage', coverageSummary(d))).catch(()=>{}); 

900</script> 

901""" 

902 # Render through site template for full styling/navigation 

903 self.build_page( 

904 "base.html", 

905 "coverage/index.html", 

906 "Coverage Reports", 

907 "Test coverage analysis", 

908 "coverage/index.html", 

909 content=index_content, 

910 ) 

911 print("📄 Generated coverage index.html") 

912 

913 return {"coverage_reports": reports} 

914 

def build_package_docs(self) -> None:
    """Build documentation pages from package README files into docs/packages.

    Maps package README.md files to site docs under:
    - packages/qdrant-loader -> docs/packages/qdrant-loader/README.html
    - packages/qdrant-loader-mcp-server -> docs/packages/mcp-server/README.html
    - packages/qdrant-loader-core -> docs/packages/core/README.html

    Packages without a README.md are skipped; a failure while building one
    package is reported and does not stop the others.
    """
    package_mappings: list[tuple[str, str, str]] = [
        ("qdrant-loader", "qdrant-loader", "QDrant Loader"),
        ("qdrant-loader-mcp-server", "mcp-server", "MCP Server"),
        ("qdrant-loader-core", "core", "Core Library"),
    ]

    for pkg_name, alias, display_name in package_mappings:
        readme_path = Path("packages") / pkg_name / "README.md"
        if not readme_path.exists():
            continue

        try:
            output_path = f"docs/packages/{alias}/README.html"

            with open(readme_path, encoding="utf-8") as f:
                markdown_content = f.read()

            # Normalize links in markdown before conversion
            normalized_md = self.markdown_processor.convert_markdown_links_to_html(
                markdown_content
            )

            html_content = self.markdown_to_html(
                normalized_md,
                str(readme_path),
                output_path,
            )
            # Normalize any remaining HTML hrefs
            html_content = self.markdown_processor.convert_markdown_links_to_html(
                html_content, str(readme_path), output_path
            )

            # Final hardening for package README links. Best-effort by
            # design: a failure here keeps the page with unhardened links.
            try:
                # Convert .md links under docs to .html. This must run
                # before the generic collapse below, which would otherwise
                # remove the "../" prefix this pattern matches on.
                html_content = re.sub(
                    r'href="(?:\.{2}/)+docs/([^"#]+)\.md(#[^"]*)?"',
                    r'href="/docs/\1.html\2"',
                    html_content,
                )
                # Collapse relative ../../docs to /docs
                html_content = re.sub(
                    r'href="(?:\.{2}/)+docs/', 'href="/docs/', html_content
                )
                # Point repository root files at their generated pages
                html_content = re.sub(
                    r'href="(?:\.{2}/)+CONTRIBUTING\.md"',
                    'href="/docs/CONTRIBUTING.html"',
                    html_content,
                )
                html_content = re.sub(
                    r'href="(?:\.{2}/)+LICENSE(\.html)?"',
                    'href="/docs/LICENSE.html"',
                    html_content,
                )
            except Exception:
                pass

            # Build a Table of Contents and wrap with standard docs layout for consistent look
            toc_html = self.render_toc(html_content)
            if toc_html:
                toc_html = self.add_bootstrap_classes(toc_html)

            # Resolved outside the f-string: a backslash inside a
            # replacement field is a SyntaxError before Python 3.12.
            toc_block = toc_html or '<div class="text-muted small">No sections</div>'

            wrapped_content = f"""
<section>
  <div class="container-fluid">
    <div class="row toc-layout">
      <aside class="toc-sidebar d-none d-lg-block p-0">
        <div class="position-sticky">
          {toc_block}
        </div>
      </aside>
      <div class="container-content">
        {html_content}
      </div>
    </div>
  </div>
</section>
"""

            self.build_page(
                "base.html",
                output_path,
                f"{display_name} - README",
                f"{display_name} Documentation",
                output_path,
                content=wrapped_content,
            )
        except Exception as e:
            print(f"⚠️ Failed to build docs for package {pkg_name}: {e}")

1010 

def generate_directory_indexes(self) -> None:
    """Generate index pages for documentation directories.

    Walks every sub-directory of the source ``docs`` tree and of the built
    ``<output_dir>/docs`` tree. For each directory the first existing file
    among README.md, index.md, README.html and index.html is used:

    - In the built site tree, an HTML source is copied verbatim to
      ``index.html`` in the same directory; markdown sources are ignored.
    - In the source tree, an HTML source is rendered through ``build_page``
      and a markdown source through ``build_markdown_page``, both targeting
      ``docs/<relative dir>/index.html``.

    A failure for one directory is printed as a warning and the walk continues.
    """
    site_docs_root = self.output_dir / "docs"
    # Lookup order matters: the first existing candidate wins.
    candidate_names = ("README.md", "index.md", "README.html", "index.html")

    for docs_root in (Path("docs"), site_docs_root):
        if not docs_root.exists():
            continue

        for folder in docs_root.rglob("*"):
            if not folder.is_dir():
                continue

            # Lazy generator: stops probing the filesystem at the first hit.
            chosen = next(
                (
                    folder / name
                    for name in candidate_names
                    if (folder / name).exists()
                ),
                None,
            )
            if chosen is None:
                continue

            try:
                if docs_root == site_docs_root:
                    # Built site tree: write index.html in place
                    # (always overwrite to avoid stale links).
                    if chosen.suffix == ".html":
                        target = folder / "index.html"
                        target.write_text(
                            chosen.read_text(encoding="utf-8"), encoding="utf-8"
                        )
                        print(f"📄 Generated index.html from {chosen.name}")
                    continue

                # Source tree: go through the normal build pipeline.
                output_path = f"docs/{folder.relative_to(docs_root)}/index.html"

                if chosen.suffix != ".html":
                    self.build_markdown_page(
                        str(chosen),
                        output_path,
                        title=self._humanize_title(folder.name),
                    )
                    continue

                raw_html = chosen.read_text(encoding="utf-8")
                page_title = self._humanize_title(folder.name)
                page_description = f"{self._humanize_title(folder.name)} Documentation"
                self.build_page(
                    "base.html",
                    output_path,
                    page_title,
                    page_description,
                    output_path,
                    content=raw_html,
                )
            except Exception as e:
                print(f"⚠️ Failed to generate index for {folder}: {e}")

1078 

def build_license_page(
    self,
    source_file: str = "LICENSE",
    output_file: str = "license.html",
    title: str = "License",
    description: str = "License",
) -> None:
    """Build license page from LICENSE file.

    Reads ``source_file`` as UTF-8 text, HTML-escapes it, wraps it in a
    ``<pre>`` block under a "License Information" heading and renders the
    result through ``build_page`` with the ``base.html`` template.

    Args:
        source_file: Path of the license text file to read.
        output_file: Output path passed to ``build_page`` (used both as the
            output target and as the page's canonical path argument).
        title: Page title passed to ``build_page``.
        description: Page description passed to ``build_page``.

    If the license file is missing, or reading/rendering fails, a warning is
    printed and no exception is propagated to the caller.
    """
    # Function-scope import keeps this block self-contained; the local name
    # `escape` also avoids clashing with the `html_content` variable below.
    from html import escape

    license_path = Path(source_file)
    if not license_path.exists():
        print(f"⚠️ License file not found: {source_file}, skipping license page")
        return

    try:
        with open(license_path, encoding="utf-8") as f:
            license_content = f.read()

        # License texts routinely contain "<year>", "<owner>", "<http://...>"
        # and "&". Unescaped, browsers parse those as markup and drop them
        # from the rendered page. Quotes are harmless inside <pre> text.
        safe_license_text = escape(license_content, quote=False)

        # Create license page with heading
        html_content = f"""
        <h1>License Information</h1>
        <div class="license-content">
            <pre>{safe_license_text}</pre>
        </div>
        """

        self.build_page(
            "base.html",
            output_file,
            title,
            description,
            output_file,
            content=html_content,
        )
    except Exception as e:
        # Best-effort page: report the problem and let the build continue.
        print(f"⚠️ Failed to build license page: {e}")