Coverage for website/builder/core.py: 89%

400 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:03 +0000

1""" 

2Core Website Builder - Main Orchestration and Lifecycle Management. 

3 

4This module implements the main WebsiteBuilder class that orchestrates 

5all build operations and manages the overall build lifecycle. 

6""" 

7 

8import json 

9import re 

10import subprocess 

11from pathlib import Path 

12 

13from .assets import AssetManager 

14from .markdown import MarkdownProcessor 

15from .templates import TemplateProcessor 

16 

17 

18class WebsiteBuilder: 

19 """Builds the QDrant Loader documentation website from templates.""" 

20 

21 def __init__( 

22 self, templates_dir: str = "website/templates", output_dir: str = "site" 

23 ): 

24 """Initialize the website builder.""" 

25 self.templates_dir = Path(templates_dir) 

26 self.output_dir = Path(output_dir) 

27 self.base_url = "" 

28 # Cached docs navigation data (built once per run) 

29 self.docs_nav_data: dict | None = None 

30 

31 # Initialize component processors 

32 self.template_processor = TemplateProcessor(templates_dir) 

33 self.markdown_processor = MarkdownProcessor() 

34 self.asset_manager = AssetManager(output_dir) 

35 

36 # Delegate core operations to specialized processors 

37 def load_template(self, template_name: str) -> str: 

38 """Load a template file.""" 

39 return self.template_processor.load_template(template_name) 

40 

41 def replace_placeholders(self, content: str, replacements: dict[str, str]) -> str: 

42 """Replace placeholders in content with actual values.""" 

43 return self.template_processor.replace_placeholders(content, replacements) 

44 

45 def markdown_to_html( 

46 self, markdown_content: str, source_file: str = "", output_file: str = "" 

47 ) -> str: 

48 """Convert markdown to HTML with Bootstrap styling.""" 

49 return self.markdown_processor.markdown_to_html( 

50 markdown_content, source_file, output_file 

51 ) 

52 

53 def copy_assets(self) -> None: 

54 """Copy all website assets to output directory.""" 

55 return self.asset_manager.copy_assets() 

56 

57 def extract_title_from_markdown(self, markdown_content: str) -> str: 

58 """Extract title from markdown content.""" 

59 return self.markdown_processor.extract_title_from_markdown(markdown_content) 

60 

61 # Additional markdown processing methods 

62 def basic_markdown_to_html(self, markdown_content: str) -> str: 

63 """Basic markdown to HTML conversion.""" 

64 return self.markdown_processor.basic_markdown_to_html(markdown_content) 

65 

66 def convert_markdown_links_to_html( 

67 self, markdown_content: str, source_file: str = "", target_dir: str = "" 

68 ) -> str: 

69 """Convert markdown links to HTML format.""" 

70 return self.markdown_processor.convert_markdown_links_to_html( 

71 markdown_content, source_file, target_dir 

72 ) 

73 

74 def add_bootstrap_classes(self, html_content: str) -> str: 

75 """Add Bootstrap classes to HTML elements.""" 

76 return self.markdown_processor.add_bootstrap_classes(html_content) 

77 

78 def render_toc(self, html_content: str) -> str: 

79 """Generate table of contents from HTML headings.""" 

80 return self.markdown_processor.render_toc(html_content) 

81 

82 # Additional asset management methods 

83 def copy_static_files(self, static_files: list[str]) -> None: 

84 """Copy multiple static files.""" 

85 return self.asset_manager.copy_static_files(static_files) 

86 

87 def get_git_timestamp(self, source_path: str) -> str: 

88 """Get the last modified timestamp from Git.""" 

89 try: 

90 result = subprocess.run( 

91 ["git", "log", "-1", "--format=%cd", "--date=iso-strict", source_path], 

92 capture_output=True, 

93 text=True, 

94 cwd=".", 

95 ) 

96 if result.returncode == 0 and result.stdout.strip(): 

97 return result.stdout.strip() 

98 except (subprocess.CalledProcessError, FileNotFoundError): 

99 pass 

100 return "" 

101 

102 def _humanize_title(self, name: str) -> str: 

103 """Convert filename to human-readable title.""" 

104 # Remove file extension and common prefixes 

105 title = ( 

106 name.replace(".md", "") 

107 .replace("README", "") 

108 .replace("_", " ") 

109 .replace("-", " ") 

110 ) 

111 

112 # Handle common patterns 

113 title_mappings = { 

114 "cli reference": "CLI Reference", 

115 "api": "API", 

116 "faq": "FAQ", 

117 "toc": "Table of Contents", 

118 "readme": "Overview", 

119 } 

120 

121 title_lower = title.lower().strip() 

122 if title_lower in title_mappings: 

123 return title_mappings[title_lower] 

124 

125 # Capitalize words 

126 return " ".join(word.capitalize() for word in title.split()) 

127 

128 def generate_project_info(self, **kwargs) -> dict: 

129 """Generate project information for templates.""" 

130 project_info = { 

131 "name": "QDrant Loader", 

132 "version": "0.4.0b1", 

133 "description": "Enterprise-ready vector database toolkit", 

134 "github_url": "https://github.com/martin-papy/qdrant-loader", 

135 } 

136 

137 # Override with any provided kwargs 

138 project_info.update(kwargs) 

139 

140 # Try to load from pyproject.toml 

141 try: 

142 import tomli 

143 

144 with open("pyproject.toml", "rb") as f: 

145 pyproject = tomli.load(f) 

146 project_section = pyproject.get("project", {}) 

147 project_info.update( 

148 { 

149 "name": project_section.get("name", project_info["name"]), 

150 "version": project_section.get( 

151 "version", project_info["version"] 

152 ), 

153 "description": project_section.get( 

154 "description", project_info["description"] 

155 ), 

156 } 

157 ) 

158 # Normalize workspace naming to product name 

159 if isinstance(project_info.get("name"), str) and project_info[ 

160 "name" 

161 ].endswith("-workspace"): 

162 project_info["name"] = "QDrant Loader" 

163 

164 # Try to get homepage/repository from pyproject urls 

165 urls = ( 

166 project_section.get("urls", {}) 

167 if isinstance(project_section, dict) 

168 else {} 

169 ) 

170 homepage = urls.get("Homepage") 

171 if ( 

172 homepage 

173 and not getattr(self, "base_url_user_set", False) 

174 and not self.base_url 

175 ): 

176 # Set base_url from pyproject if not provided externally 

177 self.base_url = homepage.rstrip("/") 

178 repo_url = urls.get("Repository") or urls.get("Source") 

179 if repo_url: 

180 project_info["github_url"] = repo_url 

181 except Exception: 

182 # Ignore malformed project section entries 

183 pass 

184 

185 # Try to get git information 

186 try: 

187 import subprocess 

188 

189 # Get git commit hash 

190 result = subprocess.run( 

191 ["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True 

192 ) 

193 project_info["commit_hash"] = result.stdout.strip() 

194 

195 # Get git commit date 

196 result = subprocess.run( 

197 ["git", "log", "-1", "--format=%ci"], 

198 capture_output=True, 

199 text=True, 

200 check=True, 

201 ) 

202 project_info["commit_date"] = result.stdout.strip() 

203 

204 except (subprocess.CalledProcessError, FileNotFoundError): 

205 # Git not available or not a git repository 

206 pass 

207 

208 # Add build metadata 

209 from datetime import datetime 

210 

211 commit_hash = project_info.get("commit_hash", "") 

212 project_info["commit"] = { 

213 "hash": commit_hash, 

214 "short": commit_hash[:7] if isinstance(commit_hash, str) else "", 

215 "date": project_info.get("commit_date", ""), 

216 } 

217 project_info["build"] = { 

218 "timestamp": datetime.utcnow().isoformat(timespec="seconds") + "Z" 

219 } 

220 

221 # Write project info JSON file 

222 project_info_path = self.output_dir / "project-info.json" 

223 project_info_path.parent.mkdir(parents=True, exist_ok=True) 

224 with open(project_info_path, "w", encoding="utf-8") as f: 

225 json.dump(project_info, f, indent=2) 

226 

227 return project_info 

228 

229 def build_page( 

230 self, 

231 template_name: str, 

232 output_filename: str, 

233 title: str, 

234 description: str, 

235 canonical_path: str, 

236 content: str = "", 

237 **extra_replacements, 

238 ) -> None: 

239 """Build a single page from template.""" 

240 template_content = self.load_template(template_name) 

241 

242 # Load a content template if available when no explicit content is given. 

243 # For pages where output and canonical differ, missing content should raise. 

244 # For pages where they are the same (e.g., index.html), load content if 

245 # the template exists, otherwise fall back to empty content. 

246 if not content: 

247 try: 

248 content = self.load_template(output_filename) 

249 except FileNotFoundError: 

250 if output_filename != canonical_path: 

251 # Maintain behavior for explicit content templates 

252 raise 

253 # Otherwise, leave content empty 

254 

255 project_info = self.generate_project_info() 

256 

257 # Calculate base URL for relative paths 

258 if canonical_path.count("/") > 0: 

259 base_url = "../" * canonical_path.count("/") 

260 else: 

261 # Normalize root base URL 

262 if self.base_url: 

263 base_url = self.base_url.rstrip("/") + "/" 

264 else: 

265 base_url = "./" 

266 

267 # Merge extra replacements ensuring defaults for optional placeholders 

268 extras = dict(extra_replacements) 

269 extras.setdefault("additional_head", "") 

270 extras.setdefault("additional_scripts", "") 

271 

272 replacements = { 

273 "page_title": title, 

274 "page_description": description, 

275 "content": content, 

276 "base_url": base_url, 

277 "canonical_url": ( 

278 self.base_url.rstrip("/") + "/" + canonical_path 

279 if self.base_url 

280 else canonical_path 

281 ), 

282 "author": project_info.get("name", "QDrant Loader"), 

283 "version": project_info.get("version", "0.4.0b1"), 

284 "project_name": project_info["name"], 

285 "project_version": project_info["version"], 

286 "project_description": project_info["description"], 

287 **extras, 

288 } 

289 

290 final_content = self.replace_placeholders(template_content, replacements) 

291 

292 output_path = self.output_dir / output_filename 

293 output_path.parent.mkdir(parents=True, exist_ok=True) 

294 

295 with open(output_path, "w", encoding="utf-8") as f: 

296 f.write(final_content) 

297 

298 print(f"📄 Built {output_filename}") 

299 

300 def build_site( 

301 self, 

302 coverage_artifacts_dir: str | None = None, 

303 test_results_dir: str | None = None, 

304 ) -> None: 

305 """Build the complete website.""" 

306 print("🏗️ Building QDrant Loader website...") 

307 

308 # Create output directory 

309 self.output_dir.mkdir(parents=True, exist_ok=True) 

310 

311 # Copy assets first 

312 self.copy_assets() 

313 

314 # Generate project info 

315 self.generate_project_info() 

316 

317 # Build main pages 

318 self.build_page( 

319 "base.html", 

320 "index.html", 

321 "Home", 

322 "Enterprise-ready vector database toolkit for building searchable knowledge bases from multiple data sources including Confluence, Jira, and local files.", 

323 "index.html", 

324 ) 

325 

326 # Build a friendly 404 page 

327 try: 

328 self.build_page( 

329 "base.html", 

330 "404.html", 

331 "Page Not Found", 

332 "The page you are looking for does not exist.", 

333 "404.html", 

334 content=self.load_template("404.html"), 

335 ) 

336 except Exception as e: 

337 print(f"⚠️ Failed to build 404 page: {e}") 

338 

339 # Build docs structure and pages 

340 self.build_docs_nav() 

341 _docs_structure = self.build_docs_structure() 

342 

343 # Create docs directory and index 

344 docs_output_dir = self.output_dir / "docs" 

345 docs_output_dir.mkdir(exist_ok=True) 

346 

347 # Build docs index page using dedicated template content 

348 self.build_page( 

349 "base.html", 

350 "docs/index.html", 

351 "Documentation", 

352 "QDrant Loader Documentation", 

353 "docs/index.html", 

354 content=self.load_template("docs-index.html"), 

355 ) 

356 

357 # Bridge root docs from repository top-level files 

358 try: 

359 if Path("README.md").exists(): 

360 self.build_markdown_page("README.md", "docs/README.html") 

361 if Path("RELEASE_NOTES.md").exists(): 

362 self.build_markdown_page("RELEASE_NOTES.md", "docs/RELEASE_NOTES.html") 

363 if Path("CONTRIBUTING.md").exists(): 

364 self.build_markdown_page("CONTRIBUTING.md", "docs/CONTRIBUTING.html") 

365 # License (plain text) rendered via helper 

366 if Path("LICENSE").exists(): 

367 self.build_license_page( 

368 "LICENSE", "docs/LICENSE.html", "License", "License" 

369 ) 

370 # Privacy policy page from template 

371 try: 

372 self.build_page( 

373 "base.html", 

374 "privacy-policy.html", 

375 "Privacy Policy", 

376 "Privacy policy for QDrant Loader", 

377 "privacy-policy.html", 

378 content=self.load_template("privacy-policy.html"), 

379 ) 

380 except FileNotFoundError: 

381 pass 

382 except Exception as e: 

383 print(f"⚠️ Failed to build root docs pages: {e}") 

384 

385 # Build package README documentation into docs/packages 

386 try: 

387 self.build_package_docs() 

388 except Exception as e: 

389 print(f"⚠️ Failed to build package docs: {e}") 

390 

391 # Always create coverage directory and ensure index.html exists 

392 coverage_output_dir = self.output_dir / "coverage" 

393 coverage_output_dir.mkdir(exist_ok=True) 

394 

395 # Build coverage reports if provided 

396 if coverage_artifacts_dir: 

397 _coverage_structure = self.build_coverage_structure(coverage_artifacts_dir) 

398 

399 # Copy coverage artifacts 

400 coverage_path = Path(coverage_artifacts_dir) 

401 if coverage_path.exists(): 

402 import shutil 

403 

404 for item in coverage_path.iterdir(): 

405 if item.is_file(): 

406 shutil.copy2(item, coverage_output_dir / item.name) 

407 elif item.is_dir(): 

408 shutil.copytree( 

409 item, coverage_output_dir / item.name, dirs_exist_ok=True 

410 ) 

411 else: 

412 # Create styled placeholder coverage index if no artifacts provided 

413 placeholder_html = ( 

414 '<section class="py-5"><div class="container">' 

415 '<h1 class="display-5 fw-bold text-primary"><i class="bi bi-graph-up me-2"></i>Coverage Reports</h1>' 

416 '<div class="alert alert-info mt-4">No coverage artifacts available.</div>' 

417 "</div></section>" 

418 ) 

419 self.build_page( 

420 "base.html", 

421 "coverage/index.html", 

422 "Coverage Reports", 

423 "Test coverage analysis", 

424 "coverage/index.html", 

425 content=placeholder_html, 

426 ) 

427 

428 # Generate directory indexes 

429 self.generate_directory_indexes() 

430 

431 # Generate SEO files 

432 # Build a dynamic sitemap including all HTML pages 

433 try: 

434 self.generate_dynamic_sitemap() 

435 except Exception as e: 

436 print(f"⚠️ Failed to generate dynamic sitemap: {e}") 

437 

438 # Always (re)write robots.txt pointing to the sitemap 

439 try: 

440 self.generate_robots_file() 

441 except Exception as e: 

442 print(f"⚠️ Failed to generate robots.txt: {e}") 

443 

444 # Create .nojekyll file for GitHub Pages 

445 nojekyll_path = self.output_dir / ".nojekyll" 

446 nojekyll_path.touch() 

447 print("📄 Created .nojekyll file") 

448 

449 print("✅ Website build completed successfully!") 

450 

451 def build_docs_nav(self) -> dict: 

452 """Build documentation navigation structure.""" 

453 # Simplified navigation building 

454 docs_dir = Path("docs") 

455 if not docs_dir.exists(): 

456 return {} 

457 

458 nav_data = {"title": "Documentation", "children": []} 

459 

460 for item in sorted(docs_dir.iterdir()): 

461 if item.is_file() and item.suffix == ".md": 

462 nav_data["children"].append( 

463 { 

464 "title": self._humanize_title(item.stem), 

465 "url": f"docs/{item.name}", 

466 } 

467 ) 

468 elif item.is_dir(): 

469 nav_data["children"].append( 

470 { 

471 "title": self._humanize_title(item.name), 

472 "url": f"docs/{item.name}/", 

473 } 

474 ) 

475 

476 self.docs_nav_data = nav_data 

477 return nav_data 

478 

479 def generate_seo_files(self) -> None: 

480 """Generate SEO files like sitemap.xml and robots.txt.""" 

481 from datetime import datetime 

482 

483 # Determine base site URL 

484 site_base = ( 

485 self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net" 

486 ) 

487 

488 # Get current date for lastmod 

489 current_date = datetime.now().strftime("%Y-%m-%d") 

490 

491 # Generate simple sitemap.xml 

492 sitemap_content = f"""<?xml version="1.0" encoding="UTF-8"?> 

493<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> 

494 <url> 

495 <loc>{site_base}/</loc> 

496 <lastmod>{current_date}</lastmod> 

497 <changefreq>weekly</changefreq> 

498 <priority>1.0</priority> 

499 </url> 

500 <url> 

501 <loc>{site_base}/docs/</loc> 

502 <lastmod>{current_date}</lastmod> 

503 <changefreq>weekly</changefreq> 

504 <priority>0.8</priority> 

505 </url> 

506</urlset>""" 

507 

508 sitemap_path = self.output_dir / "sitemap.xml" 

509 with open(sitemap_path, "w", encoding="utf-8") as f: 

510 f.write(sitemap_content) 

511 print("📄 Generated sitemap.xml") 

512 

513 # Generate simple robots.txt 

514 robots_content = f"""User-agent: * 

515Allow: / 

516 

517Sitemap: {self.base_url.rstrip('/') if self.base_url else 'https://example.com'}/sitemap.xml 

518""" 

519 

520 robots_path = self.output_dir / "robots.txt" 

521 with open(robots_path, "w", encoding="utf-8") as f: 

522 f.write(robots_content.replace("https://example.com", site_base)) 

523 print("📄 Generated robots.txt") 

524 

525 def generate_robots_file(self) -> None: 

526 """Generate only robots.txt referencing the sitemap URL.""" 

527 site_base = ( 

528 self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net" 

529 ) 

530 robots_content = f"""User-agent: * 

531Allow: / 

532 

533Sitemap: {site_base}/sitemap.xml 

534""" 

535 robots_path = self.output_dir / "robots.txt" 

536 with open(robots_path, "w", encoding="utf-8") as f: 

537 f.write(robots_content) 

538 print("📄 Generated robots.txt") 

539 

540 def generate_dynamic_sitemap( 

541 self, date: str = None, pages: list[str] = None 

542 ) -> str: 

543 """Generate dynamic sitemap with custom pages.""" 

544 from datetime import datetime 

545 

546 base_url = ( 

547 self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net" 

548 ) 

549 

550 # Auto-discover pages if not provided 

551 if pages is None: 

552 pages = [] 

553 # Find HTML files in site directory 

554 if self.output_dir.exists(): 

555 for html_file in self.output_dir.rglob("*.html"): 

556 rel_path = str(html_file.relative_to(self.output_dir)) 

557 pages.append(rel_path) 

558 

559 # Use provided date or current date 

560 if date is None: 

561 date = datetime.now().strftime("%Y-%m-%d") 

562 

563 sitemap_content = '<?xml version="1.0" encoding="UTF-8"?>\n' 

564 sitemap_content += ( 

565 '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n' 

566 ) 

567 

568 for page in pages: 

569 sitemap_content += " <url>\n" 

570 sitemap_content += f" <loc>{base_url}/{page}</loc>\n" 

571 sitemap_content += f" <lastmod>{date}</lastmod>\n" 

572 sitemap_content += " <changefreq>weekly</changefreq>\n" 

573 sitemap_content += " <priority>0.8</priority>\n" 

574 sitemap_content += " </url>\n" 

575 

576 sitemap_content += "</urlset>" 

577 

578 # Write sitemap to file 

579 sitemap_path = self.output_dir / "sitemap.xml" 

580 sitemap_path.parent.mkdir(parents=True, exist_ok=True) 

581 with open(sitemap_path, "w", encoding="utf-8") as f: 

582 f.write(sitemap_content) 

583 print(f"📄 Generated dynamic sitemap.xml with {len(pages)} pages") 

584 

585 return sitemap_content 

586 

587 def build_markdown_page( 

588 self, 

589 markdown_file: str, 

590 output_path: str, 

591 title: str = "", 

592 breadcrumb: str = "", 

593 **kwargs, 

594 ) -> None: 

595 """Build a page from markdown file.""" 

596 markdown_path = Path(markdown_file) 

597 if not markdown_path.exists(): 

598 print( 

599 f"⚠️ Markdown file not found: {markdown_file}, skipping page generation" 

600 ) 

601 return 

602 

603 try: 

604 with open(markdown_path, encoding="utf-8") as f: 

605 markdown_content = f.read() 

606 except Exception as e: 

607 print(f"⚠️ Failed to read markdown file {markdown_file}: {e}") 

608 return 

609 

610 # Extract title if not provided 

611 if not title: 

612 title = self.extract_title_from_markdown(markdown_content) 

613 

614 # Normalize links in markdown before conversion 

615 markdown_content = self.markdown_processor.convert_markdown_links_to_html( 

616 markdown_content, str(markdown_path) 

617 ) 

618 

619 # Convert markdown to HTML 

620 html_content = self.markdown_to_html( 

621 markdown_content, str(markdown_path), output_path 

622 ) 

623 # Normalize any remaining HTML hrefs 

624 html_content = self.markdown_processor.convert_markdown_links_to_html( 

625 html_content, str(markdown_path) 

626 ) 

627 

628 # Build a Table of Contents and wrap in docs layout 

629 toc_html = self.render_toc(html_content) 

630 if toc_html: 

631 toc_html = self.add_bootstrap_classes(toc_html) 

632 

633 wrapped_content = f""" 

634<section class=\"py-5\"> 

635 <div class=\"container\"> 

636 <div class=\"row\"> 

637 <aside class=\"col-lg-3 d-none d-lg-block\"> 

638 <div class=\"position-sticky\" style=\"top: 6rem;\"> 

639 {toc_html or '<div class=\"text-muted small\">No sections</div>'} 

640 </div> 

641 </aside> 

642 <div class=\"col-lg-9\"> 

643 {html_content} 

644 </div> 

645 </div> 

646 </div> 

647</section> 

648""" 

649 

650 # Build the page 

651 self.build_page( 

652 "base.html", 

653 output_path, 

654 title, 

655 f"{title} - QDrant Loader", 

656 output_path, 

657 content=wrapped_content, 

658 breadcrumb=breadcrumb, 

659 **kwargs, 

660 ) 

661 

662 def build_docs_structure(self) -> dict: 

663 """Build documentation directory structure.""" 

664 docs_dir = Path("docs") 

665 structure = {"title": "Documentation", "children": []} 

666 

667 # Create docs output directory 

668 docs_output_dir = self.output_dir / "docs" 

669 docs_output_dir.mkdir(parents=True, exist_ok=True) 

670 

671 if not docs_dir.exists(): 

672 return structure 

673 

674 # Process all markdown files in docs 

675 for item in sorted(docs_dir.rglob("*.md")): 

676 relative_path = str(item.relative_to(docs_dir)) 

677 output_path = relative_path.replace(".md", ".html") 

678 

679 structure["children"].append( 

680 { 

681 "title": self._humanize_title(item.stem), 

682 "path": relative_path, 

683 "url": f"docs/{output_path}", 

684 } 

685 ) 

686 

687 # Build the page from markdown 

688 try: 

689 self.build_markdown_page( 

690 str(item), 

691 f"docs/{output_path}", 

692 title=self._humanize_title(item.stem), 

693 ) 

694 except Exception as e: 

695 print(f"⚠️ Failed to build docs page {item}: {e}") 

696 

697 return structure 

698 

699 def build_coverage_structure(self, coverage_dir: str | None = None) -> dict: 

700 """Build coverage report structure.""" 

701 # Always create coverage output directory 

702 coverage_output_dir = self.output_dir / "coverage" 

703 coverage_output_dir.mkdir(parents=True, exist_ok=True) 

704 

705 if not coverage_dir: 

706 return {"coverage_reports": []} 

707 

708 coverage_path = Path(coverage_dir) 

709 if not coverage_path.exists(): 

710 return {"coverage_reports": []} 

711 

712 # Copy all coverage files with proper naming 

713 import shutil 

714 

715 for item in coverage_path.iterdir(): 

716 # Map directory names to cleaner package names 

717 dest_name = item.name 

718 if item.is_dir(): 

719 if "htmlcov-loader" in item.name: 

720 dest_name = "loader" 

721 elif "htmlcov-mcp" in item.name: 

722 dest_name = "mcp" 

723 elif "htmlcov-website" in item.name: 

724 dest_name = "website" 

725 elif ( 

726 "htmlcov-core" in item.name 

727 or "htmlcov-qdrant-loader-core" in item.name 

728 ): 

729 dest_name = "core" 

730 elif "htmlcov" in item.name: 

731 dest_name = item.name.replace("htmlcov-", "").replace( 

732 "htmlcov_", "" 

733 ) 

734 

735 dest_path = coverage_output_dir / dest_name 

736 try: 

737 if item.is_file(): 

738 shutil.copy2(item, dest_path) 

739 elif item.is_dir(): 

740 if dest_path.exists(): 

741 shutil.rmtree(dest_path) 

742 shutil.copytree(item, dest_path) 

743 print(f"📁 Copied coverage: {item.name} -> {dest_name}") 

744 except Exception as e: 

745 print(f"⚠️ Failed to copy coverage file {item}: {e}") 

746 

747 # Build reports list using the renamed directories 

748 reports = [] 

749 for subdir in coverage_output_dir.iterdir(): 

750 if subdir.is_dir(): 

751 index_file = subdir / "index.html" 

752 if index_file.exists(): 

753 reports.append( 

754 { 

755 "name": subdir.name, 

756 "path": f"{subdir.name}/index.html", 

757 "url": f"coverage/{subdir.name}/index.html", 

758 } 

759 ) 

760 

761 # Create main coverage index page using site template when reports exist 

762 if reports: 

763 # Build coverage index with Bootstrap styling 

764 index_content = """ 

765<section class=\"py-5\"> 

766 <div class=\"container\"> 

767 <h1 class=\"display-5 fw-bold text-primary mb-4\"><i class=\"bi bi-graph-up me-2\"></i>Coverage Reports</h1> 

768 <div class=\"row g-4\">""" 

769 

770 for report in reports: 

771 if report["name"] == "loader": 

772 index_content += """ 

773 <div class="col-lg-6"> 

774 <div class="card"> 

775 <div class="card-header"> 

776 <h4>QDrant Loader Core</h4> 

777 <span id="loader-test-indicator" class="badge">Loading...</span> 

778 </div> 

779 <div class="card-body"> 

780 <div id="loader-coverage">Loader coverage data</div> 

781 <a href="loader/" class="btn btn-primary">View Detailed Report</a> 

782 </div> 

783 </div> 

784 </div>""" 

785 elif report["name"] == "mcp": 

786 index_content += """ 

787 <div class="col-lg-6"> 

788 <div class="card"> 

789 <div class="card-header"> 

790 <h4>MCP Server</h4> 

791 <span id="mcp-test-indicator" class="badge">Loading...</span> 

792 </div> 

793 <div class="card-body"> 

794 <div id="mcp-coverage">MCP Server coverage data</div> 

795 <a href="mcp/" class="btn btn-success">View Detailed Report</a> 

796 </div> 

797 </div> 

798 </div>""" 

799 elif report["name"] == "website": 

800 index_content += """ 

801 <div class="col-lg-6"> 

802 <div class="card"> 

803 <div class="card-header"> 

804 <h4>Website</h4> 

805 <span id="website-test-indicator" class="badge">Loading...</span> 

806 </div> 

807 <div class="card-body"> 

808 <div id="website-coverage">Website coverage data</div> 

809 <a href="website/" class="btn btn-info">View Detailed Report</a> 

810 </div> 

811 </div> 

812 </div>""" 

813 elif report["name"] == "core": 

814 index_content += """ 

815 <div class="col-lg-6"> 

816 <div class="card"> 

817 <div class="card-header"> 

818 <h4>Core Library</h4> 

819 <span id="core-test-indicator" class="badge">Loading...</span> 

820 </div> 

821 <div class="card-body"> 

822 <div id="core-coverage">Core library coverage data</div> 

823 <a href="core/" class="btn btn-warning">View Detailed Report</a> 

824 </div> 

825 </div> 

826 </div>""" 

827 

828 index_content += """ 

829 </div> 

830 </div> 

831</section> 

832 

833<script> 

834// Compute and render coverage summary from status.json 

835function coverageSummary(data){ 

836 try{ 

837 let total = 0, missing = 0; 

838 if (data && data.files){ 

839 for (const k in data.files){ 

840 const f = data.files[k]; 

841 const nums = f && f.index && f.index.nums ? f.index.nums : (f.index && f.index.numbers ? f.index.numbers : null); 

842 if (nums && typeof nums.n_statements === 'number'){ 

843 total += (nums.n_statements||0); 

844 missing += (nums.n_missing||0); 

845 } 

846 } 

847 } 

848 // Fallback if a totals object exists 

849 if (total === 0 && data && data.totals){ 

850 if (typeof data.totals.n_statements === 'number'){ 

851 total = data.totals.n_statements||0; 

852 missing = data.totals.n_missing||0; 

853 } else if (typeof data.totals.covered_lines === 'number' && typeof data.totals.num_statements === 'number'){ 

854 total = data.totals.num_statements; 

855 missing = total - data.totals.covered_lines; 

856 } 

857 } 

858 if (total > 0){ 

859 const covered = Math.max(0, total - missing); 

860 const pct = Math.round((covered/total)*1000)/10; // one decimal 

861 return {pct, covered, total}; 

862 } 

863 } catch(e){} 

864 return null; 

865} 

866 

867function renderCoverage(id, summary){ 

868 const el = document.getElementById(id); 

869 if (!el) return; 

870 if (!summary){ el.textContent = 'Loaded'; return; } 

871 const {pct, covered, total} = summary; 

872 el.innerHTML = ` 

873 <div class="d-flex align-items-center"> 

874 <div class="progress flex-grow-1 me-2" style="height: 10px;"> 

875 <div class="progress-bar bg-success" role="progressbar" style="width: ${pct}%" aria-valuenow="${pct}" aria-valuemin="0" aria-valuemax="100"></div> 

876 </div> 

877 <span class="small fw-semibold">${pct}% (${covered}/${total})</span> 

878 </div>`; 

879} 

880 

881fetch('loader/status.json').then(r=>r.json()).then(d=>renderCoverage('loader-coverage', coverageSummary(d))).catch(()=>{}); 

882fetch('mcp/status.json').then(r=>r.json()).then(d=>renderCoverage('mcp-coverage', coverageSummary(d))).catch(()=>{}); 

883fetch('website/status.json').then(r=>r.json()).then(d=>renderCoverage('website-coverage', coverageSummary(d))).catch(()=>{}); 

884fetch('core/status.json').then(r=>r.json()).then(d=>renderCoverage('core-coverage', coverageSummary(d))).catch(()=>{}); 

885</script> 

886""" 

887 # Render through site template for full styling/navigation 

888 self.build_page( 

889 "base.html", 

890 "coverage/index.html", 

891 "Coverage Reports", 

892 "Test coverage analysis", 

893 "coverage/index.html", 

894 content=index_content, 

895 ) 

896 print("📄 Generated coverage index.html") 

897 

898 return {"coverage_reports": reports} 

899 

def build_package_docs(self) -> None:
    """Build documentation pages from package README files into docs/packages.

    Maps package README.md files to site docs under:
    - packages/qdrant-loader -> docs/packages/qdrant-loader/README.html
    - packages/qdrant-loader-mcp-server -> docs/packages/mcp-server/README.html
    - packages/qdrant-loader-core -> docs/packages/core/README.html

    Packages whose README.md does not exist are skipped. A failure while
    building one package is printed and does not stop the remaining ones.
    """
    # (package directory name, alias used in the output path, display name)
    package_mappings: list[tuple[str, str, str]] = [
        ("qdrant-loader", "qdrant-loader", "QDrant Loader"),
        ("qdrant-loader-mcp-server", "mcp-server", "MCP Server"),
        ("qdrant-loader-core", "core", "Core Library"),
    ]

    for pkg_name, alias, display_name in package_mappings:
        readme_path = Path("packages") / pkg_name / "README.md"
        if not readme_path.exists():
            continue

        output_path = f"docs/packages/{alias}/README.html"
        try:
            markdown_content = readme_path.read_text(encoding="utf-8")

            # Normalize links in markdown before conversion
            normalized_md = self.markdown_processor.convert_markdown_links_to_html(
                markdown_content
            )

            html_content = self.markdown_to_html(
                normalized_md, str(readme_path), output_path
            )
            # Normalize any remaining HTML hrefs
            html_content = self.markdown_processor.convert_markdown_links_to_html(
                html_content, str(readme_path), output_path
            )

            # Final hardening for package README links.
            # The .md -> .html rewrite must run BEFORE the generic
            # "../docs/" collapse: once the "../" prefix has been removed
            # this pattern can no longer match and the links would keep
            # pointing at .md files.
            html_content = re.sub(
                r'href="(?:\.{2}/)+docs/([^"#]+)\.md(#[^"]*)?"',
                r'href="/docs/\1.html\2"',
                html_content,
            )
            # Repository root files referenced from a package README
            html_content = re.sub(
                r'href="(?:\.{2}/)+CONTRIBUTING\.md"',
                'href="/docs/CONTRIBUTING.html"',
                html_content,
            )
            html_content = re.sub(
                r'href="(?:\.{2}/)+LICENSE(\.html)?"',
                'href="/docs/LICENSE.html"',
                html_content,
            )
            # Collapse any remaining relative ../../docs prefix to /docs
            html_content = re.sub(
                r'href="(?:\.{2}/)+docs/', 'href="/docs/', html_content
            )

            # Build a Table of Contents and wrap with standard docs layout
            # for a consistent look
            toc_html = self.render_toc(html_content)
            if toc_html:
                toc_html = self.add_bootstrap_classes(toc_html)

            # Computed outside the f-string: backslash escapes inside a
            # replacement field are a SyntaxError before Python 3.12.
            toc_block = (
                toc_html or '<div class="text-muted small">No sections</div>'
            )

            wrapped_content = f"""
<section class="py-5">
  <div class="container">
    <div class="row">
      <aside class="col-lg-3 d-none d-lg-block">
        <div class="position-sticky" style="top: 6rem;">
          {toc_block}
        </div>
      </aside>
      <div class="col-lg-9">
        {html_content}
      </div>
    </div>
  </div>
</section>
"""

            self.build_page(
                "base.html",
                output_path,
                f"{display_name} - README",
                f"{display_name} Documentation",
                output_path,
                content=wrapped_content,
            )
        except Exception as e:
            # Best effort: one broken README must not abort the other packages.
            print(f"⚠️ Failed to build docs for package {pkg_name}: {e}")

995 

def generate_directory_indexes(self) -> None:
    """Generate index files for directories.

    Two trees are scanned: the source ``docs`` directory (relative to the
    current working directory) and ``<output_dir>/docs``. For every
    sub-directory the first existing file among README.md, index.md,
    README.html and index.html is used as the source:

    - In the site tree, an HTML source is copied to ``index.html`` in the
      same directory (always overwriting). Markdown sources found there
      are left untouched.
    - In the source tree, HTML sources go through ``build_page`` and
      Markdown sources through ``build_markdown_page``, both targeting
      ``docs/<relative dir>/index.html``.

    A failure for one directory is printed and does not stop the others.
    """
    # Look in both source docs and output site docs directories
    source_docs_dir = Path("docs")
    site_docs_dir = self.output_dir / "docs"
    # Candidate file names, highest priority first
    candidate_names = ("README.md", "index.md", "README.html", "index.html")

    for docs_dir in [source_docs_dir, site_docs_dir]:
        if not docs_dir.exists():
            continue

        in_site_tree = docs_dir == site_docs_dir
        # Snapshot the directories first so files written below cannot
        # interfere with the ongoing traversal.
        directories = [path for path in docs_dir.rglob("*") if path.is_dir()]

        for directory in directories:
            source_file = next(
                (
                    directory / name
                    for name in candidate_names
                    if (directory / name).exists()
                ),
                None,
            )
            if source_file is None:
                continue

            try:
                if in_site_tree:
                    # For files in site directory, create/overwrite
                    # index.html directly there
                    if source_file.suffix == ".html":
                        index_file = directory / "index.html"
                        # Explicit UTF-8 so the copy does not depend on
                        # the platform's locale encoding.
                        content = source_file.read_text(encoding="utf-8")
                        index_file.write_text(content, encoding="utf-8")
                        print(
                            f"📄 Generated index.html from {source_file.name}"
                        )
                    continue

                # For source files, process through normal build pipeline
                relative_dir = directory.relative_to(docs_dir)
                # as_posix() keeps forward slashes on every platform
                output_path = f"docs/{relative_dir.as_posix()}/index.html"
                title = self._humanize_title(directory.name)

                if source_file.suffix == ".html":
                    # Embed the HTML file content as the page content
                    content = source_file.read_text(encoding="utf-8")
                    self.build_page(
                        "base.html",
                        output_path,
                        title,
                        f"{title} Documentation",
                        output_path,
                        content=content,
                    )
                else:
                    # Process markdown file
                    self.build_markdown_page(
                        str(source_file),
                        output_path,
                        title=title,
                    )
            except Exception as e:
                print(f"⚠️ Failed to generate index for {directory}: {e}")

1063 

def build_license_page(
    self,
    source_file: str = "LICENSE",
    output_file: str = "license.html",
    title: str = "License",
    description: str = "License",
) -> None:
    """Build license page from LICENSE file.

    Args:
        source_file: Path of the plain-text license file to read.
        output_file: Output path handed to ``build_page`` (used both as the
            output file and as the fifth positional argument).
        title: Page title handed to ``build_page``.
        description: Page description handed to ``build_page``.

    If ``source_file`` does not exist a warning is printed and nothing is
    built. Any error while reading or building is printed, not raised.
    """
    # Function-scope import: only this method needs it.
    from html import escape

    license_path = Path(source_file)
    if not license_path.exists():
        print(f"⚠️ License file not found: {source_file}, skipping license page")
        return

    try:
        license_content = license_path.read_text(encoding="utf-8")

        # License texts routinely contain "<year>", "<owner>", URLs in angle
        # brackets and "&"; unescaped they would be parsed as markup inside
        # <pre> and disappear from the rendered page.
        escaped_license = escape(license_content, quote=False)

        # Create license page with heading
        html_content = f"""
        <h1>License Information</h1>
        <div class="license-content">
            <pre>{escaped_license}</pre>
        </div>
        """

        self.build_page(
            "base.html",
            output_file,
            title,
            description,
            output_file,
            content=html_content,
        )
    except Exception as e:
        print(f"⚠️ Failed to build license page: {e}")