Coverage for website/builder/core.py: 89%

1"""

2Core Website Builder - Main Orchestration and Lifecycle Management.

4This module implements the main WebsiteBuilder class that orchestrates

5all build operations and manages the overall build lifecycle.

6"""

8import json

9import re

10import subprocess

11from pathlib import Path

13from .assets import AssetManager

14from .markdown import MarkdownProcessor

15from .templates import TemplateProcessor

18class WebsiteBuilder:

19 """Builds the QDrant Loader documentation website from templates."""

def __init__(
    self, templates_dir: str = "website/templates", output_dir: str = "site"
):
    """Set up build paths, the navigation cache and the helper processors.

    Args:
        templates_dir: Directory the templates are read from; stored as a
            ``Path`` and handed (as given) to ``TemplateProcessor``.
        output_dir: Directory the site is written to; stored as a ``Path``
            and handed (as given) to ``AssetManager``.
    """
    self.templates_dir, self.output_dir = Path(templates_dir), Path(output_dir)

    # Empty means "no absolute site URL configured"; other methods fall back
    # to relative links or a default host when this is falsy.
    self.base_url = ""

    # Filled in by build_docs_nav(); None until navigation has been built.
    self.docs_nav_data: dict | None = None

    # Specialized collaborators the thin wrapper methods delegate to.
    self.template_processor = TemplateProcessor(templates_dir)
    self.markdown_processor = MarkdownProcessor()
    self.asset_manager = AssetManager(output_dir)

36 # Delegate core operations to specialized processors

def load_template(self, template_name: str) -> str:
    """Return the template named ``template_name``.

    Thin wrapper: forwards to ``self.template_processor.load_template``.
    """
    processor = self.template_processor
    return processor.load_template(template_name)

def replace_placeholders(self, content: str, replacements: dict[str, str]) -> str:
    """Substitute placeholder values into ``content``.

    Thin wrapper: forwards both arguments, positionally, to
    ``self.template_processor.replace_placeholders``.
    """
    processor = self.template_processor
    rendered = processor.replace_placeholders(content, replacements)
    return rendered

def markdown_to_html(
    self, markdown_content: str, source_file: str = "", output_file: str = ""
) -> str:
    """Render markdown text as HTML.

    Thin wrapper: forwards the three arguments, positionally and in order,
    to ``self.markdown_processor.markdown_to_html``.
    """
    converter = self.markdown_processor.markdown_to_html
    return converter(markdown_content, source_file, output_file)

def copy_assets(self) -> None:
    """Copy the website assets into the output directory.

    Thin wrapper around ``self.asset_manager.copy_assets()``; whatever that
    call returns is passed straight back to the caller.
    """
    manager = self.asset_manager
    outcome = manager.copy_assets()
    return outcome

def extract_title_from_markdown(self, markdown_content: str) -> str:
    """Return the title found in ``markdown_content``.

    Thin wrapper around
    ``self.markdown_processor.extract_title_from_markdown``.
    """
    extractor = self.markdown_processor.extract_title_from_markdown
    return extractor(markdown_content)

61 # Additional markdown processing methods

def basic_markdown_to_html(self, markdown_content: str) -> str:
    """Run the basic markdown-to-HTML conversion on ``markdown_content``.

    Thin wrapper around ``self.markdown_processor.basic_markdown_to_html``.
    """
    converter = self.markdown_processor.basic_markdown_to_html
    return converter(markdown_content)

def convert_markdown_links_to_html(
    self, markdown_content: str, source_file: str = "", target_dir: str = ""
) -> str:
    """Rewrite markdown links in ``markdown_content`` to their HTML form.

    Thin wrapper: forwards the three arguments, positionally and in order,
    to ``self.markdown_processor.convert_markdown_links_to_html``.
    """
    rewrite_links = self.markdown_processor.convert_markdown_links_to_html
    return rewrite_links(markdown_content, source_file, target_dir)

def add_bootstrap_classes(self, html_content: str) -> str:
    """Decorate ``html_content`` with Bootstrap classes.

    Thin wrapper around ``self.markdown_processor.add_bootstrap_classes``.
    """
    decorate = self.markdown_processor.add_bootstrap_classes
    return decorate(html_content)

def render_toc(self, html_content: str) -> str:
    """Build a table of contents for ``html_content``.

    Thin wrapper around ``self.markdown_processor.render_toc``.
    """
    build_toc = self.markdown_processor.render_toc
    return build_toc(html_content)

82 # Additional asset management methods

def copy_static_files(self, static_files: list[str]) -> None:
    """Copy the given static files.

    Thin wrapper around ``self.asset_manager.copy_static_files``; whatever
    that call returns is passed straight back to the caller.
    """
    manager = self.asset_manager
    outcome = manager.copy_static_files(static_files)
    return outcome

def get_git_timestamp(self, source_path: str) -> str:
    """Return the strict-ISO date of the last Git commit touching a path.

    Runs ``git log -1 --format=%cd --date=iso-strict -- <source_path>`` in
    the current working directory.

    Args:
        source_path: Path handed to ``git log``.

    Returns:
        The stripped command output, or ``""`` when git cannot be started,
        exits with a non-zero status, or prints nothing.
    """
    command = [
        "git",
        "log",
        "-1",
        "--format=%cd",
        "--date=iso-strict",
        # "--" ends option parsing, so a path that starts with "-" is not
        # mistaken for a git option.
        "--",
        source_path,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, cwd=".")
    except OSError:
        # git missing, not executable, etc. Without check=True,
        # subprocess.run never raises CalledProcessError, so OSError is the
        # failure mode that matters here.
        return ""

    timestamp = result.stdout.strip()
    if result.returncode == 0 and timestamp:
        return timestamp
    return ""

101

102 def _humanize_title(self, name: str) -> str:

103 """Convert filename to human-readable title."""

104 # Remove file extension and common prefixes

105 title = (

106 name.replace(".md", "")

107 .replace("README", "")

108 .replace("_", " ")

109 .replace("-", " ")

110 )

111

112 # Handle common patterns

113 title_mappings = {

114 "cli reference": "CLI Reference",

115 "api": "API",

116 "faq": "FAQ",

117 "toc": "Table of Contents",

118 "readme": "Overview",

119 }

120

121 title_lower = title.lower().strip()

122 if title_lower in title_mappings:

123 return title_mappings[title_lower]

124

125 # Capitalize words

126 return " ".join(word.capitalize() for word in title.split())

127

def generate_project_info(self, **kwargs) -> dict:
    """Collect project metadata and write it to ``project-info.json``.

    Values are layered in this order (later wins): built-in defaults,
    ``kwargs``, the ``[project]`` table of ``pyproject.toml`` in the current
    directory (name, version, description, urls), then Git commit data.

    Side effects:
        * May set ``self.base_url`` from the pyproject ``Homepage`` URL when
          no base URL is configured yet.
        * Writes ``<output_dir>/project-info.json`` (creating the directory).

    Args:
        **kwargs: Extra or overriding entries merged into the metadata.

    Returns:
        The metadata dictionary that was written to disk.
    """
    from datetime import datetime, timezone

    project_info = {
        "name": "QDrant Loader",
        "version": "0.4.0b1",
        "description": "Enterprise-ready vector database toolkit",
        "github_url": "https://github.com/martin-papy/qdrant-loader",
    }
    project_info.update(kwargs)

    # pyproject.toml is optional: any failure while reading or interpreting it
    # is deliberately ignored and whatever was gathered so far is kept.
    try:
        try:
            # Standard library since Python 3.11; tomli is the older backport.
            import tomllib as toml_reader
        except ImportError:
            import tomli as toml_reader

        with open("pyproject.toml", "rb") as f:
            pyproject = toml_reader.load(f)

        project_section = pyproject.get("project", {})
        project_info.update(
            {
                "name": project_section.get("name", project_info["name"]),
                "version": project_section.get("version", project_info["version"]),
                "description": project_section.get(
                    "description", project_info["description"]
                ),
            }
        )

        # A "*-workspace" package name is shown under the product name.
        name = project_info.get("name")
        if isinstance(name, str) and name.endswith("-workspace"):
            project_info["name"] = "QDrant Loader"

        urls = (
            project_section.get("urls", {})
            if isinstance(project_section, dict)
            else {}
        )
        homepage = urls.get("Homepage")
        if (
            homepage
            and not getattr(self, "base_url_user_set", False)
            and not self.base_url
        ):
            # Only adopt the homepage when no base URL was supplied externally.
            self.base_url = homepage.rstrip("/")
        repo_url = urls.get("Repository") or urls.get("Source")
        if repo_url:
            project_info["github_url"] = repo_url
    except Exception:
        pass

    # Git data is optional as well (git missing, or not inside a repository).
    try:
        import subprocess

        result = subprocess.run(
            ["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True
        )
        project_info["commit_hash"] = result.stdout.strip()

        result = subprocess.run(
            ["git", "log", "-1", "--format=%ci"],
            capture_output=True,
            text=True,
            check=True,
        )
        project_info["commit_date"] = result.stdout.strip()
    except (subprocess.CalledProcessError, OSError):
        pass

    commit_hash = project_info.get("commit_hash", "")
    project_info["commit"] = {
        "hash": commit_hash,
        "short": commit_hash[:7] if isinstance(commit_hash, str) else "",
        "date": project_info.get("commit_date", ""),
    }
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # rendered text keeps the same "YYYY-MM-DDTHH:MM:SSZ" shape.
    project_info["build"] = {
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    }

    project_info_path = self.output_dir / "project-info.json"
    project_info_path.parent.mkdir(parents=True, exist_ok=True)
    with open(project_info_path, "w", encoding="utf-8") as f:
        json.dump(project_info, f, indent=2)

    return project_info

228

def build_page(
    self,
    template_name: str,
    output_filename: str,
    title: str,
    description: str,
    canonical_path: str,
    content: str = "",
    **extra_replacements,
) -> None:
    """Render one page from a template and write it under the output directory.

    Args:
        template_name: Layout template to load.
        output_filename: Path of the page relative to ``self.output_dir``.
        title: Value for the ``page_title`` placeholder.
        description: Value for the ``page_description`` placeholder.
        canonical_path: Site-relative path used for the canonical URL and for
            computing the relative ``base_url``.
        content: Page body. When empty, a template named after
            ``output_filename`` is loaded instead.
        **extra_replacements: Additional placeholder values; they override the
            standard ones on a name clash.

    Raises:
        FileNotFoundError: When ``content`` is empty, no content template
            exists, and ``output_filename`` differs from ``canonical_path``.
    """
    layout = self.load_template(template_name)

    if not content:
        try:
            content = self.load_template(output_filename)
        except FileNotFoundError:
            # A missing content template is tolerated (empty body) only when
            # the output name and the canonical path are the same.
            if output_filename != canonical_path:
                raise

    project_info = self.generate_project_info()

    # One "../" per directory level for nested pages; root-level pages use the
    # configured site URL, or "./" when none is set.
    depth = canonical_path.count("/")
    if depth > 0:
        base_url = "../" * depth
    elif self.base_url:
        base_url = self.base_url.rstrip("/") + "/"
    else:
        base_url = "./"

    if self.base_url:
        canonical_url = self.base_url.rstrip("/") + "/" + canonical_path
    else:
        canonical_url = canonical_path

    # Optional placeholders default to empty strings unless the caller set them.
    extras = {**extra_replacements}
    for optional_key in ("additional_head", "additional_scripts"):
        extras.setdefault(optional_key, "")

    replacements = {
        "page_title": title,
        "page_description": description,
        "content": content,
        "base_url": base_url,
        "canonical_url": canonical_url,
        "author": project_info.get("name", "QDrant Loader"),
        "version": project_info.get("version", "0.4.0b1"),
        "project_name": project_info["name"],
        "project_version": project_info["version"],
        "project_description": project_info["description"],
        **extras,
    }
    rendered = self.replace_placeholders(layout, replacements)

    destination = self.output_dir / output_filename
    destination.parent.mkdir(parents=True, exist_ok=True)
    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(rendered)

    print(f"📄 Built {output_filename}")

299

300 def build_site(

301 self,

302 coverage_artifacts_dir: str | None = None,

303 test_results_dir: str | None = None,

304 ) -> None:

305 """Build the complete website."""

306 print("🏗️ Building QDrant Loader website...")

307

308 # Create output directory

309 self.output_dir.mkdir(parents=True, exist_ok=True)

310

311 # Copy assets first

312 self.copy_assets()

313

314 # Generate project info

315 self.generate_project_info()

316

317 # Build main pages

318 self.build_page(

319 "base.html",

320 "index.html",

321 "Home",

322 "Enterprise-ready vector database toolkit for building searchable knowledge bases from multiple data sources including Confluence, Jira, and local files.",

323 "index.html",

324 )

325

326 # Build a friendly 404 page

327 try:

328 self.build_page(

329 "base.html",

330 "404.html",

331 "Page Not Found",

332 "The page you are looking for does not exist.",

333 "404.html",

334 content=self.load_template("404.html"),

335 )

336 except Exception as e:

337 print(f"⚠️ Failed to build 404 page: {e}")

338

339 # Build docs structure and pages

340 self.build_docs_nav()

341 _docs_structure = self.build_docs_structure()

342

343 # Create docs directory and index

344 docs_output_dir = self.output_dir / "docs"

345 docs_output_dir.mkdir(exist_ok=True)

346

347 # Build docs index page using dedicated template content

348 self.build_page(

349 "base.html",

350 "docs/index.html",

351 "Documentation",

352 "QDrant Loader Documentation",

353 "docs/index.html",

354 content=self.load_template("docs-index.html"),

355 )

356

357 # Bridge root docs from repository top-level files

358 try:

359 if Path("README.md").exists():

360 self.build_markdown_page("README.md", "docs/README.html")

361 if Path("RELEASE_NOTES.md").exists():

362 self.build_markdown_page("RELEASE_NOTES.md", "docs/RELEASE_NOTES.html")

363 if Path("CONTRIBUTING.md").exists():

364 self.build_markdown_page("CONTRIBUTING.md", "docs/CONTRIBUTING.html")

365 # License (plain text) rendered via helper

366 if Path("LICENSE").exists():

367 self.build_license_page(

368 "LICENSE", "docs/LICENSE.html", "License", "License"

369 )

370 # Privacy policy page from template

371 try:

372 self.build_page(

373 "base.html",

374 "privacy-policy.html",

375 "Privacy Policy",

376 "Privacy policy for QDrant Loader",

377 "privacy-policy.html",

378 content=self.load_template("privacy-policy.html"),

379 )

380 except FileNotFoundError:

381 pass

382 except Exception as e:

383 print(f"⚠️ Failed to build root docs pages: {e}")

384

385 # Build package README documentation into docs/packages

386 try:

387 self.build_package_docs()

388 except Exception as e:

389 print(f"⚠️ Failed to build package docs: {e}")

390

391 # Always create coverage directory and ensure index.html exists

392 coverage_output_dir = self.output_dir / "coverage"

393 coverage_output_dir.mkdir(exist_ok=True)

394

395 # Build coverage reports if provided

396 if coverage_artifacts_dir:

397 _coverage_structure = self.build_coverage_structure(coverage_artifacts_dir)

398

399 # Copy coverage artifacts

400 coverage_path = Path(coverage_artifacts_dir)

401 if coverage_path.exists():

402 import shutil

403

404 for item in coverage_path.iterdir():

405 if item.is_file():

406 shutil.copy2(item, coverage_output_dir / item.name)

407 elif item.is_dir():

408 shutil.copytree(

409 item, coverage_output_dir / item.name, dirs_exist_ok=True

410 )

411 else:

412 # Create styled placeholder coverage index if no artifacts provided

413 placeholder_html = (

414 '<section class="py-5"><div class="container">'

415 '<h1 class="display-5 fw-bold text-primary"><i class="bi bi-graph-up me-2"></i>Coverage Reports</h1>'

416 '<div class="alert alert-info mt-4">No coverage artifacts available.</div>'

417 "</div></section>"

418 )

419 self.build_page(

420 "base.html",

421 "coverage/index.html",

422 "Coverage Reports",

423 "Test coverage analysis",

424 "coverage/index.html",

425 content=placeholder_html,

426 )

427

428 # Generate directory indexes

429 self.generate_directory_indexes()

430

431 # Generate SEO files

432 # Build a dynamic sitemap including all HTML pages

433 try:

434 self.generate_dynamic_sitemap()

435 except Exception as e:

436 print(f"⚠️ Failed to generate dynamic sitemap: {e}")

437

438 # Always (re)write robots.txt pointing to the sitemap

439 try:

440 self.generate_robots_file()

441 except Exception as e:

442 print(f"⚠️ Failed to generate robots.txt: {e}")

443

444 # Create .nojekyll file for GitHub Pages

445 nojekyll_path = self.output_dir / ".nojekyll"

446 nojekyll_path.touch()

447 print("📄 Created .nojekyll file")

448

449 print("✅ Website build completed successfully!")

450

def build_docs_nav(self) -> dict:
    """Build a one-level navigation tree from the top of the ``docs`` directory.

    Each markdown file and each sub-directory directly under ``docs``
    (relative to the current working directory) becomes one child entry,
    in sorted order. The result is cached on ``self.docs_nav_data``.

    Returns:
        ``{"title": "Documentation", "children": [...]}``, or an empty dict
        (leaving the cache untouched) when ``docs`` does not exist.
    """
    docs_root = Path("docs")
    if not docs_root.exists():
        return {}

    entries = []
    for entry in sorted(docs_root.iterdir()):
        if entry.is_file() and entry.suffix == ".md":
            label, link = self._humanize_title(entry.stem), f"docs/{entry.name}"
        elif entry.is_dir():
            label, link = self._humanize_title(entry.name), f"docs/{entry.name}/"
        else:
            # Non-markdown files are not part of the navigation.
            continue
        entries.append({"title": label, "url": link})

    navigation = {"title": "Documentation", "children": entries}
    self.docs_nav_data = navigation
    return navigation

478

def generate_seo_files(self) -> None:
    """Write a minimal ``sitemap.xml`` and ``robots.txt`` to the output directory.

    The sitemap lists only the site root and ``/docs/``, stamped with the
    current local date. Both files use ``self.base_url`` (trailing slashes
    removed) or ``https://qdrant-loader.net`` when no base URL is set.
    """
    from datetime import datetime

    site_base = (
        self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net"
    )
    current_date = datetime.now().strftime("%Y-%m-%d")

    sitemap_content = "\n".join(
        [
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
            "  <url>",
            f"    <loc>{site_base}/</loc>",
            f"    <lastmod>{current_date}</lastmod>",
            "    <changefreq>weekly</changefreq>",
            "    <priority>1.0</priority>",
            "  </url>",
            "  <url>",
            f"    <loc>{site_base}/docs/</loc>",
            f"    <lastmod>{current_date}</lastmod>",
            "    <changefreq>weekly</changefreq>",
            "    <priority>0.8</priority>",
            "  </url>",
            "</urlset>",
        ]
    )

    sitemap_path = self.output_dir / "sitemap.xml"
    with open(sitemap_path, "w", encoding="utf-8") as f:
        f.write(sitemap_content)
    print("📄 Generated sitemap.xml")

    # The sitemap URL is built from site_base directly. Substituting a
    # placeholder host afterwards would corrupt any base URL that itself
    # contains the placeholder text (for example "https://example.com.au").
    robots_content = f"User-agent: *\nAllow: /\n\nSitemap: {site_base}/sitemap.xml\n"

    robots_path = self.output_dir / "robots.txt"
    with open(robots_path, "w", encoding="utf-8") as f:
        f.write(robots_content)
    print("📄 Generated robots.txt")

524

def generate_robots_file(self) -> None:
    """Write ``robots.txt`` (allow everything, point at the sitemap).

    The sitemap URL is built from ``self.base_url`` with trailing slashes
    removed, or from ``https://qdrant-loader.net`` when no base URL is set.
    """
    if self.base_url:
        site_base = self.base_url.rstrip("/")
    else:
        site_base = "https://qdrant-loader.net"

    # The trailing empty item makes the file end with a newline.
    robots_lines = [
        "User-agent: *",
        "Allow: /",
        "",
        f"Sitemap: {site_base}/sitemap.xml",
        "",
    ]
    robots_path = self.output_dir / "robots.txt"
    with open(robots_path, "w", encoding="utf-8") as handle:
        handle.write("\n".join(robots_lines))
    print("📄 Generated robots.txt")

539

540 def generate_dynamic_sitemap(

541 self, date: str = None, pages: list[str] = None

542 ) -> str:

543 """Generate dynamic sitemap with custom pages."""

544 from datetime import datetime

545

546 base_url = (

547 self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net"

548 )

549

550 # Auto-discover pages if not provided

551 if pages is None:

552 pages = []

553 # Find HTML files in site directory

554 if self.output_dir.exists():

555 for html_file in self.output_dir.rglob("*.html"):

556 rel_path = str(html_file.relative_to(self.output_dir))

557 pages.append(rel_path)

558

559 # Use provided date or current date

560 if date is None:

561 date = datetime.now().strftime("%Y-%m-%d")

562

563 sitemap_content = '<?xml version="1.0" encoding="UTF-8"?>\n'

564 sitemap_content += (

565 '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'

566 )

567

568 for page in pages:

569 sitemap_content += " <url>\n"

570 sitemap_content += f" <loc>{base_url}/{page}</loc>\n"

571 sitemap_content += f" <lastmod>{date}</lastmod>\n"

572 sitemap_content += " <changefreq>weekly</changefreq>\n"

573 sitemap_content += " <priority>0.8</priority>\n"

574 sitemap_content += " </url>\n"

575

576 sitemap_content += "</urlset>"

577

578 # Write sitemap to file

579 sitemap_path = self.output_dir / "sitemap.xml"

580 sitemap_path.parent.mkdir(parents=True, exist_ok=True)

581 with open(sitemap_path, "w", encoding="utf-8") as f:

582 f.write(sitemap_content)

583 print(f"📄 Generated dynamic sitemap.xml with {len(pages)} pages")

584

585 return sitemap_content

586

def build_markdown_page(
    self,
    markdown_file: str,
    output_path: str,
    title: str = "",
    breadcrumb: str = "",
    **kwargs,
) -> None:
    """Convert a markdown file to a docs-layout page and build it.

    The markdown is link-normalized, rendered to HTML, link-normalized again,
    wrapped in a two-column layout (table of contents plus body) and passed
    to ``build_page`` using the ``base.html`` template.

    Args:
        markdown_file: Path of the markdown source.
        output_path: Site-relative output path; also used as canonical path.
        title: Page title; extracted from the markdown when empty.
        breadcrumb: Forwarded to ``build_page`` as the ``breadcrumb``
            placeholder.
        **kwargs: Further placeholder values forwarded to ``build_page``.

    A missing or unreadable source file is reported with a warning and the
    page is skipped.
    """
    markdown_path = Path(markdown_file)
    if not markdown_path.exists():
        print(
            f"⚠️ Markdown file not found: {markdown_file}, skipping page generation"
        )
        return

    try:
        with open(markdown_path, encoding="utf-8") as f:
            markdown_content = f.read()
    except (OSError, UnicodeError) as e:
        print(f"⚠️ Failed to read markdown file {markdown_file}: {e}")
        return

    if not title:
        title = self.extract_title_from_markdown(markdown_content)

    # Links are normalized both before and after rendering.
    markdown_content = self.markdown_processor.convert_markdown_links_to_html(
        markdown_content, str(markdown_path)
    )
    html_content = self.markdown_to_html(
        markdown_content, str(markdown_path), output_path
    )
    html_content = self.markdown_processor.convert_markdown_links_to_html(
        html_content, str(markdown_path)
    )

    toc_html = self.render_toc(html_content)
    if toc_html:
        toc_html = self.add_bootstrap_classes(toc_html)

    # Computed outside the f-string: a backslash inside an f-string
    # expression is a syntax error before Python 3.12.
    toc_block = toc_html or '<div class="text-muted small">No sections</div>'

    wrapped_content = f"""
<section class="py-5">
  <div class="container">
    <div class="row">
      <aside class="col-lg-3 d-none d-lg-block">
        <div class="position-sticky" style="top: 6rem;">
          {toc_block}
        </div>
      </aside>
      <div class="col-lg-9">
        {html_content}
      </div>
    </div>
  </div>
</section>
"""

    self.build_page(
        "base.html",
        output_path,
        title,
        f"{title} - QDrant Loader",
        output_path,
        content=wrapped_content,
        breadcrumb=breadcrumb,
        **kwargs,
    )

661

def build_docs_structure(self) -> dict:
    """Build an HTML page for every markdown file under ``docs``.

    Walks ``docs`` (relative to the current working directory) recursively,
    builds each ``*.md`` file into ``docs/<same path>.html`` through
    ``build_markdown_page`` and records it in the returned structure. A page
    that fails to build is reported with a warning and still listed.

    Returns:
        ``{"title": "Documentation", "children": [...]}`` where every child
        has ``title``, ``path`` (relative to ``docs``) and ``url``.
    """
    docs_dir = Path("docs")
    structure = {"title": "Documentation", "children": []}

    # The docs output directory is created even when there is nothing to build.
    docs_output_dir = self.output_dir / "docs"
    docs_output_dir.mkdir(parents=True, exist_ok=True)

    if not docs_dir.exists():
        return structure

    for item in sorted(docs_dir.rglob("*.md")):
        relative = item.relative_to(docs_dir)
        # Swap only the final suffix: a plain text replace of ".md" would also
        # rewrite directory names that happen to contain ".md". as_posix()
        # keeps the URL slash-separated on every platform.
        page_url = f"docs/{relative.with_suffix('.html').as_posix()}"
        page_title = self._humanize_title(item.stem)

        structure["children"].append(
            {
                "title": page_title,
                "path": str(relative),
                "url": page_url,
            }
        )

        try:
            self.build_markdown_page(str(item), page_url, title=page_title)
        except Exception as e:
            print(f"⚠️ Failed to build docs page {item}: {e}")

    return structure

698

699 def build_coverage_structure(self, coverage_dir: str | None = None) -> dict:

700 """Build coverage report structure."""

701 # Always create coverage output directory

702 coverage_output_dir = self.output_dir / "coverage"

703 coverage_output_dir.mkdir(parents=True, exist_ok=True)

704

705 if not coverage_dir:

706 return {"coverage_reports": []}

707

708 coverage_path = Path(coverage_dir)

709 if not coverage_path.exists():

710 return {"coverage_reports": []}

711

712 # Copy all coverage files with proper naming

713 import shutil

714

715 for item in coverage_path.iterdir():

716 # Map directory names to cleaner package names

717 dest_name = item.name

718 if item.is_dir():

719 if "htmlcov-loader" in item.name:

720 dest_name = "loader"

721 elif "htmlcov-mcp" in item.name:

722 dest_name = "mcp"

723 elif "htmlcov-website" in item.name:

724 dest_name = "website"

725 elif (

726 "htmlcov-core" in item.name

727 or "htmlcov-qdrant-loader-core" in item.name

728 ):

729 dest_name = "core"

730 elif "htmlcov" in item.name:

731 dest_name = item.name.replace("htmlcov-", "").replace(

732 "htmlcov_", ""

733 )

734

735 dest_path = coverage_output_dir / dest_name

736 try:

737 if item.is_file():

738 shutil.copy2(item, dest_path)

739 elif item.is_dir():

740 if dest_path.exists():

741 shutil.rmtree(dest_path)

742 shutil.copytree(item, dest_path)

743 print(f"📁 Copied coverage: {item.name} -> {dest_name}")

744 except Exception as e:

745 print(f"⚠️ Failed to copy coverage file {item}: {e}")

746

747 # Build reports list using the renamed directories

748 reports = []

749 for subdir in coverage_output_dir.iterdir():

750 if subdir.is_dir():

751 index_file = subdir / "index.html"

752 if index_file.exists():

753 reports.append(

754 {

755 "name": subdir.name,

756 "path": f"{subdir.name}/index.html",

757 "url": f"coverage/{subdir.name}/index.html",

758 }

759 )

760

761 # Create main coverage index page using site template when reports exist

762 if reports:

763 # Build coverage index with Bootstrap styling

764 index_content = """

765<section class=\"py-5\">

766 <div class=\"container\">

767 <h1 class=\"display-5 fw-bold text-primary mb-4\"><i class=\"bi bi-graph-up me-2\"></i>Coverage Reports</h1>

768 <div class=\"row g-4\">"""

769

770 for report in reports:

771 if report["name"] == "loader":

772 index_content += """

773 <div class="col-lg-6">

774 <div class="card">

775 <div class="card-header">

776 <h4>QDrant Loader Core</h4>

777 <span id="loader-test-indicator" class="badge">Loading...</span>

778 </div>

779 <div class="card-body">

780 <div id="loader-coverage">Loader coverage data</div>

781 <a href="loader/" class="btn btn-primary">View Detailed Report</a>

782 </div>

783 </div>

784 </div>"""

785 elif report["name"] == "mcp":

786 index_content += """

787 <div class="col-lg-6">

788 <div class="card">

789 <div class="card-header">

790 <h4>MCP Server</h4>

791 <span id="mcp-test-indicator" class="badge">Loading...</span>

792 </div>

793 <div class="card-body">

794 <div id="mcp-coverage">MCP Server coverage data</div>

795 <a href="mcp/" class="btn btn-success">View Detailed Report</a>

796 </div>

797 </div>

798 </div>"""

799 elif report["name"] == "website":

800 index_content += """

801 <div class="col-lg-6">

802 <div class="card">

803 <div class="card-header">

804 <h4>Website</h4>

805 <span id="website-test-indicator" class="badge">Loading...</span>

806 </div>

807 <div class="card-body">

808 <div id="website-coverage">Website coverage data</div>

809 <a href="website/" class="btn btn-info">View Detailed Report</a>

810 </div>

811 </div>

812 </div>"""

813 elif report["name"] == "core":

814 index_content += """

815 <div class="col-lg-6">

816 <div class="card">

817 <div class="card-header">

818 <h4>Core Library</h4>

819 <span id="core-test-indicator" class="badge">Loading...</span>

820 </div>

821 <div class="card-body">

822 <div id="core-coverage">Core library coverage data</div>

823 <a href="core/" class="btn btn-warning">View Detailed Report</a>

824 </div>

825 </div>

826 </div>"""

827

828 index_content += """

829 </div>

830 </div>

831</section>

832

833<script>

834// Compute and render coverage summary from status.json

835function coverageSummary(data){

836 try{

837 let total = 0, missing = 0;

838 if (data && data.files){

839 for (const k in data.files){

840 const f = data.files[k];

841 const nums = f && f.index && f.index.nums ? f.index.nums : (f.index && f.index.numbers ? f.index.numbers : null);

842 if (nums && typeof nums.n_statements === 'number'){

843 total += (nums.n_statements||0);

844 missing += (nums.n_missing||0);

845 }

846 }

847 }

848 // Fallback if a totals object exists

849 if (total === 0 && data && data.totals){

850 if (typeof data.totals.n_statements === 'number'){

851 total = data.totals.n_statements||0;

852 missing = data.totals.n_missing||0;

853 } else if (typeof data.totals.covered_lines === 'number' && typeof data.totals.num_statements === 'number'){

854 total = data.totals.num_statements;

855 missing = total - data.totals.covered_lines;

856 }

857 }

858 if (total > 0){

859 const covered = Math.max(0, total - missing);

860 const pct = Math.round((covered/total)*1000)/10; // one decimal

861 return {pct, covered, total};

862 }

863 } catch(e){}

864 return null;

865}

866

867function renderCoverage(id, summary){

868 const el = document.getElementById(id);

869 if (!el) return;

870 if (!summary){ el.textContent = 'Loaded'; return; }

871 const {pct, covered, total} = summary;

872 el.innerHTML = `

873 <div class="d-flex align-items-center">

874 <div class="progress flex-grow-1 me-2" style="height: 10px;">

875 <div class="progress-bar bg-success" role="progressbar" style="width: ${pct}%" aria-valuenow="${pct}" aria-valuemin="0" aria-valuemax="100"></div>

876 </div>

877 <span class="small fw-semibold">${pct}% (${covered}/${total})</span>

878 </div>`;

879}

880

881fetch('loader/status.json').then(r=>r.json()).then(d=>renderCoverage('loader-coverage', coverageSummary(d))).catch(()=>{});

882fetch('mcp/status.json').then(r=>r.json()).then(d=>renderCoverage('mcp-coverage', coverageSummary(d))).catch(()=>{});

883fetch('website/status.json').then(r=>r.json()).then(d=>renderCoverage('website-coverage', coverageSummary(d))).catch(()=>{});

884fetch('core/status.json').then(r=>r.json()).then(d=>renderCoverage('core-coverage', coverageSummary(d))).catch(()=>{});

885</script>

886"""

887 # Render through site template for full styling/navigation

888 self.build_page(

889 "base.html",

890 "coverage/index.html",

891 "Coverage Reports",

892 "Test coverage analysis",

893 "coverage/index.html",

894 content=index_content,

895 )

896 print("📄 Generated coverage index.html")

897

898 return {"coverage_reports": reports}

899

def build_package_docs(self) -> None:
    """Build documentation pages from package README files into docs/packages.

    Maps package README.md files to site docs under:
    - packages/qdrant-loader -> docs/packages/qdrant-loader/README.html
    - packages/qdrant-loader-mcp-server -> docs/packages/mcp-server/README.html
    - packages/qdrant-loader-core -> docs/packages/core/README.html

    Packages without a README are skipped; a failure while building one
    package is reported with a warning and does not stop the others.
    """
    package_mappings: list[tuple[str, str, str]] = [
        ("qdrant-loader", "qdrant-loader", "QDrant Loader"),
        ("qdrant-loader-mcp-server", "mcp-server", "MCP Server"),
        ("qdrant-loader-core", "core", "Core Library"),
    ]

    # Link hardening applied in order. The ".md" -> ".html" rule must run
    # before the generic "../docs/" collapse: once the relative prefix is
    # gone, the ".md" rule can no longer match.
    link_rewrites = (
        (
            r'href="(?:\.{2}/)+docs/([^"#]+)\.md(#[^"]*)?"',
            r'href="/docs/\1.html\2"',
        ),
        (r'href="(?:\.{2}/)+docs/', 'href="/docs/'),
        (r'href="(?:\.{2}/)+CONTRIBUTING\.md"', 'href="/docs/CONTRIBUTING.html"'),
        (r'href="(?:\.{2}/)+LICENSE(\.html)?"', 'href="/docs/LICENSE.html"'),
    )

    for pkg_name, alias, display_name in package_mappings:
        readme_path = Path("packages") / pkg_name / "README.md"
        if not readme_path.exists():
            continue

        output_path = f"docs/packages/{alias}/README.html"
        try:
            with open(readme_path, encoding="utf-8") as f:
                markdown_content = f.read()

            # Links are normalized both before and after rendering.
            normalized_md = self.markdown_processor.convert_markdown_links_to_html(
                markdown_content
            )
            html_content = self.markdown_to_html(
                normalized_md, str(readme_path), output_path
            )
            html_content = self.markdown_processor.convert_markdown_links_to_html(
                html_content, str(readme_path), output_path
            )

            # Best effort: a failure here keeps the page, just without the
            # extra link hardening.
            try:
                for pattern, replacement in link_rewrites:
                    html_content = re.sub(pattern, replacement, html_content)
            except Exception:
                pass

            toc_html = self.render_toc(html_content)
            if toc_html:
                toc_html = self.add_bootstrap_classes(toc_html)

            # Computed outside the f-string: a backslash inside an f-string
            # expression is a syntax error before Python 3.12.
            toc_block = toc_html or '<div class="text-muted small">No sections</div>'

            wrapped_content = f"""
<section class="py-5">
  <div class="container">
    <div class="row">
      <aside class="col-lg-3 d-none d-lg-block">
        <div class="position-sticky" style="top: 6rem;">
          {toc_block}
        </div>
      </aside>
      <div class="col-lg-9">
        {html_content}
      </div>
    </div>
  </div>
</section>
"""

            self.build_page(
                "base.html",
                output_path,
                f"{display_name} - README",
                f"{display_name} Documentation",
                output_path,
                content=wrapped_content,
            )
        except Exception as e:
            print(f"⚠️ Failed to build docs for package {pkg_name}: {e}")

995

def generate_directory_indexes(self) -> None:
    """Create ``index.html`` pages for documentation sub-directories.

    Both the source ``docs`` directory (relative to the current working
    directory) and ``<output_dir>/docs`` are walked recursively. For each
    sub-directory the first existing file among ``README.md``, ``index.md``,
    ``README.html`` and ``index.html`` is used as the source:

    * In the site directory, an HTML source is copied over ``index.html`` in
      the same directory; markdown sources are ignored there.
    * In the source directory, the file is built to
      ``docs/<relative dir>/index.html`` through ``build_page`` (HTML) or
      ``build_markdown_page`` (markdown).

    A failure for one directory is reported with a warning and does not stop
    the others.
    """
    source_docs_dir = Path("docs")
    site_docs_dir = self.output_dir / "docs"
    # Preference order for the file an index is generated from.
    candidate_names = ("README.md", "index.md", "README.html", "index.html")

    for docs_dir in (source_docs_dir, site_docs_dir):
        if not docs_dir.exists():
            continue
        in_site = docs_dir == site_docs_dir

        # Materialized before the loop: index files are written into the
        # tree while it is being processed.
        for directory in sorted(docs_dir.rglob("*")):
            if not directory.is_dir():
                continue

            source_file = next(
                (
                    directory / candidate
                    for candidate in candidate_names
                    if (directory / candidate).exists()
                ),
                None,
            )
            if source_file is None:
                continue

            try:
                if in_site:
                    if source_file.suffix == ".html":
                        # Always overwrite to avoid stale links. Explicit
                        # UTF-8 matches how the rest of the builder reads
                        # and writes pages.
                        index_file = directory / "index.html"
                        index_file.write_text(
                            source_file.read_text(encoding="utf-8"),
                            encoding="utf-8",
                        )
                        print(f"📄 Generated index.html from {source_file.name}")
                    continue

                relative_dir = directory.relative_to(docs_dir).as_posix()
                output_path = f"docs/{relative_dir}/index.html"
                page_title = self._humanize_title(directory.name)

                if source_file.suffix == ".html":
                    self.build_page(
                        "base.html",
                        output_path,
                        page_title,
                        f"{page_title} Documentation",
                        output_path,
                        content=source_file.read_text(encoding="utf-8"),
                    )
                else:
                    self.build_markdown_page(
                        str(source_file), output_path, title=page_title
                    )
            except Exception as e:
                print(f"⚠️ Failed to generate index for {directory}: {e}")

1063

def build_license_page(
    self,
    source_file: str = "LICENSE",
    output_file: str = "license.html",
    title: str = "License",
    description: str = "License",
) -> None:
    """Build an HTML page showing the plain-text license file.

    Args:
        source_file: Path of the license text file.
        output_file: Site-relative output path; also used as canonical path.
        title: Page title.
        description: Page description.

    A missing license file, or any failure while reading or building, is
    reported with a warning and the page is skipped.
    """
    import html

    license_path = Path(source_file)
    if not license_path.exists():
        print(f"⚠️ License file not found: {source_file}, skipping license page")
        return

    try:
        with open(license_path, encoding="utf-8") as f:
            license_content = f.read()

        # The license is plain text: escape it so characters such as "<", ">"
        # and "&" are displayed instead of being parsed as markup.
        escaped_license = html.escape(license_content)

        html_content = f"""
    <h1>License Information</h1>
    <div class="license-content">
        <pre>{escaped_license}</pre>
    </div>
    """

        self.build_page(
            "base.html",
            output_file,
            title,
            description,
            output_file,
            content=html_content,
        )
    except Exception as e:
        print(f"⚠️ Failed to build license page: {e}")