Coverage for website/builder/core.py: 89%

1"""

2Core Website Builder - Main Orchestration and Lifecycle Management.

4This module implements the main WebsiteBuilder class that orchestrates

5all build operations and manages the overall build lifecycle.

6"""

8import json

9import re

10import subprocess

11from pathlib import Path

13from .assets import AssetManager

14from .markdown import MarkdownProcessor

15from .templates import TemplateProcessor

18class WebsiteBuilder:

19 """Builds the QDrant Loader documentation website from templates."""

def __init__(
    self, templates_dir: str = "website/templates", output_dir: str = "site"
):
    """Set up build paths, per-run state and the delegate processors.

    Args:
        templates_dir: Directory holding the HTML templates.
        output_dir: Directory the generated site is written to.
    """
    self.templates_dir, self.output_dir = Path(templates_dir), Path(output_dir)
    # Stays empty unless a caller sets it or generate_project_info() picks
    # up a Homepage URL from pyproject.toml.
    self.base_url = ""
    # Docs navigation tree; populated by build_docs_nav().
    self.docs_nav_data: dict | None = None

    # Specialised helpers that the public methods forward to.
    self.template_processor = TemplateProcessor(templates_dir)
    self.markdown_processor = MarkdownProcessor()
    self.asset_manager = AssetManager(output_dir)

36 # Delegate core operations to specialized processors

def load_template(self, template_name: str) -> str:
    """Return the template called ``template_name`` via the template processor."""
    processor = self.template_processor
    return processor.load_template(template_name)

def replace_placeholders(self, content: str, replacements: dict[str, str]) -> str:
    """Forward placeholder substitution to the template processor.

    Args:
        content: Text containing placeholders.
        replacements: Mapping of placeholder name to replacement value.
    """
    processor = self.template_processor
    return processor.replace_placeholders(content, replacements)

def markdown_to_html(
    self, markdown_content: str, source_file: str = "", output_file: str = ""
) -> str:
    """Forward markdown-to-HTML conversion to the markdown processor.

    All three arguments are passed through positionally and unchanged.
    """
    convert = self.markdown_processor.markdown_to_html
    return convert(markdown_content, source_file, output_file)

def copy_assets(self) -> None:
    """Ask the asset manager to copy the website assets and return its result."""
    manager = self.asset_manager
    return manager.copy_assets()

def extract_title_from_markdown(self, markdown_content: str) -> str:
    """Forward title extraction for ``markdown_content`` to the markdown processor."""
    processor = self.markdown_processor
    return processor.extract_title_from_markdown(markdown_content)

61 # Additional markdown processing methods

def basic_markdown_to_html(self, markdown_content: str) -> str:
    """Forward the basic markdown conversion to the markdown processor."""
    processor = self.markdown_processor
    return processor.basic_markdown_to_html(markdown_content)

def convert_markdown_links_to_html(
    self, markdown_content: str, source_file: str = "", target_dir: str = ""
) -> str:
    """Forward link conversion to the markdown processor.

    All three arguments are passed through positionally and unchanged.
    """
    convert = self.markdown_processor.convert_markdown_links_to_html
    return convert(markdown_content, source_file, target_dir)

def add_bootstrap_classes(self, html_content: str) -> str:
    """Forward Bootstrap class decoration of ``html_content`` to the markdown processor."""
    processor = self.markdown_processor
    return processor.add_bootstrap_classes(html_content)

def render_toc(self, html_content: str) -> str:
    """Forward table-of-contents generation for ``html_content`` to the markdown processor."""
    processor = self.markdown_processor
    return processor.render_toc(html_content)

82 # Additional asset management methods

def copy_static_files(self, static_files: list[str]) -> None:
    """Ask the asset manager to copy ``static_files`` and return its result."""
    manager = self.asset_manager
    return manager.copy_static_files(static_files)

def get_git_timestamp(self, source_path: str) -> str:
    """Return the last commit date Git reports for ``source_path``.

    Runs ``git log -1 --format=%cd --date=iso-strict -- <source_path>`` in the
    current working directory.

    Args:
        source_path: Path of the file to look up.

    Returns:
        The stripped command output, or ``""`` when Git cannot be started,
        exits with a non-zero status, or prints nothing.
    """
    command = [
        "git",
        "log",
        "-1",
        "--format=%cd",
        "--date=iso-strict",
        # "--" ends option parsing, so the path can never be mistaken for a
        # revision or a command-line flag.
        "--",
        source_path,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, cwd=".")
    except (subprocess.CalledProcessError, OSError):
        # Best effort: a missing or non-executable git must not abort a build.
        return ""
    if result.returncode != 0:
        return ""
    return result.stdout.strip()

101

102 def _humanize_title(self, name: str) -> str:

103 """Convert filename to human-readable title."""

104 # Remove file extension and common prefixes

105 title = (

106 name.replace(".md", "")

107 .replace("README", "")

108 .replace("_", " ")

109 .replace("-", " ")

110 )

111

112 # Handle common patterns

113 title_mappings = {

114 "cli reference": "CLI Reference",

115 "api": "API",

116 "faq": "FAQ",

117 "toc": "Table of Contents",

118 "readme": "Overview",

119 }

120

121 title_lower = title.lower().strip()

122 if title_lower in title_mappings:

123 return title_mappings[title_lower]

124

125 # Capitalize words

126 return " ".join(word.capitalize() for word in title.split())

127

def generate_project_info(self, **kwargs) -> dict:
    """Collect project metadata, write ``project-info.json`` and return it.

    Sources, applied in this order (later ones win):
      1. Built-in defaults.
      2. ``kwargs``.
      3. ``name`` / ``version`` / ``description`` and the ``Repository`` or
         ``Source`` URL from ``pyproject.toml`` in the working directory.
      4. Commit hash and date from Git, when available.

    Side effects:
      * ``self.base_url`` is set from the pyproject ``Homepage`` URL when it
        is still empty and ``base_url_user_set`` is not truthy.
      * ``<output_dir>/project-info.json`` is (re)written.

    Returns:
        The metadata dict, including ``commit`` and ``build`` sub-dicts.
    """
    from datetime import datetime, timezone

    project_info = {
        "name": "QDrant Loader",
        "version": "0.4.0b1",
        "description": "Enterprise-ready vector database toolkit",
        "github_url": "https://github.com/martin-papy/qdrant-loader",
    }
    project_info.update(kwargs)

    try:
        # tomllib ships with Python 3.11+; fall back to the tomli backport on
        # older interpreters so metadata is not silently skipped.
        try:
            import tomllib
        except ModuleNotFoundError:
            import tomli as tomllib

        with open("pyproject.toml", "rb") as f:
            pyproject = tomllib.load(f)

        project_section = pyproject.get("project", {})
        for key in ("name", "version", "description"):
            project_info[key] = project_section.get(key, project_info[key])

        # Workspace roots are named "<something>-workspace"; show the product name.
        name = project_info.get("name")
        if isinstance(name, str) and name.endswith("-workspace"):
            project_info["name"] = "QDrant Loader"

        urls = (
            project_section.get("urls", {})
            if isinstance(project_section, dict)
            else {}
        )
        homepage = urls.get("Homepage")
        if (
            homepage
            and not getattr(self, "base_url_user_set", False)
            and not self.base_url
        ):
            # Only adopt the homepage when no base URL was supplied externally.
            self.base_url = homepage.rstrip("/")
        repo_url = urls.get("Repository") or urls.get("Source")
        if repo_url:
            project_info["github_url"] = repo_url
    except Exception:
        # Deliberately best effort: a missing or malformed pyproject.toml
        # leaves whatever was gathered so far in place.
        pass

    try:
        project_info["commit_hash"] = subprocess.run(
            ["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True
        ).stdout.strip()
        project_info["commit_date"] = subprocess.run(
            ["git", "log", "-1", "--format=%ci"],
            capture_output=True,
            text=True,
            check=True,
        ).stdout.strip()
    except (subprocess.CalledProcessError, OSError):
        # Git not available or not a git repository.
        pass

    commit_hash = project_info.get("commit_hash", "")
    project_info["commit"] = {
        "hash": commit_hash,
        # kwargs may have supplied a non-string hash; never slice those.
        "short": commit_hash[:7] if isinstance(commit_hash, str) else "",
        "date": project_info.get("commit_date", ""),
    }
    project_info["build"] = {
        # Same "YYYY-MM-DDTHH:MM:SSZ" shape as before, without the
        # deprecated datetime.utcnow().
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    }

    project_info_path = self.output_dir / "project-info.json"
    project_info_path.parent.mkdir(parents=True, exist_ok=True)
    with open(project_info_path, "w", encoding="utf-8") as f:
        json.dump(project_info, f, indent=2)

    return project_info

228

def build_page(
    self,
    template_name: str,
    output_filename: str,
    title: str,
    description: str,
    canonical_path: str,
    content: str = "",
    **extra_replacements,
) -> None:
    """Render one page from a template and write it under the output directory.

    Args:
        template_name: Outer template to render.
        output_filename: Destination path relative to the output directory.
        title: Value for the ``page_title`` placeholder.
        description: Value for the ``page_description`` placeholder.
        canonical_path: Site-relative path used for the canonical URL and
            for working out the relative base URL.
        content: Page body. When empty, a template named after
            ``output_filename`` is loaded instead.
        **extra_replacements: Additional placeholder values; they override
            the built-in ones on key clashes.

    Raises:
        FileNotFoundError: No ``content`` was given, no content template
            exists, and ``output_filename`` differs from ``canonical_path``.
    """
    page_template = self.load_template(template_name)

    if not content:
        try:
            content = self.load_template(output_filename)
        except FileNotFoundError:
            # A missing content template is tolerated (empty body) only when
            # output and canonical paths coincide, e.g. index.html.
            if output_filename != canonical_path:
                raise

    info = self.generate_project_info()

    # One "../" per directory level; root pages use the site URL or "./".
    depth = canonical_path.count("/")
    if depth:
        relative_root = "../" * depth
    elif self.base_url:
        relative_root = f"{self.base_url.rstrip('/')}/"
    else:
        relative_root = "./"

    canonical_url = canonical_path
    if self.base_url:
        canonical_url = f"{self.base_url.rstrip('/')}/{canonical_path}"

    # Optional placeholders always get a value so none is left unreplaced.
    extras = {**extra_replacements}
    for optional_key in ("additional_head", "additional_scripts"):
        extras.setdefault(optional_key, "")

    replacements = {
        "page_title": title,
        "page_description": description,
        "content": content,
        "base_url": relative_root,
        "canonical_url": canonical_url,
        "author": info.get("name", "QDrant Loader"),
        "version": info.get("version", "0.4.0b1"),
        "project_name": info["name"],
        "project_version": info["version"],
        "project_description": info["description"],
        **extras,
    }

    rendered = self.replace_placeholders(page_template, replacements)

    destination = self.output_dir / output_filename
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(rendered, encoding="utf-8")

    print(f"📄 Built {output_filename}")

299

def build_site(
    self,
    coverage_artifacts_dir: str | None = None,
    test_results_dir: str | None = None,
) -> None:
    """Run the full build: assets, pages, docs, coverage, indexes and SEO files.

    Args:
        coverage_artifacts_dir: Directory with coverage reports to publish.
            When falsy, a placeholder coverage index page is written.
        test_results_dir: Accepted for interface compatibility; not used by
            this method.

    The home page, the docs index page and the placeholder coverage page are
    built without error handling, so failures there propagate. The 404 page,
    root docs pages, package docs, sitemap and robots.txt are best effort:
    failures are printed and the build continues.
    """
    print("🏗️ Building QDrant Loader website...")

    self.output_dir.mkdir(parents=True, exist_ok=True)
    self.copy_assets()
    self.generate_project_info()

    self.build_page(
        "base.html",
        "index.html",
        "Home",
        "Enterprise-ready vector database toolkit for building searchable knowledge bases from multiple data sources including Confluence, Jira, and local files.",
        "index.html",
    )

    try:
        not_found_body = self.load_template("404.html")
        self.build_page(
            "base.html",
            "404.html",
            "Page Not Found",
            "The page you are looking for does not exist.",
            "404.html",
            content=not_found_body,
        )
    except Exception as e:
        print(f"⚠️ Failed to build 404 page: {e}")

    # Navigation data first, then the per-file docs pages.
    self.build_docs_nav()
    self.build_docs_structure()

    (self.output_dir / "docs").mkdir(exist_ok=True)
    self.build_page(
        "base.html",
        "docs/index.html",
        "Documentation",
        "QDrant Loader Documentation",
        "docs/index.html",
        content=self.load_template("docs-index.html"),
    )

    # Publish repository top-level documents under docs/.
    try:
        for root_doc in ("README.md", "CHANGELOG.md", "CONTRIBUTING.md"):
            if Path(root_doc).exists():
                self.build_markdown_page(root_doc, f"docs/{Path(root_doc).stem}.html")

        # The licence is plain text and goes through its own helper.
        if Path("LICENSE").exists():
            self.build_license_page(
                "LICENSE", "docs/LICENSE.html", "License", "License"
            )

        try:
            privacy_template = self.templates_dir / "privacy-policy.html"
            git_stamp = self.get_git_timestamp(str(privacy_template))
            if not git_stamp:
                from datetime import datetime, timezone

                # Fall back to the template's mtime, which is stable across
                # rebuilds, rather than the build date.
                modified = datetime.fromtimestamp(
                    privacy_template.stat().st_mtime, tz=timezone.utc
                )
                last_updated = modified.date().isoformat()
            else:
                # Keep only the date part of the ISO timestamp.
                last_updated = git_stamp.split("T", 1)[0]

            self.build_page(
                "base.html",
                "privacy-policy.html",
                "Privacy Policy",
                "Privacy policy for QDrant Loader",
                "privacy-policy.html",
                content=self.load_template("privacy-policy.html"),
                last_updated=last_updated,
            )
        except FileNotFoundError:
            # No privacy policy template: the page is simply not generated.
            pass
    except Exception as e:
        print(f"⚠️ Failed to build root docs pages: {e}")

    try:
        self.build_package_docs()
    except Exception as e:
        print(f"⚠️ Failed to build package docs: {e}")

    coverage_target = self.output_dir / "coverage"
    coverage_target.mkdir(exist_ok=True)

    if not coverage_artifacts_dir:
        # Nothing to publish: emit a styled placeholder index instead.
        placeholder_html = (
            '<section class="py-5"><div class="container">'
            '<h1 class="display-5 fw-bold text-primary"><i class="bi bi-graph-up me-2"></i>Coverage Reports</h1>'
            '<div class="alert alert-info mt-4">No coverage artifacts available.</div>'
            "</div></section>"
        )
        self.build_page(
            "base.html",
            "coverage/index.html",
            "Coverage Reports",
            "Test coverage analysis",
            "coverage/index.html",
            content=placeholder_html,
        )
    else:
        self.build_coverage_structure(coverage_artifacts_dir)

        # Also mirror the raw artifacts under their original names.
        artifacts_root = Path(coverage_artifacts_dir)
        if artifacts_root.exists():
            import shutil

            for artifact in artifacts_root.iterdir():
                mirrored = coverage_target / artifact.name
                if artifact.is_file():
                    shutil.copy2(artifact, mirrored)
                elif artifact.is_dir():
                    shutil.copytree(artifact, mirrored, dirs_exist_ok=True)

    self.generate_directory_indexes()

    # SEO files: a failure is reported and does not stop the build.
    for method_name, label in (
        ("generate_dynamic_sitemap", "dynamic sitemap"),
        ("generate_robots_file", "robots.txt"),
    ):
        try:
            getattr(self, method_name)()
        except Exception as e:
            print(f"⚠️ Failed to generate {label}: {e}")

    # GitHub Pages marker file.
    (self.output_dir / ".nojekyll").touch()
    print("📄 Created .nojekyll file")

    print("✅ Website build completed successfully!")

465

def build_docs_nav(self) -> dict:
    """Build a one-level navigation tree from the top of the ``docs`` directory.

    Markdown files and sub-directories directly inside ``docs`` (relative to
    the working directory) become entries, in sorted path order. The result
    is also stored on ``self.docs_nav_data``.

    Returns:
        ``{"title": "Documentation", "children": [...]}``, or ``{}`` when
        ``docs`` does not exist (``self.docs_nav_data`` is then left untouched).
    """
    docs_root = Path("docs")
    if not docs_root.exists():
        return {}

    entries = []
    for entry in sorted(docs_root.iterdir()):
        if entry.is_file() and entry.suffix == ".md":
            label, target = entry.stem, f"docs/{entry.name}"
        elif entry.is_dir():
            label, target = entry.name, f"docs/{entry.name}/"
        else:
            # Non-markdown files are not part of the navigation.
            continue
        entries.append({"title": self._humanize_title(label), "url": target})

    nav = {"title": "Documentation", "children": entries}
    self.docs_nav_data = nav
    return nav

493

def generate_seo_files(self) -> None:
    """Write a minimal ``sitemap.xml`` and ``robots.txt`` into the output directory.

    The sitemap lists the site root and ``/docs/`` with today's date as
    ``lastmod``. Both files use ``self.base_url`` (trailing slashes removed)
    or ``https://qdrant-loader.net`` when no base URL is set.
    """
    from datetime import datetime

    site_base = (
        self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net"
    )
    current_date = datetime.now().strftime("%Y-%m-%d")

    sitemap_lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    ]
    for suffix, priority in (("/", "1.0"), ("/docs/", "0.8")):
        sitemap_lines += [
            "  <url>",
            f"    <loc>{site_base}{suffix}</loc>",
            f"    <lastmod>{current_date}</lastmod>",
            "    <changefreq>weekly</changefreq>",
            f"    <priority>{priority}</priority>",
            "  </url>",
        ]
    sitemap_lines.append("</urlset>")

    sitemap_path = self.output_dir / "sitemap.xml"
    with open(sitemap_path, "w", encoding="utf-8") as f:
        f.write("\n".join(sitemap_lines))
    print("📄 Generated sitemap.xml")

    # Build the sitemap URL straight from site_base. The earlier
    # placeholder-and-replace approach rewrote any base URL that itself
    # contained "https://example.com", corrupting the resulting link.
    robots_content = f"User-agent: *\nAllow: /\n\nSitemap: {site_base}/sitemap.xml\n"

    robots_path = self.output_dir / "robots.txt"
    with open(robots_path, "w", encoding="utf-8") as f:
        f.write(robots_content)
    print("📄 Generated robots.txt")

539

def generate_robots_file(self) -> None:
    """Write ``robots.txt`` allowing all agents and pointing at the sitemap.

    The sitemap URL uses ``self.base_url`` without trailing slashes, or
    ``https://qdrant-loader.net`` when no base URL is set.
    """
    origin = "https://qdrant-loader.net"
    if self.base_url:
        origin = self.base_url.rstrip("/")

    # The trailing empty item produces the final newline of the file.
    lines = ["User-agent: *", "Allow: /", "", f"Sitemap: {origin}/sitemap.xml", ""]
    (self.output_dir / "robots.txt").write_text("\n".join(lines), encoding="utf-8")
    print("📄 Generated robots.txt")

554

555 def generate_dynamic_sitemap(

556 self, date: str = None, pages: list[str] = None

557 ) -> str:

558 """Generate dynamic sitemap with custom pages."""

559 from datetime import datetime

560

561 base_url = (

562 self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net"

563 )

564

565 # Auto-discover pages if not provided

566 if pages is None:

567 pages = []

568 # Find HTML files in site directory

569 if self.output_dir.exists():

570 for html_file in self.output_dir.rglob("*.html"):

571 rel_path = str(html_file.relative_to(self.output_dir))

572 pages.append(rel_path)

573

574 # Use provided date or current date

575 if date is None:

576 date = datetime.now().strftime("%Y-%m-%d")

577

578 sitemap_content = '<?xml version="1.0" encoding="UTF-8"?>\n'

579 sitemap_content += (

580 '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'

581 )

582

583 for page in pages:

584 sitemap_content += " <url>\n"

585 sitemap_content += f" <loc>{base_url}/{page}</loc>\n"

586 sitemap_content += f" <lastmod>{date}</lastmod>\n"

587 sitemap_content += " <changefreq>weekly</changefreq>\n"

588 sitemap_content += " <priority>0.8</priority>\n"

589 sitemap_content += " </url>\n"

590

591 sitemap_content += "</urlset>"

592

593 # Write sitemap to file

594 sitemap_path = self.output_dir / "sitemap.xml"

595 sitemap_path.parent.mkdir(parents=True, exist_ok=True)

596 with open(sitemap_path, "w", encoding="utf-8") as f:

597 f.write(sitemap_content)

598 print(f"📄 Generated dynamic sitemap.xml with {len(pages)} pages")

599

600 return sitemap_content

601

def build_markdown_page(
    self,
    markdown_file: str,
    output_path: str,
    title: str = "",
    breadcrumb: str = "",
    **kwargs,
) -> None:
    """Convert a markdown file to HTML and build it as a docs page.

    Args:
        markdown_file: Path of the markdown source.
        output_path: Site-relative output path; also used as canonical path.
        title: Page title; extracted from the markdown when empty.
        breadcrumb: Value for the ``breadcrumb`` placeholder.
        **kwargs: Extra placeholder values forwarded to ``build_page``.

    A missing or unreadable source file is reported on stdout and skipped;
    no exception is raised for it.
    """
    markdown_path = Path(markdown_file)
    if not markdown_path.exists():
        print(
            f"⚠️ Markdown file not found: {markdown_file}, skipping page generation"
        )
        return

    try:
        with open(markdown_path, encoding="utf-8") as f:
            markdown_content = f.read()
    except Exception as e:
        print(f"⚠️ Failed to read markdown file {markdown_file}: {e}")
        return

    if not title:
        title = self.extract_title_from_markdown(markdown_content)

    # Links are normalised twice: once in the markdown source, and once more
    # on the generated HTML to catch remaining hrefs.
    markdown_content = self.markdown_processor.convert_markdown_links_to_html(
        markdown_content, str(markdown_path)
    )
    html_content = self.markdown_to_html(
        markdown_content, str(markdown_path), output_path
    )
    html_content = self.markdown_processor.convert_markdown_links_to_html(
        html_content, str(markdown_path)
    )

    toc_html = self.render_toc(html_content)
    if toc_html:
        toc_html = self.add_bootstrap_classes(toc_html)
    # Resolved outside the f-string: a backslash inside a replacement field
    # is a SyntaxError before Python 3.12.
    toc_block = toc_html or '<div class="text-muted small">No sections</div>'

    wrapped_content = f"""
<section>
  <div class="container-fluid">
    <div class="row toc-layout">
      <aside class="toc-sidebar d-none d-lg-block p-0">
        <div class="position-sticky">
          {toc_block}
        </div>
      </aside>
      <div class="container-content">
        {html_content}
      </div>
    </div>
  </div>
</section>
"""

    self.build_page(
        "base.html",
        output_path,
        title,
        f"{title} - QDrant Loader",
        output_path,
        content=wrapped_content,
        breadcrumb=breadcrumb,
        **kwargs,
    )

676

def build_docs_structure(self) -> dict:
    """Build an HTML page for every markdown file under ``docs``.

    ``docs`` is resolved against the working directory and searched
    recursively. ``<output_dir>/docs`` is always created, even when there
    is nothing to build.

    Returns:
        ``{"title": "Documentation", "children": [...]}`` with one entry
        (``title``, ``path``, ``url``) per markdown file in sorted order.
        A page that fails to build is reported on stdout but still listed.
    """
    docs_dir = Path("docs")
    structure = {"title": "Documentation", "children": []}

    (self.output_dir / "docs").mkdir(parents=True, exist_ok=True)

    if not docs_dir.exists():
        return structure

    for item in sorted(docs_dir.rglob("*.md")):
        relative = item.relative_to(docs_dir)
        # Swap only the final suffix; a plain str.replace would also rewrite
        # ".md" inside directory names. as_posix() keeps URLs forward-slashed.
        page_url = f"docs/{relative.with_suffix('.html').as_posix()}"
        page_title = self._humanize_title(item.stem)

        structure["children"].append(
            {"title": page_title, "path": str(relative), "url": page_url}
        )

        try:
            self.build_markdown_page(str(item), page_url, title=page_title)
        except Exception as e:
            print(f"⚠️ Failed to build docs page {item}: {e}")

    return structure

713

714 def build_coverage_structure(self, coverage_dir: str | None = None) -> dict:

715 """Build coverage report structure."""

716 # Always create coverage output directory

717 coverage_output_dir = self.output_dir / "coverage"

718 coverage_output_dir.mkdir(parents=True, exist_ok=True)

719

720 if not coverage_dir:

721 return {"coverage_reports": []}

722

723 coverage_path = Path(coverage_dir)

724 if not coverage_path.exists():

725 return {"coverage_reports": []}

726

727 # Copy all coverage files with proper naming

728 import shutil

729

730 for item in coverage_path.iterdir():

731 # Map directory names to cleaner package names

732 dest_name = item.name

733 if item.is_dir():

734 if "htmlcov-loader" in item.name:

735 dest_name = "loader"

736 elif "htmlcov-mcp" in item.name:

737 dest_name = "mcp"

738 elif "htmlcov-website" in item.name:

739 dest_name = "website"

740 elif (

741 "htmlcov-core" in item.name

742 or "htmlcov-qdrant-loader-core" in item.name

743 ):

744 dest_name = "core"

745 elif "htmlcov" in item.name:

746 dest_name = item.name.replace("htmlcov-", "").replace(

747 "htmlcov_", ""

748 )

749

750 dest_path = coverage_output_dir / dest_name

751 try:

752 if item.is_file():

753 shutil.copy2(item, dest_path)

754 elif item.is_dir():

755 if dest_path.exists():

756 shutil.rmtree(dest_path)

757 shutil.copytree(item, dest_path)

758 print(f"📁 Copied coverage: {item.name} -> {dest_name}")

759 except Exception as e:

760 print(f"⚠️ Failed to copy coverage file {item}: {e}")

761

762 # Build reports list using the renamed directories

763 reports = []

764 for subdir in coverage_output_dir.iterdir():

765 if subdir.is_dir():

766 index_file = subdir / "index.html"

767 if index_file.exists():

768 reports.append(

769 {

770 "name": subdir.name,

771 "path": f"{subdir.name}/index.html",

772 "url": f"coverage/{subdir.name}/index.html",

773 }

774 )

775

776 # Create main coverage index page using site template when reports exist

777 if reports:

778 # Build coverage index with Bootstrap styling

779 index_content = """

780<section class=\"py-5\">

781 <div class=\"container\">

782 <h1 class=\"display-5 fw-bold text-primary mb-4\"><i class=\"bi bi-graph-up me-2\"></i>Coverage Reports</h1>

783 <div class=\"row g-4\">"""

784

785 for report in reports:

786 if report["name"] == "loader":

787 index_content += """

788 <div class="col-lg-6">

789 <div class="card">

790 <div class="card-header">

791 <h4>QDrant Loader Core</h4>

792 <span id="loader-test-indicator" class="badge">Loading...</span>

793 </div>

794 <div class="card-body">

795 <div id="loader-coverage">Loader coverage data</div>

796 <a href="loader/" class="btn btn-primary">View Detailed Report</a>

797 </div>

798 </div>

799 </div>"""

800 elif report["name"] == "mcp":

801 index_content += """

802 <div class="col-lg-6">

803 <div class="card">

804 <div class="card-header">

805 <h4>MCP Server</h4>

806 <span id="mcp-test-indicator" class="badge">Loading...</span>

807 </div>

808 <div class="card-body">

809 <div id="mcp-coverage">MCP Server coverage data</div>

810 <a href="mcp/" class="btn btn-success">View Detailed Report</a>

811 </div>

812 </div>

813 </div>"""

814 elif report["name"] == "website":

815 index_content += """

816 <div class="col-lg-6">

817 <div class="card">

818 <div class="card-header">

819 <h4>Website</h4>

820 <span id="website-test-indicator" class="badge">Loading...</span>

821 </div>

822 <div class="card-body">

823 <div id="website-coverage">Website coverage data</div>

824 <a href="website/" class="btn btn-info">View Detailed Report</a>

825 </div>

826 </div>

827 </div>"""

828 elif report["name"] == "core":

829 index_content += """

830 <div class="col-lg-6">

831 <div class="card">

832 <div class="card-header">

833 <h4>Core Library</h4>

834 <span id="core-test-indicator" class="badge">Loading...</span>

835 </div>

836 <div class="card-body">

837 <div id="core-coverage">Core library coverage data</div>

838 <a href="core/" class="btn btn-warning">View Detailed Report</a>

839 </div>

840 </div>

841 </div>"""

842

843 index_content += """

844 </div>

845 </div>

846</section>

847

848<script>

849// Compute and render coverage summary from status.json

850function coverageSummary(data){

851 try{

852 let total = 0, missing = 0;

853 if (data && data.files){

854 for (const k in data.files){

855 const f = data.files[k];

856 const nums = f && f.index && f.index.nums ? f.index.nums : (f.index && f.index.numbers ? f.index.numbers : null);

857 if (nums && typeof nums.n_statements === 'number'){

858 total += (nums.n_statements||0);

859 missing += (nums.n_missing||0);

860 }

861 }

862 }

863 // Fallback if a totals object exists

864 if (total === 0 && data && data.totals){

865 if (typeof data.totals.n_statements === 'number'){

866 total = data.totals.n_statements||0;

867 missing = data.totals.n_missing||0;

868 } else if (typeof data.totals.covered_lines === 'number' && typeof data.totals.num_statements === 'number'){

869 total = data.totals.num_statements;

870 missing = total - data.totals.covered_lines;

871 }

872 }

873 if (total > 0){

874 const covered = Math.max(0, total - missing);

875 const pct = Math.round((covered/total)*1000)/10; // one decimal

876 return {pct, covered, total};

877 }

878 } catch(e){}

879 return null;

880}

881

882function renderCoverage(id, summary){

883 const el = document.getElementById(id);

884 if (!el) return;

885 if (!summary){ el.textContent = 'Loaded'; return; }

886 const {pct, covered, total} = summary;

887 el.innerHTML = `

888 <div class="d-flex align-items-center">

889 <div class="progress flex-grow-1 me-2" style="height: 10px;">

890 <div class="progress-bar bg-success" role="progressbar" style="width: ${pct}%" aria-valuenow="${pct}" aria-valuemin="0" aria-valuemax="100"></div>

891 </div>

892 <span class="small fw-semibold">${pct}% (${covered}/${total})</span>

893 </div>`;

894}

895

896fetch('loader/status.json').then(r=>r.json()).then(d=>renderCoverage('loader-coverage', coverageSummary(d))).catch(()=>{});

897fetch('mcp/status.json').then(r=>r.json()).then(d=>renderCoverage('mcp-coverage', coverageSummary(d))).catch(()=>{});

898fetch('website/status.json').then(r=>r.json()).then(d=>renderCoverage('website-coverage', coverageSummary(d))).catch(()=>{});

899fetch('core/status.json').then(r=>r.json()).then(d=>renderCoverage('core-coverage', coverageSummary(d))).catch(()=>{});

900</script>

901"""

902 # Render through site template for full styling/navigation

903 self.build_page(

904 "base.html",

905 "coverage/index.html",

906 "Coverage Reports",

907 "Test coverage analysis",

908 "coverage/index.html",

909 content=index_content,

910 )

911 print("📄 Generated coverage index.html")

912

913 return {"coverage_reports": reports}

914

def build_package_docs(self) -> None:
    """Build documentation pages from package README files into docs/packages.

    Maps package README.md files (relative to the working directory) to:
    - packages/qdrant-loader -> docs/packages/qdrant-loader/README.html
    - packages/qdrant-loader-mcp-server -> docs/packages/mcp-server/README.html
    - packages/qdrant-loader-core -> docs/packages/core/README.html

    Packages without a README.md are skipped. A failure while building one
    package is reported on stdout and does not stop the others.
    """
    package_mappings: list[tuple[str, str, str]] = [
        ("qdrant-loader", "qdrant-loader", "QDrant Loader"),
        ("qdrant-loader-mcp-server", "mcp-server", "MCP Server"),
        ("qdrant-loader-core", "core", "Core Library"),
    ]

    # Final hardening of README links, applied in order. The .md -> .html
    # rule must come before the generic "../docs/" collapse; once that
    # prefix is rewritten the .md pattern can no longer match.
    link_rewrites = (
        (r'href="(?:\.{2}/)+docs/([^"#]+)\.md(#[^"]*)?"', r'href="/docs/\1.html\2"'),
        (r'href="(?:\.{2}/)+docs/', 'href="/docs/'),
        (r'href="(?:\.{2}/)+CONTRIBUTING\.md"', 'href="/docs/CONTRIBUTING.html"'),
        (r'href="(?:\.{2}/)+LICENSE(\.html)?"', 'href="/docs/LICENSE.html"'),
    )

    for pkg_name, alias, display_name in package_mappings:
        readme_path = Path("packages") / pkg_name / "README.md"
        if not readme_path.exists():
            continue

        output_path = f"docs/packages/{alias}/README.html"
        try:
            with open(readme_path, encoding="utf-8") as f:
                markdown_content = f.read()

            # Normalise links in the markdown before conversion.
            normalized_md = self.markdown_processor.convert_markdown_links_to_html(
                markdown_content
            )
            html_content = self.markdown_to_html(
                normalized_md, str(readme_path), output_path
            )
            # Normalise any hrefs remaining in the generated HTML.
            html_content = self.markdown_processor.convert_markdown_links_to_html(
                html_content, str(readme_path), output_path
            )

            for pattern, replacement in link_rewrites:
                html_content = re.sub(pattern, replacement, html_content)

            toc_html = self.render_toc(html_content)
            if toc_html:
                toc_html = self.add_bootstrap_classes(toc_html)
            # Resolved outside the f-string: a backslash inside a replacement
            # field is a SyntaxError before Python 3.12.
            toc_block = toc_html or '<div class="text-muted small">No sections</div>'

            wrapped_content = f"""
<section>
  <div class="container-fluid">
    <div class="row toc-layout">
      <aside class="toc-sidebar d-none d-lg-block p-0">
        <div class="position-sticky">
          {toc_block}
        </div>
      </aside>
      <div class="container-content">
        {html_content}
      </div>
    </div>
  </div>
</section>
"""

            self.build_page(
                "base.html",
                output_path,
                f"{display_name} - README",
                f"{display_name} Documentation",
                output_path,
                content=wrapped_content,
            )
        except Exception as e:
            print(f"⚠️ Failed to build docs for package {pkg_name}: {e}")

1010

def generate_directory_indexes(self) -> None:
    """Generate ``index.html`` landing pages for documentation directories.

    Every sub-directory of the source ``docs`` tree and of the built
    ``<output_dir>/docs`` tree is inspected for a landing page, using the
    first of ``README.md``, ``index.md``, ``README.html``, ``index.html``
    that exists.

    * Source tree: the page is rendered to ``docs/<relative dir>/index.html``
      via ``build_markdown_page`` (markdown) or ``build_page`` (HTML).
    * Site tree: an HTML landing page is copied verbatim to ``index.html``
      in the same directory; markdown found there is left alone.

    Failures for a single directory are reported on stdout and do not stop
    processing of the remaining directories.
    """
    source_docs_dir = Path("docs")
    site_docs_dir = self.output_dir / "docs"

    # Landing-page candidates in priority order (markdown wins over HTML,
    # README wins over index).
    candidate_names = ("README.md", "index.md", "README.html", "index.html")

    for docs_dir in (source_docs_dir, site_docs_dir):
        if not docs_dir.exists():
            continue

        in_site_tree = docs_dir == site_docs_dir

        for directory in docs_dir.rglob("*"):
            if not directory.is_dir():
                continue

            source_file = next(
                (
                    candidate
                    for candidate in (directory / name for name in candidate_names)
                    if candidate.exists()
                ),
                None,
            )
            if source_file is None:
                continue

            try:
                if in_site_tree:
                    index_file = directory / "index.html"
                    # Only HTML is copied inside the built site. When the
                    # landing page already *is* index.html there is nothing
                    # to do: copying a file onto itself is wasted I/O and
                    # would log a misleading "generated" message.
                    if source_file.suffix != ".html" or source_file == index_file:
                        continue
                    # Always overwrite so a stale index never outlives its README.
                    index_file.write_text(
                        source_file.read_text(encoding="utf-8"), encoding="utf-8"
                    )
                    print(f"📄 Generated index.html from {source_file.name}")
                    continue

                # Source files go through the normal build pipeline. The
                # output path is site-relative (URL-like), so it must use
                # forward slashes regardless of the host OS.
                relative_dir = directory.relative_to(docs_dir).as_posix()
                output_path = f"docs/{relative_dir}/index.html"
                page_title = self._humanize_title(directory.name)

                if source_file.suffix == ".html":
                    self.build_page(
                        "base.html",
                        output_path,
                        page_title,
                        f"{page_title} Documentation",
                        output_path,
                        content=source_file.read_text(encoding="utf-8"),
                    )
                else:
                    self.build_markdown_page(
                        str(source_file),
                        output_path,
                        title=page_title,
                    )
            except Exception as e:
                # Best effort: one broken directory must not abort the build.
                print(f"⚠️ Failed to generate index for {directory}: {e}")

1078

def build_license_page(
    self,
    source_file: str = "LICENSE",
    output_file: str = "license.html",
    title: str = "License",
    description: str = "License",
) -> None:
    """Build the license page from a plain-text LICENSE file.

    Args:
        source_file: Path of the license text file to read.
        output_file: Site-relative path of the page to generate; also
            passed to ``build_page`` as the fifth positional argument.
        title: Page title passed to ``build_page``.
        description: Page description passed to ``build_page``.

    If ``source_file`` does not exist, a warning is printed and no page is
    built. Any error while reading or building is reported on stdout
    rather than raised, so a license problem does not abort the site build.
    """
    # Local import: ``html`` is not among the module-level imports.
    import html

    license_path = Path(source_file)
    if not license_path.exists():
        print(f"⚠️ License file not found: {source_file}, skipping license page")
        return

    try:
        license_content = license_path.read_text(encoding="utf-8")

        # The license is plain text, not markup. Escape it so sequences such
        # as "<year>", "<https://...>" or "&" are displayed literally instead
        # of being parsed as HTML and dropped or mangling the page.
        escaped_license = html.escape(license_content, quote=False)

        # Create license page with heading
        html_content = f"""
        <h1>License Information</h1>
        <div class="license-content">
            <pre>{escaped_license}</pre>
        </div>
        """

        self.build_page(
            "base.html",
            output_file,
            title,
            description,
            output_file,
            content=html_content,
        )
    except Exception as e:
        # Best effort: report and carry on with the rest of the build.
        print(f"⚠️ Failed to build license page: {e}")

Coverage for website / builder / core.py: 89%

406 statements