Coverage for website / builder / core.py: 89%
407 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-06-11 09:34 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-06-11 09:34 +0000
1"""
2Core Website Builder - Main Orchestration and Lifecycle Management.
4This module implements the main WebsiteBuilder class that orchestrates
5all build operations and manages the overall build lifecycle.
6"""
8import json
9import re
10import subprocess
11from datetime import UTC
12from pathlib import Path
14from .assets import AssetManager
15from .markdown import MarkdownProcessor
16from .templates import TemplateProcessor
19class WebsiteBuilder:
20 """Builds the QDrant Loader documentation website from templates."""
def __init__(
    self, templates_dir: str = "website/templates", output_dir: str = "site"
):
    """Set up paths, per-run state, and the delegate processors.

    Args:
        templates_dir: Directory holding the HTML templates.
        output_dir: Directory the generated site is written to.
    """
    self.templates_dir, self.output_dir = Path(templates_dir), Path(output_dir)
    self.base_url = ""
    # Docs navigation is cached here once build_docs_nav() has run.
    self.docs_nav_data: dict | None = None

    # The processors receive the raw string arguments, not the Path objects.
    self.template_processor = TemplateProcessor(templates_dir)
    self.markdown_processor = MarkdownProcessor()
    self.asset_manager = AssetManager(output_dir)
37 # Delegate core operations to specialized processors
def load_template(self, template_name: str) -> str:
    """Return the contents of ``template_name`` via the template processor."""
    loader = self.template_processor.load_template
    return loader(template_name)
def replace_placeholders(self, content: str, replacements: dict[str, str]) -> str:
    """Substitute placeholders in ``content`` using ``replacements``.

    The substitution itself is performed by the template processor.
    """
    substitute = self.template_processor.replace_placeholders
    return substitute(content, replacements)
def markdown_to_html(
    self, markdown_content: str, source_file: str = "", output_file: str = ""
) -> str:
    """Render markdown as HTML by delegating to the markdown processor.

    ``source_file`` and ``output_file`` are forwarded positionally, unchanged.
    """
    converter = self.markdown_processor.markdown_to_html
    return converter(markdown_content, source_file, output_file)
def copy_assets(self) -> None:
    """Ask the asset manager to copy the website assets to the output directory."""
    manager = self.asset_manager
    return manager.copy_assets()
def extract_title_from_markdown(self, markdown_content: str) -> str:
    """Return the title the markdown processor extracts from ``markdown_content``."""
    extractor = self.markdown_processor.extract_title_from_markdown
    return extractor(markdown_content)
62 # Additional markdown processing methods
def basic_markdown_to_html(self, markdown_content: str) -> str:
    """Run the markdown processor's basic markdown-to-HTML conversion."""
    converter = self.markdown_processor.basic_markdown_to_html
    return converter(markdown_content)
def convert_markdown_links_to_html(
    self, markdown_content: str, source_file: str = "", target_dir: str = ""
) -> str:
    """Rewrite markdown links to their HTML form via the markdown processor.

    ``source_file`` and ``target_dir`` are forwarded positionally, unchanged.
    """
    rewrite = self.markdown_processor.convert_markdown_links_to_html
    return rewrite(markdown_content, source_file, target_dir)
def add_bootstrap_classes(self, html_content: str) -> str:
    """Return ``html_content`` after the markdown processor adds Bootstrap classes."""
    decorate = self.markdown_processor.add_bootstrap_classes
    return decorate(html_content)
def render_toc(self, html_content: str) -> str:
    """Return the table of contents the markdown processor builds from ``html_content``."""
    build_toc = self.markdown_processor.render_toc
    return build_toc(html_content)
83 # Additional asset management methods
def copy_static_files(self, static_files: list[str]) -> None:
    """Hand the list of static files to the asset manager for copying."""
    manager = self.asset_manager
    return manager.copy_static_files(static_files)
def get_git_timestamp(self, source_path: str) -> str:
    """Return the strict-ISO date of the last Git commit touching ``source_path``.

    An empty string is returned when the ``git`` executable is missing, the
    command exits non-zero, or it prints nothing.
    """
    git_command = [
        "git",
        "log",
        "-1",
        "--format=%cd",
        "--date=iso-strict",
        source_path,
    ]
    try:
        completed = subprocess.run(
            git_command, capture_output=True, text=True, cwd="."
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return ""

    if completed.returncode != 0:
        return ""
    stamp = completed.stdout.strip()
    return stamp if stamp else ""
103 def _humanize_title(self, name: str) -> str:
104 """Convert filename to human-readable title."""
105 # Remove file extension and common prefixes
106 title = (
107 name.replace(".md", "")
108 .replace("README", "")
109 .replace("_", " ")
110 .replace("-", " ")
111 )
113 # Handle common patterns
114 title_mappings = {
115 "cli reference": "CLI Reference",
116 "api": "API",
117 "faq": "FAQ",
118 "toc": "Table of Contents",
119 "readme": "Overview",
120 }
122 title_lower = title.lower().strip()
123 if title_lower in title_mappings:
124 return title_mappings[title_lower]
126 # Capitalize words
127 return " ".join(word.capitalize() for word in title.split())
def generate_project_info(self, **kwargs) -> dict:
    """Collect project metadata, write it to ``project-info.json``, and return it.

    The result starts from built-in defaults, is updated with ``kwargs``, and
    is then updated from the ``[project]`` table of ``pyproject.toml`` in the
    current directory when that file can be read. Note that pyproject values
    for ``name``/``version``/``description`` therefore take precedence over
    ``kwargs``. Git commit hash/date and a UTC build timestamp are added on a
    best-effort basis.

    Side effects:
        - May set ``self.base_url`` from the pyproject ``Homepage`` URL when
          no base URL is set and ``base_url_user_set`` is not truthy.
        - Writes ``<output_dir>/project-info.json``.

    Returns:
        The project info dictionary that was written to disk.
    """
    from datetime import datetime, timezone

    project_info = {
        "name": "QDrant Loader",
        "version": "0.4.0b1",
        "description": "Enterprise-ready vector database toolkit",
        "github_url": "https://github.com/martin-papy/qdrant-loader",
    }

    # Override with any provided kwargs
    project_info.update(kwargs)

    # Try to load from pyproject.toml
    try:
        import tomli

        with open("pyproject.toml", "rb") as f:
            pyproject = tomli.load(f)
        project_section = pyproject.get("project", {})
        project_info.update(
            {
                "name": project_section.get("name", project_info["name"]),
                "version": project_section.get("version", project_info["version"]),
                "description": project_section.get(
                    "description", project_info["description"]
                ),
            }
        )
        # Normalize workspace naming to product name
        name = project_info.get("name")
        if isinstance(name, str) and name.endswith("-workspace"):
            project_info["name"] = "QDrant Loader"

        # Try to get homepage/repository from pyproject urls
        urls = (
            project_section.get("urls", {})
            if isinstance(project_section, dict)
            else {}
        )
        homepage = urls.get("Homepage")
        if (
            homepage
            and not getattr(self, "base_url_user_set", False)
            and not self.base_url
        ):
            # Set base_url from pyproject if not provided externally
            self.base_url = homepage.rstrip("/")
        repo_url = urls.get("Repository") or urls.get("Source")
        if repo_url:
            project_info["github_url"] = repo_url
    except Exception:
        # Deliberate best effort: a missing tomli, a missing/unreadable
        # pyproject.toml, or malformed entries must not fail the build.
        pass

    # Try to get git information
    try:
        result = subprocess.run(
            ["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True
        )
        project_info["commit_hash"] = result.stdout.strip()

        result = subprocess.run(
            ["git", "log", "-1", "--format=%ci"],
            capture_output=True,
            text=True,
            check=True,
        )
        project_info["commit_date"] = result.stdout.strip()
    except (subprocess.CalledProcessError, FileNotFoundError):
        # Git not available or not a git repository
        pass

    # Add build metadata
    commit_hash = project_info.get("commit_hash", "")
    project_info["commit"] = {
        "hash": commit_hash,
        "short": commit_hash[:7] if isinstance(commit_hash, str) else "",
        "date": project_info.get("commit_date", ""),
    }
    # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC
    # clock and format it to the same "YYYY-MM-DDTHH:MM:SSZ" shape.
    project_info["build"] = {
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    }

    # Write project info JSON file
    project_info_path = self.output_dir / "project-info.json"
    project_info_path.parent.mkdir(parents=True, exist_ok=True)
    with open(project_info_path, "w", encoding="utf-8") as f:
        json.dump(project_info, f, indent=2)

    return project_info
def build_page(
    self,
    template_name: str,
    output_filename: str,
    title: str,
    description: str,
    canonical_path: str,
    content: str = "",
    **extra_replacements,
) -> None:
    """Render one page from ``template_name`` and write it under the output dir.

    When ``content`` is empty, a template named after ``output_filename`` is
    loaded as the content. If that template is missing, the error propagates
    unless ``output_filename`` equals ``canonical_path``, in which case the
    content stays empty.

    Extra keyword arguments become additional placeholder values and override
    the standard ones; ``additional_head`` and ``additional_scripts`` default
    to empty strings.

    Raises:
        FileNotFoundError: see the fallback rule above.
    """
    page_shell = self.load_template(template_name)

    if not content:
        try:
            content = self.load_template(output_filename)
        except FileNotFoundError:
            # Only pages whose output name equals their canonical path may
            # silently end up with empty content.
            if output_filename != canonical_path:
                raise

    info = self.generate_project_info()

    # Pages in sub-directories climb back to the site root with "../".
    depth = canonical_path.count("/")
    if depth > 0:
        relative_base = "../" * depth
    elif self.base_url:
        relative_base = self.base_url.rstrip("/") + "/"
    else:
        relative_base = "./"

    if self.base_url:
        canonical_url = self.base_url.rstrip("/") + "/" + canonical_path
    else:
        canonical_url = canonical_path

    replacements = {
        "page_title": title,
        "page_description": description,
        "content": content,
        "base_url": relative_base,
        "canonical_url": canonical_url,
        "author": info.get("name", "QDrant Loader"),
        "version": info.get("version", "0.4.0b1"),
        "project_name": info["name"],
        "project_version": info["version"],
        "project_description": info["description"],
    }
    # Caller-supplied values win over the standard ones.
    optional_defaults = {"additional_head": "", "additional_scripts": ""}
    replacements.update({**optional_defaults, **extra_replacements})

    rendered = self.replace_placeholders(page_shell, replacements)

    destination = self.output_dir / output_filename
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(rendered, encoding="utf-8")

    print(f"📄 Built {output_filename}")
301 def build_site(
302 self,
303 coverage_artifacts_dir: str | None = None,
304 test_results_dir: str | None = None,
305 ) -> None:
306 """Build the complete website."""
307 print("🏗️ Building QDrant Loader website...")
309 # Create output directory
310 self.output_dir.mkdir(parents=True, exist_ok=True)
312 # Copy assets first
313 self.copy_assets()
315 # Generate project info
316 self.generate_project_info()
318 # Build main pages
319 self.build_page(
320 "base.html",
321 "index.html",
322 "Home",
323 "Enterprise-ready vector database toolkit for building searchable knowledge bases from multiple data sources including Confluence, Jira, and local files.",
324 "index.html",
325 )
327 # Build a friendly 404 page
328 try:
329 self.build_page(
330 "base.html",
331 "404.html",
332 "Page Not Found",
333 "The page you are looking for does not exist.",
334 "404.html",
335 content=self.load_template("404.html"),
336 )
337 except Exception as e:
338 print(f"⚠️ Failed to build 404 page: {e}")
340 # Build docs structure and pages
341 self.build_docs_nav()
342 _docs_structure = self.build_docs_structure()
344 # Create docs directory and index
345 docs_output_dir = self.output_dir / "docs"
346 docs_output_dir.mkdir(exist_ok=True)
348 # Build docs index page using dedicated template content
349 self.build_page(
350 "base.html",
351 "docs/index.html",
352 "Documentation",
353 "QDrant Loader Documentation",
354 "docs/index.html",
355 content=self.load_template("docs-index.html"),
356 )
358 # Bridge root docs from repository top-level files
359 try:
360 if Path("README.md").exists():
361 self.build_markdown_page("README.md", "docs/README.html")
362 if Path("CHANGELOG.md").exists():
363 self.build_markdown_page("CHANGELOG.md", "docs/CHANGELOG.html")
364 if Path("CONTRIBUTING.md").exists():
365 self.build_markdown_page("CONTRIBUTING.md", "docs/CONTRIBUTING.html")
366 # License (plain text) rendered via helper
367 if Path("LICENSE").exists():
368 self.build_license_page(
369 "LICENSE", "docs/LICENSE.html", "License", "License"
370 )
371 # Privacy policy page from template
372 try:
373 privacy_template_path = self.templates_dir / "privacy-policy.html"
374 privacy_last_updated = self.get_git_timestamp(
375 str(privacy_template_path)
376 )
377 if privacy_last_updated:
378 privacy_last_updated = privacy_last_updated.split("T", 1)[0]
379 else:
380 from datetime import datetime
382 # Use stable template mtime fallback instead of build date.
383 privacy_last_updated = (
384 datetime.fromtimestamp(
385 privacy_template_path.stat().st_mtime, tz=UTC
386 )
387 .date()
388 .isoformat()
389 )
391 self.build_page(
392 "base.html",
393 "privacy-policy.html",
394 "Privacy Policy",
395 "Privacy policy for QDrant Loader",
396 "privacy-policy.html",
397 content=self.load_template("privacy-policy.html"),
398 last_updated=privacy_last_updated,
399 )
400 except FileNotFoundError:
401 pass
402 except Exception as e:
403 print(f"⚠️ Failed to build root docs pages: {e}")
405 # Build package README documentation into docs/packages
406 try:
407 self.build_package_docs()
408 except Exception as e:
409 print(f"⚠️ Failed to build package docs: {e}")
411 # Always create coverage directory and ensure index.html exists
412 coverage_output_dir = self.output_dir / "coverage"
413 coverage_output_dir.mkdir(exist_ok=True)
415 # Build coverage reports if provided
416 if coverage_artifacts_dir:
417 _coverage_structure = self.build_coverage_structure(coverage_artifacts_dir)
419 # Copy coverage artifacts
420 coverage_path = Path(coverage_artifacts_dir)
421 if coverage_path.exists():
422 import shutil
424 for item in coverage_path.iterdir():
425 if item.is_file():
426 shutil.copy2(item, coverage_output_dir / item.name)
427 elif item.is_dir():
428 shutil.copytree(
429 item, coverage_output_dir / item.name, dirs_exist_ok=True
430 )
431 else:
432 # Create styled placeholder coverage index if no artifacts provided
433 placeholder_html = (
434 '<section class="py-5"><div class="container">'
435 '<h1 class="display-5 fw-bold text-primary"><i class="bi bi-graph-up me-2"></i>Coverage Reports</h1>'
436 '<div class="alert alert-info mt-4">No coverage artifacts available.</div>'
437 "</div></section>"
438 )
439 self.build_page(
440 "base.html",
441 "coverage/index.html",
442 "Coverage Reports",
443 "Test coverage analysis",
444 "coverage/index.html",
445 content=placeholder_html,
446 )
448 # Generate directory indexes
449 self.generate_directory_indexes()
451 # Generate SEO files
452 # Build a dynamic sitemap including all HTML pages
453 try:
454 self.generate_dynamic_sitemap()
455 except Exception as e:
456 print(f"⚠️ Failed to generate dynamic sitemap: {e}")
458 # Always (re)write robots.txt pointing to the sitemap
459 try:
460 self.generate_robots_file()
461 except Exception as e:
462 print(f"⚠️ Failed to generate robots.txt: {e}")
464 # Create .nojekyll file for GitHub Pages
465 nojekyll_path = self.output_dir / ".nojekyll"
466 nojekyll_path.touch()
467 print("📄 Created .nojekyll file")
469 print("✅ Website build completed successfully!")
471 def build_docs_nav(self) -> dict:
472 """Build documentation navigation structure."""
473 # Simplified navigation building
474 docs_dir = Path("docs")
475 if not docs_dir.exists():
476 return {}
478 nav_data = {"title": "Documentation", "children": []}
480 for item in sorted(docs_dir.iterdir()):
481 if item.is_file() and item.suffix == ".md":
482 nav_data["children"].append(
483 {
484 "title": self._humanize_title(item.stem),
485 "url": f"docs/{item.name}",
486 }
487 )
488 elif item.is_dir():
489 nav_data["children"].append(
490 {
491 "title": self._humanize_title(item.name),
492 "url": f"docs/{item.name}/",
493 }
494 )
496 self.docs_nav_data = nav_data
497 return nav_data
def generate_seo_files(self) -> None:
    """Write a minimal ``sitemap.xml`` and ``robots.txt`` to the output directory.

    The sitemap lists the site root and ``/docs/`` with today's date as
    ``lastmod``. Both files use ``self.base_url`` (without trailing slash) or
    ``https://qdrant-loader.net`` when no base URL is configured.
    """
    from datetime import datetime

    # Determine base site URL
    site_base = (
        self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net"
    )

    # Get current date for lastmod
    current_date = datetime.now().strftime("%Y-%m-%d")

    # Generate simple sitemap.xml (no trailing newline)
    sitemap_content = "\n".join(
        [
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
            "  <url>",
            f"    <loc>{site_base}/</loc>",
            f"    <lastmod>{current_date}</lastmod>",
            "    <changefreq>weekly</changefreq>",
            "    <priority>1.0</priority>",
            "  </url>",
            "  <url>",
            f"    <loc>{site_base}/docs/</loc>",
            f"    <lastmod>{current_date}</lastmod>",
            "    <changefreq>weekly</changefreq>",
            "    <priority>0.8</priority>",
            "  </url>",
            "</urlset>",
        ]
    )

    sitemap_path = self.output_dir / "sitemap.xml"
    with open(sitemap_path, "w", encoding="utf-8") as f:
        f.write(sitemap_content)
    print("📄 Generated sitemap.xml")

    # Generate simple robots.txt. The sitemap URL is rendered directly from
    # site_base; post-processing the text with str.replace() would corrupt a
    # base URL that itself starts with the old placeholder host.
    robots_content = f"User-agent: *\nAllow: /\n\nSitemap: {site_base}/sitemap.xml\n"

    robots_path = self.output_dir / "robots.txt"
    with open(robots_path, "w", encoding="utf-8") as f:
        f.write(robots_content)
    print("📄 Generated robots.txt")
def generate_robots_file(self) -> None:
    """Write ``robots.txt`` allowing all agents and pointing at the sitemap.

    The sitemap URL is based on ``self.base_url`` (trailing slash removed) or
    ``https://qdrant-loader.net`` when no base URL is set.
    """
    if self.base_url:
        site_base = self.base_url.rstrip("/")
    else:
        site_base = "https://qdrant-loader.net"

    robots_lines = [
        "User-agent: *",
        "Allow: /",
        "",
        f"Sitemap: {site_base}/sitemap.xml",
        "",  # keeps the trailing newline
    ]
    target = self.output_dir / "robots.txt"
    target.write_text("\n".join(robots_lines), encoding="utf-8")
    print("📄 Generated robots.txt")
560 def generate_dynamic_sitemap(
561 self, date: str = None, pages: list[str] = None
562 ) -> str:
563 """Generate dynamic sitemap with custom pages."""
564 from datetime import datetime
566 base_url = (
567 self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net"
568 )
570 # Auto-discover pages if not provided
571 if pages is None:
572 pages = []
573 # Find HTML files in site directory
574 if self.output_dir.exists():
575 for html_file in self.output_dir.rglob("*.html"):
576 rel_path = str(html_file.relative_to(self.output_dir))
577 pages.append(rel_path)
579 # Use provided date or current date
580 if date is None:
581 date = datetime.now().strftime("%Y-%m-%d")
583 sitemap_content = '<?xml version="1.0" encoding="UTF-8"?>\n'
584 sitemap_content += (
585 '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
586 )
588 for page in pages:
589 sitemap_content += " <url>\n"
590 sitemap_content += f" <loc>{base_url}/{page}</loc>\n"
591 sitemap_content += f" <lastmod>{date}</lastmod>\n"
592 sitemap_content += " <changefreq>weekly</changefreq>\n"
593 sitemap_content += " <priority>0.8</priority>\n"
594 sitemap_content += " </url>\n"
596 sitemap_content += "</urlset>"
598 # Write sitemap to file
599 sitemap_path = self.output_dir / "sitemap.xml"
600 sitemap_path.parent.mkdir(parents=True, exist_ok=True)
601 with open(sitemap_path, "w", encoding="utf-8") as f:
602 f.write(sitemap_content)
603 print(f"📄 Generated dynamic sitemap.xml with {len(pages)} pages")
605 return sitemap_content
def build_markdown_page(
    self,
    markdown_file: str,
    output_path: str,
    title: str = "",
    breadcrumb: str = "",
    **kwargs,
) -> None:
    """Render a markdown file into a docs-layout page.

    The markdown is link-normalised, converted to HTML, link-normalised again,
    wrapped in the table-of-contents layout, and passed to ``build_page`` with
    ``output_path`` as both output file and canonical path.

    A missing or unreadable markdown file is reported with a warning and the
    page is skipped; no exception is raised for those cases.

    Args:
        markdown_file: Path of the markdown source.
        output_path: Site-relative output path of the HTML page.
        title: Page title; extracted from the markdown when empty.
        breadcrumb: Forwarded to ``build_page`` as the ``breadcrumb`` value.
        **kwargs: Extra placeholder values forwarded to ``build_page``.
    """
    markdown_path = Path(markdown_file)
    if not markdown_path.exists():
        print(
            f"⚠️ Markdown file not found: {markdown_file}, skipping page generation"
        )
        return

    try:
        markdown_content = markdown_path.read_text(encoding="utf-8")
    except (OSError, UnicodeError) as e:
        print(f"⚠️ Failed to read markdown file {markdown_file}: {e}")
        return

    # Extract title if not provided
    if not title:
        title = self.extract_title_from_markdown(markdown_content)

    # Normalize links in markdown before conversion
    markdown_content = self.markdown_processor.convert_markdown_links_to_html(
        markdown_content, str(markdown_path)
    )

    # Convert markdown to HTML
    html_content = self.markdown_to_html(
        markdown_content, str(markdown_path), output_path
    )
    # Normalize any remaining HTML hrefs
    html_content = self.markdown_processor.convert_markdown_links_to_html(
        html_content, str(markdown_path)
    )

    # Build a Table of Contents and wrap in docs layout
    toc_html = self.render_toc(html_content)
    if toc_html:
        toc_html = self.add_bootstrap_classes(toc_html)

    # Resolved outside the f-string: a backslash-escaped literal inside a
    # replacement field is a syntax error before Python 3.12.
    toc_block = toc_html or '<div class="text-muted small">No sections</div>'

    wrapped_content = f"""
<section>
  <div class="container-fluid">
    <div class="row toc-layout">
      <aside class="toc-sidebar d-none d-lg-block p-0">
        <div class="position-sticky">
          {toc_block}
        </div>
      </aside>
      <div class="container-content">
        {html_content}
      </div>
    </div>
  </div>
</section>
"""

    # Build the page
    self.build_page(
        "base.html",
        output_path,
        title,
        f"{title} - QDrant Loader",
        output_path,
        content=wrapped_content,
        breadcrumb=breadcrumb,
        **kwargs,
    )
def build_docs_structure(self) -> dict:
    """Build every markdown file under ``docs`` and describe the result.

    Creates ``<output_dir>/docs``, then renders each ``*.md`` file found
    recursively under the source ``docs`` directory (sorted) to the matching
    ``docs/....html`` page. A failure on one page is reported with a warning
    and does not stop the remaining pages.

    Returns:
        ``{"title": "Documentation", "children": [...]}`` where each child
        has ``title``, ``path`` (relative to ``docs``) and ``url``. The
        children list is empty when ``docs`` does not exist.
    """
    docs_dir = Path("docs")
    structure = {"title": "Documentation", "children": []}

    # Create docs output directory
    docs_output_dir = self.output_dir / "docs"
    docs_output_dir.mkdir(parents=True, exist_ok=True)

    if not docs_dir.exists():
        return structure

    # Process all markdown files in docs
    for item in sorted(docs_dir.rglob("*.md")):
        relative = item.relative_to(docs_dir)
        # Swap only the file's own suffix (a plain str.replace would also
        # rewrite ".md" inside directory names) and use "/" for the URL.
        page_url = f"docs/{relative.with_suffix('.html').as_posix()}"
        page_title = self._humanize_title(item.stem)

        structure["children"].append(
            {
                "title": page_title,
                "path": str(relative),
                "url": page_url,
            }
        )

        # Build the page from markdown
        try:
            self.build_markdown_page(str(item), page_url, title=page_title)
        except Exception as e:
            print(f"⚠️ Failed to build docs page {item}: {e}")

    return structure
719 def build_coverage_structure(self, coverage_dir: str | None = None) -> dict:
720 """Build coverage report structure."""
721 # Always create coverage output directory
722 coverage_output_dir = self.output_dir / "coverage"
723 coverage_output_dir.mkdir(parents=True, exist_ok=True)
725 if not coverage_dir:
726 return {"coverage_reports": []}
728 coverage_path = Path(coverage_dir)
729 if not coverage_path.exists():
730 return {"coverage_reports": []}
732 # Copy all coverage files with proper naming
733 import shutil
735 for item in coverage_path.iterdir():
736 # Map directory names to cleaner package names
737 dest_name = item.name
738 if item.is_dir():
739 if "htmlcov-loader" in item.name:
740 dest_name = "loader"
741 elif "htmlcov-mcp" in item.name:
742 dest_name = "mcp"
743 elif "htmlcov-website" in item.name:
744 dest_name = "website"
745 elif (
746 "htmlcov-core" in item.name
747 or "htmlcov-qdrant-loader-core" in item.name
748 ):
749 dest_name = "core"
750 elif "htmlcov" in item.name:
751 dest_name = item.name.replace("htmlcov-", "").replace(
752 "htmlcov_", ""
753 )
755 dest_path = coverage_output_dir / dest_name
756 try:
757 if item.is_file():
758 shutil.copy2(item, dest_path)
759 elif item.is_dir():
760 if dest_path.exists():
761 shutil.rmtree(dest_path)
762 shutil.copytree(item, dest_path)
763 print(f"📁 Copied coverage: {item.name} -> {dest_name}")
764 except Exception as e:
765 print(f"⚠️ Failed to copy coverage file {item}: {e}")
767 # Build reports list using the renamed directories
768 reports = []
769 for subdir in coverage_output_dir.iterdir():
770 if subdir.is_dir():
771 index_file = subdir / "index.html"
772 if index_file.exists():
773 reports.append(
774 {
775 "name": subdir.name,
776 "path": f"{subdir.name}/index.html",
777 "url": f"coverage/{subdir.name}/index.html",
778 }
779 )
781 # Create main coverage index page using site template when reports exist
782 if reports:
783 # Build coverage index with Bootstrap styling
784 index_content = """
785<section class=\"py-5\">
786 <div class=\"container\">
787 <h1 class=\"display-5 fw-bold text-primary mb-4\"><i class=\"bi bi-graph-up me-2\"></i>Coverage Reports</h1>
788 <div class=\"row g-4\">"""
790 for report in reports:
791 if report["name"] == "loader":
792 index_content += """
793 <div class="col-lg-6">
794 <div class="card">
795 <div class="card-header">
796 <h4>QDrant Loader Core</h4>
797 <span id="loader-test-indicator" class="badge">Loading...</span>
798 </div>
799 <div class="card-body">
800 <div id="loader-coverage">Loader coverage data</div>
801 <a href="loader/" class="btn btn-primary">View Detailed Report</a>
802 </div>
803 </div>
804 </div>"""
805 elif report["name"] == "mcp":
806 index_content += """
807 <div class="col-lg-6">
808 <div class="card">
809 <div class="card-header">
810 <h4>MCP Server</h4>
811 <span id="mcp-test-indicator" class="badge">Loading...</span>
812 </div>
813 <div class="card-body">
814 <div id="mcp-coverage">MCP Server coverage data</div>
815 <a href="mcp/" class="btn btn-success">View Detailed Report</a>
816 </div>
817 </div>
818 </div>"""
819 elif report["name"] == "website":
820 index_content += """
821 <div class="col-lg-6">
822 <div class="card">
823 <div class="card-header">
824 <h4>Website</h4>
825 <span id="website-test-indicator" class="badge">Loading...</span>
826 </div>
827 <div class="card-body">
828 <div id="website-coverage">Website coverage data</div>
829 <a href="website/" class="btn btn-info">View Detailed Report</a>
830 </div>
831 </div>
832 </div>"""
833 elif report["name"] == "core":
834 index_content += """
835 <div class="col-lg-6">
836 <div class="card">
837 <div class="card-header">
838 <h4>Core Library</h4>
839 <span id="core-test-indicator" class="badge">Loading...</span>
840 </div>
841 <div class="card-body">
842 <div id="core-coverage">Core library coverage data</div>
843 <a href="core/" class="btn btn-warning">View Detailed Report</a>
844 </div>
845 </div>
846 </div>"""
848 index_content += """
849 </div>
850 </div>
851</section>
853<script>
854// Compute and render coverage summary from status.json
855function coverageSummary(data){
856 try{
857 let total = 0, missing = 0;
858 if (data && data.files){
859 for (const k in data.files){
860 const f = data.files[k];
861 const nums = f && f.index && f.index.nums ? f.index.nums : (f.index && f.index.numbers ? f.index.numbers : null);
862 if (nums && typeof nums.n_statements === 'number'){
863 total += (nums.n_statements||0);
864 missing += (nums.n_missing||0);
865 }
866 }
867 }
868 // Fallback if a totals object exists
869 if (total === 0 && data && data.totals){
870 if (typeof data.totals.n_statements === 'number'){
871 total = data.totals.n_statements||0;
872 missing = data.totals.n_missing||0;
873 } else if (typeof data.totals.covered_lines === 'number' && typeof data.totals.num_statements === 'number'){
874 total = data.totals.num_statements;
875 missing = total - data.totals.covered_lines;
876 }
877 }
878 if (total > 0){
879 const covered = Math.max(0, total - missing);
880 const pct = Math.round((covered/total)*1000)/10; // one decimal
881 return {pct, covered, total};
882 }
883 } catch(e){}
884 return null;
885}
887function renderCoverage(id, summary){
888 const el = document.getElementById(id);
889 if (!el) return;
890 if (!summary){ el.textContent = 'Loaded'; return; }
891 const {pct, covered, total} = summary;
892 el.innerHTML = `
893 <div class="d-flex align-items-center">
894 <div class="progress flex-grow-1 me-2" style="height: 10px;">
895 <div class="progress-bar bg-success" role="progressbar" style="width: ${pct}%" aria-valuenow="${pct}" aria-valuemin="0" aria-valuemax="100"></div>
896 </div>
897 <span class="small fw-semibold">${pct}% (${covered}/${total})</span>
898 </div>`;
899}
901fetch('loader/status.json').then(r=>r.json()).then(d=>renderCoverage('loader-coverage', coverageSummary(d))).catch(()=>{});
902fetch('mcp/status.json').then(r=>r.json()).then(d=>renderCoverage('mcp-coverage', coverageSummary(d))).catch(()=>{});
903fetch('website/status.json').then(r=>r.json()).then(d=>renderCoverage('website-coverage', coverageSummary(d))).catch(()=>{});
904fetch('core/status.json').then(r=>r.json()).then(d=>renderCoverage('core-coverage', coverageSummary(d))).catch(()=>{});
905</script>
906"""
907 # Render through site template for full styling/navigation
908 self.build_page(
909 "base.html",
910 "coverage/index.html",
911 "Coverage Reports",
912 "Test coverage analysis",
913 "coverage/index.html",
914 content=index_content,
915 )
916 print("📄 Generated coverage index.html")
918 return {"coverage_reports": reports}
def build_package_docs(self) -> None:
    """Build documentation pages from package README files into docs/packages.

    Maps package README.md files to site docs under:
    - packages/qdrant-loader -> docs/packages/qdrant-loader/README.html
    - packages/qdrant-loader-mcp-server -> docs/packages/mcp-server/README.html
    - packages/qdrant-loader-core -> docs/packages/core/README.html

    Packages without a README are skipped. A failure while building one
    package is reported with a warning and does not stop the others.
    """
    package_mappings: list[tuple[str, str, str]] = [
        ("qdrant-loader", "qdrant-loader", "QDrant Loader"),
        ("qdrant-loader-mcp-server", "mcp-server", "MCP Server"),
        ("qdrant-loader-core", "core", "Core Library"),
    ]

    # Link hardening rules, applied in order. The ".md under docs" rule must
    # run before the generic "../docs/" rule: once the relative prefix has
    # been collapsed to "/docs/", the ".md" pattern can no longer match.
    link_rewrites = (
        (r'href="(?:\.{2}/)+docs/([^"#]+)\.md(#[^"]*)?"', r'href="/docs/\1.html\2"'),
        (r'href="(?:\.{2}/)+docs/', 'href="/docs/'),
        (r'href="(?:\.{2}/)+CONTRIBUTING\.md"', 'href="/docs/CONTRIBUTING.html"'),
        (r'href="(?:\.{2}/)+LICENSE(\.html)?"', 'href="/docs/LICENSE.html"'),
    )

    for pkg_name, alias, display_name in package_mappings:
        readme_path = Path("packages") / pkg_name / "README.md"
        if not readme_path.exists():
            continue

        output_path = f"docs/packages/{alias}/README.html"
        try:
            markdown_content = readme_path.read_text(encoding="utf-8")

            # Normalize links in markdown before conversion
            normalized_md = self.markdown_processor.convert_markdown_links_to_html(
                markdown_content
            )

            html_content = self.markdown_to_html(
                normalized_md, str(readme_path), output_path
            )
            # Normalize any remaining HTML hrefs
            html_content = self.markdown_processor.convert_markdown_links_to_html(
                html_content, str(readme_path), output_path
            )

            # Final hardening for package README links
            for pattern, replacement in link_rewrites:
                html_content = re.sub(pattern, replacement, html_content)

            # Build a Table of Contents and wrap with standard docs layout
            toc_html = self.render_toc(html_content)
            if toc_html:
                toc_html = self.add_bootstrap_classes(toc_html)

            # Resolved outside the f-string: a backslash-escaped literal in a
            # replacement field is a syntax error before Python 3.12.
            toc_block = toc_html or '<div class="text-muted small">No sections</div>'

            wrapped_content = f"""
<section>
  <div class="container-fluid">
    <div class="row toc-layout">
      <aside class="toc-sidebar d-none d-lg-block p-0">
        <div class="position-sticky">
          {toc_block}
        </div>
      </aside>
      <div class="container-content">
        {html_content}
      </div>
    </div>
  </div>
</section>
"""

            self.build_page(
                "base.html",
                output_path,
                f"{display_name} - README",
                f"{display_name} Documentation",
                output_path,
                content=wrapped_content,
            )
        except Exception as e:
            print(f"⚠️ Failed to build docs for package {pkg_name}: {e}")
def generate_directory_indexes(self) -> None:
    """Give documentation directories an ``index.html``.

    Both the source ``docs`` tree and ``<output_dir>/docs`` are scanned
    recursively. For each directory the first existing file among
    ``README.md``, ``index.md``, ``README.html`` and ``index.html`` is used:

    - In the site tree, an HTML source is copied over ``index.html`` in the
      same directory (markdown sources are ignored there).
    - In the source tree, the file is rendered to
      ``docs/<relative dir>/index.html`` through ``build_page`` (HTML) or
      ``build_markdown_page`` (markdown).

    A failure for one directory is reported with a warning and skipped.
    """
    site_docs_root = self.output_dir / "docs"
    # Checked in this order; the first existing candidate wins.
    candidate_names = ("README.md", "index.md", "README.html", "index.html")

    for docs_root in (Path("docs"), site_docs_root):
        if not docs_root.exists():
            continue

        for folder in docs_root.rglob("*"):
            if not folder.is_dir():
                continue

            origin = next(
                (
                    folder / candidate
                    for candidate in candidate_names
                    if (folder / candidate).exists()
                ),
                None,
            )
            if not origin:
                continue

            try:
                is_html = origin.suffix == ".html"

                if docs_root == site_docs_root:
                    # Already inside the site: always overwrite index.html so
                    # it cannot keep stale links.
                    if is_html:
                        page_text = origin.read_text(encoding="utf-8")
                        (folder / "index.html").write_text(
                            page_text, encoding="utf-8"
                        )
                        print(f"📄 Generated index.html from {origin.name}")
                    continue

                # Source tree: run through the normal build pipeline.
                target = f"docs/{folder.relative_to(docs_root)}/index.html"
                if is_html:
                    page_text = origin.read_text(encoding="utf-8")
                    self.build_page(
                        "base.html",
                        target,
                        self._humanize_title(folder.name),
                        f"{self._humanize_title(folder.name)} Documentation",
                        target,
                        content=page_text,
                    )
                else:
                    self.build_markdown_page(
                        str(origin),
                        target,
                        title=self._humanize_title(folder.name),
                    )
            except Exception as e:
                print(f"⚠️ Failed to generate index for {directory_label(folder)}: {e}") if False else print(f"⚠️ Failed to generate index for {folder}: {e}")
def build_license_page(
    self,
    source_file: str = "LICENSE",
    output_file: str = "license.html",
    title: str = "License",
    description: str = "License",
) -> None:
    """Build an HTML page showing the plain-text license file.

    The license text is HTML-escaped and placed in a ``<pre>`` block, so
    placeholders such as ``<year>`` and characters like ``&`` are displayed
    literally instead of being parsed as markup.

    A missing license file, or any failure while building the page, is
    reported with a warning; no exception is raised.

    Args:
        source_file: Path of the license text file.
        output_file: Site-relative output path, also used as canonical path.
        title: Page title.
        description: Page description.
    """
    from html import escape

    license_path = Path(source_file)
    if not license_path.exists():
        print(f"⚠️ License file not found: {source_file}, skipping license page")
        return

    try:
        license_content = license_path.read_text(encoding="utf-8")

        # quote=False: quotes are safe inside element content and stay readable.
        safe_license = escape(license_content, quote=False)

        # Create license page with heading
        html_content = f"""
<h1>License Information</h1>
<div class="license-content">
<pre>{safe_license}</pre>
</div>
"""

        self.build_page(
            "base.html",
            output_file,
            title,
            description,
            output_file,
            content=html_content,
        )
    except Exception as e:
        print(f"⚠️ Failed to build license page: {e}")