Coverage for website/builder/core.py: 89%
400 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:03 +0000
1"""
2Core Website Builder - Main Orchestration and Lifecycle Management.
4This module implements the main WebsiteBuilder class that orchestrates
5all build operations and manages the overall build lifecycle.
6"""
8import json
9import re
10import subprocess
11from pathlib import Path
13from .assets import AssetManager
14from .markdown import MarkdownProcessor
15from .templates import TemplateProcessor
class WebsiteBuilder:
    """Builds the QDrant Loader documentation website from templates.

    Orchestrates template rendering, markdown conversion, and asset
    copying by delegating to the specialized processors created in
    ``__init__``.
    """

    def __init__(
        self, templates_dir: str = "website/templates", output_dir: str = "site"
    ):
        """Initialize the website builder.

        Args:
            templates_dir: Directory containing the HTML templates.
            output_dir: Directory where the generated site is written.
        """
        self.templates_dir = Path(templates_dir)
        self.output_dir = Path(output_dir)
        # Empty means "use relative links"; may later be filled from
        # pyproject.toml's Homepage URL in generate_project_info().
        self.base_url = ""
        # Cached docs navigation data (built once per run)
        self.docs_nav_data: dict | None = None

        # Initialize component processors
        self.template_processor = TemplateProcessor(templates_dir)
        self.markdown_processor = MarkdownProcessor()
        self.asset_manager = AssetManager(output_dir)
36 # Delegate core operations to specialized processors
37 def load_template(self, template_name: str) -> str:
38 """Load a template file."""
39 return self.template_processor.load_template(template_name)
41 def replace_placeholders(self, content: str, replacements: dict[str, str]) -> str:
42 """Replace placeholders in content with actual values."""
43 return self.template_processor.replace_placeholders(content, replacements)
45 def markdown_to_html(
46 self, markdown_content: str, source_file: str = "", output_file: str = ""
47 ) -> str:
48 """Convert markdown to HTML with Bootstrap styling."""
49 return self.markdown_processor.markdown_to_html(
50 markdown_content, source_file, output_file
51 )
53 def copy_assets(self) -> None:
54 """Copy all website assets to output directory."""
55 return self.asset_manager.copy_assets()
57 def extract_title_from_markdown(self, markdown_content: str) -> str:
58 """Extract title from markdown content."""
59 return self.markdown_processor.extract_title_from_markdown(markdown_content)
61 # Additional markdown processing methods
62 def basic_markdown_to_html(self, markdown_content: str) -> str:
63 """Basic markdown to HTML conversion."""
64 return self.markdown_processor.basic_markdown_to_html(markdown_content)
66 def convert_markdown_links_to_html(
67 self, markdown_content: str, source_file: str = "", target_dir: str = ""
68 ) -> str:
69 """Convert markdown links to HTML format."""
70 return self.markdown_processor.convert_markdown_links_to_html(
71 markdown_content, source_file, target_dir
72 )
74 def add_bootstrap_classes(self, html_content: str) -> str:
75 """Add Bootstrap classes to HTML elements."""
76 return self.markdown_processor.add_bootstrap_classes(html_content)
78 def render_toc(self, html_content: str) -> str:
79 """Generate table of contents from HTML headings."""
80 return self.markdown_processor.render_toc(html_content)
82 # Additional asset management methods
83 def copy_static_files(self, static_files: list[str]) -> None:
84 """Copy multiple static files."""
85 return self.asset_manager.copy_static_files(static_files)
87 def get_git_timestamp(self, source_path: str) -> str:
88 """Get the last modified timestamp from Git."""
89 try:
90 result = subprocess.run(
91 ["git", "log", "-1", "--format=%cd", "--date=iso-strict", source_path],
92 capture_output=True,
93 text=True,
94 cwd=".",
95 )
96 if result.returncode == 0 and result.stdout.strip():
97 return result.stdout.strip()
98 except (subprocess.CalledProcessError, FileNotFoundError):
99 pass
100 return ""
102 def _humanize_title(self, name: str) -> str:
103 """Convert filename to human-readable title."""
104 # Remove file extension and common prefixes
105 title = (
106 name.replace(".md", "")
107 .replace("README", "")
108 .replace("_", " ")
109 .replace("-", " ")
110 )
112 # Handle common patterns
113 title_mappings = {
114 "cli reference": "CLI Reference",
115 "api": "API",
116 "faq": "FAQ",
117 "toc": "Table of Contents",
118 "readme": "Overview",
119 }
121 title_lower = title.lower().strip()
122 if title_lower in title_mappings:
123 return title_mappings[title_lower]
125 # Capitalize words
126 return " ".join(word.capitalize() for word in title.split())
    def generate_project_info(self, **kwargs) -> dict:
        """Collect project metadata for templates and persist it as JSON.

        Merges, in increasing priority: built-in defaults, *kwargs*
        overrides, pyproject.toml ``[project]`` values (these win over
        kwargs when present), and Git commit information. The result is
        written to ``<output_dir>/project-info.json`` and returned.

        Returns:
            The assembled project-info dictionary.
        """
        project_info = {
            "name": "QDrant Loader",
            "version": "0.4.0b1",
            "description": "Enterprise-ready vector database toolkit",
            "github_url": "https://github.com/martin-papy/qdrant-loader",
        }

        # Override with any provided kwargs
        project_info.update(kwargs)

        # Try to load from pyproject.toml
        try:
            import tomli

            with open("pyproject.toml", "rb") as f:
                pyproject = tomli.load(f)
            project_section = pyproject.get("project", {})
            project_info.update(
                {
                    "name": project_section.get("name", project_info["name"]),
                    "version": project_section.get(
                        "version", project_info["version"]
                    ),
                    "description": project_section.get(
                        "description", project_info["description"]
                    ),
                }
            )
            # Normalize workspace naming to product name
            if isinstance(project_info.get("name"), str) and project_info[
                "name"
            ].endswith("-workspace"):
                project_info["name"] = "QDrant Loader"

            # Try to get homepage/repository from pyproject urls
            urls = (
                project_section.get("urls", {})
                if isinstance(project_section, dict)
                else {}
            )
            homepage = urls.get("Homepage")
            # NOTE(review): base_url_user_set is never assigned in __init__;
            # presumably set externally by CLI callers — verify.
            if (
                homepage
                and not getattr(self, "base_url_user_set", False)
                and not self.base_url
            ):
                # Set base_url from pyproject if not provided externally
                self.base_url = homepage.rstrip("/")
            repo_url = urls.get("Repository") or urls.get("Source")
            if repo_url:
                project_info["github_url"] = repo_url
        except Exception:
            # Ignore malformed project section entries (and a missing
            # pyproject.toml or tomli module) — defaults remain in place.
            pass

        # Try to get git information
        try:
            import subprocess

            # Get git commit hash
            result = subprocess.run(
                ["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True
            )
            project_info["commit_hash"] = result.stdout.strip()

            # Get git commit date
            result = subprocess.run(
                ["git", "log", "-1", "--format=%ci"],
                capture_output=True,
                text=True,
                check=True,
            )
            project_info["commit_date"] = result.stdout.strip()

        except (subprocess.CalledProcessError, FileNotFoundError):
            # Git not available or not a git repository
            pass

        # Add build metadata
        from datetime import datetime

        commit_hash = project_info.get("commit_hash", "")
        project_info["commit"] = {
            "hash": commit_hash,
            "short": commit_hash[:7] if isinstance(commit_hash, str) else "",
            "date": project_info.get("commit_date", ""),
        }
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # naive UTC time with a manually appended "Z" suffix.
        project_info["build"] = {
            "timestamp": datetime.utcnow().isoformat(timespec="seconds") + "Z"
        }

        # Write project info JSON file
        project_info_path = self.output_dir / "project-info.json"
        project_info_path.parent.mkdir(parents=True, exist_ok=True)
        with open(project_info_path, "w", encoding="utf-8") as f:
            json.dump(project_info, f, indent=2)

        return project_info
229 def build_page(
230 self,
231 template_name: str,
232 output_filename: str,
233 title: str,
234 description: str,
235 canonical_path: str,
236 content: str = "",
237 **extra_replacements,
238 ) -> None:
239 """Build a single page from template."""
240 template_content = self.load_template(template_name)
242 # Load a content template if available when no explicit content is given.
243 # For pages where output and canonical differ, missing content should raise.
244 # For pages where they are the same (e.g., index.html), load content if
245 # the template exists, otherwise fall back to empty content.
246 if not content:
247 try:
248 content = self.load_template(output_filename)
249 except FileNotFoundError:
250 if output_filename != canonical_path:
251 # Maintain behavior for explicit content templates
252 raise
253 # Otherwise, leave content empty
255 project_info = self.generate_project_info()
257 # Calculate base URL for relative paths
258 if canonical_path.count("/") > 0:
259 base_url = "../" * canonical_path.count("/")
260 else:
261 # Normalize root base URL
262 if self.base_url:
263 base_url = self.base_url.rstrip("/") + "/"
264 else:
265 base_url = "./"
267 # Merge extra replacements ensuring defaults for optional placeholders
268 extras = dict(extra_replacements)
269 extras.setdefault("additional_head", "")
270 extras.setdefault("additional_scripts", "")
272 replacements = {
273 "page_title": title,
274 "page_description": description,
275 "content": content,
276 "base_url": base_url,
277 "canonical_url": (
278 self.base_url.rstrip("/") + "/" + canonical_path
279 if self.base_url
280 else canonical_path
281 ),
282 "author": project_info.get("name", "QDrant Loader"),
283 "version": project_info.get("version", "0.4.0b1"),
284 "project_name": project_info["name"],
285 "project_version": project_info["version"],
286 "project_description": project_info["description"],
287 **extras,
288 }
290 final_content = self.replace_placeholders(template_content, replacements)
292 output_path = self.output_dir / output_filename
293 output_path.parent.mkdir(parents=True, exist_ok=True)
295 with open(output_path, "w", encoding="utf-8") as f:
296 f.write(final_content)
298 print(f"📄 Built {output_filename}")
    def build_site(
        self,
        coverage_artifacts_dir: str | None = None,
        test_results_dir: str | None = None,
    ) -> None:
        """Build the complete website into ``self.output_dir``.

        Args:
            coverage_artifacts_dir: Optional directory of coverage HTML
                artifacts; when absent a styled placeholder page is built.
            test_results_dir: Accepted for interface compatibility but not
                read anywhere in this method.

        Most sub-steps are wrapped in try/except so one failing page does
        not abort the whole build.
        """
        print("🏗️ Building QDrant Loader website...")

        # Create output directory
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Copy assets first
        self.copy_assets()

        # Generate project info
        self.generate_project_info()

        # Build main pages
        self.build_page(
            "base.html",
            "index.html",
            "Home",
            "Enterprise-ready vector database toolkit for building searchable knowledge bases from multiple data sources including Confluence, Jira, and local files.",
            "index.html",
        )

        # Build a friendly 404 page
        try:
            self.build_page(
                "base.html",
                "404.html",
                "Page Not Found",
                "The page you are looking for does not exist.",
                "404.html",
                content=self.load_template("404.html"),
            )
        except Exception as e:
            print(f"⚠️ Failed to build 404 page: {e}")

        # Build docs structure and pages
        self.build_docs_nav()
        _docs_structure = self.build_docs_structure()

        # Create docs directory and index
        docs_output_dir = self.output_dir / "docs"
        docs_output_dir.mkdir(exist_ok=True)

        # Build docs index page using dedicated template content
        self.build_page(
            "base.html",
            "docs/index.html",
            "Documentation",
            "QDrant Loader Documentation",
            "docs/index.html",
            content=self.load_template("docs-index.html"),
        )

        # Bridge root docs from repository top-level files
        try:
            if Path("README.md").exists():
                self.build_markdown_page("README.md", "docs/README.html")
            if Path("RELEASE_NOTES.md").exists():
                self.build_markdown_page("RELEASE_NOTES.md", "docs/RELEASE_NOTES.html")
            if Path("CONTRIBUTING.md").exists():
                self.build_markdown_page("CONTRIBUTING.md", "docs/CONTRIBUTING.html")
            # License (plain text) rendered via helper
            if Path("LICENSE").exists():
                self.build_license_page(
                    "LICENSE", "docs/LICENSE.html", "License", "License"
                )
            # Privacy policy page from template (optional — skipped if absent)
            try:
                self.build_page(
                    "base.html",
                    "privacy-policy.html",
                    "Privacy Policy",
                    "Privacy policy for QDrant Loader",
                    "privacy-policy.html",
                    content=self.load_template("privacy-policy.html"),
                )
            except FileNotFoundError:
                pass
        except Exception as e:
            print(f"⚠️ Failed to build root docs pages: {e}")

        # Build package README documentation into docs/packages
        try:
            self.build_package_docs()
        except Exception as e:
            print(f"⚠️ Failed to build package docs: {e}")

        # Always create coverage directory and ensure index.html exists
        coverage_output_dir = self.output_dir / "coverage"
        coverage_output_dir.mkdir(exist_ok=True)

        # Build coverage reports if provided
        if coverage_artifacts_dir:
            _coverage_structure = self.build_coverage_structure(coverage_artifacts_dir)

            # Copy coverage artifacts
            coverage_path = Path(coverage_artifacts_dir)
            if coverage_path.exists():
                import shutil

                for item in coverage_path.iterdir():
                    if item.is_file():
                        shutil.copy2(item, coverage_output_dir / item.name)
                    elif item.is_dir():
                        shutil.copytree(
                            item, coverage_output_dir / item.name, dirs_exist_ok=True
                        )
        else:
            # Create styled placeholder coverage index if no artifacts provided
            placeholder_html = (
                '<section class="py-5"><div class="container">'
                '<h1 class="display-5 fw-bold text-primary"><i class="bi bi-graph-up me-2"></i>Coverage Reports</h1>'
                '<div class="alert alert-info mt-4">No coverage artifacts available.</div>'
                "</div></section>"
            )
            self.build_page(
                "base.html",
                "coverage/index.html",
                "Coverage Reports",
                "Test coverage analysis",
                "coverage/index.html",
                content=placeholder_html,
            )

        # Generate directory indexes
        self.generate_directory_indexes()

        # Generate SEO files
        # Build a dynamic sitemap including all HTML pages
        try:
            self.generate_dynamic_sitemap()
        except Exception as e:
            print(f"⚠️ Failed to generate dynamic sitemap: {e}")

        # Always (re)write robots.txt pointing to the sitemap
        try:
            self.generate_robots_file()
        except Exception as e:
            print(f"⚠️ Failed to generate robots.txt: {e}")

        # Create .nojekyll file for GitHub Pages
        nojekyll_path = self.output_dir / ".nojekyll"
        nojekyll_path.touch()
        print("📄 Created .nojekyll file")

        print("✅ Website build completed successfully!")
451 def build_docs_nav(self) -> dict:
452 """Build documentation navigation structure."""
453 # Simplified navigation building
454 docs_dir = Path("docs")
455 if not docs_dir.exists():
456 return {}
458 nav_data = {"title": "Documentation", "children": []}
460 for item in sorted(docs_dir.iterdir()):
461 if item.is_file() and item.suffix == ".md":
462 nav_data["children"].append(
463 {
464 "title": self._humanize_title(item.stem),
465 "url": f"docs/{item.name}",
466 }
467 )
468 elif item.is_dir():
469 nav_data["children"].append(
470 {
471 "title": self._humanize_title(item.name),
472 "url": f"docs/{item.name}/",
473 }
474 )
476 self.docs_nav_data = nav_data
477 return nav_data
479 def generate_seo_files(self) -> None:
480 """Generate SEO files like sitemap.xml and robots.txt."""
481 from datetime import datetime
483 # Determine base site URL
484 site_base = (
485 self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net"
486 )
488 # Get current date for lastmod
489 current_date = datetime.now().strftime("%Y-%m-%d")
491 # Generate simple sitemap.xml
492 sitemap_content = f"""<?xml version="1.0" encoding="UTF-8"?>
493<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
494 <url>
495 <loc>{site_base}/</loc>
496 <lastmod>{current_date}</lastmod>
497 <changefreq>weekly</changefreq>
498 <priority>1.0</priority>
499 </url>
500 <url>
501 <loc>{site_base}/docs/</loc>
502 <lastmod>{current_date}</lastmod>
503 <changefreq>weekly</changefreq>
504 <priority>0.8</priority>
505 </url>
506</urlset>"""
508 sitemap_path = self.output_dir / "sitemap.xml"
509 with open(sitemap_path, "w", encoding="utf-8") as f:
510 f.write(sitemap_content)
511 print("📄 Generated sitemap.xml")
513 # Generate simple robots.txt
514 robots_content = f"""User-agent: *
515Allow: /
517Sitemap: {self.base_url.rstrip('/') if self.base_url else 'https://example.com'}/sitemap.xml
518"""
520 robots_path = self.output_dir / "robots.txt"
521 with open(robots_path, "w", encoding="utf-8") as f:
522 f.write(robots_content.replace("https://example.com", site_base))
523 print("📄 Generated robots.txt")
525 def generate_robots_file(self) -> None:
526 """Generate only robots.txt referencing the sitemap URL."""
527 site_base = (
528 self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net"
529 )
530 robots_content = f"""User-agent: *
531Allow: /
533Sitemap: {site_base}/sitemap.xml
534"""
535 robots_path = self.output_dir / "robots.txt"
536 with open(robots_path, "w", encoding="utf-8") as f:
537 f.write(robots_content)
538 print("📄 Generated robots.txt")
540 def generate_dynamic_sitemap(
541 self, date: str = None, pages: list[str] = None
542 ) -> str:
543 """Generate dynamic sitemap with custom pages."""
544 from datetime import datetime
546 base_url = (
547 self.base_url.rstrip("/") if self.base_url else "https://qdrant-loader.net"
548 )
550 # Auto-discover pages if not provided
551 if pages is None:
552 pages = []
553 # Find HTML files in site directory
554 if self.output_dir.exists():
555 for html_file in self.output_dir.rglob("*.html"):
556 rel_path = str(html_file.relative_to(self.output_dir))
557 pages.append(rel_path)
559 # Use provided date or current date
560 if date is None:
561 date = datetime.now().strftime("%Y-%m-%d")
563 sitemap_content = '<?xml version="1.0" encoding="UTF-8"?>\n'
564 sitemap_content += (
565 '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
566 )
568 for page in pages:
569 sitemap_content += " <url>\n"
570 sitemap_content += f" <loc>{base_url}/{page}</loc>\n"
571 sitemap_content += f" <lastmod>{date}</lastmod>\n"
572 sitemap_content += " <changefreq>weekly</changefreq>\n"
573 sitemap_content += " <priority>0.8</priority>\n"
574 sitemap_content += " </url>\n"
576 sitemap_content += "</urlset>"
578 # Write sitemap to file
579 sitemap_path = self.output_dir / "sitemap.xml"
580 sitemap_path.parent.mkdir(parents=True, exist_ok=True)
581 with open(sitemap_path, "w", encoding="utf-8") as f:
582 f.write(sitemap_content)
583 print(f"📄 Generated dynamic sitemap.xml with {len(pages)} pages")
585 return sitemap_content
    def build_markdown_page(
        self,
        markdown_file: str,
        output_path: str,
        title: str = "",
        breadcrumb: str = "",
        **kwargs,
    ) -> None:
        """Build a site page from a markdown file, wrapped in the docs layout.

        Args:
            markdown_file: Path to the source ``.md`` file.
            output_path: Site-relative output location (also used as the
                page's canonical path).
            title: Page title; extracted from the markdown when empty.
            breadcrumb: Breadcrumb placeholder value forwarded to the template.
            **kwargs: Extra placeholder replacements forwarded to build_page.

        A missing or unreadable source file prints a warning and skips the
        page instead of raising.
        """
        markdown_path = Path(markdown_file)
        if not markdown_path.exists():
            print(
                f"⚠️ Markdown file not found: {markdown_file}, skipping page generation"
            )
            return

        try:
            with open(markdown_path, encoding="utf-8") as f:
                markdown_content = f.read()
        except Exception as e:
            print(f"⚠️ Failed to read markdown file {markdown_file}: {e}")
            return

        # Extract title if not provided
        if not title:
            title = self.extract_title_from_markdown(markdown_content)

        # Normalize links in markdown before conversion
        markdown_content = self.markdown_processor.convert_markdown_links_to_html(
            markdown_content, str(markdown_path)
        )

        # Convert markdown to HTML
        html_content = self.markdown_to_html(
            markdown_content, str(markdown_path), output_path
        )
        # Normalize any remaining HTML hrefs — a second pass catches links
        # produced by the markdown conversion itself
        html_content = self.markdown_processor.convert_markdown_links_to_html(
            html_content, str(markdown_path)
        )

        # Build a Table of Contents and wrap in docs layout
        toc_html = self.render_toc(html_content)
        if toc_html:
            toc_html = self.add_bootstrap_classes(toc_html)

        # Two-column layout: sticky TOC sidebar (hidden below lg) + content
        wrapped_content = f"""
<section class=\"py-5\">
  <div class=\"container\">
    <div class=\"row\">
      <aside class=\"col-lg-3 d-none d-lg-block\">
        <div class=\"position-sticky\" style=\"top: 6rem;\">
          {toc_html or '<div class=\"text-muted small\">No sections</div>'}
        </div>
      </aside>
      <div class=\"col-lg-9\">
        {html_content}
      </div>
    </div>
  </div>
</section>
"""

        # Build the page
        self.build_page(
            "base.html",
            output_path,
            title,
            f"{title} - QDrant Loader",
            output_path,
            content=wrapped_content,
            breadcrumb=breadcrumb,
            **kwargs,
        )
662 def build_docs_structure(self) -> dict:
663 """Build documentation directory structure."""
664 docs_dir = Path("docs")
665 structure = {"title": "Documentation", "children": []}
667 # Create docs output directory
668 docs_output_dir = self.output_dir / "docs"
669 docs_output_dir.mkdir(parents=True, exist_ok=True)
671 if not docs_dir.exists():
672 return structure
674 # Process all markdown files in docs
675 for item in sorted(docs_dir.rglob("*.md")):
676 relative_path = str(item.relative_to(docs_dir))
677 output_path = relative_path.replace(".md", ".html")
679 structure["children"].append(
680 {
681 "title": self._humanize_title(item.stem),
682 "path": relative_path,
683 "url": f"docs/{output_path}",
684 }
685 )
687 # Build the page from markdown
688 try:
689 self.build_markdown_page(
690 str(item),
691 f"docs/{output_path}",
692 title=self._humanize_title(item.stem),
693 )
694 except Exception as e:
695 print(f"⚠️ Failed to build docs page {item}: {e}")
697 return structure
    def build_coverage_structure(self, coverage_dir: str | None = None) -> dict:
        """Build the coverage report section of the site.

        Copies coverage artifacts from *coverage_dir* into
        ``<output_dir>/coverage`` (renaming ``htmlcov-*`` directories to
        short package aliases), then renders a styled index page linking
        each per-package HTML report.

        Returns:
            ``{"coverage_reports": [...]}`` describing discovered reports;
            empty when *coverage_dir* is missing or not provided.
        """
        # Always create coverage output directory
        coverage_output_dir = self.output_dir / "coverage"
        coverage_output_dir.mkdir(parents=True, exist_ok=True)

        if not coverage_dir:
            return {"coverage_reports": []}

        coverage_path = Path(coverage_dir)
        if not coverage_path.exists():
            return {"coverage_reports": []}

        # Copy all coverage files with proper naming
        import shutil

        for item in coverage_path.iterdir():
            # Map directory names to cleaner package names
            dest_name = item.name
            if item.is_dir():
                # NOTE: order matters — the generic "htmlcov" fallback must
                # come after the specific package prefixes.
                if "htmlcov-loader" in item.name:
                    dest_name = "loader"
                elif "htmlcov-mcp" in item.name:
                    dest_name = "mcp"
                elif "htmlcov-website" in item.name:
                    dest_name = "website"
                elif (
                    "htmlcov-core" in item.name
                    or "htmlcov-qdrant-loader-core" in item.name
                ):
                    dest_name = "core"
                elif "htmlcov" in item.name:
                    dest_name = item.name.replace("htmlcov-", "").replace(
                        "htmlcov_", ""
                    )

            dest_path = coverage_output_dir / dest_name
            try:
                if item.is_file():
                    shutil.copy2(item, dest_path)
                elif item.is_dir():
                    # Replace any stale copy wholesale
                    if dest_path.exists():
                        shutil.rmtree(dest_path)
                    shutil.copytree(item, dest_path)
                print(f"📁 Copied coverage: {item.name} -> {dest_name}")
            except Exception as e:
                print(f"⚠️ Failed to copy coverage file {item}: {e}")

        # Build reports list using the renamed directories
        reports = []
        for subdir in coverage_output_dir.iterdir():
            if subdir.is_dir():
                index_file = subdir / "index.html"
                if index_file.exists():
                    reports.append(
                        {
                            "name": subdir.name,
                            "path": f"{subdir.name}/index.html",
                            "url": f"coverage/{subdir.name}/index.html",
                        }
                    )

        # Create main coverage index page using site template when reports exist
        if reports:
            # Build coverage index with Bootstrap styling
            index_content = """
<section class=\"py-5\">
  <div class=\"container\">
    <h1 class=\"display-5 fw-bold text-primary mb-4\"><i class=\"bi bi-graph-up me-2\"></i>Coverage Reports</h1>
    <div class=\"row g-4\">"""

            # One card per known package alias; unknown aliases get no card
            # but are still copied and listed in the returned structure.
            for report in reports:
                if report["name"] == "loader":
                    index_content += """
            <div class="col-lg-6">
                <div class="card">
                    <div class="card-header">
                        <h4>QDrant Loader Core</h4>
                        <span id="loader-test-indicator" class="badge">Loading...</span>
                    </div>
                    <div class="card-body">
                        <div id="loader-coverage">Loader coverage data</div>
                        <a href="loader/" class="btn btn-primary">View Detailed Report</a>
                    </div>
                </div>
            </div>"""
                elif report["name"] == "mcp":
                    index_content += """
            <div class="col-lg-6">
                <div class="card">
                    <div class="card-header">
                        <h4>MCP Server</h4>
                        <span id="mcp-test-indicator" class="badge">Loading...</span>
                    </div>
                    <div class="card-body">
                        <div id="mcp-coverage">MCP Server coverage data</div>
                        <a href="mcp/" class="btn btn-success">View Detailed Report</a>
                    </div>
                </div>
            </div>"""
                elif report["name"] == "website":
                    index_content += """
            <div class="col-lg-6">
                <div class="card">
                    <div class="card-header">
                        <h4>Website</h4>
                        <span id="website-test-indicator" class="badge">Loading...</span>
                    </div>
                    <div class="card-body">
                        <div id="website-coverage">Website coverage data</div>
                        <a href="website/" class="btn btn-info">View Detailed Report</a>
                    </div>
                </div>
            </div>"""
                elif report["name"] == "core":
                    index_content += """
            <div class="col-lg-6">
                <div class="card">
                    <div class="card-header">
                        <h4>Core Library</h4>
                        <span id="core-test-indicator" class="badge">Loading...</span>
                    </div>
                    <div class="card-body">
                        <div id="core-coverage">Core library coverage data</div>
                        <a href="core/" class="btn btn-warning">View Detailed Report</a>
                    </div>
                </div>
            </div>"""

            # Closing markup plus client-side script that fetches each
            # report's status.json and renders a percentage progress bar.
            index_content += """
    </div>
  </div>
</section>

<script>
// Compute and render coverage summary from status.json
function coverageSummary(data){
  try{
    let total = 0, missing = 0;
    if (data && data.files){
      for (const k in data.files){
        const f = data.files[k];
        const nums = f && f.index && f.index.nums ? f.index.nums : (f.index && f.index.numbers ? f.index.numbers : null);
        if (nums && typeof nums.n_statements === 'number'){
          total += (nums.n_statements||0);
          missing += (nums.n_missing||0);
        }
      }
    }
    // Fallback if a totals object exists
    if (total === 0 && data && data.totals){
      if (typeof data.totals.n_statements === 'number'){
        total = data.totals.n_statements||0;
        missing = data.totals.n_missing||0;
      } else if (typeof data.totals.covered_lines === 'number' && typeof data.totals.num_statements === 'number'){
        total = data.totals.num_statements;
        missing = total - data.totals.covered_lines;
      }
    }
    if (total > 0){
      const covered = Math.max(0, total - missing);
      const pct = Math.round((covered/total)*1000)/10; // one decimal
      return {pct, covered, total};
    }
  } catch(e){}
  return null;
}

function renderCoverage(id, summary){
  const el = document.getElementById(id);
  if (!el) return;
  if (!summary){ el.textContent = 'Loaded'; return; }
  const {pct, covered, total} = summary;
  el.innerHTML = `
    <div class="d-flex align-items-center">
      <div class="progress flex-grow-1 me-2" style="height: 10px;">
        <div class="progress-bar bg-success" role="progressbar" style="width: ${pct}%" aria-valuenow="${pct}" aria-valuemin="0" aria-valuemax="100"></div>
      </div>
      <span class="small fw-semibold">${pct}% (${covered}/${total})</span>
    </div>`;
}

fetch('loader/status.json').then(r=>r.json()).then(d=>renderCoverage('loader-coverage', coverageSummary(d))).catch(()=>{});
fetch('mcp/status.json').then(r=>r.json()).then(d=>renderCoverage('mcp-coverage', coverageSummary(d))).catch(()=>{});
fetch('website/status.json').then(r=>r.json()).then(d=>renderCoverage('website-coverage', coverageSummary(d))).catch(()=>{});
fetch('core/status.json').then(r=>r.json()).then(d=>renderCoverage('core-coverage', coverageSummary(d))).catch(()=>{});
</script>
"""
            # Render through site template for full styling/navigation
            self.build_page(
                "base.html",
                "coverage/index.html",
                "Coverage Reports",
                "Test coverage analysis",
                "coverage/index.html",
                content=index_content,
            )
            print("📄 Generated coverage index.html")

        return {"coverage_reports": reports}
    def build_package_docs(self) -> None:
        """Build documentation pages from package README files into docs/packages.

        Maps package README.md files to site docs under:
        - packages/qdrant-loader -> docs/packages/qdrant-loader/README.html
        - packages/qdrant-loader-mcp-server -> docs/packages/mcp-server/README.html
        - packages/qdrant-loader-core -> docs/packages/core/README.html

        Each README goes through link normalization, markdown conversion,
        a regex link-hardening pass, and is wrapped in the two-column docs
        layout with a generated table of contents. Failures for one package
        are reported and do not stop the others.
        """
        # (package directory name, site alias, display name)
        package_mappings: list[tuple[str, str, str]] = [
            ("qdrant-loader", "qdrant-loader", "QDrant Loader"),
            ("qdrant-loader-mcp-server", "mcp-server", "MCP Server"),
            ("qdrant-loader-core", "core", "Core Library"),
        ]

        for pkg_name, alias, display_name in package_mappings:
            readme_path = Path("packages") / pkg_name / "README.md"
            if not readme_path.exists():
                continue

            try:
                with open(readme_path, encoding="utf-8") as f:
                    markdown_content = f.read()

                # Normalize links in markdown before conversion
                normalized_md = self.markdown_processor.convert_markdown_links_to_html(
                    markdown_content
                )

                html_content = self.markdown_to_html(
                    normalized_md,
                    str(readme_path),
                    f"docs/packages/{alias}/README.html",
                )
                # Normalize any remaining HTML hrefs
                html_content = self.markdown_processor.convert_markdown_links_to_html(
                    html_content, str(readme_path), f"docs/packages/{alias}/README.html"
                )

                # Final hardening for package README links: collapse relative ../../docs to /docs
                try:
                    html_content = re.sub(
                        r'href="(?:\.{2}/)+docs/', 'href="/docs/', html_content
                    )
                    # Convert README root files and .md links under docs to .html
                    html_content = re.sub(
                        r'href="(?:\.{2}/)+CONTRIBUTING\.md"',
                        'href="/docs/CONTRIBUTING.html"',
                        html_content,
                    )
                    html_content = re.sub(
                        r'href="(?:\.{2}/)+LICENSE(\.html)?"',
                        'href="/docs/LICENSE.html"',
                        html_content,
                    )
                    html_content = re.sub(
                        r'href="(?:\.{2}/)+docs/([^"#]+)\.md(#[^"]*)?"',
                        r'href="/docs/\1.html\2"',
                        html_content,
                    )
                except Exception:
                    # Link hardening is best-effort; keep the unhardened HTML
                    pass

                # Build a Table of Contents and wrap with standard docs layout for consistent look
                toc_html = self.render_toc(html_content)
                if toc_html:
                    toc_html = self.add_bootstrap_classes(toc_html)

                wrapped_content = f"""
<section class=\"py-5\">
  <div class=\"container\">
    <div class=\"row\">
      <aside class=\"col-lg-3 d-none d-lg-block\">
        <div class=\"position-sticky\" style=\"top: 6rem;\">
          {toc_html or '<div class=\"text-muted small\">No sections</div>'}
        </div>
      </aside>
      <div class=\"col-lg-9\">
        {html_content}
      </div>
    </div>
  </div>
</section>
"""

                output_path = f"docs/packages/{alias}/README.html"
                self.build_page(
                    "base.html",
                    output_path,
                    f"{display_name} - README",
                    f"{display_name} Documentation",
                    output_path,
                    content=wrapped_content,
                )
            except Exception as e:
                print(f"⚠️ Failed to build docs for package {pkg_name}: {e}")
    def generate_directory_indexes(self) -> None:
        """Generate index.html files for documentation directories.

        Scans both the source ``docs`` tree and the generated site's docs
        tree; for each directory it promotes the first of README.md,
        index.md, README.html, index.html into that directory's index page.

        NOTE(review): in the site tree only ``.html`` sources are handled —
        a markdown file found there is silently skipped (no ``else`` for the
        inner suffix check); presumably markdown never appears in the output
        tree — verify.
        """
        # Look in both source docs and output site docs directories
        source_docs_dir = Path("docs")
        site_docs_dir = self.output_dir / "docs"

        # Process directories in both locations
        for docs_dir in [source_docs_dir, site_docs_dir]:
            if not docs_dir.exists():
                continue

            for directory in docs_dir.rglob("*"):
                if directory.is_dir():
                    # Look for README or index files in various formats
                    readme_md = directory / "README.md"
                    readme_html = directory / "README.html"
                    index_md = directory / "index.md"
                    index_html = directory / "index.html"

                    # Determine source file (markdown preferred over HTML)
                    source_file = None
                    if readme_md.exists():
                        source_file = readme_md
                    elif index_md.exists():
                        source_file = index_md
                    elif readme_html.exists():
                        source_file = readme_html
                    elif index_html.exists():
                        source_file = index_html

                    if source_file:
                        try:
                            if docs_dir == site_docs_dir:
                                # For files in site directory, create/overwrite index.html directly there
                                index_file = directory / "index.html"
                                if source_file.suffix == ".html":
                                    # Copy HTML file content directly (always overwrite to avoid stale links)
                                    content = source_file.read_text()
                                    index_file.write_text(content)
                                    print(
                                        f"📄 Generated index.html from {source_file.name}"
                                    )
                            else:
                                # For source files, process through normal build pipeline
                                relative_dir = directory.relative_to(docs_dir)
                                output_path = f"docs/{relative_dir}/index.html"

                                if source_file.suffix == ".html":
                                    # Copy HTML file content directly
                                    content = source_file.read_text()
                                    self.build_page(
                                        "base.html",
                                        output_path,
                                        self._humanize_title(directory.name),
                                        f"{self._humanize_title(directory.name)} Documentation",
                                        output_path,
                                        content=content,
                                    )
                                else:
                                    # Process markdown file
                                    self.build_markdown_page(
                                        str(source_file),
                                        output_path,
                                        title=self._humanize_title(directory.name),
                                    )
                        except Exception as e:
                            print(f"⚠️ Failed to generate index for {directory}: {e}")
1064 def build_license_page(
1065 self,
1066 source_file: str = "LICENSE",
1067 output_file: str = "license.html",
1068 title: str = "License",
1069 description: str = "License",
1070 ) -> None:
1071 """Build license page from LICENSE file."""
1072 license_path = Path(source_file)
1073 if not license_path.exists():
1074 print(f"⚠️ License file not found: {source_file}, skipping license page")
1075 return
1077 try:
1078 with open(license_path, encoding="utf-8") as f:
1079 license_content = f.read()
1081 # Create license page with heading
1082 html_content = f"""
1083 <h1>License Information</h1>
1084 <div class="license-content">
1085 <pre>{license_content}</pre>
1086 </div>
1087 """
1089 self.build_page(
1090 "base.html",
1091 output_file,
1092 title,
1093 description,
1094 output_file,
1095 content=html_content,
1096 )
1097 except Exception as e:
1098 print(f"⚠️ Failed to build license page: {e}")