Coverage for website/builder/markdown.py: 89%

274 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:03 +0000

1""" 

2Markdown Processing - Markdown-to-HTML Conversion. 

3 

4This module handles markdown processing, HTML conversion, 

5and content formatting for the website builder. 

6""" 

7 

8import re 

9 

10 

class MarkdownProcessor:
    """Handles markdown processing and HTML conversion.

    The methods are stateless string transformations: they read no instance
    attributes and only reach each other through ``self``.
    """

    def markdown_to_html(
        self, markdown_content: str, source_file: str = "", output_file: str = ""
    ) -> str:
        """Convert markdown to HTML with Bootstrap styling.

        Args:
            markdown_content: Markdown source text.
            source_file: Accepted for interface compatibility; not used here.
            output_file: Accepted for interface compatibility; not used here.

        Returns:
            The styled HTML, or ``""`` for empty/whitespace-only input.
        """
        # Normalize empty/whitespace-only content consistently across code paths
        if not markdown_content.strip():
            return ""

        try:
            import markdown

            md = markdown.Markdown(
                extensions=[
                    "fenced_code",
                    "codehilite",
                    "tables",
                    "toc",
                    "attr_list",
                    "def_list",
                    "footnotes",
                    "md_in_html",
                    "sane_lists",
                ],
                extension_configs={
                    "codehilite": {
                        "css_class": "codehilite",
                        "use_pygments": False,  # Use simple highlighting without Pygments
                        "guess_lang": True,
                    }
                },
            )
            html = md.convert(markdown_content)
            # Fix any remaining malformed code blocks
            html = self.fix_malformed_code_blocks(html)
        except ImportError:
            # Fallback to the built-in basic conversion
            html = self._basic_markdown_to_html_no_regex(markdown_content)

        # Both paths get the same Bootstrap classes and heading IDs.
        html = self.add_bootstrap_classes(html)
        return self.ensure_heading_ids(html)

    def _basic_markdown_to_html_no_regex(self, markdown_content: str) -> str:
        """Basic line-by-line markdown to HTML conversion.

        Supports fenced code blocks, ``#``..``######`` headings, ``- `` bullet
        lists, paragraphs, and inline bold/italic/code/links. Despite the
        historical name, inline formatting is done with regular expressions.

        Args:
            markdown_content: Markdown source text.

        Returns:
            HTML lines joined with ``"\\n"``, or ``""`` for blank input.
        """
        from html import escape

        if not markdown_content.strip():
            return ""

        def transform_inline(text: str) -> str:
            # Bold (strong) must run before italics (em) so ``**`` is not
            # consumed as two single-star markers.
            text = re.sub(r"\*\*([^*]+)\*\*", r"<strong>\1</strong>", text)
            text = re.sub(r"\*([^*]+)\*", r"<em>\1</em>", text)
            # Inline code
            text = re.sub(r"`([^`]+)`", r"<code>\1</code>", text)
            # Links [text](url)
            text = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r'<a href="\2">\1</a>', text)
            return text

        html_lines: list[str] = []
        in_code_block = False
        in_list = False

        def close_list() -> None:
            nonlocal in_list
            if in_list:
                html_lines.append("</ul>")
                in_list = False

        for raw in markdown_content.split("\n"):
            if raw.startswith("```"):
                if in_code_block:
                    html_lines.append("</code></pre>")
                else:
                    # close any open list before starting code block
                    close_list()
                    html_lines.append("<pre><code>")
                in_code_block = not in_code_block
                continue

            if in_code_block:
                # Code is literal text: escape it so "<", ">" and "&" are
                # displayed instead of being parsed as markup.
                html_lines.append(escape(raw, quote=False))
                continue

            # Headings: 1-6 leading "#" followed by a single space.
            hashes = len(raw) - len(raw.lstrip("#"))
            if 1 <= hashes <= 6 and raw[hashes : hashes + 1] == " ":
                close_list()
                heading_text = transform_inline(raw[hashes + 1 :])
                html_lines.append(f"<h{hashes}>{heading_text}</h{hashes}>")
                continue

            # Lists
            if raw.lstrip().startswith("- "):
                if not in_list:
                    html_lines.append("<ul>")
                    in_list = True
                html_lines.append(f"<li>{transform_inline(raw.lstrip()[2:])}</li>")
                continue

            # A blank line or a paragraph ends the current list; a <p> must
            # not be emitted as a direct child of <ul>.
            close_list()

            # Paragraphs
            if raw.strip():
                html_lines.append(f"<p>{transform_inline(raw)}</p>")

        # Close anything still open at end of input
        close_list()
        if in_code_block:
            html_lines.append("</code></pre>")

        return "\n".join(html_lines)

    def fix_malformed_code_blocks(self, html_content: str) -> str:
        """Fix code blocks that weren't properly converted by markdown.

        Applies a fixed sequence of regex rewrites that turn paragraph-wrapped
        command snippets and leftover triple-backtick fences into
        ``code-block-wrapper`` markup.

        Args:
            html_content: HTML produced by the markdown converter.

        Returns:
            The HTML with the recognized malformed patterns rewritten.
        """
        bash_block = (
            r'<div class="code-block-wrapper"><pre class="code-block">'
            r'<code class="language-bash">\1</code></pre></div>'
        )

        # Fix single-line code snippets that should be code blocks
        # Convert paragraphs with inline code containing bash commands to proper code blocks
        html_content = re.sub(
            r'<p><code class="inline-code">(bash|sh)\s*\n\s*([^<]+)</code></p>',
            r'<div class="code-block-wrapper"><pre class="code-block"><code class="language-\1">\2</code></pre></div>',
            html_content,
        )

        # Fix paragraphs with bash/shell commands (with or without language prefix)
        html_content = re.sub(
            r'<p><code class="inline-code">(?:bash\s*\n\s*)?([^<]*(?:mkdir|cd|pip|qdrant-loader|mcp-)[^<]*)</code></p>',
            bash_block,
            html_content,
        )

        # Also handle cases where there's no class attribute
        html_content = re.sub(
            r"<p><code>(?:bash\s*\n\s*)?([^<]*(?:mkdir|cd|pip|qdrant-loader|mcp-)[^<]*)</code></p>",
            bash_block,
            html_content,
        )

        # Clean up stray <p> tags inside code blocks
        html_content = re.sub(
            r"(<code[^>]*>.*?)</p>\s*<p>(.*?</code>)",
            r"\1\n\2",
            html_content,
            flags=re.DOTALL,
        )

        # Fix paragraphs that contain triple backticks (malformed code blocks)
        def fix_code_block(match: re.Match) -> str:
            lines = match.group(1).split("\n")
            first_line = lines[0].strip()
            if not first_line.startswith("```"):
                return match.group(0)

            if len(lines) == 1:
                # Opening and closing fence share one line: everything
                # between them is code and there is no language line.
                language = ""
                code_content = first_line[3:-3].strip()
            else:
                # Extract language if present
                language = first_line[3:].strip()
                code_content = "\n".join(lines[1:])
                # Remove trailing ``` if present
                if code_content.endswith("```"):
                    code_content = code_content[:-3].rstrip()
            return (
                '<div class="code-block-wrapper"><pre class="code-block">'
                f'<code class="language-{language}">{code_content}</code></pre></div>'
            )

        # Match paragraphs containing code blocks
        html_content = re.sub(
            r"<p>(```[^`]*```)</p>", fix_code_block, html_content, flags=re.DOTALL
        )

        # Handle multi-paragraph code blocks
        html_content = re.sub(
            r"<p>```(\w+)\s*</p>\s*<p>(.*?)</p>\s*<p>```</p>",
            r'<div class="code-block-wrapper"><pre class="code-block"><code class="language-\1">\2</code></pre></div>',
            html_content,
            flags=re.DOTALL,
        )

        # Handle code blocks split across multiple paragraphs
        html_content = re.sub(
            r"<p>```(\w+)?\s*(.*?)\s*```</p>",
            lambda m: f'<div class="code-block-wrapper"><pre class="code-block"><code class="language-{m.group(1) or ""}">{m.group(2)}</code></pre></div>',
            html_content,
            flags=re.DOTALL,
        )

        return html_content

    def ensure_heading_ids(self, html_content: str) -> str:
        """Ensure plain-text headings have IDs for anchor links.

        Only headings whose content contains no nested tags are considered.
        A heading keeps its attributes; an ``id`` derived from its text is
        prepended when it has none and the text yields a non-empty slug.

        Args:
            html_content: HTML to process.

        Returns:
            The HTML with ``id`` attributes added where applicable.
        """

        def slugify(text: str) -> str:
            """Convert text to URL-safe slug."""
            slug = re.sub(r"[^\w\s-]", "", text.lower())
            return re.sub(r"[-\s]+", "-", slug).strip("-")

        def add_id(match: re.Match) -> str:
            """Add ID to heading if not present."""
            tag, attrs, content = match.group(1), match.group(2), match.group(3)

            # The lookbehind keeps attributes such as ``data-id=`` from being
            # mistaken for an existing ``id``.
            if not re.search(r"(?<![\w-])id\s*=", attrs):
                heading_id = slugify(content)
                if heading_id:  # never emit an empty id=""
                    other_attrs = attrs.strip()
                    attrs = f' id="{heading_id}"'
                    if other_attrs:
                        attrs += f" {other_attrs}"

            return f"<{tag}{attrs}>{content}</{tag}>"

        # The backreference requires the closing tag to match the opening one.
        heading_pattern = r"<(h[1-6])([^>]*)>([^<]+)</\1>"
        return re.sub(heading_pattern, add_id, html_content)

    def add_bootstrap_classes(self, html_content: str) -> str:
        """Add Bootstrap classes to HTML elements.

        Styles headings, code blocks, inline code, links, lists, tables and
        blockquotes. Headings and links that already carry a ``class``
        attribute get the new classes merged into it rather than receiving a
        second ``class`` attribute.

        Args:
            html_content: HTML to decorate.

        Returns:
            The decorated HTML.
        """

        def with_classes(tag: str, attrs: str, classes: str) -> str:
            """Build an opening tag with ``classes`` added to ``attrs``."""
            existing = re.search(r'(?<![\w-])class="([^"]*)"', attrs)
            if existing is None:
                return f'<{tag}{attrs} class="{classes}">'
            current = existing.group(1).split()
            merged = current + [c for c in classes.split() if c not in current]
            return (
                f"<{tag}{attrs[: existing.start()]}"
                f'class="{" ".join(merged)}"{attrs[existing.end() :]}>'
            )

        # Add Bootstrap header classes
        heading_classes = {
            "h1": "display-4 fw-bold text-primary mb-4",
            "h2": "h2 fw-bold text-primary mt-5 mb-3",
            "h3": "h3 fw-bold text-primary mt-5 mb-3",
            "h4": "h4 fw-bold mt-4 mb-3",
            "h5": "h5 fw-bold mt-3 mb-2",
            "h6": "h6 fw-semibold mt-2 mb-1",
        }
        html_content = re.sub(
            r"<(h[1-6])([^>]*)>",
            lambda m: with_classes(
                m.group(1), m.group(2), heading_classes[m.group(1)]
            ),
            html_content,
        )

        # Add Bootstrap code block classes - clean approach
        # First handle codehilite divs
        html_content = html_content.replace(
            '<div class="codehilite">', '<div class="code-block-wrapper">'
        )

        # Handle standalone pre blocks (not already in wrappers)
        html_content = re.sub(
            r'(?<!<div class="code-block-wrapper">)<pre>',
            '<div class="code-block-wrapper"><pre class="code-block">',
            html_content,
        )

        # Add code-block class to pre tags that don't have it
        html_content = re.sub(
            r'<pre(?![^>]*class="code-block")([^>]*)>',
            r'<pre class="code-block"\1>',
            html_content,
        )

        # Close wrapper divs only for pre blocks that we wrapped. The scan is
        # bounded by the block's own </pre> so an already-closed wrapper
        # cannot reach into a later block.
        html_content = re.sub(
            r'(<div class="code-block-wrapper"><pre class="code-block"[^>]*>(?:(?!</pre>).)*?)</pre>(?!</div>)',
            r"\1</pre></div>",
            html_content,
            flags=re.DOTALL,
        )

        # Add Bootstrap inline code classes
        # First mark every attribute-less <code>, then undo it inside <pre>
        html_content = html_content.replace("<code>", '<code class="inline-code">')
        # Override inline-code class for code inside pre blocks; the scan
        # stops at </pre> so inline code after a block keeps its class.
        html_content = re.sub(
            r'(<pre[^>]*>(?:(?!</pre>).)*?)<code class="inline-code">',
            r"\1<code>",
            html_content,
            flags=re.DOTALL,
        )

        # Add Bootstrap link classes
        html_content = re.sub(
            r'<a([^>]*?href="[^"]*"[^>]*?)>',
            lambda m: with_classes("a", m.group(1), "text-decoration-none"),
            html_content,
        )

        # Add Bootstrap list, table and blockquote (alert) classes; only
        # attribute-less tags are touched.
        for plain_tag, styled_tag in (
            ("<ul>", '<ul class="list-group list-group-flush">'),
            ("<ol>", '<ol class="list-group list-group-numbered">'),
            ("<li>", '<li class="list-group-item">'),
            ("<table>", '<table class="table table-striped table-hover">'),
            ("<blockquote>", '<blockquote class="alert alert-info">'),
        ):
            html_content = html_content.replace(plain_tag, styled_tag)

        # Add Bootstrap button classes to links that look like buttons
        html_content = re.sub(
            r'<a([^>]*?)class="[^"]*btn[^"]*"([^>]*?)>',
            r'<a\1class="btn btn-primary"\2>',
            html_content,
        )

        return html_content

    def extract_title_from_markdown(self, markdown_content: str) -> str:
        """Extract title from markdown content.

        Returns:
            The text of the first line that starts with ``"# "`` after
            stripping, or ``"Documentation"`` when there is none.
        """
        for line in markdown_content.split("\n"):
            stripped = line.strip()
            if stripped.startswith("# "):
                return stripped[2:].strip()
        return "Documentation"  # Default fallback title

    def basic_markdown_to_html(self, markdown_content: str) -> str:
        """Basic markdown to HTML conversion - alias for compatibility."""
        return self.markdown_to_html(markdown_content)

    def convert_markdown_links_to_html(
        self, content: str, source_file: str = "", target_dir: str = ""
    ) -> str:
        """Convert markdown links to HTML format.

        Rewrites ``.md`` targets and extension-less well-known files
        (LICENSE, README, CHANGELOG, CONTRIBUTING) in both ``[text](link)``
        and ``href="link"`` form.

        Args:
            content: Markdown or HTML text containing links.
            source_file: When non-empty, site-build normalizations are also
                applied (absolute ``/docs/`` paths, developer section
                directory indexes).
            target_dir: When non-empty, absolute ``/docs/...`` links are made
                relative to this output location. A value not ending in
                ``"/"`` is treated as a file path and its directory is used.

        Returns:
            The text with links rewritten.
        """

        # Convert [text](link.md) to [text](link.html) - markdown style
        def replace_md_links(match: re.Match) -> str:
            link = self._process_link_path(match.group(2), source_file)
            return f"[{match.group(1)}]({link})"

        # Convert href="link.md" to href="link.html" - HTML style
        def replace_href_links(match: re.Match) -> str:
            link = self._process_link_path(match.group(2), source_file)
            return f"{match.group(1)}{link}{match.group(3)}"

        # Apply conversions - expanded patterns to catch more file types
        # Catch .md files and well-known files without extensions
        content = re.sub(
            r"\[([^\]]+)\]\(([^)]+\.md(?:#[^)]*)?)\)", replace_md_links, content
        )
        content = re.sub(
            r"\[([^\]]+)\]\(([^)]*(?:LICENSE|README|CHANGELOG|CONTRIBUTING)(?:/[^)]*)?(?:#[^)]*)?)\)",
            replace_md_links,
            content,
        )
        content = re.sub(
            r'(href=")([^"]+\.md(?:#[^"]*)?)(")', replace_href_links, content
        )
        content = re.sub(
            r'(href=")([^"]*(?:LICENSE|README|CHANGELOG|CONTRIBUTING)(?:/[^"]*)?(?:#[^"]*)?)(")',
            replace_href_links,
            content,
        )

        # The following normalizations are only applied during site builds (when source_file is provided).
        # Unit tests expect relative paths to be preserved.
        if source_file:
            # Ordered (pattern, replacement) rules; later rules rely on the
            # output of earlier ones, so the order must be kept.
            build_rules = (
                # Normalize links that incorrectly include an extra "docs/" prefix inside /docs pages
                # e.g., href="docs/users/..." -> absolute "/docs/users/..."
                (r'(href=")(docs/[^"]+)(")', r"\1/\2\3"),
                (r"\]\((docs/[^)]+)\)", r"](/\1)"),
                # Collapse accidental duplicate docs/docs prefixes
                (r'(href=")/?docs/docs/([^"]+)(")', r"\1/docs/\2\3"),
                (r"\]\(/?docs/docs/([^\)]+)\)", r"](/docs/\1)"),
                # Rewrite relative ./docs/... links to absolute /docs/ (HTML and Markdown)
                (r'(href=")\./docs/([^"#]*)(#[^"]*)?(")', r"\1/docs/\2\3\4"),
                (r"\]\(\./docs/([^\)#]*)(#[^\)]*)?\)", r"](/docs/\1\2)"),
                # Rewrite relative ../../docs/... links to absolute /docs/ (HTML and Markdown)
                (r'(href=")(?:\.{2}/)+docs/([^"#]*)(#[^"]*)?(")', r"\1/docs/\2\3\4"),
                (r"\]\((?:\.{2}/)+docs/([^\)#]*)(#[^\)]*)?\)", r"](/docs/\1\2)"),
                # Convert .md (with optional anchors) to .html in both HTML and Markdown links
                (r'(href=")([^"\s]+)\.md(#[^"]*)?(")', r"\1\2.html\3\4"),
                (r"\]\(([^\)\s]+)\.md(#[^\)]*)?\)", r"](\1.html\2)"),
                # Normalize developers relative links to directory indexes
                (
                    r'(href=")\./(architecture|testing|deployment|extending)\.html(")',
                    r"\1./\2/\3",
                ),
                # Normalize absolute developers/*.html to directory indexes
                (
                    r'(href=")([^"\s]*/developers/)(architecture|testing|deployment|extending)\.html(")',
                    r"\1\2\3/\4",
                ),
                (
                    r"\]\(([^\)\s]*/developers/)(architecture|testing|deployment|extending)\.html\)",
                    r"](\1\2/)",
                ),
                # Same as above for href values that carry an anchor
                (
                    r'(href=")([^"#]*/developers/)(architecture|testing|deployment|extending)\.html(#[^"]*)?(")',
                    r"\1\2\3/\4\5",
                ),
                # Normalize sibling links such as ../extending.html -> ../extending/
                (
                    r'(href=")\.\./(architecture|testing|deployment|extending)\.html(#[^"]*)?(")',
                    r"\1../\2/\3\4",
                ),
                (
                    r"\]\(\.\./(architecture|testing|deployment|extending)\.html(#[^\)]*)?\)",
                    r"](../\1/\2)",
                ),
                # Ensure well-known repo root files under /docs have .html extension
                (
                    r'(href=")(/docs/(?:LICENSE|README|CHANGELOG|CONTRIBUTING))(#[^"]*)?(")',
                    r"\1\2.html\3\4",
                ),
            )
            for pattern, replacement in build_rules:
                content = re.sub(pattern, replacement, content)

        # If a target output path is provided, convert absolute /docs/... links to relative ones
        if target_dir:
            try:
                import posixpath

                base_dir = target_dir
                if not base_dir.endswith("/"):
                    base_dir = posixpath.dirname(base_dir) + "/"
                start = base_dir.rstrip("/")

                def _to_relative_html(match: re.Match) -> str:
                    rel = posixpath.relpath("docs/" + match.group(2), start)
                    return f'{match.group(1)}{rel}{match.group(3) or ""}{match.group(4)}'

                def _to_relative_md(match: re.Match) -> str:
                    rel = posixpath.relpath("docs/" + match.group(1), start)
                    return f"]({rel}{match.group(2) or ''})"

                content = re.sub(
                    r'(href=")/docs/([^"#]+)(#[^"]*)?(")',
                    _to_relative_html,
                    content,
                )
                content = re.sub(
                    r"\]\(/docs/([^\)#]+)(#[^\)]*)?\)", _to_relative_md, content
                )
            except (AttributeError, TypeError, ValueError):
                # Best effort: a target_dir that is not a usable path string
                # leaves the absolute /docs/ links in place.
                pass

        return content

    def _process_link_path(self, link: str, source_file: str = "") -> str:
        """Process a single link target for conversion.

        Args:
            link: The link target, optionally with a ``#anchor``.
            source_file: When non-empty, ``docs/`` paths are made absolute
                and well-known files are pointed at ``/docs/``.

        Returns:
            The rewritten link with its original anchor re-attached.
        """
        well_known = ("LICENSE", "README", "CHANGELOG", "CONTRIBUTING")

        # Preserve anchor fragments while processing
        link, hash_sep, fragment = link.partition("#")
        anchor = hash_sep + fragment

        # Only rewrite to absolute /docs when building from a source file context
        if source_file:
            # ../../docs/... -> /docs/...
            link = re.sub(r"^(?:\.{2}/)+docs/", "/docs/", link)
            # ./docs/... -> /docs/...
            link = re.sub(r"^\./docs/", "/docs/", link)
            # docs/... (relative) -> /docs/...
            if link.startswith("docs/"):
                link = "/" + link

        # Preserve bare filename.md#anchor in tests (no source context)
        keep_md = bool(anchor) and "/" not in link and not source_file

        if link.endswith(".md") and not keep_md:
            link = link[:-3] + ".html"
        else:
            # Handle well-known files without extensions
            filename = link.split("/")[-1]
            if filename.upper() in well_known:
                # Ensure these resolve under /docs when referenced from packages
                if source_file and not link.startswith("/docs/"):
                    # Nudge to /docs root for repo-wide files
                    link = "/docs/" + filename
                link = link + ".html"

        # Collapse accidental duplicate /docs/docs prefixes
        link = re.sub(r"^/docs/docs/", "/docs/", link)
        link = link.replace("docs/docs/", "docs/")

        # Ensure absolute /docs/ links are normalized (only when building)
        if source_file and link.startswith("docs/"):
            link = "/" + link

        return link + anchor

    def render_toc(self, html_content: str) -> str:
        """Generate table of contents from HTML headings.

        Only headings that have an ``id`` attribute and plain-text content
        are listed.

        Args:
            html_content: HTML to scan for headings.

        Returns:
            A ``<div class="toc">`` fragment, or ``""`` when no heading
            qualifies.
        """
        heading_pattern = r'<(h[1-6])[^>]*id="([^"]+)"[^>]*>([^<]+)</h[1-6]>'
        headings = re.findall(heading_pattern, html_content)

        if not headings:
            return ""

        parts = ['<div class="toc"><h3>Table of Contents</h3>']

        # One <ul> is kept open per heading level, so the current depth is
        # also the number of lists to close at the end.
        # NOTE(review): nested <ul> elements are emitted as siblings of <li>,
        # not inside one - confirm whether consumers rely on this shape.
        depth = 0
        for tag, heading_id, text in headings:
            level = int(tag[1])  # Extract number from h1, h2, etc.
            if level > depth:
                parts.append("<ul>" * (level - depth))
            elif level < depth:
                parts.append("</ul>" * (depth - level))
            depth = level

            parts.append(f'<li><a href="#{heading_id}">{text}</a></li>\n')

        # Close all remaining open lists
        parts.append("</ul>" * depth)
        parts.append("</div>")

        return "".join(parts)