Coverage for src / qdrant_loader / core / file_conversion / xlsx_markdown_format.py: 100%
7 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-06-11 09:38 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-06-11 09:38 +0000
1"""Shared heading format for converted xlsx markdown.
3The xlsx converter emits sheet/subtable headings; the row-KV splitter parses
4them back to recover structure. Centralizing the format and regex here makes
5that contract explicit — change the template and the regex breaks loudly at
6import time instead of silently degrading ingestion at runtime.
7"""
9from __future__ import annotations
11import re
# Matches the H2 sheet headings found in converted xlsx markdown.
# Named groups:
#   sheet - the sheet name (lazy match, so trailing whitespace is not captured)
#   idx   - the digits following "/ Subtable:", or None when that suffix is absent
# MULTILINE makes ^ and $ anchor at every line, so headings can be located
# anywhere inside a larger document.
SHEET_HEADING_RE = re.compile(
    r"^##\s+Sheet:\s+"
    r"(?P<sheet>.+?)"
    r"(?:\s+/\s+Subtable:\s+(?P<idx>\d+))?"
    r"\s*$",
    flags=re.MULTILINE,
)
def format_sheet_heading(sheet: str, subtable_idx: int | None) -> str:
    """Build the level-2 markdown heading that labels a sheet.

    Returns ``## Sheet: <sheet>`` when ``subtable_idx`` is ``None``; otherwise
    the same heading with `` / Subtable: <subtable_idx>`` appended.
    """
    heading = f"## Sheet: {sheet}"
    # Compare against None explicitly so an index of 0 still gets its suffix.
    if subtable_idx is not None:
        heading = f"{heading} / Subtable: {subtable_idx}"
    return heading