Coverage for src / qdrant_loader / core / file_conversion / xlsx_markdown_format.py: 100%

7 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-06-11 09:38 +0000

1"""Shared heading format for converted xlsx markdown. 

2 

3The xlsx converter emits sheet/subtable headings; the row-KV splitter parses 

4them back to recover structure. Centralizing the format and regex here makes 

5that contract explicit — change the template and the regex breaks loudly at 

6import time instead of silently degrading ingestion at runtime. 

7""" 

8 

9from __future__ import annotations 

10 

11import re 

12 

# Parses the H2 headings produced by ``format_sheet_heading`` back into their
# parts. Keep this pattern and that template in sync: they are two halves of
# the same contract.
#
# Named groups:
#   sheet - the sheet name (matched lazily so trailing whitespace and the
#           optional subtable suffix are not absorbed into the name)
#   idx   - the subtable index as a digit string, or None when the heading
#           carries no " / Subtable: N" suffix
SHEET_HEADING_RE = re.compile(
    # "## Sheet: <name>" anchored at the start of a line.
    r"^##\s+Sheet:\s+(?P<sheet>.+?)"
    # Optional " / Subtable: <digits>" suffix.
    r"(?:\s+/\s+Subtable:\s+(?P<idx>\d+))?"
    # Tolerate trailing whitespace up to the end of the line.
    r"\s*$",
    # MULTILINE lets ^ and $ match at every line boundary, so headings can be
    # located anywhere inside a larger markdown document.
    flags=re.MULTILINE,
)

17 

18 

def format_sheet_heading(sheet: str, subtable_idx: int | None) -> str:
    """Build the markdown H2 heading line for a sheet.

    Args:
        sheet: Sheet name, interpolated into the heading as-is.
        subtable_idx: Index of the subtable within the sheet, or ``None``
            when the heading refers to the sheet as a whole. ``0`` is a
            valid index and still produces the subtable suffix.

    Returns:
        ``"## Sheet: <sheet>"`` when ``subtable_idx`` is ``None``, otherwise
        ``"## Sheet: <sheet> / Subtable: <subtable_idx>"``.
    """
    heading = f"## Sheet: {sheet}"
    # Compare against None explicitly so that index 0 keeps its suffix.
    if subtable_idx is not None:
        heading = f"{heading} / Subtable: {subtable_idx}"
    return heading