Coverage for src/qdrant_loader/connectors/confluence/pagination.py: 72%
36 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
1from __future__ import annotations
3import re
4from typing import Any
# Whitelist for values that get interpolated into a CQL query: one or more
# ASCII letters, digits, underscores or hyphens, and nothing else.
_ALLOWED_TOKEN_RE: re.Pattern[str] = re.compile(r"^[A-Za-z0-9_-]+$")
def _quote_cql_literal(value: str) -> str:
    """Return *value* as a double-quoted CQL string literal.

    Every backslash is doubled and every embedded double quote is prefixed
    with a backslash before the result is wrapped in double quotes.
    """
    # One pass, one character at a time: each backslash or quote is mapped
    # independently, which gives the same result as escaping backslashes
    # first and double quotes second.
    escape_table = str.maketrans({"\\": "\\\\", '"': '\\"'})
    body = value.translate(escape_table)
    return '"' + body + '"'
def _sanitize_space_key(space_key: str) -> str:
    """Validate a Confluence space key and return it as a quoted CQL literal.

    Args:
        space_key: Space key to embed in a CQL query.

    Returns:
        The key wrapped in double quotes, ready for interpolation into CQL.

    Raises:
        ValueError: If ``space_key`` is not a string, or contains anything
            other than alphanumerics, underscore and hyphen.
    """
    # Check the type first so a non-string key (e.g. None) is reported as an
    # invalid key via ValueError, the same way _sanitize_content_types treats
    # non-string entries, instead of surfacing a TypeError from the regex.
    if not isinstance(space_key, str) or not _ALLOWED_TOKEN_RE.fullmatch(space_key):
        raise ValueError(
            "Invalid Confluence space key. Only alphanumerics, underscore and hyphen are allowed."
        )
    return _quote_cql_literal(space_key)
def _sanitize_content_types(content_types: list[str]) -> list[str]:
    """Validate content type names and return them as quoted CQL literals.

    Args:
        content_types: Content type names to embed in a CQL ``type in (...)``
            clause.

    Returns:
        The names in their original order, each wrapped in double quotes.

    Raises:
        ValueError: If ``content_types`` is a bare string, or if any entry is
            not a string or contains anything other than alphanumerics,
            underscore and hyphen.
    """
    # A bare string is itself an iterable of one-character strings, so "page"
    # would otherwise pass validation and silently become "p","a","g","e".
    if isinstance(content_types, str):
        raise ValueError(
            f"Invalid Confluence content types: expected a list of strings, got {content_types!r}"
        )

    sanitized: list[str] = []
    for content_type in content_types:
        if not isinstance(content_type, str) or not _ALLOWED_TOKEN_RE.fullmatch(
            content_type
        ):
            raise ValueError(f"Invalid Confluence content type: {content_type!r}")
        sanitized.append(_quote_cql_literal(content_type))
    return sanitized
def build_cloud_search_params(
    space_key: str, content_types: list[str] | None, cursor: str | None
) -> dict[str, Any]:
    """Build the query parameters for a cursor-paginated Confluence search.

    Args:
        space_key: Space to search; validated and quoted before it is placed
            in the CQL.
        content_types: Content types to restrict the search to. When ``None``
            or empty, no ``type in (...)`` clause is added.
        cursor: Cursor value to send; left out of the result when ``None``.

    Returns:
        A dict with ``expand``, ``limit`` and ``cql`` keys, plus ``cursor``
        when one was supplied.

    Raises:
        ValueError: If the space key or any content type fails validation.
    """
    # Space key is validated first, then the content types.
    clauses = [f"space = {_sanitize_space_key(space_key)}"]
    if content_types:
        quoted_types = ",".join(_sanitize_content_types(content_types))
        clauses.append(f"type in ({quoted_types})")

    query: dict[str, Any] = {
        "expand": "body.storage,version,metadata.labels,history,space,extensions.position,children.comment.body.storage,ancestors,children.page",
        "limit": 25,
        "cql": " and ".join(clauses),
    }
    if cursor is not None:
        query["cursor"] = cursor
    return query
def build_dc_search_params(
    space_key: str, content_types: list[str] | None, start: int
) -> dict[str, Any]:
    """Build the query parameters for an offset-paginated Confluence search.

    Args:
        space_key: Space to search; validated and quoted before it is placed
            in the CQL.
        content_types: Content types to restrict the search to. When ``None``
            or empty, no ``type in (...)`` clause is added.
        start: Value sent as the ``start`` parameter.

    Returns:
        A dict with ``expand``, ``limit``, ``start`` and ``cql`` keys.

    Raises:
        ValueError: If the space key or any content type fails validation.
    """
    # Space key is validated first, then the content types.
    clauses = [f"space = {_sanitize_space_key(space_key)}"]
    if content_types:
        quoted_types = ",".join(_sanitize_content_types(content_types))
        clauses.append(f"type in ({quoted_types})")

    return {
        "expand": "body.storage,version,metadata.labels,history,space,extensions.position,children.comment.body.storage,ancestors,children.page",
        "limit": 25,
        "start": start,
        "cql": " and ".join(clauses),
    }