Coverage for src/qdrant_loader/connectors/confluence/pagination.py: 72%

36 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:05 +0000

1from __future__ import annotations 

2 

3import re 

4from typing import Any 

5 

# Allow-list pattern for tokens: one or more ASCII letters, digits,
# underscores or hyphens, and nothing else.
_ALLOWED_TOKEN_RE = re.compile(r"^[A-Za-z0-9_-]+$")

7 

8 

9def _quote_cql_literal(value: str) -> str: 

10 # Escape backslashes first, then double quotes 

11 escaped = value.replace("\\", "\\\\").replace('"', '\\"') 

12 return f'"{escaped}"' 

13 

14 

def _sanitize_space_key(space_key: str) -> str:
    """Validate a space key and return it as a quoted CQL string literal.

    Args:
        space_key: The space key to validate.

    Returns:
        The key wrapped in double quotes via ``_quote_cql_literal``.

    Raises:
        ValueError: If ``space_key`` is not a string, or contains anything
            other than ASCII letters, digits, underscores and hyphens.
    """
    # Check the type first, mirroring _sanitize_content_types: without it a
    # non-string key (e.g. None or an int) would surface as a raw TypeError
    # from the regex engine instead of the ValueError raised for bad keys.
    if not isinstance(space_key, str) or not _ALLOWED_TOKEN_RE.fullmatch(space_key):
        raise ValueError(
            "Invalid Confluence space key. Only alphanumerics, underscore and hyphen are allowed."
        )
    return _quote_cql_literal(space_key)

21 

22 

def _sanitize_content_types(content_types: list[str]) -> list[str]:
    """Validate each content type and return them as quoted CQL literals.

    Args:
        content_types: Content type names to validate, in order.

    Returns:
        A new list with every entry quoted via ``_quote_cql_literal``, in the
        same order as the input.

    Raises:
        ValueError: On the first entry that is not a string or does not fully
            match ``_ALLOWED_TOKEN_RE``.
    """
    quoted: list[str] = []
    for candidate in content_types:
        is_valid_token = (
            isinstance(candidate, str)
            and _ALLOWED_TOKEN_RE.fullmatch(candidate) is not None
        )
        if not is_valid_token:
            raise ValueError(f"Invalid Confluence content type: {candidate!r}")
        quoted.append(_quote_cql_literal(candidate))
    return quoted

32 

33 

def build_cloud_search_params(
    space_key: str, content_types: list[str] | None, cursor: str | None
) -> dict[str, Any]:
    """Build the parameter dict for a cursor-paginated CQL search.

    Args:
        space_key: Space key; validated and quoted by ``_sanitize_space_key``.
        content_types: Optional content types to restrict the search to. When
            empty or ``None`` no type filter is added.
        cursor: Pagination cursor. Included under ``"cursor"`` whenever it is
            not ``None``.

    Returns:
        A dict with ``"expand"``, ``"limit"`` (25), ``"cql"`` and, when a
        cursor was given, ``"cursor"``.

    Raises:
        ValueError: If the space key or any content type fails validation.
    """
    clauses = [f"space = {_sanitize_space_key(space_key)}"]
    if content_types:
        type_list = ",".join(_sanitize_content_types(content_types))
        clauses.append(f"type in ({type_list})")

    query: dict[str, Any] = {
        "expand": "body.storage,version,metadata.labels,history,space,extensions.position,children.comment.body.storage,ancestors,children.page",
        "limit": 25,
        "cql": " and ".join(clauses),
    }
    if cursor is not None:
        query["cursor"] = cursor
    return query

49 

50 

def build_dc_search_params(
    space_key: str, content_types: list[str] | None, start: int
) -> dict[str, Any]:
    """Build the parameter dict for an offset-paginated CQL search.

    Args:
        space_key: Space key; validated and quoted by ``_sanitize_space_key``.
        content_types: Optional content types to restrict the search to. When
            empty or ``None`` no type filter is added.
        start: Offset passed through unchanged under ``"start"``.

    Returns:
        A dict with ``"expand"``, ``"limit"`` (25), ``"start"`` and ``"cql"``.

    Raises:
        ValueError: If the space key or any content type fails validation.
    """
    clauses = [f"space = {_sanitize_space_key(space_key)}"]
    if content_types:
        type_list = ",".join(_sanitize_content_types(content_types))
        clauses.append(f"type in ({type_list})")

    query: dict[str, Any] = {
        "expand": "body.storage,version,metadata.labels,history,space,extensions.position,children.comment.body.storage,ancestors,children.page",
        "limit": 25,
        "start": start,
        "cql": " and ".join(clauses),
    }
    return query