Coverage for src/qdrant_loader_core/llm/tokenization.py: 91%
23 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:01 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:01 +0000
1from __future__ import annotations
3from .types import TokenCounter
5try: # Optional dependency
6 import tiktoken # type: ignore
7except Exception: # pragma: no cover - absence is acceptable
8 tiktoken = None # type: ignore
class CharCountTokenCounter(TokenCounter):
    """Token counter that uses the character count of the text as its result."""

    def count(self, text: str) -> int:
        """Return the number of characters in ``text``."""
        n_chars = len(text)
        return n_chars
class TiktokenTokenCounter(TokenCounter):
    """Token counter backed by tiktoken; falls back gracefully when unavailable.

    If tiktoken is not importable, the requested encoding cannot be loaded, or
    encoding fails, ``count`` falls back to a simple character count to avoid
    runtime errors.
    """

    def __init__(self, encoding_name: str):
        """Try to load the named tiktoken encoding.

        Args:
            encoding_name: Name passed to ``tiktoken.get_encoding``.
        """
        self._encoding_name = encoding_name
        self._encoding = None
        if tiktoken is not None:
            try:
                self._encoding = tiktoken.get_encoding(encoding_name)
            except Exception:
                # Loading failed: leave the encoding unset so that count()
                # uses the character-count fallback.
                self._encoding = None

    def count(self, text: str) -> int:
        """Return the token count of ``text``.

        Uses the loaded tiktoken encoding when one is available; otherwise,
        or if encoding raises, returns ``len(text)``.
        """
        if self._encoding is not None:
            try:
                # By default tiktoken raises ValueError when the text contains
                # a special-token literal (e.g. "<|endoftext|>").  For counting
                # arbitrary text those literals should be tokenized as ordinary
                # text rather than silently degrading to a character count.
                tokens = self._encoding.encode(text, disallowed_special=())
                return len(tokens)
            except Exception:
                # Deliberate best-effort: any encode failure falls through to
                # the character-count fallback below.
                pass
        return len(text)