Coverage for src/qdrant_loader_core/llm/tokenization.py: 91%

23 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:01 +0000

1from __future__ import annotations 

2 

3from .types import TokenCounter 

4 

5try: # Optional dependency 

6 import tiktoken # type: ignore 

7except Exception: # pragma: no cover - absence is acceptable 

8 tiktoken = None # type: ignore 

9 

10 

class CharCountTokenCounter(TokenCounter):
    """Token counter that reports the character length of the text."""

    def count(self, text: str) -> int:
        """Return the number of characters in ``text``."""
        char_total = len(text)
        return char_total

14 

15 

class TiktokenTokenCounter(TokenCounter):
    """Count tokens with a tiktoken encoding, degrading to character counts.

    When tiktoken is not importable, the named encoding cannot be loaded, or
    encoding a text raises, the counter returns ``len(text)`` instead of
    propagating the error.
    """

    def __init__(self, encoding_name: str):
        """Resolve ``encoding_name`` through tiktoken when it is available.

        Any failure to load the encoding leaves the counter in its
        character-count fallback mode.
        """
        self._encoding_name = encoding_name
        resolved = None
        if tiktoken is not None:
            try:
                resolved = tiktoken.get_encoding(encoding_name)
            except Exception:
                # Could not load the encoding; keep the fallback mode.
                resolved = None
        self._encoding = resolved

    def count(self, text: str) -> int:
        """Return the token count of ``text``, or its length on fallback."""
        encoder = self._encoding
        if encoder is None:
            return len(text)
        try:
            token_ids = encoder.encode(text)
            return len(token_ids)
        except Exception:
            # Encoding failed; degrade to the character count.
            return len(text)