Coverage for src / qdrant_loader / config / embedding.py: 100%

11 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-10 09:40 +0000

1"""Configuration for embedding generation.""" 

2 

3from pydantic import Field 

4 

5from qdrant_loader.config.base import BaseConfig 

6 

7 

class EmbeddingConfig(BaseConfig):
    """Configuration for embedding generation.

    Declarative pydantic settings model: every attribute is a `Field(...)`
    with a default, so an `EmbeddingConfig()` constructed with no arguments
    is fully populated and valid.
    """

    # Embedding model identifier sent to the embedding service.
    model: str = Field(
        default="argus-ai/pplx-embed-v1-0.6b:fp32",
        description="Embedding model to use",
    )
    # Optional credential for the embedding service; None presumably means
    # the endpoint requires no authentication (e.g. a local server) —
    # TODO(review): confirm against the client code that consumes this.
    api_key: str | None = Field(
        default=None, description="API key for the embedding service"
    )
    # How many texts are grouped into one embedding request.
    batch_size: int = Field(
        default=100, description="Number of texts to embed in a single batch"
    )
    # Default points at a local Ollama server exposing an OpenAI-compatible
    # /v1 API (per the tokenizer note below referencing Ollama).
    endpoint: str = Field(
        default="http://localhost:11434/v1",
        description="Base URL for the embedding API endpoint",
    )
    # Tokenizer name used for token counting; the sentinel string "none"
    # (not Python None) disables it, per the description.
    tokenizer: str = Field(
        default="none",
        description="Tokenizer to use for token counting. Use 'none' for Ollama local models",
    )
    # Dimensionality of the embedding vectors; the default 1024 matches the
    # default model above. Optional, so it can be set to None — presumably
    # to let the consumer infer the size; verify against caller usage.
    vector_size: int | None = Field(
        default=1024,
        description="Vector size for the embedding model (384 for BAAI/bge-small-en-v1.5, 1024 for argus-ai/pplx-embed-v1-0.6b:fp32)",
    )
    # Cap on the summed token count of a whole batched API request.
    max_tokens_per_request: int = Field(
        default=8000,
        description="Maximum total tokens allowed per embedding API request (leave buffer below model limit)",
    )
    # Cap on the token count of one individual text/chunk. Defaults equal to
    # the per-request cap, so by default a single max-size chunk fills a request.
    max_tokens_per_chunk: int = Field(
        default=8000,
        description="Maximum tokens allowed for a single chunk (should match or be below model's context limit)",
    )