Coverage for src/qdrant_loader/config/embedding.py: 100% (11 statements)
« prev ^ index » next — coverage.py v7.13.5, created at 2026-04-10 09:40 +0000
"""Configuration for embedding generation."""

from pydantic import Field

from qdrant_loader.config.base import BaseConfig
class EmbeddingConfig(BaseConfig):
    """Settings that control how document embeddings are generated.

    Defaults target a local OpenAI-compatible endpoint (e.g. Ollama on
    ``localhost:11434``) serving ``argus-ai/pplx-embed-v1-0.6b:fp32``.
    """

    # Identifier of the embedding model requested from the API.
    model: str = Field(
        description="Embedding model to use",
        default="argus-ai/pplx-embed-v1-0.6b:fp32",
    )

    # Optional credential; None means the endpoint requires no auth.
    api_key: str | None = Field(
        description="API key for the embedding service",
        default=None,
    )

    # How many texts are sent per embedding request.
    batch_size: int = Field(
        description="Number of texts to embed in a single batch",
        default=100,
    )

    # Base URL of the OpenAI-compatible embeddings API.
    endpoint: str = Field(
        description="Base URL for the embedding API endpoint",
        default="http://localhost:11434/v1",
    )

    # Token-counting strategy; "none" disables tokenizer-based counting.
    tokenizer: str = Field(
        description="Tokenizer to use for token counting. Use 'none' for Ollama local models",
        default="none",
    )

    # Dimensionality of the produced vectors; must match the chosen model.
    vector_size: int | None = Field(
        description="Vector size for the embedding model (384 for BAAI/bge-small-en-v1.5, 1024 for argus-ai/pplx-embed-v1-0.6b:fp32)",
        default=1024,
    )

    # Upper bound on the summed tokens of one batched API call.
    max_tokens_per_request: int = Field(
        description="Maximum total tokens allowed per embedding API request (leave buffer below model limit)",
        default=8000,
    )

    # Upper bound on the tokens of any single chunk.
    max_tokens_per_chunk: int = Field(
        description="Maximum tokens allowed for a single chunk (should match or be below model's context limit)",
        default=8000,
    )