Coverage for src/qdrant_loader/config/chunking.py: 91%

11 statements  

« prev     ^ index     » next       coverage.py v7.10.0, created at 2025-07-25 11:39 +0000

1"""Configuration for text chunking.""" 

2 

3from pydantic import BaseModel, Field, ValidationInfo, field_validator 

4 

5 

class ChunkingConfig(BaseModel):
    """Configuration for text chunking.

    Fields:
        chunk_size: Size of each text chunk in characters; must be > 0.
        chunk_overlap: Number of characters shared between consecutive
            chunks; must be >= 0 and strictly less than ``chunk_size``.
        max_chunks_per_document: Upper bound on the number of chunks
            produced for one document (safety limit); must be > 0.
    """

    chunk_size: int = Field(
        default=1000,
        description="Size of text chunks in characters",
        gt=0,
        title="Chunk Size",
    )
    chunk_overlap: int = Field(
        default=200,
        description="Overlap between chunks in characters",
        ge=0,
        title="Chunk Overlap",
        # Pydantic does not run validators on default values unless asked to.
        # Without this, ChunkingConfig(chunk_size=150) would silently keep the
        # default overlap of 200, which is larger than the chunk itself.
        validate_default=True,
    )
    max_chunks_per_document: int = Field(
        default=1000,
        description="Maximum number of chunks per document (safety limit)",
        gt=0,
        title="Max Chunks Per Document",
    )

    @field_validator("chunk_overlap")
    @classmethod
    def validate_chunk_overlap(cls, v: int, info: ValidationInfo) -> int:
        """Validate that chunk overlap is less than chunk size.

        Args:
            v: The candidate ``chunk_overlap`` value (explicit or default).
            info: Validation context; ``info.data`` holds the fields that
                were declared earlier and validated successfully.

        Returns:
            The unchanged overlap value when it is acceptable.

        Raises:
            ValueError: If the overlap is greater than or equal to the
                validated ``chunk_size``.
        """
        # chunk_size is declared before chunk_overlap, so it is present in
        # info.data only if it passed its own validation.
        chunk_size = info.data.get("chunk_size")
        if chunk_size is None:
            # chunk_size was rejected and its error is already reported;
            # comparing against a made-up fallback would only add a
            # misleading second error.
            return v
        if v >= chunk_size:
            raise ValueError("Chunk overlap must be less than chunk size")
        return v