Coverage for src/qdrant_loader/core/file_conversion/conversion_config.py: 100%

19 statements  

« prev     ^ index     » next       coverage.py v7.10.0, created at 2025-07-25 11:39 +0000

1"""Configuration models for file conversion settings.""" 

2 

3 

4from pydantic import BaseModel, Field 

5 

6 

class MarkItDownConfig(BaseModel):
    """Settings that apply only to the MarkItDown converter.

    All fields have defaults, so the model can be built with no arguments.
    """

    # Opt-in switch for LLM-generated image descriptions; off by default.
    enable_llm_descriptions: bool = Field(
        description="Enable LLM integration for image descriptions",
        default=False,
    )

    # Model name used for image descriptions.
    llm_model: str = Field(
        description="LLM model for image descriptions (when enabled)",
        default="gpt-4o",
    )

    # Base URL of the LLM service.
    llm_endpoint: str = Field(
        description="LLM endpoint (when enabled)",
        default="https://api.openai.com/v1",
    )

    # NOTE(review): the description calls the key "required" when
    # descriptions are enabled, but no validator in this class enforces it.
    llm_api_key: str | None = Field(
        description="API key for LLM service (required when enable_llm_descriptions is True)",
        default=None,
    )

26 

27 

class FileConversionConfig(BaseModel):
    """Global limits and converter settings for file conversion.

    Both numeric limits are declared with ``gt=0``, so zero and negative
    values are rejected at validation time.
    """

    # Upper bound on the size of a file to convert, in bytes (50 MB default).
    max_file_size: int = Field(
        description="Maximum file size for conversion (in bytes)",
        default=50 * 1024 * 1024,
        gt=0,
    )

    # Time budget for one conversion, in seconds (5 minutes default).
    conversion_timeout: int = Field(
        description="Timeout for conversion operations (in seconds)",
        default=5 * 60,
        gt=0,
    )

    # Nested MarkItDown settings; a default instance is built per model.
    markitdown: MarkItDownConfig = Field(
        description="MarkItDown specific settings",
        default_factory=MarkItDownConfig,
    )

    def get_max_file_size_mb(self) -> float:
        """Express ``max_file_size`` in megabytes.

        Returns:
            The byte limit divided by 1024 * 1024.
        """
        bytes_per_mb = 1024 * 1024
        return self.max_file_size / bytes_per_mb

    def is_file_size_allowed(self, file_size: int) -> bool:
        """Tell whether a file of the given size may be converted.

        Args:
            file_size: Size of the candidate file, in bytes.

        Returns:
            True when ``file_size`` does not exceed ``max_file_size``
            (the limit itself is allowed), False otherwise.
        """
        limit = self.max_file_size
        return limit >= file_size

65 

66 

class ConnectorFileConversionConfig(BaseModel):
    """Per-connector switches for file conversion and attachment handling."""

    # Conversion is opt-in for each connector.
    enable_file_conversion: bool = Field(
        description="Enable file conversion for this connector",
        default=False,
    )

    # Tri-state flag: None means the option was not specified.
    download_attachments: bool | None = Field(
        description="Download and process attachments (for Confluence/JIRA/PublicDocs)",
        default=None,
    )

    def should_download_attachments(self) -> bool:
        """Resolve the tri-state ``download_attachments`` flag to a boolean.

        Returns:
            True only when ``download_attachments`` is exactly ``True``;
            an unset (``None``) or ``False`` value yields False.
        """
        # Identity check on purpose: an unspecified flag counts as "no".
        if self.download_attachments is True:
            return True
        return False