Coverage for src/qdrant_loader/connectors/attachments.py: 0%
22 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
1from __future__ import annotations
3from typing import Any
5from qdrant_loader.core.attachment_downloader import AttachmentMetadata
def attachment_metadata_from_dict(
    data: dict[str, Any], parent_document_id: str
) -> AttachmentMetadata:
    """Create AttachmentMetadata from a simple dict structure with input validation.

    Required keys: ``id``, ``download_url``. Each is stringified and stripped;
    a missing key, an explicit ``None``, or a blank/whitespace-only value is
    rejected.

    Optional keys: ``filename``, ``size``, ``mime_type``, ``created_at``,
    ``updated_at``, ``author``. Defaults when absent or unusable:

    * ``filename`` -> ``"unknown"``
    * ``mime_type`` -> ``"application/octet-stream"``
    * ``size`` -> ``0`` (also used for non-numeric or negative values)
    * ``created_at`` / ``updated_at`` / ``author`` -> ``None`` (passed through
      from ``data.get`` without conversion)

    Args:
        data: Raw attachment description.
        parent_document_id: Identifier forwarded unchanged to the resulting
            metadata object.

    Returns:
        The populated ``AttachmentMetadata`` instance.

    Raises:
        ValueError: If ``id`` or ``download_url`` is missing, ``None``, or
            empty after stripping whitespace.
    """
    # Required fields. ``None`` must be handled before ``str()``: otherwise
    # ``str(None)`` yields the truthy literal "None" and slips past the check.
    raw_id = data.get("id")
    attachment_id = "" if raw_id is None else str(raw_id).strip()
    if not attachment_id:
        raise ValueError("Attachment 'id' is required and cannot be empty.")

    raw_url = data.get("download_url")
    download_url = "" if raw_url is None else str(raw_url).strip()
    if not download_url:
        raise ValueError("Attachment 'download_url' is required and cannot be empty.")

    # Optional text fields: falsy values (None, "", 0, ...) fall back to the
    # default; anything else is coerced with ``str()`` so a non-string value
    # cannot raise AttributeError on ``.strip()``.
    filename = str(data.get("filename") or "").strip() or "unknown"
    mime_type = (
        str(data.get("mime_type") or "").strip() or "application/octet-stream"
    )

    # Best-effort size coercion: unparsable input is treated as "unknown" (0)
    # rather than failing the whole attachment.
    size_value = data.get("size", 0)
    try:
        size_int = int(size_value) if size_value not in (None, "") else 0
    except (TypeError, ValueError, OverflowError):
        size_int = 0
    # A negative size is meaningless; clamp it to zero.
    size_int = max(size_int, 0)

    return AttachmentMetadata(
        id=attachment_id,
        filename=filename,
        size=size_int,
        mime_type=mime_type,
        download_url=download_url,
        parent_document_id=parent_document_id,
        created_at=data.get("created_at"),
        updated_at=data.get("updated_at"),
        author=data.get("author"),
    )