Coverage for src/qdrant_loader/core/state/models.py: 99%
96 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
1"""
2SQLAlchemy models for state management database.
3"""
5from datetime import UTC
7from sqlalchemy import (
8 Boolean,
9 Column,
10 Float,
11 ForeignKey,
12 Index,
13 Integer,
14 String,
15 Text,
16 TypeDecorator,
17 UniqueConstraint,
18)
19from sqlalchemy import DateTime as SQLDateTime
20from sqlalchemy.orm import declarative_base, relationship
22from qdrant_loader.utils.logging import LoggingConfig
# Module-level logger, obtained through the project's LoggingConfig helper.
logger = LoggingConfig.get_logger(__name__)
class UTCDateTime(TypeDecorator):
    """Datetime column type that keeps stored values expressed in UTC.

    Outgoing (bound) values are always normalized to timezone-aware UTC:
    naive datetimes are assumed to already be UTC, and aware datetimes in
    any other zone are converted.  Incoming (loaded) values that arrive
    without timezone information are tagged as UTC.
    """

    impl = SQLDateTime
    cache_ok = True

    def process_bind_param(self, value, _dialect):
        """Normalize a datetime to aware UTC before it is sent to the database.

        Args:
            value: The datetime being bound, or ``None``.
            _dialect: The dialect in use (not consulted).

        Returns:
            ``None`` unchanged, otherwise a datetime whose ``tzinfo`` is UTC
            and which denotes the same instant as ``value``.
        """
        if value is None:
            return None
        if value.utcoffset() is None:
            # Naive value (or a tzinfo that reports no offset): treat the
            # wall-clock fields as already being UTC.
            return value.replace(tzinfo=UTC)
        # Aware value: shift to UTC.  Some backends (e.g. SQLite) discard the
        # offset when storing, so persisting a non-UTC wall-clock time would
        # be read back as a different instant once it is re-labelled as UTC.
        return value.astimezone(UTC)

    def process_result_value(self, value, _dialect):
        """Attach UTC to a loaded datetime that has no timezone information.

        Args:
            value: The datetime returned by the database, or ``None``.
            _dialect: The dialect in use (not consulted).

        Returns:
            ``None`` unchanged; a naive datetime with ``tzinfo`` set to UTC;
            an already-aware datetime as received.
        """
        if value is not None and not value.tzinfo:
            return value.replace(tzinfo=UTC)
        return value
# Declarative base class that every ORM model in this module inherits from.
Base = declarative_base()
class Project(Base):
    """ORM model for the ``projects`` table.

    Stores a project's identifier, descriptive metadata, target collection
    name and configuration hash.  Related source, ingestion-history and
    document-state rows are cascaded from the project, including
    delete-orphan handling.
    """

    __tablename__ = "projects"
    __table_args__ = (
        UniqueConstraint("collection_name", name="uix_project_collection"),
        Index("ix_project_display_name", "display_name"),
    )

    # Project identifier.
    id = Column(String, primary_key=True)
    # Human-readable project name.
    display_name = Column(String, nullable=False)
    # Project description.
    description = Column(Text, nullable=True)
    # QDrant collection name.
    collection_name = Column(String, nullable=False)
    # Hash of project configuration.
    config_hash = Column(String, nullable=True)
    created_at = Column(UTCDateTime(timezone=True), nullable=False)
    updated_at = Column(UTCDateTime(timezone=True), nullable=False)

    # Relationships
    sources = relationship(
        "ProjectSource",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    ingestion_histories = relationship(
        "IngestionHistory",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    document_states = relationship(
        "DocumentStateRecord",
        back_populates="project",
        cascade="all, delete-orphan",
    )
class ProjectSource(Base):
    """ORM model for the ``project_sources`` table.

    Each row records one source configured for a project together with its
    sync status.  The combination of project, source type and source name
    is unique.
    """

    __tablename__ = "project_sources"
    __table_args__ = (
        UniqueConstraint(
            "project_id",
            "source_type",
            "source_name",
            name="uix_project_source",
        ),
        Index("ix_project_source_status", "status"),
        Index("ix_project_source_type", "source_type"),
    )

    id = Column(Integer, primary_key=True, autoincrement=True)
    project_id = Column(
        String,
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    # git, confluence, jira, etc.
    source_type = Column(String, nullable=False)
    # Source identifier within project.
    source_name = Column(String, nullable=False)
    # Hash of source configuration.
    config_hash = Column(String, nullable=True)
    # Last successful sync.
    last_sync_time = Column(UTCDateTime(timezone=True), nullable=True)
    # pending, syncing, completed, error
    status = Column(String, default="pending", nullable=False)
    # Last error message, if any.
    error_message = Column(Text, nullable=True)
    created_at = Column(UTCDateTime(timezone=True), nullable=False)
    updated_at = Column(UTCDateTime(timezone=True), nullable=False)

    # Relationships
    project = relationship("Project", back_populates="sources")
class IngestionHistory(Base):
    """ORM model for the ``ingestion_history`` table.

    Each row records the latest ingestion outcome for one source, along
    with document counts and file-conversion metrics.
    """

    __tablename__ = "ingestion_history"
    # NOTE(review): project_id is nullable; many databases treat NULLs as
    # distinct inside a unique constraint, so rows without a project may not
    # be de-duplicated by uix_project_source_ingestion - confirm.
    __table_args__ = (
        UniqueConstraint(
            "project_id",
            "source_type",
            "source",
            name="uix_project_source_ingestion",
        ),
        Index("ix_ingestion_project_id", "project_id"),
    )

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Nullable for backward compatibility.
    project_id = Column(
        String,
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=True,
    )
    source_type = Column(String, nullable=False)
    source = Column(String, nullable=False)
    last_successful_ingestion = Column(UTCDateTime(timezone=True), nullable=False)
    status = Column(String, nullable=False)
    document_count = Column(Integer, default=0)
    error_message = Column(String)
    created_at = Column(UTCDateTime(timezone=True), nullable=False)
    updated_at = Column(UTCDateTime(timezone=True), nullable=False)

    # File conversion metrics
    converted_files_count = Column(Integer, default=0)
    conversion_failures_count = Column(Integer, default=0)
    attachments_processed_count = Column(Integer, default=0)
    total_conversion_time = Column(Float, default=0.0)

    # Relationships
    project = relationship("Project", back_populates="ingestion_histories")
class DocumentStateRecord(Base):
    """ORM model for the ``document_states`` table.

    Each row describes one document: where it came from, its content hash
    and deletion flag, plus optional file-conversion and attachment
    metadata.
    """

    __tablename__ = "document_states"
    # NOTE(review): project_id is nullable; many databases treat NULLs as
    # distinct inside a unique constraint, so rows without a project may not
    # be de-duplicated by uix_project_document - confirm.
    __table_args__ = (
        UniqueConstraint(
            "project_id",
            "source_type",
            "source",
            "document_id",
            name="uix_project_document",
        ),
        Index("ix_document_url", "url"),
        Index("ix_document_converted", "is_converted"),
        Index("ix_document_attachment", "is_attachment"),
        Index("ix_document_parent", "parent_document_id"),
        Index("ix_document_conversion_method", "conversion_method"),
        Index("ix_document_project_id", "project_id"),
    )

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Nullable for backward compatibility.
    project_id = Column(
        String,
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=True,
    )
    document_id = Column(String, nullable=False)
    source_type = Column(String, nullable=False)
    source = Column(String, nullable=False)
    url = Column(String, nullable=False)
    title = Column(String, nullable=False)
    content_hash = Column(String, nullable=False)
    is_deleted = Column(Boolean, default=False)
    created_at = Column(UTCDateTime(timezone=True), nullable=False)
    updated_at = Column(UTCDateTime(timezone=True), nullable=False)

    # File conversion metadata
    is_converted = Column(Boolean, default=False)
    # 'markitdown', 'markitdown_fallback', etc.
    conversion_method = Column(String, nullable=True)
    # Original file extension/MIME type.
    original_file_type = Column(String, nullable=True)
    # Original filename.
    original_filename = Column(String, nullable=True)
    # File size in bytes.
    file_size = Column(Integer, nullable=True)
    conversion_failed = Column(Boolean, default=False)
    # Error message if conversion failed.
    conversion_error = Column(Text, nullable=True)
    # Time taken for conversion in seconds.
    conversion_time = Column(Float, nullable=True)

    # Attachment metadata
    is_attachment = Column(Boolean, default=False)
    # ID of parent document for attachments.
    parent_document_id = Column(String, nullable=True)
    # Unique attachment identifier.
    attachment_id = Column(String, nullable=True)
    # Original attachment filename.
    attachment_filename = Column(String, nullable=True)
    # MIME type of attachment.
    attachment_mime_type = Column(String, nullable=True)
    # Original download URL.
    attachment_download_url = Column(String, nullable=True)
    # Author of attachment.
    attachment_author = Column(String, nullable=True)
    # Attachment creation date.
    attachment_created_at = Column(UTCDateTime(timezone=True), nullable=True)

    # Relationships
    project = relationship("Project", back_populates="document_states")