Coverage for src/qdrant_loader/core/state/models.py: 99%
96 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:50 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-04 05:50 +0000
1"""
2SQLAlchemy models for state management database.
3"""
5from datetime import UTC
7from sqlalchemy import (
8 Boolean,
9 Column,
10 Index,
11 Integer,
12 String,
13 TypeDecorator,
14 UniqueConstraint,
15 Text,
16 Float,
17 ForeignKey,
18)
19from sqlalchemy import DateTime as SQLDateTime
20from sqlalchemy.orm import declarative_base, relationship
22from qdrant_loader.utils.logging import LoggingConfig
# Module-level logger, named after this module and obtained through the
# project's LoggingConfig helper.
logger = LoggingConfig.get_logger(__name__)
class UTCDateTime(TypeDecorator):
    """Datetime column type that keeps values timezone-aware and in UTC.

    Values bound to a statement are normalised to UTC. Values loaded from the
    database that carry no timezone are tagged as UTC.
    """

    impl = SQLDateTime
    cache_ok = True

    def process_bind_param(self, value, dialect):
        """Normalise ``value`` to an aware UTC datetime before it is stored.

        Args:
            value: The datetime being bound, or ``None``.
            dialect: The dialect in use (not consulted).

        Returns:
            ``None`` if ``value`` is ``None``. A naive datetime is returned
            with ``tzinfo=UTC`` attached (its clock fields are unchanged); an
            aware datetime is converted to the same instant expressed in UTC.
        """
        if value is None:
            return None
        if value.utcoffset() is None:
            # Naive input is taken to already be expressed in UTC.
            return value.replace(tzinfo=UTC)
        # Convert aware values instead of passing them through: a backend that
        # persists datetimes without an offset (SQLAlchemy's SQLite DateTime
        # does) would otherwise store the local wall-clock time, which
        # process_result_value would then label as UTC, shifting the instant.
        return value.astimezone(UTC)

    def process_result_value(self, value, dialect):
        """Attach UTC to naive datetimes loaded from the database.

        Args:
            value: The datetime produced by the underlying type, or ``None``.
            dialect: The dialect in use (not consulted).

        Returns:
            ``None`` if ``value`` is ``None``; ``value`` with ``tzinfo=UTC``
            when it has no tzinfo; otherwise ``value`` unchanged.
        """
        if value is None:
            return None
        if not value.tzinfo:
            return value.replace(tzinfo=UTC)
        return value
# Declarative base class that every model in this module inherits from.
Base = declarative_base()
class Project(Base):
    """Project record: identity, target collection and configuration hash.

    The relationships below use ``all, delete-orphan`` cascades, so removing
    a project through the ORM also removes its sources, ingestion histories
    and document states.
    """

    __tablename__ = "projects"

    # Project identifier (primary key).
    id = Column(String, primary_key=True)
    # Human-readable project name.
    display_name = Column(String, nullable=False)
    # Project description.
    description = Column(Text, nullable=True)
    # QDrant collection name; constrained to be unique in __table_args__.
    collection_name = Column(String, nullable=False)
    # Hash of project configuration.
    config_hash = Column(String, nullable=True)
    created_at = Column(UTCDateTime(timezone=True), nullable=False)
    updated_at = Column(UTCDateTime(timezone=True), nullable=False)

    # Child rows owned by this project.
    sources = relationship(
        "ProjectSource",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    ingestion_histories = relationship(
        "IngestionHistory",
        back_populates="project",
        cascade="all, delete-orphan",
    )
    document_states = relationship(
        "DocumentStateRecord",
        back_populates="project",
        cascade="all, delete-orphan",
    )

    __table_args__ = (
        UniqueConstraint("collection_name", name="uix_project_collection"),
        Index("ix_project_display_name", "display_name"),
    )
class ProjectSource(Base):
    """Configuration and sync status of one source belonging to a project.

    A source is identified within its project by ``(source_type,
    source_name)``; that triple is unique per ``uix_project_source``.
    """

    __tablename__ = "project_sources"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Owning project; the foreign key is declared with ON DELETE CASCADE.
    project_id = Column(
        String,
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=False,
    )
    # git, confluence, jira, etc.
    source_type = Column(String, nullable=False)
    # Source identifier within project.
    source_name = Column(String, nullable=False)
    # Hash of source configuration.
    config_hash = Column(String, nullable=True)
    # Last successful sync.
    last_sync_time = Column(UTCDateTime(timezone=True), nullable=True)
    # One of: pending, syncing, completed, error.
    status = Column(String, nullable=False, default="pending")
    # Last error message, if any.
    error_message = Column(Text, nullable=True)
    created_at = Column(UTCDateTime(timezone=True), nullable=False)
    updated_at = Column(UTCDateTime(timezone=True), nullable=False)

    # Parent project (other side of Project.sources).
    project = relationship("Project", back_populates="sources")

    __table_args__ = (
        UniqueConstraint(
            "project_id",
            "source_type",
            "source_name",
            name="uix_project_source",
        ),
        Index("ix_project_source_status", "status"),
        Index("ix_project_source_type", "source_type"),
    )
class IngestionHistory(Base):
    """Ingestion record for a source, including file-conversion counters.

    Two unique constraints apply: the project-scoped ``(project_id,
    source_type, source)`` and the legacy ``(source_type, source)``, which is
    retained for backward compatibility.
    """

    __tablename__ = "ingestion_history"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Nullable for backward compatibility.
    project_id = Column(
        String,
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=True,
    )
    source_type = Column(String, nullable=False)
    source = Column(String, nullable=False)
    last_successful_ingestion = Column(UTCDateTime(timezone=True), nullable=False)
    status = Column(String, nullable=False)
    document_count = Column(Integer, default=0)
    error_message = Column(String)
    created_at = Column(UTCDateTime(timezone=True), nullable=False)
    updated_at = Column(UTCDateTime(timezone=True), nullable=False)

    # --- File conversion metrics ---
    converted_files_count = Column(Integer, default=0)
    conversion_failures_count = Column(Integer, default=0)
    attachments_processed_count = Column(Integer, default=0)
    total_conversion_time = Column(Float, default=0.0)

    # Parent project (other side of Project.ingestion_histories).
    project = relationship("Project", back_populates="ingestion_histories")

    __table_args__ = (
        UniqueConstraint(
            "project_id",
            "source_type",
            "source",
            name="uix_project_source_ingestion",
        ),
        # Legacy constraint, kept for backward compatibility.
        UniqueConstraint("source_type", "source", name="uix_source"),
        Index("ix_ingestion_project_id", "project_id"),
    )
class DocumentStateRecord(Base):
    """State of a single document, plus conversion and attachment metadata.

    Two unique constraints apply: the project-scoped ``(project_id,
    source_type, source, document_id)`` and the legacy ``(source_type,
    source, document_id)``, which is retained for backward compatibility.
    """

    __tablename__ = "document_states"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Nullable for backward compatibility.
    project_id = Column(
        String,
        ForeignKey("projects.id", ondelete="CASCADE"),
        nullable=True,
    )
    document_id = Column(String, nullable=False)
    source_type = Column(String, nullable=False)
    source = Column(String, nullable=False)
    url = Column(String, nullable=False)
    title = Column(String, nullable=False)
    content_hash = Column(String, nullable=False)
    is_deleted = Column(Boolean, default=False)
    created_at = Column(UTCDateTime(timezone=True), nullable=False)
    updated_at = Column(UTCDateTime(timezone=True), nullable=False)

    # --- File conversion metadata ---
    is_converted = Column(Boolean, default=False)
    # 'markitdown', 'markitdown_fallback', etc.
    conversion_method = Column(String, nullable=True)
    # Original file extension/MIME type.
    original_file_type = Column(String, nullable=True)
    # Original filename.
    original_filename = Column(String, nullable=True)
    # File size in bytes.
    file_size = Column(Integer, nullable=True)
    conversion_failed = Column(Boolean, default=False)
    # Error message if conversion failed.
    conversion_error = Column(Text, nullable=True)
    # Time taken for conversion, in seconds.
    conversion_time = Column(Float, nullable=True)

    # --- Attachment metadata ---
    is_attachment = Column(Boolean, default=False)
    # ID of parent document for attachments.
    parent_document_id = Column(String, nullable=True)
    # Unique attachment identifier.
    attachment_id = Column(String, nullable=True)
    # Original attachment filename.
    attachment_filename = Column(String, nullable=True)
    # MIME type of attachment.
    attachment_mime_type = Column(String, nullable=True)
    # Original download URL.
    attachment_download_url = Column(String, nullable=True)
    # Author of attachment.
    attachment_author = Column(String, nullable=True)
    # Attachment creation date.
    attachment_created_at = Column(UTCDateTime(timezone=True), nullable=True)

    # Parent project (other side of Project.document_states).
    project = relationship("Project", back_populates="document_states")

    __table_args__ = (
        UniqueConstraint(
            "project_id",
            "source_type",
            "source",
            "document_id",
            name="uix_project_document",
        ),
        # Legacy constraint, kept for backward compatibility.
        UniqueConstraint(
            "source_type",
            "source",
            "document_id",
            name="uix_document",
        ),
        Index("ix_document_url", "url"),
        Index("ix_document_converted", "is_converted"),
        Index("ix_document_attachment", "is_attachment"),
        Index("ix_document_parent", "parent_document_id"),
        Index("ix_document_conversion_method", "conversion_method"),
        Index("ix_document_project_id", "project_id"),
    )