Coverage for src/qdrant_loader_mcp_server/search/components/search_result_models.py: 87%

451 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-13 09:20 +0000

1"""Modular search result data structures for hybrid search.""" 

2 

3from dataclasses import dataclass, field 

4 

5 

6@dataclass 

7class BaseSearchResult: 

8 """Core search result fields.""" 

9 

10 score: float 

11 text: str 

12 source_type: str 

13 source_title: str 

14 source_url: str | None = None 

15 file_path: str | None = None 

16 repo_name: str | None = None 

17 vector_score: float = 0.0 

18 keyword_score: float = 0.0 

19 # Document identification 

20 document_id: str | None = None 

21 created_at: str | None = None 

22 last_modified: str | None = None 

23 

24 

25@dataclass 

26class ProjectInfo: 

27 """Project-related information.""" 

28 

29 project_id: str | None = None 

30 project_name: str | None = None 

31 project_description: str | None = None 

32 collection_name: str | None = None 

33 

34 

35@dataclass 

36class HierarchyInfo: 

37 """Hierarchy information (primarily for Confluence).""" 

38 

39 parent_id: str | None = None 

40 parent_title: str | None = None 

41 breadcrumb_text: str | None = None 

42 depth: int | None = None 

43 children_count: int | None = None 

44 hierarchy_context: str | None = None 

45 

46 

47@dataclass 

48class AttachmentInfo: 

49 """Attachment information for files attached to documents.""" 

50 

51 is_attachment: bool = False 

52 parent_document_id: str | None = None 

53 parent_document_title: str | None = None 

54 attachment_id: str | None = None 

55 original_filename: str | None = None 

56 file_size: int | None = None 

57 mime_type: str | None = None 

58 attachment_author: str | None = None 

59 attachment_context: str | None = None 

60 

61 

62@dataclass 

63class SectionInfo: 

64 """Section-level intelligence.""" 

65 

66 section_title: str | None = None 

67 section_type: str | None = None # e.g., "h1", "h2", "content" 

68 section_level: int | None = None 

69 section_anchor: str | None = None 

70 section_breadcrumb: str | None = None 

71 section_depth: int | None = None 

72 

73 

74@dataclass 

75class ContentAnalysis: 

76 """Content analysis information.""" 

77 

78 has_code_blocks: bool = False 

79 has_tables: bool = False 

80 has_images: bool = False 

81 has_links: bool = False 

82 word_count: int | None = None 

83 char_count: int | None = None 

84 estimated_read_time: int | None = None # minutes 

85 paragraph_count: int | None = None 

86 

87 

88@dataclass 

89class SemanticAnalysis: 

90 """Semantic analysis (NLP results).""" 

91 

92 entities: list[dict | str] = field(default_factory=list) 

93 topics: list[dict | str] = field(default_factory=list) 

94 key_phrases: list[dict | str] = field(default_factory=list) 

95 pos_tags: list[dict] = field(default_factory=list) 

96 

97 

98@dataclass 

99class NavigationContext: 

100 """Navigation context information.""" 

101 

102 previous_section: str | None = None 

103 next_section: str | None = None 

104 sibling_sections: list[str] = field(default_factory=list) 

105 subsections: list[str] = field(default_factory=list) 

106 document_hierarchy: list[str] = field(default_factory=list) 

107 

108 

109@dataclass 

110class ChunkingContext: 

111 """Chunking context information.""" 

112 

113 chunk_index: int | None = None 

114 total_chunks: int | None = None 

115 chunking_strategy: str | None = None 

116 

117 

118@dataclass 

119class ConversionInfo: 

120 """File conversion intelligence.""" 

121 

122 original_file_type: str | None = None 

123 conversion_method: str | None = None 

124 is_excel_sheet: bool = False 

125 is_converted: bool = False 

126 

127 

128@dataclass 

129class CrossReferenceInfo: 

130 """Cross-references and enhanced context.""" 

131 

132 cross_references: list[dict] = field(default_factory=list) 

133 topic_analysis: dict | None = None 

134 content_type_context: str | None = None # Human-readable content description 

135 

136 

@dataclass
class HybridSearchResult:
    """Complete hybrid search result combining all components.

    ``base`` holds the mandatory core fields. Every other component is
    optional and may be ``None``. The read-only properties below expose each
    component field directly on the result (kept for backward compatibility);
    when a component is missing they return a neutral value: ``None`` for
    optional scalars, ``False`` for flags and a new empty list for list
    fields.
    """

    # Core fields
    base: BaseSearchResult

    # Optional components
    project: ProjectInfo | None = None
    hierarchy: HierarchyInfo | None = None
    attachment: AttachmentInfo | None = None
    section: SectionInfo | None = None
    content: ContentAnalysis | None = None
    semantic: SemanticAnalysis | None = None
    navigation: NavigationContext | None = None
    chunking: ChunkingContext | None = None
    conversion: ConversionInfo | None = None
    cross_reference: CrossReferenceInfo | None = None

    # Convenience properties for backward compatibility
    @property
    def score(self) -> float:
        """Get result score."""
        return self.base.score

    @property
    def text(self) -> str:
        """Get result text."""
        return self.base.text

    @property
    def source_type(self) -> str:
        """Get source type."""
        return self.base.source_type

    @property
    def source_title(self) -> str:
        """Get source title."""
        return self.base.source_title

    @property
    def source_url(self) -> str | None:
        """Get source URL."""
        return self.base.source_url

    @property
    def file_path(self) -> str | None:
        """Get file path."""
        return self.base.file_path

    @property
    def repo_name(self) -> str | None:
        """Get repository name."""
        return self.base.repo_name

    @property
    def vector_score(self) -> float:
        """Get vector search score."""
        return self.base.vector_score

    @property
    def keyword_score(self) -> float:
        """Get keyword search score."""
        return self.base.keyword_score

    @property
    def document_id(self) -> str | None:
        """Get document ID."""
        return self.base.document_id

    @property
    def created_at(self) -> str | None:
        """Get creation timestamp."""
        return self.base.created_at

    @property
    def last_modified(self) -> str | None:
        """Get last modified timestamp."""
        return self.base.last_modified

    # Project info properties
    @property
    def project_id(self) -> str | None:
        """Get project ID."""
        return self.project.project_id if self.project else None

    @property
    def project_name(self) -> str | None:
        """Get project name."""
        return self.project.project_name if self.project else None

    @property
    def project_description(self) -> str | None:
        """Get project description."""
        return self.project.project_description if self.project else None

    @property
    def collection_name(self) -> str | None:
        """Get collection name."""
        return self.project.collection_name if self.project else None

    # Hierarchy info properties
    @property
    def parent_id(self) -> str | None:
        """Get parent ID."""
        return self.hierarchy.parent_id if self.hierarchy else None

    @property
    def parent_title(self) -> str | None:
        """Get parent title."""
        return self.hierarchy.parent_title if self.hierarchy else None

    @property
    def breadcrumb_text(self) -> str | None:
        """Get breadcrumb text."""
        return self.hierarchy.breadcrumb_text if self.hierarchy else None

    @property
    def depth(self) -> int | None:
        """Get depth."""
        return self.hierarchy.depth if self.hierarchy else None

    @property
    def children_count(self) -> int | None:
        """Get children count."""
        return self.hierarchy.children_count if self.hierarchy else None

    @property
    def hierarchy_context(self) -> str | None:
        """Get hierarchy context."""
        return self.hierarchy.hierarchy_context if self.hierarchy else None

    # Attachment info properties
    @property
    def is_attachment(self) -> bool:
        """Check if this is an attachment."""
        return self.attachment.is_attachment if self.attachment else False

    @property
    def parent_document_id(self) -> str | None:
        """Get parent document ID."""
        return self.attachment.parent_document_id if self.attachment else None

    @property
    def parent_document_title(self) -> str | None:
        """Get parent document title."""
        return self.attachment.parent_document_title if self.attachment else None

    @property
    def attachment_id(self) -> str | None:
        """Get attachment ID."""
        return self.attachment.attachment_id if self.attachment else None

    @property
    def original_filename(self) -> str | None:
        """Get original filename."""
        return self.attachment.original_filename if self.attachment else None

    @property
    def file_size(self) -> int | None:
        """Get file size."""
        return self.attachment.file_size if self.attachment else None

    @property
    def mime_type(self) -> str | None:
        """Get MIME type."""
        return self.attachment.mime_type if self.attachment else None

    @property
    def attachment_author(self) -> str | None:
        """Get attachment author."""
        return self.attachment.attachment_author if self.attachment else None

    @property
    def attachment_context(self) -> str | None:
        """Get attachment context."""
        return self.attachment.attachment_context if self.attachment else None

    # Section info properties
    @property
    def section_title(self) -> str | None:
        """Get section title."""
        return self.section.section_title if self.section else None

    @property
    def section_type(self) -> str | None:
        """Get section type."""
        return self.section.section_type if self.section else None

    @property
    def section_level(self) -> int | None:
        """Get section level."""
        return self.section.section_level if self.section else None

    @property
    def section_anchor(self) -> str | None:
        """Get section anchor."""
        return self.section.section_anchor if self.section else None

    @property
    def section_breadcrumb(self) -> str | None:
        """Get section breadcrumb."""
        return self.section.section_breadcrumb if self.section else None

    @property
    def section_depth(self) -> int | None:
        """Get section depth."""
        return self.section.section_depth if self.section else None

    # Content analysis properties
    @property
    def has_code_blocks(self) -> bool:
        """Check if content has code blocks."""
        return self.content.has_code_blocks if self.content else False

    @property
    def has_tables(self) -> bool:
        """Check if content has tables."""
        return self.content.has_tables if self.content else False

    @property
    def has_images(self) -> bool:
        """Check if content has images."""
        return self.content.has_images if self.content else False

    @property
    def has_links(self) -> bool:
        """Check if content has links."""
        return self.content.has_links if self.content else False

    @property
    def word_count(self) -> int | None:
        """Get word count."""
        return self.content.word_count if self.content else None

    @property
    def char_count(self) -> int | None:
        """Get character count."""
        return self.content.char_count if self.content else None

    @property
    def estimated_read_time(self) -> int | None:
        """Get estimated read time."""
        return self.content.estimated_read_time if self.content else None

    @property
    def paragraph_count(self) -> int | None:
        """Get paragraph count."""
        return self.content.paragraph_count if self.content else None

    # Semantic analysis properties
    @property
    def entities(self) -> list[dict | str]:
        """Get entities."""
        return self.semantic.entities if self.semantic else []

    @property
    def topics(self) -> list[dict | str]:
        """Get topics."""
        return self.semantic.topics if self.semantic else []

    @property
    def key_phrases(self) -> list[dict | str]:
        """Get key phrases."""
        return self.semantic.key_phrases if self.semantic else []

    @property
    def pos_tags(self) -> list[dict]:
        """Get POS tags."""
        return self.semantic.pos_tags if self.semantic else []

    # Navigation context properties
    @property
    def previous_section(self) -> str | None:
        """Get previous section."""
        return self.navigation.previous_section if self.navigation else None

    @property
    def next_section(self) -> str | None:
        """Get next section."""
        return self.navigation.next_section if self.navigation else None

    @property
    def sibling_sections(self) -> list[str]:
        """Get sibling sections."""
        return self.navigation.sibling_sections if self.navigation else []

    @property
    def subsections(self) -> list[str]:
        """Get subsections."""
        return self.navigation.subsections if self.navigation else []

    @property
    def document_hierarchy(self) -> list[str]:
        """Get document hierarchy."""
        return self.navigation.document_hierarchy if self.navigation else []

    # Chunking context properties
    @property
    def chunk_index(self) -> int | None:
        """Get chunk index."""
        return self.chunking.chunk_index if self.chunking else None

    @property
    def total_chunks(self) -> int | None:
        """Get total chunks."""
        return self.chunking.total_chunks if self.chunking else None

    @property
    def chunking_strategy(self) -> str | None:
        """Get chunking strategy."""
        return self.chunking.chunking_strategy if self.chunking else None

    # Conversion info properties
    @property
    def original_file_type(self) -> str | None:
        """Get original file type."""
        return self.conversion.original_file_type if self.conversion else None

    @property
    def conversion_method(self) -> str | None:
        """Get conversion method."""
        return self.conversion.conversion_method if self.conversion else None

    @property
    def is_excel_sheet(self) -> bool:
        """Check if this is an Excel sheet."""
        return self.conversion.is_excel_sheet if self.conversion else False

    @property
    def is_converted(self) -> bool:
        """Check if this content is converted."""
        return self.conversion.is_converted if self.conversion else False

    # Cross-reference properties
    @property
    def cross_references(self) -> list[dict]:
        """Get cross references."""
        return self.cross_reference.cross_references if self.cross_reference else []

    @property
    def topic_analysis(self) -> dict | None:
        """Get topic analysis."""
        return self.cross_reference.topic_analysis if self.cross_reference else None

    @property
    def content_type_context(self) -> str | None:
        """Get content type context."""
        return (
            self.cross_reference.content_type_context if self.cross_reference else None
        )

    # Helper methods for display (like SearchResult)
    def get_display_title(self) -> str:
        """Get the display title with enhanced hierarchy context.

        The base title is ``source_title``. When that is empty or only
        whitespace, the first non-empty value of the file name taken from
        ``file_path``, ``repo_name`` and the literal ``"Untitled"`` is used.

        Returns:
            ``"<section or base title> (<section breadcrumb>)"`` when a
            section breadcrumb exists; ``"<base title> (<breadcrumb>)"`` for
            Confluence results with a hierarchy breadcrumb;
            ``"<base title> > <section title>"`` when a distinct section
            title exists; otherwise the base title alone.
        """
        base_title = self.source_title
        if not base_title or not base_title.strip():
            import os

            # basename() is "" for a path ending in a separator (e.g.
            # "docs/"), so an empty file name must fall through to the next
            # candidate instead of becoming the title.
            file_name = os.path.basename(self.file_path) if self.file_path else ""
            base_title = file_name or self.repo_name or "Untitled"

        # The section breadcrumb gives the most specific context, so it wins.
        if self.section_breadcrumb:
            return f"{self.section_title or base_title} ({self.section_breadcrumb})"
        if self.breadcrumb_text and self.source_type == "confluence":
            return f"{base_title} ({self.breadcrumb_text})"
        if self.section_title and self.section_title != base_title:
            return f"{base_title} > {self.section_title}"
        return base_title

    def get_project_info(self) -> str | None:
        """Get formatted project information for display.

        Returns:
            ``None`` when there is no project ID; otherwise
            ``"Project: <name or id>"`` followed by the description and the
            collection name when they are set.
        """
        if not self.project_id:
            return None

        project_info = f"Project: {self.project_name or self.project_id}"
        if self.project_description:
            project_info += f" - {self.project_description}"
        if self.collection_name:
            project_info += f" (Collection: {self.collection_name})"
        return project_info

    def get_hierarchy_info(self) -> str | None:
        """Get formatted hierarchy information for display.

        Returns:
            ``None`` for non-Confluence results or when nothing is available;
            otherwise the hierarchy context, section breadcrumb and chunk
            position (1-based) joined with ``" | "``.
        """
        if self.source_type != "confluence":
            return None

        parts = []
        if self.hierarchy_context:
            parts.append(self.hierarchy_context)
        if self.section_breadcrumb:
            parts.append(f"Section: {self.section_breadcrumb}")
        if self.chunk_index is not None and self.total_chunks is not None:
            # chunk_index is displayed 1-based.
            parts.append(f"Chunk: {self.chunk_index + 1}/{self.total_chunks}")

        return " | ".join(parts) if parts else None

    def get_content_info(self) -> str | None:
        """Get formatted content analysis information.

        Returns:
            ``None`` when none of the code/table/image/link flags is set;
            otherwise ``"Contains: ..."`` followed by the word count and the
            estimated read time when they are non-zero.
        """
        flags = (
            ("Code", self.has_code_blocks),
            ("Tables", self.has_tables),
            ("Images", self.has_images),
            ("Links", self.has_links),
        )
        content_parts = [label for label, present in flags if present]
        if not content_parts:
            return None

        content_info = f"Contains: {', '.join(content_parts)}"
        if self.word_count:
            content_info += f" | {self.word_count} words"
        if self.estimated_read_time:
            content_info += f" | ~{self.estimated_read_time}min read"
        return content_info

    def get_semantic_info(self) -> str | None:
        """Get formatted semantic analysis information.

        Returns:
            The entity count, up to three topics (with a "+N more" suffix)
            and the key phrase count joined with ``" | "``, or ``None`` when
            all three are empty.
        """
        parts = []

        if self.entities:
            parts.append(f"{len(self.entities)} entities")

        if self.topics:
            # Topics may be plain strings or dicts; dicts are shown by their
            # "text" entry, falling back to the dict's string form.
            topic_texts = []
            for topic in self.topics[:3]:
                if isinstance(topic, str):
                    topic_texts.append(topic)
                elif isinstance(topic, dict):
                    topic_texts.append(topic.get("text", str(topic)))
                else:
                    topic_texts.append(str(topic))

            topic_list = ", ".join(topic_texts)
            if len(self.topics) > 3:
                topic_list += f" (+{len(self.topics) - 3} more)"
            parts.append(f"Topics: {topic_list}")

        if self.key_phrases:
            parts.append(f"{len(self.key_phrases)} key phrases")

        return " | ".join(parts) if parts else None

    def get_section_context(self) -> str | None:
        """Get section context for enhanced display.

        Returns:
            ``None`` when there is no section title; otherwise the title,
            prefixed with ``"[<TYPE>] "`` when both section type and a
            non-zero section level are set, and suffixed with
            ``" (#<anchor>)"`` when an anchor is set.
        """
        if not self.section_title:
            return None

        context = self.section_title
        if self.section_type and self.section_level:
            context = f"[{self.section_type.upper()}] {context}"
        if self.section_anchor:
            context += f" (#{self.section_anchor})"
        return context

    def get_attachment_info(self) -> str | None:
        """Get formatted attachment information for display.

        Returns:
            The attachment context, or ``None`` when this is not an
            attachment or no context is available.
        """
        if not self.is_attachment or not self.attachment_context:
            return None
        return self.attachment_context

    def get_file_type(self) -> str | None:
        """Get the file type from conversion info, MIME type or filename.

        Returns:
            The original file type (with a "converted via" suffix when the
            content was converted and the method is known); else the MIME
            type; else the lower-cased extension of the original filename
            without its dot; else ``None``.
        """
        if self.original_file_type:
            file_type = self.original_file_type
            if self.is_converted and self.conversion_method:
                file_type += f" (converted via {self.conversion_method})"
            return file_type
        if self.mime_type:
            return self.mime_type
        if self.original_filename:
            import os

            _, ext = os.path.splitext(self.original_filename)
            return ext.lower().lstrip(".") if ext else None
        return None

    def is_root_document(self) -> bool:
        """Check if this is a root document (no parent)."""
        return self.parent_id is None and self.parent_document_id is None

    def has_children(self) -> bool:
        """Check if this document has children (child pages or subsections)."""
        return (self.children_count is not None and self.children_count > 0) or bool(
            self.subsections
        )

    def is_file_attachment(self) -> bool:
        """Check if this is a file attachment."""
        return self.is_attachment

    def belongs_to_project(self, project_id: str) -> bool:
        """Check if this result belongs to a specific project."""
        return self.project_id == project_id

    def belongs_to_any_project(self, project_ids: list[str]) -> bool:
        """Check if this result belongs to any of the specified projects."""
        return self.project_id is not None and self.project_id in project_ids

    def is_code_content(self) -> bool:
        """Check if this result contains code."""
        return self.has_code_blocks or self.section_type == "code"

    def is_documentation(self) -> bool:
        """Check if this result is documentation content."""
        return (
            self.source_type in ["confluence", "localfile"] and not self.has_code_blocks
        )

    def is_structured_data(self) -> bool:
        """Check if this result contains structured data."""
        return self.has_tables or self.is_excel_sheet

670 

671 

def _build_component(component_cls: type, kwargs: dict, *, force: bool = False):
    """Build one optional result component from the factory's keyword arguments.

    Args:
        component_cls: Dataclass to instantiate. Every field it declares must
            have a default, because only supplied fields are passed on.
        kwargs: Keyword arguments received by the factory.
        force: Create the component even when none of its fields is present.

    Returns:
        An instance of ``component_cls``, or ``None`` when no field of the
        dataclass appears in ``kwargs`` and ``force`` is false.
    """
    # Local import: the module itself only imports ``dataclass`` and ``field``.
    from dataclasses import fields

    # Field names come from the dataclass itself, so this can never drift out
    # of sync with the component definitions.
    supplied = {
        spec.name: kwargs[spec.name]
        for spec in fields(component_cls)
        if spec.name in kwargs
    }
    if not (supplied or force):
        return None

    # A key that is present still creates the component, but an explicit None
    # is dropped so the dataclass default applies (None, False or a fresh
    # empty list). This keeps list and bool fields consistent with their
    # declared types.
    return component_cls(
        **{name: value for name, value in supplied.items() if value is not None}
    )


def create_hybrid_search_result(
    score: float,
    text: str,
    source_type: str,
    source_title: str,
    vector_score: float = 0.0,
    keyword_score: float = 0.0,
    **kwargs,
) -> HybridSearchResult:
    """Factory function to create a HybridSearchResult with optional components.

    Each optional component is created only when at least one of its fields
    is passed in ``kwargs``; otherwise it is left as ``None``. The project
    component is also created when ``collection_name`` is passed on its own,
    or when any key starts with ``"project_"``. Keys that match no component
    field are ignored.

    Args:
        score: Overall search score
        text: Result text content
        source_type: Type of source
        source_title: Title of the source
        vector_score: Vector search score
        keyword_score: Keyword search score
        **kwargs: Additional component fields, named after the fields of the
            component dataclasses. An explicit ``None`` is treated like the
            field's default.

    Returns:
        HybridSearchResult with appropriate components
    """
    # The base result is always created; its optional fields come from kwargs.
    base = BaseSearchResult(
        score=score,
        text=text,
        source_type=source_type,
        source_title=source_title,
        source_url=kwargs.get("source_url"),
        file_path=kwargs.get("file_path"),
        repo_name=kwargs.get("repo_name"),
        vector_score=vector_score,
        keyword_score=keyword_score,
        document_id=kwargs.get("document_id"),
        created_at=kwargs.get("created_at"),
        last_modified=kwargs.get("last_modified"),
    )

    # ``collection_name`` belongs to ProjectInfo but has no "project_" prefix,
    # so it is checked explicitly. The prefix check is kept so that any
    # "project_*" key still yields a ProjectInfo, as before.
    has_project_data = "collection_name" in kwargs or any(
        key.startswith("project_") for key in kwargs
    )

    return HybridSearchResult(
        base=base,
        project=_build_component(ProjectInfo, kwargs, force=has_project_data),
        hierarchy=_build_component(HierarchyInfo, kwargs),
        attachment=_build_component(AttachmentInfo, kwargs),
        section=_build_component(SectionInfo, kwargs),
        content=_build_component(ContentAnalysis, kwargs),
        semantic=_build_component(SemanticAnalysis, kwargs),
        navigation=_build_component(NavigationContext, kwargs),
        chunking=_build_component(ChunkingContext, kwargs),
        conversion=_build_component(ConversionInfo, kwargs),
        cross_reference=_build_component(CrossReferenceInfo, kwargs),
    )