Coverage for src/qdrant_loader_mcp_server/mcp/schemas.py: 69%

32 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:06 +0000

1"""MCP Tool Schema Definitions - refactored into modular files.""" 

2 

3from typing import Any 

4 

5# Note: This module defines the MCP tool schemas directly. Duplicate 

6# import-based aliases have been removed to avoid redefinitions. 

7 

8 

class MCPSchemas:
    """Namespace of factory methods that build MCP tool schema dictionaries.

    Every ``get_*_tool_schema`` static method returns a freshly constructed
    dict with the keys ``name``, ``description``, ``annotations``,
    ``inputSchema`` and ``outputSchema``. A new dict is built on each call,
    so callers may mutate the result without affecting later calls.
    """

    @staticmethod
    def get_hierarchy_search_tool_schema() -> dict[str, Any]:
        """Build the schema for the ``hierarchy_search`` tool.

        Returns:
            The tool definition; only ``query`` is a required input.
        """
        return {
            "name": "hierarchy_search",
            "description": "Search Confluence documents with hierarchy-aware filtering and organization",
            "annotations": {"read-only": True},
            "inputSchema": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The search query in natural language",
                    },
                    "hierarchy_filter": {
                        "type": "object",
                        "properties": {
                            "depth": {
                                "type": "integer",
                                "description": "Filter by specific hierarchy depth (0 = root pages)",
                            },
                            "parent_title": {
                                "type": "string",
                                "description": "Filter by parent page title",
                            },
                            "root_only": {
                                "type": "boolean",
                                "description": "Show only root pages (no parent)",
                            },
                            "has_children": {
                                "type": "boolean",
                                "description": "Filter by whether pages have children",
                            },
                        },
                    },
                    "organize_by_hierarchy": {
                        "type": "boolean",
                        "description": "Group results by hierarchy structure",
                        "default": False,
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum number of results to return",
                        "default": 10,
                    },
                },
                "required": ["query"],
            },
            "outputSchema": {
                "type": "object",
                "properties": {
                    "results": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "score": {"type": "number"},
                                "title": {"type": "string"},
                                "content": {"type": "string"},
                                "hierarchy_path": {"type": "string"},
                                "parent_title": {"type": "string"},
                                "metadata": {
                                    "type": "object",
                                    "properties": {
                                        "space_key": {"type": "string"},
                                        "project_id": {"type": "string"},
                                        "page_id": {"type": "string"},
                                        "hierarchy_level": {"type": "integer"},
                                    },
                                },
                            },
                        },
                    },
                    "total_found": {"type": "integer"},
                    "hierarchy_organization": {
                        "type": "object",
                        "properties": {
                            "organized_by_hierarchy": {"type": "boolean"},
                            "hierarchy_groups": {
                                "type": "array",
                                "items": {"type": "object"},
                            },
                        },
                    },
                },
            },
        }

    @staticmethod
    def get_attachment_search_tool_schema() -> dict[str, Any]:
        """Build the schema for the ``attachment_search`` tool.

        Returns:
            The tool definition; only ``query`` is a required input.
        """
        return {
            "name": "attachment_search",
            "description": "Search for file attachments and their parent documents across Confluence, Jira, and other sources",
            "annotations": {"read-only": True},
            "inputSchema": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The search query in natural language",
                    },
                    "attachment_filter": {
                        "type": "object",
                        "properties": {
                            "attachments_only": {
                                "type": "boolean",
                                "description": "Show only file attachments",
                            },
                            "parent_document_title": {
                                "type": "string",
                                "description": "Filter by parent document title",
                            },
                            "file_type": {
                                "type": "string",
                                "description": "Filter by file type (e.g., 'pdf', 'xlsx', 'png')",
                            },
                            "file_size_min": {
                                "type": "integer",
                                "description": "Minimum file size in bytes",
                            },
                            "file_size_max": {
                                "type": "integer",
                                "description": "Maximum file size in bytes",
                            },
                            "author": {
                                "type": "string",
                                "description": "Filter by attachment author",
                            },
                        },
                    },
                    "include_parent_context": {
                        "type": "boolean",
                        "description": "Include parent document information in results",
                        "default": True,
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum number of results to return",
                        "default": 10,
                    },
                },
                "required": ["query"],
            },
            "outputSchema": {
                "type": "object",
                "properties": {
                    "results": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "score": {"type": "number"},
                                "title": {"type": "string"},
                                "content": {"type": "string"},
                                "attachment_info": {
                                    "type": "object",
                                    "properties": {
                                        "filename": {"type": "string"},
                                        "file_type": {"type": "string"},
                                        "file_size": {"type": "integer"},
                                        "parent_document": {"type": "string"},
                                    },
                                },
                                "metadata": {
                                    "type": "object",
                                    "properties": {
                                        "file_path": {"type": "string"},
                                        "project_id": {"type": "string"},
                                        "upload_date": {"type": "string"},
                                        "author": {"type": "string"},
                                    },
                                },
                            },
                        },
                    },
                    "total_found": {"type": "integer"},
                    "attachment_summary": {
                        "type": "object",
                        "properties": {
                            "total_attachments": {"type": "integer"},
                            "file_types": {
                                "type": "array",
                                "items": {"type": "string"},
                            },
                            "attachments_only": {"type": "boolean"},
                        },
                    },
                },
            },
        }

    @staticmethod
    def get_analyze_relationships_tool_schema() -> dict[str, Any]:
        """Build the schema for the ``analyze_relationships`` tool.

        Returns:
            The tool definition; only ``query`` is a required input.
        """
        return {
            "name": "analyze_relationships",
            "description": "Analyze relationships between documents",
            "annotations": {"read-only": True},
            "inputSchema": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "Search query to get documents for analysis",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum number of documents to analyze",
                        "default": 20,
                    },
                    "source_types": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Optional list of source types to filter by",
                    },
                    "project_ids": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Optional list of project IDs to filter by",
                    },
                    "use_llm": {
                        "type": "boolean",
                        "description": "Enable LLM validation for top pairs (budgeted)",
                    },
                    "max_llm_pairs": {
                        "type": "integer",
                        "description": "Maximum number of pairs to analyze with LLM",
                    },
                    "overall_timeout_s": {
                        "type": "number",
                        "description": "Overall analysis budget in seconds",
                    },
                    "max_pairs_total": {
                        "type": "integer",
                        "description": "Maximum candidate pairs to analyze after tiering",
                    },
                    "text_window_chars": {
                        "type": "integer",
                        "description": "Per-document text window size for lexical analysis",
                    },
                },
                "required": ["query"],
            },
            "outputSchema": {
                "type": "object",
                "properties": {
                    "relationships": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "document_1": {"type": "string"},
                                "document_2": {"type": "string"},
                                "relationship_type": {"type": "string"},
                                "score": {"type": "number"},
                                "description": {"type": "string"},
                            },
                        },
                    },
                    "total_analyzed": {"type": "integer"},
                    "summary": {"type": "string"},
                },
            },
        }

    @staticmethod
    def get_find_similar_tool_schema() -> dict[str, Any]:
        """Build the schema for the ``find_similar_documents`` tool.

        Returns:
            The tool definition; ``target_query`` and ``comparison_query``
            are the required inputs.
        """
        return {
            "name": "find_similar_documents",
            "description": "Find documents similar to a target document using multiple similarity metrics",
            "annotations": {"read-only": True},
            "inputSchema": {
                "type": "object",
                "properties": {
                    "target_query": {
                        "type": "string",
                        "description": "Query to find the target document",
                    },
                    "comparison_query": {
                        "type": "string",
                        "description": "Query to get documents to compare against",
                    },
                    "similarity_metrics": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "entity_overlap",
                                "topic_overlap",
                                "semantic_similarity",
                                "metadata_similarity",
                                "hierarchical_distance",
                                "content_features",
                            ],
                        },
                        "description": "Similarity metrics to use",
                    },
                    "max_similar": {
                        "type": "integer",
                        "description": "Maximum number of similar documents to return",
                        "default": 5,
                    },
                    "source_types": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Optional list of source types to filter by",
                    },
                    "project_ids": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Optional list of project IDs to filter by",
                    },
                },
                "required": ["target_query", "comparison_query"],
            },
            "outputSchema": {
                "type": "object",
                "properties": {
                    "similar_documents": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "document_id": {"type": "string"},
                                "title": {"type": "string"},
                                "similarity_score": {"type": "number"},
                                "similarity_metrics": {
                                    "type": "object",
                                    "properties": {
                                        "entity_overlap": {"type": "number"},
                                        "topic_overlap": {"type": "number"},
                                        "semantic_similarity": {"type": "number"},
                                        "metadata_similarity": {"type": "number"},
                                    },
                                },
                                "similarity_reason": {"type": "string"},
                                "content_preview": {"type": "string"},
                            },
                        },
                    },
                    "target_document": {
                        "type": "object",
                        "properties": {
                            "title": {"type": "string"},
                            "content_preview": {"type": "string"},
                            "source_type": {"type": "string"},
                        },
                    },
                    "similarity_summary": {
                        "type": "object",
                        "properties": {
                            "total_compared": {"type": "integer"},
                            "similar_found": {"type": "integer"},
                            "highest_similarity": {"type": "number"},
                            "metrics_used": {
                                "type": "array",
                                "items": {"type": "string"},
                            },
                        },
                    },
                },
            },
        }

    @staticmethod
    def get_detect_conflicts_tool_schema() -> dict[str, Any]:
        """Build the schema for the ``detect_document_conflicts`` tool.

        Returns:
            The tool definition; only ``query`` is a required input. The
            annotations also carry ``"compute-intensive": True``.
        """
        return {
            "name": "detect_document_conflicts",
            "description": "Detect conflicts and contradictions between documents",
            "annotations": {"read-only": True, "compute-intensive": True},
            "inputSchema": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "Search query to get documents for conflict analysis",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum number of documents to analyze",
                        "default": 10,
                    },
                    "source_types": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Optional list of source types to filter by",
                    },
                    "project_ids": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Optional list of project IDs to filter by",
                    },
                },
                "required": ["query"],
            },
            "outputSchema": {
                "type": "object",
                "properties": {
                    "conflicts_detected": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "conflict_id": {"type": "string"},
                                "document_1": {
                                    "type": "object",
                                    "properties": {
                                        "title": {"type": "string"},
                                        "content_preview": {"type": "string"},
                                        "source_type": {"type": "string"},
                                    },
                                },
                                "document_2": {
                                    "type": "object",
                                    "properties": {
                                        "title": {"type": "string"},
                                        "content_preview": {"type": "string"},
                                        "source_type": {"type": "string"},
                                    },
                                },
                                "conflict_type": {"type": "string"},
                                "conflict_score": {"type": "number"},
                                "conflict_description": {"type": "string"},
                                "conflicting_statements": {
                                    "type": "array",
                                    "items": {
                                        "type": "object",
                                        "properties": {
                                            "from_doc1": {"type": "string"},
                                            "from_doc2": {"type": "string"},
                                        },
                                    },
                                },
                            },
                        },
                    },
                    "conflict_summary": {
                        "type": "object",
                        "properties": {
                            "total_documents_analyzed": {"type": "integer"},
                            "conflicts_found": {"type": "integer"},
                            "conflict_types": {
                                "type": "array",
                                "items": {"type": "string"},
                            },
                            "highest_conflict_score": {"type": "number"},
                        },
                    },
                    "analysis_metadata": {
                        "type": "object",
                        "properties": {
                            "query_used": {"type": "string"},
                            "analysis_date": {"type": "string"},
                            "processing_time_ms": {"type": "number"},
                        },
                    },
                },
            },
        }

    @staticmethod
    def get_find_complementary_tool_schema() -> dict[str, Any]:
        """Build the schema for the ``find_complementary_content`` tool.

        Returns:
            The tool definition; ``target_query`` and ``context_query`` are
            the required inputs.
        """
        return {
            "name": "find_complementary_content",
            "description": "Find content that complements a target document",
            "annotations": {"read-only": True},
            "inputSchema": {
                "type": "object",
                "properties": {
                    "target_query": {
                        "type": "string",
                        "description": "Query to find the target document",
                    },
                    "context_query": {
                        "type": "string",
                        "description": "Query to get contextual documents",
                    },
                    "max_recommendations": {
                        "type": "integer",
                        "description": "Maximum number of recommendations",
                        "default": 5,
                    },
                    "source_types": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Optional list of source types to filter by",
                    },
                    "project_ids": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Optional list of project IDs to filter by",
                    },
                },
                "required": ["target_query", "context_query"],
            },
            "outputSchema": {
                "type": "object",
                "properties": {
                    "complementary_content": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "document_id": {"type": "string"},
                                "title": {"type": "string"},
                                "content_preview": {"type": "string"},
                                "complementary_score": {"type": "number"},
                                "complementary_reason": {"type": "string"},
                                "relationship_type": {"type": "string"},
                                "source_type": {"type": "string"},
                                "metadata": {
                                    "type": "object",
                                    "properties": {
                                        "project_id": {"type": "string"},
                                        "created_date": {"type": "string"},
                                        "author": {"type": "string"},
                                    },
                                },
                            },
                        },
                    },
                    "target_document": {
                        "type": "object",
                        "properties": {
                            "title": {"type": "string"},
                            "content_preview": {"type": "string"},
                            "source_type": {"type": "string"},
                        },
                    },
                    "complementary_summary": {
                        "type": "object",
                        "properties": {
                            "total_analyzed": {"type": "integer"},
                            "complementary_found": {"type": "integer"},
                            "highest_score": {"type": "number"},
                            "relationship_types": {
                                "type": "array",
                                "items": {"type": "string"},
                            },
                        },
                    },
                },
            },
        }

    @staticmethod
    def get_cluster_documents_tool_schema() -> dict[str, Any]:
        """Build the schema for the ``cluster_documents`` tool.

        Returns:
            The tool definition; only ``query`` is a required input. The
            annotations also carry ``"compute-intensive": True``.
        """
        return {
            "name": "cluster_documents",
            "description": "Cluster documents based on similarity and relationships",
            "annotations": {"read-only": True, "compute-intensive": True},
            "inputSchema": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "Search query to get documents for clustering",
                    },
                    "strategy": {
                        "type": "string",
                        "enum": [
                            "mixed_features",
                            "entity_based",
                            "topic_based",
                            "project_based",
                            "hierarchical",
                            "adaptive",
                        ],
                        "description": "Clustering strategy to use (adaptive automatically selects the best strategy)",
                        "default": "mixed_features",
                    },
                    "max_clusters": {
                        "type": "integer",
                        "description": "Maximum number of clusters to create",
                        "default": 10,
                    },
                    "min_cluster_size": {
                        "type": "integer",
                        "description": "Minimum size for a cluster",
                        "default": 2,
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum number of documents to cluster",
                        "default": 25,
                    },
                    "source_types": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Optional list of source types to filter by",
                    },
                    "project_ids": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Optional list of project IDs to filter by",
                    },
                },
                "required": ["query"],
            },
            "outputSchema": {
                "type": "object",
                "properties": {
                    "clusters": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "cluster_id": {"type": "string"},
                                "cluster_name": {"type": "string"},
                                "cluster_theme": {"type": "string"},
                                "document_count": {"type": "integer"},
                                "cohesion_score": {"type": "number"},
                                "documents": {
                                    "type": "array",
                                    "items": {
                                        "type": "object",
                                        "properties": {
                                            "document_id": {"type": "string"},
                                            "title": {"type": "string"},
                                            "content_preview": {"type": "string"},
                                            "source_type": {"type": "string"},
                                            "cluster_relevance": {"type": "number"},
                                        },
                                    },
                                },
                                "cluster_keywords": {
                                    "type": "array",
                                    "items": {"type": "string"},
                                },
                                "cluster_summary": {"type": "string"},
                            },
                        },
                    },
                    "clustering_metadata": {
                        "type": "object",
                        "properties": {
                            "total_documents": {"type": "integer"},
                            "clusters_created": {"type": "integer"},
                            "strategy": {"type": "string"},
                            "unclustered_documents": {"type": "integer"},
                            "clustering_quality": {"type": "number"},
                            "processing_time_ms": {"type": "number"},
                        },
                    },
                    "cluster_relationships": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "cluster_1": {"type": "string"},
                                "cluster_2": {"type": "string"},
                                "relationship_type": {"type": "string"},
                                "relationship_strength": {"type": "number"},
                            },
                        },
                    },
                },
            },
        }

    @staticmethod
    def get_expand_cluster_tool_schema() -> dict[str, Any]:
        """Build the schema for the ``expand_cluster`` tool.

        Returns:
            The tool definition; only ``cluster_id`` is a required input.
            The output properties are declared by type and description only,
            without nested item schemas.
        """
        return {
            "name": "expand_cluster",
            "description": "Retrieve all documents from a specific cluster for lazy loading",
            "annotations": {"read-only": True},
            "inputSchema": {
                "type": "object",
                "properties": {
                    "cluster_id": {
                        "type": "string",
                        "description": "The ID of the cluster to expand and retrieve all documents",
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum number of documents to return from cluster (default: 20)",
                        "default": 20,
                    },
                    "offset": {
                        "type": "integer",
                        "description": "Number of documents to skip for pagination (default: 0)",
                        "default": 0,
                    },
                    "include_metadata": {
                        "type": "boolean",
                        "description": "Include detailed metadata for each document (default: true)",
                        "default": True,
                    },
                },
                "required": ["cluster_id"],
            },
            "outputSchema": {
                "type": "object",
                "properties": {
                    "cluster_id": {
                        "type": "string",
                        "description": "The expanded cluster ID",
                    },
                    "cluster_info": {
                        "type": "object",
                        "description": "Detailed cluster information",
                    },
                    "documents": {
                        "type": "array",
                        "description": "Full list of documents in the cluster",
                    },
                    "pagination": {
                        "type": "object",
                        "description": "Pagination information",
                    },
                },
            },
        }

    @staticmethod
    def get_expand_document_tool_schema() -> dict[str, Any]:
        """Build the schema for the ``expand_document`` tool.

        Returns:
            The tool definition; only ``document_id`` is a required input.
            The output is shaped like a search response (``results``,
            ``total_found``, ``query_context``).
        """
        return {
            "name": "expand_document",
            "description": "Retrieve full document content by document ID for lazy loading",
            "annotations": {"read-only": True},
            "inputSchema": {
                "type": "object",
                "properties": {
                    "document_id": {
                        "type": "string",
                        "description": "The ID of the document to expand and retrieve full content",
                    },
                    "include_metadata": {
                        "type": "boolean",
                        "description": "Include detailed metadata (default: true)",
                        "default": True,
                    },
                    "include_hierarchy": {
                        "type": "boolean",
                        "description": "Include hierarchy information for Confluence documents (default: true)",
                        "default": True,
                    },
                    "include_attachments": {
                        "type": "boolean",
                        "description": "Include attachment information if available (default: true)",
                        "default": True,
                    },
                },
                "required": ["document_id"],
            },
            "outputSchema": {
                "type": "object",
                "properties": {
                    "results": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "score": {"type": "number"},
                                "title": {"type": "string"},
                                "content": {"type": "string"},
                                "source_type": {"type": "string"},
                                "metadata": {
                                    "type": "object",
                                    "properties": {
                                        "file_path": {"type": "string"},
                                        "project_id": {"type": "string"},
                                        "created_at": {"type": "string"},
                                        "last_modified": {"type": "string"},
                                    },
                                },
                            },
                        },
                    },
                    "total_found": {"type": "integer"},
                    "query_context": {
                        "type": "object",
                        "properties": {
                            "original_query": {"type": "string"},
                            "source_types_filtered": {
                                "type": "array",
                                "items": {"type": "string"},
                            },
                            "project_ids_filtered": {
                                "type": "array",
                                "items": {"type": "string"},
                            },
                        },
                    },
                },
            },
        }

    @classmethod
    def get_all_tool_schemas(cls) -> list[dict[str, Any]]:
        """Collect the schemas of every tool builder available on ``cls``.

        Builders are looked up by name in a fixed order. A builder that is
        not defined on ``cls`` is skipped instead of raising
        ``AttributeError``.

        Returns:
            One schema dict per available builder, in the order listed in
            the method body.
        """
        # ``get_search_tool_schema`` is listed first but is not defined in
        # this class; calling it unconditionally raised AttributeError and
        # made the whole method unusable. A subclass may still supply it.
        builder_names = (
            "get_search_tool_schema",
            "get_hierarchy_search_tool_schema",
            "get_attachment_search_tool_schema",
            "get_analyze_relationships_tool_schema",
            "get_find_similar_tool_schema",
            "get_detect_conflicts_tool_schema",
            "get_find_complementary_tool_schema",
            "get_cluster_documents_tool_schema",
            "get_expand_document_tool_schema",
            "get_expand_cluster_tool_schema",
        )
        schemas: list[dict[str, Any]] = []
        for builder_name in builder_names:
            builder = getattr(cls, builder_name, None)
            if builder is None:
                continue
            schemas.append(builder())
        return schemas