Coverage for src/qdrant_loader/core/chunking/strategy/code/parser/tree_sitter.py: 79%
28 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
1from __future__ import annotations
3from typing import Any
5from qdrant_loader.core.chunking.strategy.code.parser.common import (
6 CodeElement,
7 CodeElementType,
8)
def extract_tree_sitter_elements(
    root_node: Any,
    content_bytes: bytes,
    *,
    language: str,
    max_recursion_depth: int,
    max_element_size: int,
) -> list[CodeElement]:
    """Collect a ``CodeElement`` for each usable descendant of *root_node*.

    The tree is walked depth-first in pre-order. ``root_node`` itself is never
    emitted; its direct children are reported at ``level`` 0, their children
    at ``level`` 1, and so on.

    A child is skipped **together with its whole subtree** when:

    * its ``start_point`` / ``end_point`` cannot be read,
    * ``end_line - start_line`` is greater than ``max_element_size``, or
    * the source slice it covers is empty or whitespace-only (this also
      happens when its byte offsets cannot be read or used for slicing).

    Args:
        root_node: Node-like object exposing ``children``; each descendant is
            expected to provide ``start_point``, ``end_point``, ``start_byte``,
            ``end_byte`` and optionally ``type`` / ``field_name``.
        content_bytes: Source buffer that the byte offsets index into. Decoded
            as UTF-8 with undecodable bytes dropped.
        language: Not consulted by this function's body.
        max_recursion_depth: Nodes at a ``level`` greater than this value are
            not expanded.
        max_element_size: Largest accepted ``end_line - start_line`` span.

    Returns:
        The collected elements in visiting order. Children whose ``type`` is
        ``"function_declaration"`` or ``"method_definition"`` are tagged
        ``CodeElementType.FUNCTION``; every other child is tagged
        ``CodeElementType.MODULE``.
    """
    function_node_types = ("function_declaration", "method_definition")
    elements: list[CodeElement] = []

    def _walk(node: Any, level: int = 0) -> None:
        if level > max_recursion_depth:
            return

        # getattr's default only covers a *missing* attribute; ``or ()`` also
        # tolerates ``children`` being present but None.
        for child in getattr(node, "children", None) or ():
            try:
                # Points are (row, column) with 0-based rows; lines are 1-based.
                start_line = child.start_point[0] + 1
                end_line = child.end_point[0] + 1
            except (AttributeError, IndexError, TypeError):
                # Node without usable position info: skip it and its subtree.
                continue

            if end_line - start_line > max_element_size:
                # Oversized node: pruned, descendants are not visited either.
                continue

            try:
                snippet = content_bytes[child.start_byte : child.end_byte].decode(
                    "utf-8", errors="ignore"
                )
            except (AttributeError, TypeError):
                # Missing or unusable byte offsets: treat as empty content.
                snippet = ""

            if not snippet.strip():
                continue

            node_type = getattr(child, "type", "")
            if node_type in function_node_types:
                elem_type = CodeElementType.FUNCTION
            else:
                elem_type = CodeElementType.MODULE

            elements.append(
                CodeElement(
                    # Prefer an explicit field name; otherwise fall back to
                    # the node type, and to "node" when there is no type.
                    name=getattr(child, "field_name", None)
                    or getattr(child, "type", "node"),
                    element_type=elem_type,
                    content=snippet,
                    start_line=start_line,
                    end_line=end_line,
                    level=level,
                )
            )
            _walk(child, level + 1)

    _walk(root_node, 0)
    return elements