Coverage for src/qdrant_loader/core/chunking/strategy/code/parser/tree_sitter.py: 79%

28 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:05 +0000

1from __future__ import annotations 

2 

3from typing import Any 

4 

5from qdrant_loader.core.chunking.strategy.code.parser.common import ( 

6 CodeElement, 

7 CodeElementType, 

8) 

9 

10 

def extract_tree_sitter_elements(
    root_node: Any,
    content_bytes: bytes,
    *,
    language: str,
    max_recursion_depth: int,
    max_element_size: int,
) -> list[CodeElement]:
    """Flatten a parsed syntax tree into a pre-order list of ``CodeElement``.

    Every descendant of ``root_node`` that passes the filters below becomes
    one element; ``root_node`` itself is never emitted. Nodes are accessed by
    duck typing through ``children``, ``start_point``, ``end_point``,
    ``start_byte``, ``end_byte``, ``type`` and (optionally) ``field_name``.

    A node is skipped, together with its whole subtree, when:

    * its ``start_point`` / ``end_point`` cannot be read,
    * ``end_line - start_line`` exceeds ``max_element_size``, or
    * its source text is empty or whitespace-only.

    Args:
        root_node: Root of the syntax tree to walk.
        content_bytes: Source the tree was parsed from; node byte offsets
            index into this buffer.
        language: Not used by this function; kept for interface
            compatibility with callers.
        max_recursion_depth: Deepest nesting level that is visited. Direct
            children of ``root_node`` are level 0. A negative value yields
            an empty result.
        max_element_size: Largest allowed ``end_line - start_line`` span.

    Returns:
        The accepted elements in depth-first, pre-order sequence. Each
        element's ``level`` is its nesting depth below ``root_node``.
    """
    # Node types treated as callables. ``function_definition`` (Python, C,
    # C++) and ``method_declaration`` (Java, Go) are included so those
    # grammars are not silently classified as MODULE. A tuple keeps the
    # ``in`` test equality-based, so unhashable ``type`` values cannot raise.
    function_node_types = (
        "function_declaration",
        "function_definition",
        "method_definition",
        "method_declaration",
    )

    elements: list[CodeElement] = []

    def _walk(node: Any, level: int = 0) -> None:
        if level > max_recursion_depth:
            return
        # ``or ()`` also covers nodes whose ``children`` attribute exists but
        # is None, which would otherwise raise TypeError in the loop.
        for child in getattr(node, "children", None) or ():
            try:
                # Points are (row, column); rows are shifted to 1-based lines.
                start_line = child.start_point[0] + 1
                end_line = child.end_point[0] + 1
            except Exception:
                # Best effort: a node without usable positions is ignored.
                continue
            if end_line - start_line > max_element_size:
                continue
            try:
                snippet = content_bytes[child.start_byte : child.end_byte].decode(
                    "utf-8", errors="ignore"
                )
            except Exception:
                # Unusable byte offsets are treated like empty text below.
                snippet = ""
            if not snippet.strip():
                continue
            elem_type = (
                CodeElementType.FUNCTION
                if getattr(child, "type", "") in function_node_types
                else CodeElementType.MODULE
            )
            element = CodeElement(
                name=getattr(child, "field_name", None)
                or getattr(child, "type", "node"),
                element_type=elem_type,
                content=snippet,
                start_line=start_line,
                end_line=end_line,
                level=level,
            )
            elements.append(element)
            _walk(child, level + 1)

    _walk(root_node, 0)
    return elements