Coverage for src/qdrant_loader/core/chunking/strategy/code/processor/utils.py: 62%

48 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:05 +0000

1from __future__ import annotations 

2 

3import re 

4from typing import Any 

5 

6 

def is_minified_code(
    content: str,
    *,
    threshold: float = 0.1,
    min_avg_line_length: float = 200,
) -> bool:
    """Heuristically decide whether *content* looks like minified code.

    Content is flagged when its non-blank lines are, on average, longer than
    ``min_avg_line_length`` characters AND the fraction of non-blank lines
    containing ``{``, ``}`` or ``;`` exceeds ``threshold``.

    Args:
        content: Source text to inspect.
        threshold: Minimum (exclusive) fraction of non-blank lines that must
            contain one of the brace/semicolon characters.
        min_avg_line_length: Minimum (exclusive) average length of the
            non-blank lines. Defaults to 200.

    Returns:
        ``True`` when both conditions hold; ``False`` otherwise, including
        when *content* has no non-blank lines.
    """
    non_empty = [line for line in content.split("\n") if line.strip()]
    if not non_empty:
        # Nothing to measure; also avoids dividing by zero below.
        return False

    avg_len = sum(len(line) for line in non_empty) / len(non_empty)
    if avg_len <= min_avg_line_length:
        return False

    specials = sum(1 for line in non_empty if any(ch in line for ch in "{};"))
    return specials / len(non_empty) > threshold

16 

17 

def is_generated_code(content: str, *, patterns: list[str] | None = None) -> bool:
    """Report whether *content* contains a "generated file" marker.

    Matching is a case-insensitive substring search.

    Args:
        content: Source text to inspect.
        patterns: Marker substrings to look for. When ``None`` or empty, the
            defaults ``"auto-generated"``, ``"do not edit"`` and
            ``"generated by"`` are used.

    Returns:
        ``True`` if any marker occurs anywhere in *content*.
    """
    markers = patterns or ["auto-generated", "do not edit", "generated by"]
    lower = content.lower()
    # The content is lower-cased, so each marker must be lower-cased too;
    # otherwise a caller-supplied marker containing uppercase never matches.
    return any(marker.lower() in lower for marker in markers)

22 

23 

def is_mostly_comments(content: str) -> bool:
    """Report whether more than 60% of the non-blank lines are comments.

    A line counts as a comment when, after stripping surrounding whitespace,
    it starts with ``#``, ``//``, ``/*`` or ``--``.

    Blank lines are excluded from the ratio so that a trailing newline or
    blank separators between comment lines do not dilute the result.

    Args:
        content: Source text to inspect.

    Returns:
        ``True`` when the comment fraction of non-blank lines exceeds 0.6;
        ``False`` otherwise, including when there are no non-blank lines.
    """
    stripped_lines = [line.strip() for line in content.split("\n")]
    non_empty = [line for line in stripped_lines if line]
    if not non_empty:
        # str.split never returns an empty list, so emptiness has to be
        # checked after blank lines are discarded.
        return False

    comment_count = sum(
        1 for line in non_empty if line.startswith(("#", "//", "/*", "--"))
    )
    return comment_count / len(non_empty) > 0.6

32 

33 

def has_meaningful_names(content: str) -> bool:
    """Return ``True`` when *content* uses none of the placeholder names.

    The check is case-insensitive and matches whole words only: the
    placeholder must be delimited by regex word boundaries on both sides.
    """
    placeholders = ("tmp", "foo", "bar", "baz", "var", "data", "x", "y", "z")
    # One alternation is equivalent to searching for each word in turn.
    alternation = "|".join(re.escape(word) for word in placeholders)
    lowered = content.lower()
    return re.search(rf"\b(?:{alternation})\b", lowered) is None

38 

39 

def determine_learning_level(complexity: int) -> str:
    """Map a complexity score to a learning-level label.

    Scores below 2 are ``"beginner"``, scores below 6 are
    ``"intermediate"``, and everything else is ``"advanced"``.
    """
    # Ascending exclusive upper bounds; the first one the score is below wins.
    tiers = ((2, "beginner"), (6, "intermediate"))
    for upper_bound, label in tiers:
        if complexity < upper_bound:
            return label
    return "advanced"

46 

47 

def identify_programming_concepts(content: str) -> list[str]:
    """List the known concept keywords that occur in *content*.

    Matching is a case-insensitive substring search. Results keep the fixed
    keyword order below, not the order of appearance in *content*.
    """
    keywords = (
        "recursion",
        "memoization",
        "concurrency",
        "polymorphism",
        "inheritance",
    )
    haystack = content.lower()
    return [keyword for keyword in keywords if keyword in haystack]

55 

56 

def extract_element_context(content: str, element_type: str) -> dict[str, Any]:
    """Collect rough, text-based facts about a code element.

    All values are substring heuristics, not parsed results.

    Args:
        content: Source text of the element.
        element_type: Kind of element, e.g. ``"function"``, ``"method"`` or
            ``"class"``. Other values yield only the ``"element_type"`` key.

    Returns:
        A dict that always contains ``"element_type"``. For functions and
        methods it adds ``"has_return_statement"`` and
        ``"param_count_estimate"``; for classes it adds ``"has_init"`` and
        ``"method_count_estimate"``.
    """
    context: dict[str, Any] = {"element_type": element_type}

    if element_type in ("function", "method"):
        context["has_return_statement"] = "return" in content
        if "(" in content and ")" in content:
            # Text between the first "(" and the next ")" after it.
            param_text = content.split("(", 1)[-1].split(")")[0]
            # Count non-blank segments so "()" gives 0 rather than 1 and a
            # trailing comma does not add a phantom parameter.
            context["param_count_estimate"] = sum(
                1 for piece in param_text.split(",") if piece.strip()
            )
        else:
            context["param_count_estimate"] = 0
    elif element_type == "class":
        context["has_init"] = "__init__" in content
        context["method_count_estimate"] = content.count("def ")

    return context