Coverage for src/qdrant_loader/core/chunking/strategy/code/metadata/complexity.py: 100%

39 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:05 +0000

1from __future__ import annotations 

2 

3import math 

4 

5 

def calculate_complexity_metrics(content: str) -> dict[str, float | int]:
    """Compute rough, text-based complexity metrics for a code snippet.

    Everything here is a substring heuristic; the content is never parsed.

    Args:
        content: Source text to analyse.

    Returns:
        A dict with these keys:

        * ``cyclomatic_complexity`` - 1 plus the number of case-insensitive
          occurrences of the branching tokens in the text.
        * ``lines_of_code`` - number of lines that are not blank.
        * ``total_lines`` - number of ``"\\n"``-separated lines.
        * ``nesting_depth`` - highest value reached by the running
          nesting counter (see the loop below).
        * ``complexity_density`` - cyclomatic complexity per non-blank
          line (the divisor is at least 1).
        * ``maintainability_index`` - result of
          ``calculate_maintainability_index(content)``.
    """
    all_lines = content.split("\n")
    code_line_count = sum(1 for text in all_lines if text.strip())

    # Tokens are already lowercase, so they are matched against a single
    # lowercased copy of the content.
    branch_tokens = (
        "if ",
        "elif ",
        "else:",
        "while ",
        "for ",
        "try:",
        "except:",
        "case ",
        "&&",
        "||",
        "?",
        "and ",
        "or ",
        "switch",
    )
    lowered = content.lower()
    cyclomatic = 1 + sum(lowered.count(token) for token in branch_tokens)

    # Nesting counter: a line containing any opener substring bumps the
    # depth; a line that is exactly a closer, or that starts with
    # "except"/"finally", lowers it (never below zero).
    openers = ("if", "for", "while", "try", "def", "class")
    closers = ("end", "}")
    deepest = 0
    depth = 0
    for raw_line in all_lines:
        text = raw_line.strip()
        if any(word in text for word in openers):
            depth += 1
            if depth > deepest:
                deepest = depth
            continue
        if text in closers or text.startswith(("except", "finally")):
            depth = max(0, depth - 1)

    density = cyclomatic / max(code_line_count, 1)
    return {
        "cyclomatic_complexity": cyclomatic,
        "lines_of_code": code_line_count,
        "total_lines": len(all_lines),
        "nesting_depth": deepest,
        "complexity_density": density,
        "maintainability_index": calculate_maintainability_index(content),
    }

51 

52 

def calculate_maintainability_index(content: str) -> float:
    """Estimate a maintainability index for ``content``, clamped to 0-100.

    The score is ``171 - 5.2*ln(V) - 0.23*C - 16.2*ln(LOC)`` where:

    * ``LOC`` is the number of non-blank lines,
    * ``C`` is 1 plus the case-insensitive count of a fixed set of
      branching tokens in the text,
    * ``V`` approximates Halstead volume as ``n * log2(n)``, with ``n``
      the total number of operator characters plus identifier-like
      tokens (the same total is used for both vocabulary and length).

    Args:
        content: Source text to analyse.

    Returns:
        A float in ``[0.0, 100.0]``. The neutral value ``50.0`` is returned
        when the content is blank or when no positive volume can be
        computed (no identifier-like tokens, or fewer than two tokens).
    """
    # Function-scope import replaces the repeated ``__import__("re")``
    # calls without depending on the file-level import list.
    import re

    neutral_score = 50.0
    if not content.strip():
        return neutral_score

    loc = sum(1 for line in content.split("\n") if line.strip())

    # Lowercase once instead of once per token.
    lowered = content.lower()
    branch_tokens = (
        "if ",
        "elif ",
        "else:",
        "while ",
        "for ",
        "try:",
        "except:",
        "case ",
    )
    complexity = 1 + sum(lowered.count(token) for token in branch_tokens)

    operators = len(re.findall(r"[+\-*/=<>!&|%^~]", content))
    operands = len(re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", content))

    token_count = operators + operands
    if operands == 0 or token_count <= 1:
        halstead_volume = 0.0
    else:
        halstead_volume = token_count * math.log2(token_count)

    if loc > 0 and halstead_volume > 0:
        mi = (
            171
            - 5.2 * math.log(halstead_volume)
            - 0.23 * complexity
            - 16.2 * math.log(loc)
        )
        # Float bounds keep the return type a float even when clamped.
        return max(0.0, min(100.0, mi))

    return neutral_score