Coverage for src/qdrant_loader/core/chunking/strategy/code/metadata/complexity.py: 100%
39 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
1from __future__ import annotations
3import math
def calculate_complexity_metrics(content: str) -> dict[str, float | int]:
    """Compute rough, text-based complexity metrics for a piece of source code.

    All figures are heuristics derived from plain substring matching on
    ``content``; nothing is parsed.

    Args:
        content: The source text to analyse.

    Returns:
        A dict with the keys:

        * ``cyclomatic_complexity`` - 1 plus the number of occurrences of the
          branching indicators listed below (matched case-insensitively).
        * ``lines_of_code`` - number of lines that are not blank.
        * ``total_lines`` - number of lines obtained by splitting on ``"\\n"``.
        * ``nesting_depth`` - highest value reached by the nesting counter.
        * ``complexity_density`` - cyclomatic complexity divided by the
          non-blank line count (at least 1, to avoid division by zero).
        * ``maintainability_index`` - result of
          ``calculate_maintainability_index(content)``.
    """
    lines = content.split("\n")
    non_empty_lines = [line for line in lines if line.strip()]

    # Branching indicators; all literals are already lower-case, so only the
    # content needs to be lower-cased for a case-insensitive count.
    # NOTE(review): these are raw substrings, so e.g. "or " is also counted
    # inside "for " and "if " inside "elif ".
    indicators = (
        "if ",
        "elif ",
        "else:",
        "while ",
        "for ",
        "try:",
        "except:",
        "case ",
        "&&",
        "||",
        "?",
        "and ",
        "or ",
        "switch",
    )

    # Lower-case once instead of once per indicator.
    lowered = content.lower()
    cyclomatic_complexity = 1 + sum(
        lowered.count(indicator) for indicator in indicators
    )

    # Nesting counter: a line containing any opener substring increments it;
    # a line that is exactly "end" or "}", or that starts with "except" or
    # "finally", decrements it (never below zero).
    # NOTE(review): openers are matched as substrings anywhere in the line,
    # and plain dedents are not detected.
    openers = ("if", "for", "while", "try", "def", "class")
    max_nesting = 0
    current_nesting = 0
    for line in lines:
        stripped = line.strip()
        if any(keyword in stripped for keyword in openers):
            current_nesting += 1
            max_nesting = max(max_nesting, current_nesting)
        elif stripped in ("end", "}") or stripped.startswith(
            ("except", "finally")
        ):
            current_nesting = max(0, current_nesting - 1)

    return {
        "cyclomatic_complexity": cyclomatic_complexity,
        "lines_of_code": len(non_empty_lines),
        "total_lines": len(lines),
        "nesting_depth": max_nesting,
        "complexity_density": cyclomatic_complexity / max(len(non_empty_lines), 1),
        "maintainability_index": calculate_maintainability_index(content),
    }
def calculate_maintainability_index(content: str) -> float:
    """Estimate a maintainability index for ``content``, clamped to 0-100.

    The score combines three text-based measurements: a Halstead-style
    volume, a keyword-count complexity figure and the number of non-blank
    lines.

    Args:
        content: The source text to analyse.

    Returns:
        ``50.0`` when ``content`` is blank or when the computed volume is
        zero; otherwise
        ``171 - 5.2*ln(volume) - 0.23*complexity - 16.2*ln(loc)`` limited to
        the range ``[0.0, 100.0]``.
    """
    # Function-scope import keeps this block self-contained (replaces the
    # previous inline ``__import__("re")`` calls).
    import re

    if not content.strip():
        return 50.0

    # Lines of code: only non-blank lines are counted.
    loc = sum(1 for line in content.split("\n") if line.strip())

    # Complexity: 1 plus case-insensitive substring counts of the branching
    # indicators. Lower-case once rather than once per indicator.
    lowered = content.lower()
    complexity = 1 + sum(
        lowered.count(indicator)
        for indicator in (
            "if ",
            "elif ",
            "else:",
            "while ",
            "for ",
            "try:",
            "except:",
            "case ",
        )
    )

    # Operators are single characters; operands are identifier-like words.
    operators = len(re.findall(r"[+\-*/=<>!&|%^~]", content))
    operands = len(re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", content))

    if operands == 0:
        halstead_volume = 0.0
    else:
        # NOTE(review): the same total occurrence count serves as both the
        # "vocabulary" and the "length" term here; distinct tokens are not
        # tracked.
        token_total = operators + operands
        halstead_volume = (
            token_total * math.log2(token_total) if token_total > 1 else 0.0
        )

    if loc > 0 and halstead_volume > 0:
        mi = (
            171
            - 5.2 * math.log(halstead_volume)
            - 0.23 * complexity
            - 16.2 * math.log(loc)
        )
        # Clamp with float bounds so the return type is always float.
        return max(0.0, min(100.0, mi))

    return 50.0