Coverage for src/qdrant_loader/core/chunking/strategy/code/processor/utils.py: 62%
48 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
1from __future__ import annotations
3import re
4from typing import Any
def is_minified_code(
    content: str,
    *,
    threshold: float = 0.1,
    min_avg_line_length: float = 200,
) -> bool:
    """Heuristically decide whether *content* looks like minified code.

    Content is flagged when its non-blank lines are, on average, longer than
    ``min_avg_line_length`` characters AND the fraction of non-blank lines
    containing at least one of ``{``, ``}`` or ``;`` exceeds ``threshold``.

    Args:
        content: Source text to inspect.
        threshold: Exclusive lower bound on the fraction of non-blank lines
            that contain ``{``, ``}`` or ``;``.
        min_avg_line_length: Exclusive lower bound on the mean length of
            non-blank lines. Defaults to 200.

    Returns:
        ``True`` when both conditions hold; ``False`` otherwise, including
        when *content* has no non-blank lines.
    """
    non_empty = [line for line in content.split("\n") if line.strip()]
    if not non_empty:
        # Nothing to measure; also avoids dividing by zero below.
        return False

    line_count = len(non_empty)
    avg_len = sum(map(len, non_empty)) / line_count
    # Count lines (not characters) that carry brace/semicolon punctuation.
    punctuated = sum(1 for line in non_empty if any(ch in line for ch in "{};"))
    ratio = punctuated / line_count
    return avg_len > min_avg_line_length and ratio > threshold
def is_generated_code(content: str, *, patterns: list[str] | None = None) -> bool:
    """Return whether *content* contains a "generated code" marker.

    Matching is a case-insensitive substring search: both the content and
    each pattern are lower-cased before comparison, so caller-supplied
    patterns may use any casing.

    Args:
        content: Source text to inspect.
        patterns: Marker substrings to look for. When ``None`` or empty, the
            defaults ``"auto-generated"``, ``"do not edit"`` and
            ``"generated by"`` are used.

    Returns:
        ``True`` if any marker occurs anywhere in *content*.
    """
    markers = patterns or ["auto-generated", "do not edit", "generated by"]
    lower = content.lower()
    # Lower-case each marker too; otherwise a pattern containing any uppercase
    # letter could never match the lower-cased content.
    return any(marker.lower() in lower for marker in markers)
def is_mostly_comments(content: str) -> bool:
    """Return whether more than 60% of the non-blank lines are comments.

    A line counts as a comment when, after stripping surrounding whitespace,
    it starts with ``#``, ``//``, ``/*`` or ``--``. Blank lines are ignored
    so that spacing and a trailing newline do not dilute the ratio.

    Args:
        content: Source text to inspect.

    Returns:
        ``True`` when the comment fraction of non-blank lines exceeds 0.6;
        ``False`` otherwise, including when there are no non-blank lines.
    """
    stripped_lines = (line.strip() for line in content.split("\n"))
    non_blank = [line for line in stripped_lines if line]
    if not non_blank:
        # Empty or whitespace-only content has nothing to classify.
        return False

    comment_count = sum(
        1 for line in non_blank if line.startswith(("#", "//", "/*", "--"))
    )
    return comment_count / len(non_blank) > 0.6
def has_meaningful_names(content: str) -> bool:
    """Return ``False`` if *content* uses any placeholder-style identifier.

    The text is lower-cased and searched for each placeholder name as a whole
    word (regex ``\\b`` boundaries), so ``foo`` is flagged but ``foo_bar`` and
    ``database`` are not.

    Args:
        content: Source text to inspect.

    Returns:
        ``True`` when none of the placeholder names occur as whole words.
    """
    lowered = content.lower()
    for placeholder in ("tmp", "foo", "bar", "baz", "var", "data", "x", "y", "z"):
        if re.search(rf"\b{re.escape(placeholder)}\b", lowered):
            # One placeholder is enough to fail the check.
            return False
    return True
def determine_learning_level(complexity: int) -> str:
    """Map a complexity score to a learning-level label.

    Args:
        complexity: Numeric complexity score.

    Returns:
        ``"beginner"`` for scores below 2, ``"intermediate"`` for scores
        below 6, and ``"advanced"`` for everything else.
    """
    # Exclusive upper bounds, checked in ascending order.
    upper_bounds = ((2, "beginner"), (6, "intermediate"))
    for limit, label in upper_bounds:
        if complexity < limit:
            return label
    return "advanced"
def identify_programming_concepts(content: str) -> list[str]:
    """List the known programming-concept keywords mentioned in *content*.

    The search is a case-insensitive substring match against a fixed keyword
    list.

    Args:
        content: Text to scan.

    Returns:
        The matching keywords, in the fixed keyword order (not in order of
        appearance in *content*). Empty when nothing matches.
    """
    keywords = ("recursion", "memoization", "concurrency", "polymorphism", "inheritance")
    haystack = content.lower()
    return [keyword for keyword in keywords if keyword in haystack]
def extract_element_context(content: str, element_type: str) -> dict[str, Any]:
    """Build a small dict of text-based heuristics about a code element.

    Args:
        content: Source text of the element.
        element_type: Kind of element. ``"function"`` and ``"method"`` get
            return/parameter hints, ``"class"`` gets init/method hints, and
            any other value yields only the ``"element_type"`` key.

    Returns:
        A dict that always contains ``"element_type"``. For functions and
        methods it also has ``"has_return_statement"`` (whether the substring
        ``"return"`` occurs anywhere in *content*) and
        ``"param_count_estimate"``. For classes it has ``"has_init"`` and
        ``"method_count_estimate"`` (occurrences of ``"def "``).

    Note:
        The parameter estimate looks only at the text between the first
        ``(`` and the next ``)``, split on commas. Commas inside nested
        brackets are counted, and a nested ``)`` ends the slice early.
    """
    context: dict[str, Any] = {"element_type": element_type}

    if element_type in ("function", "method"):
        context["has_return_statement"] = "return" in content

        param_count = 0
        if "(" in content and ")" in content:
            param_text = content.split("(", 1)[-1].split(")")[0]
            # Count only non-blank segments so that "()" gives 0 rather than
            # 1 and a trailing comma does not add a phantom parameter.
            param_count = sum(1 for part in param_text.split(",") if part.strip())
        context["param_count_estimate"] = param_count
    elif element_type == "class":
        context["has_init"] = "__init__" in content
        context["method_count_estimate"] = content.count("def ")

    return context