Coverage for src/qdrant_loader/core/chunking/strategy/code/processor/quality.py: 80%
55 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
1from __future__ import annotations
3from typing import Any
5from qdrant_loader.core.chunking.strategy.code.processor.utils import (
6 determine_learning_level as _determine_learning_level,
7)
8from qdrant_loader.core.chunking.strategy.code.processor.utils import (
9 has_meaningful_names as _has_meaningful_names,
10)
11from qdrant_loader.core.chunking.strategy.code.processor.utils import (
12 identify_programming_concepts as _identify_programming_concepts,
13)
def assess_code_quality(content: str, chunk_metadata: dict[str, Any]) -> dict[str, Any]:
    """Rate a code chunk using simple complexity and readability heuristics.

    The score starts at 100 and is adjusted as follows:

    * complexity above 10 costs 20 points, above 5 costs 10 points;
    * more than 30% of lines longer than 120 characters costs 15 points;
    * no triple-quoted string in a chunk longer than 500 characters costs 10;
    * meaningful names (as judged by ``_has_meaningful_names``) add 5 points,
      otherwise 10 points are removed.

    Args:
        content: Source text of the chunk.
        chunk_metadata: Chunk metadata; only ``"complexity"`` is read
            (defaults to 0 when missing).

    Returns:
        A dict with ``"quality_score"`` (never below 0), ``"complexity_level"``
        (``"low"``, ``"medium"`` or ``"high"``) and ``"readability_indicators"``.
    """
    complexity = chunk_metadata.get("complexity", 0)

    # Collect all deductions first, then apply them to the base score at once.
    penalty = 0
    if complexity > 10:
        penalty += 20
    elif complexity > 5:
        penalty += 10

    all_lines = content.split("\n")
    total_lines = len(all_lines)
    overlong_lines = sum(1 for text in all_lines if len(text) > 120)
    if overlong_lines > total_lines * 0.3:
        penalty += 15

    # A triple-quoted string anywhere in the chunk is treated as documentation.
    documented = any(marker in content for marker in ('"""', "'''"))
    if len(content) > 500 and not documented:
        penalty += 10

    names_ok = _has_meaningful_names(content)
    naming_adjustment = 5 if names_ok else -10
    raw_score = 100 - penalty + naming_adjustment

    if complexity < 3:
        complexity_level = "low"
    elif complexity < 8:
        complexity_level = "medium"
    else:
        complexity_level = "high"

    if total_lines:
        reasonable_line_length = overlong_lines / total_lines < 0.1
    else:
        reasonable_line_length = True

    readability = {
        "has_documentation": documented,
        "reasonable_line_length": reasonable_line_length,
        "meaningful_names": names_ok,
    }
    return {
        "quality_score": max(0, raw_score),
        "complexity_level": complexity_level,
        "readability_indicators": readability,
    }
def assess_educational_value(
    content: str, chunk_metadata: dict[str, Any]
) -> dict[str, Any]:
    """Collect heuristic markers of how useful a chunk is as learning material.

    Args:
        content: Source text of the chunk.
        chunk_metadata: Chunk metadata; ``"complexity"`` (default 0) and
            ``"element_type"`` (default ``"unknown"``) are read.

    Returns:
        A dict with ``"educational_indicators"`` (list of marker names, in a
        fixed order), ``"learning_level"`` (from ``_determine_learning_level``)
        and ``"concepts_demonstrated"`` (from ``_identify_programming_concepts``).
    """
    lowered = content.lower()
    complexity = chunk_metadata.get("complexity", 0)
    element_type = chunk_metadata.get("element_type", "unknown")

    # Each entry pairs an indicator label with the condition that triggers it;
    # the tuple order fixes the order of labels in the result.
    checks = (
        ("example_code", "example" in lowered or "demo" in lowered),
        ("well_documented", '"""' in content or "'''" in content),
        ("learning_opportunity", "TODO" in content or "FIXME" in content),
        ("good_complexity_for_learning", 2 <= complexity <= 6),
        ("object_oriented_concepts", element_type in ("class", "interface")),
    )
    indicators: list[str] = [label for label, triggered in checks if triggered]

    learning_level = _determine_learning_level(complexity)
    concepts = _identify_programming_concepts(content)
    return {
        "educational_indicators": indicators,
        "learning_level": learning_level,
        "concepts_demonstrated": concepts,
    }
def _count_signature_parameters(content: str) -> int:
    """Count the parameters declared by the first ``def`` signature in *content*.

    Only commas at the top nesting level of the signature's parentheses
    separate parameters, so commas inside default values or type annotations
    (``b=(1, 2)``, ``x: dict[str, int]``) are not counted. The bare ``*`` and
    ``/`` separators and empty segments (trailing comma, ``()``) are ignored.
    Returns 0 when no ``def name(`` signature is found. String literals are
    not parsed, so brackets or commas inside string defaults can skew the
    count.
    """
    import re  # local import keeps the block self-contained

    match = re.search(r"\bdef\s+[^\s(]+\s*\(", content)
    if match is None:
        return 0

    segments: list[str] = []
    segment_start = match.end()
    signature_end = len(content)  # tolerate a truncated chunk with no ")"
    depth = 0
    for index in range(match.end(), len(content)):
        char = content[index]
        if char in "([{":
            depth += 1
        elif char in ")]}":
            if depth == 0:
                # Closing parenthesis of the signature itself.
                signature_end = index
                break
            depth -= 1
        elif char == "," and depth == 0:
            segments.append(content[segment_start:index])
            segment_start = index + 1
    segments.append(content[segment_start:signature_end])

    return sum(1 for segment in segments if segment.strip() not in ("", "*", "/"))


def calculate_reusability_score(content: str, chunk_metadata: dict[str, Any]) -> int:
    """Estimate how reusable a code chunk is on a 0-100 scale.

    Starting from 50, the score is adjusted as follows:

    * +20 for a ``function``, ``class`` or ``interface`` element, +10 for a
      ``method``;
    * +15 when the chunk contains a triple-quoted string;
    * +3 per parameter of the first ``def`` signature, capped at +15;
    * -10 when the chunk contains ``localhost``, ``127.0.0.1``, ``C:\\`` or
      ``/tmp/``;
    * -15 when the lower-cased chunk contains ``specific``, ``hardcode``,
      ``hack`` or ``temporary``.

    The parameter bonus is derived from the actual signature rather than from
    every comma in the chunk, so list literals or call arguments in the body
    no longer inflate it and a zero-argument function earns no bonus.

    Args:
        content: Source text of the chunk.
        chunk_metadata: Chunk metadata; only ``"element_type"`` is read
            (defaults to ``"unknown"``).

    Returns:
        The score clamped to the inclusive range 0-100.
    """
    score = 50

    element_type = chunk_metadata.get("element_type", "unknown")
    if element_type in ("function", "class", "interface"):
        score += 20
    elif element_type == "method":
        score += 10

    if '"""' in content or "'''" in content:
        score += 15

    score += min(15, _count_signature_parameters(content) * 3)

    if any(p in content for p in ("localhost", "127.0.0.1", "C:\\", "/tmp/")):
        score -= 10

    lowered = content.lower()
    if any(k in lowered for k in ("specific", "hardcode", "hack", "temporary")):
        score -= 15

    return max(0, min(100, score))