Coverage for src/qdrant_loader/core/chunking/strategy/code/processor/quality.py: 80%

55 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:05 +0000

1from __future__ import annotations 

2 

3from typing import Any 

4 

5from qdrant_loader.core.chunking.strategy.code.processor.utils import ( 

6 determine_learning_level as _determine_learning_level, 

7) 

8from qdrant_loader.core.chunking.strategy.code.processor.utils import ( 

9 has_meaningful_names as _has_meaningful_names, 

10) 

11from qdrant_loader.core.chunking.strategy.code.processor.utils import ( 

12 identify_programming_concepts as _identify_programming_concepts, 

13) 

14 

15 

def assess_code_quality(content: str, chunk_metadata: dict[str, Any]) -> dict[str, Any]:
    """Score a code chunk's quality with a handful of cheap heuristics.

    The score starts at 100 and is adjusted as follows:

    * complexity above 10 costs 20 points; above 5 costs 10;
    * more than 30% of lines longer than 120 characters costs 15;
    * no triple-quoted string in a chunk longer than 500 characters costs 10;
    * ``_has_meaningful_names(content)`` adds 5 when true, subtracts 10 otherwise.

    Args:
        content: Source text of the chunk.
        chunk_metadata: Chunk metadata; only the ``"complexity"`` key is read
            (treated as 0 when absent).

    Returns:
        A dict with ``"quality_score"`` (clamped to 0-100),
        ``"complexity_level"`` (``"low"``, ``"medium"`` or ``"high"``) and
        ``"readability_indicators"`` (``has_documentation``,
        ``reasonable_line_length`` and ``meaningful_names`` flags).
    """
    complexity = chunk_metadata.get("complexity", 0)

    quality_score = 100
    if complexity > 10:
        quality_score -= 20
    elif complexity > 5:
        quality_score -= 10

    # str.split always yields at least one element, so dividing by
    # len(lines) below is safe even for empty content.
    lines = content.split("\n")
    long_line_count = sum(1 for line in lines if len(line) > 120)
    if long_line_count > len(lines) * 0.3:
        quality_score -= 15

    # Any triple-quoted string is taken as evidence of documentation.
    has_docs = '"""' in content or "'''" in content
    if not has_docs and len(content) > 500:
        quality_score -= 10

    meaningful = _has_meaningful_names(content)
    quality_score += 5 if meaningful else -10

    if complexity < 3:
        complexity_level = "low"
    elif complexity < 8:
        complexity_level = "medium"
    else:
        complexity_level = "high"

    return {
        # The naming bonus can lift an unpenalised chunk to 105, so clamp
        # both ends to keep the score on the 0-100 scale.
        "quality_score": max(0, min(100, quality_score)),
        "complexity_level": complexity_level,
        "readability_indicators": {
            "has_documentation": has_docs,
            "reasonable_line_length": long_line_count / len(lines) < 0.1,
            "meaningful_names": meaningful,
        },
    }

45 

46 

def assess_educational_value(
    content: str, chunk_metadata: dict[str, Any]
) -> dict[str, Any]:
    """Collect heuristic hints about how useful a chunk is for learning.

    Args:
        content: Source text of the chunk.
        chunk_metadata: Chunk metadata; ``"complexity"`` (default 0) and
            ``"element_type"`` (default ``"unknown"``) are read.

    Returns:
        A dict with ``"educational_indicators"`` (list of tag strings),
        ``"learning_level"`` (result of ``_determine_learning_level`` for the
        complexity) and ``"concepts_demonstrated"`` (result of
        ``_identify_programming_concepts`` for the content).
    """
    lowered = content.lower()
    complexity = chunk_metadata.get("complexity", 0)
    kind = chunk_metadata.get("element_type", "unknown")

    # (tag, applies) pairs, listed in the order the tags appear in the result.
    checks = (
        ("example_code", "example" in lowered or "demo" in lowered),
        ("well_documented", '"""' in content or "'''" in content),
        ("learning_opportunity", "TODO" in content or "FIXME" in content),
        ("good_complexity_for_learning", 2 <= complexity <= 6),
        ("object_oriented_concepts", kind in ("class", "interface")),
    )
    educational_indicators: list[str] = [tag for tag, applies in checks if applies]

    return {
        "educational_indicators": educational_indicators,
        "learning_level": _determine_learning_level(complexity),
        "concepts_demonstrated": _identify_programming_concepts(content),
    }

68 

69 

def _count_signature_params(content: str) -> int:
    """Count the parameters declared by the first ``def`` signature in *content*.

    Commas nested inside brackets (annotations such as ``dict[str, int]``,
    tuple defaults) or inside quoted defaults are not treated as separators.
    Bare ``*`` and ``/`` markers are not counted. Returns 0 when no ``def``
    signature is found or its parameter list is never closed.
    """
    import re

    match = re.search(r"\bdef\s+\w+\s*\(", content)
    if match is None:
        return 0

    params = 0
    depth = 0
    quote = ""
    current: list[str] = []

    def _flush() -> int:
        # Close the pending segment; report whether it named a parameter.
        text = "".join(current).strip()
        current.clear()
        return 1 if text and text not in ("*", "/") else 0

    for char in content[match.end():]:
        if quote:
            if char == quote:
                quote = ""
        elif char in "'\"":
            quote = char
        elif char in "([{":
            depth += 1
        elif char in ")]}":
            if depth == 0:
                # Closing parenthesis of the signature itself.
                return params + _flush()
            depth -= 1
        elif char == "," and depth == 0:
            params += _flush()
            continue
        current.append(char)

    # Parameter list never closed (e.g. truncated chunk): claim nothing.
    return 0


def calculate_reusability_score(content: str, chunk_metadata: dict[str, Any]) -> int:
    """Estimate how reusable a code chunk is on a 0-100 scale.

    Starting from 50:

    * ``element_type`` of function/class/interface adds 20, method adds 10;
    * a triple-quoted string anywhere in the chunk adds 15;
    * each parameter of the first ``def`` signature adds 3, capped at 15;
    * a hard-coded location (``localhost``, ``127.0.0.1``, ``C:\\``,
      ``/tmp/``) subtracts 10;
    * the words ``specific``, ``hardcode``, ``hack`` or ``temporary``
      (case-insensitive) subtract 15.

    Args:
        content: Source text of the chunk.
        chunk_metadata: Chunk metadata; only ``"element_type"`` is read
            (treated as ``"unknown"`` when absent).

    Returns:
        The score clamped to the range 0-100.
    """
    score = 50

    element_type = chunk_metadata.get("element_type", "unknown")
    if element_type in ("function", "class", "interface"):
        score += 20
    elif element_type == "method":
        score += 10

    if '"""' in content or "'''" in content:
        score += 15

    # Count real signature parameters rather than every comma in the chunk,
    # so zero-argument functions and comma-heavy bodies earn no bonus.
    score += min(15, _count_signature_params(content) * 3)

    if any(marker in content for marker in ("localhost", "127.0.0.1", "C:\\", "/tmp/")):
        score -= 10

    lowered = content.lower()
    if any(word in lowered for word in ("specific", "hardcode", "hack", "temporary")):
        score -= 15

    return max(0, min(100, score))