Coverage for src/qdrant_loader/core/chunking/strategy/code/processor/quality.py: 80%

1from __future__ import annotations

3from typing import Any

5from qdrant_loader.core.chunking.strategy.code.processor.utils import (

6 determine_learning_level as _determine_learning_level,

8from qdrant_loader.core.chunking.strategy.code.processor.utils import (

9 has_meaningful_names as _has_meaningful_names,

10)

11from qdrant_loader.core.chunking.strategy.code.processor.utils import (

12 identify_programming_concepts as _identify_programming_concepts,

13)

def assess_code_quality(content: str, chunk_metadata: dict[str, Any]) -> dict[str, Any]:
    """Score a code chunk with a handful of cheap textual heuristics.

    The score starts at 100 and is adjusted as follows:

    * complexity above 10 costs 20 points, above 5 costs 10;
    * more than 30% of lines longer than 120 characters costs 15;
    * content longer than 500 characters without a triple-quote marker costs 10;
    * the imported ``has_meaningful_names`` helper adds 5 when truthy and
      removes 10 otherwise.

    Only the lower bound is clamped, so the reported score lies in 0..105.

    Args:
        content: Source text of the chunk.
        chunk_metadata: Chunk metadata; only the ``"complexity"`` key is read
            (defaults to 0 when absent).

    Returns:
        A dict with ``quality_score``, ``complexity_level`` (``"low"``,
        ``"medium"`` or ``"high"``) and a ``readability_indicators`` mapping.
    """
    complexity = chunk_metadata.get("complexity", 0)

    if complexity > 10:
        penalty = 20
    elif complexity > 5:
        penalty = 10
    else:
        penalty = 0
    score = 100 - penalty

    # str.split always yields at least one element, so ``total`` is never 0.
    all_lines = content.split("\n")
    total = len(all_lines)
    overlong = sum(1 for text in all_lines if len(text) > 120)
    if overlong > total * 0.3:
        score -= 15

    documented = any(marker in content for marker in ('"""', "'''"))
    if len(content) > 500 and not documented:
        score -= 10

    names_ok = _has_meaningful_names(content)
    if names_ok:
        score += 5
    else:
        score -= 10

    if complexity < 3:
        level = "low"
    elif complexity < 8:
        level = "medium"
    else:
        level = "high"

    readability = {
        "has_documentation": documented,
        "reasonable_line_length": overlong / total < 0.1,
        "meaningful_names": names_ok,
    }
    return {
        "quality_score": max(0, score),
        "complexity_level": level,
        "readability_indicators": readability,
    }

def assess_educational_value(
    content: str, chunk_metadata: dict[str, Any]
) -> dict[str, Any]:
    """Collect indicators of how useful a code chunk is as learning material.

    Args:
        content: Source text of the chunk.
        chunk_metadata: Chunk metadata; the ``"complexity"`` (default 0) and
            ``"element_type"`` (default ``"unknown"``) keys are read.

    Returns:
        A dict with ``educational_indicators`` (labels in a fixed order),
        ``learning_level`` and ``concepts_demonstrated``; the latter two are
        delegated to the imported ``determine_learning_level`` and
        ``identify_programming_concepts`` helpers.
    """
    lowered = content.lower()
    complexity = chunk_metadata.get("complexity", 0)
    element_type = chunk_metadata.get("element_type", "unknown")

    # (condition, label) pairs, listed in the order the labels are reported.
    checks = (
        ("example" in lowered or "demo" in lowered, "example_code"),
        ('"""' in content or "'''" in content, "well_documented"),
        ("TODO" in content or "FIXME" in content, "learning_opportunity"),
        (2 <= complexity <= 6, "good_complexity_for_learning"),
        (element_type in ("class", "interface"), "object_oriented_concepts"),
    )
    indicators: list[str] = [label for applies, label in checks if applies]

    return {
        "educational_indicators": indicators,
        "learning_level": _determine_learning_level(complexity),
        "concepts_demonstrated": _identify_programming_concepts(content),
    }

def _count_signature_parameters(content: str) -> int:
    """Count the parameters declared by the first ``def`` signature in *content*.

    Only commas at the top level of the signature's parentheses separate
    parameters, so commas inside default values such as ``a=(1, 2)`` are
    ignored. Bare ``*`` and ``/`` markers are not counted. Returns 0 when no
    ``def `` followed by ``(`` is present.
    """
    def_at = content.find("def ")
    if def_at == -1:
        return 0
    open_at = content.find("(", def_at)
    if open_at == -1:
        return 0

    segments: list[str] = []
    current: list[str] = []
    depth = 0
    for char in content[open_at:]:
        if char in "([{":
            depth += 1
            if depth == 1:
                continue  # the signature's own opening parenthesis
        elif char in ")]}":
            depth -= 1
            if depth == 0:
                break  # end of the parameter list
        elif char == "," and depth == 1:
            segments.append("".join(current))
            current = []
            continue
        current.append(char)
    # Flush the last entry (also covers a chunk cut off mid-signature).
    segments.append("".join(current))

    return sum(1 for segment in segments if segment.strip() not in ("", "*", "/"))


def calculate_reusability_score(content: str, chunk_metadata: dict[str, Any]) -> int:
    """Estimate how reusable a code chunk is on a 0..100 scale.

    Starting from 50, the score is adjusted as follows:

    * +20 for a ``function``, ``class`` or ``interface`` element, +10 for a
      ``method``;
    * +15 when the content contains a triple-quote marker;
    * +3 per parameter of the first ``def`` signature, capped at +15;
    * -10 when a machine-specific host or path literal is present;
    * -15 when a keyword hinting at a one-off solution is present.

    The parameter bonus is derived from the signature itself rather than from
    every comma in the chunk, so list literals or call arguments in the body
    no longer inflate the score and a parameterless ``def f():`` earns no
    bonus.

    Args:
        content: Source text of the chunk.
        chunk_metadata: Chunk metadata; only ``"element_type"`` is read
            (defaults to ``"unknown"``).

    Returns:
        The score clamped to the inclusive range 0..100.
    """
    score = 50

    element_type = chunk_metadata.get("element_type", "unknown")
    if element_type in ("function", "class", "interface"):
        score += 20
    elif element_type == "method":
        score += 10

    if '"""' in content or "'''" in content:
        score += 15

    score += min(15, _count_signature_parameters(content) * 3)

    if any(p in content for p in ("localhost", "127.0.0.1", "C:\\", "/tmp/")):
        score -= 10

    lowered = content.lower()
    if any(k in lowered for k in ("specific", "hardcode", "hack", "temporary")):
        score -= 15

    return max(0, min(100, score))

Coverage for src / qdrant_loader / core / chunking / strategy / code / processor / quality.py: 80%

55 statements