Coverage for src/qdrant_loader/core/chunking/strategy/code/metadata/language_specific.py: 63%
93 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
from __future__ import annotations

import re
from typing import Any
def extract_language_specific_metadata(content: str, language: str) -> dict[str, Any]:
    """Route *content* to the metadata extractor registered for *language*.

    Args:
        content: Source text of the code chunk to inspect.
        language: Language identifier. It is compared case-sensitively
            against "python", "javascript", "typescript", "java", "cpp"
            and "c".

    Returns:
        The dictionary produced by the matching extractor, or an empty
        dictionary when *language* is none of the recognised identifiers.
    """
    # Select the extractor first, then call it once at the bottom.
    if language == "python":
        extractor = extract_python_metadata
    elif language in ("javascript", "typescript"):
        extractor = extract_javascript_metadata
    elif language == "java":
        extractor = extract_java_metadata
    elif language in ("cpp", "c"):
        extractor = extract_c_cpp_metadata
    else:
        # Unrecognised language: nothing language-specific to report.
        return {}
    return extractor(content)
def extract_python_metadata(content: str) -> dict[str, Any]:
    """Collect coarse Python feature tags for *content*.

    Every check is a plain substring test, so the tags are hints rather than
    parsed facts: for example, any ``:`` is enough to report ``type_hints``
    and any ``@`` is enough to report ``decorators``.

    Args:
        content: Python source text to scan.

    Returns:
        A dictionary with two keys: ``python_features`` (tags in the fixed
        order of the table below) and ``python_version_indicators`` (the
        result of ``detect_python_version_features`` for the same text).
    """
    uses_async = "async def" in content or ("async" in content and "await" in content)
    # (tag, detected) pairs; tuple order is the order tags are reported in.
    checks = (
        ("async_await", uses_async),
        ("decorators", "@" in content),
        ("type_hints", "typing" in content or "Type" in content or ":" in content),
        ("generators", "yield" in content),
        ("context_managers", "__enter__" in content and "__exit__" in content),
        ("dunder_methods", "__" in content),
        ("lambda_functions", "lambda" in content),
        ("dataclasses", "dataclass" in content or "@dataclass" in content),
    )
    detected = [tag for tag, present in checks if present]
    version_indicators = detect_python_version_features(content)
    return {
        "python_features": detected,
        "python_version_indicators": version_indicators,
    }
def extract_javascript_metadata(content: str) -> dict[str, Any]:
    """Tag the JavaScript/TypeScript features that appear in *content*.

    Keywords are matched as whole words, so identifiers that merely contain
    one (``delete`` vs ``let``, ``className`` vs ``class``, ``constructor``
    vs ``const``) are not counted. The checks are still textual heuristics:
    a keyword inside a comment or string literal counts as a match.

    Args:
        content: JavaScript or TypeScript source text to scan.

    Returns:
        ``{"javascript_features": [...]}`` with tags listed in the fixed
        order the checks run.
    """

    def has_keyword(*keywords: str) -> bool:
        """Return True if any of *keywords* occurs as a standalone word."""
        return any(
            re.search(rf"\b{re.escape(keyword)}\b", content) for keyword in keywords
        )

    # Destructuring heuristic: a declaration whose target opens with `{` or
    # `[`, or a closing brace directly assigned to (`} =`, excluding `==`
    # and `=>`). Index assignments such as `arr[i] = x` are ignored on purpose.
    declares_pattern = re.search(r"\b(?:const|let|var)\s*[{\[]", content)
    assigns_to_pattern = re.search(r"\}\s*=(?![=>])", content)

    features: list[str] = []
    if has_keyword("async") and has_keyword("await"):
        features.append("async_await")
    if "=>" in content:
        features.append("arrow_functions")
    if has_keyword("const", "let"):
        features.append("es6_variables")
    if has_keyword("class"):
        features.append("es6_classes")
    if has_keyword("import") and has_keyword("from"):
        features.append("es6_modules")
    if "${" in content:
        features.append("template_literals")
    if declares_pattern or assigns_to_pattern:
        features.append("destructuring")
    # Accept both `function*` and `function *name` spellings.
    if re.search(r"\bfunction\s*\*", content) or has_keyword("yield"):
        features.append("generators")
    return {"javascript_features": features}
def extract_java_metadata(content: str) -> dict[str, Any]:
    """Collect coarse Java feature tags for *content* via substring checks.

    Args:
        content: Java source text to scan.

    Returns:
        ``{"language_features": [...]}`` with tags listed in the fixed order
        of the table below.
    """
    # Generics are reported when both angle brackets occur anywhere in the
    # text, or when the literal word "generic" does.
    has_angle_pair = "<" in content and ">" in content
    checks = (
        ("interfaces", "interface" in content),
        ("inheritance", "extends" in content),
        ("interface_implementation", "implements" in content),
        ("thread_synchronization", "synchronized" in content),
        ("generics", "generic" in content or has_angle_pair),
        ("annotations", "@Override" in content or "@" in content),
    )
    return {"language_features": [tag for tag, present in checks if present]}
def extract_c_cpp_metadata(content: str) -> dict[str, Any]:
    """Collect coarse C/C++ feature tags for *content* via substring checks.

    Args:
        content: C or C++ source text to scan.

    Returns:
        ``{"language_features": [...]}`` with tags listed in the fixed order
        of the table below.
    """
    # Each tag is reported when at least one of its marker substrings occurs.
    markers_by_tag = (
        ("header_includes", ("#include",)),
        ("manual_memory_management", ("malloc", "free")),
        ("pointer_usage", ("pointer", "->")),
        ("templates", ("template",)),
        ("namespaces", ("namespace",)),
        ("inline_functions", ("inline",)),
    )
    found = [
        tag
        for tag, markers in markers_by_tag
        if any(marker in content for marker in markers)
    ]
    return {"language_features": found}
def detect_python_version_features(content: str) -> list[str]:
    """Report syntax and library markers that hint at a minimum Python version.

    The checks are textual heuristics, not a parse, so a marker inside a
    comment or string literal can still count. Two of them are anchored to
    cut common false positives:

    * Pattern matching needs a line that starts with ``match <subject>:``
      and a line that starts with ``case <pattern>:``. Prose such as
      "no match found" does not qualify.
    * An f-string needs an ``f``/``rf``/``fr`` prefix (either case) that is
      not the tail of a longer word, so a literal like ``"conf"`` is not
      mistaken for one.

    Args:
        content: Python source text to scan.

    Returns:
        Tags in fixed order: ``walrus_operator_py38``,
        ``pattern_matching_py310``, ``f_strings_py36``, ``pathlib_py34``,
        ``dataclasses_py37``; only the detected ones are included.
    """
    # `match` must open a line, be followed by a subject, and end in a colon
    # (an optional trailing comment is allowed).
    match_statement = re.search(
        r"^[ \t]*match[ \t]+\S[^\n]*:\s*(?:#.*)?$", content, re.MULTILINE
    )
    case_clause = re.search(r"^[ \t]*case\b[^\n]*:", content, re.MULTILINE)
    # The lookbehind rejects a quote that merely follows a word ending in "f".
    f_string_prefix = re.search(r"(?<!\w)(?:[fF][rR]?|[rR][fF])[\"']", content)

    features: list[str] = []
    if ":=" in content:
        features.append("walrus_operator_py38")
    if match_statement and case_clause:
        features.append("pattern_matching_py310")
    if f_string_prefix:
        features.append("f_strings_py36")
    if "pathlib" in content:
        features.append("pathlib_py34")
    if "dataclass" in content:
        features.append("dataclasses_py37")
    return features