Coverage for src/qdrant_loader/core/chunking/strategy/code/metadata/dependencies.py: 93%
29 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:05 +0000
1from __future__ import annotations
3import re
# One left-to-right scan for both Python-style import forms.
#   group 1: module named in ``from <module> import ...``
#   group 2: module named in a plain ``import <module>``
# Using a single alternation means the ``import`` keyword that belongs to a
# ``from`` statement is consumed by the first branch, so the imported symbol
# (``path`` in ``from os import path``) is never mistaken for a module.
_IMPORT_STATEMENT = re.compile(
    r"from\s+([a-zA-Z_][a-zA-Z0-9_.]*)\s+import"
    r"|import\s+([a-zA-Z_][a-zA-Z0-9_.]*)"
)

# Remaining import syntaxes, applied in this order after the two forms above:
# C/C++ ``#include``, CommonJS ``require(...)``, ES module ``import ... from``.
_OTHER_IMPORT_PATTERNS = (
    re.compile(r'#include\s*[<"]([^>"]+)[>"]'),
    re.compile(r"require\s*\([\'\"]([^\'\"]+)[\'\"]\)"),
    re.compile(r"import\s+.*\s+from\s+[\'\"]([^\'\"]+)[\'\"]"),
)

# Top-level module names that are classified as Python standard library.
_PYTHON_STDLIB = frozenset(
    {
        "os",
        "sys",
        "json",
        "math",
        "random",
        "datetime",
        "collections",
        "itertools",
        "functools",
        "operator",
        "re",
        "urllib",
        "http",
        "pathlib",
        "typing",
        "dataclasses",
        "abc",
        "enum",
        "logging",
        "threading",
        "multiprocessing",
        "subprocess",
        "socket",
        "sqlite3",
        "csv",
        "pickle",
        "gzip",
        "zipfile",
        "tarfile",
        "shutil",
        "tempfile",
    }
)


def build_dependency_graph(content: str) -> dict[str, list[str]]:
    """Extract import-like references from source text and classify them.

    The text is scanned with regular expressions (no parsing), covering
    plain ``import X``, ``from X import ...``, ``#include``, ``require(...)``
    and ``import ... from '...'`` forms.

    Args:
        content: Source text to scan.

    Returns:
        A dict with four lists:

        * ``"imports"``: every captured name, ordered as plain imports, then
          ``from`` imports, then includes, requires and ES-module sources
          (each group in document order). Duplicates are kept.
        * ``"stdlib_imports"``: names whose first dotted component is in the
          built-in stdlib table.
        * ``"third_party_imports"``: remaining names accepted by
          ``is_third_party_import``.
        * ``"internal_references"``: everything else.
    """
    plain_modules: list[str] = []
    from_modules: list[str] = []
    for match in _IMPORT_STATEMENT.finditer(content):
        from_module, plain_module = match.groups()
        if from_module is not None:
            from_modules.append(from_module)
        else:
            plain_modules.append(plain_module)

    # Keep the historical ordering: all plain imports first, then all
    # from-imports, then each of the remaining syntaxes.
    imports: list[str] = plain_modules + from_modules
    for pattern in _OTHER_IMPORT_PATTERNS:
        imports.extend(pattern.findall(content))

    dependencies: dict[str, list[str]] = {
        "imports": imports,
        "internal_references": [],
        "third_party_imports": [],
        "stdlib_imports": [],
    }

    for imp in imports:
        base_module = imp.split(".")[0]
        if base_module in _PYTHON_STDLIB:
            dependencies["stdlib_imports"].append(imp)
        elif is_third_party_import(imp):
            dependencies["third_party_imports"].append(imp)
        else:
            dependencies["internal_references"].append(imp)

    return dependencies
# Lower-cased top-level names that are always treated as third-party.
_KNOWN_THIRD_PARTY = frozenset(
    {
        "requests",
        "numpy",
        "pandas",
        "flask",
        "django",
        "fastapi",
        "tensorflow",
        "torch",
        "pytorch",
        "sklearn",
        "scipy",
        "matplotlib",
        "seaborn",
        "plotly",
        "streamlit",
        "dash",
        "celery",
        "redis",
        "sqlalchemy",
        "alembic",
        "pydantic",
        "marshmallow",
        "click",
        "typer",
        "pytest",
        "unittest2",
        "mock",
        "httpx",
        "aiohttp",
        "websockets",
        "uvicorn",
        "gunicorn",
        "jinja2",
        "mako",
        "babel",
        "pillow",
        "opencv",
        "cv2",
        "boto3",
        "azure",
        "google",
    }
)

# Substrings whose presence anywhere in the top-level name marks it third-party.
_THIRD_PARTY_HINTS = ("lib", "client", "sdk", "api")

# Generic names excluded from the final "lower-case name" fallback.
_LOCAL_MODULE_NAMES = frozenset({"main", "app", "config", "utils", "helpers"})


def is_third_party_import(import_name: str) -> bool:
    """Guess whether an import name refers to a third-party package.

    Only the part before the first ``.`` is examined, compared
    case-insensitively. The checks run in order and the first hit wins:

    1. the name is in the known third-party table;
    2. the name contains ``lib``, ``client``, ``sdk`` or ``api``;
    3. the name contains an underscore but does not start with one;
    4. the name is lower-case, does not start with ``test`` and is not one
       of ``main``, ``app``, ``config``, ``utils``, ``helpers``.

    NOTE: the name is lower-cased before step 4, so that step accepts any
    remaining name that contains at least one letter.

    Args:
        import_name: Dotted import name or path as captured from source.

    Returns:
        ``True`` if any of the checks above matches, otherwise ``False``.
    """
    root = import_name.partition(".")[0].lower()

    if root in _KNOWN_THIRD_PARTY:
        return True

    for hint in _THIRD_PARTY_HINTS:
        if hint in root:
            return True

    if "_" in root and not root.startswith("_"):
        return True

    looks_local = root.startswith("test") or root in _LOCAL_MODULE_NAMES
    return root.islower() and not looks_local