Coverage for src/qdrant_loader_mcp_server/config_loader.py: 73%
109 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-08 06:06 +0000
"""File-based configuration loader for the MCP server.

Precedence:
- CLI --config
- MCP_CONFIG environment variable
- ./config.yaml
- ~/.config/qdrant-loader/config.yaml
- /etc/qdrant-loader/config.yaml

Environment variables overlay values from file. CLI flags override env.
"""
13from __future__ import annotations
15import os
16from pathlib import Path
17from typing import Any
19import yaml
21from .config import Config, OpenAIConfig, QdrantConfig, SearchConfig
22from .utils.logging import LoggingConfig
24logger = LoggingConfig.get_logger(__name__)
27def _first_existing(paths: list[Path]) -> Path | None:
28 for p in paths:
29 if p and p.exists() and p.is_file():
30 return p
31 return None
34def resolve_config_path(cli_config: Path | None) -> Path | None:
35 if cli_config:
36 return cli_config
37 env_cfg = os.getenv("MCP_CONFIG")
38 if env_cfg:
39 p = Path(env_cfg).expanduser()
40 if p.exists():
41 return p
42 candidates = [
43 Path.cwd() / "config.yaml",
44 Path.home() / ".config" / "qdrant-loader" / "config.yaml",
45 Path("/etc/qdrant-loader/config.yaml"),
46 ]
47 return _first_existing(candidates)
50def _get_section(config_data: dict[str, Any], name: str) -> dict[str, Any]:
51 # Only support "global" root going forward
52 return config_data.get(name, {}) or {}
55def _overlay_env_llm(llm: dict[str, Any]) -> None:
56 # LLM env overrides
57 if os.getenv("LLM_PROVIDER"):
58 llm.setdefault("provider", os.getenv("LLM_PROVIDER"))
59 llm["provider"] = os.getenv("LLM_PROVIDER")
60 if os.getenv("LLM_BASE_URL"):
61 llm["base_url"] = os.getenv("LLM_BASE_URL")
62 if os.getenv("LLM_API_KEY"):
63 llm["api_key"] = os.getenv("LLM_API_KEY")
64 # models
65 models = dict(llm.get("models") or {})
66 if os.getenv("LLM_EMBEDDING_MODEL"):
67 models["embeddings"] = os.getenv("LLM_EMBEDDING_MODEL")
68 if os.getenv("LLM_CHAT_MODEL"):
69 models["chat"] = os.getenv("LLM_CHAT_MODEL")
70 if models:
71 llm["models"] = models
74def _overlay_env_qdrant(qdrant: dict[str, Any]) -> None:
75 if os.getenv("QDRANT_URL"):
76 qdrant["url"] = os.getenv("QDRANT_URL")
77 if os.getenv("QDRANT_API_KEY"):
78 qdrant["api_key"] = os.getenv("QDRANT_API_KEY")
79 if os.getenv("QDRANT_COLLECTION_NAME"):
80 qdrant["collection_name"] = os.getenv("QDRANT_COLLECTION_NAME")
83def _overlay_env_search(search: dict[str, Any]) -> None:
84 # Only a subset for Phase 0; SearchConfig has its own env fallbacks as well
85 if os.getenv("SEARCH_CONFLICT_USE_LLM"):
86 raw = os.getenv("SEARCH_CONFLICT_USE_LLM", "true").strip().lower()
87 search["conflict_use_llm"] = raw in {"1", "true", "t", "yes", "y", "on"}
88 if os.getenv("SEARCH_CONFLICT_LLM_MODEL"):
89 search["conflict_llm_model"] = os.getenv("SEARCH_CONFLICT_LLM_MODEL")
def load_file_config(path: Path) -> dict[str, Any]:
    """Parse the YAML file at *path*; an empty document yields ``{}``."""
    text = path.read_text(encoding="utf-8")
    parsed = yaml.safe_load(text)
    return parsed or {}
def build_config_from_dict(config_data: dict[str, Any]) -> Config:
    """Build a Config object from parsed file data plus environment overlays.

    File values are read from the "global" root (llm/qdrant) and the
    top-level "search" section, then overlaid with environment variables,
    and finally flattened into the legacy OpenAIConfig (Phase 0 shim).
    """
    global_data = _get_section(config_data, "global")
    llm = dict(global_data.get("llm") or {})
    qdrant = dict(global_data.get("qdrant") or {})
    search = dict(config_data.get("search") or {})

    # Deprecation: warn when legacy config blocks are still present.
    legacy_embedding = global_data.get("embedding")
    file_conversion = config_data.get("file_conversion")
    legacy_markit = (
        file_conversion.get("markitdown") if isinstance(file_conversion, dict) else None
    )
    try:
        if legacy_embedding or legacy_markit:
            logger.warning(
                "Legacy configuration fields detected; please migrate to global.llm",
                legacy_embedding=bool(legacy_embedding),
                legacy_markitdown=bool(legacy_markit),
            )
    except Exception:
        # Best-effort: logging must never block configuration loading.
        pass

    # Environment variables take precedence over file values.
    _overlay_env_llm(llm)
    _overlay_env_qdrant(qdrant)
    _overlay_env_search(search)

    # Derive OpenAIConfig for now (Phase 0); a core LLM provider replaces
    # this later.  Fallback chain: file/env llm block -> env -> default.
    api_key = llm.get("api_key") or os.getenv("OPENAI_API_KEY")
    models = dict(llm.get("models") or {})
    embedding_model = (
        models.get("embeddings")
        or os.getenv("LLM_EMBEDDING_MODEL")
        or "text-embedding-3-small"
    )
    chat_model = models.get("chat") or os.getenv("LLM_CHAT_MODEL") or "gpt-3.5-turbo"

    return Config(
        qdrant=QdrantConfig(**qdrant) if qdrant else QdrantConfig(),
        openai=OpenAIConfig(
            api_key=api_key, model=embedding_model, chat_model=chat_model
        ),
        search=SearchConfig(**search) if search else SearchConfig(),
    )
def redact_effective_config(effective: dict[str, Any]) -> dict[str, Any]:
    """Return a deep copy of *effective* with secret values masked.

    Non-empty string values stored under "api_key" or "Authorization" keys
    (at any nesting depth, including inside lists) are replaced with
    ``***REDACTED***``; everything else is copied through unchanged.
    """
    secret_keys = {"api_key", "Authorization"}

    def _mask(value: Any) -> Any:
        if isinstance(value, dict):
            return {
                key: (
                    "***REDACTED***"
                    if key in secret_keys and isinstance(val, str) and val
                    else _mask(val)
                )
                for key, val in value.items()
            }
        if isinstance(value, list):
            return [_mask(item) for item in value]
        return value

    return _mask(effective)
def load_config(cli_config: Path | None) -> tuple[Config, dict[str, Any], bool]:
    """Load effective configuration.

    Resolution order is delegated to resolve_config_path (CLI flag, then
    MCP_CONFIG, then well-known locations).  If a file is found and parses,
    the Config is built from it with environment overlays; any failure falls
    back to the deprecated env-only mode.

    Args:
        cli_config: Path given via --config, or None.

    Returns (config_obj, effective_dict, used_file: bool)
    """
    config_path = resolve_config_path(cli_config)
    used_file = False
    if config_path:
        try:
            data = load_file_config(config_path)
            cfg = build_config_from_dict(data)
            used_file = True
            # Effective dict for printing (merge file data with derived)
            effective = {
                "global": {
                    "llm": data.get("global", {}).get("llm"),
                    "qdrant": data.get("global", {}).get("qdrant"),
                },
                "search": data.get("search"),
                # "derived" echoes what build_config_from_dict actually
                # resolved (env overlays and defaults applied).
                "derived": {
                    "openai": {
                        "model": cfg.openai.model,
                        "chat_model": cfg.openai.chat_model,
                        "api_key": cfg.openai.api_key,
                    }
                },
            }
            return cfg, effective, used_file
        except Exception as e:
            # Broad on purpose: any read/parse/validation error drops us into
            # the env-only fallback below rather than crashing startup.
            logger.warning(
                "Failed to load config file; falling back to env-only", error=str(e)
            )

    # Fallback to legacy env-only mode (deprecated)
    cfg = Config()
    # Mirror the file-mode "effective" shape, sourced entirely from env vars;
    # unset variables show up as None so the printed config stays complete.
    effective = {
        "global": {
            "llm": {
                "provider": os.getenv("LLM_PROVIDER"),
                "base_url": os.getenv("LLM_BASE_URL"),
                "api_key": os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY"),
                "models": {
                    "embeddings": os.getenv("LLM_EMBEDDING_MODEL"),
                    "chat": os.getenv("LLM_CHAT_MODEL"),
                },
            },
            "qdrant": {
                "url": os.getenv("QDRANT_URL"),
                "api_key": os.getenv("QDRANT_API_KEY"),
                "collection_name": os.getenv("QDRANT_COLLECTION_NAME"),
            },
        },
        "search": None,
        "derived": {
            "openai": {
                "model": cfg.openai.model,
                "chat_model": cfg.openai.chat_model,
                "api_key": cfg.openai.api_key,
            }
        },
        "warning": "Using legacy env-only mode; providing a config file is recommended and will be required in a future release.",
    }
    try:
        # Best-effort deprecation notice; logging failures are ignored.
        logger.warning(
            "Running in legacy env-only mode; provide --config or MCP_CONFIG file",
        )
    except Exception:
        pass
    return cfg, effective, used_file