Coverage for src / qdrant_loader_mcp_server / config_loader.py: 72%
115 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-18 04:51 +0000
"""File-based configuration loader for the MCP server.

Precedence:
- CLI --config
- MCP_CONFIG environment variable
- ./config.yaml
- ~/.config/qdrant-loader/config.yaml
- /etc/qdrant-loader/config.yaml

Environment variables overlay values from file. CLI flags override env.
"""
13from __future__ import annotations
15import os
16from pathlib import Path
17from typing import Any
19import yaml
21from qdrant_loader_mcp_server.config_reranking import MCPReranking
23from .config import Config, OpenAIConfig, QdrantConfig, SearchConfig
24from .utils.logging import LoggingConfig
26logger = LoggingConfig.get_logger(__name__)
29def _first_existing(paths: list[Path]) -> Path | None:
30 for p in paths:
31 if p and p.exists() and p.is_file():
32 return p
33 return None
def resolve_config_path(cli_config: Path | None) -> Path | None:
    """Resolve which config file to use, honoring the documented precedence.

    Order: explicit CLI path (returned even if it does not exist), then the
    ``MCP_CONFIG`` env var (only if the path exists), then the first existing
    file among the well-known default locations.
    """
    if cli_config:
        return cli_config

    env_value = os.getenv("MCP_CONFIG")
    if env_value:
        env_path = Path(env_value).expanduser()
        if env_path.exists():
            return env_path

    default_locations = (
        Path.cwd() / "config.yaml",
        Path.home() / ".config" / "qdrant-loader" / "config.yaml",
        Path("/etc/qdrant-loader/config.yaml"),
    )
    # First default that is an existing regular file, or None.
    return next(
        (loc for loc in default_locations if loc.exists() and loc.is_file()),
        None,
    )
52def _get_section(config_data: dict[str, Any], name: str) -> dict[str, Any]:
53 # Only support "global" root going forward
54 return config_data.get(name, {}) or {}
57def _overlay_env_llm(llm: dict[str, Any]) -> None:
58 # LLM env overrides
59 if os.getenv("LLM_PROVIDER"):
60 llm.setdefault("provider", os.getenv("LLM_PROVIDER"))
61 llm["provider"] = os.getenv("LLM_PROVIDER")
62 if os.getenv("LLM_BASE_URL"):
63 llm["base_url"] = os.getenv("LLM_BASE_URL")
64 if os.getenv("LLM_API_KEY"):
65 llm["api_key"] = os.getenv("LLM_API_KEY")
66 # models
67 models = dict(llm.get("models") or {})
68 if os.getenv("LLM_EMBEDDING_MODEL"):
69 models["embeddings"] = os.getenv("LLM_EMBEDDING_MODEL")
70 if os.getenv("LLM_CHAT_MODEL"):
71 models["chat"] = os.getenv("LLM_CHAT_MODEL")
72 if models:
73 llm["models"] = models
76def _overlay_env_qdrant(qdrant: dict[str, Any]) -> None:
77 # Override Qdrant settings with environment variables (unconditional).
78 # Note: differs from qdrant_loader's _auto_resolve_env_vars() which only
79 # applies env vars when the config value equals the default.
80 # Priority: environment variable > config file value > QdrantConfig default.
81 if os.getenv("QDRANT_URL"):
82 qdrant["url"] = os.getenv("QDRANT_URL")
83 if os.getenv("QDRANT_API_KEY"):
84 qdrant["api_key"] = os.getenv("QDRANT_API_KEY")
85 if os.getenv("QDRANT_COLLECTION_NAME"):
86 qdrant["collection_name"] = os.getenv("QDRANT_COLLECTION_NAME")
89def _overlay_env_search(search: dict[str, Any]) -> None:
90 # Only a subset for Phase 0; SearchConfig has its own env fallbacks as well
91 if os.getenv("SEARCH_CONFLICT_USE_LLM"):
92 raw = os.getenv("SEARCH_CONFLICT_USE_LLM", "true").strip().lower()
93 search["conflict_use_llm"] = raw in {"1", "true", "t", "yes", "y", "on"}
94 if os.getenv("SEARCH_CONFLICT_LLM_MODEL"):
95 search["conflict_llm_model"] = os.getenv("SEARCH_CONFLICT_LLM_MODEL")
def load_file_config(path: Path) -> dict[str, Any]:
    """Parse the YAML document at *path*; an empty document yields ``{}``."""
    text = path.read_text(encoding="utf-8")
    return yaml.safe_load(text) or {}
def build_config_from_dict(config_data: dict[str, Any]) -> Config:
    """Build a ``Config`` from parsed YAML data, applying env overlays.

    Precedence within each section: environment variable > file value >
    model default.

    Args:
        config_data: The parsed YAML document (may be empty).

    Returns:
        A fully-populated ``Config`` instance.

    Raises:
        ValueError: If ``global.reranking`` is present but not a mapping.
    """
    global_data = _get_section(config_data, "global")
    # Copy sub-sections so env overlays below do not mutate the caller's data.
    llm = dict(global_data.get("llm") or {})
    qdrant = dict(global_data.get("qdrant") or {})
    search = dict(config_data.get("search") or {})

    # Deprecation: detect legacy blocks and log a warning once
    legacy_embedding = global_data.get("embedding")
    file_conversion = config_data.get("file_conversion")
    legacy_markit = (
        file_conversion.get("markitdown")
        if isinstance(file_conversion, dict)
        else None
    )
    try:
        if legacy_embedding or legacy_markit:
            logger.warning(
                "Legacy configuration fields detected; please migrate to global.llm",
                legacy_embedding=bool(legacy_embedding),
                legacy_markitdown=bool(legacy_markit),
            )
    except Exception:
        # Logging is best-effort; never fail config loading over it.
        pass

    # Apply environment overrides (env wins over file values)
    _overlay_env_llm(llm)
    _overlay_env_qdrant(qdrant)
    _overlay_env_search(search)

    # Derive OpenAIConfig for now (Phase 0); will be replaced by core LLM provider later
    api_key = llm.get("api_key") or os.getenv("OPENAI_API_KEY")
    models = dict(llm.get("models") or {})
    embedding_model = (
        models.get("embeddings")
        or os.getenv("LLM_EMBEDDING_MODEL")
        or "text-embedding-3-small"
    )
    chat_model = models.get("chat") or os.getenv("LLM_CHAT_MODEL") or "gpt-3.5-turbo"

    # Build reranking config from global section if present
    reranking_cfg = None
    if "reranking" in global_data:
        reranking_section = global_data["reranking"]
        # Robustness fix: an empty YAML section (``reranking:``) parses to
        # None; treat it as an empty mapping (defaults) instead of rejecting
        # it, consistent with how llm/qdrant/search normalize empty sections.
        if reranking_section is None:
            reranking_section = {}
        if not isinstance(reranking_section, dict):
            raise ValueError("global.reranking must be a mapping")
        reranking_cfg = MCPReranking(**reranking_section)

    return Config(
        qdrant=QdrantConfig(**qdrant) if qdrant else QdrantConfig(),
        openai=OpenAIConfig(
            api_key=api_key, model=embedding_model, chat_model=chat_model
        ),
        search=SearchConfig(**search) if search else SearchConfig(),
        reranking=reranking_cfg if reranking_cfg is not None else MCPReranking(),
    )
def redact_effective_config(effective: dict[str, Any]) -> dict[str, Any]:
    """Return a deep copy of *effective* with secret values masked.

    Non-empty string values stored under the keys ``api_key`` or
    ``Authorization`` are replaced by ``"***REDACTED***"``; all other
    values (including empty strings and non-strings) are copied unchanged.
    The input mapping is never mutated.
    """
    secret_keys = {"api_key", "Authorization"}

    def _scrub(value: Any) -> Any:
        if isinstance(value, dict):
            return {
                key: (
                    "***REDACTED***"
                    if key in secret_keys and isinstance(item, str) and item
                    else _scrub(item)
                )
                for key, item in value.items()
            }
        if isinstance(value, list):
            return [_scrub(item) for item in value]
        return value

    return _scrub(effective)
def load_config(cli_config: Path | None) -> tuple[Config, dict[str, Any], bool]:
    """Load effective configuration.

    Tries file-based configuration first (path resolved per the module's
    precedence rules); on any failure falls back to the deprecated
    env-only mode so startup never aborts on a bad config file.

    Args:
        cli_config: Optional path from the CLI ``--config`` flag.

    Returns (config_obj, effective_dict, used_file: bool)
    """
    config_path = resolve_config_path(cli_config)
    used_file = False
    if config_path:
        try:
            data = load_file_config(config_path)
            cfg = build_config_from_dict(data)
            used_file = True
            # Effective dict for printing (merge file data with derived)
            # NOTE(review): contains raw api_key values — presumably callers
            # run this through redact_effective_config before display; verify.
            effective = {
                "global": {
                    "llm": data.get("global", {}).get("llm"),
                    "qdrant": data.get("global", {}).get("qdrant"),
                },
                "search": data.get("search"),
                "derived": {
                    "openai": {
                        "model": cfg.openai.model,
                        "chat_model": cfg.openai.chat_model,
                        "api_key": cfg.openai.api_key,
                    }
                },
            }
            return cfg, effective, used_file
        except Exception as e:
            # Deliberate broad catch: a malformed/unreadable file degrades to
            # env-only mode below instead of crashing the server.
            logger.warning(
                "Failed to load config file; falling back to env-only", error=str(e)
            )

    # Fallback to legacy env-only mode (deprecated)
    cfg = Config()
    # Mirror the env vars into the printable "effective" structure; values
    # may be None when the corresponding variable is unset.
    effective = {
        "global": {
            "llm": {
                "provider": os.getenv("LLM_PROVIDER"),
                "base_url": os.getenv("LLM_BASE_URL"),
                "api_key": os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY"),
                "models": {
                    "embeddings": os.getenv("LLM_EMBEDDING_MODEL"),
                    "chat": os.getenv("LLM_CHAT_MODEL"),
                },
            },
            "qdrant": {
                "url": os.getenv("QDRANT_URL"),
                "api_key": os.getenv("QDRANT_API_KEY"),
                "collection_name": os.getenv("QDRANT_COLLECTION_NAME"),
            },
        },
        "search": None,
        "derived": {
            "openai": {
                "model": cfg.openai.model,
                "chat_model": cfg.openai.chat_model,
                "api_key": cfg.openai.api_key,
            }
        },
        "warning": "Using legacy env-only mode; providing a config file is recommended and will be required in a future release.",
    }
    try:
        logger.warning(
            "Running in legacy env-only mode; provide --config or MCP_CONFIG file",
        )
    except Exception:
        # Logging is best-effort; never fail config loading over it.
        pass
    return cfg, effective, used_file