Coverage for src / qdrant_loader_mcp_server / config_loader.py: 72%

115 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-18 04:51 +0000

1"""File-based configuration loader for the MCP server. 

2 

3Precedence: 

4- CLI --config 

5- MCP_CONFIG environment variable 

6- ./config.yaml 

7- ~/.config/qdrant-loader/config.yaml 

8- /etc/qdrant-loader/config.yaml 

9 

10Environment variables overlay values from file. CLI flags override env. 

11""" 

12 

13from __future__ import annotations 

14 

15import os 

16from pathlib import Path 

17from typing import Any 

18 

19import yaml 

20 

21from qdrant_loader_mcp_server.config_reranking import MCPReranking 

22 

23from .config import Config, OpenAIConfig, QdrantConfig, SearchConfig 

24from .utils.logging import LoggingConfig 

25 

# Module-level logger; warning() calls in this module pass structured kwargs
# (structlog-style) — see the legacy-config warnings below.
logger = LoggingConfig.get_logger(__name__)

27 

28 

29def _first_existing(paths: list[Path]) -> Path | None: 

30 for p in paths: 

31 if p and p.exists() and p.is_file(): 

32 return p 

33 return None 

34 

35 

36def resolve_config_path(cli_config: Path | None) -> Path | None: 

37 if cli_config: 

38 return cli_config 

39 env_cfg = os.getenv("MCP_CONFIG") 

40 if env_cfg: 

41 p = Path(env_cfg).expanduser() 

42 if p.exists(): 

43 return p 

44 candidates = [ 

45 Path.cwd() / "config.yaml", 

46 Path.home() / ".config" / "qdrant-loader" / "config.yaml", 

47 Path("/etc/qdrant-loader/config.yaml"), 

48 ] 

49 return _first_existing(candidates) 

50 

51 

52def _get_section(config_data: dict[str, Any], name: str) -> dict[str, Any]: 

53 # Only support "global" root going forward 

54 return config_data.get(name, {}) or {} 

55 

56 

57def _overlay_env_llm(llm: dict[str, Any]) -> None: 

58 # LLM env overrides 

59 if os.getenv("LLM_PROVIDER"): 

60 llm.setdefault("provider", os.getenv("LLM_PROVIDER")) 

61 llm["provider"] = os.getenv("LLM_PROVIDER") 

62 if os.getenv("LLM_BASE_URL"): 

63 llm["base_url"] = os.getenv("LLM_BASE_URL") 

64 if os.getenv("LLM_API_KEY"): 

65 llm["api_key"] = os.getenv("LLM_API_KEY") 

66 # models 

67 models = dict(llm.get("models") or {}) 

68 if os.getenv("LLM_EMBEDDING_MODEL"): 

69 models["embeddings"] = os.getenv("LLM_EMBEDDING_MODEL") 

70 if os.getenv("LLM_CHAT_MODEL"): 

71 models["chat"] = os.getenv("LLM_CHAT_MODEL") 

72 if models: 

73 llm["models"] = models 

74 

75 

76def _overlay_env_qdrant(qdrant: dict[str, Any]) -> None: 

77 # Override Qdrant settings with environment variables (unconditional). 

78 # Note: differs from qdrant_loader's _auto_resolve_env_vars() which only 

79 # applies env vars when the config value equals the default. 

80 # Priority: environment variable > config file value > QdrantConfig default. 

81 if os.getenv("QDRANT_URL"): 

82 qdrant["url"] = os.getenv("QDRANT_URL") 

83 if os.getenv("QDRANT_API_KEY"): 

84 qdrant["api_key"] = os.getenv("QDRANT_API_KEY") 

85 if os.getenv("QDRANT_COLLECTION_NAME"): 

86 qdrant["collection_name"] = os.getenv("QDRANT_COLLECTION_NAME") 

87 

88 

89def _overlay_env_search(search: dict[str, Any]) -> None: 

90 # Only a subset for Phase 0; SearchConfig has its own env fallbacks as well 

91 if os.getenv("SEARCH_CONFLICT_USE_LLM"): 

92 raw = os.getenv("SEARCH_CONFLICT_USE_LLM", "true").strip().lower() 

93 search["conflict_use_llm"] = raw in {"1", "true", "t", "yes", "y", "on"} 

94 if os.getenv("SEARCH_CONFLICT_LLM_MODEL"): 

95 search["conflict_llm_model"] = os.getenv("SEARCH_CONFLICT_LLM_MODEL") 

96 

97 

def load_file_config(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 YAML and return the parsed mapping.

    An empty or all-falsy document yields ``{}``. Note: YAML parsing errors
    propagate to the caller (load_config catches them and falls back).
    """
    text = path.read_text(encoding="utf-8")
    return yaml.safe_load(text) or {}

101 

102 

def build_config_from_dict(config_data: dict[str, Any]) -> Config:
    """Construct a Config from a parsed configuration mapping.

    Reads ``global.llm`` / ``global.qdrant`` and the top-level ``search``
    section, overlays environment variables on top of the file values, and
    derives a transitional OpenAIConfig (Phase 0; replaced by the core LLM
    provider later).

    Raises:
        ValueError: if ``global.reranking`` is present but not a mapping.
    """
    global_section = _get_section(config_data, "global")
    llm_settings = dict(global_section.get("llm") or {})
    qdrant_settings = dict(global_section.get("qdrant") or {})
    search_settings = dict(config_data.get("search") or {})

    # Deprecation: detect legacy blocks and warn (best-effort) once.
    legacy_embedding = global_section.get("embedding")
    file_conversion = config_data.get("file_conversion")
    legacy_markitdown = (
        (file_conversion or {}).get("markitdown")
        if isinstance(file_conversion, dict)
        else None
    )
    try:
        if legacy_embedding or legacy_markitdown:
            logger.warning(
                "Legacy configuration fields detected; please migrate to global.llm",
                legacy_embedding=bool(legacy_embedding),
                legacy_markitdown=bool(legacy_markitdown),
            )
    except Exception:
        # Logging must never prevent configuration from loading.
        pass

    # Environment variables take precedence over file values.
    _overlay_env_llm(llm_settings)
    _overlay_env_qdrant(qdrant_settings)
    _overlay_env_search(search_settings)

    # Phase 0 derivation of OpenAI settings from the generic llm section.
    resolved_api_key = llm_settings.get("api_key") or os.getenv("OPENAI_API_KEY")
    model_map = dict(llm_settings.get("models") or {})
    resolved_embedding_model = (
        model_map.get("embeddings")
        or os.getenv("LLM_EMBEDDING_MODEL")
        or "text-embedding-3-small"
    )
    resolved_chat_model = (
        model_map.get("chat") or os.getenv("LLM_CHAT_MODEL") or "gpt-3.5-turbo"
    )

    # Optional reranking config: present-but-not-a-mapping is a hard error
    # (note: an explicit "reranking: null" in the file also raises).
    reranking_cfg = None
    if "reranking" in global_section:
        raw_reranking = global_section["reranking"]
        if not isinstance(raw_reranking, dict):
            raise ValueError("global.reranking must be a mapping")
        reranking_cfg = MCPReranking(**raw_reranking)

    return Config(
        qdrant=QdrantConfig(**qdrant_settings) if qdrant_settings else QdrantConfig(),
        openai=OpenAIConfig(
            api_key=resolved_api_key,
            model=resolved_embedding_model,
            chat_model=resolved_chat_model,
        ),
        search=SearchConfig(**search_settings) if search_settings else SearchConfig(),
        reranking=MCPReranking() if reranking_cfg is None else reranking_cfg,
    )

157 

158 

def redact_effective_config(effective: dict[str, Any]) -> dict[str, Any]:
    """Return a deep copy of *effective* with secret values masked.

    Non-empty string values stored under the keys ``api_key`` and
    ``Authorization`` become ``"***REDACTED***"``; dicts and lists are
    copied recursively, all other values are returned unchanged.
    """
    secret_keys = {"api_key", "Authorization"}

    def _mask(value: Any) -> Any:
        if isinstance(value, dict):
            return {
                key: (
                    "***REDACTED***"
                    if key in secret_keys and isinstance(item, str) and item
                    else _mask(item)
                )
                for key, item in value.items()
            }
        if isinstance(value, list):
            return [_mask(item) for item in value]
        return value

    return _mask(effective)

174 

175 

def load_config(cli_config: Path | None) -> tuple[Config, dict[str, Any], bool]:
    """Load effective configuration.

    Returns (config_obj, effective_dict, used_file: bool)
    """

    def _derived_section(cfg: Config) -> dict[str, Any]:
        # Derived (Phase 0) OpenAI settings, echoed alongside the raw config
        # for printing/inspection.
        return {
            "openai": {
                "model": cfg.openai.model,
                "chat_model": cfg.openai.chat_model,
                "api_key": cfg.openai.api_key,
            }
        }

    config_path = resolve_config_path(cli_config)
    used_file = False
    if config_path:
        try:
            raw_data = load_file_config(config_path)
            cfg = build_config_from_dict(raw_data)
            # NOTE(review): used_file flips to True before the effective dict
            # is built, so a failure below still returns True from the
            # env-only fallback — preserved as-is; confirm intended.
            used_file = True
            # Effective dict for printing: raw file sections plus derived.
            effective = {
                "global": {
                    "llm": raw_data.get("global", {}).get("llm"),
                    "qdrant": raw_data.get("global", {}).get("qdrant"),
                },
                "search": raw_data.get("search"),
                "derived": _derived_section(cfg),
            }
            return cfg, effective, used_file
        except Exception as exc:
            logger.warning(
                "Failed to load config file; falling back to env-only", error=str(exc)
            )

    # Legacy env-only fallback (deprecated): everything comes from env vars.
    cfg = Config()
    env_llm = {
        "provider": os.getenv("LLM_PROVIDER"),
        "base_url": os.getenv("LLM_BASE_URL"),
        "api_key": os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY"),
        "models": {
            "embeddings": os.getenv("LLM_EMBEDDING_MODEL"),
            "chat": os.getenv("LLM_CHAT_MODEL"),
        },
    }
    env_qdrant = {
        "url": os.getenv("QDRANT_URL"),
        "api_key": os.getenv("QDRANT_API_KEY"),
        "collection_name": os.getenv("QDRANT_COLLECTION_NAME"),
    }
    effective = {
        "global": {"llm": env_llm, "qdrant": env_qdrant},
        "search": None,
        "derived": _derived_section(cfg),
        "warning": "Using legacy env-only mode; providing a config file is recommended and will be required in a future release.",
    }
    try:
        logger.warning(
            "Running in legacy env-only mode; provide --config or MCP_CONFIG file",
        )
    except Exception:
        # Best-effort warning only.
        pass
    return cfg, effective, used_file