Coverage for src/qdrant_loader_mcp_server/config_loader.py: 73%

109 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-08 06:06 +0000

1"""File-based configuration loader for the MCP server. 

2 

3Precedence: 

4- CLI --config 

5- MCP_CONFIG environment variable 

6- ./config.yaml 

7- ~/.config/qdrant-loader/config.yaml 

8- /etc/qdrant-loader/config.yaml 

9 

10Environment variables overlay values from file. CLI flags override env. 

11""" 

12 

13from __future__ import annotations 

14 

15import os 

16from pathlib import Path 

17from typing import Any 

18 

19import yaml 

20 

21from .config import Config, OpenAIConfig, QdrantConfig, SearchConfig 

22from .utils.logging import LoggingConfig 

23 

# Module-level structured logger (structlog-style: accepts keyword fields).
logger = LoggingConfig.get_logger(__name__)

25 

26 

27def _first_existing(paths: list[Path]) -> Path | None: 

28 for p in paths: 

29 if p and p.exists() and p.is_file(): 

30 return p 

31 return None 

32 

33 

34def resolve_config_path(cli_config: Path | None) -> Path | None: 

35 if cli_config: 

36 return cli_config 

37 env_cfg = os.getenv("MCP_CONFIG") 

38 if env_cfg: 

39 p = Path(env_cfg).expanduser() 

40 if p.exists(): 

41 return p 

42 candidates = [ 

43 Path.cwd() / "config.yaml", 

44 Path.home() / ".config" / "qdrant-loader" / "config.yaml", 

45 Path("/etc/qdrant-loader/config.yaml"), 

46 ] 

47 return _first_existing(candidates) 

48 

49 

50def _get_section(config_data: dict[str, Any], name: str) -> dict[str, Any]: 

51 # Only support "global" root going forward 

52 return config_data.get(name, {}) or {} 

53 

54 

55def _overlay_env_llm(llm: dict[str, Any]) -> None: 

56 # LLM env overrides 

57 if os.getenv("LLM_PROVIDER"): 

58 llm.setdefault("provider", os.getenv("LLM_PROVIDER")) 

59 llm["provider"] = os.getenv("LLM_PROVIDER") 

60 if os.getenv("LLM_BASE_URL"): 

61 llm["base_url"] = os.getenv("LLM_BASE_URL") 

62 if os.getenv("LLM_API_KEY"): 

63 llm["api_key"] = os.getenv("LLM_API_KEY") 

64 # models 

65 models = dict(llm.get("models") or {}) 

66 if os.getenv("LLM_EMBEDDING_MODEL"): 

67 models["embeddings"] = os.getenv("LLM_EMBEDDING_MODEL") 

68 if os.getenv("LLM_CHAT_MODEL"): 

69 models["chat"] = os.getenv("LLM_CHAT_MODEL") 

70 if models: 

71 llm["models"] = models 

72 

73 

74def _overlay_env_qdrant(qdrant: dict[str, Any]) -> None: 

75 if os.getenv("QDRANT_URL"): 

76 qdrant["url"] = os.getenv("QDRANT_URL") 

77 if os.getenv("QDRANT_API_KEY"): 

78 qdrant["api_key"] = os.getenv("QDRANT_API_KEY") 

79 if os.getenv("QDRANT_COLLECTION_NAME"): 

80 qdrant["collection_name"] = os.getenv("QDRANT_COLLECTION_NAME") 

81 

82 

83def _overlay_env_search(search: dict[str, Any]) -> None: 

84 # Only a subset for Phase 0; SearchConfig has its own env fallbacks as well 

85 if os.getenv("SEARCH_CONFLICT_USE_LLM"): 

86 raw = os.getenv("SEARCH_CONFLICT_USE_LLM", "true").strip().lower() 

87 search["conflict_use_llm"] = raw in {"1", "true", "t", "yes", "y", "on"} 

88 if os.getenv("SEARCH_CONFLICT_LLM_MODEL"): 

89 search["conflict_llm_model"] = os.getenv("SEARCH_CONFLICT_LLM_MODEL") 

90 

91 

def load_file_config(path: Path) -> dict[str, Any]:
    """Parse a YAML config file and return its top-level mapping.

    An empty file yields ``{}``. A file whose root is not a mapping (e.g. a
    bare list or scalar) previously leaked through despite the ``dict``
    return annotation and crashed later on ``.get``; it now raises
    ``ValueError``, which ``load_config`` already catches to fall back to
    env-only mode.
    """
    with path.open("r", encoding="utf-8") as f:
        data = yaml.safe_load(f)
    if data is None:
        return {}
    if not isinstance(data, dict):
        raise ValueError(
            f"Config file {path} must contain a YAML mapping at the top level, "
            f"got {type(data).__name__}"
        )
    return data

95 

96 

def build_config_from_dict(config_data: dict[str, Any]) -> Config:
    """Assemble a :class:`Config` from parsed file data plus env overlays.

    Reads ``global.llm`` / ``global.qdrant`` and the root-level ``search``
    section, applies environment-variable overrides, and projects the LLM
    block onto :class:`OpenAIConfig` (Phase 0 shim).
    """
    global_data = _get_section(config_data, "global")
    llm_settings = dict(global_data.get("llm") or {})
    qdrant_settings = dict(global_data.get("qdrant") or {})
    search_settings = dict(config_data.get("search") or {})

    # Deprecation: detect legacy blocks and emit a best-effort warning.
    legacy_embedding = global_data.get("embedding")
    file_conversion = config_data.get("file_conversion")
    legacy_markit = (
        file_conversion.get("markitdown") if isinstance(file_conversion, dict) else None
    )
    if legacy_embedding or legacy_markit:
        try:
            logger.warning(
                "Legacy configuration fields detected; please migrate to global.llm",
                legacy_embedding=bool(legacy_embedding),
                legacy_markitdown=bool(legacy_markit),
            )
        except Exception:
            pass  # logging must never break config loading

    # Environment variables override file values.
    _overlay_env_llm(llm_settings)
    _overlay_env_qdrant(qdrant_settings)
    _overlay_env_search(search_settings)

    # Derive OpenAIConfig for now (Phase 0); replaced by the core LLM
    # provider abstraction later.
    api_key = llm_settings.get("api_key") or os.getenv("OPENAI_API_KEY")
    model_names = dict(llm_settings.get("models") or {})
    embedding_model = (
        model_names.get("embeddings")
        or os.getenv("LLM_EMBEDDING_MODEL")
        or "text-embedding-3-small"
    )
    chat_model = (
        model_names.get("chat") or os.getenv("LLM_CHAT_MODEL") or "gpt-3.5-turbo"
    )

    return Config(
        qdrant=QdrantConfig(**qdrant_settings) if qdrant_settings else QdrantConfig(),
        openai=OpenAIConfig(
            api_key=api_key, model=embedding_model, chat_model=chat_model
        ),
        search=SearchConfig(**search_settings) if search_settings else SearchConfig(),
    )

143 

144 

def redact_effective_config(effective: dict[str, Any]) -> dict[str, Any]:
    """Return a deep copy of *effective* with secret values masked.

    Non-empty string values stored under ``api_key`` or ``Authorization``
    keys are replaced with ``"***REDACTED***"`` at any nesting depth inside
    dicts and lists; all other values are copied through unchanged.
    """
    secret_keys = frozenset({"api_key", "Authorization"})

    def _mask(node: Any) -> Any:
        if isinstance(node, dict):
            return {
                key: (
                    "***REDACTED***"
                    if key in secret_keys and isinstance(value, str) and value
                    else _mask(value)
                )
                for key, value in node.items()
            }
        if isinstance(node, list):
            return [_mask(item) for item in node]
        return node

    return _mask(effective)

160 

161 

def load_config(cli_config: Path | None) -> tuple[Config, dict[str, Any], bool]:
    """Load the effective configuration.

    Returns ``(config_obj, effective_dict, used_file)``. When a config file
    resolves and loads, the effective dict merges the file sections with the
    derived OpenAI settings; on any file error a warning is logged and the
    loader falls back to legacy env-only mode (deprecated).
    """
    config_path = resolve_config_path(cli_config)
    used_file = False
    if config_path:
        try:
            data = load_file_config(config_path)
            cfg = build_config_from_dict(data)
            used_file = True
            # Effective dict for printing (merge file data with derived).
            effective = {
                "global": {
                    "llm": data.get("global", {}).get("llm"),
                    "qdrant": data.get("global", {}).get("qdrant"),
                },
                "search": data.get("search"),
                "derived": {
                    "openai": {
                        "model": cfg.openai.model,
                        "chat_model": cfg.openai.chat_model,
                        "api_key": cfg.openai.api_key,
                    }
                },
            }
            return cfg, effective, used_file
        except Exception as e:
            logger.warning(
                "Failed to load config file; falling back to env-only", error=str(e)
            )

    # Fallback: legacy env-only mode (deprecated).
    cfg = Config()
    env = os.getenv
    effective = {
        "global": {
            "llm": {
                "provider": env("LLM_PROVIDER"),
                "base_url": env("LLM_BASE_URL"),
                "api_key": env("LLM_API_KEY") or env("OPENAI_API_KEY"),
                "models": {
                    "embeddings": env("LLM_EMBEDDING_MODEL"),
                    "chat": env("LLM_CHAT_MODEL"),
                },
            },
            "qdrant": {
                "url": env("QDRANT_URL"),
                "api_key": env("QDRANT_API_KEY"),
                "collection_name": env("QDRANT_COLLECTION_NAME"),
            },
        },
        "search": None,
        "derived": {
            "openai": {
                "model": cfg.openai.model,
                "chat_model": cfg.openai.chat_model,
                "api_key": cfg.openai.api_key,
            }
        },
        "warning": "Using legacy env-only mode; providing a config file is recommended and will be required in a future release.",
    }
    try:
        logger.warning(
            "Running in legacy env-only mode; provide --config or MCP_CONFIG file",
        )
    except Exception:
        pass  # best-effort: the warning must not break startup
    return cfg, effective, used_file