Coverage for src / qdrant_loader_mcp_server / config_loader.py: 83%

145 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-10 09:41 +0000

1"""File-based configuration loader for the MCP server. 

2 

3Precedence: 

4- CLI --config 

5- MCP_CONFIG environment variable 

6- ./config.yaml 

7- ~/.config/qdrant-loader/config.yaml 

8- /etc/qdrant-loader/config.yaml 

9 

10Environment variables overlay values from file. CLI flags override env. 

11""" 

12 

13from __future__ import annotations 

14 

15import os 

16import re 

17from pathlib import Path 

18from typing import Any 

19 

20import yaml 

21 

22from qdrant_loader_mcp_server.config_reranking import MCPReranking 

23 

24from .config import Config, OpenAIConfig, QdrantConfig, SearchConfig 

25from .utils.logging import LoggingConfig 

26 

27logger = LoggingConfig.get_logger(__name__) 

28 

29 

30def _first_existing(paths: list[Path]) -> Path | None: 

31 for p in paths: 

32 if p and p.exists() and p.is_file(): 

33 return p 

34 return None 

35 

36 

37_ENV_VAR_PATTERN = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}") 

38 

39 

40def _substitute_env_vars(data: Any) -> Any: 

41 """Substitute ${VAR_NAME} patterns with environment variable values. 

42 

43 Only standard env var names are matched (letters, digits, underscores, 

44 starting with a letter or underscore). Bash-style defaults like 

45 ``${VAR:-fallback}`` are NOT supported — use .env files instead. 

46 

47 Processes strings, dicts, and lists recursively. Raises ValueError 

48 if a referenced variable is not set. 

49 """ 

50 if isinstance(data, str): 

51 

52 def replace(m: re.Match) -> str: 

53 val = os.getenv(m.group(1)) 

54 if val is None: 

55 raise ValueError( 

56 f"Environment variable '{m.group(1)}' referenced in config is not set" 

57 ) 

58 return val 

59 

60 return _ENV_VAR_PATTERN.sub(replace, data) 

61 if isinstance(data, dict): 

62 return {k: _substitute_env_vars(v) for k, v in data.items()} 

63 if isinstance(data, list): 

64 return [_substitute_env_vars(i) for i in data] 

65 return data 

66 

67 

68def resolve_config_path(cli_config: Path | None) -> Path | None: 

69 if cli_config: 

70 return cli_config 

71 env_cfg = os.getenv("MCP_CONFIG") 

72 if env_cfg: 

73 p = Path(env_cfg).expanduser() 

74 if p.exists(): 

75 return p 

76 candidates = [ 

77 Path.cwd() / "config.yaml", 

78 Path.home() / ".config" / "qdrant-loader" / "config.yaml", 

79 Path("/etc/qdrant-loader/config.yaml"), 

80 ] 

81 return _first_existing(candidates) 

82 

83 

84def _get_section(config_data: dict[str, Any], name: str) -> dict[str, Any]: 

85 # Only support "global" root going forward 

86 return config_data.get(name, {}) or {} 

87 

88 

89def _overlay_env_llm(llm: dict[str, Any]) -> None: 

90 # LLM env overrides 

91 if os.getenv("LLM_PROVIDER"): 

92 llm.setdefault("provider", os.getenv("LLM_PROVIDER")) 

93 llm["provider"] = os.getenv("LLM_PROVIDER") 

94 if os.getenv("LLM_BASE_URL"): 

95 llm["base_url"] = os.getenv("LLM_BASE_URL") 

96 if os.getenv("LLM_API_KEY"): 

97 llm["api_key"] = os.getenv("LLM_API_KEY") 

98 # models 

99 models = dict(llm.get("models") or {}) 

100 if os.getenv("LLM_EMBEDDING_MODEL"): 

101 models["embeddings"] = os.getenv("LLM_EMBEDDING_MODEL") 

102 if os.getenv("LLM_CHAT_MODEL"): 

103 models["chat"] = os.getenv("LLM_CHAT_MODEL") 

104 if models: 

105 llm["models"] = models 

106 

107 

108def _overlay_env_qdrant(qdrant: dict[str, Any]) -> None: 

109 # Override Qdrant settings with environment variables (unconditional). 

110 # Note: differs from qdrant_loader's _auto_resolve_env_vars() which only 

111 # applies env vars when the config value equals the default. 

112 # Priority: environment variable > config file value > QdrantConfig default. 

113 if os.getenv("QDRANT_URL"): 

114 qdrant["url"] = os.getenv("QDRANT_URL") 

115 if os.getenv("QDRANT_API_KEY"): 

116 qdrant["api_key"] = os.getenv("QDRANT_API_KEY") 

117 if os.getenv("QDRANT_COLLECTION_NAME"): 

118 qdrant["collection_name"] = os.getenv("QDRANT_COLLECTION_NAME") 

119 

120 

121def _overlay_env_search(search: dict[str, Any]) -> None: 

122 # Only a subset for Phase 0; SearchConfig has its own env fallbacks as well 

123 if os.getenv("SEARCH_CONFLICT_USE_LLM"): 

124 raw = os.getenv("SEARCH_CONFLICT_USE_LLM", "true").strip().lower() 

125 search["conflict_use_llm"] = raw in {"1", "true", "t", "yes", "y", "on"} 

126 if os.getenv("SEARCH_CONFLICT_LLM_MODEL"): 

127 search["conflict_llm_model"] = os.getenv("SEARCH_CONFLICT_LLM_MODEL") 

128 

129 

def load_file_config(path: Path) -> dict[str, Any]:
    """Parse the YAML file at *path* and expand ``${VAR}`` references.

    An empty or all-comment file yields ``{}``. Raises ValueError (via
    _substitute_env_vars) when a referenced env var is unset.
    """
    raw_text = path.read_text(encoding="utf-8")
    parsed = yaml.safe_load(raw_text) or {}
    return _substitute_env_vars(parsed)

134 

135 

def build_config_from_dict(config_data: dict[str, Any]) -> Config:
    """Build a :class:`Config` from a parsed config-file dict.

    Applies, in order: legacy-field migration (global.embedding → global.llm),
    vector_size extraction, environment-variable overlays, and derivation of
    the interim OpenAIConfig (Phase 0).

    Raises:
        ValueError: if ``global.reranking`` is present but not a mapping.
    """
    global_data = _get_section(config_data, "global")
    # Shallow copies so overlays/migrations never mutate the caller's dict.
    llm = dict(global_data.get("llm") or {})
    qdrant = dict(global_data.get("qdrant") or {})
    search = dict(config_data.get("search") or {})

    # Deprecation: detect legacy blocks and log a warning once
    legacy_embedding = global_data.get("embedding")
    legacy_markit = (
        (config_data.get("file_conversion") or {}).get("markitdown")
        if isinstance(config_data.get("file_conversion"), dict)
        else None
    )
    try:
        if legacy_embedding or legacy_markit:
            logger.warning(
                "Legacy configuration fields detected; please migrate to global.llm",
                legacy_embedding=bool(legacy_embedding),
                legacy_markitdown=bool(legacy_markit),
            )
    except Exception:
        # Logging must never break config loading.
        pass

    # Migrate legacy global.embedding → global.llm (backward compat)
    # New-format values always win; legacy only fills gaps.
    if legacy_embedding:
        if not llm.get("api_key") and legacy_embedding.get("api_key"):
            llm["api_key"] = legacy_embedding["api_key"]
        models_cfg = dict(llm.get("models") or {})
        if not models_cfg.get("embeddings") and legacy_embedding.get("model"):
            models_cfg["embeddings"] = legacy_embedding["model"]
            llm["models"] = models_cfg
        # Migrate vector_size for use in OpenAIConfig
        if not llm.get("vector_size") and isinstance(
            legacy_embedding.get("vector_size"), int
        ):
            llm["vector_size"] = legacy_embedding["vector_size"]

    # Extract vector_size from new format: global.llm.embeddings.vector_size
    # (different from models.embeddings which is a model name string)
    emb_cfg = llm.get("embeddings") or {}
    if isinstance(emb_cfg, dict) and isinstance(emb_cfg.get("vector_size"), int):
        if not llm.get("vector_size"):
            llm["vector_size"] = emb_cfg["vector_size"]

    # Apply environment overrides (env > file for these keys).
    _overlay_env_llm(llm)
    _overlay_env_qdrant(qdrant)
    _overlay_env_search(search)

    # Derive OpenAIConfig for now (Phase 0); will be replaced by core LLM provider later
    api_key = llm.get("api_key") or os.getenv("OPENAI_API_KEY")
    models = dict(llm.get("models") or {})
    embedding_model = (
        models.get("embeddings")
        or os.getenv("LLM_EMBEDDING_MODEL")
        or "text-embedding-3-small"
    )
    chat_model = models.get("chat") or os.getenv("LLM_CHAT_MODEL") or "gpt-3.5-turbo"

    # Build reranking config from global section if present
    reranking_cfg = None
    if "reranking" in global_data:
        if not isinstance(global_data["reranking"], dict):
            raise ValueError("global.reranking must be a mapping")
        reranking_cfg = MCPReranking(**global_data["reranking"])

    vector_size = llm.get("vector_size")  # int | None
    cfg = Config(
        # Empty overlay dicts fall back to each section's model defaults.
        qdrant=QdrantConfig(**qdrant) if qdrant else QdrantConfig(),
        openai=OpenAIConfig(
            api_key=api_key,
            model=embedding_model,
            chat_model=chat_model,
            vector_size=vector_size,
        ),
        search=SearchConfig(**search) if search else SearchConfig(),
        reranking=reranking_cfg if reranking_cfg is not None else MCPReranking(),
    )
    return cfg

215 

216 

def redact_effective_config(effective: dict[str, Any]) -> dict[str, Any]:
    """Return a deep copy of *effective* with sensitive values masked.

    Non-empty string values stored under ``api_key`` or ``Authorization``
    (at any nesting depth, including inside lists) are replaced with
    ``"***REDACTED***"``. The input is not modified.
    """
    sensitive_keys = frozenset({"api_key", "Authorization"})

    def _scrub(node: Any) -> Any:
        if isinstance(node, list):
            return [_scrub(item) for item in node]
        if not isinstance(node, dict):
            return node
        scrubbed: dict[str, Any] = {}
        for key, value in node.items():
            if key in sensitive_keys and isinstance(value, str) and value:
                scrubbed[key] = "***REDACTED***"
            else:
                scrubbed[key] = _scrub(value)
        return scrubbed

    return _scrub(effective)

232 

233 

def load_config(cli_config: Path | None) -> tuple[Config, dict[str, Any], bool]:
    """Load effective configuration.

    Resolves a config file (CLI > MCP_CONFIG > conventional locations) and
    builds the Config from it; any failure during file loading is logged and
    the loader falls back to the deprecated env-only mode.

    Returns (config_obj, effective_dict, used_file: bool)
    where ``effective_dict`` is a printable summary (file sections plus the
    derived OpenAI settings) — note it contains unredacted api keys; pass it
    through redact_effective_config() before display.
    """
    config_path = resolve_config_path(cli_config)
    used_file = False
    if config_path:
        try:
            data = load_file_config(config_path)
            cfg = build_config_from_dict(data)
            used_file = True
            # Effective dict for printing (merge file data with derived)
            effective = {
                "global": {
                    "llm": data.get("global", {}).get("llm"),
                    "qdrant": data.get("global", {}).get("qdrant"),
                },
                "search": data.get("search"),
                "derived": {
                    "openai": {
                        "model": cfg.openai.model,
                        "chat_model": cfg.openai.chat_model,
                        "api_key": cfg.openai.api_key,
                    }
                },
            }
            return cfg, effective, used_file
        except Exception as e:
            # Broad on purpose: any parse/validation error degrades to
            # env-only mode rather than aborting server startup.
            logger.warning(
                "Failed to load config file; falling back to env-only", error=str(e)
            )

    # Fallback to legacy env-only mode (deprecated)
    # Config() reads its own env fallbacks; `effective` mirrors the raw env
    # values so operators can see what was picked up.
    cfg = Config()
    effective = {
        "global": {
            "llm": {
                "provider": os.getenv("LLM_PROVIDER"),
                "base_url": os.getenv("LLM_BASE_URL"),
                "api_key": os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY"),
                "models": {
                    "embeddings": os.getenv("LLM_EMBEDDING_MODEL"),
                    "chat": os.getenv("LLM_CHAT_MODEL"),
                },
            },
            "qdrant": {
                "url": os.getenv("QDRANT_URL"),
                "api_key": os.getenv("QDRANT_API_KEY"),
                "collection_name": os.getenv("QDRANT_COLLECTION_NAME"),
            },
        },
        "search": None,
        "derived": {
            "openai": {
                "model": cfg.openai.model,
                "chat_model": cfg.openai.chat_model,
                "api_key": cfg.openai.api_key,
            }
        },
        "warning": "Using legacy env-only mode; providing a config file is recommended and will be required in a future release.",
    }
    try:
        logger.warning(
            "Running in legacy env-only mode; provide --config or MCP_CONFIG file",
        )
    except Exception:
        # Logging must never break startup.
        pass
    return cfg, effective, used_file