Coverage for src / qdrant_loader_mcp_server / config.py: 77%

141 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-10 09:41 +0000

1"""Configuration settings for the RAG MCP Server.""" 

2 

import json
import logging
import math
import os
from typing import Annotated

from dotenv import load_dotenv
from pydantic import BaseModel, Field

# Import reranking Pydantic model from MCP models
from qdrant_loader_mcp_server.config_reranking import MCPReranking

13 

14# Load environment variables from .env file 

15load_dotenv() 

16 

17# Module logger 

18logger = logging.getLogger(__name__) 

19 

20 

21# --- Helpers ----------------------------------------------------------------- 

22 

# Accepted boolean truthy/falsey strings (case-insensitive)
TRUE_VALUES = {"1", "true", "t", "yes", "y", "on"}
FALSE_VALUES = {"0", "false", "f", "no", "n", "off"}


def parse_bool_env(var_name: str, default: bool) -> bool:
    """Parse a boolean from an environment variable robustly.

    Accepted true values: 1, true, t, yes, y, on
    Accepted false values: 0, false, f, no, n, off

    An unset or blank (whitespace-only) variable yields ``default``; this
    mirrors ``parse_int_env``/``parse_float_env``, which also treat an empty
    string as "not set".

    Args:
        var_name: Environment variable name to read.
        default: Value to use when the variable is not set or blank.

    Raises:
        ValueError: If the variable is set but not a valid boolean value.
    """
    raw_value = os.getenv(var_name)
    if raw_value is None:
        return default
    normalized = raw_value.strip().lower()
    if not normalized:
        # Consistent with the numeric parsers: blank means "use the default".
        return default
    if normalized in TRUE_VALUES:
        return True
    if normalized in FALSE_VALUES:
        return False
    raise ValueError(
        f"Invalid boolean for {var_name}: {raw_value!r}. "
        f"Expected one of {sorted(TRUE_VALUES | FALSE_VALUES)}"
    )

49 

50 

51def parse_int_env( 

52 var_name: str, 

53 default: int, 

54 *, 

55 min_value: int | None = None, 

56 max_value: int | None = None, 

57) -> int: 

58 """Parse an integer from an environment variable with bounds checking. 

59 

60 Args: 

61 var_name: Environment variable name to read. 

62 default: Value to use when the variable is not set. 

63 min_value: Optional lower bound (inclusive). 

64 max_value: Optional upper bound (inclusive). 

65 

66 Raises: 

67 ValueError: If the variable is set but not an int, or out of bounds. 

68 """ 

69 raw_value = os.getenv(var_name) 

70 if raw_value is None or raw_value.strip() == "": 

71 return default 

72 try: 

73 value = int(raw_value) 

74 except (TypeError, ValueError) as exc: 

75 raise ValueError(f"Invalid integer for {var_name}: {raw_value!r}") from exc 

76 if min_value is not None and value < min_value: 

77 raise ValueError(f"{var_name} must be >= {min_value}; got {value}") 

78 if max_value is not None and value > max_value: 

79 raise ValueError(f"{var_name} must be <= {max_value}; got {value}") 

80 return value 

81 

82 

83def parse_float_env( 

84 var_name: str, 

85 default: float, 

86 *, 

87 min_value: float | None = None, 

88 max_value: float | None = None, 

89) -> float: 

90 """Parse a float from an environment variable with bounds checking. 

91 

92 Args: 

93 var_name: Environment variable name to read. 

94 default: Value to use when the variable is not set. 

95 min_value: Optional lower bound (inclusive). 

96 max_value: Optional upper bound (inclusive). 

97 

98 Raises: 

99 ValueError: If the variable is set but not a float, or out of bounds. 

100 """ 

101 raw_value = os.getenv(var_name) 

102 if raw_value is None or raw_value.strip() == "": 

103 return default 

104 try: 

105 value = float(raw_value) 

106 except (TypeError, ValueError) as exc: 

107 raise ValueError(f"Invalid float for {var_name}: {raw_value!r}") from exc 

108 if min_value is not None and value < min_value: 

109 raise ValueError(f"{var_name} must be >= {min_value}; got {value}") 

110 if max_value is not None and value > max_value: 

111 raise ValueError(f"{var_name} must be <= {max_value}; got {value}") 

112 return value 

113 

114 

class ServerConfig(BaseModel):
    """Server configuration settings."""

    # Address to bind; "0.0.0.0" accepts connections on all interfaces.
    host: str = "0.0.0.0"
    # TCP port the server listens on.
    port: int = 8000
    # Logging level name (e.g. "DEBUG", "INFO").
    log_level: str = "INFO"

121 

122 

class QdrantConfig(BaseModel):
    """Qdrant configuration settings.

    Defaults are aligned with qdrant_loader.config.qdrant.QdrantConfig to ensure
    consistent behavior between the loader and MCP server. The MCP server depends on
    qdrant-loader-core (not qdrant-loader directly), so this class is kept local but
    mirrors the same field names, types, and defaults.
    """

    # Aligned with qdrant_loader.config.qdrant.QdrantConfig defaults
    url: str = "http://localhost:6333"
    api_key: str | None = None
    collection_name: str = "documents"

    def __init__(self, **data):
        """Initialize with environment variables if not provided.

        Explicit keyword arguments always win; only missing keys are filled
        from the corresponding environment variables.
        """
        env_readers = {
            "url": lambda: os.getenv("QDRANT_URL", "http://localhost:6333"),
            "api_key": lambda: os.getenv("QDRANT_API_KEY"),
            "collection_name": lambda: os.getenv(
                "QDRANT_COLLECTION_NAME", "documents"
            ),
        }
        for field_name, read_env in env_readers.items():
            if field_name not in data:
                data[field_name] = read_env()
        super().__init__(**data)

146 

147 

class SearchConfig(BaseModel):
    """Search optimization configuration settings."""

    # Search result caching
    cache_enabled: bool = True
    cache_ttl: Annotated[int, Field(ge=0, le=86_400)] = 300  # seconds, 0s..24h
    cache_max_size: Annotated[int, Field(ge=1, le=100_000)] = 500

    # Search parameters optimization
    hnsw_ef: Annotated[int, Field(ge=1, le=32_768)] = 128  # HNSW search parameter
    use_exact_search: bool = False  # Use exact search when needed

    # Concurrency control: limits simultaneous search operations to prevent
    # overwhelming the shared Qdrant client connection pool under concurrent MCP calls.
    max_concurrent_searches: Annotated[int, Field(ge=1, le=50)] = 4

    # Conflict detection performance controls (defaults calibrated for P95 ~8–10s)
    conflict_limit_default: Annotated[int, Field(ge=2, le=50)] = 10
    conflict_max_pairs_total: Annotated[int, Field(ge=1, le=200)] = 24
    conflict_tier_caps: dict = {
        "primary": 12,
        "secondary": 8,
        "tertiary": 4,
        "fallback": 0,
    }
    conflict_use_llm: bool = True
    conflict_max_llm_pairs: Annotated[int, Field(ge=0, le=10)] = 2
    conflict_llm_model: str = "gpt-4o-mini"
    conflict_llm_timeout_s: Annotated[float, Field(gt=0, le=60)] = 12.0
    conflict_overall_timeout_s: Annotated[float, Field(gt=0, le=60)] = 9.0
    conflict_text_window_chars: Annotated[int, Field(ge=200, le=8000)] = 2000
    conflict_embeddings_timeout_s: Annotated[float, Field(gt=0, le=30)] = 2.0
    conflict_embeddings_max_concurrency: Annotated[int, Field(ge=1, le=20)] = 5

    def __init__(self, **data):
        """Initialize with environment variables if not provided.

        Any field the caller did not pass explicitly is resolved from its
        environment variable with strict parsing (robust booleans, bounded
        numerics). Loaders are evaluated lazily, so a malformed variable only
        raises when its field was actually left for the environment to fill.
        """

        def _dict_from_env(name: str, fallback: dict) -> dict:
            # Best-effort JSON-object override; anything malformed or of the
            # wrong type falls back to the provided default.
            raw = os.getenv(name)
            if not raw:
                return fallback
            try:
                decoded = json.loads(raw)
            except (json.JSONDecodeError, ValueError) as exc:
                # Shorten raw value to avoid logging excessively large strings
                preview = raw if len(raw) <= 200 else f"{raw[:200]}..."
                logger.warning(
                    "Failed to parse JSON for env var %s; raw=%r; falling back to default. Error: %s",
                    name,
                    preview,
                    exc,
                    exc_info=True,
                )
                return fallback
            return decoded if isinstance(decoded, dict) else fallback

        # One lazy loader per field; insertion order preserves which malformed
        # env var reports first, matching the original sequential checks.
        env_loaders = {
            "cache_enabled": lambda: parse_bool_env("SEARCH_CACHE_ENABLED", True),
            "cache_ttl": lambda: parse_int_env(
                "SEARCH_CACHE_TTL", 300, min_value=0, max_value=86_400
            ),
            "cache_max_size": lambda: parse_int_env(
                "SEARCH_CACHE_MAX_SIZE", 500, min_value=1, max_value=100_000
            ),
            "hnsw_ef": lambda: parse_int_env(
                "SEARCH_HNSW_EF", 128, min_value=1, max_value=32_768
            ),
            "use_exact_search": lambda: parse_bool_env("SEARCH_USE_EXACT", False),
            "max_concurrent_searches": lambda: parse_int_env(
                "SEARCH_MAX_CONCURRENT", 4, min_value=1, max_value=50
            ),
            # Conflict detection env overrides (optional; safe defaults used if unset)
            "conflict_limit_default": lambda: parse_int_env(
                "SEARCH_CONFLICT_LIMIT_DEFAULT", 10, min_value=2, max_value=50
            ),
            "conflict_max_pairs_total": lambda: parse_int_env(
                "SEARCH_CONFLICT_MAX_PAIRS_TOTAL", 24, min_value=1, max_value=200
            ),
            "conflict_tier_caps": lambda: _dict_from_env(
                "SEARCH_CONFLICT_TIER_CAPS",
                {"primary": 12, "secondary": 8, "tertiary": 4, "fallback": 0},
            ),
            "conflict_use_llm": lambda: parse_bool_env(
                "SEARCH_CONFLICT_USE_LLM", True
            ),
            "conflict_max_llm_pairs": lambda: parse_int_env(
                "SEARCH_CONFLICT_MAX_LLM_PAIRS", 2, min_value=0, max_value=10
            ),
            "conflict_llm_model": lambda: os.getenv(
                "SEARCH_CONFLICT_LLM_MODEL", "gpt-4o-mini"
            ),
            "conflict_llm_timeout_s": lambda: parse_float_env(
                "SEARCH_CONFLICT_LLM_TIMEOUT_S", 12.0, min_value=1.0, max_value=60.0
            ),
            "conflict_overall_timeout_s": lambda: parse_float_env(
                "SEARCH_CONFLICT_OVERALL_TIMEOUT_S", 9.0, min_value=1.0, max_value=60.0
            ),
            "conflict_text_window_chars": lambda: parse_int_env(
                "SEARCH_CONFLICT_TEXT_WINDOW_CHARS", 2000, min_value=200, max_value=8000
            ),
            "conflict_embeddings_timeout_s": lambda: parse_float_env(
                "SEARCH_CONFLICT_EMBEDDINGS_TIMEOUT_S",
                2.0,
                min_value=1.0,
                max_value=30.0,
            ),
            "conflict_embeddings_max_concurrency": lambda: parse_int_env(
                "SEARCH_CONFLICT_EMBEDDINGS_MAX_CONCURRENCY",
                5,
                min_value=1,
                max_value=20,
            ),
        }
        for field_name, load in env_loaders.items():
            if field_name not in data:
                data[field_name] = load()
        super().__init__(**data)

281 

282 

class OpenAIConfig(BaseModel):
    """OpenAI configuration settings."""

    # Kept optional to avoid startup crashes when OPENAI_API_KEY is not yet
    # set; downstream callers are expected to validate presence before use.
    api_key: str | None = None
    # Embedding model identifier.
    model: str = "text-embedding-3-small"
    # Chat-completion model identifier.
    chat_model: str = "gpt-3.5-turbo"
    # From global.llm.embeddings.vector_size or migrated from legacy
    vector_size: int | None = None

294 

295 

class Config(BaseModel):
    """Main configuration class.

    Note: QdrantConfig defaults are aligned with qdrant_loader.config.qdrant.QdrantConfig
    to ensure consistent behavior between the loader and MCP server. The MCP server
    cannot import from qdrant-loader directly (it only depends on qdrant-loader-core),
    so alignment is maintained by convention rather than import.
    """

    server: ServerConfig = Field(default_factory=ServerConfig)
    qdrant: QdrantConfig = Field(default_factory=QdrantConfig)
    # The lambda defers the OPENAI_API_KEY lookup to Config instantiation time
    # (not module import time), so late-loaded environments still take effect.
    openai: OpenAIConfig = Field(
        default_factory=lambda: OpenAIConfig(api_key=os.getenv("OPENAI_API_KEY"))
    )
    search: SearchConfig = Field(default_factory=SearchConfig)
    # Reranking configuration (loaded from global.reranking in config.yaml)
    reranking: MCPReranking = Field(default_factory=MCPReranking)