Coverage for src / qdrant_loader_mcp_server / config.py: 77%
141 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-10 09:41 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-10 09:41 +0000
1"""Configuration settings for the RAG MCP Server."""
3import json
4import logging
5import os
6from typing import Annotated
8from dotenv import load_dotenv
9from pydantic import BaseModel, Field
11# Import reranking Pydantic model from MCP models
12from qdrant_loader_mcp_server.config_reranking import MCPReranking
14# Load environment variables from .env file
15load_dotenv()
17# Module logger
18logger = logging.getLogger(__name__)
21# --- Helpers -----------------------------------------------------------------
# Accepted boolean truthy/falsey strings (case-insensitive)
TRUE_VALUES = {"1", "true", "t", "yes", "y", "on"}
FALSE_VALUES = {"0", "false", "f", "no", "n", "off"}


def parse_bool_env(var_name: str, default: bool) -> bool:
    """Parse a boolean from an environment variable robustly.

    Accepted true values: 1, true, t, yes, y, on
    Accepted false values: 0, false, f, no, n, off

    An unset or empty/whitespace-only variable yields ``default``, matching
    the behavior of :func:`parse_int_env` and :func:`parse_float_env`.

    Args:
        var_name: Environment variable name to read.
        default: Value to use when the variable is not set (or blank).

    Raises:
        ValueError: If the variable is set but not a valid boolean value.
    """
    raw_value = os.getenv(var_name)
    # Treat missing and blank values identically to the numeric parsers:
    # both fall back to the caller-supplied default.
    if raw_value is None or raw_value.strip() == "":
        return default
    normalized = raw_value.strip().lower()
    if normalized in TRUE_VALUES:
        return True
    if normalized in FALSE_VALUES:
        return False
    raise ValueError(
        f"Invalid boolean for {var_name}: {raw_value!r}. "
        f"Expected one of {sorted(TRUE_VALUES | FALSE_VALUES)}"
    )
51def parse_int_env(
52 var_name: str,
53 default: int,
54 *,
55 min_value: int | None = None,
56 max_value: int | None = None,
57) -> int:
58 """Parse an integer from an environment variable with bounds checking.
60 Args:
61 var_name: Environment variable name to read.
62 default: Value to use when the variable is not set.
63 min_value: Optional lower bound (inclusive).
64 max_value: Optional upper bound (inclusive).
66 Raises:
67 ValueError: If the variable is set but not an int, or out of bounds.
68 """
69 raw_value = os.getenv(var_name)
70 if raw_value is None or raw_value.strip() == "":
71 return default
72 try:
73 value = int(raw_value)
74 except (TypeError, ValueError) as exc:
75 raise ValueError(f"Invalid integer for {var_name}: {raw_value!r}") from exc
76 if min_value is not None and value < min_value:
77 raise ValueError(f"{var_name} must be >= {min_value}; got {value}")
78 if max_value is not None and value > max_value:
79 raise ValueError(f"{var_name} must be <= {max_value}; got {value}")
80 return value
83def parse_float_env(
84 var_name: str,
85 default: float,
86 *,
87 min_value: float | None = None,
88 max_value: float | None = None,
89) -> float:
90 """Parse a float from an environment variable with bounds checking.
92 Args:
93 var_name: Environment variable name to read.
94 default: Value to use when the variable is not set.
95 min_value: Optional lower bound (inclusive).
96 max_value: Optional upper bound (inclusive).
98 Raises:
99 ValueError: If the variable is set but not a float, or out of bounds.
100 """
101 raw_value = os.getenv(var_name)
102 if raw_value is None or raw_value.strip() == "":
103 return default
104 try:
105 value = float(raw_value)
106 except (TypeError, ValueError) as exc:
107 raise ValueError(f"Invalid float for {var_name}: {raw_value!r}") from exc
108 if min_value is not None and value < min_value:
109 raise ValueError(f"{var_name} must be >= {min_value}; got {value}")
110 if max_value is not None and value > max_value:
111 raise ValueError(f"{var_name} must be <= {max_value}; got {value}")
112 return value
class ServerConfig(BaseModel):
    """HTTP server settings for the MCP server process."""

    # Bind address; 0.0.0.0 listens on all interfaces.
    host: str = Field(default="0.0.0.0")
    # TCP port the server listens on.
    port: int = Field(default=8000)
    # Logging level name (e.g. DEBUG, INFO, WARNING).
    log_level: str = Field(default="INFO")
class QdrantConfig(BaseModel):
    """Qdrant configuration settings.

    Defaults are aligned with qdrant_loader.config.qdrant.QdrantConfig to ensure
    consistent behavior between the loader and MCP server. The MCP server depends on
    qdrant-loader-core (not qdrant-loader directly), so this class is kept local but
    mirrors the same field names, types, and defaults.
    """

    # Aligned with qdrant_loader.config.qdrant.QdrantConfig defaults
    url: str = "http://localhost:6333"
    api_key: str | None = None
    collection_name: str = "documents"

    def __init__(self, **data):
        """Fill any fields not explicitly provided from environment variables."""
        # setdefault only applies the env value when the caller omitted the key.
        data.setdefault("url", os.getenv("QDRANT_URL", "http://localhost:6333"))
        data.setdefault("api_key", os.getenv("QDRANT_API_KEY"))
        data.setdefault(
            "collection_name", os.getenv("QDRANT_COLLECTION_NAME", "documents")
        )
        super().__init__(**data)
class SearchConfig(BaseModel):
    """Search optimization configuration settings."""

    # Search result caching
    cache_enabled: bool = True
    cache_ttl: Annotated[int, Field(ge=0, le=86_400)] = 300  # 0s..24h
    cache_max_size: Annotated[int, Field(ge=1, le=100_000)] = 500

    # Search parameters optimization
    hnsw_ef: Annotated[int, Field(ge=1, le=32_768)] = 128  # HNSW search parameter
    use_exact_search: bool = False  # Use exact search when needed

    # Concurrency control: limits simultaneous search operations to prevent
    # overwhelming the shared Qdrant client connection pool under concurrent MCP calls.
    max_concurrent_searches: Annotated[int, Field(ge=1, le=50)] = 4

    # Conflict detection performance controls (defaults calibrated for P95 ~8–10s)
    conflict_limit_default: Annotated[int, Field(ge=2, le=50)] = 10
    conflict_max_pairs_total: Annotated[int, Field(ge=1, le=200)] = 24
    # Per-tier pair caps; NOTE(review): mutable class-level default — pydantic
    # is presumed to copy it per instance, so instances should not share state.
    # This literal is duplicated as the env-override fallback in __init__ below;
    # keep the two in sync.
    conflict_tier_caps: dict = {
        "primary": 12,
        "secondary": 8,
        "tertiary": 4,
        "fallback": 0,
    }
    conflict_use_llm: bool = True
    conflict_max_llm_pairs: Annotated[int, Field(ge=0, le=10)] = 2
    conflict_llm_model: str = "gpt-4o-mini"
    # Timeout fields: the Field bound allows gt=0, but the env parsers below
    # enforce a stricter min of 1.0s for env-sourced values.
    conflict_llm_timeout_s: Annotated[float, Field(gt=0, le=60)] = 12.0
    conflict_overall_timeout_s: Annotated[float, Field(gt=0, le=60)] = 9.0
    conflict_text_window_chars: Annotated[int, Field(ge=200, le=8000)] = 2000
    conflict_embeddings_timeout_s: Annotated[float, Field(gt=0, le=30)] = 2.0
    conflict_embeddings_max_concurrency: Annotated[int, Field(ge=1, le=20)] = 5

    def __init__(self, **data):
        """Initialize with environment variables if not provided.

        Performs robust boolean parsing and strict numeric validation to avoid
        subtle runtime issues from malformed environment inputs.

        Explicitly passed keyword arguments always win over environment
        variables; env values are only consulted for keys absent from ``data``.
        """
        if "cache_enabled" not in data:
            data["cache_enabled"] = parse_bool_env("SEARCH_CACHE_ENABLED", True)
        if "cache_ttl" not in data:
            data["cache_ttl"] = parse_int_env(
                "SEARCH_CACHE_TTL", 300, min_value=0, max_value=86_400
            )
        if "cache_max_size" not in data:
            data["cache_max_size"] = parse_int_env(
                "SEARCH_CACHE_MAX_SIZE", 500, min_value=1, max_value=100_000
            )
        if "hnsw_ef" not in data:
            data["hnsw_ef"] = parse_int_env(
                "SEARCH_HNSW_EF", 128, min_value=1, max_value=32_768
            )
        if "use_exact_search" not in data:
            data["use_exact_search"] = parse_bool_env("SEARCH_USE_EXACT", False)
        if "max_concurrent_searches" not in data:
            data["max_concurrent_searches"] = parse_int_env(
                "SEARCH_MAX_CONCURRENT", 4, min_value=1, max_value=50
            )

        # Conflict detection env overrides (optional; safe defaults used if unset)
        def _get_env_dict(name: str, default: dict) -> dict:
            # Parse a JSON object from an env var; any non-dict JSON value or
            # parse failure falls back to `default` (logged, never raised).
            raw = os.getenv(name)
            if not raw:
                return default
            try:
                parsed = json.loads(raw)
                if isinstance(parsed, dict):
                    return parsed
                return default
            # JSONDecodeError subclasses ValueError; both listed for clarity.
            except (json.JSONDecodeError, ValueError) as exc:
                # Shorten raw value to avoid logging excessively large strings
                raw_preview = raw if len(raw) <= 200 else f"{raw[:200]}..."
                logger.warning(
                    "Failed to parse JSON for env var %s; raw=%r; falling back to default. Error: %s",
                    name,
                    raw_preview,
                    exc,
                    exc_info=True,
                )
                return default

        if "conflict_limit_default" not in data:
            data["conflict_limit_default"] = parse_int_env(
                "SEARCH_CONFLICT_LIMIT_DEFAULT", 10, min_value=2, max_value=50
            )
        if "conflict_max_pairs_total" not in data:
            data["conflict_max_pairs_total"] = parse_int_env(
                "SEARCH_CONFLICT_MAX_PAIRS_TOTAL", 24, min_value=1, max_value=200
            )
        if "conflict_tier_caps" not in data:
            data["conflict_tier_caps"] = _get_env_dict(
                "SEARCH_CONFLICT_TIER_CAPS",
                {"primary": 12, "secondary": 8, "tertiary": 4, "fallback": 0},
            )
        if "conflict_use_llm" not in data:
            data["conflict_use_llm"] = parse_bool_env("SEARCH_CONFLICT_USE_LLM", True)
        if "conflict_max_llm_pairs" not in data:
            data["conflict_max_llm_pairs"] = parse_int_env(
                "SEARCH_CONFLICT_MAX_LLM_PAIRS", 2, min_value=0, max_value=10
            )
        if "conflict_llm_model" not in data:
            data["conflict_llm_model"] = os.getenv(
                "SEARCH_CONFLICT_LLM_MODEL", "gpt-4o-mini"
            )
        if "conflict_llm_timeout_s" not in data:
            data["conflict_llm_timeout_s"] = parse_float_env(
                "SEARCH_CONFLICT_LLM_TIMEOUT_S", 12.0, min_value=1.0, max_value=60.0
            )
        if "conflict_overall_timeout_s" not in data:
            data["conflict_overall_timeout_s"] = parse_float_env(
                "SEARCH_CONFLICT_OVERALL_TIMEOUT_S", 9.0, min_value=1.0, max_value=60.0
            )
        if "conflict_text_window_chars" not in data:
            data["conflict_text_window_chars"] = parse_int_env(
                "SEARCH_CONFLICT_TEXT_WINDOW_CHARS", 2000, min_value=200, max_value=8000
            )
        if "conflict_embeddings_timeout_s" not in data:
            data["conflict_embeddings_timeout_s"] = parse_float_env(
                "SEARCH_CONFLICT_EMBEDDINGS_TIMEOUT_S",
                2.0,
                min_value=1.0,
                max_value=30.0,
            )
        if "conflict_embeddings_max_concurrency" not in data:
            data["conflict_embeddings_max_concurrency"] = parse_int_env(
                "SEARCH_CONFLICT_EMBEDDINGS_MAX_CONCURRENCY",
                5,
                min_value=1,
                max_value=20,
            )
        super().__init__(**data)
class OpenAIConfig(BaseModel):
    """OpenAI configuration settings."""

    # Optional to avoid startup crashes when OPENAI_API_KEY is not yet set;
    # downstream callers are expected to validate presence before use.
    api_key: str | None = None
    # Embedding model identifier.
    model: str = "text-embedding-3-small"
    # Chat-completion model identifier.
    chat_model: str = "gpt-3.5-turbo"
    # From global.llm.embeddings.vector_size or migrated from legacy.
    vector_size: int | None = None
class Config(BaseModel):
    """Main configuration class.

    Aggregates all sub-configurations; each sub-model reads its own environment
    overrides in its factory/constructor.

    Note: QdrantConfig defaults are aligned with qdrant_loader.config.qdrant.QdrantConfig
    to ensure consistent behavior between the loader and MCP server. The MCP server
    cannot import from qdrant-loader directly (it only depends on qdrant-loader-core),
    so alignment is maintained by convention rather than import.
    """

    server: ServerConfig = Field(default_factory=ServerConfig)
    qdrant: QdrantConfig = Field(default_factory=QdrantConfig)
    # Lambda factory so OPENAI_API_KEY is read at instantiation time,
    # not at module import time.
    openai: OpenAIConfig = Field(
        default_factory=lambda: OpenAIConfig(api_key=os.getenv("OPENAI_API_KEY"))
    )
    search: SearchConfig = Field(default_factory=SearchConfig)
    # Reranking configuration (loaded from global.reranking in config.yaml)
    reranking: MCPReranking = Field(default_factory=MCPReranking)