Coverage for src/qdrant_loader/webhooks/server.py: 77%

1from __future__ import annotations

3import asyncio

4import os

5import time

6from contextlib import asynccontextmanager

7from typing import Any

9from fastapi import Depends, FastAPI, HTTPException, Query, Request, status

10from fastapi.responses import JSONResponse

12from qdrant_loader.utils.logging import LoggingConfig

13from qdrant_loader.webhooks.auth import (

14 WEBHOOK_AUTH_NOT_CONFIGURED_MESSAGE,

15 get_client_ip,

16 verify_cognito_token,

17 verify_ingest_auth,

18 verify_webhook_token,

19 webhook_auth_configured,

20)

21from qdrant_loader.webhooks.handlers import (

22 INGEST_SUPPORTED_SOURCE_TYPES,

23 SUPPORTED_SOURCE_TYPES,

24 enqueue_ingest_request,

25 enqueue_webhook_event,

26 normalize_source_type,

27)

28from qdrant_loader.webhooks.queue_backend import QueueBackendManager

29from qdrant_loader.webhooks.worker import run_webhook_worker

# Module-level logger for this file, obtained through the project's
# LoggingConfig helper and keyed by the module's import name.
logger = LoggingConfig.get_logger(__name__)

# Length, in seconds, of the window over which requests are counted for each
# client. Read once at import time from the environment; defaults to 60.
WEBHOOK_RATE_LIMIT_WINDOW_SECONDS = int(
    os.environ.get("WEBHOOK_RATE_LIMIT_WINDOW_SECONDS", "60")
)

# Number of requests a single client may make inside one window before
# further requests are rejected. Read once at import time; defaults to 10.
WEBHOOK_RATE_LIMIT_REQUESTS_PER_WINDOW = int(
    os.environ.get("WEBHOOK_RATE_LIMIT_REQUESTS_PER_WINDOW", "10")
)

# In-memory rate limit state (process-local).
#
# Maps a client key (the value returned by get_client_ip()) to the list of
# time.monotonic() readings recorded for that client's accepted requests.
#
# IMPORTANT: This is NOT a substitute for infrastructure-level rate limiting.
#
# LIMITATION: Each uvicorn worker, container, or ECS task has its own dict instance.
# Multiple instances → each enforces limits independently → effective limit = N × configured.
# Example: 2 workers with limit=10 → actual limit ≈ 20 requests per window.
#
# DESIGN INTENT (WS-6): Primary rate limiting should live at the ALB/WAF layer
# (e.g. "rate-limit per IP to 1000 req/min"). This app-level check is a safety net.
#
# SECURITY NOTE: get_client_ip() relies on the X-Forwarded-For header when running
# behind a trusted proxy. In untrusted environments, set WEBHOOK_TRUSTED_PROXY_IPS
# or similar to validate the header origin.
# NOTE(review): get_client_ip() lives in qdrant_loader.webhooks.auth; confirm the
# exact header handling and the name of the trusted-proxy setting there.
_request_timestamps: dict[str, list[float]] = {}

@asynccontextmanager
async def _lifespan(app: FastAPI):
    """Manage startup and shutdown of the webhook server.

    Startup: refuses to start unless webhook authentication is configured,
    initializes the queue backend, and launches the background worker as an
    asyncio task. The stop event and the task are stored on ``app.state`` as
    ``worker_stop_event`` and ``worker_task``.

    Shutdown: sets the stop event, cancels the worker task and waits for it
    to finish, then shuts the queue backend down. The backend shutdown runs
    even when the worker task ended with an error, so a crashed worker cannot
    leave the queue backend open.

    Args:
        app: The FastAPI application whose ``state`` receives the worker
            handles.

    Raises:
        RuntimeError: If webhook authentication is not configured.
    """
    if not webhook_auth_configured():
        raise RuntimeError(WEBHOOK_AUTH_NOT_CONFIGURED_MESSAGE)

    await QueueBackendManager.initialize()
    stop_event = asyncio.Event()
    worker_task = asyncio.create_task(run_webhook_worker(stop_event))
    app.state.worker_stop_event = stop_event
    app.state.worker_task = worker_task

    logger.info(
        "Webhook server startup",
        rate_limit_window_seconds=WEBHOOK_RATE_LIMIT_WINDOW_SECONDS,
        rate_limit_requests=WEBHOOK_RATE_LIMIT_REQUESTS_PER_WINDOW,
    )
    try:
        yield
    finally:
        stop_event.set()
        worker_task.cancel()
        try:
            await worker_task
        except asyncio.CancelledError:
            # Expected outcome of the cancel() call above.
            pass
        except Exception as exc:
            # The worker had already failed (or failed while stopping).
            # Record it, but do not let it prevent the backend shutdown.
            logger.exception(
                "Webhook worker failed before shutdown", error=str(exc)
            )
        finally:
            await QueueBackendManager.shutdown()
            logger.info("Webhook server shutdown")

# ASGI application object. Startup and shutdown work is delegated to the
# _lifespan context manager; the routes below are registered on this object.
app = FastAPI(
    lifespan=_lifespan,
    title="QDrant Loader Webhook Server",
    version="1.0.0",
    description=(
        "Receives connector webhooks and direct /ingest API requests; enqueues "
        "durable ingestion jobs."
    ),
)

# time.monotonic() reading taken at the last sweep over every tracked client.
# Starts at -inf so the first call performs a sweep.
_last_full_sweep: float = float("-inf")


def _cleanup_old_timestamps(client_key: str) -> None:
    """Discard request timestamps that have left the rate-limit window.

    The entry for ``client_key`` is always rewritten so that it holds only
    timestamps no older than ``WEBHOOK_RATE_LIMIT_WINDOW_SECONDS`` (an empty
    list if none remain or the client was unknown).

    In addition, at most once per window, every tracked client is examined
    and clients whose timestamps have all expired are removed. Without this
    sweep a client that never sends another request would keep its entry
    forever and the mapping would grow without bound.

    Args:
        client_key: Identifier of the client whose entry must be current
            after the call.
    """
    global _last_full_sweep

    now = time.monotonic()
    window = WEBHOOK_RATE_LIMIT_WINDOW_SECONDS

    if now - _last_full_sweep >= window:
        _last_full_sweep = now
        # Iterate over a snapshot of the keys: entries are deleted in the loop.
        for key in list(_request_timestamps):
            recent = [t for t in _request_timestamps[key] if now - t <= window]
            if recent:
                _request_timestamps[key] = recent
            else:
                del _request_timestamps[key]

    timestamps = _request_timestamps.get(client_key, [])
    _request_timestamps[client_key] = [t for t in timestamps if now - t <= window]

102

103

def _enforce_rate_limit(request: Request) -> None:
    """Record this request against its client, or reject it when over the limit.

    The client is identified by ``get_client_ip(request)``. Expired timestamps
    for that client are pruned first; if the client already has
    ``WEBHOOK_RATE_LIMIT_REQUESTS_PER_WINDOW`` or more timestamps left, the
    request is rejected, otherwise the current monotonic time is recorded.

    This is a process-local safety net. For true DDoS protection, rely on
    ALB/WAF. See the comments above ``_request_timestamps`` for design details.

    Raises:
        HTTPException: 429 when the client has reached the limit.
    """
    caller = get_client_ip(request)
    _cleanup_old_timestamps(caller)
    history = _request_timestamps.setdefault(caller, [])
    seen = len(history)

    if seen < WEBHOOK_RATE_LIMIT_REQUESTS_PER_WINDOW:
        # Still under the limit: count this request and let it through.
        history.append(time.monotonic())
        return

    logger.warning(
        "Rate limit exceeded for webhook client",
        client=caller,
        request_count=seen,
        window_seconds=WEBHOOK_RATE_LIMIT_WINDOW_SECONDS,
    )
    raise HTTPException(
        status_code=status.HTTP_429_TOO_MANY_REQUESTS,
        detail="Rate limit exceeded. Try again later.",
    )

125

126

async def _parse_json_request(request: Request) -> object:
    """Return the request body decoded as JSON.

    Any exception raised while reading or decoding the body is logged and
    converted into an HTTP 400 response.

    Raises:
        HTTPException: 400 when ``request.json()`` raises.
    """
    try:
        body = await request.json()
    except Exception as exc:
        logger.error("Invalid webhook payload", error=str(exc))
        bad_request = HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Webhook body must be valid JSON.",
        )
        raise bad_request from exc
    else:
        return body

136

137

async def _handle_webhook(
    project_id: str | None,
    source_type: str,
    source: str,
    request: Request,
    force: bool = False,
) -> JSONResponse:
    """Validate an incoming webhook and enqueue it for processing.

    Steps, in order: rate-limit check, JSON body parsing, source type
    normalization, enqueue via ``enqueue_webhook_event``.

    Args:
        project_id: Target project, or ``None`` when the route carries no
            project.
        source_type: Source type as given in the URL; normalized before use.
        source: Source name as given in the URL.
        request: The incoming request (used for rate limiting and its body).
        force: Passed through to ``enqueue_webhook_event`` and echoed back.

    Returns:
        A 202 response echoing the inputs, merged with the mapping returned
        by ``enqueue_webhook_event`` (its keys win on collision).

    Raises:
        HTTPException: 400 when ``normalize_source_type`` raises
            ``ValueError``; 500 when enqueueing fails. The rate-limit and
            parsing helpers may raise their own ``HTTPException``.
    """
    # The rate limit runs before parsing so flooded requests cost no parse work.
    _enforce_rate_limit(request)
    payload = await _parse_json_request(request)

    try:
        canonical_type = normalize_source_type(source_type)
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(exc),
        ) from exc

    try:
        enqueue_info = await enqueue_webhook_event(
            project_id, canonical_type, source, payload, force
        )
    except Exception as exc:
        logger.exception("Failed to enqueue webhook event", error=str(exc))
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to enqueue webhook event.",
        ) from exc

    accepted = {
        "status": "accepted",
        "project_id": project_id,
        "source_type": canonical_type,
        "source": source,
        "force": force,
        "queued": True,
    }
    return JSONResponse(
        status_code=status.HTTP_202_ACCEPTED,
        content={**accepted, **enqueue_info},
    )

185

186

@app.get("/health")
async def health_check() -> dict[str, object]:
    """Report basic service information; the route declares no auth dependency.

    Returns:
        A mapping with a fixed ``"healthy"`` status, the sorted supported and
        ingest source types, and the configured rate-limit settings.
    """
    limits = {
        "window_seconds": WEBHOOK_RATE_LIMIT_WINDOW_SECONDS,
        "max_requests": WEBHOOK_RATE_LIMIT_REQUESTS_PER_WINDOW,
    }
    webhook_types = sorted(SUPPORTED_SOURCE_TYPES)
    ingest_types = sorted(INGEST_SUPPORTED_SOURCE_TYPES)
    return {
        "status": "healthy",
        "supported_source_types": webhook_types,
        "ingest_source_types": ingest_types,
        "rate_limit": limits,
        # NOTE(review): fixed literal; it is not read from the queue backend.
        "queue": "sqlite",
    }

200

201

@app.get("/healthz")
async def healthz() -> dict[str, object]:
    """Liveness probe in the Kubernetes ``/healthz`` style.

    Answers with a fixed payload whenever the process can serve the request.
    Nothing else is inspected; dependency checks belong to ``/readyz``.
    """
    liveness: dict[str, object] = {"status": "ok"}
    return liveness

210

211

@app.get("/readyz")
async def readyz() -> dict[str, object]:
    """Readiness probe in the Kubernetes ``/readyz`` style.

    The server is reported ready only while the background worker task
    stored on ``app.state.worker_task`` exists and is still running. A task
    that has finished for any reason (error, cancellation, or a clean
    return) means no worker is processing the queue, so the probe fails.

    Note: This does not probe Qdrant, the database, or the queue backend
    itself; those must be monitored separately.

    Returns:
        ``{"status": "ready"}`` while the worker task is running.

    Raises:
        HTTPException: 503 when the worker task is missing, failed, was
            cancelled, or has exited.
    """
    worker_task = getattr(app.state, "worker_task", None)

    if worker_task is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Worker task not initialized",
        )

    if not worker_task.done():
        return {"status": "ready"}

    # Check cancellation first: result() on a cancelled task raises
    # CancelledError, which "except Exception" below would not catch.
    if worker_task.cancelled():
        logger.error("Worker task was cancelled")
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Worker task is not running.",
        )

    try:
        worker_task.result()
    except Exception as err:
        logger.exception("Worker task failed readiness check", error=str(err))
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Worker task failed.",
        ) from err

    # The task returned without an error, but it is no longer running.
    logger.error("Worker task exited unexpectedly")
    raise HTTPException(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        detail="Worker task is not running.",
    )

243

244

@app.get("/status")
async def status_route(
    claims: dict[str, Any] = Depends(verify_cognito_token),
) -> dict[str, object]:
    """Authenticated status endpoint for application clients.

    Access is gated by the ``verify_cognito_token`` dependency, whose result
    is received as ``claims``.

    Returns:
        A mapping with a fixed ``"ok"`` status and the ``sub`` claim
        (``None`` when the claims carry no ``sub``).
    """
    subject = claims.get("sub")
    return {"status": "ok", "subject": subject}

254

255

@app.post("/ingest")
async def ingest_route(
    request: Request,
    project_id: str | None = Query(None),
    source_type: str | None = Query(None),
    source: str | None = Query(None),
    force: bool = False,
    _auth: None = Depends(verify_ingest_auth),
) -> JSONResponse:
    """Trigger ingestion via API (equivalent to `qdrant-loader ingest`).

    The optional query parameters are forwarded unchanged to
    ``enqueue_ingest_request``; the request is rate limited first. The job is
    only enqueued here, not executed.

    Returns:
        A 202 response echoing the inputs, merged with the mapping returned
        by ``enqueue_ingest_request`` (its keys win on collision).

    Raises:
        HTTPException: 422 when enqueueing raises ``ValueError``; 500 for any
            other enqueue failure. The rate-limit helper may raise its own
            ``HTTPException``.
    """
    _enforce_rate_limit(request)

    try:
        enqueue_info = await enqueue_ingest_request(
            project_id=project_id,
            source_type=source_type,
            source=source,
            force=force,
        )
    except ValueError as exc:
        # Treated as a caller error: the message is returned to the client.
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=str(exc),
        ) from exc
    except Exception as exc:
        logger.exception("Failed to enqueue ingest request", error=str(exc))
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to enqueue ingest request.",
        ) from exc

    accepted = {
        "status": "accepted",
        "project_id": project_id,
        "source_type": source_type,
        "source": source,
        "force": force,
        "queued": True,
    }
    return JSONResponse(
        status_code=status.HTTP_202_ACCEPTED,
        content={**accepted, **enqueue_info},
    )

303

304

@app.post("/webhooks/projects/{project_id}/{source_type}/{source}")
async def webhook_project_route(
    project_id: str,
    source_type: str,
    source: str,
    request: Request,
    force: bool = False,
    _auth: None = Depends(verify_webhook_token),
) -> JSONResponse:
    """Accept a webhook addressed to one source of one project.

    Access is gated by the ``verify_webhook_token`` dependency; all handling
    is delegated to ``_handle_webhook`` with the project id from the URL.
    """
    response = await _handle_webhook(
        project_id, source_type, source, request, force
    )
    return response

316

317

@app.post("/webhooks/{source_type}/{source}")
async def webhook_source_route(
    source_type: str,
    source: str,
    request: Request,
    force: bool = False,
    _auth: None = Depends(verify_webhook_token),
) -> JSONResponse:
    """Accept a webhook for a source across all configured projects.

    Access is gated by the ``verify_webhook_token`` dependency; all handling
    is delegated to ``_handle_webhook`` with no project id.
    """
    no_project = None
    response = await _handle_webhook(
        no_project, source_type, source, request, force
    )
    return response