Add OpenTelemetry instrumentation with distributed tracing and metrics: - Structured JSON logging with trace context correlation - Auto-instrumentation for FastAPI, asyncpg, httpx, redis - OTLP exporter for traces and Prometheus metrics endpoint Implement Celery worker and notification task system: - Celery app with Redis/SQS broker support and configurable queues - Notification tasks for incident fan-out, webhooks, and escalations - Pluggable TaskQueue abstraction with in-memory driver for testing Add Grafana observability stack (Loki, Tempo, Prometheus, Grafana): - OpenTelemetry Collector for receiving OTLP traces and logs - Tempo for distributed tracing backend - Loki for log aggregation with Promtail DaemonSet - Prometheus for metrics scraping with RBAC configuration - Grafana with pre-provisioned datasources and API overview dashboard - Helm templates for all observability components Enhance application infrastructure: - Global exception handlers with structured ErrorResponse schema - Request logging middleware with timing metrics - Health check updated to verify task queue connectivity - Non-root user in Dockerfile for security - Init containers in Helm deployments for dependency ordering - Production Helm values with autoscaling and retention policies
59 lines
1.5 KiB
TOML
59 lines
1.5 KiB
TOML
# Package metadata and runtime dependencies (PEP 621 `[project]` table).
[project]
name = "incidentops"
version = "0.1.0"
description = "Incident management API with multi-tenant org support"
readme = "README.md"
requires-python = ">=3.14"
dependencies = [
    "fastapi>=0.115.0",
    "uvicorn[standard]>=0.32.0",
    "asyncpg>=0.30.0",
    "pydantic[email]>=2.0.0",
    "pydantic-settings>=2.0.0",
    # Floor is 3.4.0 rather than 3.3.0: 3.4.0 is the first release carrying
    # the fixes for CVE-2024-33663 and CVE-2024-33664.
    "python-jose[cryptography]>=3.4.0",
    "bcrypt>=4.0.0",
    "celery[redis]>=5.4.0",
    "redis>=5.0.0",
    "httpx>=0.28.0",
    # OpenTelemetry
    "opentelemetry-api>=1.27.0",
    "opentelemetry-sdk>=1.27.0",
    "opentelemetry-exporter-otlp>=1.27.0",
    "opentelemetry-exporter-prometheus>=0.48b0",
    "opentelemetry-instrumentation-fastapi>=0.48b0",
    "opentelemetry-instrumentation-asyncpg>=0.48b0",
    "opentelemetry-instrumentation-httpx>=0.48b0",
    "opentelemetry-instrumentation-redis>=0.48b0",
    "opentelemetry-instrumentation-logging>=0.48b0",
    "opentelemetry-instrumentation-system-metrics>=0.48b0",
    "prometheus-client>=0.20.0",
]

# Optional extras; `dev` holds the test and lint tooling.
[project.optional-dependencies]
dev = [
    "pytest>=8.0.0",
    "pytest-asyncio>=0.24.0",
    "ruff>=0.8.0",
]

# PEP 517 build backend: the project is built with Hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Top-level packages that Hatch includes in the wheel.
[tool.hatch.build.targets.wheel]
packages = ["app", "migrations", "worker"]

# Ruff: general settings.
[tool.ruff]
line-length = 100
# Kept in step with `requires-python` in [project].
# NOTE(review): confirm that the oldest ruff release permitted by the dev
# extra recognises "py314"; raise that floor if it does not.
target-version = "py314"

# Ruff: enabled rule families, given by their rule-code prefixes.
[tool.ruff.lint]
select = ["E", "F", "I", "N", "W", "UP"]

# Ruff: rules suppressed for specific path globs.
[tool.ruff.lint.per-file-ignores]
# Allow longer lines in tests for descriptive method names
"tests/**/*.py" = ["E501"]

# pytest configuration.
[tool.pytest.ini_options]
# pytest-asyncio "auto" mode: async tests run without an explicit marker.
asyncio_mode = "auto"
testpaths = ["tests"]