Files
incidentops/pyproject.toml
minhtrannhat 46ede7757d feat: add observability stack and background task infrastructure
Add OpenTelemetry instrumentation with distributed tracing and metrics:
- Structured JSON logging with trace context correlation
- Auto-instrumentation for FastAPI, asyncpg, httpx, redis
- OTLP exporter for traces and Prometheus metrics endpoint

Implement Celery worker and notification task system:
- Celery app with Redis/SQS broker support and configurable queues
- Notification tasks for incident fan-out, webhooks, and escalations
- Pluggable TaskQueue abstraction with in-memory driver for testing

Add Grafana observability stack (Loki, Tempo, Prometheus, Grafana):
- OpenTelemetry Collector for receiving OTLP traces and logs
- Tempo for distributed tracing backend
- Loki for log aggregation with Promtail DaemonSet
- Prometheus for metrics scraping with RBAC configuration
- Grafana with pre-provisioned datasources and API overview dashboard
- Helm templates for all observability components

Enhance application infrastructure:
- Global exception handlers with structured ErrorResponse schema
- Request logging middleware with timing metrics
- Health check updated to verify task queue connectivity
- Non-root user in Dockerfile for security
- Init containers in Helm deployments for dependency ordering
- Production Helm values with autoscaling and retention policies
2026-01-07 20:51:13 -05:00

59 lines
1.5 KiB
TOML

# Core package metadata (PEP 621) and the runtime dependency set.
[project]
name = "incidentops"
version = "0.1.0"
description = "Incident management API with multi-tenant org support"
readme = "README.md"
requires-python = ">=3.14"
dependencies = [
    # API, persistence, and settings
    "fastapi>=0.115.0",
    "uvicorn[standard]>=0.32.0",
    "asyncpg>=0.30.0",
    "pydantic[email]>=2.0.0",
    "pydantic-settings>=2.0.0",
    # Auth. The python-jose floor is 3.4.0 so that releases affected by
    # CVE-2024-33663 (algorithm confusion) and CVE-2024-33664 (JWE
    # decompression bomb) can never be resolved.
    "python-jose[cryptography]>=3.4.0",
    "bcrypt>=4.0.0",
    # Background tasks and outbound HTTP
    "celery[redis]>=5.4.0",
    "redis>=5.0.0",
    "httpx>=0.28.0",
    # OpenTelemetry
    "opentelemetry-api>=1.27.0",
    "opentelemetry-sdk>=1.27.0",
    "opentelemetry-exporter-otlp>=1.27.0",
    "opentelemetry-exporter-prometheus>=0.48b0",
    "opentelemetry-instrumentation-fastapi>=0.48b0",
    "opentelemetry-instrumentation-asyncpg>=0.48b0",
    "opentelemetry-instrumentation-httpx>=0.48b0",
    "opentelemetry-instrumentation-redis>=0.48b0",
    "opentelemetry-instrumentation-logging>=0.48b0",
    "opentelemetry-instrumentation-system-metrics>=0.48b0",
    "prometheus-client>=0.20.0",
]

# Extras installed for local development and CI (`pip install .[dev]`).
[project.optional-dependencies]
dev = [
    "pytest>=8.0.0",
    "pytest-asyncio>=0.24.0",
    # [tool.ruff] sets target-version = "py314"; older Ruff releases reject
    # that value, so the floor is pinned to a release with Python 3.14 support.
    "ruff>=0.14.0",
]

# Build backend declaration: distributions are built with Hatchling.
[build-system]
build-backend = "hatchling.build"
requires = [
    "hatchling",
]

# Top-level packages shipped in the wheel.
[tool.hatch.build.targets.wheel]
packages = [
    "app",
    "migrations",
    "worker",
]

# Ruff: project-wide settings.
[tool.ruff]
# Keep in step with `requires-python` in [project].
target-version = "py314"
line-length = 100

# Ruff: enabled lint rule families.
[tool.ruff.lint]
select = [
    "E",  # pycodestyle errors
    "F",  # Pyflakes
    "I",  # isort (import ordering)
    "N",  # pep8-naming
    "W",  # pycodestyle warnings
    "UP", # pyupgrade
]

[tool.ruff.lint.per-file-ignores]
# Allow longer lines in tests for descriptive method names.
"tests/**/*.py" = [
    "E501",
]

# pytest settings.
[tool.pytest.ini_options]
# Restrict collection to the tests/ directory.
testpaths = [
    "tests",
]
# pytest-asyncio "auto" mode: async tests need no explicit asyncio marker.
asyncio_mode = "auto"