# incidentops/docker-compose.yml
---
# docker-compose.yml for the IncidentOps development stack.
# Services: Postgres + Redis (data), api + worker + web (application),
# migrate/flower (optional profiles), and an observability stack
# (otel-collector -> tempo/loki, prometheus, grafana).
version: "3.8"

services:
  postgres:
    image: postgres:16-alpine
    container_name: incidentops-postgres
    environment:
      POSTGRES_USER: incidentops
      POSTGRES_PASSWORD: incidentops
      POSTGRES_DB: incidentops
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U incidentops"]
      interval: 10s
      timeout: 5s
      retries: 5

  # For Celery broker
  redis:
    image: redis:7-alpine
    container_name: incidentops-redis
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5

  # api services
  api:
    build:
      context: .
      dockerfile: Dockerfile
      target: api
    container_name: incidentops-api
    ports:
      - "8000:8000"
      - "9464:9464"  # Prometheus metrics
    environment:
      DATABASE_URL: postgresql://incidentops:incidentops@postgres:5432/incidentops
      REDIS_URL: redis://redis:6379/0
      JWT_SECRET_KEY: dev-secret-key-change-in-production
      JWT_ALGORITHM: HS256
      # Quoted so the values stay strings rather than YAML integers,
      # matching the quoting style of the other numeric env vars below.
      ACCESS_TOKEN_EXPIRE_MINUTES: "30"
      REFRESH_TOKEN_EXPIRE_DAYS: "30"
      # OpenTelemetry
      OTEL_ENABLED: "true"
      OTEL_SERVICE_NAME: incidentops-api
      OTEL_ENVIRONMENT: development
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_EXPORTER_OTLP_INSECURE: "true"
      OTEL_LOG_LEVEL: INFO
      # Metrics
      PROMETHEUS_PORT: "9464"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      otel-collector:
        condition: service_started
      prometheus:
        condition: service_started
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/v1/healthz"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s

  # Worker service (Celery)
  worker:
    build:
      context: .
      dockerfile: Dockerfile
      target: worker
    container_name: incidentops-worker
    environment:
      DATABASE_URL: postgresql://incidentops:incidentops@postgres:5432/incidentops
      REDIS_URL: redis://redis:6379/0
      CELERY_BROKER_URL: redis://redis:6379/0
      CELERY_RESULT_BACKEND: redis://redis:6379/1
      # OpenTelemetry
      OTEL_ENABLED: "true"
      OTEL_SERVICE_NAME: incidentops-worker
      OTEL_ENVIRONMENT: development
      OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
      OTEL_EXPORTER_OTLP_INSECURE: "true"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy

  # Web frontend (Next.js)
  web:
    build:
      context: .
      dockerfile: Dockerfile.web
    container_name: incidentops-web
    ports:
      - "3000:3000"
    environment:
      NEXT_PUBLIC_API_URL: http://localhost:8000
    depends_on:
      - api

  # Database migrations (run once)
  migrate:
    build:
      context: .
      dockerfile: Dockerfile
      target: api
    container_name: incidentops-migrate
    command: python migrations/migrate.py apply
    environment:
      DATABASE_URL: postgresql://incidentops:incidentops@postgres:5432/incidentops
    depends_on:
      postgres:
        condition: service_healthy
    profiles:
      - migrate

  # Flower for Celery monitoring (dev only)
  flower:
    image: mher/flower:2.0
    container_name: incidentops-flower
    ports:
      - "5555:5555"
    environment:
      CELERY_BROKER_URL: redis://redis:6379/0
      FLOWER_BASIC_AUTH: admin:admin
    depends_on:
      - redis
    profiles:
      - monitoring

  # ============================================
  # Observability Stack
  # ============================================

  # OpenTelemetry Collector - receives traces/logs from apps
  otel-collector:
    image: otel/opentelemetry-collector-contrib:0.96.0
    container_name: incidentops-otel-collector
    command: ["--config=/etc/otel-collector/config.yaml"]
    volumes:
      - ./observability/otel-collector/config.yaml:/etc/otel-collector/config.yaml:ro
    ports:
      - "4317:4317"  # OTLP gRPC
      - "4318:4318"  # OTLP HTTP
    depends_on:
      - tempo
      - loki

  # Tempo - distributed tracing backend
  tempo:
    image: grafana/tempo:2.4.1
    container_name: incidentops-tempo
    command: ["-config.file=/etc/tempo/config.yaml"]
    volumes:
      - ./observability/tempo/config.yaml:/etc/tempo/config.yaml:ro
      - tempo_data:/var/tempo
    ports:
      - "3200:3200"  # Tempo HTTP
      - "4320:4317"  # Tempo OTLP gRPC (different host port to avoid conflict)

  # Loki - log aggregation
  loki:
    image: grafana/loki:2.9.6
    container_name: incidentops-loki
    command: ["-config.file=/etc/loki/config.yaml"]
    volumes:
      - ./observability/loki/config.yaml:/etc/loki/config.yaml:ro
      - loki_data:/loki
    ports:
      - "3100:3100"  # Loki HTTP

  # Prometheus - metrics storage
  prometheus:
    image: prom/prometheus:v2.51.0
    container_name: incidentops-prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--web.enable-lifecycle"
    volumes:
      - ./observability/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    ports:
      - "9090:9090"  # Prometheus UI

  # Grafana - visualization
  grafana:
    image: grafana/grafana:10.4.1
    container_name: incidentops-grafana
    environment:
      GF_SECURITY_ADMIN_USER: admin
      GF_SECURITY_ADMIN_PASSWORD: admin
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_EXPLORE_ENABLED: "true"
      # Quoted: multi-word plain scalar; quoting prevents accidental
      # re-parsing if the list ever gains YAML-special characters.
      GF_FEATURE_TOGGLES_ENABLE: "traceqlEditor tempoSearch tempoBackendSearch tempoApmTable"
    volumes:
      - ./observability/grafana/provisioning:/etc/grafana/provisioning:ro
      - ./observability/grafana/dashboards:/var/lib/grafana/dashboards:ro
      - grafana_data:/var/lib/grafana
    ports:
      - "3001:3000"  # Grafana UI (3001 to avoid conflict with web frontend)
    depends_on:
      - tempo
      - loki
      - prometheus

volumes:
  postgres_data:
  redis_data:
  tempo_data:
  loki_data:
  prometheus_data:
  grafana_data:

networks:
  default:
    name: incidentops-network