devops: extract composite actions for obs stack deploy steps (#603) #715
127
.gitea/actions/deploy-obs/action.yml
Normal file
127
.gitea/actions/deploy-obs/action.yml
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
name: Deploy observability stack
description: >-
  Deploy observability configs + secrets to /opt/familienarchiv, validate the
  compose config, start the stack, and assert the five healthchecked services
  are healthy. Per-environment values arrive as inputs.

inputs:
  grafana_admin_password:
    description: Grafana admin password (secret)
    required: true
  grafana_db_password:
    # Quoted on purpose: in a plain scalar " #" starts a YAML comment and would
    # silently truncate the description after "issue".
    description: 'Read-only grafana_reader DB role password (secret, issue #651)'
    required: true
  glitchtip_secret_key:
    description: GlitchTip Django secret key (secret)
    required: true
  postgres_password:
    description: PostgreSQL password for the environment (secret)
    required: true
  postgres_host:
    description: >-
      Compose project + service hostname, e.g. archiv-staging-db-1. Derived
      from the Compose project name and service name — a project rename
      requires updating the caller's value. Plain input, not a secret.
    required: true

runs:
  using: composite
  steps:
    - name: Deploy observability configs
      shell: bash
      # Copies the compose file and config tree from the workspace checkout
      # into /opt/familienarchiv/ — the permanent location that persists
      # between CI runs. Containers started in a later step bind-mount from
      # there, so a future workspace wipe cannot corrupt a running config file.
      #
      # obs-secrets.env is written fresh from Gitea secrets on every run so
      # Gitea is always the single source of truth for secret rotation.
      # Non-secret config lives in infra/observability/obs.env (tracked in git).
      #
      # secrets.* is NOT available inside a composite action, so the values
      # arrive as inputs mapped to env: below and are referenced as $VAR in
      # the heredoc. The delimiter MUST stay unquoted (<<EOF, not <<'EOF') so
      # the shell expands $VAR — a quoted delimiter would write the literal
      # string "$GRAFANA_ADMIN_PASSWORD" and `config --quiet` would still pass
      # (the var is present, just wrong). Do not stage these into intermediate
      # variables either, or Gitea log masking can be lost.
      env:
        GRAFANA_ADMIN_PASSWORD: ${{ inputs.grafana_admin_password }}
        GRAFANA_DB_PASSWORD: ${{ inputs.grafana_db_password }}
        GLITCHTIP_SECRET_KEY: ${{ inputs.glitchtip_secret_key }}
        POSTGRES_PASSWORD: ${{ inputs.postgres_password }}
        POSTGRES_HOST: ${{ inputs.postgres_host }}
      run: |
        set -euo pipefail
        rm -rf /opt/familienarchiv/infra/observability
        mkdir -p /opt/familienarchiv/infra/observability
        cp -r infra/observability/. /opt/familienarchiv/infra/observability/
        cp docker-compose.observability.yml /opt/familienarchiv/
        # Create the secrets file EMPTY with mode 600 before any secret is
        # written. A plain `cat >` would create it under the default umask
        # (typically world-readable) and leave it that way until the chmod at
        # the end of this script; `>` on an existing file keeps its mode, so
        # the heredoc below now always writes into a 600 file.
        install -m 600 /dev/null /opt/familienarchiv/obs-secrets.env
        cat > /opt/familienarchiv/obs-secrets.env <<EOF
        GRAFANA_ADMIN_PASSWORD=$GRAFANA_ADMIN_PASSWORD
        GRAFANA_DB_PASSWORD=$GRAFANA_DB_PASSWORD
        GLITCHTIP_SECRET_KEY=$GLITCHTIP_SECRET_KEY
        POSTGRES_PASSWORD=$POSTGRES_PASSWORD
        POSTGRES_HOST=$POSTGRES_HOST
        EOF
        # Five-key non-empty guard: a bare presence check matches an empty
        # `KEY=` line, so assert each key has a value. Fail loudly on any
        # missing/empty key rather than starting the stack with broken auth.
        for key in GRAFANA_ADMIN_PASSWORD GRAFANA_DB_PASSWORD GLITCHTIP_SECRET_KEY POSTGRES_PASSWORD POSTGRES_HOST; do
          grep -Eq "^${key}=.+" /opt/familienarchiv/obs-secrets.env \
            || { echo "::error::obs-secrets.env missing or empty: ${key}"; exit 1; }
        done
        # Kept as the final operation as defence in depth: the file is already
        # 600 from the `install` above, this re-asserts it after all writes.
        chmod 600 /opt/familienarchiv/obs-secrets.env

    - name: Validate observability compose config
      shell: bash
      # Dry-run: resolves all variable substitutions and reports any missing
      # required keys before containers start. Catches undefined variables and
      # YAML errors in config files updated by the previous step.
      # --env-file order: obs.env first (git-tracked defaults), obs-secrets.env
      # second (CI-written secrets). Later files win on duplicate keys.
      # POSTGRES_HOST is environment-specific and supplied only by
      # obs-secrets.env — obs.env documents it but deliberately does not set a
      # value.
      run: |
        docker compose \
          -f /opt/familienarchiv/docker-compose.observability.yml \
          --env-file /opt/familienarchiv/infra/observability/obs.env \
          --env-file /opt/familienarchiv/obs-secrets.env \
          config --quiet

    - name: Start observability stack
      shell: bash
      # Runs with absolute paths so bind mounts resolve to stable host paths
      # that survive workspace wipes between runs (see ADR-016).
      # Non-secret config from obs.env (git-tracked); secrets from
      # obs-secrets.env (written fresh from Gitea secrets above). --env-file
      # order: obs.env first, obs-secrets.env second — later file wins on
      # duplicate keys.
      run: |
        docker compose \
          -f /opt/familienarchiv/docker-compose.observability.yml \
          --env-file /opt/familienarchiv/infra/observability/obs.env \
          --env-file /opt/familienarchiv/obs-secrets.env \
          up -d --wait --remove-orphans

    - name: Assert observability stack health
      shell: bash
      # docker compose up --wait covers services WITH healthcheck directives
      # only. obs-promtail, obs-cadvisor, obs-node-exporter, and
      # obs-glitchtip-worker have no healthcheck — they are considered
      # "started" as soon as the process runs. This step explicitly asserts the
      # five healthchecked critical services are healthy before the smoke test
      # proceeds.
      run: |
        set -euo pipefail
        unhealthy=""
        for svc in obs-loki obs-prometheus obs-grafana obs-tempo obs-glitchtip; do
          # A failing `docker inspect` (e.g. container absent) is reported as
          # status "missing" instead of aborting, so every service is checked.
          status=$(docker inspect "$svc" --format '{{.State.Health.Status}}' 2>/dev/null || echo "missing")
          if [ "$status" != "healthy" ]; then
            echo "::error::$svc is not healthy (status: $status)"
            unhealthy="$unhealthy $svc"
          fi
        done
        [ -z "$unhealthy" ] || exit 1
        echo "All critical observability services are healthy"
|
||||||
41
.gitea/actions/reload-caddy/action.yml
Normal file
41
.gitea/actions/reload-caddy/action.yml
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
name: Reload Caddy
description: >-
  Reload the host Caddy service from a DooD job container via a privileged
  sibling container and nsenter. No inputs.

runs:
  using: composite
  steps:
    - name: Reload Caddy
      shell: bash
      # Why this step exists: committed Caddyfile changes must be live before
      # the public surface is smoke-tested. Otherwise a Caddyfile edit lands
      # in the repo while Caddy keeps serving the previous config until a
      # manual reload, and the smoke test would flag a stale header or a
      # still-proxied /actuator route instead of confirming the current
      # config.
      #
      # How it reaches the host: job steps run inside Docker containers
      # (DooD), where `systemctl` is absent and the host's systemd is not
      # directly reachable. The Docker socket (mounted into every job
      # container via runner-config.yaml) is used to start a privileged
      # sibling container in the host PID namespace; nsenter then joins the
      # host's namespaces so systemctl talks to the real host systemd. No
      # sudoers entry is needed — the Docker socket already grants
      # root-equivalent host access.
      #
      # Image choice: Alpine (~5 MB vs ~70 MB for ubuntu, no unnecessary
      # tooling), pinned by digest so any upstream change requires an explicit
      # bump PR. util-linux (which ships nsenter) is installed at run time;
      # apk add takes ~1 s on the warm VPS cache.
      #
      # `reload`, not `restart`: a reload lets Caddy re-read its config
      # in-process without dropping TLS connections, whereas a restart would
      # briefly stop the service and lose in-flight requests.
      #
      # If Caddy is not running this step fails fast, before the smoke test
      # would report a misleading "port 443 refused" error.
      run: |
        docker run \
          --rm \
          --privileged \
          --pid=host \
          alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \
          sh -c 'apk add --no-cache util-linux -q && nsenter -t 1 -m -u -n -p -i -- /bin/systemctl reload caddy'
|
||||||
58
.gitea/actions/smoke-test/action.yml
Normal file
58
.gitea/actions/smoke-test/action.yml
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
name: Smoke test
description: >-
  Verify the deployed public surface (login reachable, HSTS pinned,
  Permissions-Policy present, /actuator blocked) against a given vhost.

inputs:
  host:
    description: Public vhost to smoke-test, e.g. staging.raddatz.cloud
    required: true

runs:
  using: composite
  steps:
    - name: Smoke test deployed environment
      shell: bash
      # Healthchecks confirm containers are healthy; they do NOT confirm the
      # public surface works. This step catches: Caddy not reloaded, HSTS
      # header dropped, /actuator block bypassed.
      #
      # --resolve pins the public host to the Docker bridge gateway IP
      # (the host) so we do NOT depend on hairpin NAT on the host router.
      # 127.0.0.1 cannot be used: job containers run in bridge network mode
      # (runner-config.yaml), so 127.0.0.1 is the container's loopback, not
      # the host's. The bridge gateway IS the host; Caddy binds 0.0.0.0:443
      # and is therefore reachable from the container via that IP.
      # SNI still uses the public hostname so the TLS cert validates correctly.
      #
      # --resolve is stored as a Bash array so "${RESOLVE[@]}" expands to two
      # separate arguments; a quoted string would pass the flag and its value
      # as one token and curl would reject it as an unknown option.
      #
      # Gateway detection reads /proc/net/route (always present, no package
      # required) instead of `ip route` to avoid a dependency on iproute2.
      # The second column == "00000000" marks the default route; the third
      # column is the gateway as a little-endian 32-bit hex value. It is
      # decoded with Bash's own printf rather than awk's strtonum(), which is
      # a gawk extension and is missing from mawk / busybox awk.
      env:
        HOST: ${{ inputs.host }}
      run: |
        set -e
        URL="https://$HOST"
        HOST_IP=""
        # The header row's second column is the literal "Destination", so it
        # can never match and needs no explicit skip.
        while read -r _iface dest gw _rest; do
          if [ "$dest" = "00000000" ]; then
            # Bytes are stored least-significant first: reverse the four hex
            # pairs and let printf convert each 0xNN to decimal.
            HOST_IP=$(printf '%d.%d.%d.%d' "0x${gw:6:2}" "0x${gw:4:2}" "0x${gw:2:2}" "0x${gw:0:2}")
            break
          fi
        done < /proc/net/route
        [ -n "$HOST_IP" ] || { echo "::error::could not detect Docker bridge gateway via /proc/net/route"; exit 1; }
        RESOLVE=(--resolve "$HOST:443:$HOST_IP")
        echo "Smoke test: $URL (pinned to $HOST_IP via bridge gateway)"
        curl -fsS "${RESOLVE[@]}" --max-time 10 "$URL/login" -o /dev/null
        # Pin the preload-list-eligible HSTS value, not just header presence:
        # a degraded `max-age=1` or a dropped `includeSubDomains; preload` must
        # fail this check rather than pass it silently.
        curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
          | grep -Eqi 'strict-transport-security:[[:space:]]*max-age=31536000.*includeSubDomains.*preload'
        # Permissions-Policy denies APIs the app does not use (camera,
        # microphone, geolocation). A regression that loosens or drops the
        # header now fails the smoke step.
        curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
          | grep -Eqi 'permissions-policy:[[:space:]]*camera=\(\),[[:space:]]*microphone=\(\),[[:space:]]*geolocation=\(\)'
        # `|| true`: on a transport failure curl still prints "000" via -w but
        # exits non-zero; without this, `set -e` would abort here silently
        # instead of reaching the explicit error message below.
        status=$(curl -s "${RESOLVE[@]}" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health" || true)
        [ "$status" = "404" ] || { echo "::error::expected 404 from /actuator/health, got $status"; exit 1; }
        echo "All smoke checks passed"
|
||||||
@@ -108,6 +108,32 @@ jobs:
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
- name: Assert deploy-obs writes obs-secrets.env via an unquoted heredoc (#603)
  shell: bash
  run: |
    # Background: inside a composite action, secrets arrive as $VAR from
    # env: (secrets.* is unavailable there), so the obs-secrets.env heredoc
    # MUST use an unquoted delimiter (<<EOF) for $VAR to expand. With a
    # quoted delimiter (<<'EOF') the literal string
    # "$GRAFANA_ADMIN_PASSWORD" would be written, and the action's five-key
    # non-empty guard would STILL pass (the line is present, just wrong).
    # This check enforces the invariant in CI so a future re-quote cannot
    # ship broken obs auth green. See ADR-029 / #603.
    action_file='.gitea/actions/deploy-obs/action.yml'
    # Matches "obs-secrets.env <<" (optionally "<<-") followed by a single
    # or double quote, i.e. a quoted heredoc delimiter.
    quoted_delim_re='obs-secrets\.env\s*<<-?\s*[\x27\x22]'

    # Self-test, part 1: the regex must catch a quoted delimiter.
    if ! printf "obs-secrets.env <<'EOF'\n" | grep -qP "$quoted_delim_re"; then
      echo "FAIL: guard self-test — regex missed the quoted <<'EOF' form"
      exit 1
    fi
    # Self-test, part 2: the regex must ignore the unquoted delimiter.
    if ! printf 'obs-secrets.env <<EOF\n' | grep -qvP "$quoted_delim_re"; then
      echo "FAIL: guard self-test — regex wrongly flagged the unquoted <<EOF form"
      exit 1
    fi

    # Positive check: the unquoted heredoc must be present at all.
    if ! grep -qP 'obs-secrets\.env\s*<<-?EOF\b' "$action_file"; then
      echo "::error::$action_file no longer writes obs-secrets.env via an unquoted <<EOF heredoc (ADR-029 / #603)"
      exit 1
    fi

    # Negative check: never a quoted delimiter on the obs-secrets.env
    # heredoc. -n prints the offending line(s) with line numbers.
    if grep -nP "$quoted_delim_re" "$action_file"; then
      echo "::error::$action_file writes obs-secrets.env with a quoted heredoc delimiter — secrets would be written as literal \$VAR strings. Use unquoted <<EOF (ADR-029 / #603)."
      exit 1
    fi
|
||||||
|
|
||||||
- name: Run unit and component tests with coverage
|
- name: Run unit and component tests with coverage
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -23,6 +23,11 @@ name: nightly
|
|||||||
# - host ports: backend 8081, frontend 3001
|
# - host ports: backend 8081, frontend 3001
|
||||||
# - profile: staging (starts mailpit instead of a real SMTP relay)
|
# - profile: staging (starts mailpit instead of a real SMTP relay)
|
||||||
#
|
#
|
||||||
|
# The obs-stack deploy, Caddy reload, and smoke test are shared with
|
||||||
|
# release.yml via the composite actions under .gitea/actions/ (ADR-029).
|
||||||
|
# actions/checkout MUST stay the first step: a local `uses: ./…` action
|
||||||
|
# only exists on disk after checkout.
|
||||||
|
#
|
||||||
# Required Gitea secrets:
|
# Required Gitea secrets:
|
||||||
# STAGING_POSTGRES_PASSWORD
|
# STAGING_POSTGRES_PASSWORD
|
||||||
# STAGING_MINIO_PASSWORD
|
# STAGING_MINIO_PASSWORD
|
||||||
@@ -55,6 +60,8 @@ jobs:
|
|||||||
# for the same repo is within that boundary.
|
# for the same repo is within that boundary.
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
# MUST be first: the composite actions below live under .gitea/actions/
|
||||||
|
# and only exist on disk once the repo is checked out (ADR-029).
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Write staging env file
|
- name: Write staging env file
|
||||||
@@ -92,6 +99,7 @@ jobs:
|
|||||||
# `compose config` renders both shorthand and longform mounts as
|
# `compose config` renders both shorthand and longform mounts as
|
||||||
# `target: /import` + `read_only: true`, so we assert against
|
# `target: /import` + `read_only: true`, so we assert against
|
||||||
# the rendered form rather than the raw source YAML.
|
# the rendered form rather than the raw source YAML.
|
||||||
|
# App-compose check (not obs), nightly-only — stays inline.
|
||||||
run: |
|
run: |
|
||||||
set -e
|
set -e
|
||||||
docker compose \
|
docker compose \
|
||||||
@@ -128,150 +136,21 @@ jobs:
|
|||||||
--profile staging \
|
--profile staging \
|
||||||
up -d --wait --remove-orphans
|
up -d --wait --remove-orphans
|
||||||
|
|
||||||
- name: Deploy observability configs
|
# POSTGRES_HOST is derived from the Compose project name (archiv-staging)
|
||||||
# Copies the compose file and config tree from the workspace checkout
|
# and service name (db). A project rename requires updating this value.
|
||||||
# into /opt/familienarchiv/ — the permanent location that persists
|
- uses: ./.gitea/actions/deploy-obs
|
||||||
# between CI runs. Containers started in the next step bind-mount
|
with:
|
||||||
# from there, so a future workspace wipe cannot corrupt a running
|
grafana_admin_password: ${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||||
# config file.
|
grafana_db_password: ${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||||
#
|
glitchtip_secret_key: ${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||||
# obs-secrets.env is written fresh from Gitea secrets on every run so
|
postgres_password: ${{ secrets.STAGING_POSTGRES_PASSWORD }}
|
||||||
# Gitea is always the single source of truth for secret rotation.
|
postgres_host: archiv-staging-db-1
|
||||||
# Non-secret config lives in infra/observability/obs.env (tracked in git).
|
|
||||||
run: |
|
|
||||||
rm -rf /opt/familienarchiv/infra/observability
|
|
||||||
mkdir -p /opt/familienarchiv/infra/observability
|
|
||||||
cp -r infra/observability/. /opt/familienarchiv/infra/observability/
|
|
||||||
cp docker-compose.observability.yml /opt/familienarchiv/
|
|
||||||
cat > /opt/familienarchiv/obs-secrets.env <<'EOF'
|
|
||||||
GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
|
||||||
GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }}
|
|
||||||
GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }}
|
|
||||||
POSTGRES_PASSWORD=${{ secrets.STAGING_POSTGRES_PASSWORD }}
|
|
||||||
POSTGRES_HOST=archiv-staging-db-1
|
|
||||||
EOF
|
|
||||||
# Note: POSTGRES_HOST is derived from the Compose project name (archiv-staging)
|
|
||||||
# and service name (db). A project rename requires updating this value.
|
|
||||||
chmod 600 /opt/familienarchiv/obs-secrets.env
|
|
||||||
|
|
||||||
- name: Validate observability compose config
|
- uses: ./.gitea/actions/reload-caddy
|
||||||
# Dry-run: resolves all variable substitutions and reports any missing
|
|
||||||
# required keys before containers start. Catches undefined variables and
|
|
||||||
# YAML errors in config files updated by the previous step.
|
|
||||||
# --env-file order: obs.env first (git-tracked defaults), obs-secrets.env
|
|
||||||
# second (CI-written secrets). Later files win on duplicate keys, so
|
|
||||||
# obs-secrets.env overrides POSTGRES_HOST set in obs.env.
|
|
||||||
run: |
|
|
||||||
docker compose \
|
|
||||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
|
||||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
|
||||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
|
||||||
config --quiet
|
|
||||||
|
|
||||||
- name: Start observability stack
|
- uses: ./.gitea/actions/smoke-test
|
||||||
# Runs with absolute paths so bind mounts resolve to stable host paths
|
with:
|
||||||
# that survive workspace wipes between nightly runs (see ADR-016).
|
host: staging.raddatz.cloud
|
||||||
# Non-secret config from obs.env (git-tracked); secrets from obs-secrets.env
|
|
||||||
# (written fresh from Gitea secrets above). --env-file order: obs.env first,
|
|
||||||
# obs-secrets.env second — later file wins on duplicate keys.
|
|
||||||
run: |
|
|
||||||
docker compose \
|
|
||||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
|
||||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
|
||||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
|
||||||
up -d --wait --remove-orphans
|
|
||||||
|
|
||||||
- name: Assert observability stack health
|
|
||||||
# docker compose up --wait covers services WITH healthcheck directives only.
|
|
||||||
# obs-promtail, obs-cadvisor, obs-node-exporter, and obs-glitchtip-worker have
|
|
||||||
# no healthcheck — they are considered "started" as soon as the process runs.
|
|
||||||
# This step explicitly asserts the five healthchecked critical services are
|
|
||||||
# healthy before the smoke test proceeds.
|
|
||||||
run: |
|
|
||||||
set -e
|
|
||||||
unhealthy=""
|
|
||||||
for svc in obs-loki obs-prometheus obs-grafana obs-tempo obs-glitchtip; do
|
|
||||||
status=$(docker inspect "$svc" --format '{{.State.Health.Status}}' 2>/dev/null || echo "missing")
|
|
||||||
if [ "$status" != "healthy" ]; then
|
|
||||||
echo "::error::$svc is not healthy (status: $status)"
|
|
||||||
unhealthy="$unhealthy $svc"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
[ -z "$unhealthy" ] || exit 1
|
|
||||||
echo "All critical observability services are healthy"
|
|
||||||
|
|
||||||
- name: Reload Caddy
|
|
||||||
# Apply any committed Caddyfile changes before smoke-testing the
|
|
||||||
# public surface. Without this step, a Caddyfile edit lands in the
|
|
||||||
# repo but Caddy keeps serving the previous config until someone
|
|
||||||
# reloads it manually — the smoke test would then catch a stale
|
|
||||||
# header or a still-proxied /actuator route rather than confirming
|
|
||||||
# the current config is live.
|
|
||||||
#
|
|
||||||
# The runner executes job steps inside Docker containers (DooD).
|
|
||||||
# `systemctl` is not present in container images and cannot reach
|
|
||||||
# the host's systemd directly. We use the Docker socket (mounted
|
|
||||||
# into every job container via runner-config.yaml) to spin up a
|
|
||||||
# privileged sibling container in the host PID namespace; nsenter
|
|
||||||
# then enters the host's namespaces so systemctl talks to the real
|
|
||||||
# host systemd daemon. No sudoers entry is required — the Docker
|
|
||||||
# socket already grants root-equivalent host access.
|
|
||||||
#
|
|
||||||
# Alpine is used: ~5 MB vs ~70 MB for ubuntu, no unnecessary
|
|
||||||
# tooling, and the digest is pinned so any upstream change requires
|
|
||||||
# an explicit bump PR. util-linux (which ships nsenter) is installed
|
|
||||||
# at run time; apk add takes ~1 s on the warm VPS cache.
|
|
||||||
#
|
|
||||||
# `reload` not `restart`: reload sends SIGHUP so Caddy re-reads its
|
|
||||||
# config in-process without dropping TLS connections. `restart`
|
|
||||||
# would briefly stop the service, losing in-flight requests.
|
|
||||||
#
|
|
||||||
# If Caddy is not running this step fails fast before the smoke test
|
|
||||||
# issues a misleading "port 443 refused" error.
|
|
||||||
run: |
|
|
||||||
docker run --rm --privileged --pid=host \
|
|
||||||
alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \
|
|
||||||
sh -c 'apk add --no-cache util-linux -q && nsenter -t 1 -m -u -n -p -i -- /bin/systemctl reload caddy'
|
|
||||||
|
|
||||||
- name: Smoke test deployed environment
|
|
||||||
# Healthchecks confirm containers are healthy; they do NOT confirm the
|
|
||||||
# public surface works. This step catches: Caddy not reloaded, HSTS
|
|
||||||
# header dropped, /actuator block bypassed.
|
|
||||||
#
|
|
||||||
# --resolve pins staging.raddatz.cloud to the Docker bridge gateway IP
|
|
||||||
# (the host) so we do NOT depend on hairpin NAT on the host router.
|
|
||||||
# 127.0.0.1 cannot be used: job containers run in bridge network mode
|
|
||||||
# (runner-config.yaml), so 127.0.0.1 is the container's loopback, not
|
|
||||||
# the host's. The bridge gateway IS the host; Caddy binds 0.0.0.0:443
|
|
||||||
# and is therefore reachable from the container via that IP.
|
|
||||||
# SNI still uses the public hostname so the TLS cert validates correctly.
|
|
||||||
#
|
|
||||||
# Gateway detection reads /proc/net/route (always present, no package
|
|
||||||
# required) instead of `ip route` to avoid a dependency on iproute2.
|
|
||||||
# Field $2=="00000000" is the default route; field $3 is the gateway as
|
|
||||||
# a little-endian 32-bit hex value which awk decodes to dotted-decimal.
|
|
||||||
run: |
|
|
||||||
set -e
|
|
||||||
HOST="staging.raddatz.cloud"
|
|
||||||
URL="https://$HOST"
|
|
||||||
HOST_IP=$(awk 'NR>1 && $2=="00000000"{h=$3;printf "%d.%d.%d.%d\n",strtonum("0x"substr(h,7,2)),strtonum("0x"substr(h,5,2)),strtonum("0x"substr(h,3,2)),strtonum("0x"substr(h,1,2));exit}' /proc/net/route)
|
|
||||||
[ -n "$HOST_IP" ] || { echo "ERROR: could not detect Docker bridge gateway via /proc/net/route"; exit 1; }
|
|
||||||
RESOLVE=(--resolve "$HOST:443:$HOST_IP")
|
|
||||||
echo "Smoke test: $URL (pinned to $HOST_IP via bridge gateway)"
|
|
||||||
curl -fsS "${RESOLVE[@]}" --max-time 10 "$URL/login" -o /dev/null
|
|
||||||
# Pin the preload-list-eligible HSTS value, not just header presence:
|
|
||||||
# a degraded `max-age=1` or a dropped `includeSubDomains; preload` must
|
|
||||||
# fail this check rather than pass it silently.
|
|
||||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
|
||||||
| grep -Eqi 'strict-transport-security:[[:space:]]*max-age=31536000.*includeSubDomains.*preload'
|
|
||||||
# Permissions-Policy denies APIs the app does not use (camera,
|
|
||||||
# microphone, geolocation). A regression that loosens or drops the
|
|
||||||
# header now fails the smoke step.
|
|
||||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
|
||||||
| grep -Eqi 'permissions-policy:[[:space:]]*camera=\(\),[[:space:]]*microphone=\(\),[[:space:]]*geolocation=\(\)'
|
|
||||||
status=$(curl -s "${RESOLVE[@]}" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health")
|
|
||||||
[ "$status" = "404" ] || { echo "expected 404 from /actuator/health, got $status"; exit 1; }
|
|
||||||
echo "All smoke checks passed"
|
|
||||||
|
|
||||||
- name: Cleanup env file
|
- name: Cleanup env file
|
||||||
# LOAD-BEARING: `if: always()` is the linchpin of the ADR-011
|
# LOAD-BEARING: `if: always()` is the linchpin of the ADR-011
|
||||||
|
|||||||
@@ -23,6 +23,11 @@ name: release
|
|||||||
# - host ports: backend 8080, frontend 3000
|
# - host ports: backend 8080, frontend 3000
|
||||||
# - profile: (none) — mailpit is excluded; real SMTP relay is used
|
# - profile: (none) — mailpit is excluded; real SMTP relay is used
|
||||||
#
|
#
|
||||||
|
# The obs-stack deploy, Caddy reload, and smoke test are shared with
|
||||||
|
# nightly.yml via the composite actions under .gitea/actions/ (ADR-029).
|
||||||
|
# actions/checkout MUST stay the first step: a local `uses: ./…` action
|
||||||
|
# only exists on disk after checkout.
|
||||||
|
#
|
||||||
# Required Gitea secrets:
|
# Required Gitea secrets:
|
||||||
# PROD_POSTGRES_PASSWORD
|
# PROD_POSTGRES_PASSWORD
|
||||||
# PROD_MINIO_PASSWORD
|
# PROD_MINIO_PASSWORD
|
||||||
@@ -53,6 +58,8 @@ jobs:
|
|||||||
# advertised label of our single-tenant self-hosted runner.
|
# advertised label of our single-tenant self-hosted runner.
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
# MUST be first: the composite actions below live under .gitea/actions/
|
||||||
|
# and only exist on disk once the repo is checked out (ADR-029).
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Write production env file
|
- name: Write production env file
|
||||||
@@ -100,117 +107,21 @@ jobs:
|
|||||||
--env-file .env.production \
|
--env-file .env.production \
|
||||||
up -d --wait --remove-orphans
|
up -d --wait --remove-orphans
|
||||||
|
|
||||||
- name: Deploy observability configs
|
# POSTGRES_HOST is derived from the Compose project name (archiv-production)
|
||||||
# Mirrors the nightly approach: copies obs compose file and config tree
|
# and service name (db). A project rename requires updating this value.
|
||||||
# to /opt/familienarchiv/ (permanent path, survives workspace wipes — ADR-016),
|
- uses: ./.gitea/actions/deploy-obs
|
||||||
# then writes obs-secrets.env fresh from Gitea secrets.
|
with:
|
||||||
# Non-secret config lives in infra/observability/obs.env (tracked in git).
|
grafana_admin_password: ${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||||
run: |
|
grafana_db_password: ${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||||
rm -rf /opt/familienarchiv/infra/observability
|
glitchtip_secret_key: ${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||||
mkdir -p /opt/familienarchiv/infra/observability
|
postgres_password: ${{ secrets.PROD_POSTGRES_PASSWORD }}
|
||||||
cp -r infra/observability/. /opt/familienarchiv/infra/observability/
|
postgres_host: archiv-production-db-1
|
||||||
cp docker-compose.observability.yml /opt/familienarchiv/
|
|
||||||
cat > /opt/familienarchiv/obs-secrets.env <<'EOF'
|
|
||||||
GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
|
||||||
GRAFANA_DB_PASSWORD=${{ secrets.GRAFANA_DB_PASSWORD }}
|
|
||||||
GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }}
|
|
||||||
POSTGRES_PASSWORD=${{ secrets.PROD_POSTGRES_PASSWORD }}
|
|
||||||
POSTGRES_HOST=archiv-production-db-1
|
|
||||||
EOF
|
|
||||||
# Note: POSTGRES_HOST is derived from the Compose project name (archiv-production)
|
|
||||||
# and service name (db). A project rename requires updating this value.
|
|
||||||
chmod 600 /opt/familienarchiv/obs-secrets.env
|
|
||||||
|
|
||||||
- name: Validate observability compose config
|
- uses: ./.gitea/actions/reload-caddy
|
||||||
# Dry-run: resolves all variable substitutions and reports any missing
|
|
||||||
# required keys before containers start. Catches undefined variables and
|
|
||||||
# YAML errors in config files updated by the previous step.
|
|
||||||
# --env-file order: obs.env first (git-tracked defaults), obs-secrets.env
|
|
||||||
# second (CI-written secrets). Later files win on duplicate keys, so
|
|
||||||
# obs-secrets.env overrides POSTGRES_HOST set in obs.env.
|
|
||||||
# Keep in sync with the equivalent step in nightly.yml (#603).
|
|
||||||
run: |
|
|
||||||
docker compose \
|
|
||||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
|
||||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
|
||||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
|
||||||
config --quiet
|
|
||||||
|
|
||||||
- name: Start observability stack
|
- uses: ./.gitea/actions/smoke-test
|
||||||
# Runs with absolute paths so bind mounts resolve to stable host paths
|
with:
|
||||||
# that survive workspace wipes between runs (see ADR-016).
|
host: archiv.raddatz.cloud
|
||||||
# Non-secret config from obs.env (git-tracked); secrets from obs-secrets.env
|
|
||||||
# (written fresh from Gitea secrets above). --env-file order: obs.env first,
|
|
||||||
# obs-secrets.env second — later file wins on duplicate keys.
|
|
||||||
# Keep in sync with the equivalent step in nightly.yml (#603).
|
|
||||||
run: |
|
|
||||||
docker compose \
|
|
||||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
|
||||||
--env-file /opt/familienarchiv/infra/observability/obs.env \
|
|
||||||
--env-file /opt/familienarchiv/obs-secrets.env \
|
|
||||||
up -d --wait --remove-orphans
|
|
||||||
|
|
||||||
- name: Assert observability stack health
|
|
||||||
# docker compose up --wait covers services WITH healthcheck directives only.
|
|
||||||
# obs-promtail, obs-cadvisor, obs-node-exporter, and obs-glitchtip-worker have
|
|
||||||
# no healthcheck — they are considered "started" as soon as the process runs.
|
|
||||||
# This step explicitly asserts the five healthchecked critical services are
|
|
||||||
# healthy before the smoke test proceeds.
|
|
||||||
# Keep in sync with the equivalent step in nightly.yml (#603).
|
|
||||||
run: |
|
|
||||||
set -e
|
|
||||||
unhealthy=""
|
|
||||||
for svc in obs-loki obs-prometheus obs-grafana obs-tempo obs-glitchtip; do
|
|
||||||
status=$(docker inspect "$svc" --format '{{.State.Health.Status}}' 2>/dev/null || echo "missing")
|
|
||||||
if [ "$status" != "healthy" ]; then
|
|
||||||
echo "::error::$svc is not healthy (status: $status)"
|
|
||||||
unhealthy="$unhealthy $svc"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
[ -z "$unhealthy" ] || exit 1
|
|
||||||
echo "All critical observability services are healthy"
|
|
||||||
|
|
||||||
- name: Reload Caddy
|
|
||||||
# See nightly.yml — same rationale and mechanism: DooD job containers
|
|
||||||
# cannot call systemctl directly; nsenter via a privileged sibling
|
|
||||||
# container reaches the host systemd. Must run after deploy (so the
|
|
||||||
# latest Caddyfile is on disk) and before the smoke test (so the
|
|
||||||
# public surface reflects the current config). Alpine with pinned
|
|
||||||
# digest; reload not restart — see nightly.yml for full rationale.
|
|
||||||
run: |
|
|
||||||
docker run --rm --privileged --pid=host \
|
|
||||||
alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d \
|
|
||||||
sh -c 'apk add --no-cache util-linux -q && nsenter -t 1 -m -u -n -p -i -- /bin/systemctl reload caddy'
|
|
||||||
|
|
||||||
- name: Smoke test deployed environment
|
|
||||||
# See nightly.yml — same three checks, against the prod vhost.
|
|
||||||
# --resolve stored as a Bash array so "${RESOLVE[@]}" expands to two
|
|
||||||
# separate arguments; a quoted string would pass the flag and its value
|
|
||||||
# as one token and curl would reject it as an unknown option.
|
|
||||||
# Gateway detection via /proc/net/route — no iproute2 dependency.
|
|
||||||
# See nightly.yml for the full network topology explanation.
|
|
||||||
run: |
|
|
||||||
set -e
|
|
||||||
HOST="archiv.raddatz.cloud"
|
|
||||||
URL="https://$HOST"
|
|
||||||
HOST_IP=$(awk 'NR>1 && $2=="00000000"{h=$3;printf "%d.%d.%d.%d\n",strtonum("0x"substr(h,7,2)),strtonum("0x"substr(h,5,2)),strtonum("0x"substr(h,3,2)),strtonum("0x"substr(h,1,2));exit}' /proc/net/route)
|
|
||||||
[ -n "$HOST_IP" ] || { echo "ERROR: could not detect Docker bridge gateway via /proc/net/route"; exit 1; }
|
|
||||||
RESOLVE=(--resolve "$HOST:443:$HOST_IP")
|
|
||||||
echo "Smoke test: $URL (pinned to $HOST_IP via bridge gateway)"
|
|
||||||
curl -fsS "${RESOLVE[@]}" --max-time 10 "$URL/login" -o /dev/null
|
|
||||||
# Pin the preload-list-eligible HSTS value, not just header presence:
|
|
||||||
# a degraded `max-age=1` or a dropped `includeSubDomains; preload` must
|
|
||||||
# fail this check rather than pass it silently.
|
|
||||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
|
||||||
| grep -Eqi 'strict-transport-security:[[:space:]]*max-age=31536000.*includeSubDomains.*preload'
|
|
||||||
# Permissions-Policy denies APIs the app does not use (camera,
|
|
||||||
# microphone, geolocation). A regression that loosens or drops the
|
|
||||||
# header now fails the smoke step.
|
|
||||||
curl -fsS "${RESOLVE[@]}" --max-time 10 -I "$URL/" \
|
|
||||||
| grep -Eqi 'permissions-policy:[[:space:]]*camera=\(\),[[:space:]]*microphone=\(\),[[:space:]]*geolocation=\(\)'
|
|
||||||
status=$(curl -s "${RESOLVE[@]}" -o /dev/null -w "%{http_code}" --max-time 10 "$URL/actuator/health")
|
|
||||||
[ "$status" = "404" ] || { echo "expected 404 from /actuator/health, got $status"; exit 1; }
|
|
||||||
echo "All smoke checks passed"
|
|
||||||
|
|
||||||
- name: Cleanup env file
|
- name: Cleanup env file
|
||||||
# LOAD-BEARING: `if: always()` is the linchpin of the ADR-011
|
# LOAD-BEARING: `if: always()` is the linchpin of the ADR-011
|
||||||
|
|||||||
@@ -0,0 +1,69 @@
|
|||||||
|
# ADR-029: Composite actions for cross-workflow deploy logic
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
Accepted
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
The `nightly.yml` (staging) and `release.yml` (production) workflows shared three
|
||||||
|
blocks of deploy logic almost verbatim: the four observability-stack steps (deploy configs,
|
||||||
|
validate, start, assert health), the Caddy reload step, and the public-surface smoke
|
||||||
|
test. The only per-environment differences were secret names (`STAGING_*` vs `PROD_*`),
|
||||||
|
the `POSTGRES_HOST` value, and the smoke-test hostname.
|
||||||
|
|
||||||
|
This duplication was held together by `# Keep in sync with nightly.yml` comments — an
|
||||||
|
honour-system invariant. Any change (a new healthchecked service, a different copy flag,
|
||||||
|
a new secret) had to be applied in two places, and nothing enforced that it was. Issue #603
|
||||||
|
documents a real instance: the obs secret set had grown to five keys while a refactor draft
|
||||||
|
listed only four.
|
||||||
|
|
||||||
|
### Decision drivers
|
||||||
|
|
||||||
|
1. Cross-workflow deploy logic must have a single definition, enforced — not a
|
||||||
|
discipline-based "keep in sync" promise.
|
||||||
|
2. Per-environment variation must be expressed as explicit, typed inputs, not by forking
|
||||||
|
the whole step block.
|
||||||
|
3. The mechanism must work on the existing single-tenant self-hosted Gitea runner with no
|
||||||
|
new infrastructure.
|
||||||
|
|
||||||
|
### Alternatives considered
|
||||||
|
|
||||||
|
**A: Reusable workflow (`workflow_call`)** — Gitea supports called workflows. Rejected for
|
||||||
|
this case: reusable workflows run as a separate job with their own runner context, which
|
||||||
|
breaks the in-job, sequential `deploy → reload → smoke` ordering these steps rely on and
|
||||||
|
complicates passing the already-checked-out workspace. Composite actions run inline in the
|
||||||
|
calling job, preserving step order and the workspace.
|
||||||
|
|
||||||
|
**B: Shared shell script invoked from both workflows** — e.g. `scripts/deploy-obs.sh`.
|
||||||
|
Rejected: loses the typed-input contract and per-step CI log sections, and reintroduces
|
||||||
|
manual argument threading that is as error-prone as the duplication it replaces.
|
||||||
|
|
||||||
|
**C: Keep the `# Keep in sync` comments** — status quo. Rejected: unenforced; issue #603
|
||||||
|
is direct evidence it fails.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
Extract the shared logic into three single-responsibility Gitea composite actions under
|
||||||
|
`.gitea/actions/`: `deploy-obs` (five inputs), `reload-caddy` (no inputs), and `smoke-test`
|
||||||
|
(`host` input). Both workflows invoke each via a single `uses: ./.gitea/actions/<name>` call,
|
||||||
|
passing per-environment values as `with:` inputs. This is the repository's first composite
|
||||||
|
action and sets the convention; `docs/infrastructure/ci-gitea.md` documents it.
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
**Positive:**
|
||||||
|
- Shared deploy logic has one enforced definition; a change lands once and both
|
||||||
|
environments get it. The `# Keep in sync` comments are deleted.
|
||||||
|
- Per-environment variation is a typed input contract, not a forked block.
|
||||||
|
- Runs inline on the existing runner — no reusable-workflow job context, no new
|
||||||
|
infrastructure.
|
||||||
|
|
||||||
|
**Negative / constraints:**
|
||||||
|
- Workflows now depend on a checked-out `.gitea/actions/` tree: `actions/checkout` MUST run
|
||||||
|
before the first `uses: ./…` (a local action does not exist on disk until checkout).
|
||||||
|
- Secrets cannot be read from the `secrets.*` context inside a composite action; they must
|
||||||
|
be passed as inputs and mapped to `env:`. The `obs-secrets.env` heredoc therefore uses an
|
||||||
|
unquoted delimiter so `$VAR` expands at the shell layer.
|
||||||
|
- The `reload-caddy` pinned alpine digest now lives in the action, not the workflow file —
|
||||||
|
it must be added to Renovate's watch list so it does not go stale.
|
||||||
@@ -68,6 +68,8 @@ Job containers are unprivileged and do not share the host's PID/mount/network na
|
|||||||
|
|
||||||
Alpine is used instead of Ubuntu: ~5 MB vs ~70 MB, and the digest is pinned to a specific sha256 so any upstream change requires an explicit Renovate bump PR. `util-linux` (which ships `nsenter`) is not part of the Alpine base image but is installed at run time in ~1 s from the warm VPS cache.
|
Alpine is used instead of Ubuntu: ~5 MB vs ~70 MB, and the digest is pinned to a specific sha256 so any upstream change requires an explicit Renovate bump PR. `util-linux` (which ships `nsenter`) is not part of the Alpine base image but is installed at run time in ~1 s from the warm VPS cache.
|
||||||
|
|
||||||
|
This exact step now lives in the `reload-caddy` composite action (see [Composite actions](#composite-actions) below); both deploy workflows call it via `uses: ./.gitea/actions/reload-caddy`. The pinned digest moved with it, so Renovate's privileged-digest watch covers `.gitea/actions/**` as well as `.gitea/workflows/**`.
|
||||||
|
|
||||||
#### Why not `sudo systemctl` in the job container?
|
#### Why not `sudo systemctl` in the job container?
|
||||||
|
|
||||||
Job containers run as root inside an unprivileged Docker namespace. There is no systemd PID 1 inside the container — `systemctl` would attempt to reach a socket that does not exist. `sudo` is not present in container images and would not help even if it were.
|
Job containers run as root inside an unprivileged Docker namespace. There is no systemd PID 1 inside the container — `systemctl` would attempt to reach a socket that does not exist. `sudo` is not present in container images and would not help even if it were.
|
||||||
@@ -170,6 +172,72 @@ See `docs/DEPLOYMENT.md §3.1` and ADR-015 for the full setup rationale.
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Composite actions
|
||||||
|
|
||||||
|
The `nightly.yml` (staging) and `release.yml` (production) deploy workflows share their observability-stack deploy, Caddy reload, and smoke-test logic through three single-responsibility composite actions under `.gitea/actions/` (ADR-029). Before this, the shared logic was duplicated in both workflows and held together by `# Keep in sync with nightly.yml` comments — an unenforced honour-system invariant.
|
||||||
|
|
||||||
|
| Action | Inputs | Purpose |
|
||||||
|
|---|---|---|
|
||||||
|
| `deploy-obs` | `grafana_admin_password`, `grafana_db_password`, `glitchtip_secret_key`, `postgres_password`, `postgres_host` | Deploy obs configs + secrets to `/opt/familienarchiv`, validate the compose config, start the stack, assert the five healthchecked services |
|
||||||
|
| `reload-caddy` | — | Reload host Caddy via the privileged-sibling + nsenter pattern |
|
||||||
|
| `smoke-test` | `host` | Verify the public surface (login reachable, HSTS pinned, Permissions-Policy pinned, `/actuator/health` → 404) |
|
||||||
|
|
||||||
|
A workflow calls them by relative path, passing per-environment values as `with:` inputs:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- uses: ./.gitea/actions/deploy-obs
|
||||||
|
with:
|
||||||
|
grafana_admin_password: ${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||||
|
grafana_db_password: ${{ secrets.GRAFANA_DB_PASSWORD }}
|
||||||
|
glitchtip_secret_key: ${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||||
|
postgres_password: ${{ secrets.STAGING_POSTGRES_PASSWORD }}
|
||||||
|
postgres_host: archiv-staging-db-1
|
||||||
|
- uses: ./.gitea/actions/reload-caddy
|
||||||
|
- uses: ./.gitea/actions/smoke-test
|
||||||
|
with:
|
||||||
|
host: staging.raddatz.cloud
|
||||||
|
```
|
||||||
|
|
||||||
|
### Checkout-first ordering rule
|
||||||
|
|
||||||
|
A local composite action (`uses: ./…`) only exists on disk **after** the repo is checked out. `actions/checkout@v4` MUST therefore be the **first step** of any job that calls one — if a future reorder moves checkout later, every `uses: ./.gitea/actions/…` call fails because the action file is not yet on disk. Both deploy workflows pin checkout as step 1 for exactly this reason.
|
||||||
|
|
||||||
|
### Secrets inside composite actions
|
||||||
|
|
||||||
|
The `secrets.*` context is **not** available inside a composite action. Secrets are passed in as `inputs`, mapped to an `env:` block, and referenced as `$VAR`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
inputs:
|
||||||
|
grafana_admin_password:
|
||||||
|
required: true # no default — a missing secret must fail loudly, never fall back to empty
|
||||||
|
runs:
|
||||||
|
using: composite
|
||||||
|
steps:
|
||||||
|
- shell: bash # composite steps do NOT default the shell — always declare it
|
||||||
|
env:
|
||||||
|
GRAFANA_ADMIN_PASSWORD: ${{ inputs.grafana_admin_password }}
|
||||||
|
run: |
|
||||||
|
cat > obs-secrets.env <<EOF # unquoted EOF — $VAR expands at the shell layer
|
||||||
|
GRAFANA_ADMIN_PASSWORD=$GRAFANA_ADMIN_PASSWORD
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
Two load-bearing details:
|
||||||
|
|
||||||
|
- **Unquoted heredoc delimiter (`<<EOF`, not `<<'EOF'`).** With a quoted delimiter the shell writes the literal string `$GRAFANA_ADMIN_PASSWORD`, and `docker compose config --quiet` still passes (the variable is *present, just wrong*). The `deploy-obs` action runs a five-key **non-empty** check (`grep -Eq "^KEY=.+"`) immediately after writing `obs-secrets.env`; note that this pattern catches a missing or empty value, but an unexpanded literal `$VAR` is itself non-empty and still matches, so the unquoted delimiter remains the real safeguard. `chmod 600` is applied after the file is written and checked, so the file is left readable by its owner only.
|
||||||
|
- **Every `run:` step declares `shell: bash`.** Composite actions do not inherit the workflow's default shell; a step without it fails to run.
|
||||||
|
|
||||||
|
### Adding an input to an action
|
||||||
|
|
||||||
|
To thread a new per-environment value (e.g. a new secret) through `deploy-obs`:
|
||||||
|
|
||||||
|
1. Add it under `inputs:` in `.gitea/actions/deploy-obs/action.yml` with `required: true` and **no `default:`**.
|
||||||
|
2. Map it in the relevant step's `env:` block: `NEW_KEY: ${{ inputs.new_key }}`.
|
||||||
|
3. Reference it as `$NEW_KEY` in the `run:` script — add a `NEW_KEY=$NEW_KEY` line to the heredoc **and** a matching entry to the five-key guard loop.
|
||||||
|
4. Pass it from **both** workflows' `with:` blocks. That is the whole point of the action: the contract lives in one place, so neither environment can silently drift.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Gitea vs GitHub Actions Differences
|
## Gitea vs GitHub Actions Differences
|
||||||
|
|
||||||
### Context Variable Names
|
### Context Variable Names
|
||||||
|
|||||||
@@ -14,8 +14,8 @@
|
|||||||
"automerge": false
|
"automerge": false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"description": "Digest bumps for images used in privileged CI steps (--privileged --pid=host) must be reviewed manually — a compromised image has root-equivalent host access.",
|
"description": "Digest bumps for images used in privileged CI steps (--privileged --pid=host) must be reviewed manually — a compromised image has root-equivalent host access. Covers .gitea/actions/** too: the reload-caddy alpine digest now lives in a composite action (#603).",
|
||||||
"matchPaths": [".gitea/workflows/**"],
|
"matchPaths": [".gitea/workflows/**", ".gitea/actions/**"],
|
||||||
"matchUpdateTypes": ["digest"],
|
"matchUpdateTypes": ["digest"],
|
||||||
"automerge": false,
|
"automerge": false,
|
||||||
"reviewersFromCodeOwners": false
|
"reviewersFromCodeOwners": false
|
||||||
|
|||||||
Reference in New Issue
Block a user