Compare commits
37 Commits
fix/issue-
...
79735e23e0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
79735e23e0 | ||
|
|
df37113d38 | ||
|
|
c7d2eeb3f0 | ||
|
|
4e94d85d7e | ||
|
|
dec6b8139b | ||
|
|
7b7d0c92a8 | ||
|
|
448c3cdcdb | ||
|
|
7e52494880 | ||
|
|
1181b97f94 | ||
|
|
458968ded5 | ||
|
|
23515b8542 | ||
|
|
e4ac5f08e7 | ||
|
|
15ef079eff | ||
|
|
56c3e51657 | ||
|
|
2cc8b1174b | ||
|
|
1fc47888d5 | ||
|
|
d435b2b0e4 | ||
|
|
fed427dc4a | ||
|
|
cf78ab2f8e | ||
|
|
c8883d0e40 | ||
|
|
7154092547 | ||
|
|
ada3a3ccaf | ||
|
|
8cf3a2a726 | ||
|
|
553e2f8898 | ||
|
|
4a7349543a | ||
|
|
f15e004645 | ||
|
|
b137e3e72d | ||
|
|
4c8a23ff14 | ||
|
|
d7d225af77 | ||
|
|
4358997482 | ||
|
|
7c2e75facc | ||
|
|
7b05b9d5a0 | ||
|
|
20edc0474c | ||
|
|
fa191b5c05 | ||
|
|
2139d600f5 | ||
|
|
68e4ff4121 | ||
|
|
0a1d709c5f |
15
.env.example
15
.env.example
@@ -29,16 +29,17 @@ OCR_TRAINING_TOKEN=change-me-in-production
|
||||
# --- Observability ---
|
||||
# Optional stack — start with: docker compose -f docker-compose.observability.yml up -d
|
||||
# Requires the main stack to already be running (docker compose up -d creates archiv-net).
|
||||
# In production the stack is managed from /opt/familienarchiv/ (see docs/DEPLOYMENT.md §4).
|
||||
|
||||
# Ports for host access
|
||||
PORT_GRAFANA=3001
|
||||
PORT_GRAFANA=3003
|
||||
PORT_GLITCHTIP=3002
|
||||
PORT_PROMETHEUS=9090
|
||||
|
||||
# Grafana admin password — change this before exposing Grafana beyond localhost
|
||||
GRAFANA_ADMIN_PASSWORD=changeme
|
||||
|
||||
# GlitchTip domain — production: use https://grafana.raddatz.cloud (must match Caddy vhost)
|
||||
# GlitchTip domain — production: use https://glitchtip.archiv.raddatz.cloud (must match Caddy vhost)
|
||||
GLITCHTIP_DOMAIN=http://localhost:3002
|
||||
|
||||
# GlitchTip secret key — Django SECRET_KEY equivalent, used to sign sessions and tokens.
|
||||
@@ -47,9 +48,19 @@ GLITCHTIP_DOMAIN=http://localhost:3002
|
||||
# Generate with: python3 -c "import secrets; print(secrets.token_hex(50))"
|
||||
GLITCHTIP_SECRET_KEY=changeme-generate-a-real-secret
|
||||
|
||||
# PostgreSQL hostname for GlitchTip's db-init job and workers.
|
||||
# Override when only the staging stack is running (container name differs from archive-db).
|
||||
# Default (archive-db) is correct for production with the full stack up.
|
||||
POSTGRES_HOST=archive-db
|
||||
|
||||
# $$ escaping note: passwords in /opt/familienarchiv/.env that contain a literal '$' must
|
||||
# use '$$' so Docker Compose does not expand them as variable references.
|
||||
# Example: a password 'p@$$word' should be written as 'p@$$$$word' in the .env file.
|
||||
|
||||
# Error reporting DSNs — leave empty to disable the SDK (safe default).
|
||||
# SENTRY_DSN: backend (Spring Boot) — used by the GlitchTip/Sentry Java SDK
|
||||
SENTRY_DSN=
|
||||
SENTRY_TRACES_SAMPLE_RATE=
|
||||
# VITE_SENTRY_DSN: frontend (SvelteKit) — injected at build time via Vite
|
||||
VITE_SENTRY_DSN=
|
||||
# Sentry/GlitchTip auth token for source map upload at build time (optional)
|
||||
|
||||
@@ -305,6 +305,7 @@ jobs:
|
||||
MAIL_PORT=1025
|
||||
APP_MAIL_FROM=noreply@local
|
||||
IMPORT_HOST_DIR=/tmp/dummy-import
|
||||
COMPOSE_NETWORK_NAME=test-idem-archiv-net
|
||||
EOF
|
||||
|
||||
- name: Bring up minio
|
||||
|
||||
@@ -30,6 +30,9 @@ name: nightly
|
||||
# STAGING_OCR_TRAINING_TOKEN
|
||||
# STAGING_APP_ADMIN_USERNAME
|
||||
# STAGING_APP_ADMIN_PASSWORD
|
||||
# GRAFANA_ADMIN_PASSWORD
|
||||
# GLITCHTIP_SECRET_KEY
|
||||
# SENTRY_DSN (set after GlitchTip first-run; empty = Sentry disabled)
|
||||
|
||||
on:
|
||||
schedule:
|
||||
@@ -74,6 +77,14 @@ jobs:
|
||||
MAIL_STARTTLS_ENABLE=false
|
||||
APP_MAIL_FROM=noreply@staging.raddatz.cloud
|
||||
IMPORT_HOST_DIR=/srv/familienarchiv-staging/import
|
||||
POSTGRES_USER=archiv
|
||||
PORT_GRAFANA=3003
|
||||
PORT_GLITCHTIP=3002
|
||||
PORT_PROMETHEUS=9090
|
||||
GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||
GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||
GLITCHTIP_DOMAIN=https://glitchtip.archiv.raddatz.cloud
|
||||
SENTRY_DSN=${{ secrets.SENTRY_DSN }}
|
||||
EOF
|
||||
|
||||
- name: Verify backend /import:ro mount is wired
|
||||
@@ -120,6 +131,56 @@ jobs:
|
||||
--profile staging \
|
||||
up -d --wait --remove-orphans
|
||||
|
||||
- name: Deploy observability configs
|
||||
# Copies the compose file and config tree from the workspace checkout
|
||||
# into /opt/familienarchiv/ — the permanent location that persists
|
||||
# between CI runs. Containers started in the next step bind-mount
|
||||
# from there, so a future workspace wipe cannot corrupt a running
|
||||
# config file. Secrets are read from /opt/familienarchiv/.env (managed
|
||||
# separately on the server; not written or deleted by CI).
|
||||
run: |
|
||||
mkdir -p /opt/familienarchiv/infra
|
||||
cp -r infra/observability /opt/familienarchiv/infra/
|
||||
cp docker-compose.observability.yml /opt/familienarchiv/
|
||||
|
||||
- name: Validate observability compose config
|
||||
# Dry-run: resolves all variable substitutions from /opt/familienarchiv/.env
|
||||
# and reports any missing required keys before containers start. Catches
|
||||
# truncated passwords (missing $$ escaping), undefined variables, and YAML
|
||||
# errors in config files updated by the previous step.
|
||||
run: |
|
||||
docker compose \
|
||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
||||
config --quiet
|
||||
|
||||
- name: Start observability stack
|
||||
# Runs from /opt/familienarchiv/ so bind mounts resolve to stable
|
||||
# host paths that survive workspace wipes between nightly runs.
|
||||
# Docker Compose reads /opt/familienarchiv/.env automatically.
|
||||
run: |
|
||||
docker compose \
|
||||
-f /opt/familienarchiv/docker-compose.observability.yml \
|
||||
up -d --wait --remove-orphans
|
||||
|
||||
- name: Assert observability stack health
|
||||
# docker compose up --wait covers services WITH healthcheck directives only.
|
||||
# obs-promtail, obs-cadvisor, obs-node-exporter, and obs-glitchtip-worker have
|
||||
# no healthcheck — they are considered "started" as soon as the process runs.
|
||||
# This step explicitly asserts the four healthchecked critical services are
|
||||
# healthy before the smoke test proceeds.
|
||||
run: |
|
||||
set -e
|
||||
unhealthy=""
|
||||
for svc in obs-loki obs-prometheus obs-grafana obs-tempo; do
|
||||
status=$(docker inspect "$svc" --format '{{.State.Health.Status}}' 2>/dev/null || echo "missing")
|
||||
if [ "$status" != "healthy" ]; then
|
||||
echo "::error::$svc is not healthy (status: $status)"
|
||||
unhealthy="$unhealthy $svc"
|
||||
fi
|
||||
done
|
||||
[ -z "$unhealthy" ] || exit 1
|
||||
echo "All critical observability services are healthy"
|
||||
|
||||
- name: Reload Caddy
|
||||
# Apply any committed Caddyfile changes before smoke-testing the
|
||||
# public surface. Without this step, a Caddyfile edit lands in the
|
||||
|
||||
@@ -34,6 +34,9 @@ name: release
|
||||
# MAIL_PORT
|
||||
# MAIL_USERNAME
|
||||
# MAIL_PASSWORD
|
||||
# GRAFANA_ADMIN_PASSWORD
|
||||
# GLITCHTIP_SECRET_KEY
|
||||
# SENTRY_DSN (set after GlitchTip first-run; empty = Sentry disabled)
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -72,6 +75,14 @@ jobs:
|
||||
MAIL_STARTTLS_ENABLE=true
|
||||
APP_MAIL_FROM=noreply@raddatz.cloud
|
||||
IMPORT_HOST_DIR=/srv/familienarchiv-production/import
|
||||
POSTGRES_USER=archiv
|
||||
PORT_GRAFANA=3003
|
||||
PORT_GLITCHTIP=3002
|
||||
PORT_PROMETHEUS=9090
|
||||
GRAFANA_ADMIN_PASSWORD=${{ secrets.GRAFANA_ADMIN_PASSWORD }}
|
||||
GLITCHTIP_SECRET_KEY=${{ secrets.GLITCHTIP_SECRET_KEY }}
|
||||
GLITCHTIP_DOMAIN=https://glitchtip.archiv.raddatz.cloud
|
||||
SENTRY_DSN=${{ secrets.SENTRY_DSN }}
|
||||
EOF
|
||||
|
||||
- name: Build images
|
||||
@@ -93,6 +104,13 @@ jobs:
|
||||
--env-file .env.production \
|
||||
up -d --wait --remove-orphans
|
||||
|
||||
- name: Start observability stack
|
||||
run: |
|
||||
docker compose \
|
||||
-f docker-compose.observability.yml \
|
||||
--env-file .env.production \
|
||||
up -d --wait --remove-orphans
|
||||
|
||||
- name: Reload Caddy
|
||||
# See nightly.yml — same rationale and mechanism: DooD job containers
|
||||
# cannot call systemctl directly; nsenter via a privileged sibling
|
||||
|
||||
@@ -29,6 +29,20 @@
|
||||
<properties>
|
||||
<java.version>21</java.version>
|
||||
</properties>
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<!-- opentelemetry-spring-boot-starter:2.27.0 was built against opentelemetry-api:1.61.0,
|
||||
but Spring Boot 4.0.0 BOM only manages 1.55.0 (missing GlobalOpenTelemetry.getOrNoop()).
|
||||
Import the core OTel BOM here to override it before the Spring Boot BOM applies. -->
|
||||
<dependency>
|
||||
<groupId>io.opentelemetry</groupId>
|
||||
<artifactId>opentelemetry-bom</artifactId>
|
||||
<version>1.61.0</version>
|
||||
<type>pom</type>
|
||||
<scope>import</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
@@ -224,6 +238,15 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Sentry error reporting (GlitchTip-compatible) — sentry-spring-boot-4 is the
|
||||
Spring Boot 4 / Spring Framework 7 compatible module (replaces the jakarta starter
|
||||
which crashes with SF7 due to bean-name generation for triply-nested @Import classes) -->
|
||||
<dependency>
|
||||
<groupId>io.sentry</groupId>
|
||||
<artifactId>sentry-spring-boot-4</artifactId>
|
||||
<version>8.41.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ package org.raddatz.familienarchiv.exception;
|
||||
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import io.sentry.Sentry;
|
||||
import jakarta.validation.ConstraintViolationException;
|
||||
import org.raddatz.familienarchiv.exception.DomainException;
|
||||
import org.raddatz.familienarchiv.exception.ErrorCode;
|
||||
@@ -63,6 +64,7 @@ public class GlobalExceptionHandler {
|
||||
|
||||
@ExceptionHandler(Exception.class)
|
||||
public ResponseEntity<ErrorResponse> handleGeneric(Exception ex) {
|
||||
Sentry.captureException(ex);
|
||||
log.error("Unhandled exception", ex);
|
||||
return ResponseEntity.internalServerError()
|
||||
.body(new ErrorResponse(ErrorCode.INTERNAL_ERROR, "An unexpected error occurred"));
|
||||
|
||||
@@ -118,3 +118,12 @@ ocr:
|
||||
sender-model:
|
||||
activation-threshold: 100
|
||||
retrain-delta: 50
|
||||
|
||||
sentry:
|
||||
dsn: ${SENTRY_DSN:}
|
||||
environment: ${SPRING_PROFILES_ACTIVE:dev}
|
||||
traces-sample-rate: ${SENTRY_TRACES_SAMPLE_RATE:1.0}
|
||||
send-default-pii: false
|
||||
enable-tracing: true
|
||||
ignored-exceptions-for-type:
|
||||
- org.raddatz.familienarchiv.exception.DomainException
|
||||
|
||||
@@ -1,14 +1,18 @@
|
||||
package org.raddatz.familienarchiv;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.testcontainers.service.connection.ServiceConnection;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.testcontainers.containers.PostgreSQLContainer;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
@@ -17,9 +21,18 @@ class ApplicationContextTest {
|
||||
@MockitoBean
|
||||
S3Client s3Client;
|
||||
|
||||
@Autowired
|
||||
ApplicationContext ctx;
|
||||
|
||||
@Test
|
||||
void contextLoads() {
|
||||
// verifies that the Spring context starts successfully with all beans wired,
|
||||
// Flyway migrations applied, and no configuration errors
|
||||
}
|
||||
|
||||
@Test
|
||||
void sentry_is_disabled_when_no_dsn_is_configured() {
|
||||
// application-test.yaml has no sentry.dsn — SDK must stay inactive so tests are clean
|
||||
assertThat(io.sentry.Sentry.isEnabled()).isFalse();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
package org.raddatz.familienarchiv.audit;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.raddatz.familienarchiv.PostgresContainerConfig;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.test.annotation.DirtiesContext;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.transaction.support.TransactionTemplate;
|
||||
@@ -18,7 +18,6 @@ import static org.awaitility.Awaitility.await;
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
@DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD)
|
||||
class AuditServiceIntegrationTest {
|
||||
|
||||
@MockitoBean S3Client s3Client;
|
||||
@@ -26,6 +25,11 @@ class AuditServiceIntegrationTest {
|
||||
@Autowired AuditLogRepository auditLogRepository;
|
||||
@Autowired TransactionTemplate transactionTemplate;
|
||||
|
||||
@BeforeEach
|
||||
void resetAuditLog() {
|
||||
auditLogRepository.deleteAll();
|
||||
}
|
||||
|
||||
@Test
|
||||
void logAfterCommit_writes_ANNOTATION_CREATED_row_after_transaction_commits() {
|
||||
transactionTemplate.execute(status -> {
|
||||
|
||||
@@ -12,9 +12,9 @@ import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.data.domain.PageRequest;
|
||||
import org.springframework.test.annotation.DirtiesContext;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import java.time.LocalDate;
|
||||
@@ -33,7 +33,7 @@ import static org.assertj.core.api.Assertions.assertThat;
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
@DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD)
|
||||
@Transactional
|
||||
class DocumentSearchPagedIntegrationTest {
|
||||
|
||||
private static final int FIXTURE_SIZE = 120;
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
package org.raddatz.familienarchiv.exception;
|
||||
|
||||
import io.sentry.Sentry;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.InjectMocks;
|
||||
import org.mockito.MockedStatic;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.mockStatic;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class GlobalExceptionHandlerTest {
|
||||
|
||||
@InjectMocks
|
||||
private GlobalExceptionHandler handler;
|
||||
|
||||
@Test
|
||||
void handleGeneric_captures_exception_in_sentry_and_returns_500() {
|
||||
RuntimeException ex = new RuntimeException("unexpected failure");
|
||||
|
||||
try (MockedStatic<Sentry> sentryMock = mockStatic(Sentry.class)) {
|
||||
ResponseEntity<GlobalExceptionHandler.ErrorResponse> response = handler.handleGeneric(ex);
|
||||
|
||||
sentryMock.verify(() -> Sentry.captureException(ex));
|
||||
assertThat(response.getStatusCode().value()).isEqualTo(500);
|
||||
assertThat(response.getBody()).isNotNull();
|
||||
assertThat(response.getBody().code()).isEqualTo(ErrorCode.INTERNAL_ERROR);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -19,9 +19,9 @@ import org.springframework.context.annotation.Import;
|
||||
import org.springframework.security.authentication.UsernamePasswordAuthenticationToken;
|
||||
import org.springframework.security.core.authority.SimpleGrantedAuthority;
|
||||
import org.springframework.security.core.context.SecurityContextHolder;
|
||||
import org.springframework.test.annotation.DirtiesContext;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import java.util.List;
|
||||
@@ -32,7 +32,7 @@ import static org.assertj.core.api.Assertions.assertThat;
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
@DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD)
|
||||
@Transactional
|
||||
class GeschichteServiceIntegrationTest {
|
||||
|
||||
@MockitoBean
|
||||
|
||||
@@ -8,9 +8,9 @@ import org.raddatz.familienarchiv.person.PersonRepository;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.test.annotation.DirtiesContext;
|
||||
import org.springframework.test.context.ActiveProfiles;
|
||||
import org.springframework.test.context.bean.override.mockito.MockitoBean;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import software.amazon.awssdk.services.s3.S3Client;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
@@ -18,7 +18,7 @@ import static org.assertj.core.api.Assertions.assertThat;
|
||||
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.NONE)
|
||||
@ActiveProfiles("test")
|
||||
@Import(PostgresContainerConfig.class)
|
||||
@DirtiesContext(classMode = DirtiesContext.ClassMode.AFTER_EACH_TEST_METHOD)
|
||||
@Transactional
|
||||
class PersonServiceIntegrationTest {
|
||||
|
||||
@MockitoBean S3Client s3Client;
|
||||
|
||||
@@ -142,7 +142,7 @@ services:
|
||||
container_name: obs-grafana
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "127.0.0.1:${PORT_GRAFANA:-3001}:3000"
|
||||
- "127.0.0.1:${PORT_GRAFANA:-3003}:3000"
|
||||
environment:
|
||||
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-changeme}
|
||||
GF_USERS_ALLOW_SIGN_UP: "false"
|
||||
@@ -184,7 +184,7 @@ services:
|
||||
- obs-net
|
||||
|
||||
obs-glitchtip:
|
||||
image: glitchtip/glitchtip:v4
|
||||
image: glitchtip/glitchtip:6.1.6
|
||||
container_name: obs-glitchtip
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
@@ -193,7 +193,7 @@ services:
|
||||
obs-glitchtip-db-init:
|
||||
condition: service_completed_successfully
|
||||
environment:
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@archive-db:5432/glitchtip
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST:-archive-db}:5432/glitchtip
|
||||
REDIS_URL: redis://obs-redis:6379/0
|
||||
SECRET_KEY: ${GLITCHTIP_SECRET_KEY}
|
||||
GLITCHTIP_DOMAIN: ${GLITCHTIP_DOMAIN:-http://localhost:3002}
|
||||
@@ -207,7 +207,7 @@ services:
|
||||
- obs-net
|
||||
|
||||
obs-glitchtip-worker:
|
||||
image: glitchtip/glitchtip:v4
|
||||
image: glitchtip/glitchtip:6.1.6
|
||||
container_name: obs-glitchtip-worker
|
||||
restart: unless-stopped
|
||||
command: ./bin/run-celery-with-beat.sh
|
||||
@@ -215,7 +215,7 @@ services:
|
||||
obs-redis:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@archive-db:5432/glitchtip
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST:-archive-db}:5432/glitchtip
|
||||
REDIS_URL: redis://obs-redis:6379/0
|
||||
SECRET_KEY: ${GLITCHTIP_SECRET_KEY}
|
||||
networks:
|
||||
@@ -229,10 +229,10 @@ services:
|
||||
environment:
|
||||
PGPASSWORD: ${POSTGRES_PASSWORD}
|
||||
command: >
|
||||
sh -c "psql -h archive-db -U ${POSTGRES_USER} -tc
|
||||
sh -c "psql -h ${POSTGRES_HOST:-archive-db} -U ${POSTGRES_USER} -tc
|
||||
\"SELECT 1 FROM pg_database WHERE datname = 'glitchtip'\" |
|
||||
grep -q 1 ||
|
||||
psql -h archive-db -U ${POSTGRES_USER} -c \"CREATE DATABASE glitchtip;\""
|
||||
psql -h ${POSTGRES_HOST:-archive-db} -U ${POSTGRES_USER} -c \"CREATE DATABASE glitchtip;\""
|
||||
networks:
|
||||
- archiv-net
|
||||
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
networks:
|
||||
archiv-net:
|
||||
driver: bridge
|
||||
name: ${COMPOSE_NETWORK_NAME:-archiv-net}
|
||||
|
||||
volumes:
|
||||
postgres-data:
|
||||
@@ -212,10 +213,11 @@ services:
|
||||
APP_MAIL_FROM: ${APP_MAIL_FROM:-noreply@raddatz.cloud}
|
||||
SPRING_MAIL_PROPERTIES_MAIL_SMTP_AUTH: ${MAIL_SMTP_AUTH:-true}
|
||||
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-true}
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: http://tempo:4317
|
||||
networks:
|
||||
- archiv-net
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -qO- http://localhost:8080/actuator/health | grep -q UP || exit 1"]
|
||||
test: ["CMD-SHELL", "wget -qO- http://localhost:8081/actuator/health | grep -q UP || exit 1"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
@@ -147,6 +147,8 @@ services:
|
||||
SPRING_MAIL_PROPERTIES_MAIL_SMTP_STARTTLS_ENABLE: ${MAIL_STARTTLS_ENABLE:-false}
|
||||
APP_OCR_BASE_URL: http://ocr-service:8000
|
||||
APP_OCR_TRAINING_TOKEN: "${OCR_TRAINING_TOKEN:-}"
|
||||
SENTRY_DSN: ${SENTRY_DSN:-}
|
||||
SENTRY_TRACES_SAMPLE_RATE: ${SENTRY_TRACES_SAMPLE_RATE:-1.0}
|
||||
# Observability: send traces to Tempo inside archiv-net (OTLP gRPC port 4317)
|
||||
# Tempo is defined in docker-compose.observability.yml (future issue).
|
||||
# OTLP failures are non-fatal — backend starts cleanly without the observability stack.
|
||||
|
||||
@@ -43,7 +43,7 @@ graph TD
|
||||
- SSE notifications transit Caddy (browser → Caddy → backend); the backend is never reachable directly from the public internet. The SvelteKit SSR layer is bypassed for SSE, but Caddy is not.
|
||||
- The Caddyfile responds `404` on `/actuator/*` (defense in depth). Internal monitoring scrapes the backend on the docker network, not through Caddy.
|
||||
- Production and staging cohabit on the same host via docker compose project names: `archiv-production` (ports 8080/3000) and `archiv-staging` (ports 8081/3001).
|
||||
- An optional observability stack (Prometheus, Node Exporter, cAdvisor) runs as a separate compose file: `docker compose -f docker-compose.observability.yml up -d`. It joins `archiv-net` and scrapes the backend's management port (`:8081`). Configuration lives under `infra/observability/`.
|
||||
- An optional observability stack (Prometheus, Node Exporter, cAdvisor, Loki, Tempo, Grafana, GlitchTip) runs as a separate compose file. Configuration lives under `infra/observability/`. In production and CI, the stack is managed from `/opt/familienarchiv/` (CI copies it there on every nightly run) so bind mounts survive workspace wipes — see §4 for the ops procedure.
|
||||
|
||||
### OCR memory requirements
|
||||
|
||||
@@ -142,7 +142,8 @@ All vars are set in `.env` at the repo root (copy from `.env.example`). The back
|
||||
| Variable | Purpose | Default | Required? | Sensitive? |
|
||||
|---|---|---|---|---|
|
||||
| `PORT_PROMETHEUS` | Host port for the Prometheus UI (bound to `127.0.0.1` only) | `9090` | — | — |
|
||||
| `PORT_GRAFANA` | Host port for the Grafana UI (bound to `127.0.0.1` only) | `3001` | — | — |
|
||||
| `PORT_GRAFANA` | Host port for the Grafana UI (bound to `127.0.0.1` only) | `3003` | — | — |
|
||||
| `POSTGRES_HOST` | PostgreSQL hostname for GlitchTip's db-init job and workers. Override when only the staging stack is running and `archive-db` is not resolvable by that name. | `archive-db` | — | — |
|
||||
| `GRAFANA_ADMIN_PASSWORD` | Grafana `admin` user password | `changeme` | YES (prod) | YES |
|
||||
| `PORT_GLITCHTIP` | Host port for the GlitchTip UI (bound to `127.0.0.1` only) | `3002` | — | — |
|
||||
| `GLITCHTIP_DOMAIN` | Public-facing base URL for GlitchTip (used in email links and CORS) | `http://localhost:3002` | YES (prod) | — |
|
||||
@@ -193,6 +194,27 @@ curl -fsSL https://tailscale.com/install.sh | sh && tailscale up
|
||||
# files to disk during execution (cleaned up unconditionally on completion).
|
||||
# A multi-tenant runner would need to switch to stdin-piped env files.
|
||||
# (See https://docs.gitea.com/usage/actions/quickstart for the register step.)
|
||||
|
||||
# Runner workspace directory — required for DooD bind-mount resolution (ADR-015).
|
||||
# act_runner stores job workspaces here so that docker compose bind mounts resolve
|
||||
# to real host paths. The path must be identical on the host and inside job containers.
|
||||
mkdir -p /srv/gitea-workspace
|
||||
# Also add this volume line to the runner service in ~/docker/gitea/compose.yaml:
|
||||
# volumes:
|
||||
# - /srv/gitea-workspace:/srv/gitea-workspace
|
||||
# See runner-config.yaml (workdir_parent + valid_volumes + options) and ADR-015.
|
||||
|
||||
# Observability config permanent directory — the nightly CI job copies
|
||||
# docker-compose.observability.yml and infra/observability/ here on every run.
|
||||
# The obs stack is always started from this path, not from the workspace.
|
||||
# See ADR-016 for why this directory is used instead of a server-pull approach.
|
||||
mkdir -p /opt/familienarchiv/infra
|
||||
|
||||
# ⚠ IMPORTANT: after any change to runner-config.yaml (valid_volumes, options, workdir_parent),
|
||||
# restart the Gitea Act runner on the host for the new config to take effect:
|
||||
# systemctl restart gitea-runner
|
||||
# Until restarted, job containers are spawned with the old config and any new bind mounts
|
||||
# (e.g. /opt/familienarchiv) will not be available inside job steps.
|
||||
```
|
||||
|
||||
### 3.2 DNS records
|
||||
@@ -223,6 +245,9 @@ git.raddatz.cloud A <server IP>
|
||||
| `MAIL_PORT` | release.yml | typically `587` |
|
||||
| `MAIL_USERNAME` | release.yml | SMTP user |
|
||||
| `MAIL_PASSWORD` | release.yml | SMTP password |
|
||||
| `GRAFANA_ADMIN_PASSWORD` | both | Grafana `admin` login — generate a strong password |
|
||||
| `GLITCHTIP_SECRET_KEY` | both | Django secret key — `openssl rand -hex 32` |
|
||||
| `SENTRY_DSN` | both | GlitchTip project DSN — set after first-run (§4); leave empty to keep Sentry disabled |
|
||||
|
||||
### 3.4 First deploy
|
||||
|
||||
@@ -272,13 +297,43 @@ docker compose logs --tail=200 <service>
|
||||
|
||||
### Observability stack
|
||||
|
||||
An observability stack is available via `docker-compose.observability.yml`. Configuration lives under `infra/observability/`. Start it after the main stack is up (which creates `archiv-net`):
|
||||
An observability stack is available via `docker-compose.observability.yml`. Configuration lives under `infra/observability/`.
|
||||
|
||||
#### Dev — start from the workspace
|
||||
|
||||
```bash
|
||||
docker compose up -d # creates archiv-net
|
||||
docker compose -f docker-compose.observability.yml up -d
|
||||
```
|
||||
|
||||
#### Production — managed from `/opt/familienarchiv/`
|
||||
|
||||
The nightly CI job copies `docker-compose.observability.yml` and `infra/observability/` to `/opt/familienarchiv/` on every run, then starts the stack from there. Bind mounts in the compose file resolve to `/opt/familienarchiv/infra/observability/…` on the host, which survives workspace wipes between CI runs (see ADR-016).
|
||||
|
||||
The obs stack reads secrets from `/opt/familienarchiv/.env` (Docker Compose auto-reads this file when launched from that directory). This file is managed by the operator — CI does **not** write or delete it.
|
||||
|
||||
**Required keys in `/opt/familienarchiv/.env`:**
|
||||
|
||||
| Key | Example / notes |
|
||||
|---|---|
|
||||
| `GRAFANA_ADMIN_PASSWORD` | Strong unique password |
|
||||
| `GLITCHTIP_SECRET_KEY` | `python3 -c "import secrets; print(secrets.token_hex(32))"` |
|
||||
| `GLITCHTIP_DOMAIN` | `https://glitchtip.archiv.raddatz.cloud` — must match the Caddy vhost |
|
||||
| `POSTGRES_USER` | Must match the `archiv` user set in `.env.staging` / `.env.production` |
|
||||
| `POSTGRES_PASSWORD` | Must match the running PostgreSQL container's password |
|
||||
| `PORT_GRAFANA` | `3003` (staging default; 3001 was used by staging frontend) |
|
||||
| `PORT_GLITCHTIP` | `3002` |
|
||||
| `PORT_PROMETHEUS` | `9090` |
|
||||
| `SENTRY_DSN` | Set after GlitchTip first-run; leave empty to disable |
|
||||
|
||||
**`$$` escaping rule:** passwords that contain a literal `$` must use `$$` in this file so Docker Compose does not expand them as variable references. Example: a password `p@$word` must be written as `p@$$word`. Failure to escape produces a silently truncated password — Grafana or GlitchTip will start but reject logins.
|
||||
|
||||
To start or restart the obs stack manually on the server:
|
||||
|
||||
```bash
|
||||
docker compose -f /opt/familienarchiv/docker-compose.observability.yml up -d --wait --remove-orphans
|
||||
```
|
||||
|
||||
Current services:
|
||||
|
||||
| Service | Image | Purpose |
|
||||
@@ -299,7 +354,7 @@ Current services:
|
||||
|
||||
| Item | Value |
|
||||
|---|---|
|
||||
| URL | `http://localhost:3001` (or `http://localhost:$PORT_GRAFANA`) |
|
||||
| URL | `http://localhost:3003` (or `http://localhost:$PORT_GRAFANA`) |
|
||||
| Username | `admin` |
|
||||
| Password | `$GRAFANA_ADMIN_PASSWORD` (default: `changeme` — **change before exposing to a network**) |
|
||||
|
||||
@@ -329,7 +384,7 @@ docker exec obs-loki wget -qO- \
|
||||
|
||||
**Prefer `compose_service` over `container_name` in LogQL queries** — `container_name` differs between dev (`archive-backend`) and prod (`archiv-production-backend-1`), while `compose_service` is stable (`backend`, `db`, `minio`, etc.).
|
||||
|
||||
Prometheus port `9090` and Grafana port `3001` are bound to `127.0.0.1` on the host. No other observability ports are host-bound.
|
||||
Prometheus port `9090` and Grafana port `3003` (default; configurable via `PORT_GRAFANA`) are bound to `127.0.0.1` on the host. No other observability ports are host-bound.
|
||||
|
||||
#### GlitchTip
|
||||
|
||||
|
||||
69
docs/adr/015-dood-workspace-bind-mount.md
Normal file
69
docs/adr/015-dood-workspace-bind-mount.md
Normal file
@@ -0,0 +1,69 @@
|
||||
# ADR-015: DooD workspace bind mount for Compose file bind-mount resolution
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
The deploy workflows (`.gitea/workflows/nightly.yml`, `release.yml`) run job steps inside Docker containers via Docker-out-of-Docker (DooD): the Gitea runner mounts the host Docker socket, and act_runner spawns sibling containers for each job.
|
||||
|
||||
When a job step calls `docker compose -f docker-compose.observability.yml up`, Docker Compose resolves relative bind-mount sources against `$(pwd)` inside the job container and passes the resulting absolute paths to the **host** daemon. For example, `./infra/observability/prometheus/prometheus.yml` becomes `/some/path/infra/observability/prometheus/prometheus.yml`, and the host daemon tries to bind-mount that path from the **host filesystem**.
|
||||
|
||||
In the default DooD setup (`runner-config.yaml` with only `valid_volumes: ["/var/run/docker.sock"]`), job container workspaces live in the act_runner overlay2 layer. The host has no corresponding directory at the job container's `$(pwd)` path, so the daemon auto-creates an empty directory in its place. The container then fails to start because the mount target was expected to be a file, not a directory:
|
||||
|
||||
```
|
||||
error mounting "…/prometheus/prometheus.yml" to rootfs at "/etc/prometheus/prometheus.yml": not a directory
|
||||
```
|
||||
|
||||
This affected all five config file bind mounts in `docker-compose.observability.yml`.
|
||||
|
||||
## Decision
|
||||
|
||||
Configure act_runner to store job workspaces on a real host path (`/srv/gitea-workspace`) and mount that path into both the runner container and every job container at the **same absolute path**. The identity of the host path and container path is the key constraint: Compose resolves to an absolute path and hands it to the host daemon, which looks for that exact path on the host filesystem.
|
||||
|
||||
**runner-config.yaml changes:**
|
||||
|
||||
```yaml
|
||||
container:
|
||||
workdir_parent: /srv/gitea-workspace
|
||||
valid_volumes:
|
||||
- "/var/run/docker.sock"
|
||||
- "/srv/gitea-workspace"
|
||||
options: "-v /srv/gitea-workspace:/srv/gitea-workspace"
|
||||
```
|
||||
|
||||
**Runner compose.yaml change** (host side — not in this repo):
|
||||
|
||||
```yaml
|
||||
runner:
|
||||
volumes:
|
||||
- /srv/gitea-workspace:/srv/gitea-workspace
|
||||
```
|
||||
|
||||
With this in place, `$(pwd)` inside a job container resolves to `/srv/gitea-workspace/<owner>/<repo>/`, which is a real directory on the host. Compose-managed bind mounts from that directory work without any additional steps.
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
| Alternative | Why rejected |
|
||||
|---|---|
|
||||
| **overlay2 `MergedDir` sync via privileged nsenter** (the previous approach, see PR #599 v1) | Required `--privileged --pid=host` (effective root on the host) plus fragile overlay2 driver assumption. Introduced stale-file risk on the host and a second stable path (`/srv/familienarchiv-*/obs-configs`) to maintain separately from the source tree. Replaced by this ADR. |
|
||||
| **Build configs into a dedicated Docker image** (pattern used for MinIO bootstrap, see `infra/minio/Dockerfile`) | Viable for static files that change infrequently. Requires a build step and an image rebuild every time a config changes. Appropriate for bootstrap scripts; too heavy for frequently-tuned observability configs. |
|
||||
| **Add workspace directory to runner-config `valid_volumes` only** (without `workdir_parent`) | `valid_volumes` whitelists paths that workflow steps may reference, but does not change where act_runner stores workspaces. Without `workdir_parent`, the workspace would still be in overlay2 and the bind-mount resolution problem would remain. |
|
||||
| **Map workspace under a different host path than container path** (e.g. host `/srv/workspace`, container `/workspace`) | Compose resolves to the container-internal path (e.g. `/workspace/…`) and passes that to the host daemon. The host daemon interprets the source as a host path. If host `/workspace` does not exist, the daemon creates an empty directory — the original bug. The paths must be identical. |
|
||||
|
||||
## Consequences
|
||||
|
||||
- `/srv/gitea-workspace` must exist on the VPS before the runner starts. The directory was created as part of this change; it is not created automatically.
|
||||
- The runner container's `compose.yaml` (maintained outside this repo at `~/docker/gitea/compose.yaml` on the VPS) must include the `- /srv/gitea-workspace:/srv/gitea-workspace` volume line. This is an out-of-band operational dependency; the prerequisite is documented in `runner-config.yaml`.
|
||||
- `workdir_parent` applies to all jobs on this runner. Any future workflow that calls `docker compose` with relative bind mounts benefits automatically without further configuration.
|
||||
- Job workspaces persist across runs under `/srv/gitea-workspace`. act_runner manages per-run subdirectory cleanup. Orphaned directories from interrupted runs should be cleaned up manually if disk space becomes a concern.
|
||||
- Workflows that previously relied on `OBS_CONFIG_DIR` env var or the `obs-configs` stable path on the host no longer need those. Both were removed in this PR.
|
||||
- This pattern does **not** apply to the `nsenter`-based Caddy reload step (ADR-012), which manages a host systemd service — a different problem class with no bind-mount equivalent.
|
||||
|
||||
## References
|
||||
|
||||
- ADR-011 — single-tenant runner trust model
|
||||
- ADR-012 — nsenter via privileged container for host service management
|
||||
- Issue #598 — original observability stack bind-mount failure
|
||||
- `runner-config.yaml` — `workdir_parent`, `valid_volumes`, `options`
|
||||
52
docs/adr/016-obs-stack-co-location-ci-push.md
Normal file
52
docs/adr/016-obs-stack-co-location-ci-push.md
Normal file
@@ -0,0 +1,52 @@
|
||||
# ADR-016: Observability stack co-location at `/opt/familienarchiv/` with CI-push config sync
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
Issue #601 established that the observability stack must survive Gitea CI workspace wipes between nightly runs. When the nightly job completes, act_runner deletes the job workspace. Any Docker container that bind-mounts a config file from a workspace path (`/srv/gitea-workspace/…/infra/observability/prometheus/prometheus.yml`) then references a path that no longer exists on the host. On the next nightly run, Docker Compose either auto-creates an empty directory in its place (causing the container to fail to start because a file mount receives a directory) or finds a stale file from a previous run if the workspace happened to land at the same path.
|
||||
|
||||
ADR-015 solved the workspace bind-mount resolution problem: job workspaces are stored at `/srv/gitea-workspace` so `$(pwd)` inside the job container maps to a real host path. But it did not address persistence: the workspace is still wiped after the job, so bind mounts from workspace-relative paths remain fragile across runs.
|
||||
|
||||
### Decision drivers
|
||||
|
||||
1. Bind-mount sources must point to a host path that persists indefinitely, not to a path that disappears after each CI run.
|
||||
2. Config files must reflect the committed state of the repo after every nightly run (no manual sync steps).
|
||||
3. Secrets must not be written to the workspace or to any path managed by CI; they must survive independently of deployments.
|
||||
4. The solution must not introduce new infrastructure dependencies (no SSH access from CI, no external registry, no additional server-side daemon).
|
||||
|
||||
### Alternatives considered
|
||||
|
||||
**A: Server-pull model** — a systemd timer or cron job on the server does `git pull` from the repo into `/opt/familienarchiv/` and then runs `docker compose up`. Rejected because: (1) requires git credentials on the server and a registered deploy key, (2) adds a second deployment mechanism that diverges from the CI-push model used for the main app stack, (3) timing coupling — the server pull must complete before CI's health checks run, requiring polling or a webhook.
|
||||
|
||||
**B: Separate directory (e.g. `/opt/obs/`)** — keeps obs configs isolated from the app stack. Rejected because: (1) the main app compose files are already in `/opt/familienarchiv/` (managed the same way), and (2) GlitchTip shares the `archive-db` PostgreSQL instance and `archiv-net` Docker network — it is architecturally part of the same deployment unit, not a separate one. Co-location reflects the actual coupling.
|
||||
|
||||
**C: Named Docker configs (Swarm)** — Docker Swarm supports first-class config objects that persist in the cluster. Rejected because the project does not use Swarm and introducing it solely for config persistence is a disproportionate dependency.
|
||||
|
||||
## Decision
|
||||
|
||||
The observability stack is co-located with the main application deployment at `/opt/familienarchiv/`:
|
||||
|
||||
- `docker-compose.observability.yml` → `/opt/familienarchiv/docker-compose.observability.yml`
|
||||
- `infra/observability/` → `/opt/familienarchiv/infra/observability/`
|
||||
|
||||
The nightly CI job (`nightly.yml`) copies these files from the workspace checkout to `/opt/familienarchiv/` using `cp -r` on every run (CI-push model). Containers always read config from the permanent location; a workspace wipe has no effect on running containers.
|
||||
|
||||
Secrets are stored in `/opt/familienarchiv/.env` on the server. This file is managed by the operator — CI does not write or delete it. Docker Compose auto-reads it when started from `/opt/familienarchiv/`. The required key inventory is documented in `docs/DEPLOYMENT.md §4`.
|
||||
|
||||
The CI runner mounts `/opt/familienarchiv` as a bind mount into job containers (see `runner-config.yaml`). This requires a one-time `mkdir -p /opt/familienarchiv/infra` on the server and a runner restart after updating `runner-config.yaml` (see ADR-015 and `docs/DEPLOYMENT.md §3.1`).
|
||||
|
||||
## Consequences
|
||||
|
||||
**Positive:**
|
||||
- Bind-mount sources survive workspace wipes by definition — they are on a persistent host path.
|
||||
- Config is always in sync with the repo after each nightly run.
|
||||
- No new infrastructure dependencies; the CI-push model mirrors how the main app stack is deployed.
|
||||
- Secrets (`/opt/familienarchiv/.env`) are decoupled from CI — a deployment cannot accidentally overwrite them.
|
||||
|
||||
**Negative:**
|
||||
- `cp -r` does not remove deleted files; a config file removed from the repo persists in `/opt/familienarchiv/infra/observability/` until manually deleted. Acceptable for this project's change frequency. A `rsync -a --delete` would give a clean mirror if this becomes a problem.
|
||||
- Mounting `/opt/familienarchiv/` into CI job containers expands the blast radius of a compromised workflow step — a malicious step could overwrite app compose files and Caddy config. Acceptable because the runner is single-tenant (trusted code only). See `runner-config.yaml` security comment.
|
||||
- Runner must be restarted (`systemctl restart gitea-runner`) after any change to `runner-config.yaml` for the new mount to take effect.
|
||||
@@ -17,7 +17,7 @@ System_Boundary(archiv, "Familienarchiv (Docker Compose)") {
|
||||
Container(mc, "Bucket / Service-Account Init", "MinIO Client (mc)", "One-shot container on startup. Idempotent: creates the archive bucket, the archiv-app service account, and attaches the readwrite policy.")
|
||||
}
|
||||
|
||||
System_Boundary(observability, "Observability Stack (docker-compose.observability.yml)") {
|
||||
System_Boundary(observability, "Observability Stack (/opt/familienarchiv/docker-compose.observability.yml)") {
|
||||
Container(prometheus, "Prometheus", "prom/prometheus:v3.4.0", "Scrapes metrics from backend management port 8081 (/actuator/prometheus), node-exporter, and cAdvisor. Retention: 30 days.")
|
||||
Container(node_exporter, "Node Exporter", "prom/node-exporter:v1.9.0", "Host-level CPU, memory, disk, and network metrics.")
|
||||
Container(cadvisor, "cAdvisor", "gcr.io/cadvisor/cadvisor:v0.52.1", "Per-container resource metrics.")
|
||||
|
||||
@@ -19,6 +19,39 @@ Both containers live in the `gitea_gitea` Docker network on the VPS. The runner
|
||||
|
||||
The `gitea-runner` container mounts the host Docker socket (`/var/run/docker.sock`). When a workflow job runs, act_runner spawns a **sibling container** for each job. That job container also gets the Docker socket mounted (via `valid_volumes` in `runner-config.yaml`), enabling `docker compose` calls in workflow steps.
|
||||
|
||||
### Workspace bind-mount setup (DooD path resolution)
|
||||
|
||||
When a workflow step calls `docker compose up` with relative bind-mount sources (e.g. `./infra/observability/prometheus/prometheus.yml`), Compose resolves them against `$(pwd)` inside the job container and passes the resulting **absolute path** to the host Docker daemon. The host daemon then tries to bind-mount that path from the **host filesystem**.
|
||||
|
||||
In the default DooD setup the job container's workspace lives in the act_runner overlay2 layer — the host has no directory at that path, auto-creates an empty one, and the container fails with:
|
||||
|
||||
```
|
||||
error mounting "…/prometheus/prometheus.yml" to rootfs at "/etc/prometheus/prometheus.yml": not a directory
|
||||
```
|
||||
|
||||
**Solution (ADR-015):** store job workspaces on a real host path and mount it at the **same absolute path** inside the runner and every job container. `runner-config.yaml` configures this via `workdir_parent`, `valid_volumes`, and `options`.
|
||||
|
||||
**One-time host setup** (required on any fresh VPS):
|
||||
|
||||
```bash
|
||||
mkdir -p /srv/gitea-workspace
|
||||
# Then add to the runner service in ~/docker/gitea/compose.yaml:
|
||||
# volumes:
|
||||
# - /srv/gitea-workspace:/srv/gitea-workspace
|
||||
# Restart the runner container for the change to take effect.
|
||||
```
|
||||
|
||||
The path `/srv/gitea-workspace` is the canonical workspace root. It must be identical on the host and inside job containers — if the paths differ, Compose still resolves to the container-internal path, which the host daemon cannot find (the original bug).
|
||||
|
||||
**Disk management:** act_runner cleans per-run subdirectories on completion. Orphaned directories from interrupted runs accumulate under `/srv/gitea-workspace` and should be pruned manually if disk space becomes a concern:
|
||||
|
||||
```bash
|
||||
# List workspace directories older than 7 days
|
||||
find /srv/gitea-workspace -mindepth 3 -maxdepth 3 -type d -mtime +7
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Running host-level commands from CI (nsenter pattern)
|
||||
|
||||
Job containers are unprivileged and do not share the host's PID/mount/network namespaces. Commands like `systemctl` that target the host daemon are therefore unavailable by default. When a workflow step needs to manage a host service (e.g. `systemctl reload caddy`), it uses the Docker socket to spin up a **privileged sibling container** in the host PID namespace:
|
||||
@@ -108,6 +141,33 @@ nsenter: failed to execute /bin/systemctl: No such file or directory
|
||||
|
||||
The first error means the Docker socket is not mounted into the job container — check `valid_volumes` in `/root/docker/gitea/runner-config.yaml` on the VPS. The second means the Alpine image is running but cannot enter the host mount namespace; verify `--privileged` and `--pid=host` are both present in the workflow step.
|
||||
|
||||
**Failure mode 4 — workspace bind-mount not configured (observability stack or any compose-with-file-mounts job)**
|
||||
|
||||
Symptom in CI log:
|
||||
```
|
||||
Error response from daemon: error while creating mount source path "…/prometheus/prometheus.yml": mkdir …: not a directory
|
||||
```
|
||||
|
||||
Or the service starts but immediately crashes because a config file was mounted as an empty directory.
|
||||
|
||||
Cause: `/srv/gitea-workspace` does not exist on the host, or the runner container's `compose.yaml` is missing the `- /srv/gitea-workspace:/srv/gitea-workspace` volume line.
|
||||
|
||||
Diagnosis:
|
||||
```bash
|
||||
ssh root@<vps>
|
||||
ls -la /srv/gitea-workspace # must exist and be a directory
|
||||
docker inspect gitea-runner | grep -A5 Mounts # must show /srv/gitea-workspace
|
||||
```
|
||||
|
||||
Recovery:
|
||||
```bash
|
||||
mkdir -p /srv/gitea-workspace
|
||||
# Add volume line to runner compose.yaml, then:
|
||||
docker compose -f ~/docker/gitea/compose.yaml up -d gitea-runner
|
||||
```
|
||||
|
||||
See `docs/DEPLOYMENT.md §3.1` and ADR-015 for the full setup rationale.
|
||||
|
||||
---
|
||||
|
||||
## Gitea vs GitHub Actions Differences
|
||||
|
||||
@@ -45,7 +45,6 @@ export default defineConfig(
|
||||
files: ['**/*.svelte', '**/*.svelte.ts', '**/*.svelte.js'],
|
||||
languageOptions: {
|
||||
parserOptions: {
|
||||
projectService: true,
|
||||
extraFileExtensions: ['.svelte'],
|
||||
parser: ts.parser,
|
||||
svelteConfig
|
||||
|
||||
@@ -88,3 +88,13 @@ git.raddatz.cloud {
|
||||
import security_headers
|
||||
reverse_proxy 127.0.0.1:3005
|
||||
}
|
||||
|
||||
grafana.archiv.raddatz.cloud {
|
||||
import security_headers
|
||||
reverse_proxy 127.0.0.1:3003
|
||||
}
|
||||
|
||||
glitchtip.archiv.raddatz.cloud {
|
||||
import security_headers
|
||||
reverse_proxy 127.0.0.1:3002
|
||||
}
|
||||
|
||||
@@ -36,9 +36,6 @@ metrics_generator:
|
||||
source: tempo
|
||||
storage:
|
||||
path: /var/tempo/generator/wal
|
||||
processors:
|
||||
- service-graphs
|
||||
- span-metrics
|
||||
|
||||
# Tempo HTTP API (port 3200) is unauthenticated. Access is controlled entirely
|
||||
# by network isolation: only Grafana (on obs-net) should reach this port.
|
||||
|
||||
@@ -2,15 +2,32 @@
|
||||
container:
|
||||
# passed as DOCKER_HOST inside the job container
|
||||
docker_host: "unix:///var/run/docker.sock"
|
||||
# whitelists the socket path so workflows can mount it
|
||||
# Job workspaces are stored here on the NAS and mounted at the same
|
||||
# absolute path inside job containers. Identical host ↔ container path
|
||||
# is the requirement: Docker Compose resolves relative bind mounts to
|
||||
# $(pwd) inside the job container and passes that absolute path to the
|
||||
# host daemon — the daemon must find the file at that exact host path.
|
||||
# Prerequisite: mkdir -p /srv/gitea-workspace on the host, and add
|
||||
# - /srv/gitea-workspace:/srv/gitea-workspace
|
||||
# to the runner service volumes in gitea's compose.yaml.
|
||||
workdir_parent: /srv/gitea-workspace
|
||||
# whitelists volumes that workflow steps may bind-mount
|
||||
valid_volumes:
|
||||
- "/var/run/docker.sock"
|
||||
- "/srv/gitea-workspace"
|
||||
- "/opt/familienarchiv"
|
||||
# appended to `docker run` when the runner spawns a job container
|
||||
# SECURITY: Mounting the Docker socket grants job containers root-equivalent
|
||||
# access to the host Docker daemon. Acceptable here because only trusted code
|
||||
# from this private repo runs on this runner. Do NOT use on a runner that
|
||||
# accepts untrusted PRs from external contributors.
|
||||
options: "-v /var/run/docker.sock:/var/run/docker.sock"
|
||||
# SECURITY WARNING: This mount configuration grants CI job containers:
|
||||
# 1. Root-equivalent access to the host Docker daemon (via the socket).
|
||||
# 2. Read/write access to /opt/familienarchiv/ — including the main app's
|
||||
# compose files, Caddy config, and observability configs. A malicious
|
||||
# workflow step could overwrite any file in that directory.
|
||||
# Both are acceptable ONLY because this runner is single-tenant: it executes
|
||||
# code exclusively from this private repo with a fixed set of trusted authors.
|
||||
# WARNING: Do NOT add this runner to any repo with external contributors or
|
||||
# untrusted PRs — the blast radius includes the entire production deployment.
|
||||
# See ADR-016 for the reasoning behind the /opt/familienarchiv mount.
|
||||
options: "-v /var/run/docker.sock:/var/run/docker.sock -v /srv/gitea-workspace:/srv/gitea-workspace -v /opt/familienarchiv:/opt/familienarchiv"
|
||||
# keep network mode default (bridge) — Testcontainers handles its own networking
|
||||
force_pull: false
|
||||
|
||||
|
||||
Reference in New Issue
Block a user