feat(ocr): record training runs in ocr_training_runs_total per kind and outcome

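Wraps the `await asyncio.to_thread(_run_*)` calls in /train, /train-sender,
and /segtrain with try/except. If the training call raises, the handler
increments ocr_training_runs_total with outcome="error" and re-raises the
exception unchanged; if it returns, the handler increments the counter with
outcome="success". Both recognition endpoints (/train, /train-sender) share
kind="recognition"; /segtrain uses kind="segmentation". On success, the
ocr_model_accuracy gauge is set for that kind, but only when the result
carries a non-None "accuracy" value.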

Refs #652 (AC6, decision #2)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-21 16:12:26 +02:00
parent 2e3744d9ef
commit 6c2b9af10b
2 changed files with 105 additions and 3 deletions

View File

@@ -478,7 +478,14 @@ async def train_model(
return {"loss": None, "accuracy": accuracy, "cer": cer, "epochs": epochs}
result = await asyncio.to_thread(_run_training)
try:
result = await asyncio.to_thread(_run_training)
except Exception:
metrics.ocr_training_runs_total.labels(kind="recognition", outcome="error").inc()
raise
metrics.ocr_training_runs_total.labels(kind="recognition", outcome="success").inc()
if result.get("accuracy") is not None:
metrics.ocr_model_accuracy.labels(kind="recognition").set(result["accuracy"])
return result
@@ -558,7 +565,14 @@ async def train_sender_model(
return {"loss": None, "accuracy": accuracy, "cer": cer, "epochs": epochs}
result = await asyncio.to_thread(_run_sender_training)
try:
result = await asyncio.to_thread(_run_sender_training)
except Exception:
metrics.ocr_training_runs_total.labels(kind="recognition", outcome="error").inc()
raise
metrics.ocr_training_runs_total.labels(kind="recognition", outcome="success").inc()
if result.get("accuracy") is not None:
metrics.ocr_model_accuracy.labels(kind="recognition").set(result["accuracy"])
return result
@@ -668,7 +682,14 @@ async def segtrain_model(
return {"loss": None, "accuracy": accuracy, "cer": cer, "epochs": epochs}
result = await asyncio.to_thread(_run_segtrain)
try:
result = await asyncio.to_thread(_run_segtrain)
except Exception:
metrics.ocr_training_runs_total.labels(kind="segmentation", outcome="error").inc()
raise
metrics.ocr_training_runs_total.labels(kind="segmentation", outcome="success").inc()
if result.get("accuracy") is not None:
metrics.ocr_model_accuracy.labels(kind="segmentation").set(result["accuracy"])
return result