Runbook script to download both HTR-United Kurrent model candidates (german_kurrent_manu_9, kurrent-de) into the ocr_models Docker volume, print instructions for testing them against sample documents, and activate the winner.

Usage:
  ./scripts/download-kraken-models.sh               # download both
  ./scripts/download-kraken-models.sh --activate 1  # pick model 1

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
90 lines
3.3 KiB
Bash
Executable File
90 lines
3.3 KiB
Bash
Executable File
#!/bin/bash
set -euo pipefail

# Downloads Kraken HTR models for German Kurrent/Suetterlin into the ocr_models volume.
# Run this once after first deployment, or whenever you want to switch models.
#
# The default (download) mode only fetches the candidates and prints the
# commands for comparing them by hand; it does not run the comparison itself.
#
# Usage:
#   ./scripts/download-kraken-models.sh               # download both candidates
#   ./scripts/download-kraken-models.sh --activate 1  # activate model 1 (german_kurrent_manu_9)
#   ./scripts/download-kraken-models.sh --activate 2  # activate model 2 (kurrent-de)

# Compose service in which every kraken/cp command below is run.
readonly COMPOSE_SERVICE="ocr-service"
# Model directory as seen from inside that service's container
# (presumably the mount point of the ocr_models volume -- confirm in the compose file).
readonly MODEL_DIR="/app/models"
# Destination the chosen candidate is copied to on activation.
readonly ACTIVE_MODEL="$MODEL_DIR/german_kurrent.mlmodel"

# Candidate 1.
readonly MODEL_1_NAME="german_kurrent_manu_9"
readonly MODEL_1_DESC="19th-century German administrative Kurrent (HTR-United)"
readonly MODEL_1_PATH="$MODEL_DIR/$MODEL_1_NAME.mlmodel"

# Candidate 2.
readonly MODEL_2_NAME="kurrent-de"
readonly MODEL_2_DESC="Broad German Kurrent coverage (HTR-United)"
readonly MODEL_2_PATH="$MODEL_DIR/$MODEL_2_NAME.mlmodel"

# ─── Functions ────────────────────────────────────────────────────────────────
#######################################
# Fetch both candidate models inside the OCR service container, then print
# the commands for comparing them by hand and for activating the winner.
# Globals:   COMPOSE_SERVICE, MODEL_1_NAME, MODEL_1_DESC, MODEL_1_PATH,
#            MODEL_2_NAME, MODEL_2_DESC, MODEL_2_PATH (all read)
# Arguments: none
# Outputs:   progress messages and follow-up instructions on stdout
# Returns:   0 on success; under `set -e` a failing docker call aborts the script
#######################################
download_models() {
  local -a names=("$MODEL_1_NAME" "$MODEL_2_NAME")
  local -a descs=("$MODEL_1_DESC" "$MODEL_2_DESC")
  local -a paths=("$MODEL_1_PATH" "$MODEL_2_PATH")
  local i n

  echo "Downloading Kraken HTR models into the ocr_models volume..."
  echo ""

  # NOTE(review): upstream kraken documents `kraken get` as taking a model
  # identifier from its repository -- confirm that these names resolve and that
  # the installed version accepts `-o`.
  for i in "${!names[@]}"; do
    n=$((i + 1))
    echo "Model $n: ${names[i]}"
    echo " ${descs[i]}"
    docker compose run --rm "$COMPOSE_SERVICE" \
      kraken get "${names[i]}" -o "${paths[i]}"
    echo ""
  done

  echo "Both models downloaded. To test them against a sample document:"
  echo ""
  echo " # Copy a sample Kurrent scan into the container:"
  # Address the container through its compose service (like the exec hints
  # below) rather than through a hard-coded container name.
  echo " docker compose cp sample-kurrent.png $COMPOSE_SERVICE:/tmp/sample.png"
  echo ""

  for i in "${!names[@]}"; do
    n=$((i + 1))
    echo " # Test model $n:"
    echo " docker compose exec $COMPOSE_SERVICE kraken -i /tmp/sample.png /tmp/out$n.txt segment -bl ocr -m ${paths[i]}"
    echo " docker compose exec $COMPOSE_SERVICE cat /tmp/out$n.txt"
    echo ""
  done

  echo "Then activate the better model:"
  echo " ./scripts/download-kraken-models.sh --activate 1 # or 2"
}
#######################################
# Copy the chosen candidate model over the active model path inside the OCR
# service container, then print the restart command.
# Globals:   COMPOSE_SERVICE, ACTIVE_MODEL, MODEL_1_NAME, MODEL_1_PATH,
#            MODEL_2_NAME, MODEL_2_PATH (all read)
# Arguments: $1 - candidate number, "1" or "2"
# Outputs:   progress on stdout; the invalid-choice error on stderr
# Returns:   0 on success; exits the script with status 1 on any other choice
#######################################
activate_model() {
  # Default to empty so a missing argument reaches the error arm below
  # instead of tripping `set -u`.
  local choice="${1:-}"
  local name path

  case "$choice" in
    1)
      name="$MODEL_1_NAME"
      path="$MODEL_1_PATH"
      ;;
    2)
      name="$MODEL_2_NAME"
      path="$MODEL_2_PATH"
      ;;
    *)
      echo "Error: --activate expects 1 or 2" >&2
      exit 1
      ;;
  esac

  echo "Activating model $choice: $name"
  docker compose run --rm "$COMPOSE_SERVICE" \
    cp "$path" "$ACTIVE_MODEL"

  echo "Active model is now: $ACTIVE_MODEL"
  echo "Restart the OCR service to load the new model:"
  echo " docker compose restart $COMPOSE_SERVICE"
}
# ─── Main ─────────────────────────────────────────────────────────────────────

# Dispatch on the first argument. Anything unrecognised is rejected rather than
# treated as "download", so a mistyped flag cannot start a download by accident.
case "${1:-}" in
  "")
    download_models
    ;;
  --activate)
    # A missing number is passed as an empty string; activate_model reports it.
    activate_model "${2:-}"
    ;;
  -h | --help)
    echo "Usage: $0 [--activate 1|2]"
    ;;
  *)
    echo "Error: unknown argument: $1" >&2
    echo "Usage: $0 [--activate 1|2]" >&2
    exit 2
    ;;
esac