fix(ocr): parse kraken 'Model dir' output to locate downloaded model
Some checks failed
CI / Unit & Component Tests (push) Failing after 1s
CI / Backend Unit Tests (push) Failing after 0s
CI / Unit & Component Tests (pull_request) Failing after 1s
CI / Backend Unit Tests (pull_request) Failing after 0s

The previous approach ran find across the htrmopo cache, which failed
because the -newer /tmp test ran in a separate container. The script now
parses the 'Model dir: <path>' line from the kraken get output directly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-12 20:09:23 +02:00
parent f12b41161e
commit c0004f5e6f

View File

@@ -37,29 +37,33 @@ download_model() {
echo " Downloading $name ($doi)..."
# kraken get downloads to /root/.local/share/htrmopo/<uuid>/
# We find the .mlmodel file after download and copy it to our volume
docker compose run --rm "$COMPOSE_SERVICE" sh -c "
kraken get $doi 2>&1
# Find the most recently downloaded .mlmodel and copy to target
FOUND=\$(find /root/.local/share/htrmopo -name '*.mlmodel' -newer /tmp 2>/dev/null | head -1)
if [ -n \"\$FOUND\" ]; then
cp \"\$FOUND\" $dest
echo \"Saved to $dest\"
# kraken get downloads to /root/.local/share/htrmopo/<uuid>/<name>.mlmodel
# Parse the "Model dir: <path>" line from kraken output to locate the file
docker compose run --rm "$COMPOSE_SERVICE" sh -c '
OUTPUT=$(kraken get '"$doi"' 2>&1)
echo "$OUTPUT"
MODEL_DIR=$(echo "$OUTPUT" | grep -oP "Model dir: \K[^ ]+")
if [ -n "$MODEL_DIR" ] && [ -d "$MODEL_DIR" ]; then
FOUND=$(find "$MODEL_DIR" -name "*.mlmodel" | head -1)
if [ -n "$FOUND" ]; then
cp "$FOUND" '"$dest"'
echo "Saved to '"$dest"'"
else
echo "ERROR: No .mlmodel file in $MODEL_DIR"
ls -la "$MODEL_DIR"
exit 1
fi
else
echo 'ERROR: No .mlmodel file found after download'
echo "ERROR: Could not parse model directory from kraken output"
exit 1
fi
"
'
}
download_models() {
echo "Downloading Kraken HTR models into the ocr_models volume..."
echo ""
# Create a timestamp marker so we can find newly downloaded files
docker compose run --rm "$COMPOSE_SERVICE" touch /tmp/.download-marker
echo "Model 1: $MODEL_1_NAME"
echo " $MODEL_1_DESC"
download_model "$MODEL_1_DOI" "$MODEL_1_PATH" "$MODEL_1_NAME"