diff --git a/scripts/download-kraken-models.sh b/scripts/download-kraken-models.sh index 8c9898b1..6e3d5a47 100755 --- a/scripts/download-kraken-models.sh +++ b/scripts/download-kraken-models.sh @@ -37,29 +37,33 @@ download_model() { echo " Downloading $name ($doi)..." - # kraken get downloads to /root/.local/share/htrmopo// - # We find the .mlmodel file after download and copy it to our volume - docker compose run --rm "$COMPOSE_SERVICE" sh -c " - kraken get $doi 2>&1 - # Find the most recently downloaded .mlmodel and copy to target - FOUND=\$(find /root/.local/share/htrmopo -name '*.mlmodel' -newer /tmp 2>/dev/null | head -1) - if [ -n \"\$FOUND\" ]; then - cp \"\$FOUND\" $dest - echo \"Saved to $dest\" + # kraken get downloads to /root/.local/share/htrmopo//.mlmodel + # Parse the "Model dir: " line from kraken output to locate the file + docker compose run --rm "$COMPOSE_SERVICE" sh -c ' + OUTPUT=$(kraken get '"$doi"' 2>&1) + echo "$OUTPUT" + MODEL_DIR=$(echo "$OUTPUT" | grep -oP "Model dir: \K[^ ]+") + if [ -n "$MODEL_DIR" ] && [ -d "$MODEL_DIR" ]; then + FOUND=$(find "$MODEL_DIR" -name "*.mlmodel" | head -1) + if [ -n "$FOUND" ]; then + cp "$FOUND" '"$dest"' + echo "Saved to '"$dest"'" + else + echo "ERROR: No .mlmodel file in $MODEL_DIR" + ls -la "$MODEL_DIR" + exit 1 + fi else - echo 'ERROR: No .mlmodel file found after download' + echo "ERROR: Could not parse model directory from kraken output" exit 1 fi - " + ' } download_models() { echo "Downloading Kraken HTR models into the ocr_models volume..." echo "" - # Create a timestamp marker so we can find newly downloaded files - docker compose run --rm "$COMPOSE_SERVICE" touch /tmp/.download-marker - echo "Model 1: $MODEL_1_NAME" echo " $MODEL_1_DESC" download_model "$MODEL_1_DOI" "$MODEL_1_PATH" "$MODEL_1_NAME"