fix(ocr): parse kraken 'Model dir' output to locate downloaded model
The previous approach ran find with -newer /tmp across the htrmopo cache, which failed because the find ran in a separate container. The download step now parses the 'Model dir: <path>' line from the kraken get output directly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -37,29 +37,33 @@ download_model() {
|
||||
|
||||
echo " Downloading $name ($doi)..."
|
||||
|
||||
# kraken get downloads to /root/.local/share/htrmopo/<uuid>/
|
||||
# We find the .mlmodel file after download and copy it to our volume
|
||||
docker compose run --rm "$COMPOSE_SERVICE" sh -c "
|
||||
kraken get $doi 2>&1
|
||||
# Find the most recently downloaded .mlmodel and copy to target
|
||||
FOUND=\$(find /root/.local/share/htrmopo -name '*.mlmodel' -newer /tmp 2>/dev/null | head -1)
|
||||
if [ -n \"\$FOUND\" ]; then
|
||||
cp \"\$FOUND\" $dest
|
||||
echo \"Saved to $dest\"
|
||||
# kraken get downloads to /root/.local/share/htrmopo/<uuid>/<name>.mlmodel
|
||||
# Parse the "Model dir: <path>" line from kraken output to locate the file
|
||||
docker compose run --rm "$COMPOSE_SERVICE" sh -c '
|
||||
OUTPUT=$(kraken get '"$doi"' 2>&1)
|
||||
echo "$OUTPUT"
|
||||
MODEL_DIR=$(echo "$OUTPUT" | grep -oP "Model dir: \K[^ ]+")
|
||||
if [ -n "$MODEL_DIR" ] && [ -d "$MODEL_DIR" ]; then
|
||||
FOUND=$(find "$MODEL_DIR" -name "*.mlmodel" | head -1)
|
||||
if [ -n "$FOUND" ]; then
|
||||
cp "$FOUND" '"$dest"'
|
||||
echo "Saved to '"$dest"'"
|
||||
else
|
||||
echo "ERROR: No .mlmodel file in $MODEL_DIR"
|
||||
ls -la "$MODEL_DIR"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo 'ERROR: No .mlmodel file found after download'
|
||||
echo "ERROR: Could not parse model directory from kraken output"
|
||||
exit 1
|
||||
fi
|
||||
"
|
||||
'
|
||||
}
|
||||
|
||||
download_models() {
|
||||
echo "Downloading Kraken HTR models into the ocr_models volume..."
|
||||
echo ""
|
||||
|
||||
# Create a timestamp marker so we can find newly downloaded files
|
||||
docker compose run --rm "$COMPOSE_SERVICE" touch /tmp/.download-marker
|
||||
|
||||
echo "Model 1: $MODEL_1_NAME"
|
||||
echo " $MODEL_1_DESC"
|
||||
download_model "$MODEL_1_DOI" "$MODEL_1_PATH" "$MODEL_1_NAME"
|
||||
|
||||
Reference in New Issue
Block a user