fix(ocr): parse kraken 'Model dir' output to locate downloaded model
The previous approach ran `find` across the htrmopo cache, which failed because the `-newer /tmp` check ran in a separate container. The script now parses the 'Model dir: <path>' line directly from the `kraken get` output.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -37,29 +37,33 @@ download_model() {
|
|||||||
|
|
||||||
echo " Downloading $name ($doi)..."
|
echo " Downloading $name ($doi)..."
|
||||||
|
|
||||||
# kraken get downloads to /root/.local/share/htrmopo/<uuid>/
|
# kraken get downloads to /root/.local/share/htrmopo/<uuid>/<name>.mlmodel
|
||||||
# We find the .mlmodel file after download and copy it to our volume
|
# Parse the "Model dir: <path>" line from kraken output to locate the file
|
||||||
docker compose run --rm "$COMPOSE_SERVICE" sh -c "
|
docker compose run --rm "$COMPOSE_SERVICE" sh -c '
|
||||||
kraken get $doi 2>&1
|
OUTPUT=$(kraken get '"$doi"' 2>&1)
|
||||||
# Find the most recently downloaded .mlmodel and copy to target
|
echo "$OUTPUT"
|
||||||
FOUND=\$(find /root/.local/share/htrmopo -name '*.mlmodel' -newer /tmp 2>/dev/null | head -1)
|
MODEL_DIR=$(echo "$OUTPUT" | grep -oP "Model dir: \K[^ ]+")
|
||||||
if [ -n \"\$FOUND\" ]; then
|
if [ -n "$MODEL_DIR" ] && [ -d "$MODEL_DIR" ]; then
|
||||||
cp \"\$FOUND\" $dest
|
FOUND=$(find "$MODEL_DIR" -name "*.mlmodel" | head -1)
|
||||||
echo \"Saved to $dest\"
|
if [ -n "$FOUND" ]; then
|
||||||
|
cp "$FOUND" '"$dest"'
|
||||||
|
echo "Saved to '"$dest"'"
|
||||||
|
else
|
||||||
|
echo "ERROR: No .mlmodel file in $MODEL_DIR"
|
||||||
|
ls -la "$MODEL_DIR"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
else
|
else
|
||||||
echo 'ERROR: No .mlmodel file found after download'
|
echo "ERROR: Could not parse model directory from kraken output"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
"
|
'
|
||||||
}
|
}
|
||||||
|
|
||||||
download_models() {
|
download_models() {
|
||||||
echo "Downloading Kraken HTR models into the ocr_models volume..."
|
echo "Downloading Kraken HTR models into the ocr_models volume..."
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# Create a timestamp marker so we can find newly downloaded files
|
|
||||||
docker compose run --rm "$COMPOSE_SERVICE" touch /tmp/.download-marker
|
|
||||||
|
|
||||||
echo "Model 1: $MODEL_1_NAME"
|
echo "Model 1: $MODEL_1_NAME"
|
||||||
echo " $MODEL_1_DESC"
|
echo " $MODEL_1_DESC"
|
||||||
download_model "$MODEL_1_DOI" "$MODEL_1_PATH" "$MODEL_1_NAME"
|
download_model "$MODEL_1_DOI" "$MODEL_1_PATH" "$MODEL_1_NAME"
|
||||||
|
|||||||
Reference in New Issue
Block a user