From 25252fc709b920fefbf8202c5328166ea83cc233 Mon Sep 17 00:00:00 2001
From: Marcel
Date: Sat, 6 Jun 2026 13:55:07 +0200
Subject: [PATCH] feat(observability): add Grafana Ollama inference latency dashboard (#737)

Co-Authored-By: Claude Sonnet 4.6
---
 .../provisioning/dashboards/ollama.json | 218 ++++++++++++++++++
 1 file changed, 218 insertions(+)
 create mode 100644 infra/observability/grafana/provisioning/dashboards/ollama.json

diff --git a/infra/observability/grafana/provisioning/dashboards/ollama.json b/infra/observability/grafana/provisioning/dashboards/ollama.json
new file mode 100644
index 00000000..47536e2d
--- /dev/null
+++ b/infra/observability/grafana/provisioning/dashboards/ollama.json
@@ -0,0 +1,218 @@
+{
+  "id": null,
+  "uid": "ollama-dashboard",
+  "title": "Ollama",
+  "description": "Ollama inference latency and request rate",
+  "version": 1,
+  "schemaVersion": 39,
+  "tags": ["ollama", "inference"],
+  "timezone": "browser",
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "links": [],
+  "liveNow": false,
+  "refresh": "30s",
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "weekStart": "",
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": { "type": "datasource", "uid": "grafana" },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "panels": [
+    {
+      "id": 1,
+      "type": "timeseries",
+      "title": "Inference Latency p50",
+      "description": "50th percentile of Ollama request duration over a 5-minute window",
+      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 0 },
+      "datasource": { "type": "prometheus", "uid": "prometheus" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "options": {
+        "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "prometheus" },
+          "editorMode": "code",
+          "expr": "histogram_quantile(0.5, sum by (le) (rate(ollama_request_duration_seconds_bucket[5m])))",
+          "instant": false,
+          "legendFormat": "p50",
+          "range": true,
+          "refId": "A"
+        }
+      ]
+    },
+    {
+      "id": 2,
+      "type": "timeseries",
+      "title": "Inference Latency p95",
+      "description": "95th percentile of Ollama request duration over a 5-minute window",
+      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 0 },
+      "datasource": { "type": "prometheus", "uid": "prometheus" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "options": {
+        "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "prometheus" },
+          "editorMode": "code",
+          "expr": "histogram_quantile(0.95, sum by (le) (rate(ollama_request_duration_seconds_bucket[5m])))",
+          "instant": false,
+          "legendFormat": "p95",
+          "range": true,
+          "refId": "A"
+        }
+      ]
+    },
+    {
+      "id": 3,
+      "type": "timeseries",
+      "title": "Request Rate",
+      "description": "Ollama requests per second over a 5-minute window",
+      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 0 },
+      "datasource": { "type": "prometheus", "uid": "prometheus" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          },
+          "unit": "reqps"
+        },
+        "overrides": []
+      },
+      "options": {
+        "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "prometheus" },
+          "editorMode": "code",
+          "expr": "sum(rate(ollama_requests_total[5m]))",
+          "instant": false,
+          "legendFormat": "req/s",
+          "range": true,
+          "refId": "A"
+        }
+      ]
+    }
+  ],
+  "preload": false,
+  "templating": {
+    "list": []
+  }
+}