diff --git a/infra/observability/grafana/provisioning/dashboards/ollama.json b/infra/observability/grafana/provisioning/dashboards/ollama.json deleted file mode 100644 index 47536e2d..00000000 --- a/infra/observability/grafana/provisioning/dashboards/ollama.json +++ /dev/null @@ -1,218 +0,0 @@ -{ - "id": null, - "uid": "ollama-dashboard", - "title": "Ollama", - "description": "Ollama inference latency and request rate", - "version": 1, - "schemaVersion": 39, - "tags": ["ollama", "inference"], - "timezone": "browser", - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 1, - "links": [], - "liveNow": false, - "refresh": "30s", - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": {}, - "weekStart": "", - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { "type": "datasource", "uid": "grafana" }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "panels": [ - { - "id": 1, - "type": "timeseries", - "title": "Inference Latency p50", - "description": "50th percentile of Ollama request duration over a 5-minute window", - "gridPos": { "h": 8, "w": 8, "x": 0, "y": 0 }, - "datasource": { "type": "prometheus", "uid": "prometheus" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "palette-classic" }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { "type": "linear" }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { "group": "A", "mode": "none" }, - "thresholdsStyle": { "mode": "off" } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null }, - { "color": "red", "value": 80 } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "options": { - "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "single", "sort": "none" } - }, - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", - "expr": "histogram_quantile(0.5, rate(ollama_request_duration_seconds_bucket[5m]))", - "instant": false, - "legendFormat": "p50", - "range": true, - "refId": "A" - } - ] - }, - { - "id": 2, - "type": "timeseries", - "title": "Inference Latency p95", - "description": "95th percentile of Ollama request duration over a 5-minute window", - "gridPos": { "h": 8, "w": 8, "x": 8, "y": 0 }, - "datasource": { "type": "prometheus", "uid": "prometheus" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "palette-classic" }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { "type": "linear" }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { "group": "A", "mode": "none" }, - "thresholdsStyle": { "mode": "off" } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null }, - { "color": "red", "value": 80 } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "options": { - "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "single", "sort": "none" } - }, - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, rate(ollama_request_duration_seconds_bucket[5m]))", - "instant": false, - "legendFormat": "p95", - "range": true, - "refId": "A" - } - ] - }, - { - "id": 3, - "type": "timeseries", - "title": "Request Rate", - "description": "Ollama requests per second over a 5-minute window", - "gridPos": { "h": 8, "w": 8, "x": 16, "y": 0 }, - "datasource": { "type": "prometheus", "uid": "prometheus" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "palette-classic" }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { "type": "linear" }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { "group": "A", "mode": "none" }, - "thresholdsStyle": { "mode": "off" } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null }, - { "color": "red", "value": 80 } - ] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "options": { - "legend": { "calcs": ["mean", "max"], "displayMode": "list", "placement": "bottom", "showLegend": true }, - "tooltip": { "mode": "single", "sort": "none" } - }, - "targets": [ - { - "datasource": { "type": "prometheus", "uid": "prometheus" }, - "editorMode": "code", - "expr": "rate(ollama_requests_total[5m])", - "instant": false, - "legendFormat": "req/s", - "range": true, - "refId": "A" - } - ] - } - ], - "preload": false, - "templating": { - "list": [] - } -} diff --git a/infra/observability/prometheus/prometheus.yml b/infra/observability/prometheus/prometheus.yml index 53121566..ecffc410 100644 --- a/infra/observability/prometheus/prometheus.yml +++ b/infra/observability/prometheus/prometheus.yml @@ -22,8 +22,3 @@ scrape_configs: static_configs: - targets: ['ocr-service:8000'] - - job_name: ollama - metrics_path: /metrics - static_configs: - # Uses the Docker service name for reliable DNS resolution. - - targets: ['ollama:11434']