From 3279342ea747c17994d134290853cf49a627eb96 Mon Sep 17 00:00:00 2001
From: Marcel <marcel@familienarchiv>
Date: Tue, 7 Apr 2026 11:00:23 +0200
Subject: [PATCH] feat(util): add splitByMarkers for [unleserlich] and [...]
 text splitting

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../lib/utils/transcriptionMarkers.spec.ts    | 60 +++++++++++++++++++
 .../src/lib/utils/transcriptionMarkers.ts     | 25 ++++++++
 2 files changed, 85 insertions(+)
 create mode 100644 frontend/src/lib/utils/transcriptionMarkers.spec.ts
 create mode 100644 frontend/src/lib/utils/transcriptionMarkers.ts

diff --git a/frontend/src/lib/utils/transcriptionMarkers.spec.ts b/frontend/src/lib/utils/transcriptionMarkers.spec.ts
new file mode 100644
index 00000000..0f82b728
--- /dev/null
+++ b/frontend/src/lib/utils/transcriptionMarkers.spec.ts
@@ -0,0 +1,60 @@
+import { describe, it, expect } from 'vitest';
+import { splitByMarkers } from './transcriptionMarkers';
+
+describe('splitByMarkers', () => {
+	// Builders for the expected segments, so each case reads as a flat list.
+	// toEqual compares recursively, so plain object literals are sufficient.
+	const text = (value: string) => ({ type: 'text', text: value });
+	const marker = (value: string) => ({ type: 'marker', text: value });
+
+	// Input without any marker comes back as one text segment.
+	it('should return single text segment for plain text', () => {
+		expect(splitByMarkers('Hello world')).toEqual([text('Hello world')]);
+	});
+
+	// Whitespace around a marker stays with the neighbouring text segments.
+	it('should split [unleserlich] into a marker segment', () => {
+		const expected = [text('before '), marker('[unleserlich]'), text(' after')];
+		expect(splitByMarkers('before [unleserlich] after')).toEqual(expected);
+	});
+
+	// Same shape as the case above, for the second recognised marker.
+	it('should split [...] into a marker segment', () => {
+		const expected = [text('some text '), marker('[...]'), text(' more text')];
+		expect(splitByMarkers('some text [...] more text')).toEqual(expected);
+	});
+
+	// Both marker kinds in one input; the marker at the very start must not be
+	// preceded by an empty text segment.
+	it('should handle multiple markers in one string', () => {
+		const expected = [
+			marker('[unleserlich]'),
+			text(' middle '),
+			marker('[...]'),
+			text(' end')
+		];
+		expect(splitByMarkers('[unleserlich] middle [...] end')).toEqual(expected);
+	});
+
+	// A lone marker yields exactly one segment, with no text around it.
+	it('should handle text that is only a marker', () => {
+		expect(splitByMarkers('[unleserlich]')).toEqual([marker('[unleserlich]')]);
+	});
+
+	// Empty input yields no segments at all, not a single empty text segment.
+	it('should handle empty string', () => {
+		expect(splitByMarkers('')).toEqual([]);
+	});
+
+	// Bracketed words other than the two known markers are ordinary text.
+	it('should not match other bracket markers', () => {
+		const untouched = 'text [Seitenumbruch] more';
+		expect(splitByMarkers(untouched)).toEqual([text(untouched)]);
+	});
+
+	// Back-to-back markers produce no empty text segment between them.
+	it('should handle adjacent markers', () => {
+		const expected = [marker('[unleserlich]'), marker('[...]')];
+		expect(splitByMarkers('[unleserlich][...]')).toEqual(expected);
+	});
+});
diff --git a/frontend/src/lib/utils/transcriptionMarkers.ts b/frontend/src/lib/utils/transcriptionMarkers.ts
new file mode 100644
index 00000000..e1f4a26b
--- /dev/null
+++ b/frontend/src/lib/utils/transcriptionMarkers.ts
@@ -0,0 +1,25 @@
+/**
+ * One piece of a transcription line after splitting: `type` is 'marker' when
+ * `text` is exactly one recognised marker, and 'text' for everything else.
+ */
+export type TextSegment = { type: 'text' | 'marker'; text: string };
+
+// Recognises only the literal markers `[unleserlich]` and `[...]`. The capture
+// group is what makes String.prototype.split keep the markers in its result.
+const MARKER_PATTERN = /(\[unleserlich\]|\[\.{3}\])/g;
+
+/**
+ * Breaks `input` into text and marker segments in their original order.
+ * Joining every segment's `text` gives back `input`; empty text runs are
+ * dropped, and an empty `input` produces an empty array.
+ */
+export function splitByMarkers(input: string): TextSegment[] {
+	if (!input) return [];
+
+	// split() with a capturing pattern alternates text (even slots, possibly '')
+	// and matched markers (odd slots).
+	return input.split(MARKER_PATTERN).flatMap((piece, slot): TextSegment[] => {
+		if (slot % 2 === 1) return [{ type: 'marker', text: piece }];
+		return piece ? [{ type: 'text', text: piece }] : [];
+	});
+}