/**
 * One contiguous slice of a transcription string: either literal text or
 * one of the recognised bracket markers.
 */
export type TextSegment = { type: 'text' | 'marker'; text: string };

/**
 * Recognises the two supported markers, `[unleserlich]` (German for
 * "illegible") and `[...]`.
 *
 * The capture group is required: `String.prototype.split` only keeps the
 * separators in its result when they are captured. The pattern is
 * deliberately not global, so it carries no `lastIndex` state between calls.
 */
const MARKER_PATTERN = /(\[unleserlich\]|\[\.{3}\])/;

/**
 * Splits `input` into an ordered list of text and marker segments.
 *
 * Concatenating the `text` of every returned segment reproduces `input`.
 * Other bracketed tokens (for example `[Seitenumbruch]`) are not markers and
 * stay inside the surrounding text segment. Empty text segments are never
 * emitted, so adjacent markers yield adjacent marker segments.
 *
 * @param input - The string to split.
 * @returns The segments in source order; an empty array for an empty input.
 *
 * @example
 * splitByMarkers('before [unleserlich] after');
 * // [
 * //   { type: 'text', text: 'before ' },
 * //   { type: 'marker', text: '[unleserlich]' },
 * //   { type: 'text', text: ' after' }
 * // ]
 */
export function splitByMarkers(input: string): TextSegment[] {
  if (!input) return [];

  const segments: TextSegment[] = [];

  // Splitting on a pattern with exactly one capture group yields a strictly
  // alternating list: text, marker, text, marker, ..., text. Even positions
  // are text (possibly empty), odd positions are the captured markers.
  for (const [position, piece] of input.split(MARKER_PATTERN).entries()) {
    if (position % 2 === 1) {
      segments.push({ type: 'marker', text: piece });
    } else if (piece !== '') {
      // Skip the empty strings split produces around leading, trailing,
      // and back-to-back markers.
      segments.push({ type: 'text', text: piece });
    }
  }

  return segments;
}