feat(util): add splitByMarkers for [unleserlich] and [...] text splitting
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
60
frontend/src/lib/utils/transcriptionMarkers.spec.ts
Normal file
60
frontend/src/lib/utils/transcriptionMarkers.spec.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { splitByMarkers } from './transcriptionMarkers';
|
||||
|
||||
// Specification for splitByMarkers: the input is cut into an ordered list of
// plain-text and marker segments, where only the literal tokens
// "[unleserlich]" and "[...]" count as markers.
describe('splitByMarkers', () => {
  it('should return single text segment for plain text', () => {
    const result = splitByMarkers('Hello world');
    expect(result).toEqual([{ type: 'text', text: 'Hello world' }]);
  });

  it('should split [unleserlich] into a marker segment', () => {
    const result = splitByMarkers('before [unleserlich] after');
    expect(result).toEqual([
      { type: 'text', text: 'before ' },
      { type: 'marker', text: '[unleserlich]' },
      { type: 'text', text: ' after' }
    ]);
  });

  it('should split [...] into a marker segment', () => {
    const result = splitByMarkers('some text [...] more text');
    expect(result).toEqual([
      { type: 'text', text: 'some text ' },
      { type: 'marker', text: '[...]' },
      { type: 'text', text: ' more text' }
    ]);
  });

  it('should handle multiple markers in one string', () => {
    const result = splitByMarkers('[unleserlich] middle [...] end');
    expect(result).toEqual([
      { type: 'marker', text: '[unleserlich]' },
      { type: 'text', text: ' middle ' },
      { type: 'marker', text: '[...]' },
      { type: 'text', text: ' end' }
    ]);
  });

  it('should handle text that is only a marker', () => {
    const result = splitByMarkers('[unleserlich]');
    expect(result).toEqual([{ type: 'marker', text: '[unleserlich]' }]);
  });

  it('should handle empty string', () => {
    const result = splitByMarkers('');
    expect(result).toEqual([]);
  });

  it('should not match other bracket markers', () => {
    const result = splitByMarkers('text [Seitenumbruch] more');
    expect(result).toEqual([{ type: 'text', text: 'text [Seitenumbruch] more' }]);
  });

  it('should handle adjacent markers', () => {
    const result = splitByMarkers('[unleserlich][...]');
    expect(result).toEqual([
      { type: 'marker', text: '[unleserlich]' },
      { type: 'marker', text: '[...]' }
    ]);
  });

  // No trailing text segment may be emitted when the input ends on a marker.
  it('should handle a marker at the end of the string', () => {
    const result = splitByMarkers('text [...]');
    expect(result).toEqual([
      { type: 'text', text: 'text ' },
      { type: 'marker', text: '[...]' }
    ]);
  });

  // An unterminated bracket is ordinary text, not a marker.
  it('should not match an unclosed marker', () => {
    const result = splitByMarkers('text [unleserlich more');
    expect(result).toEqual([{ type: 'text', text: 'text [unleserlich more' }]);
  });

  // Guards against state leaking between calls through a shared global regex.
  it('should return the same result on repeated calls', () => {
    const expected = [
      { type: 'text', text: 'a ' },
      { type: 'marker', text: '[...]' },
      { type: 'text', text: ' b' }
    ];
    expect(splitByMarkers('a [...] b')).toEqual(expected);
    expect(splitByMarkers('a [...] b')).toEqual(expected);
  });
});
|
||||
25
frontend/src/lib/utils/transcriptionMarkers.ts
Normal file
25
frontend/src/lib/utils/transcriptionMarkers.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
/**
 * One piece of a string processed by {@link splitByMarkers}.
 *
 * `type` is `'marker'` when `text` is exactly one of the recognised marker
 * tokens, and `'text'` for everything in between.
 */
export type TextSegment = { type: 'text' | 'marker'; text: string };

/**
 * Matches the two recognised marker tokens literally: `[unleserlich]`
 * (German for "illegible") and `[...]`. Any other bracketed text is not
 * matched. The `g` flag is required by `String.prototype.matchAll`.
 */
const MARKER_PATTERN = /(\[unleserlich\]|\[\.{3}\])/g;

/**
 * Splits `input` into an ordered list of text and marker segments.
 *
 * Concatenating the `text` of every returned segment reproduces `input`.
 * Empty text segments are never emitted, so adjacent markers yield adjacent
 * marker segments and a string that is only a marker yields one segment.
 *
 * @param input - The string to split.
 * @returns The segments in source order; an empty array for an empty string.
 */
export function splitByMarkers(input: string): TextSegment[] {
  if (!input) return [];

  const segments: TextSegment[] = [];
  // Offset of the first character not yet copied into `segments`.
  let lastIndex = 0;

  for (const match of input.matchAll(MARKER_PATTERN)) {
    // `index` is declared optional on RegExpMatchArray in some TypeScript lib
    // versions; the fallback only satisfies strict null checks and treats the
    // match as starting at the current offset.
    const matchStart = match.index ?? lastIndex;
    if (matchStart > lastIndex) {
      segments.push({ type: 'text', text: input.slice(lastIndex, matchStart) });
    }
    segments.push({ type: 'marker', text: match[0] });
    lastIndex = matchStart + match[0].length;
  }

  // Whatever follows the last marker (or the whole input if none matched).
  if (lastIndex < input.length) {
    segments.push({ type: 'text', text: input.slice(lastIndex) });
  }

  return segments;
}
|
||||
Reference in New Issue
Block a user