feat(filename): add parseFilename utility with full-pattern-only matching
Supports four patterns: two segment orders (date_lastname_firstname and
lastname_firstname_date), each with ISO (YYYY-MM-DD) or compact (YYYYMMDD) dates.
Returns dateIso, personName and a formatted suggestedTitle.
Partial matches are rejected — unrecognised filenames return {}.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
85
frontend/src/lib/utils/filename.spec.ts
Normal file
85
frontend/src/lib/utils/filename.spec.ts
Normal file
@@ -0,0 +1,85 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { parseFilename, stripExtension } from './filename';
|
||||
|
||||
// Spec for parseFilename: every supported full pattern yields date, name and title,
// while anything that does not match a full pattern yields an empty result.
describe('parseFilename', () => {
  // All four positive "Mueller_Hans" fixtures encode the same person and date.
  const hansMueller = {
    dateIso: '1965-03-12',
    personName: 'Hans Mueller',
    suggestedTitle: 'Hans Mueller (12.03.1965)'
  };

  describe('YYYY-MM-DD_Lastname_Firstname pattern', () => {
    it('extracts date and name', () => {
      const parsed = parseFilename('1965-03-12_Mueller_Hans.pdf');
      expect(parsed).toEqual(hansMueller);
    });

    it('handles umlauts in names', () => {
      const { personName } = parseFilename('2024-01-15_Müller_Jürgen.pdf');
      expect(personName).toBe('Jürgen Müller');
    });
  });

  describe('YYYYMMDD_Lastname_Firstname pattern', () => {
    it('extracts date and name', () => {
      const parsed = parseFilename('19650312_Mueller_Hans.pdf');
      expect(parsed).toEqual(hansMueller);
    });
  });

  describe('Lastname_Firstname_YYYY-MM-DD pattern', () => {
    it('extracts date and name', () => {
      const parsed = parseFilename('Mueller_Hans_1965-03-12.pdf');
      expect(parsed).toEqual(hansMueller);
    });
  });

  describe('Lastname_Firstname_YYYYMMDD pattern', () => {
    it('extracts date and name', () => {
      const parsed = parseFilename('Mueller_Hans_19650312.pdf');
      expect(parsed).toEqual(hansMueller);
    });
  });

  describe('non-matching filenames', () => {
    // [test title, filename that must be rejected]
    const rejected: Array<[string, string]> = [
      ['returns empty for date-only filename', '1965-03-12.pdf'],
      ['returns empty for name-only filename', 'Mueller_Hans.pdf'],
      ['returns empty for unstructured filename', 'scan_001.pdf'],
      ['returns empty for three name segments without date', 'Mueller_Hans_Juergen.pdf'],
      ['returns empty for filename without extension', '1965-03-12_Mueller_Hans']
    ];

    for (const [title, filename] of rejected) {
      it(title, () => {
        expect(parseFilename(filename)).toEqual({});
      });
    }
  });
});
|
||||
|
||||
// Spec for stripExtension: only the final ".ext" segment is dropped.
describe('stripExtension', () => {
  // [test title, input filename, expected result]
  const cases: Array<[string, string, string]> = [
    ['removes the extension', 'document.pdf', 'document'],
    ['removes only the last extension', 'archive.tar.gz', 'archive.tar'],
    ['leaves names without extension unchanged', 'nodotfile', 'nodotfile']
  ];

  for (const [title, input, expected] of cases) {
    it(title, () => {
      expect(stripExtension(input)).toBe(expected);
    });
  }
});
|
||||
56
frontend/src/lib/utils/filename.ts
Normal file
56
frontend/src/lib/utils/filename.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { isoToGerman } from './date';
|
||||
|
||||
/**
 * Metadata recovered from a document filename.
 *
 * All fields are optional so that an unrecognised filename can be represented
 * by an empty object.
 */
export interface FilenameParseResult {
  /** Date found in the filename, as an ISO string (`YYYY-MM-DD`). */
  dateIso?: string;

  /**
   * Person's name as "Firstname Lastname". The filename convention stores the
   * last name first, so the two segments are swapped here.
   */
  personName?: string;

  /** Display-ready title combining name and date, e.g. "Hans Mueller (12.03.1965)". */
  suggestedTitle?: string;
}
|
||||
|
||||
// Filename grammar. Every pattern is anchored at both ends, so a filename either
// matches in full (three `_`-separated segments plus an extension) or not at all.

// Capture-group fragments shared by the four patterns.
const ISO_DATE_GROUP = String.raw`(\d{4}-\d{2}-\d{2})`; // YYYY-MM-DD
const COMPACT_DATE_GROUP = String.raw`(\d{8})`; // YYYYMMDD
const NAME_GROUP = String.raw`(\p{L}+)`; // Unicode letters only, so umlauts etc. are accepted
const EXTENSION_SUFFIX = String.raw`\.[^.]+`; // a dot followed by one or more non-dot characters

/** Builds an anchored, Unicode-aware pattern from `_`-separated segment groups plus the extension. */
function fullFilenamePattern(...segmentGroups: string[]): RegExp {
  return new RegExp(`^${segmentGroups.join('_')}${EXTENSION_SUFFIX}$`, 'u');
}

// Order: date_lastname_firstname
const P_DATE_ISO_NAME = fullFilenamePattern(ISO_DATE_GROUP, NAME_GROUP, NAME_GROUP);
const P_DATE_COMPACT_NAME = fullFilenamePattern(COMPACT_DATE_GROUP, NAME_GROUP, NAME_GROUP);

// Order: lastname_firstname_date
const P_NAME_DATE_ISO = fullFilenamePattern(NAME_GROUP, NAME_GROUP, ISO_DATE_GROUP);
const P_NAME_DATE_COMPACT = fullFilenamePattern(NAME_GROUP, NAME_GROUP, COMPACT_DATE_GROUP);
|
||||
|
||||
/**
 * Rewrites a compact `YYYYMMDD` date as hyphenated `YYYY-MM-DD`.
 *
 * The conversion is purely positional (characters 0-3, 4-5 and 6-7 joined by
 * hyphens); the input is not validated.
 *
 * @param compact - Date string in `YYYYMMDD` form.
 * @returns The same date with hyphens between year, month and day.
 */
function compactToIso(compact: string): string {
  const year = compact.slice(0, 4);
  const month = compact.slice(4, 6);
  const day = compact.slice(6, 8);
  return [year, month, day].join('-');
}
|
||||
|
||||
/**
 * Checks that a `YYYY-MM-DD` string names a day that exists on the calendar.
 * The filename patterns only guarantee digits, so "1965-13-45" would otherwise pass.
 */
function isRealCalendarDate(iso: string): boolean {
  const year = Number(iso.slice(0, 4));
  const month = Number(iso.slice(5, 7));
  const day = Number(iso.slice(8, 10));

  // setUTCFullYear normalises out-of-range parts (e.g. month 13 rolls into the next
  // year), so the date is real only if all three parts survive the round trip.
  // Unlike Date.UTC, it does not remap years 0-99 to 1900-1999.
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);
  return (
    probe.getUTCFullYear() === year &&
    probe.getUTCMonth() === month - 1 &&
    probe.getUTCDate() === day
  );
}

/**
 * Extracts date, person name and a suggested title from a document filename.
 *
 * Recognised forms (each requires a file extension):
 * `YYYY-MM-DD_Lastname_Firstname`, `YYYYMMDD_Lastname_Firstname`,
 * `Lastname_Firstname_YYYY-MM-DD` and `Lastname_Firstname_YYYYMMDD`.
 *
 * @param filename - Bare filename including its extension.
 * @returns All three result fields when the whole filename matches one of the
 *   patterns and the date exists on the calendar; otherwise an empty object.
 */
export function parseFilename(filename: string): FilenameParseResult {
  let dateIso: string;
  let lastName: string;
  let firstName: string;

  let m: RegExpExecArray | null;

  if ((m = P_DATE_ISO_NAME.exec(filename))) {
    [, dateIso, lastName, firstName] = m;
  } else if ((m = P_DATE_COMPACT_NAME.exec(filename))) {
    [, dateIso, lastName, firstName] = m;
    dateIso = compactToIso(dateIso);
  } else if ((m = P_NAME_DATE_ISO.exec(filename))) {
    [, lastName, firstName, dateIso] = m;
  } else if ((m = P_NAME_DATE_COMPACT.exec(filename))) {
    [, lastName, firstName, dateIso] = m;
    dateIso = compactToIso(dateIso);
  } else {
    return {};
  }

  // Digits in the right shape are not necessarily a date: treat impossible dates
  // like any other unrecognised filename instead of emitting a bogus title.
  if (!isRealCalendarDate(dateIso)) {
    return {};
  }

  // Filenames put the last name first; the display name puts it last.
  const personName = `${firstName} ${lastName}`;
  const suggestedTitle = `${personName} (${isoToGerman(dateIso)})`;
  return { dateIso, personName, suggestedTitle };
}
|
||||
|
||||
/**
 * Removes the last extension (`.ext`) from a filename.
 *
 * Only the final dot-segment is removed (`archive.tar.gz` becomes `archive.tar`).
 * A name without a dot is returned unchanged, and so is a name whose only dot is
 * the leading one (`.gitignore`, `dir/.hidden`): there the dot marks a hidden
 * file rather than an extension, and stripping it would leave an empty name.
 *
 * @param filename - Filename, optionally preceded by `/`-separated directories.
 * @returns The filename without its last extension.
 */
export function stripExtension(filename: string): string {
  // The captured character guarantees the dot is neither the first character of
  // the string nor directly after a path separator; it is put back via `$1`.
  return filename.replace(/([^/])\.[^/.]+$/, '$1');
}
|
||||
Reference in New Issue
Block a user