refactor: move document domain core to lib/document/
Moves ~25 components, utils (search, filename, groupDocuments, documentStatusLabel, validateFile), bulkSelection store, and TranscriptionSection sub-component. Fixes broken relative imports. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
87
frontend/src/lib/document/filename.ts
Normal file
87
frontend/src/lib/document/filename.ts
Normal file
@@ -0,0 +1,87 @@
|
||||
import { isoToGerman } from '$lib/utils/date';
|
||||
|
||||
export interface FilenameParseResult {
|
||||
/** ISO format: YYYY-MM-DD */
|
||||
dateIso?: string;
|
||||
/** "Firstname Lastname" — order reversed from filename convention */
|
||||
personName?: string;
|
||||
/** Ready-to-use title, e.g. "Hans Mueller (12.03.1965)" */
|
||||
suggestedTitle?: string;
|
||||
}
|
||||
|
||||
// A date token is either YYYY-MM-DD or YYYYMMDD with a plausible month/day range.
|
||||
function tryParseDate(s: string): string | undefined {
|
||||
if (/^\d{4}-\d{2}-\d{2}$/.test(s)) {
|
||||
const m = parseInt(s.slice(5, 7));
|
||||
const d = parseInt(s.slice(8, 10));
|
||||
if (m >= 1 && m <= 12 && d >= 1 && d <= 31) return s;
|
||||
} else if (/^\d{8}$/.test(s)) {
|
||||
const m = parseInt(s.slice(4, 6));
|
||||
const d = parseInt(s.slice(6, 8));
|
||||
if (m >= 1 && m <= 12 && d >= 1 && d <= 31)
|
||||
return `${s.slice(0, 4)}-${s.slice(4, 6)}-${s.slice(6, 8)}`;
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const NAME_PART = /^\p{L}+$/u;
|
||||
|
||||
/**
|
||||
* Parses a structured filename and extracts a date and person name.
|
||||
*
|
||||
* Supported conventions (date-first or date-last, compound last names supported):
|
||||
* YYYY-MM-DD_Lastname_Firstname.ext
|
||||
* YYYYMMDD_Lastname_Firstname.ext
|
||||
* YYYYMMDD_de_Gruyter_Walter.ext ← compound last name: lastName="de Gruyter"
|
||||
* Lastname_Firstname_YYYY-MM-DD.ext
|
||||
* Lastname_Firstname_YYYYMMDD.ext
|
||||
* de_Gruyter_Walter_YYYYMMDD.ext ← compound last name: lastName="de Gruyter"
|
||||
*
|
||||
* Algorithm: split on "_", identify the date token (first or last segment),
|
||||
* treat the outermost remaining segment as firstName, rest as lastName parts.
|
||||
* Returns {} for anything that doesn't match cleanly.
|
||||
*/
|
||||
export function parseFilename(filename: string): FilenameParseResult {
|
||||
const dot = filename.lastIndexOf('.');
|
||||
if (dot < 0) return {}; // no extension — not a real file
|
||||
const stem = filename.slice(0, dot);
|
||||
const parts = stem.split('_');
|
||||
|
||||
// Minimum: date + at least one lastName segment + firstName = 3 parts
|
||||
if (parts.length < 3) return {};
|
||||
|
||||
let dateIso: string;
|
||||
let nameParts: string[];
|
||||
|
||||
const dateFromFirst = tryParseDate(parts[0]);
|
||||
if (dateFromFirst) {
|
||||
dateIso = dateFromFirst;
|
||||
nameParts = parts.slice(1);
|
||||
} else {
|
||||
const dateFromLast = tryParseDate(parts[parts.length - 1]);
|
||||
if (!dateFromLast) return {};
|
||||
dateIso = dateFromLast;
|
||||
nameParts = parts.slice(0, -1);
|
||||
}
|
||||
|
||||
// Need at least lastName + firstName after removing the date
|
||||
if (nameParts.length < 2) return {};
|
||||
|
||||
// All name segments must be pure letters (covers umlauts via \p{L})
|
||||
if (!nameParts.every((p) => NAME_PART.test(p))) return {};
|
||||
|
||||
const firstName = nameParts[nameParts.length - 1];
|
||||
const lastName = nameParts.slice(0, -1).join(' ');
|
||||
const personName = `${firstName} ${lastName}`;
|
||||
const suggestedTitle = `${personName} (${isoToGerman(dateIso)})`;
|
||||
|
||||
return { dateIso, personName, suggestedTitle };
|
||||
}
|
||||
|
||||
export function stripExtension(filename: string): string {
|
||||
return filename.replace(/\.[^/.]+$/, '');
|
||||
}
|
||||
|
||||
export function bulkTitleFromFilename(filename: string): string {
|
||||
return stripExtension(filename).replace(/[_-]+/g, ' ').trim();
|
||||
}
|
||||
Reference in New Issue
Block a user