From e93b09f1e23c9989b5b1a377f38314d691f1a8c8 Mon Sep 17 00:00:00 2001 From: Marcel Date: Thu, 28 May 2026 10:23:24 +0200 Subject: [PATCH] refactor(importing): split DocumentImporter.buildDocument into named applyX helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit buildDocument was a ~30-line method mixing attribution routing, date parsing, authoritative collection management, file metadata, and computed flags. Split into five named helpers — applyAttribution, applyDates, applyAuthoritativeAssociations, applyFileMetadata, applyComputedFlags — each doing one job. Pure refactor; all 43 existing DocumentImporterTest cases still pass. Co-Authored-By: Claude Opus 4.7 --- .../importing/DocumentImporter.java | 70 ++++++++++++------- 1 file changed, 46 insertions(+), 24 deletions(-) diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java index ab693920..5e6e8a6f 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/DocumentImporter.java @@ -178,39 +178,61 @@ public class DocumentImporter { String s3Key, String contentType, DocumentStatus status) { Document doc = existing != null ? existing : Document.builder().originalFilename(index).build(); + applyAttribution(doc, row); + applyDates(doc, row); + applyAuthoritativeAssociations(doc, row); + applyFileMetadata(doc, s3Key, contentType, status, index); + applyComputedFlags(doc); + return doc; + } + // Sender + raw sender/receiver text. The raw cells are always retained verbatim, even + // when a person is linked — the load-bearing invariant behind the merge story (ADR-025). 
+ private void applyAttribution(Document doc, CanonicalSheetReader.Row row) { String senderName = row.get("sender_name"); String receiverNames = row.get("receiver_names"); Person sender = resolveSender(row.get("sender_person_id"), senderName); + doc.setSender(sender); + doc.setSenderText(blankToNull(senderName)); + doc.setReceiverText(blankToNull(receiverNames)); + } + + // Date triplet + raw + location + summary. Pure value parsing, no semantic logic. + private void applyDates(Document doc, CanonicalSheetReader.Row row) { + doc.setDocumentDate(parseIsoDate(row.get("date_iso"))); + doc.setMetaDatePrecision(parsePrecision(row.get("date_precision"))); + doc.setMetaDateEnd(parseIsoDate(row.get("date_end"))); + doc.setMetaDateRaw(blankToNull(row.get("date_raw"))); + doc.setLocation(blankToNull(row.get("location"))); + doc.setSummary(blankToNull(row.get("summary"))); + } + + // Receivers and tags are owned by the canonical row (ADR-025): clear then re-populate so a + // shrunk set on re-import prunes stale links rather than accumulating them. The + // "preserve human edits" rule does NOT extend to these collections. + private void applyAuthoritativeAssociations(Document doc, CanonicalSheetReader.Row row) { Set receivers = resolveReceivers(row.get("receiver_person_ids")); + doc.getReceivers().clear(); + doc.getReceivers().addAll(receivers); + attachTag(doc, row.get("tags")); + } - LocalDate date = parseIsoDate(row.get("date_iso")); - DatePrecision precision = parsePrecision(row.get("date_precision")); - LocalDate dateEnd = parseIsoDate(row.get("date_end")); - String dateRaw = blankToNull(row.get("date_raw")); - String location = blankToNull(row.get("location")); - - doc.setTitle(buildTitle(index, date, precision, dateEnd, dateRaw, location)); + // S3 key, content type, status, and the title (index + doc dates/location; run after applyDates). 
+ private void applyFileMetadata(Document doc, String s3Key, String contentType, + DocumentStatus status, String index) { doc.setStatus(status); doc.setFilePath(s3Key); doc.setContentType(contentType); - doc.setSender(sender); - doc.setSenderText(blankToNull(senderName)); - // The canonical row is authoritative for receivers/tags (ADR-025): clear then - // re-populate so a shrunk set on re-import prunes stale links rather than - // accumulating them. The raw sender_text/receiver_text retention is separate. - doc.getReceivers().clear(); - doc.getReceivers().addAll(receivers); - doc.setReceiverText(blankToNull(receiverNames)); - doc.setDocumentDate(date); - doc.setMetaDatePrecision(precision); - doc.setMetaDateEnd(dateEnd); - doc.setMetaDateRaw(dateRaw); - doc.setLocation(location); - doc.setSummary(blankToNull(row.get("summary"))); - attachTag(doc, row.get("tags")); - doc.setMetadataComplete(doc.getDocumentDate() != null || sender != null || !receivers.isEmpty()); - return doc; + doc.setTitle(buildTitle(index, doc.getDocumentDate(), doc.getMetaDatePrecision(), + doc.getMetaDateEnd(), doc.getMetaDateRaw(), doc.getLocation())); + } + + // metadataComplete: a document counts as fully described if any of the three "who/when" + // pieces is filled. Called last so the upstream setters have already populated the doc. + private void applyComputedFlags(Document doc) { + doc.setMetadataComplete(doc.getDocumentDate() != null + || doc.getSender() != null + || !doc.getReceivers().isEmpty()); } // The title carries the date at the HONEST precision (never a fabricated day) via the