refactor(importing): split DocumentImporter.buildDocument into named applyX helpers
buildDocument was a ~30-line method mixing attribution routing, date parsing, authoritative collection management, file metadata, and computed flags. Split into five named helpers — applyAttribution, applyDates, applyAuthoritativeAssociations, applyFileMetadata, applyComputedFlags — each doing one job. Pure refactor; all 43 existing DocumentImporterTest cases still pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -178,39 +178,61 @@ public class DocumentImporter {
|
||||
String s3Key, String contentType, DocumentStatus status) {
|
||||
Document doc = existing != null ? existing
|
||||
: Document.builder().originalFilename(index).build();
|
||||
applyAttribution(doc, row);
|
||||
applyDates(doc, row);
|
||||
applyAuthoritativeAssociations(doc, row);
|
||||
applyFileMetadata(doc, s3Key, contentType, status, index);
|
||||
applyComputedFlags(doc);
|
||||
return doc;
|
||||
}
|
||||
|
||||
// Sender + raw sender/receiver text. The raw cells are always retained verbatim, even
|
||||
// when a person is linked — the load-bearing invariant behind the merge story (ADR-025).
|
||||
private void applyAttribution(Document doc, CanonicalSheetReader.Row row) {
|
||||
String senderName = row.get("sender_name");
|
||||
String receiverNames = row.get("receiver_names");
|
||||
Person sender = resolveSender(row.get("sender_person_id"), senderName);
|
||||
doc.setSender(sender);
|
||||
doc.setSenderText(blankToNull(senderName));
|
||||
doc.setReceiverText(blankToNull(receiverNames));
|
||||
}
|
||||
|
||||
// Date triplet + raw + location. Pure value parsing, no semantic logic.
|
||||
private void applyDates(Document doc, CanonicalSheetReader.Row row) {
|
||||
doc.setDocumentDate(parseIsoDate(row.get("date_iso")));
|
||||
doc.setMetaDatePrecision(parsePrecision(row.get("date_precision")));
|
||||
doc.setMetaDateEnd(parseIsoDate(row.get("date_end")));
|
||||
doc.setMetaDateRaw(blankToNull(row.get("date_raw")));
|
||||
doc.setLocation(blankToNull(row.get("location")));
|
||||
doc.setSummary(blankToNull(row.get("summary")));
|
||||
}
|
||||
|
||||
// Receivers and tags are owned by the canonical row (ADR-025): clear then re-populate so a
|
||||
// shrunk set on re-import prunes stale links rather than accumulating them. The
|
||||
// "preserve human edits" rule does NOT extend to these collections.
|
||||
private void applyAuthoritativeAssociations(Document doc, CanonicalSheetReader.Row row) {
|
||||
Set<Person> receivers = resolveReceivers(row.get("receiver_person_ids"));
|
||||
doc.getReceivers().clear();
|
||||
doc.getReceivers().addAll(receivers);
|
||||
attachTag(doc, row.get("tags"));
|
||||
}
|
||||
|
||||
LocalDate date = parseIsoDate(row.get("date_iso"));
|
||||
DatePrecision precision = parsePrecision(row.get("date_precision"));
|
||||
LocalDate dateEnd = parseIsoDate(row.get("date_end"));
|
||||
String dateRaw = blankToNull(row.get("date_raw"));
|
||||
String location = blankToNull(row.get("location"));
|
||||
|
||||
doc.setTitle(buildTitle(index, date, precision, dateEnd, dateRaw, location));
|
||||
// S3 key, content type, status, and the index-derived title.
|
||||
private void applyFileMetadata(Document doc, String s3Key, String contentType,
|
||||
DocumentStatus status, String index) {
|
||||
doc.setStatus(status);
|
||||
doc.setFilePath(s3Key);
|
||||
doc.setContentType(contentType);
|
||||
doc.setSender(sender);
|
||||
doc.setSenderText(blankToNull(senderName));
|
||||
// The canonical row is authoritative for receivers/tags (ADR-025): clear then
|
||||
// re-populate so a shrunk set on re-import prunes stale links rather than
|
||||
// accumulating them. The raw sender_text/receiver_text retention is separate.
|
||||
doc.getReceivers().clear();
|
||||
doc.getReceivers().addAll(receivers);
|
||||
doc.setReceiverText(blankToNull(receiverNames));
|
||||
doc.setDocumentDate(date);
|
||||
doc.setMetaDatePrecision(precision);
|
||||
doc.setMetaDateEnd(dateEnd);
|
||||
doc.setMetaDateRaw(dateRaw);
|
||||
doc.setLocation(location);
|
||||
doc.setSummary(blankToNull(row.get("summary")));
|
||||
attachTag(doc, row.get("tags"));
|
||||
doc.setMetadataComplete(doc.getDocumentDate() != null || sender != null || !receivers.isEmpty());
|
||||
return doc;
|
||||
doc.setTitle(buildTitle(index, doc.getDocumentDate(), doc.getMetaDatePrecision(),
|
||||
doc.getMetaDateEnd(), doc.getMetaDateRaw(), doc.getLocation()));
|
||||
}
|
||||
|
||||
// metadataComplete: a document counts as fully described if any of the three "who/when"
|
||||
// pieces is filled. Called last so the upstream setters have already populated the doc.
|
||||
private void applyComputedFlags(Document doc) {
|
||||
doc.setMetadataComplete(doc.getDocumentDate() != null
|
||||
|| doc.getSender() != null
|
||||
|| !doc.getReceivers().isEmpty());
|
||||
}
|
||||
|
||||
// The title carries the date at the HONEST precision (never a fabricated day) via the
|
||||
|
||||
Reference in New Issue
Block a user