refactor(importing): split DocumentImporter.buildDocument into named applyX helpers
buildDocument was a ~30-line method mixing attribution routing, date parsing, authoritative collection management, file metadata, and computed flags. Split into five named helpers — applyAttribution, applyDates, applyAuthoritativeAssociations, applyFileMetadata, applyComputedFlags — each doing one job. Pure refactor; all 43 existing DocumentImporterTest cases still pass. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -178,39 +178,61 @@ public class DocumentImporter {
|
|||||||
String s3Key, String contentType, DocumentStatus status) {
|
String s3Key, String contentType, DocumentStatus status) {
|
||||||
Document doc = existing != null ? existing
|
Document doc = existing != null ? existing
|
||||||
: Document.builder().originalFilename(index).build();
|
: Document.builder().originalFilename(index).build();
|
||||||
|
applyAttribution(doc, row);
|
||||||
|
applyDates(doc, row);
|
||||||
|
applyAuthoritativeAssociations(doc, row);
|
||||||
|
applyFileMetadata(doc, s3Key, contentType, status, index);
|
||||||
|
applyComputedFlags(doc);
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sender + raw sender/receiver text. The raw cells are always retained verbatim, even
|
||||||
|
// when a person is linked — the load-bearing invariant behind the merge story (ADR-025).
|
||||||
|
private void applyAttribution(Document doc, CanonicalSheetReader.Row row) {
|
||||||
String senderName = row.get("sender_name");
|
String senderName = row.get("sender_name");
|
||||||
String receiverNames = row.get("receiver_names");
|
String receiverNames = row.get("receiver_names");
|
||||||
Person sender = resolveSender(row.get("sender_person_id"), senderName);
|
Person sender = resolveSender(row.get("sender_person_id"), senderName);
|
||||||
|
doc.setSender(sender);
|
||||||
|
doc.setSenderText(blankToNull(senderName));
|
||||||
|
doc.setReceiverText(blankToNull(receiverNames));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Date triplet + raw + location + summary. Pure value parsing, no semantic logic.
|
||||||
|
private void applyDates(Document doc, CanonicalSheetReader.Row row) {
|
||||||
|
doc.setDocumentDate(parseIsoDate(row.get("date_iso")));
|
||||||
|
doc.setMetaDatePrecision(parsePrecision(row.get("date_precision")));
|
||||||
|
doc.setMetaDateEnd(parseIsoDate(row.get("date_end")));
|
||||||
|
doc.setMetaDateRaw(blankToNull(row.get("date_raw")));
|
||||||
|
doc.setLocation(blankToNull(row.get("location")));
|
||||||
|
doc.setSummary(blankToNull(row.get("summary")));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Receivers and tags are owned by the canonical row (ADR-025): clear then re-populate so a
|
||||||
|
// shrunk set on re-import prunes stale links rather than accumulating them. The
|
||||||
|
// "preserve human edits" rule does NOT extend to these collections.
|
||||||
|
private void applyAuthoritativeAssociations(Document doc, CanonicalSheetReader.Row row) {
|
||||||
Set<Person> receivers = resolveReceivers(row.get("receiver_person_ids"));
|
Set<Person> receivers = resolveReceivers(row.get("receiver_person_ids"));
|
||||||
|
doc.getReceivers().clear();
|
||||||
|
doc.getReceivers().addAll(receivers);
|
||||||
|
attachTag(doc, row.get("tags"));
|
||||||
|
}
|
||||||
|
|
||||||
LocalDate date = parseIsoDate(row.get("date_iso"));
|
// S3 key, content type, status, and the title (built from index plus the date/location fields applyDates has already set on doc).
|
||||||
DatePrecision precision = parsePrecision(row.get("date_precision"));
|
private void applyFileMetadata(Document doc, String s3Key, String contentType,
|
||||||
LocalDate dateEnd = parseIsoDate(row.get("date_end"));
|
DocumentStatus status, String index) {
|
||||||
String dateRaw = blankToNull(row.get("date_raw"));
|
|
||||||
String location = blankToNull(row.get("location"));
|
|
||||||
|
|
||||||
doc.setTitle(buildTitle(index, date, precision, dateEnd, dateRaw, location));
|
|
||||||
doc.setStatus(status);
|
doc.setStatus(status);
|
||||||
doc.setFilePath(s3Key);
|
doc.setFilePath(s3Key);
|
||||||
doc.setContentType(contentType);
|
doc.setContentType(contentType);
|
||||||
doc.setSender(sender);
|
doc.setTitle(buildTitle(index, doc.getDocumentDate(), doc.getMetaDatePrecision(),
|
||||||
doc.setSenderText(blankToNull(senderName));
|
doc.getMetaDateEnd(), doc.getMetaDateRaw(), doc.getLocation()));
|
||||||
// The canonical row is authoritative for receivers/tags (ADR-025): clear then
|
}
|
||||||
// re-populate so a shrunk set on re-import prunes stale links rather than
|
|
||||||
// accumulating them. The raw sender_text/receiver_text retention is separate.
|
// metadataComplete: a document counts as fully described if any of the three "who/when"
|
||||||
doc.getReceivers().clear();
|
// pieces is filled. Called last so the upstream setters have already populated the doc.
|
||||||
doc.getReceivers().addAll(receivers);
|
private void applyComputedFlags(Document doc) {
|
||||||
doc.setReceiverText(blankToNull(receiverNames));
|
doc.setMetadataComplete(doc.getDocumentDate() != null
|
||||||
doc.setDocumentDate(date);
|
|| doc.getSender() != null
|
||||||
doc.setMetaDatePrecision(precision);
|
|| !doc.getReceivers().isEmpty());
|
||||||
doc.setMetaDateEnd(dateEnd);
|
|
||||||
doc.setMetaDateRaw(dateRaw);
|
|
||||||
doc.setLocation(location);
|
|
||||||
doc.setSummary(blankToNull(row.get("summary")));
|
|
||||||
attachTag(doc, row.get("tags"));
|
|
||||||
doc.setMetadataComplete(doc.getDocumentDate() != null || sender != null || !receivers.isEmpty());
|
|
||||||
return doc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// The title carries the date at the HONEST precision (never a fabricated day) via the
|
// The title carries the date at the HONEST precision (never a fabricated day) via the
|
||||||
|
|||||||
Reference in New Issue
Block a user