feat(normalizer): person resolution context + to_canonical
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,8 @@
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum, auto
|
||||
|
||||
import dates as _dates
|
||||
|
||||
|
||||
class Triage(Enum):
|
||||
OK = auto()
|
||||
@@ -84,3 +86,33 @@ def index_file_mismatch(index: str, file_path: str) -> bool:
|
||||
basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
|
||||
stem = basename.rsplit(".", 1)[0]
|
||||
return stem != index
|
||||
|
||||
|
||||
def to_canonical(raw, ctx, date_overrides: dict) -> CanonicalDocument:
    """Build a ``CanonicalDocument`` from one raw record.

    The raw date is parsed with ``_dates.parse_date`` (passing
    ``date_overrides``), the sender and receivers are resolved through
    ``ctx``, and each problem noticed along the way is recorded as a string
    flag in the returned document's ``needs_review`` list.

    Flags, in the order they can be appended:

    * ``unmatched_sender`` -- the sender text is non-blank but
      ``ctx.resolve_sender`` reported it as not matched.
    * ``multi_sender`` -- the fourth value returned by
      ``ctx.resolve_sender`` is truthy.
    * ``unmatched_receiver`` -- at least one resolved receiver carries a
      falsy "matched" element.
    * ``unparsed_date`` -- the date text is non-blank but its parsed
      precision equals ``_dates.Precision.UNKNOWN``.
    * ``index_file_mismatch`` -- ``index_file_mismatch(raw.index, raw.file)``
      returned a truthy value.

    Args:
        raw: Source record; read for ``date``, ``sender``, ``receivers``,
            ``source_row``, ``index``, ``file``, ``box``, ``folder``,
            ``location``, ``tags`` and ``summary``.
        ctx: Resolution context providing ``resolve_sender`` and
            ``resolve_receivers``.
        date_overrides: Forwarded unchanged to ``_dates.parse_date``.

    Returns:
        The populated ``CanonicalDocument``.
    """
    parsed_date = _dates.parse_date(raw.date, date_overrides)

    person_id, display_name, is_matched, is_multi = ctx.resolve_sender(
        raw.sender, raw.source_row
    )

    review_flags = []
    sender_text = raw.sender.strip()
    if sender_text and not is_matched:
        review_flags.append("unmatched_sender")
    if is_multi:
        review_flags.append("multi_sender")

    # Each resolved receiver is a 3-item entry: (person id, name, matched).
    resolved = ctx.resolve_receivers(raw.receivers, raw.source_row)
    every_receiver_matched = all(hit for _, _, hit in resolved)
    if not every_receiver_matched:
        review_flags.append("unmatched_receiver")

    # A blank date is not reported as unparsed; only non-blank text that
    # came back with UNKNOWN precision is.
    date_text = raw.date.strip()
    if date_text and parsed_date.precision == _dates.Precision.UNKNOWN:
        review_flags.append("unparsed_date")

    mismatch = index_file_mismatch(raw.index, raw.file)
    if mismatch:
        review_flags.append("index_file_mismatch")

    # The raw tags value is kept as a single list element, not split.
    if raw.tags:
        tag_list = [raw.tags]
    else:
        tag_list = []

    return CanonicalDocument(
        index=raw.index,
        box=raw.box,
        folder=raw.folder,
        sender_person_id=person_id,
        sender_name=display_name,
        receiver_person_ids=[entry[0] for entry in resolved],
        receiver_names=[entry[1] for entry in resolved],
        date_iso=parsed_date.iso or "",
        date_raw=raw.date,
        date_precision=str(parsed_date.precision),
        location=raw.location,
        tags=tag_list,
        summary=raw.summary,
        source_row=raw.source_row,
        needs_review=review_flags,
    )
|
||||
|
||||
Reference in New Issue
Block a user