"""Generate a SQL seed file with random persons, documents and receivers.

Running this file as a script writes INSERT statements for the ``persons``,
``documents`` and ``document_receivers`` tables to ``OUTPUT_FILE``.
"""

import datetime
import random
import uuid
from typing import List, Sequence, Tuple

# --- Configuration ---
NUM_PERSONS = 50
NUM_DOCUMENTS = 500
OUTPUT_FILE = "large-data.sql"
# Upper bound for the number of receivers attached to a single document.
MAX_RECEIVERS = 3

# --- Source Data ---
FIRST_NAMES = [
    "Hans", "Helga", "Thomas", "Maria", "Otto", "Frieda", "Heinrich", "Anna",
    "Wilhelm", "Elisabeth", "Paul", "Gertrud", "Karl", "Martha", "Fritz", "Erna",
]
LAST_NAMES = [
    "Müller", "Schmidt", "Schneider", "Fischer", "Weber", "Meyer", "Wagner",
    "Becker", "Schulz", "Hoffmann", "Raddatz", "Koch", "Richter", "Klein",
]
CITIES = [
    "Berlin", "München", "Hamburg", "Köln", "Frankfurt", "Leipzig", "Dresden",
    "Breslau", "Königsberg", "Wien", "Stuttgart",
]
TITLES = [
    "Brief von", "Rechnung", "Postkarte aus", "Notiz an",
    "Dokument betreffend", "Urkunde für", "Foto von",
]


def random_date(start_year=1900, end_year=2000):
    """Return a random date between Jan 1 of start_year and Dec 31 of end_year.

    Both boundary days are possible results because ``random.randint`` is
    inclusive on both ends.
    """
    start = datetime.date(start_year, 1, 1)
    end = datetime.date(end_year, 12, 31)
    return start + datetime.timedelta(days=random.randint(0, (end - start).days))


def sql_literal(value) -> str:
    """Return ``str(value)`` as a single-quoted SQL string literal.

    Embedded single quotes are doubled so that a value such as ``O'Brien``
    cannot terminate the literal early and corrupt the generated statement.
    """
    return "'" + str(value).replace("'", "''") + "'"


def generate_persons(count: int) -> Tuple[List[str], List[str]]:
    """Create ``count`` random persons.

    Returns:
        A ``(person_ids, sql_lines)`` tuple. ``sql_lines`` starts with the
        ``-- Persons`` section comment followed by one INSERT per person.
    """
    person_ids = []
    lines = ["-- Persons"]
    for _ in range(count):
        p_id = str(uuid.uuid4())
        fn = random.choice(FIRST_NAMES)
        ln = random.choice(LAST_NAMES)
        person_ids.append(p_id)
        lines.append(
            "INSERT INTO persons (id, first_name, last_name, alias) "
            f"VALUES ({sql_literal(p_id)}, {sql_literal(fn)}, {sql_literal(ln)}, NULL) "
            "ON CONFLICT DO NOTHING;"
        )
    return person_ids, lines


def generate_documents(count: int, persons: Sequence[str]) -> Tuple[List[str], List[str]]:
    """Create ``count`` random documents, each sent by one of ``persons``.

    Args:
        count: Number of documents to generate.
        persons: Person ids to pick senders from.

    Returns:
        A ``(document_ids, sql_lines)`` tuple. ``sql_lines`` starts with the
        ``-- Documents`` section comment followed by one INSERT per document.

    Raises:
        ValueError: If documents are requested but ``persons`` is empty, since
            every generated document references a sender.
    """
    if count > 0 and not persons:
        raise ValueError("cannot generate documents without at least one person as sender")

    document_ids = []
    # The leading newline separates this section from the previous one.
    lines = ["\n-- Documents"]
    for _ in range(count):
        doc_id = str(uuid.uuid4())
        document_ids.append(doc_id)

        sender_id = random.choice(persons)
        title_start = random.choice(TITLES)
        date = random_date()
        year = date.year
        city = random.choice(CITIES)
        title = f"{title_start} {city} {year}"

        # Simple transcription text
        transcription = (
            f"Lieber Empfänger, dies ist ein Testdokument aus {city}, "
            f"geschrieben am {date}. Das Wetter war schön."
        )
        filename = f"scan_{year}_{random.randint(100, 999)}.pdf"

        lines.append(
            "INSERT INTO documents (id, title, original_filename, file_path, status, "
            "meta_date, meta_location, transcription, sender_id, created_at, updated_at) "
            f"VALUES ({sql_literal(doc_id)}, {sql_literal(title)}, {sql_literal(filename)}, "
            f"'dummy/path.pdf', 'UPLOADED', {sql_literal(date)}, {sql_literal(city)}, "
            f"{sql_literal(transcription)}, {sql_literal(sender_id)}, NOW(), NOW()) "
            "ON CONFLICT DO NOTHING;"
        )
    return document_ids, lines


def generate_receivers(
    document_ids: Sequence[str],
    persons: Sequence[str],
    max_receivers: int = MAX_RECEIVERS,
) -> List[str]:
    """Link every document to between 0 and ``max_receivers`` distinct persons.

    Args:
        document_ids: Ids of the documents to attach receivers to.
        persons: Person ids to draw receivers from.
        max_receivers: Largest number of receivers a document may get.

    Returns:
        SQL lines starting with the ``-- Receivers`` section comment followed
        by one INSERT per (document, receiver) pair.
    """
    # The leading newline separates this section from the previous one.
    lines = ["\n-- Receivers"]
    # random.sample raises ValueError when asked for more items than the
    # population holds, so never request more receivers than persons exist.
    upper = max(0, min(max_receivers, len(persons)))
    for doc_id in document_ids:
        num_receivers = random.randint(0, upper)
        for rec_id in random.sample(persons, num_receivers):
            lines.append(
                "INSERT INTO document_receivers (document_id, person_id) "
                f"VALUES ({sql_literal(doc_id)}, {sql_literal(rec_id)}) "
                "ON CONFLICT DO NOTHING;"
            )
    return lines


def build_sql_lines(num_persons: int = NUM_PERSONS, num_documents: int = NUM_DOCUMENTS) -> List[str]:
    """Return all SQL lines (persons, documents, receivers) in file order."""
    # 1. Generate Persons
    persons, sql_lines = generate_persons(num_persons)
    # 2. Generate Documents
    document_ids, document_lines = generate_documents(num_documents, persons)
    sql_lines.extend(document_lines)
    # 3. Generate Receivers (Many-to-Many)
    sql_lines.extend(generate_receivers(document_ids, persons))
    return sql_lines


def main():
    """Generate the seed data and write it to ``OUTPUT_FILE``."""
    print(f"Generating {NUM_PERSONS} persons and {NUM_DOCUMENTS} documents...")
    sql_lines = build_sql_lines(NUM_PERSONS, NUM_DOCUMENTS)

    # --- Write to File ---
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write("\n".join(sql_lines))

    print(f"Done! Created {OUTPUT_FILE}")


if __name__ == "__main__":
    main()