# - backend/workspaces/backend/ → backend/
# - backend/workspaces/frontend/ → frontend/
# - backend/.devcontainer/ + .vscode/ → repo root (where VS Code expects them)
# - loose scripts/SQL files → scripts/
# - replace nested git repo with single repo at project root
# - update docker-compose.yml build context and devcontainer.json path
# - add root .gitignore
#
# Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
#
# 74 lines · 2.9 KiB · Python
import uuid
|
|
import random
|
|
import datetime
|
|
|
|
# --- Configuration ---
# How many rows the script emits for the persons table.
NUM_PERSONS = 50
# How many rows the script emits for the documents table.
NUM_DOCUMENTS = 500
# Path of the SQL file that receives all generated statements.
OUTPUT_FILE = "large-data.sql"

# --- Source Data ---
# Value pools the generator picks from at random. They are kept as lists so
# they can be passed directly to random.choice().
FIRST_NAMES = [
    "Hans", "Helga", "Thomas", "Maria", "Otto", "Frieda", "Heinrich", "Anna",
    "Wilhelm", "Elisabeth", "Paul", "Gertrud", "Karl", "Martha", "Fritz", "Erna",
]
LAST_NAMES = [
    "Müller", "Schmidt", "Schneider", "Fischer", "Weber", "Meyer", "Wagner",
    "Becker", "Schulz", "Hoffmann", "Raddatz", "Koch", "Richter", "Klein",
]
CITIES = [
    "Berlin", "München", "Hamburg", "Köln", "Frankfurt", "Leipzig", "Dresden",
    "Breslau", "Königsberg", "Wien", "Stuttgart",
]
# Title prefixes; the generator appends a city and a year to each.
TITLES = [
    "Brief von", "Rechnung", "Postkarte aus", "Notiz an",
    "Dokument betreffend", "Urkunde für", "Foto von",
]
def random_date(start_year: int = 1900, end_year: int = 2000) -> datetime.date:
    """Return a random calendar date within a range of years.

    The result lies between 1 January of ``start_year`` and 31 December of
    ``end_year``, both bounds included. The value is drawn from the
    module-level ``random`` generator, so ``random.seed()`` makes it
    reproducible.

    Args:
        start_year: First year of the range.
        end_year: Last year of the range.

    Returns:
        A ``datetime.date`` inside the requested range.

    Raises:
        ValueError: If ``end_year`` is earlier than ``start_year``.
    """
    if end_year < start_year:
        # random.randint would raise ValueError for the negative span anyway;
        # fail early with a message that names the actual problem.
        raise ValueError(
            f"end_year ({end_year}) must not be earlier than start_year ({start_year})"
        )

    start = datetime.date(start_year, 1, 1)
    end = datetime.date(end_year, 12, 31)
    span_days = (end - start).days
    # randint is inclusive on both ends, so offset 0 yields `start` and
    # offset `span_days` yields `end`.
    return start + datetime.timedelta(days=random.randint(0, span_days))
def _sql_str(value):
    """Return ``value`` rendered as a single-quoted SQL string literal.

    Embedded single quotes are doubled so that a value such as "O'Brien"
    cannot end the literal early and corrupt the generated statement.
    """
    return "'" + str(value).replace("'", "''") + "'"


# --- Generation ---
print(f"Generating {NUM_PERSONS} persons and {NUM_DOCUMENTS} documents...")

persons = []  # ids of all generated persons; reused below as senders/receivers
sql_lines = []  # every line of the output file, in emission order

# 1. Generate Persons
sql_lines.append("-- Persons")
for _ in range(NUM_PERSONS):
    p_id = str(uuid.uuid4())
    fn = random.choice(FIRST_NAMES)
    ln = random.choice(LAST_NAMES)
    persons.append(p_id)
    sql_lines.append(
        "INSERT INTO persons (id, first_name, last_name, alias) "
        f"VALUES ('{p_id}', {_sql_str(fn)}, {_sql_str(ln)}, NULL) ON CONFLICT DO NOTHING;"
    )

# 2. Generate Documents
sql_lines.append("\n-- Documents")
document_ids = []

for _ in range(NUM_DOCUMENTS):
    doc_id = str(uuid.uuid4())
    document_ids.append(doc_id)

    # The sender is always one of the persons generated above.
    sender_id = random.choice(persons)
    title_start = random.choice(TITLES)
    date = random_date()
    year = date.year
    city = random.choice(CITIES)
    title = f"{title_start} {city} {year}"

    # Simple transcription text
    transcription = f"Lieber Empfänger, dies ist ein Testdokument aus {city}, geschrieben am {date}. Das Wetter war schön."

    # Free-text values go through _sql_str(); the UUIDs, the ISO date and the
    # generated file name contain no quote characters and are inlined as-is.
    sql = (
        f"INSERT INTO documents (id, title, original_filename, file_path, status, meta_date, meta_location, transcription, sender_id, created_at, updated_at) "
        f"VALUES ('{doc_id}', {_sql_str(title)}, 'scan_{year}_{random.randint(100,999)}.pdf', 'dummy/path.pdf', 'UPLOADED', '{date}', {_sql_str(city)}, {_sql_str(transcription)}, '{sender_id}', NOW(), NOW()) "
        f"ON CONFLICT DO NOTHING;"
    )
    sql_lines.append(sql)

# 3. Generate Receivers (Many-to-Many)
sql_lines.append("\n-- Receivers")
# random.sample() raises ValueError when asked for more items than the
# population holds, so cap the upper bound when fewer than 3 persons exist.
max_receivers = min(3, len(persons))
for doc_id in document_ids:
    # 0 to 3 receivers per document
    num_receivers = random.randint(0, max_receivers)
    receivers = random.sample(persons, num_receivers)
    for rec_id in receivers:
        sql_lines.append(
            f"INSERT INTO document_receivers (document_id, person_id) VALUES ('{doc_id}', '{rec_id}') ON CONFLICT DO NOTHING;"
        )

# --- Write to File ---
with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
    f.write("\n".join(sql_lines))
    # Terminate the last statement's line so the file ends with a newline.
    f.write("\n")

print(f"Done! Created {OUTPUT_FILE}")