restructure: flatten workspace nesting, move devcontainer to root

- backend/workspaces/backend/ → backend/
- backend/workspaces/frontend/ → frontend/
- backend/.devcontainer/ + .vscode/ → repo root (where VS Code expects them)
- loose scripts/SQL files → scripts/
- replace nested git repo with single repo at project root
- update docker-compose.yml build context and devcontainer.json path
- add root .gitignore

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-03-15 11:47:58 +01:00
parent 7e725090fe
commit e63adb964d
155 changed files with 650 additions and 29 deletions

74
scripts/generate_data.py Normal file
View File

@@ -0,0 +1,74 @@
import uuid
import random
import datetime
# --- Configuration ---
# Tuning knobs for the size of the generated dataset and the output target.
NUM_PERSONS = 50       # number of person rows to generate
NUM_DOCUMENTS = 500    # number of document rows to generate
OUTPUT_FILE = "large-data.sql"  # SQL dump written to the current working directory
# --- Source Data ---
# Sample pools the generator draws from at random. Names/cities are
# deliberately era-appropriate German to produce plausible archive data.
FIRST_NAMES = ["Hans", "Helga", "Thomas", "Maria", "Otto", "Frieda", "Heinrich", "Anna", "Wilhelm", "Elisabeth", "Paul", "Gertrud", "Karl", "Martha", "Fritz", "Erna"]
LAST_NAMES = ["Müller", "Schmidt", "Schneider", "Fischer", "Weber", "Meyer", "Wagner", "Becker", "Schulz", "Hoffmann", "Raddatz", "Koch", "Richter", "Klein"]
CITIES = ["Berlin", "München", "Hamburg", "Köln", "Frankfurt", "Leipzig", "Dresden", "Breslau", "Königsberg", "Wien", "Stuttgart"]
# Title prefixes; combined with a city and a year to form document titles.
TITLES = ["Brief von", "Rechnung", "Postkarte aus", "Notiz an", "Dokument betreffend", "Urkunde für", "Foto von"]
def random_date(start_year=1900, end_year=2000):
    """Return a uniformly random date between Jan 1 of *start_year*
    and Dec 31 of *end_year*, inclusive."""
    lower = datetime.date(start_year, 1, 1)
    upper = datetime.date(end_year, 12, 31)
    # One randint call over the full span keeps the draw uniform per day.
    offset_days = random.randint(0, (upper - lower).days)
    return lower + datetime.timedelta(days=offset_days)
# --- Generation ---

def _sql_escape(value):
    """Escape embedded single quotes (doubled per the SQL standard) so a
    value can be placed safely inside a single-quoted SQL string literal.

    The current source data contains no quotes, so output is unchanged for
    existing pools — but this keeps the script correct if a name such as
    "O'Brien" is ever added.
    """
    return str(value).replace("'", "''")


print(f"Generating {NUM_PERSONS} persons and {NUM_DOCUMENTS} documents...")

persons = []    # person UUIDs; reused below as document senders/receivers
sql_lines = []  # accumulated SQL statements, joined and written once at the end

# 1. Generate Persons
sql_lines.append("-- Persons")
for _ in range(NUM_PERSONS):
    p_id = str(uuid.uuid4())
    fn = _sql_escape(random.choice(FIRST_NAMES))
    ln = _sql_escape(random.choice(LAST_NAMES))
    persons.append(p_id)
    sql_lines.append(
        f"INSERT INTO persons (id, first_name, last_name, alias) "
        f"VALUES ('{p_id}', '{fn}', '{ln}', NULL) ON CONFLICT DO NOTHING;"
    )

# 2. Generate Documents
sql_lines.append("\n-- Documents")
document_ids = []
for _ in range(NUM_DOCUMENTS):
    doc_id = str(uuid.uuid4())
    document_ids.append(doc_id)
    sender_id = random.choice(persons)
    title_start = random.choice(TITLES)
    # `doc_date` (not `date`) avoids shadowing the datetime.date name.
    doc_date = random_date()
    year = doc_date.year
    city = random.choice(CITIES)
    title = _sql_escape(f"{title_start} {city} {year}")
    # Simple transcription text
    transcription = _sql_escape(
        f"Lieber Empfänger, dies ist ein Testdokument aus {city}, "
        f"geschrieben am {doc_date}. Das Wetter war schön."
    )
    sql_lines.append(
        f"INSERT INTO documents (id, title, original_filename, file_path, status, "
        f"meta_date, meta_location, transcription, sender_id, created_at, updated_at) "
        f"VALUES ('{doc_id}', '{title}', 'scan_{year}_{random.randint(100,999)}.pdf', "
        f"'dummy/path.pdf', 'UPLOADED', '{doc_date}', '{city}', '{transcription}', "
        f"'{sender_id}', NOW(), NOW()) "
        f"ON CONFLICT DO NOTHING;"
    )

# 3. Generate Receivers (Many-to-Many)
sql_lines.append("\n-- Receivers")
for doc_id in document_ids:
    # 0 to 3 receivers per document; sample() guarantees no duplicate
    # receiver for the same document.
    num_receivers = random.randint(0, 3)
    for rec_id in random.sample(persons, num_receivers):
        sql_lines.append(
            f"INSERT INTO document_receivers (document_id, person_id) "
            f"VALUES ('{doc_id}', '{rec_id}') ON CONFLICT DO NOTHING;"
        )

# --- Write to File ---
with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
    f.write("\n".join(sql_lines))

print(f"Done! Created {OUTPUT_FILE}")