From 25a39fca9ce1e740c5e962e5e2aaeba659f4a6fc Mon Sep 17 00:00:00 2001 From: Marcel Date: Sun, 17 May 2026 14:48:03 +0200 Subject: [PATCH 1/3] security(import): harden DocumentBuilderFactory against XXE in MassImportService Extract XxeSafeXmlParser with all 6 OWASP-recommended features (disallow-doctype-decl, external-general-entities, external-parameter-entities, load-external-dtd, XInclude, expandEntityReferences). Make readOds() package-private; add failing-then-passing regression test and valid-ODS guard test. POI 5.5.0 does not mitigate this: the vulnerable parser is a custom DocumentBuilderFactory call in readOds(), not inside POI's internal ODS reader. The hardening is defence-in-depth, not redundant with POI defaults. Closes #528 Co-Authored-By: Claude Sonnet 4.6 --- .../importing/MassImportService.java | 4 +- .../importing/XxeSafeXmlParser.java | 20 ++++++ .../importing/MassImportServiceTest.java | 67 +++++++++++++++++++ 3 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 backend/src/main/java/org/raddatz/familienarchiv/importing/XxeSafeXmlParser.java diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java index ea648870..d6453e19 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java @@ -168,14 +168,14 @@ public class MassImportService { * Reads an ODS file by parsing its content.xml directly (no extra library needed). * ODS is a ZIP archive; content.xml holds the spreadsheet data as XML. */ - private List> readOds(File file) throws Exception { + List> readOds(File file) throws Exception { List> result = new ArrayList<>(); try (ZipFile zip = new ZipFile(file)) { var entry = zip.getEntry("content.xml"); if (entry == null) throw new RuntimeException("Ungültige ODS-Datei: content.xml fehlt"); - var factory = DocumentBuilderFactory.newInstance(); + var factory = XxeSafeXmlParser.hardenedFactory(); factory.setNamespaceAware(true); var builder = factory.newDocumentBuilder(); var doc = builder.parse(zip.getInputStream(entry)); diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/XxeSafeXmlParser.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/XxeSafeXmlParser.java new file mode 100644 index 00000000..949ea054 --- /dev/null +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/XxeSafeXmlParser.java @@ -0,0 +1,20 @@ +package org.raddatz.familienarchiv.importing; + +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +class XxeSafeXmlParser { + + private XxeSafeXmlParser() {} + + static DocumentBuilderFactory hardenedFactory() throws ParserConfigurationException { + var factory = DocumentBuilderFactory.newInstance(); + factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + factory.setFeature("http://xml.org/sax/features/external-general-entities", false); + factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + factory.setXIncludeAware(false); + factory.setExpandEntityReferences(false); + return factory; + } +} diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java index 845aaf3b..dfd63607 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java @@ -21,9 +21,12 @@ import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.xml.sax.SAXParseException; import java.io.File; import java.io.OutputStream; +import java.io.ByteArrayOutputStream; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.time.LocalDate; @@ -32,6 +35,8 @@ import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.UUID; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -520,6 +525,25 @@ class MassImportServiceTest { assertThat(result).isEqualTo("hello"); } + // ─── readOds — XXE security regression ─────────────────────────────────── + + // Security regression — do not remove. Introduced by issue #528. + @Test + void readOds_rejects_xxe_doctype_payload(@TempDir Path tempDir) throws Exception { + File malicious = buildXxeOds(tempDir, "file:///etc/hostname"); + assertThatThrownBy(() -> service.readOds(malicious)) + .isInstanceOf(SAXParseException.class) + .hasMessageContaining("DOCTYPE is disallowed"); + } + + @Test + void readOds_parses_valid_ods_correctly(@TempDir Path tempDir) throws Exception { + File valid = buildValidOds(tempDir, "Mustermann"); + List> rows = service.readOds(valid); + assertThat(rows).isNotEmpty(); + assertThat(rows.get(0)).contains("Mustermann"); + } + // ─── helpers ────────────────────────────────────────────────────────────── /** @@ -553,4 +577,47 @@ class MassImportServiceTest { "" // 13: transcription ); } + + /** Creates a minimal ODS ZIP containing a content.xml with an XXE payload. */ + private File buildXxeOds(Path dir, String entityTarget) throws Exception { + String xml = "" + + "]>" + + "" + + "" + + "" + + "&xxe;" + + "" + + "" + + ""; + return writeOdsZip(dir.resolve("malicious.ods"), xml); + } + + /** Creates a minimal valid ODS ZIP containing a content.xml with the given cell value. */ + private File buildValidOds(Path dir, String cellValue) throws Exception { + String xml = "" + + "" + + "" + + "" + + "" + cellValue + "" + + "" + + "" + + ""; + return writeOdsZip(dir.resolve("valid.ods"), xml); + } + + private File writeOdsZip(Path destination, String contentXml) throws Exception { + try (OutputStream fos = Files.newOutputStream(destination); + ZipOutputStream zip = new ZipOutputStream(fos)) { + zip.putNextEntry(new ZipEntry("content.xml")); + zip.write(contentXml.getBytes(StandardCharsets.UTF_8)); + zip.closeEntry(); + } + return destination.toFile(); + } } -- 2.49.1 From f15ea031d18915793f252cf2d8eab0e9be916e41 Mon Sep 17 00:00:00 2001 From: Marcel Date: Sun, 17 May 2026 14:48:46 +0200 Subject: [PATCH 2/3] ci(security): add Semgrep XXE rule and CI scan job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add .semgrep/security.yml with rules for DocumentBuilderFactory, SAXParserFactory, and XMLInputFactory without XXE hardening (CWE-611). Add semgrep-scan CI job — runs in parallel with backend-unit-tests, local rules only, --error flag fails the build on any match. Co-Authored-By: Claude Sonnet 4.6 --- .gitea/workflows/ci.yml | 20 +++++++++++++++++ .semgrep/security.yml | 49 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 .semgrep/security.yml diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index bd4a6cac..2877e9b3 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -276,6 +276,26 @@ jobs: echo "$dump" | grep -qE "\['add', 'familienarchiv-auth', 'polling'\]" \ || { echo "FAIL: familienarchiv-auth jail did not resolve to 'polling' backend"; exit 1; } + # ─── Semgrep Security Scan ─────────────────────────────────────────────────── + # Catches XXE-unprotected XML parser factories and similar patterns defined in + # .semgrep/security.yml. Runs in parallel with backend-unit-tests for fast feedback. + # Uses local rules only (no SEMGREP_APP_TOKEN / OIDC — act_runner does not support it). + semgrep-scan: + name: Semgrep Security Scan + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Semgrep + run: pip install semgrep + + - name: Run security rules + run: semgrep --config .semgrep/security.yml --error --metrics=off backend/src/ + # ─── Compose Bucket-Bootstrap Idempotency ───────────────────────────────────── # docker-compose.prod.yml's create-buckets service runs on every # `docker compose up` (one-shot, no restart). Must be idempotent — a diff --git a/.semgrep/security.yml b/.semgrep/security.yml new file mode 100644 index 00000000..da787175 --- /dev/null +++ b/.semgrep/security.yml @@ -0,0 +1,49 @@ +# Semgrep security rules for Familienarchiv backend. +# These rules catch the absence of XXE protection on XML parser factories. +# CWE-611: Improper Restriction of XML External Entity Reference. +# Run: semgrep --config .semgrep/security.yml --error backend/src/ + +rules: + + # DocumentBuilderFactory without XXE hardening. + # All call sites must call setFeature("…disallow-doctype-decl", true) before use. + - id: dbf-xxe-default + patterns: + - pattern: $X = DocumentBuilderFactory.newInstance(); + - pattern-not-inside: | + ... + $X.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + ... + message: > + DocumentBuilderFactory without XXE protection (CWE-611). + Call XxeSafeXmlParser.hardenedFactory() instead of DocumentBuilderFactory.newInstance(). + languages: [java] + severity: WARNING + + # SAXParserFactory without XXE hardening. + - id: sax-xxe-default + patterns: + - pattern: $X = SAXParserFactory.newInstance(); + - pattern-not-inside: | + ... + $X.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + ... + message: > + SAXParserFactory without XXE protection (CWE-611). + Apply disallow-doctype-decl and disable external entity features before use. + languages: [java] + severity: WARNING + + # XMLInputFactory without XXE hardening (StAX parser). + - id: stax-xxe-default + patterns: + - pattern: $X = XMLInputFactory.newInstance(); + - pattern-not-inside: | + ... + $X.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); + ... + message: > + XMLInputFactory without XXE protection (CWE-611). + Set IS_SUPPORTING_EXTERNAL_ENTITIES to false and SUPPORT_DTD to false before use. + languages: [java] + severity: WARNING -- 2.49.1 From 669eaa7c65baa98c573ca79ae05b491fac737eb3 Mon Sep 17 00:00:00 2001 From: Marcel Date: Sun, 17 May 2026 16:18:03 +0200 Subject: [PATCH 3/3] fix(ci): pin semgrep version, add pip cache, harden rule severity - Pin semgrep to 1.163.0 to prevent silent upgrades breaking the scan - Add cache: 'pip' to setup-python@v5 for faster CI runs - Promote all three XXE Semgrep rules from WARNING to ERROR to match the --error CI flag intent - Update SAX/StAX rule messages to reference XxeSafeXmlParser and the OWASP XXE prevention cheat sheet - Remove stale issue reference from regression test comment - Document XML metacharacter constraint on buildValidOds test helper Co-Authored-By: Claude Sonnet 4.6 --- .gitea/workflows/ci.yml | 3 ++- .semgrep/security.yml | 15 ++++++++++----- .../importing/MassImportServiceTest.java | 5 +++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 2877e9b3..a086f7c8 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -289,9 +289,10 @@ jobs: - uses: actions/setup-python@v5 with: python-version: '3.11' + cache: 'pip' - name: Install Semgrep - run: pip install semgrep + run: pip install semgrep==1.163.0 - name: Run security rules run: semgrep --config .semgrep/security.yml --error --metrics=off backend/src/ diff --git a/.semgrep/security.yml b/.semgrep/security.yml index da787175..fb3e00cc 100644 --- a/.semgrep/security.yml +++ b/.semgrep/security.yml @@ -17,8 +17,9 @@ rules: message: > DocumentBuilderFactory without XXE protection (CWE-611). Call XxeSafeXmlParser.hardenedFactory() instead of DocumentBuilderFactory.newInstance(). + See: https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html languages: [java] - severity: WARNING + severity: ERROR # SAXParserFactory without XXE hardening. - id: sax-xxe-default @@ -30,9 +31,11 @@ rules: ... message: > SAXParserFactory without XXE protection (CWE-611). - Apply disallow-doctype-decl and disable external entity features before use. + Set disallow-doctype-decl=true, external-general-entities=false, external-parameter-entities=false, + and load-external-dtd=false before use. Follow the pattern in XxeSafeXmlParser.hardenedFactory(). + See: https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html languages: [java] - severity: WARNING + severity: ERROR # XMLInputFactory without XXE hardening (StAX parser). - id: stax-xxe-default @@ -44,6 +47,8 @@ rules: ... message: > XMLInputFactory without XXE protection (CWE-611). - Set IS_SUPPORTING_EXTERNAL_ENTITIES to false and SUPPORT_DTD to false before use. + Set IS_SUPPORTING_EXTERNAL_ENTITIES=false and SUPPORT_DTD=false before use. + Follow the pattern in XxeSafeXmlParser.hardenedFactory(). + See: https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html languages: [java] - severity: WARNING + severity: ERROR diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java index dfd63607..dcd7c707 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java @@ -527,7 +527,7 @@ class MassImportServiceTest { // ─── readOds — XXE security regression ─────────────────────────────────── - // Security regression — do not remove. Introduced by issue #528. + // Security regression — do not remove. @Test void readOds_rejects_xxe_doctype_payload(@TempDir Path tempDir) throws Exception { File malicious = buildXxeOds(tempDir, "file:///etc/hostname"); @@ -595,7 +595,8 @@ class MassImportServiceTest { return writeOdsZip(dir.resolve("malicious.ods"), xml); } - /** Creates a minimal valid ODS ZIP containing a content.xml with the given cell value. */ + /** Creates a minimal valid ODS ZIP containing a content.xml with the given cell value. + * cellValue must not contain XML metacharacters ({@code < > &}). */ private File buildValidOds(Path dir, String cellValue) throws Exception { String xml = "" + "