diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index bd4a6cac..a086f7c8 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -276,6 +276,27 @@ jobs: echo "$dump" | grep -qE "\['add', 'familienarchiv-auth', 'polling'\]" \ || { echo "FAIL: familienarchiv-auth jail did not resolve to 'polling' backend"; exit 1; } + # ─── Semgrep Security Scan ─────────────────────────────────────────────────── + # Catches XXE-unprotected XML parser factories and similar patterns defined in + # .semgrep/security.yml. Runs in parallel with backend-unit-tests for fast feedback. + # Uses local rules only (no SEMGREP_APP_TOKEN / OIDC — act_runner does not support it). + semgrep-scan: + name: Semgrep Security Scan + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install Semgrep + run: pip install semgrep==1.163.0 + + - name: Run security rules + run: semgrep --config .semgrep/security.yml --error --metrics=off backend/src/ + # ─── Compose Bucket-Bootstrap Idempotency ───────────────────────────────────── # docker-compose.prod.yml's create-buckets service runs on every # `docker compose up` (one-shot, no restart). Must be idempotent — a diff --git a/.semgrep/security.yml b/.semgrep/security.yml new file mode 100644 index 00000000..fb3e00cc --- /dev/null +++ b/.semgrep/security.yml @@ -0,0 +1,54 @@ +# Semgrep security rules for Familienarchiv backend. +# These rules catch the absence of XXE protection on XML parser factories. +# CWE-611: Improper Restriction of XML External Entity Reference. +# Run: semgrep --config .semgrep/security.yml --error backend/src/ + +rules: + + # DocumentBuilderFactory without XXE hardening. + # All call sites must call setFeature("…disallow-doctype-decl", true) before use. + - id: dbf-xxe-default + patterns: + - pattern: $X = DocumentBuilderFactory.newInstance(); + - pattern-not-inside: | + ... + $X.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + ... + message: > + DocumentBuilderFactory without XXE protection (CWE-611). + Call XxeSafeXmlParser.hardenedFactory() instead of DocumentBuilderFactory.newInstance(). + See: https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html + languages: [java] + severity: ERROR + + # SAXParserFactory without XXE hardening. + - id: sax-xxe-default + patterns: + - pattern: $X = SAXParserFactory.newInstance(); + - pattern-not-inside: | + ... + $X.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + ... + message: > + SAXParserFactory without XXE protection (CWE-611). + Set disallow-doctype-decl=true, external-general-entities=false, external-parameter-entities=false, + and load-external-dtd=false before use. Follow the pattern in XxeSafeXmlParser.hardenedFactory(). + See: https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html + languages: [java] + severity: ERROR + + # XMLInputFactory without XXE hardening (StAX parser). + - id: stax-xxe-default + patterns: + - pattern: $X = XMLInputFactory.newInstance(); + - pattern-not-inside: | + ... + $X.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); + ... + message: > + XMLInputFactory without XXE protection (CWE-611). + Set IS_SUPPORTING_EXTERNAL_ENTITIES=false and SUPPORT_DTD=false before use. + Follow the pattern in XxeSafeXmlParser.hardenedFactory(). + See: https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html + languages: [java] + severity: ERROR diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java index ea648870..d6453e19 100644 --- a/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/MassImportService.java @@ -168,14 +168,14 @@ public class MassImportService { * Reads an ODS file by parsing its content.xml directly (no extra library needed). * ODS is a ZIP archive; content.xml holds the spreadsheet data as XML. */ - private List> readOds(File file) throws Exception { + List> readOds(File file) throws Exception { List> result = new ArrayList<>(); try (ZipFile zip = new ZipFile(file)) { var entry = zip.getEntry("content.xml"); if (entry == null) throw new RuntimeException("Ungültige ODS-Datei: content.xml fehlt"); - var factory = DocumentBuilderFactory.newInstance(); + var factory = XxeSafeXmlParser.hardenedFactory(); factory.setNamespaceAware(true); var builder = factory.newDocumentBuilder(); var doc = builder.parse(zip.getInputStream(entry)); diff --git a/backend/src/main/java/org/raddatz/familienarchiv/importing/XxeSafeXmlParser.java b/backend/src/main/java/org/raddatz/familienarchiv/importing/XxeSafeXmlParser.java new file mode 100644 index 00000000..949ea054 --- /dev/null +++ b/backend/src/main/java/org/raddatz/familienarchiv/importing/XxeSafeXmlParser.java @@ -0,0 +1,20 @@ +package org.raddatz.familienarchiv.importing; + +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +class XxeSafeXmlParser { + + private XxeSafeXmlParser() {} + + static DocumentBuilderFactory hardenedFactory() throws ParserConfigurationException { + var factory = DocumentBuilderFactory.newInstance(); + factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + factory.setFeature("http://xml.org/sax/features/external-general-entities", false); + factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + factory.setXIncludeAware(false); + factory.setExpandEntityReferences(false); + return factory; + } +} diff --git a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java index 845aaf3b..dcd7c707 100644 --- a/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java +++ b/backend/src/test/java/org/raddatz/familienarchiv/importing/MassImportServiceTest.java @@ -21,9 +21,12 @@ import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.xml.sax.SAXParseException; import java.io.File; import java.io.OutputStream; +import java.io.ByteArrayOutputStream; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.time.LocalDate; @@ -32,6 +35,8 @@ import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.UUID; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -520,6 +525,25 @@ class MassImportServiceTest { assertThat(result).isEqualTo("hello"); } + // ─── readOds — XXE security regression ─────────────────────────────────── + + // Security regression — do not remove. + @Test + void readOds_rejects_xxe_doctype_payload(@TempDir Path tempDir) throws Exception { + File malicious = buildXxeOds(tempDir, "file:///etc/hostname"); + assertThatThrownBy(() -> service.readOds(malicious)) + .isInstanceOf(SAXParseException.class) + .hasMessageContaining("DOCTYPE is disallowed"); + } + + @Test + void readOds_parses_valid_ods_correctly(@TempDir Path tempDir) throws Exception { + File valid = buildValidOds(tempDir, "Mustermann"); + List> rows = service.readOds(valid); + assertThat(rows).isNotEmpty(); + assertThat(rows.get(0)).contains("Mustermann"); + } + // ─── helpers ────────────────────────────────────────────────────────────── /** @@ -553,4 +577,48 @@ class MassImportServiceTest { "" // 13: transcription ); } + + /** Creates a minimal ODS ZIP containing a content.xml with an XXE payload. */ + private File buildXxeOds(Path dir, String entityTarget) throws Exception { + String xml = "" + + "]>" + + "" + + "" + + "" + + "&xxe;" + + "" + + "" + + ""; + return writeOdsZip(dir.resolve("malicious.ods"), xml); + } + + /** Creates a minimal valid ODS ZIP containing a content.xml with the given cell value. + * cellValue must not contain XML metacharacters ({@code < > &}). */ + private File buildValidOds(Path dir, String cellValue) throws Exception { + String xml = "" + + "" + + "" + + "" + + "" + cellValue + "" + + "" + + "" + + ""; + return writeOdsZip(dir.resolve("valid.ods"), xml); + } + + private File writeOdsZip(Path destination, String contentXml) throws Exception { + try (OutputStream fos = Files.newOutputStream(destination); + ZipOutputStream zip = new ZipOutputStream(fos)) { + zip.putNextEntry(new ZipEntry("content.xml")); + zip.write(contentXml.getBytes(StandardCharsets.UTF_8)); + zip.closeEntry(); + } + return destination.toFile(); + } }