Compare commits

..

2 Commits

Author SHA1 Message Date
Marcel
f15ea031d1 ci(security): add Semgrep XXE rule and CI scan job
All checks were successful
CI / Unit & Component Tests (pull_request) Successful in 3m2s
CI / OCR Service Tests (pull_request) Successful in 18s
CI / Backend Unit Tests (pull_request) Successful in 3m3s
CI / fail2ban Regex (pull_request) Successful in 40s
CI / Semgrep Security Scan (pull_request) Successful in 1m11s
CI / Compose Bucket Idempotency (pull_request) Successful in 1m1s
Add .semgrep/security.yml with rules for DocumentBuilderFactory,
SAXParserFactory, and XMLInputFactory without XXE hardening (CWE-611).
Add semgrep-scan CI job — runs in parallel with backend-unit-tests,
local rules only, --error flag fails the build on any match.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-17 14:48:46 +02:00
Marcel
25a39fca9c security(import): harden DocumentBuilderFactory against XXE in MassImportService
Extract XxeSafeXmlParser with all 6 OWASP-recommended features
(disallow-doctype-decl, external-general-entities, external-parameter-entities,
load-external-dtd, XInclude, expandEntityReferences). Make readOds()
package-private; add failing-then-passing regression test and valid-ODS guard test.

POI 5.5.0 does not mitigate this: the vulnerable parser is a custom
DocumentBuilderFactory call in readOds(), not inside POI's internal ODS reader.
The hardening is defence-in-depth, not redundant with POI defaults.

Closes #528

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-17 14:48:03 +02:00
5 changed files with 158 additions and 2 deletions

View File

@@ -276,6 +276,26 @@ jobs:
echo "$dump" | grep -qE "\['add', 'familienarchiv-auth', 'polling'\]" \
|| { echo "FAIL: familienarchiv-auth jail did not resolve to 'polling' backend"; exit 1; }
# ─── Semgrep Security Scan ───────────────────────────────────────────────────
# Catches XXE-unprotected XML parser factories and similar patterns defined in
# .semgrep/security.yml. Runs in parallel with backend-unit-tests for fast feedback.
# Uses local rules only (no SEMGREP_APP_TOKEN / OIDC — act_runner does not support it).
semgrep-scan:
name: Semgrep Security Scan
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install Semgrep
run: pip install semgrep
- name: Run security rules
run: semgrep --config .semgrep/security.yml --error --metrics=off backend/src/
# ─── Compose Bucket-Bootstrap Idempotency ─────────────────────────────────────
# docker-compose.prod.yml's create-buckets service runs on every
# `docker compose up` (one-shot, no restart). Must be idempotent — a

49
.semgrep/security.yml Normal file
View File

@@ -0,0 +1,49 @@
# Semgrep security rules for Familienarchiv backend.
# These rules catch the absence of XXE protection on XML parser factories.
# CWE-611: Improper Restriction of XML External Entity Reference.
# Run: semgrep --config .semgrep/security.yml --error backend/src/
rules:
# DocumentBuilderFactory without XXE hardening.
# All call sites must call setFeature("…disallow-doctype-decl", true) before use.
- id: dbf-xxe-default
patterns:
- pattern: $X = DocumentBuilderFactory.newInstance();
- pattern-not-inside: |
...
$X.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
...
message: >
DocumentBuilderFactory without XXE protection (CWE-611).
Call XxeSafeXmlParser.hardenedFactory() instead of DocumentBuilderFactory.newInstance().
languages: [java]
severity: WARNING
# SAXParserFactory without XXE hardening.
- id: sax-xxe-default
patterns:
- pattern: $X = SAXParserFactory.newInstance();
- pattern-not-inside: |
...
$X.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
...
message: >
SAXParserFactory without XXE protection (CWE-611).
Apply disallow-doctype-decl and disable external entity features before use.
languages: [java]
severity: WARNING
# XMLInputFactory without XXE hardening (StAX parser).
- id: stax-xxe-default
patterns:
- pattern: $X = XMLInputFactory.newInstance();
- pattern-not-inside: |
...
$X.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
...
message: >
XMLInputFactory without XXE protection (CWE-611).
Set IS_SUPPORTING_EXTERNAL_ENTITIES to false and SUPPORT_DTD to false before use.
languages: [java]
severity: WARNING

View File

@@ -168,14 +168,14 @@ public class MassImportService {
* Reads an ODS file by parsing its content.xml directly (no extra library needed).
* ODS is a ZIP archive; content.xml holds the spreadsheet data as XML.
*/
private List<List<String>> readOds(File file) throws Exception {
List<List<String>> readOds(File file) throws Exception {
List<List<String>> result = new ArrayList<>();
try (ZipFile zip = new ZipFile(file)) {
var entry = zip.getEntry("content.xml");
if (entry == null) throw new RuntimeException("Ungültige ODS-Datei: content.xml fehlt");
var factory = DocumentBuilderFactory.newInstance();
var factory = XxeSafeXmlParser.hardenedFactory();
factory.setNamespaceAware(true);
var builder = factory.newDocumentBuilder();
var doc = builder.parse(zip.getInputStream(entry));

View File

@@ -0,0 +1,20 @@
package org.raddatz.familienarchiv.importing;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
class XxeSafeXmlParser {
private XxeSafeXmlParser() {}
static DocumentBuilderFactory hardenedFactory() throws ParserConfigurationException {
var factory = DocumentBuilderFactory.newInstance();
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
factory.setXIncludeAware(false);
factory.setExpandEntityReferences(false);
return factory;
}
}

View File

@@ -21,9 +21,12 @@ import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.xml.sax.SAXParseException;
import java.io.File;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDate;
@@ -32,6 +35,8 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
@@ -520,6 +525,25 @@ class MassImportServiceTest {
assertThat(result).isEqualTo("hello");
}
// ─── readOds — XXE security regression ───────────────────────────────────
// Security regression — do not remove. Introduced by issue #528.
@Test
void readOds_rejects_xxe_doctype_payload(@TempDir Path tempDir) throws Exception {
File malicious = buildXxeOds(tempDir, "file:///etc/hostname");
assertThatThrownBy(() -> service.readOds(malicious))
.isInstanceOf(SAXParseException.class)
.hasMessageContaining("DOCTYPE is disallowed");
}
@Test
void readOds_parses_valid_ods_correctly(@TempDir Path tempDir) throws Exception {
File valid = buildValidOds(tempDir, "Mustermann");
List<List<String>> rows = service.readOds(valid);
assertThat(rows).isNotEmpty();
assertThat(rows.get(0)).contains("Mustermann");
}
// ─── helpers ──────────────────────────────────────────────────────────────
/**
@@ -553,4 +577,47 @@ class MassImportServiceTest {
"" // 13: transcription
);
}
/** Creates a minimal ODS ZIP containing a content.xml with an XXE payload. */
private File buildXxeOds(Path dir, String entityTarget) throws Exception {
String xml = "<?xml version=\"1.0\"?>"
+ "<!DOCTYPE foo [<!ENTITY xxe SYSTEM \"" + entityTarget + "\">]>"
+ "<office:document-content"
+ " xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\""
+ " xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\""
+ " xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\">"
+ "<office:body><office:spreadsheet>"
+ "<table:table><table:table-row><table:table-cell>"
+ "<text:p>&xxe;</text:p>"
+ "</table:table-cell></table:table-row></table:table>"
+ "</office:spreadsheet></office:body>"
+ "</office:document-content>";
return writeOdsZip(dir.resolve("malicious.ods"), xml);
}
/** Creates a minimal valid ODS ZIP containing a content.xml with the given cell value. */
private File buildValidOds(Path dir, String cellValue) throws Exception {
String xml = "<?xml version=\"1.0\"?>"
+ "<office:document-content"
+ " xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\""
+ " xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\""
+ " xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\">"
+ "<office:body><office:spreadsheet>"
+ "<table:table><table:table-row><table:table-cell>"
+ "<text:p>" + cellValue + "</text:p>"
+ "</table:table-cell></table:table-row></table:table>"
+ "</office:spreadsheet></office:body>"
+ "</office:document-content>";
return writeOdsZip(dir.resolve("valid.ods"), xml);
}
private File writeOdsZip(Path destination, String contentXml) throws Exception {
try (OutputStream fos = Files.newOutputStream(destination);
ZipOutputStream zip = new ZipOutputStream(fos)) {
zip.putNextEntry(new ZipEntry("content.xml"));
zip.write(contentXml.getBytes(StandardCharsets.UTF_8));
zip.closeEntry();
}
return destination.toFile();
}
}