security(import): harden DocumentBuilderFactory against XXE in MassImportService

Extract XxeSafeXmlParser with all 6 OWASP-recommended features
(disallow-doctype-decl, external-general-entities, external-parameter-entities,
load-external-dtd, XInclude, expandEntityReferences). Make readOds()
package-private; add failing-then-passing regression test and valid-ODS guard test.

POI 5.5.0 does not mitigate this: the vulnerable parser is a custom
DocumentBuilderFactory call in readOds(), not inside POI's internal ODS reader.
The hardening is defence-in-depth, not redundant with POI defaults.

Closes #528

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-05-17 14:48:03 +02:00
parent e398133907
commit 25a39fca9c
3 changed files with 89 additions and 2 deletions

View File

@@ -168,14 +168,14 @@ public class MassImportService {
* Reads an ODS file by parsing its content.xml directly (no extra library needed).
* ODS is a ZIP archive; content.xml holds the spreadsheet data as XML.
*/
private List<List<String>> readOds(File file) throws Exception {
List<List<String>> readOds(File file) throws Exception {
List<List<String>> result = new ArrayList<>();
try (ZipFile zip = new ZipFile(file)) {
var entry = zip.getEntry("content.xml");
if (entry == null) throw new RuntimeException("Ungültige ODS-Datei: content.xml fehlt");
var factory = DocumentBuilderFactory.newInstance();
var factory = XxeSafeXmlParser.hardenedFactory();
factory.setNamespaceAware(true);
var builder = factory.newDocumentBuilder();
var doc = builder.parse(zip.getInputStream(entry));

View File

@@ -0,0 +1,20 @@
package org.raddatz.familienarchiv.importing;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
class XxeSafeXmlParser {
private XxeSafeXmlParser() {}
static DocumentBuilderFactory hardenedFactory() throws ParserConfigurationException {
var factory = DocumentBuilderFactory.newInstance();
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
factory.setXIncludeAware(false);
factory.setExpandEntityReferences(false);
return factory;
}
}

View File

@@ -21,9 +21,12 @@ import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.xml.sax.SAXParseException;
import java.io.File;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDate;
@@ -32,6 +35,8 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
@@ -520,6 +525,25 @@ class MassImportServiceTest {
assertThat(result).isEqualTo("hello");
}
// ─── readOds — XXE security regression ───────────────────────────────────
// Security regression — do not remove. Introduced by issue #528.
@Test
void readOds_rejects_xxe_doctype_payload(@TempDir Path tempDir) throws Exception {
File malicious = buildXxeOds(tempDir, "file:///etc/hostname");
assertThatThrownBy(() -> service.readOds(malicious))
.isInstanceOf(SAXParseException.class)
.hasMessageContaining("DOCTYPE is disallowed");
}
@Test
void readOds_parses_valid_ods_correctly(@TempDir Path tempDir) throws Exception {
File valid = buildValidOds(tempDir, "Mustermann");
List<List<String>> rows = service.readOds(valid);
assertThat(rows).isNotEmpty();
assertThat(rows.get(0)).contains("Mustermann");
}
// ─── helpers ──────────────────────────────────────────────────────────────
/**
@@ -553,4 +577,47 @@ class MassImportServiceTest {
"" // 13: transcription
);
}
/** Creates a minimal ODS ZIP containing a content.xml with an XXE payload. */
private File buildXxeOds(Path dir, String entityTarget) throws Exception {
String xml = "<?xml version=\"1.0\"?>"
+ "<!DOCTYPE foo [<!ENTITY xxe SYSTEM \"" + entityTarget + "\">]>"
+ "<office:document-content"
+ " xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\""
+ " xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\""
+ " xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\">"
+ "<office:body><office:spreadsheet>"
+ "<table:table><table:table-row><table:table-cell>"
+ "<text:p>&xxe;</text:p>"
+ "</table:table-cell></table:table-row></table:table>"
+ "</office:spreadsheet></office:body>"
+ "</office:document-content>";
return writeOdsZip(dir.resolve("malicious.ods"), xml);
}
/** Creates a minimal valid ODS ZIP containing a content.xml with the given cell value. */
private File buildValidOds(Path dir, String cellValue) throws Exception {
String xml = "<?xml version=\"1.0\"?>"
+ "<office:document-content"
+ " xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\""
+ " xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\""
+ " xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\">"
+ "<office:body><office:spreadsheet>"
+ "<table:table><table:table-row><table:table-cell>"
+ "<text:p>" + cellValue + "</text:p>"
+ "</table:table-cell></table:table-row></table:table>"
+ "</office:spreadsheet></office:body>"
+ "</office:document-content>";
return writeOdsZip(dir.resolve("valid.ods"), xml);
}
private File writeOdsZip(Path destination, String contentXml) throws Exception {
try (OutputStream fos = Files.newOutputStream(destination);
ZipOutputStream zip = new ZipOutputStream(fos)) {
zip.putNextEntry(new ZipEntry("content.xml"));
zip.write(contentXml.getBytes(StandardCharsets.UTF_8));
zip.closeEntry();
}
return destination.toFile();
}
}