feat(backend): add FileService.downloadFileStream for memory-efficient reads

Thumbnail generation will call this for PDFs up to 50 MB — loading the
full byte[] via downloadFileBytes would cause real memory pressure on
the single-VPS deploy. Stream-based reads let PDFBox parse the first
page without holding the whole file in heap.

Refs #307

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-22 21:41:15 +02:00
parent 2aa3b955f9
commit 07019f54e8
2 changed files with 56 additions and 0 deletions

View File

@@ -112,6 +112,27 @@ public class FileService {
}
}
/**
 * Opens the object stored under {@code s3Key} in the configured bucket as a stream
 * instead of materialising it as a {@code byte[]}.
 *
 * <p>Prefer this over {@link #downloadFileBytes(String)} for large objects (for example
 * multi-MB PDFs read during thumbnail generation), since the content is not loaded into
 * heap memory up front. Ownership of the returned stream passes to the caller, who must
 * close it — typically with try-with-resources.
 *
 * @param s3Key key of the object inside the configured bucket
 * @return the stream returned by the S3 client for the requested object
 * @throws StorageFileNotFoundException if the storage backend reports that the key does not exist
 * @throws IOException if the storage backend fails with any other {@link S3Exception};
 *         the original exception is attached as the cause
 */
public InputStream downloadFileStream(String s3Key) throws IOException {
    try {
        GetObjectRequest request = GetObjectRequest.builder().bucket(bucketName).key(s3Key).build();
        InputStream objectStream = s3Client.getObject(request);
        return objectStream;
    } catch (NoSuchKeyException missingKey) {
        // Must be caught before S3Exception so a missing key maps to the dedicated exception type.
        throw new StorageFileNotFoundException("File not found in storage: " + s3Key);
    } catch (S3Exception storageFailure) {
        throw new IOException(
                "Failed to open stream from storage: " + storageFailure.getMessage(), storageFailure);
    }
}
/**
* Generates a presigned URL for downloading an object from S3/MinIO.
* Valid for 1 hour — covers multi-page documents on CPU-only OCR hardware

View File

@@ -197,4 +197,39 @@ class FileServiceTest {
.isInstanceOf(IOException.class)
.hasMessageContaining("Failed to download");
}
// ─── downloadFileStream ────────────────────────────────────────────────────
/**
 * Happy path: the stream returned by {@code downloadFileStream} yields exactly the bytes
 * of the stubbed S3 object.
 */
@Test
void downloadFileStream_returnsStreamableContent() throws IOException {
    // Explicit charset keeps the fixture bytes independent of the platform default encoding.
    byte[] content = "streamed bytes".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    GetObjectResponse response = GetObjectResponse.builder().contentType("application/pdf").build();
    ResponseInputStream<GetObjectResponse> stream = new ResponseInputStream<>(
            response, AbortableInputStream.create(new ByteArrayInputStream(content)));
    when(s3Client.getObject(any(GetObjectRequest.class))).thenReturn(stream);

    // try-with-resources mirrors how production callers are expected to consume the stream.
    try (java.io.InputStream result = fileService.downloadFileStream("documents/file.pdf")) {
        assertThat(result.readAllBytes()).isEqualTo(content);
    }
}
/**
 * A {@code NoSuchKeyException} from the S3 client must surface as
 * {@code StorageFileNotFoundException} whose message names the requested key.
 */
@Test
void downloadFileStream_throwsStorageFileNotFoundException_whenNoSuchKey() {
    String requestedKey = "missing/key.pdf";
    NoSuchKeyException keyMissing = NoSuchKeyException.builder()
            .message("not found")
            .statusCode(404)
            .build();
    when(s3Client.getObject(any(GetObjectRequest.class))).thenThrow(keyMissing);

    assertThatThrownBy(() -> fileService.downloadFileStream(requestedKey))
            .isInstanceOf(FileService.StorageFileNotFoundException.class)
            .hasMessageContaining(requestedKey);
}
/**
 * Any other {@code S3Exception} from the S3 client must surface as an {@code IOException}
 * carrying the "Failed to open stream" message.
 */
@Test
void downloadFileStream_throwsIOException_whenS3Exception() {
    // The cast is kept from the original: the builder chain is not typed as S3Exception here.
    S3Exception storageFailure = (S3Exception) S3Exception.builder()
            .message("storage error")
            .statusCode(503)
            .build();
    when(s3Client.getObject(any(GetObjectRequest.class))).thenThrow(storageFailure);

    assertThatThrownBy(() -> fileService.downloadFileStream("documents/file.pdf"))
            .isInstanceOf(IOException.class)
            .hasMessageContaining("Failed to open stream");
}
}