Fix embedded images in epub import

This commit is contained in:
Stefan Hardegger
2026-03-21 15:59:05 +01:00
parent 1dae7b1737
commit 6ec7b93589
2 changed files with 145 additions and 16 deletions

View File

@@ -26,7 +26,9 @@ import org.springframework.web.multipart.MultipartFile;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Optional; import java.util.Optional;
@Service @Service
@@ -41,6 +43,7 @@ public class EPUBImportService {
private final ReadingPositionRepository readingPositionRepository; private final ReadingPositionRepository readingPositionRepository;
private final HtmlSanitizationService sanitizationService; private final HtmlSanitizationService sanitizationService;
private final ImageService imageService; private final ImageService imageService;
private final LibraryService libraryService;
@Autowired @Autowired
public EPUBImportService(StoryService storyService, public EPUBImportService(StoryService storyService,
@@ -49,7 +52,8 @@ public class EPUBImportService {
TagService tagService, TagService tagService,
ReadingPositionRepository readingPositionRepository, ReadingPositionRepository readingPositionRepository,
HtmlSanitizationService sanitizationService, HtmlSanitizationService sanitizationService,
ImageService imageService) { ImageService imageService,
LibraryService libraryService) {
this.storyService = storyService; this.storyService = storyService;
this.authorService = authorService; this.authorService = authorService;
this.seriesService = seriesService; this.seriesService = seriesService;
@@ -57,6 +61,7 @@ public class EPUBImportService {
this.readingPositionRepository = readingPositionRepository; this.readingPositionRepository = readingPositionRepository;
this.sanitizationService = sanitizationService; this.sanitizationService = sanitizationService;
this.imageService = imageService; this.imageService = imageService;
this.libraryService = libraryService;
} }
public EPUBImportResponse importEPUB(EPUBImportRequest request) { public EPUBImportResponse importEPUB(EPUBImportRequest request) {
@@ -81,22 +86,37 @@ public class EPUBImportService {
Story savedStory = storyService.create(story); Story savedStory = storyService.create(story);
log.info("Story saved successfully with ID: {}", savedStory.getId()); log.info("Story saved successfully with ID: {}", savedStory.getId());
// Process embedded images if content contains any // Step 1: resolve images embedded in the EPUB archive (relative src paths)
String originalContent = story.getContentHtml(); String currentContent = story.getContentHtml();
if (originalContent != null && originalContent.contains("<img")) { if (currentContent != null && currentContent.contains("<img")) {
try { try {
log.info("Processing embedded images for story: {}", savedStory.getId()); log.info("Resolving EPUB-embedded images for story: {}", savedStory.getId());
ImageService.ContentImageProcessingResult imageResult = String resolvedContent = processEpubImages(currentContent, book, savedStory.getId());
imageService.processContentImages(originalContent, savedStory.getId()); if (!resolvedContent.equals(currentContent)) {
log.info("Updating story content with resolved EPUB images");
savedStory.setContentHtml(resolvedContent);
savedStory = storyService.update(savedStory.getId(), savedStory);
currentContent = resolvedContent;
}
} catch (Exception e) {
log.error("EPUB Import - Failed to resolve embedded images for story {}: {}",
savedStory.getId(), e.getMessage(), e);
}
}
// Update story content with processed images if changed // Step 2: download any remaining external (http/https) images
if (!imageResult.getProcessedContent().equals(originalContent)) { if (currentContent != null && currentContent.contains("<img")) {
log.info("Updating story content with processed images"); try {
log.info("Processing external images for story: {}", savedStory.getId());
ImageService.ContentImageProcessingResult imageResult =
imageService.processContentImages(currentContent, savedStory.getId());
if (!imageResult.getProcessedContent().equals(currentContent)) {
log.info("Updating story content with downloaded external images");
savedStory.setContentHtml(imageResult.getProcessedContent()); savedStory.setContentHtml(imageResult.getProcessedContent());
savedStory = storyService.update(savedStory.getId(), savedStory); savedStory = storyService.update(savedStory.getId(), savedStory);
// Log the image processing results log.info("EPUB Import - External image processing completed for story {}. Downloaded {} images.",
log.info("EPUB Import - Image processing completed for story {}. Downloaded {} images.",
savedStory.getId(), imageResult.getDownloadedImages().size()); savedStory.getId(), imageResult.getDownloadedImages().size());
if (imageResult.hasWarnings()) { if (imageResult.hasWarnings()) {
@@ -105,8 +125,7 @@ public class EPUBImportService {
} }
} }
} catch (Exception e) { } catch (Exception e) {
// Log error but don't fail the import log.error("EPUB Import - Failed to process external images for story {}: {}",
log.error("EPUB Import - Failed to process embedded images for story {}: {}",
savedStory.getId(), e.getMessage(), e); savedStory.getId(), e.getMessage(), e);
} }
} }
@@ -452,6 +471,113 @@ public class EPUBImportService {
} }
} }
/**
 * Resolves EPUB-internal image references (relative paths) by extracting the image
 * bytes from the EPUB resource map, saving them via ImageService, and replacing
 * the src attribute with the resulting local API URL.
 *
 * <p>Sources starting with {@code http://}, {@code https://}, {@code data:} or
 * {@code /api/} are left untouched. An image resource referenced by several
 * {@code <img>} tags is uploaded only once and the same URL is reused. Failures for
 * a single image are logged and do not abort processing of the remaining images.
 *
 * @param htmlContent story HTML; may be {@code null}
 * @param book        parsed EPUB whose image resources are looked up
 * @param storyId     ID of the story, used for logging only
 * @return the HTML with resolved image URLs, or {@code htmlContent} itself when no
 *         image was replaced, so callers comparing with {@code equals} do not see a
 *         change caused only by jsoup re-serializing the markup
 */
private String processEpubImages(String htmlContent, Book book, java.util.UUID storyId) {
    if (htmlContent == null || !htmlContent.contains("<img")) {
        return htmlContent;
    }

    // Index all image resources by href and by bare filename for flexible lookup
    Map<String, Resource> byHref = new HashMap<>();
    Map<String, Resource> byFilename = new HashMap<>();
    for (Resource resource : book.getResources().getAll()) {
        if (resource.getMediaType() != null &&
                resource.getMediaType().toString().startsWith("image/")) {
            String href = resource.getHref();
            if (href != null) {
                byHref.put(href, resource);
                String filename = href.contains("/") ? href.substring(href.lastIndexOf('/') + 1) : href;
                // First resource wins when several share the same bare filename
                byFilename.putIfAbsent(filename, resource);
            }
        }
    }

    if (byHref.isEmpty()) {
        log.debug("No image resources found in EPUB for story: {}", storyId);
        return htmlContent;
    }

    String currentLibraryId = libraryService.getCurrentLibraryId();
    if (currentLibraryId == null || currentLibraryId.trim().isEmpty()) {
        currentLibraryId = "default";
    }

    // Uploaded URL per resource href: avoids storing the same image once per <img> tag
    Map<String, String> uploadedUrlByHref = new HashMap<>();
    boolean anyResolved = false;

    org.jsoup.nodes.Document doc = Jsoup.parse(htmlContent);
    for (org.jsoup.nodes.Element img : doc.select("img[src]")) {
        String src = img.attr("src");

        // Skip already-resolved or external URLs
        if (src.startsWith("http://") || src.startsWith("https://") ||
                src.startsWith("data:") || src.startsWith("/api/")) {
            continue;
        }

        Resource resource = resolveEpubResource(src, byHref, byFilename);
        if (resource == null) {
            log.warn("Could not find EPUB resource for image src: {}", src);
            continue;
        }

        String imageUrl = uploadedUrlByHref.get(resource.getHref());
        if (imageUrl == null) {
            try {
                byte[] imageData = resource.getData();
                if (imageData == null || imageData.length == 0) {
                    log.warn("EPUB image resource has no data for src: {}", src);
                    continue;
                }

                String mediaType = resource.getMediaType() != null ?
                        resource.getMediaType().toString() : "image/jpeg";
                String extension = getExtensionFromMediaType(mediaType);
                // A random UUID cannot collide the way timestamp + small random number can
                String filename = "epub-img-" + java.util.UUID.randomUUID() + "." + extension;

                MultipartFile imageFile = new EPUBCoverMultipartFile(imageData, filename, mediaType);
                String imagePath = imageService.uploadImage(imageFile, ImageService.ImageType.CONTENT);
                imageUrl = "/api/files/images/" + currentLibraryId + "/" + imagePath;
                uploadedUrlByHref.put(resource.getHref(), imageUrl);
            } catch (Exception e) {
                // Best effort: keep the original src for this image and continue with the rest
                log.error("Failed to save EPUB image {}: {}", src, e.getMessage(), e);
                continue;
            }
        }

        img.attr("src", imageUrl);
        anyResolved = true;
        log.debug("Resolved EPUB image: {} -> {}", src, imageUrl);
    }

    // Only hand back re-serialized markup when an src was actually rewritten
    return anyResolved ? doc.body().html() : htmlContent;
}
/**
 * Tries to match an EPUB image src path against the resource maps.
 *
 * <p>The src is first tried exactly as written. If that fails, it is retried with any
 * {@code ?query} / {@code #fragment} suffix removed, and finally with percent-escapes
 * decoded (e.g. {@code my%20pic.jpg} becomes {@code my pic.jpg}).
 * NOTE(review): this assumes manifest hrefs in {@code byHref} are stored decoded;
 * verify against the EPUB reader in use.
 *
 * <p>Resolution order for each variant: exact href match, then leading
 * {@code ../} and {@code ./} segments stripped, then filename only.
 *
 * @param src        value of the {@code src} attribute from the story HTML
 * @param byHref     image resources keyed by their full href
 * @param byFilename image resources keyed by the last path segment of their href
 * @return the matching resource, or {@code null} when none is found
 */
private Resource resolveEpubResource(String src, Map<String, Resource> byHref, Map<String, Resource> byFilename) {
    Resource match = lookupEpubResource(src, byHref, byFilename);
    if (match != null) {
        return match;
    }

    // Drop ?query / #fragment, which are never part of a manifest href
    String stripped = src;
    for (int i = 0; i < stripped.length(); i++) {
        char c = stripped.charAt(i);
        if (c == '?' || c == '#') {
            stripped = stripped.substring(0, i);
            break;
        }
    }
    if (!stripped.equals(src)) {
        match = lookupEpubResource(stripped, byHref, byFilename);
        if (match != null) {
            return match;
        }
    }

    String decoded = decodeEpubPath(stripped);
    return decoded.equals(stripped) ? null : lookupEpubResource(decoded, byHref, byFilename);
}

/**
 * Looks up one path variant: exact href, then with leading relative navigation
 * removed, then by bare filename.
 */
private Resource lookupEpubResource(String path, Map<String, Resource> byHref, Map<String, Resource> byFilename) {
    Resource exact = byHref.get(path);
    if (exact != null) {
        return exact;
    }

    // Strip any mix of leading ../ and ./ navigation to get a plain relative path
    String normalized = path;
    while (normalized.startsWith("../") || normalized.startsWith("./")) {
        normalized = normalized.substring(normalized.startsWith("../") ? 3 : 2);
    }
    Resource relative = byHref.get(normalized);
    if (relative != null) {
        return relative;
    }

    // Fall back to filename-only match (lastIndexOf is -1 when there is no slash)
    return byFilename.get(normalized.substring(normalized.lastIndexOf('/') + 1));
}

/**
 * Decodes percent-escapes in a path; returns the input unchanged when it contains
 * a malformed escape sequence.
 */
private String decodeEpubPath(String path) {
    if (path.indexOf('%') < 0) {
        return path;
    }
    try {
        // URLDecoder implements form decoding and would turn '+' into a space;
        // escape literal pluses first so they survive.
        return java.net.URLDecoder.decode(path.replace("+", "%2B"),
                java.nio.charset.StandardCharsets.UTF_8.name());
    } catch (IllegalArgumentException | java.io.UnsupportedEncodingException e) {
        return path;
    }
}
private String extractAndSaveCoverImage(Book book) { private String extractAndSaveCoverImage(Book book) {
try { try {
Resource coverResource = book.getCoverImage(); Resource coverResource = book.getCoverImage();

View File

@@ -53,6 +53,9 @@ class EPUBImportServiceTest {
@Mock @Mock
private ImageService imageService; private ImageService imageService;
@Mock
private LibraryService libraryService;
@InjectMocks @InjectMocks
private EPUBImportService epubImportService; private EPUBImportService epubImportService;