Fix epub import
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
FROM openjdk:17-jdk-slim
|
||||
FROM eclipse-temurin:17-jdk-jammy
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install Maven and PostgreSQL 15 client tools
|
||||
RUN apt-get update && apt-get install -y wget ca-certificates gnupg maven && \
|
||||
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - && \
|
||||
echo "deb http://apt.postgresql.org/pub/repos/apt/ bullseye-pgdg main" > /etc/apt/sources.list.d/pgdg.list && \
|
||||
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor -o /etc/apt/trusted.gpg.d/postgresql.gpg && \
|
||||
echo "deb http://apt.postgresql.org/pub/repos/apt/ jammy-pgdg main" > /etc/apt/sources.list.d/pgdg.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y postgresql-client-15 && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
@@ -71,45 +71,53 @@ public class EPUBImportService {
|
||||
return EPUBImportResponse.error("Invalid EPUB file format");
|
||||
}
|
||||
|
||||
log.info("Parsing EPUB file: {}", epubFile.getOriginalFilename());
|
||||
Book book = parseEPUBFile(epubFile);
|
||||
|
||||
log.info("Creating story entity from EPUB metadata");
|
||||
Story story = createStoryFromEPUB(book, request);
|
||||
|
||||
log.info("Saving story to database: {}", story.getTitle());
|
||||
Story savedStory = storyService.create(story);
|
||||
log.info("Story saved successfully with ID: {}", savedStory.getId());
|
||||
|
||||
// Process embedded images if content contains any
|
||||
String originalContent = story.getContentHtml();
|
||||
if (originalContent != null && originalContent.contains("<img")) {
|
||||
try {
|
||||
log.info("Processing embedded images for story: {}", savedStory.getId());
|
||||
ImageService.ContentImageProcessingResult imageResult =
|
||||
imageService.processContentImages(originalContent, savedStory.getId());
|
||||
|
||||
// Update story content with processed images if changed
|
||||
if (!imageResult.getProcessedContent().equals(originalContent)) {
|
||||
log.info("Updating story content with processed images");
|
||||
savedStory.setContentHtml(imageResult.getProcessedContent());
|
||||
savedStory = storyService.update(savedStory.getId(), savedStory);
|
||||
|
||||
// Log the image processing results
|
||||
log.debug("EPUB Import - Image processing completed for story {}. Downloaded {} images.",
|
||||
log.info("EPUB Import - Image processing completed for story {}. Downloaded {} images.",
|
||||
savedStory.getId(), imageResult.getDownloadedImages().size());
|
||||
|
||||
if (imageResult.hasWarnings()) {
|
||||
log.debug("EPUB Import - Image processing warnings: {}",
|
||||
log.warn("EPUB Import - Image processing warnings: {}",
|
||||
String.join(", ", imageResult.getWarnings()));
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// Log error but don't fail the import
|
||||
System.err.println("EPUB Import - Failed to process embedded images for story " +
|
||||
savedStory.getId() + ": " + e.getMessage());
|
||||
log.error("EPUB Import - Failed to process embedded images for story {}: {}",
|
||||
savedStory.getId(), e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
log.info("Building import response for story: {}", savedStory.getId());
|
||||
EPUBImportResponse response = EPUBImportResponse.success(savedStory.getId(), savedStory.getTitle());
|
||||
response.setWordCount(savedStory.getWordCount());
|
||||
response.setTotalChapters(book.getSpine().size());
|
||||
|
||||
if (request.getPreserveReadingPosition() != null && request.getPreserveReadingPosition()) {
|
||||
log.info("Extracting and saving reading position");
|
||||
ReadingPosition readingPosition = extractReadingPosition(book, savedStory);
|
||||
if (readingPosition != null) {
|
||||
ReadingPosition savedPosition = readingPositionRepository.save(readingPosition);
|
||||
@@ -117,9 +125,11 @@ public class EPUBImportService {
|
||||
}
|
||||
}
|
||||
|
||||
log.info("EPUB import completed successfully for: {}", savedStory.getTitle());
|
||||
return response;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("EPUB import failed with exception: {}", e.getMessage(), e);
|
||||
return EPUBImportResponse.error("Failed to import EPUB: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
@@ -148,9 +158,12 @@ public class EPUBImportService {
|
||||
private Story createStoryFromEPUB(Book book, EPUBImportRequest request) {
|
||||
Metadata metadata = book.getMetadata();
|
||||
|
||||
log.info("Extracting EPUB metadata");
|
||||
String title = extractTitle(metadata);
|
||||
String authorName = extractAuthorName(metadata, request);
|
||||
String description = extractDescription(metadata);
|
||||
|
||||
log.info("Extracting and sanitizing content from {} chapters", book.getSpine().size());
|
||||
String content = extractContent(book);
|
||||
|
||||
Story story = new Story();
|
||||
@@ -160,42 +173,69 @@ public class EPUBImportService {
|
||||
|
||||
// Extract and process cover image
|
||||
if (request.getExtractCover() == null || request.getExtractCover()) {
|
||||
log.info("Extracting cover image");
|
||||
String coverPath = extractAndSaveCoverImage(book);
|
||||
if (coverPath != null) {
|
||||
log.info("Cover image saved at: {}", coverPath);
|
||||
story.setCoverPath(coverPath);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle author assignment
|
||||
try {
|
||||
if (request.getAuthorId() != null) {
|
||||
log.info("Looking up author by ID: {}", request.getAuthorId());
|
||||
try {
|
||||
Author author = authorService.findById(request.getAuthorId());
|
||||
story.setAuthor(author);
|
||||
log.info("Author found and assigned: {}", author.getName());
|
||||
} catch (ResourceNotFoundException e) {
|
||||
log.warn("Author ID {} not found", request.getAuthorId());
|
||||
if (request.getCreateMissingAuthor()) {
|
||||
log.info("Creating new author: {}", authorName);
|
||||
Author newAuthor = createAuthor(authorName);
|
||||
story.setAuthor(newAuthor);
|
||||
log.info("New author created with ID: {}", newAuthor.getId());
|
||||
}
|
||||
}
|
||||
} else if (authorName != null && request.getCreateMissingAuthor()) {
|
||||
log.info("Finding or creating author: {}", authorName);
|
||||
Author author = findOrCreateAuthor(authorName);
|
||||
story.setAuthor(author);
|
||||
log.info("Author assigned: {} (ID: {})", author.getName(), author.getId());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Error handling author assignment: {}", e.getMessage(), e);
|
||||
throw e;
|
||||
}
|
||||
|
||||
// Handle series assignment
|
||||
try {
|
||||
if (request.getSeriesId() != null && request.getSeriesVolume() != null) {
|
||||
log.info("Looking up series by ID: {}", request.getSeriesId());
|
||||
try {
|
||||
Series series = seriesService.findById(request.getSeriesId());
|
||||
story.setSeries(series);
|
||||
story.setVolume(request.getSeriesVolume());
|
||||
log.info("Series found and assigned: {} (volume {})", series.getName(), request.getSeriesVolume());
|
||||
} catch (ResourceNotFoundException e) {
|
||||
log.warn("Series ID {} not found", request.getSeriesId());
|
||||
if (request.getCreateMissingSeries() && request.getSeriesName() != null) {
|
||||
log.info("Creating new series: {}", request.getSeriesName());
|
||||
Series newSeries = createSeries(request.getSeriesName());
|
||||
story.setSeries(newSeries);
|
||||
story.setVolume(request.getSeriesVolume());
|
||||
log.info("New series created with ID: {}", newSeries.getId());
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Error handling series assignment: {}", e.getMessage(), e);
|
||||
throw e;
|
||||
}
|
||||
|
||||
// Handle tags from request or extract from EPUB metadata
|
||||
try {
|
||||
List<String> allTags = new ArrayList<>();
|
||||
if (request.getTags() != null && !request.getTags().isEmpty()) {
|
||||
allTags.addAll(request.getTags());
|
||||
@@ -207,17 +247,29 @@ public class EPUBImportService {
|
||||
allTags.addAll(epubTags);
|
||||
}
|
||||
|
||||
log.info("Processing {} tags for story", allTags.size());
|
||||
// Remove duplicates and create tags
|
||||
allTags.stream()
|
||||
.distinct()
|
||||
.forEach(tagName -> {
|
||||
try {
|
||||
log.debug("Finding or creating tag: {}", tagName);
|
||||
Tag tag = tagService.findOrCreate(tagName.trim());
|
||||
story.addTag(tag);
|
||||
} catch (Exception e) {
|
||||
log.error("Error creating tag '{}': {}", tagName, e.getMessage(), e);
|
||||
throw e;
|
||||
}
|
||||
});
|
||||
} catch (Exception e) {
|
||||
log.error("Error handling tags: {}", e.getMessage(), e);
|
||||
throw e;
|
||||
}
|
||||
|
||||
// Extract additional metadata for potential future use
|
||||
extractAdditionalMetadata(metadata, story);
|
||||
|
||||
log.info("Story entity created successfully: {}", title);
|
||||
return story;
|
||||
}
|
||||
|
||||
@@ -244,7 +296,13 @@ public class EPUBImportService {
|
||||
private String extractDescription(Metadata metadata) {
|
||||
List<String> descriptions = metadata.getDescriptions();
|
||||
if (descriptions != null && !descriptions.isEmpty()) {
|
||||
return descriptions.get(0);
|
||||
String description = descriptions.get(0);
|
||||
// Truncate to 1000 characters if necessary
|
||||
if (description != null && description.length() > 1000) {
|
||||
log.info("Description exceeds 1000 characters ({}), truncating...", description.length());
|
||||
return description.substring(0, 997) + "...";
|
||||
}
|
||||
return description;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -188,13 +188,13 @@ public class HtmlSanitizationService {
|
||||
return "";
|
||||
}
|
||||
|
||||
logger.info("Content before sanitization: "+html);
|
||||
logger.debug("Sanitizing HTML content (length: {} characters)", html.length());
|
||||
|
||||
// Preprocess to extract images from figure tags
|
||||
String preprocessed = preprocessFigureTags(html);
|
||||
|
||||
String saniztedHtml = Jsoup.clean(preprocessed, allowlist.preserveRelativeLinks(true));
|
||||
logger.info("Content after sanitization: "+saniztedHtml);
|
||||
logger.debug("Sanitization complete (output length: {} characters)", saniztedHtml.length());
|
||||
return saniztedHtml;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user