Fix epub import
This commit is contained in:
@@ -1,11 +1,11 @@
|
|||||||
FROM openjdk:17-jdk-slim
|
FROM eclipse-temurin:17-jdk-jammy
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install Maven and PostgreSQL 15 client tools
|
# Install Maven and PostgreSQL 15 client tools
|
||||||
RUN apt-get update && apt-get install -y wget ca-certificates gnupg maven && \
|
RUN apt-get update && apt-get install -y wget ca-certificates gnupg maven && \
|
||||||
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - && \
|
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor -o /etc/apt/trusted.gpg.d/postgresql.gpg && \
|
||||||
echo "deb http://apt.postgresql.org/pub/repos/apt/ bullseye-pgdg main" > /etc/apt/sources.list.d/pgdg.list && \
|
echo "deb http://apt.postgresql.org/pub/repos/apt/ jammy-pgdg main" > /etc/apt/sources.list.d/pgdg.list && \
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y postgresql-client-15 && \
|
apt-get install -y postgresql-client-15 && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|||||||
@@ -62,64 +62,74 @@ public class EPUBImportService {
|
|||||||
public EPUBImportResponse importEPUB(EPUBImportRequest request) {
|
public EPUBImportResponse importEPUB(EPUBImportRequest request) {
|
||||||
try {
|
try {
|
||||||
MultipartFile epubFile = request.getEpubFile();
|
MultipartFile epubFile = request.getEpubFile();
|
||||||
|
|
||||||
if (epubFile == null || epubFile.isEmpty()) {
|
if (epubFile == null || epubFile.isEmpty()) {
|
||||||
return EPUBImportResponse.error("EPUB file is required");
|
return EPUBImportResponse.error("EPUB file is required");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isValidEPUBFile(epubFile)) {
|
if (!isValidEPUBFile(epubFile)) {
|
||||||
return EPUBImportResponse.error("Invalid EPUB file format");
|
return EPUBImportResponse.error("Invalid EPUB file format");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.info("Parsing EPUB file: {}", epubFile.getOriginalFilename());
|
||||||
Book book = parseEPUBFile(epubFile);
|
Book book = parseEPUBFile(epubFile);
|
||||||
|
|
||||||
|
log.info("Creating story entity from EPUB metadata");
|
||||||
Story story = createStoryFromEPUB(book, request);
|
Story story = createStoryFromEPUB(book, request);
|
||||||
|
|
||||||
|
log.info("Saving story to database: {}", story.getTitle());
|
||||||
Story savedStory = storyService.create(story);
|
Story savedStory = storyService.create(story);
|
||||||
|
log.info("Story saved successfully with ID: {}", savedStory.getId());
|
||||||
|
|
||||||
// Process embedded images if content contains any
|
// Process embedded images if content contains any
|
||||||
String originalContent = story.getContentHtml();
|
String originalContent = story.getContentHtml();
|
||||||
if (originalContent != null && originalContent.contains("<img")) {
|
if (originalContent != null && originalContent.contains("<img")) {
|
||||||
try {
|
try {
|
||||||
|
log.info("Processing embedded images for story: {}", savedStory.getId());
|
||||||
ImageService.ContentImageProcessingResult imageResult =
|
ImageService.ContentImageProcessingResult imageResult =
|
||||||
imageService.processContentImages(originalContent, savedStory.getId());
|
imageService.processContentImages(originalContent, savedStory.getId());
|
||||||
|
|
||||||
// Update story content with processed images if changed
|
// Update story content with processed images if changed
|
||||||
if (!imageResult.getProcessedContent().equals(originalContent)) {
|
if (!imageResult.getProcessedContent().equals(originalContent)) {
|
||||||
|
log.info("Updating story content with processed images");
|
||||||
savedStory.setContentHtml(imageResult.getProcessedContent());
|
savedStory.setContentHtml(imageResult.getProcessedContent());
|
||||||
savedStory = storyService.update(savedStory.getId(), savedStory);
|
savedStory = storyService.update(savedStory.getId(), savedStory);
|
||||||
|
|
||||||
// Log the image processing results
|
// Log the image processing results
|
||||||
log.debug("EPUB Import - Image processing completed for story {}. Downloaded {} images.",
|
log.info("EPUB Import - Image processing completed for story {}. Downloaded {} images.",
|
||||||
savedStory.getId(), imageResult.getDownloadedImages().size());
|
savedStory.getId(), imageResult.getDownloadedImages().size());
|
||||||
|
|
||||||
if (imageResult.hasWarnings()) {
|
if (imageResult.hasWarnings()) {
|
||||||
log.debug("EPUB Import - Image processing warnings: {}",
|
log.warn("EPUB Import - Image processing warnings: {}",
|
||||||
String.join(", ", imageResult.getWarnings()));
|
String.join(", ", imageResult.getWarnings()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// Log error but don't fail the import
|
// Log error but don't fail the import
|
||||||
System.err.println("EPUB Import - Failed to process embedded images for story " +
|
log.error("EPUB Import - Failed to process embedded images for story {}: {}",
|
||||||
savedStory.getId() + ": " + e.getMessage());
|
savedStory.getId(), e.getMessage(), e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.info("Building import response for story: {}", savedStory.getId());
|
||||||
EPUBImportResponse response = EPUBImportResponse.success(savedStory.getId(), savedStory.getTitle());
|
EPUBImportResponse response = EPUBImportResponse.success(savedStory.getId(), savedStory.getTitle());
|
||||||
response.setWordCount(savedStory.getWordCount());
|
response.setWordCount(savedStory.getWordCount());
|
||||||
response.setTotalChapters(book.getSpine().size());
|
response.setTotalChapters(book.getSpine().size());
|
||||||
|
|
||||||
if (request.getPreserveReadingPosition() != null && request.getPreserveReadingPosition()) {
|
if (request.getPreserveReadingPosition() != null && request.getPreserveReadingPosition()) {
|
||||||
|
log.info("Extracting and saving reading position");
|
||||||
ReadingPosition readingPosition = extractReadingPosition(book, savedStory);
|
ReadingPosition readingPosition = extractReadingPosition(book, savedStory);
|
||||||
if (readingPosition != null) {
|
if (readingPosition != null) {
|
||||||
ReadingPosition savedPosition = readingPositionRepository.save(readingPosition);
|
ReadingPosition savedPosition = readingPositionRepository.save(readingPosition);
|
||||||
response.setReadingPosition(convertToDto(savedPosition));
|
response.setReadingPosition(convertToDto(savedPosition));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.info("EPUB import completed successfully for: {}", savedStory.getTitle());
|
||||||
return response;
|
return response;
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
log.error("EPUB import failed with exception: {}", e.getMessage(), e);
|
||||||
return EPUBImportResponse.error("Failed to import EPUB: " + e.getMessage());
|
return EPUBImportResponse.error("Failed to import EPUB: " + e.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -147,77 +157,119 @@ public class EPUBImportService {
|
|||||||
|
|
||||||
private Story createStoryFromEPUB(Book book, EPUBImportRequest request) {
|
private Story createStoryFromEPUB(Book book, EPUBImportRequest request) {
|
||||||
Metadata metadata = book.getMetadata();
|
Metadata metadata = book.getMetadata();
|
||||||
|
|
||||||
|
log.info("Extracting EPUB metadata");
|
||||||
String title = extractTitle(metadata);
|
String title = extractTitle(metadata);
|
||||||
String authorName = extractAuthorName(metadata, request);
|
String authorName = extractAuthorName(metadata, request);
|
||||||
String description = extractDescription(metadata);
|
String description = extractDescription(metadata);
|
||||||
|
|
||||||
|
log.info("Extracting and sanitizing content from {} chapters", book.getSpine().size());
|
||||||
String content = extractContent(book);
|
String content = extractContent(book);
|
||||||
|
|
||||||
Story story = new Story();
|
Story story = new Story();
|
||||||
story.setTitle(title);
|
story.setTitle(title);
|
||||||
story.setDescription(description);
|
story.setDescription(description);
|
||||||
story.setContentHtml(sanitizationService.sanitize(content));
|
story.setContentHtml(sanitizationService.sanitize(content));
|
||||||
|
|
||||||
// Extract and process cover image
|
// Extract and process cover image
|
||||||
if (request.getExtractCover() == null || request.getExtractCover()) {
|
if (request.getExtractCover() == null || request.getExtractCover()) {
|
||||||
|
log.info("Extracting cover image");
|
||||||
String coverPath = extractAndSaveCoverImage(book);
|
String coverPath = extractAndSaveCoverImage(book);
|
||||||
if (coverPath != null) {
|
if (coverPath != null) {
|
||||||
|
log.info("Cover image saved at: {}", coverPath);
|
||||||
story.setCoverPath(coverPath);
|
story.setCoverPath(coverPath);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (request.getAuthorId() != null) {
|
// Handle author assignment
|
||||||
try {
|
try {
|
||||||
Author author = authorService.findById(request.getAuthorId());
|
if (request.getAuthorId() != null) {
|
||||||
|
log.info("Looking up author by ID: {}", request.getAuthorId());
|
||||||
|
try {
|
||||||
|
Author author = authorService.findById(request.getAuthorId());
|
||||||
|
story.setAuthor(author);
|
||||||
|
log.info("Author found and assigned: {}", author.getName());
|
||||||
|
} catch (ResourceNotFoundException e) {
|
||||||
|
log.warn("Author ID {} not found", request.getAuthorId());
|
||||||
|
if (request.getCreateMissingAuthor()) {
|
||||||
|
log.info("Creating new author: {}", authorName);
|
||||||
|
Author newAuthor = createAuthor(authorName);
|
||||||
|
story.setAuthor(newAuthor);
|
||||||
|
log.info("New author created with ID: {}", newAuthor.getId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (authorName != null && request.getCreateMissingAuthor()) {
|
||||||
|
log.info("Finding or creating author: {}", authorName);
|
||||||
|
Author author = findOrCreateAuthor(authorName);
|
||||||
story.setAuthor(author);
|
story.setAuthor(author);
|
||||||
} catch (ResourceNotFoundException e) {
|
log.info("Author assigned: {} (ID: {})", author.getName(), author.getId());
|
||||||
if (request.getCreateMissingAuthor()) {
|
|
||||||
Author newAuthor = createAuthor(authorName);
|
|
||||||
story.setAuthor(newAuthor);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else if (authorName != null && request.getCreateMissingAuthor()) {
|
} catch (Exception e) {
|
||||||
Author author = findOrCreateAuthor(authorName);
|
log.error("Error handling author assignment: {}", e.getMessage(), e);
|
||||||
story.setAuthor(author);
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (request.getSeriesId() != null && request.getSeriesVolume() != null) {
|
// Handle series assignment
|
||||||
try {
|
try {
|
||||||
Series series = seriesService.findById(request.getSeriesId());
|
if (request.getSeriesId() != null && request.getSeriesVolume() != null) {
|
||||||
story.setSeries(series);
|
log.info("Looking up series by ID: {}", request.getSeriesId());
|
||||||
story.setVolume(request.getSeriesVolume());
|
try {
|
||||||
} catch (ResourceNotFoundException e) {
|
Series series = seriesService.findById(request.getSeriesId());
|
||||||
if (request.getCreateMissingSeries() && request.getSeriesName() != null) {
|
story.setSeries(series);
|
||||||
Series newSeries = createSeries(request.getSeriesName());
|
|
||||||
story.setSeries(newSeries);
|
|
||||||
story.setVolume(request.getSeriesVolume());
|
story.setVolume(request.getSeriesVolume());
|
||||||
|
log.info("Series found and assigned: {} (volume {})", series.getName(), request.getSeriesVolume());
|
||||||
|
} catch (ResourceNotFoundException e) {
|
||||||
|
log.warn("Series ID {} not found", request.getSeriesId());
|
||||||
|
if (request.getCreateMissingSeries() && request.getSeriesName() != null) {
|
||||||
|
log.info("Creating new series: {}", request.getSeriesName());
|
||||||
|
Series newSeries = createSeries(request.getSeriesName());
|
||||||
|
story.setSeries(newSeries);
|
||||||
|
story.setVolume(request.getSeriesVolume());
|
||||||
|
log.info("New series created with ID: {}", newSeries.getId());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Error handling series assignment: {}", e.getMessage(), e);
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle tags from request or extract from EPUB metadata
|
// Handle tags from request or extract from EPUB metadata
|
||||||
List<String> allTags = new ArrayList<>();
|
try {
|
||||||
if (request.getTags() != null && !request.getTags().isEmpty()) {
|
List<String> allTags = new ArrayList<>();
|
||||||
allTags.addAll(request.getTags());
|
if (request.getTags() != null && !request.getTags().isEmpty()) {
|
||||||
|
allTags.addAll(request.getTags());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract subjects/keywords from EPUB metadata
|
||||||
|
List<String> epubTags = extractTags(metadata);
|
||||||
|
if (epubTags != null && !epubTags.isEmpty()) {
|
||||||
|
allTags.addAll(epubTags);
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Processing {} tags for story", allTags.size());
|
||||||
|
// Remove duplicates and create tags
|
||||||
|
allTags.stream()
|
||||||
|
.distinct()
|
||||||
|
.forEach(tagName -> {
|
||||||
|
try {
|
||||||
|
log.debug("Finding or creating tag: {}", tagName);
|
||||||
|
Tag tag = tagService.findOrCreate(tagName.trim());
|
||||||
|
story.addTag(tag);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Error creating tag '{}': {}", tagName, e.getMessage(), e);
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Error handling tags: {}", e.getMessage(), e);
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract subjects/keywords from EPUB metadata
|
|
||||||
List<String> epubTags = extractTags(metadata);
|
|
||||||
if (epubTags != null && !epubTags.isEmpty()) {
|
|
||||||
allTags.addAll(epubTags);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove duplicates and create tags
|
|
||||||
allTags.stream()
|
|
||||||
.distinct()
|
|
||||||
.forEach(tagName -> {
|
|
||||||
Tag tag = tagService.findOrCreate(tagName.trim());
|
|
||||||
story.addTag(tag);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Extract additional metadata for potential future use
|
// Extract additional metadata for potential future use
|
||||||
extractAdditionalMetadata(metadata, story);
|
extractAdditionalMetadata(metadata, story);
|
||||||
|
|
||||||
|
log.info("Story entity created successfully: {}", title);
|
||||||
return story;
|
return story;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -244,7 +296,13 @@ public class EPUBImportService {
|
|||||||
private String extractDescription(Metadata metadata) {
|
private String extractDescription(Metadata metadata) {
|
||||||
List<String> descriptions = metadata.getDescriptions();
|
List<String> descriptions = metadata.getDescriptions();
|
||||||
if (descriptions != null && !descriptions.isEmpty()) {
|
if (descriptions != null && !descriptions.isEmpty()) {
|
||||||
return descriptions.get(0);
|
String description = descriptions.get(0);
|
||||||
|
// Truncate to 1000 characters if necessary
|
||||||
|
if (description != null && description.length() > 1000) {
|
||||||
|
log.info("Description exceeds 1000 characters ({}), truncating...", description.length());
|
||||||
|
return description.substring(0, 997) + "...";
|
||||||
|
}
|
||||||
|
return description;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -188,13 +188,13 @@ public class HtmlSanitizationService {
|
|||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info("Content before sanitization: "+html);
|
logger.debug("Sanitizing HTML content (length: {} characters)", html.length());
|
||||||
|
|
||||||
// Preprocess to extract images from figure tags
|
// Preprocess to extract images from figure tags
|
||||||
String preprocessed = preprocessFigureTags(html);
|
String preprocessed = preprocessFigureTags(html);
|
||||||
|
|
||||||
String saniztedHtml = Jsoup.clean(preprocessed, allowlist.preserveRelativeLinks(true));
|
String saniztedHtml = Jsoup.clean(preprocessed, allowlist.preserveRelativeLinks(true));
|
||||||
logger.info("Content after sanitization: "+saniztedHtml);
|
logger.debug("Sanitization complete (output length: {} characters)", saniztedHtml.length());
|
||||||
return saniztedHtml;
|
return saniztedHtml;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user