phase 1 and 2 of embedded images
This commit is contained in:
@@ -228,6 +228,38 @@ public class StoryController {
|
||||
Story story = storyService.updateReadingStatus(id, request.getIsRead());
|
||||
return ResponseEntity.ok(convertToDto(story));
|
||||
}
|
||||
|
||||
@PostMapping("/{id}/process-content-images")
|
||||
public ResponseEntity<Map<String, Object>> processContentImages(@PathVariable UUID id, @RequestBody ProcessContentImagesRequest request) {
|
||||
logger.info("Processing content images for story {}", id);
|
||||
|
||||
try {
|
||||
// Process the HTML content to download and replace image URLs
|
||||
ImageService.ContentImageProcessingResult result = imageService.processContentImages(request.getHtmlContent(), id);
|
||||
|
||||
// If there are warnings, let the client decide whether to proceed
|
||||
if (result.hasWarnings()) {
|
||||
return ResponseEntity.ok(Map.of(
|
||||
"processedContent", result.getProcessedContent(),
|
||||
"warnings", result.getWarnings(),
|
||||
"downloadedImages", result.getDownloadedImages(),
|
||||
"hasWarnings", true
|
||||
));
|
||||
}
|
||||
|
||||
// Success - no warnings
|
||||
return ResponseEntity.ok(Map.of(
|
||||
"processedContent", result.getProcessedContent(),
|
||||
"downloadedImages", result.getDownloadedImages(),
|
||||
"hasWarnings", false
|
||||
));
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("Failed to process content images for story {}", id, e);
|
||||
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR)
|
||||
.body(Map.of("error", "Failed to process content images: " + e.getMessage()));
|
||||
}
|
||||
}
|
||||
|
||||
@PostMapping("/reindex")
|
||||
public ResponseEntity<String> manualReindex() {
|
||||
@@ -458,7 +490,14 @@ public class StoryController {
|
||||
story.setDescription(updateReq.getDescription());
|
||||
}
|
||||
if (updateReq.getContentHtml() != null) {
|
||||
story.setContentHtml(sanitizationService.sanitize(updateReq.getContentHtml()));
|
||||
logger.info("Content before sanitization (length: {}): {}",
|
||||
updateReq.getContentHtml().length(),
|
||||
updateReq.getContentHtml().substring(0, Math.min(500, updateReq.getContentHtml().length())));
|
||||
String sanitizedContent = sanitizationService.sanitize(updateReq.getContentHtml());
|
||||
logger.info("Content after sanitization (length: {}): {}",
|
||||
sanitizedContent.length(),
|
||||
sanitizedContent.substring(0, Math.min(500, sanitizedContent.length())));
|
||||
story.setContentHtml(sanitizedContent);
|
||||
}
|
||||
if (updateReq.getSourceUrl() != null) {
|
||||
story.setSourceUrl(updateReq.getSourceUrl());
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
package com.storycove.dto;
|
||||
|
||||
import jakarta.validation.constraints.NotBlank;
|
||||
|
||||
public class ProcessContentImagesRequest {
|
||||
|
||||
@NotBlank(message = "HTML content is required")
|
||||
private String htmlContent;
|
||||
|
||||
public ProcessContentImagesRequest() {}
|
||||
|
||||
public ProcessContentImagesRequest(String htmlContent) {
|
||||
this.htmlContent = htmlContent;
|
||||
}
|
||||
|
||||
public String getHtmlContent() {
|
||||
return htmlContent;
|
||||
}
|
||||
|
||||
public void setHtmlContent(String htmlContent) {
|
||||
this.htmlContent = htmlContent;
|
||||
}
|
||||
}
|
||||
@@ -54,7 +54,7 @@ public class HtmlSanitizationService {
|
||||
"p", "br", "div", "span", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"b", "strong", "i", "em", "u", "s", "strike", "del", "ins",
|
||||
"sup", "sub", "small", "big", "mark", "pre", "code",
|
||||
"ul", "ol", "li", "dl", "dt", "dd", "a",
|
||||
"ul", "ol", "li", "dl", "dt", "dd", "a", "img",
|
||||
"table", "thead", "tbody", "tfoot", "tr", "th", "td", "caption",
|
||||
"blockquote", "cite", "q", "hr"
|
||||
));
|
||||
@@ -65,13 +65,13 @@ public class HtmlSanitizationService {
|
||||
}
|
||||
|
||||
private void createSafelist() {
|
||||
this.allowlist = new Safelist();
|
||||
|
||||
this.allowlist = Safelist.relaxed();
|
||||
|
||||
// Add allowed tags
|
||||
if (config.getAllowedTags() != null) {
|
||||
config.getAllowedTags().forEach(allowlist::addTags);
|
||||
}
|
||||
|
||||
|
||||
// Add allowed attributes
|
||||
if (config.getAllowedAttributes() != null) {
|
||||
for (Map.Entry<String, List<String>> entry : config.getAllowedAttributes().entrySet()) {
|
||||
@@ -82,25 +82,33 @@ public class HtmlSanitizationService {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Configure allowed protocols for specific attributes (e.g., href)
|
||||
|
||||
// Special handling for img tags - allow all src attributes and validate later
|
||||
allowlist.removeProtocols("img", "src", "http", "https");
|
||||
// This is the key: preserve relative URLs by not restricting them
|
||||
allowlist.preserveRelativeLinks(true);
|
||||
|
||||
// Configure allowed protocols for other attributes
|
||||
if (config.getAllowedProtocols() != null) {
|
||||
for (Map.Entry<String, Map<String, List<String>>> tagEntry : config.getAllowedProtocols().entrySet()) {
|
||||
String tag = tagEntry.getKey();
|
||||
Map<String, List<String>> attributeProtocols = tagEntry.getValue();
|
||||
|
||||
|
||||
if (attributeProtocols != null) {
|
||||
for (Map.Entry<String, List<String>> attrEntry : attributeProtocols.entrySet()) {
|
||||
String attribute = attrEntry.getKey();
|
||||
List<String> protocols = attrEntry.getValue();
|
||||
|
||||
if (protocols != null) {
|
||||
|
||||
if (protocols != null && !("img".equals(tag) && "src".equals(attribute))) {
|
||||
// Skip img src since we handled it above
|
||||
allowlist.addProtocols(tag, attribute, protocols.toArray(new String[0]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logger.info("Configured Jsoup Safelist with preserveRelativeLinks=true for local image URLs");
|
||||
|
||||
// Remove specific attributes if needed (deprecated in favor of protocol control)
|
||||
if (config.getRemovedAttributes() != null) {
|
||||
@@ -133,8 +141,10 @@ public class HtmlSanitizationService {
|
||||
if (html == null || html.trim().isEmpty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
return Jsoup.clean(html, allowlist);
|
||||
logger.info("Content before sanitization: "+html);
|
||||
String saniztedHtml = Jsoup.clean(html, allowlist.preserveRelativeLinks(true));
|
||||
logger.info("Content after sanitization: "+saniztedHtml);
|
||||
return saniztedHtml;
|
||||
}
|
||||
|
||||
public String extractPlainText(String html) {
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package com.storycove.service;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
@@ -8,18 +10,22 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
import javax.imageio.ImageIO;
|
||||
import java.awt.*;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.*;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.*;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@Service
|
||||
public class ImageService {
|
||||
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ImageService.class);
|
||||
|
||||
private static final Set<String> ALLOWED_CONTENT_TYPES = Set.of(
|
||||
"image/jpeg", "image/jpg", "image/png"
|
||||
);
|
||||
@@ -53,14 +59,15 @@ public class ImageService {
|
||||
|
||||
public enum ImageType {
|
||||
COVER("covers"),
|
||||
AVATAR("avatars");
|
||||
|
||||
AVATAR("avatars"),
|
||||
CONTENT("content");
|
||||
|
||||
private final String directory;
|
||||
|
||||
|
||||
ImageType(String directory) {
|
||||
this.directory = directory;
|
||||
}
|
||||
|
||||
|
||||
public String getDirectory() {
|
||||
return directory;
|
||||
}
|
||||
@@ -182,6 +189,9 @@ public class ImageService {
|
||||
maxWidth = avatarMaxSize;
|
||||
maxHeight = avatarMaxSize;
|
||||
break;
|
||||
case CONTENT:
|
||||
// Content images are not resized
|
||||
return new Dimension(originalWidth, originalHeight);
|
||||
default:
|
||||
return new Dimension(originalWidth, originalHeight);
|
||||
}
|
||||
@@ -228,4 +238,224 @@ public class ImageService {
|
||||
String extension = getFileExtension(filename);
|
||||
return ALLOWED_EXTENSIONS.contains(extension);
|
||||
}
|
||||
|
||||
// Content image processing methods
|
||||
|
||||
/**
|
||||
* Process HTML content and download all referenced images, replacing URLs with local paths
|
||||
*/
|
||||
public ContentImageProcessingResult processContentImages(String htmlContent, UUID storyId) {
|
||||
logger.info("Processing content images for story: {}, content length: {}", storyId,
|
||||
htmlContent != null ? htmlContent.length() : 0);
|
||||
|
||||
List<String> warnings = new ArrayList<>();
|
||||
List<String> downloadedImages = new ArrayList<>();
|
||||
|
||||
if (htmlContent == null || htmlContent.trim().isEmpty()) {
|
||||
logger.info("No content to process for story: {}", storyId);
|
||||
return new ContentImageProcessingResult(htmlContent, warnings, downloadedImages);
|
||||
}
|
||||
|
||||
// Find all img tags with src attributes
|
||||
Pattern imgPattern = Pattern.compile("<img[^>]+src=[\"']([^\"']+)[\"'][^>]*>", Pattern.CASE_INSENSITIVE);
|
||||
Matcher matcher = imgPattern.matcher(htmlContent);
|
||||
|
||||
int imageCount = 0;
|
||||
int externalImageCount = 0;
|
||||
|
||||
StringBuffer processedContent = new StringBuffer();
|
||||
|
||||
while (matcher.find()) {
|
||||
String fullImgTag = matcher.group(0);
|
||||
String imageUrl = matcher.group(1);
|
||||
imageCount++;
|
||||
|
||||
logger.info("Found image #{}: {} in tag: {}", imageCount, imageUrl, fullImgTag);
|
||||
|
||||
try {
|
||||
// Skip if it's already a local path or data URL
|
||||
if (imageUrl.startsWith("/") || imageUrl.startsWith("data:")) {
|
||||
logger.info("Skipping local/data URL: {}", imageUrl);
|
||||
matcher.appendReplacement(processedContent, Matcher.quoteReplacement(fullImgTag));
|
||||
continue;
|
||||
}
|
||||
|
||||
externalImageCount++;
|
||||
logger.info("Processing external image #{}: {}", externalImageCount, imageUrl);
|
||||
|
||||
// Download and store the image
|
||||
String localPath = downloadImageFromUrl(imageUrl, storyId);
|
||||
downloadedImages.add(localPath);
|
||||
|
||||
// Generate local URL
|
||||
String localUrl = getLocalImageUrl(storyId, localPath);
|
||||
logger.info("Downloaded image: {} -> {}", imageUrl, localUrl);
|
||||
|
||||
// Replace the src attribute with the local path - handle both single and double quotes
|
||||
String newImgTag = fullImgTag
|
||||
.replaceFirst("src=\"" + Pattern.quote(imageUrl) + "\"", "src=\"" + localUrl + "\"")
|
||||
.replaceFirst("src='" + Pattern.quote(imageUrl) + "'", "src=\"" + localUrl + "\"");
|
||||
|
||||
// If replacement didn't work, try a more generic approach
|
||||
if (newImgTag.equals(fullImgTag)) {
|
||||
logger.warn("Standard replacement failed for image URL: {}, trying generic replacement", imageUrl);
|
||||
newImgTag = fullImgTag.replaceAll("src\\s*=\\s*[\"']?" + Pattern.quote(imageUrl) + "[\"']?", "src=\"" + localUrl + "\"");
|
||||
}
|
||||
|
||||
logger.info("Replaced img tag: {} -> {}", fullImgTag, newImgTag);
|
||||
matcher.appendReplacement(processedContent, Matcher.quoteReplacement(newImgTag));
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("Failed to download image: {} - {}", imageUrl, e.getMessage(), e);
|
||||
warnings.add("Failed to download image: " + imageUrl + " - " + e.getMessage());
|
||||
// Keep original URL in case of failure
|
||||
matcher.appendReplacement(processedContent, Matcher.quoteReplacement(fullImgTag));
|
||||
}
|
||||
}
|
||||
|
||||
matcher.appendTail(processedContent);
|
||||
|
||||
logger.info("Finished processing images for story: {}. Found {} total images, {} external. Downloaded {} images, {} warnings",
|
||||
storyId, imageCount, externalImageCount, downloadedImages.size(), warnings.size());
|
||||
|
||||
return new ContentImageProcessingResult(processedContent.toString(), warnings, downloadedImages);
|
||||
}
|
||||
|
||||
/**
|
||||
* Download an image from a URL and store it locally
|
||||
*/
|
||||
private String downloadImageFromUrl(String imageUrl, UUID storyId) throws IOException {
|
||||
URL url = new URL(imageUrl);
|
||||
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
|
||||
|
||||
// Set a reasonable user agent to avoid blocks
|
||||
connection.setRequestProperty("User-Agent", "Mozilla/5.0 (StoryCove Image Processor)");
|
||||
connection.setConnectTimeout(30000); // 30 seconds
|
||||
connection.setReadTimeout(30000);
|
||||
|
||||
try (InputStream inputStream = connection.getInputStream()) {
|
||||
// Get content type to determine file extension
|
||||
String contentType = connection.getContentType();
|
||||
String extension = getExtensionFromContentType(contentType);
|
||||
|
||||
if (extension == null) {
|
||||
// Try to extract from URL
|
||||
extension = getExtensionFromUrl(imageUrl);
|
||||
}
|
||||
|
||||
if (extension == null || !ALLOWED_EXTENSIONS.contains(extension.toLowerCase())) {
|
||||
throw new IllegalArgumentException("Unsupported image format: " + contentType);
|
||||
}
|
||||
|
||||
// Create directories for content images
|
||||
Path contentDir = Paths.get(getUploadDir(), ImageType.CONTENT.getDirectory(), storyId.toString());
|
||||
Files.createDirectories(contentDir);
|
||||
|
||||
// Generate unique filename
|
||||
String filename = UUID.randomUUID().toString() + "." + extension.toLowerCase();
|
||||
Path filePath = contentDir.resolve(filename);
|
||||
|
||||
// Read and validate the image
|
||||
byte[] imageData = inputStream.readAllBytes();
|
||||
ByteArrayInputStream bais = new ByteArrayInputStream(imageData);
|
||||
BufferedImage image = ImageIO.read(bais);
|
||||
|
||||
if (image == null) {
|
||||
throw new IOException("Invalid image format");
|
||||
}
|
||||
|
||||
// Save the image
|
||||
Files.write(filePath, imageData);
|
||||
|
||||
// Return relative path
|
||||
return ImageType.CONTENT.getDirectory() + "/" + storyId.toString() + "/" + filename;
|
||||
|
||||
} finally {
|
||||
connection.disconnect();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate local image URL for serving
|
||||
*/
|
||||
private String getLocalImageUrl(UUID storyId, String imagePath) {
|
||||
String currentLibraryId = libraryService.getCurrentLibraryId();
|
||||
if (currentLibraryId == null || currentLibraryId.trim().isEmpty()) {
|
||||
logger.warn("Current library ID is null or empty when generating local image URL for story: {}", storyId);
|
||||
return "/api/files/images/default/" + imagePath;
|
||||
}
|
||||
String localUrl = "/api/files/images/" + currentLibraryId + "/" + imagePath;
|
||||
logger.info("Generated local image URL: {} for story: {}", localUrl, storyId);
|
||||
return localUrl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get file extension from content type
|
||||
*/
|
||||
private String getExtensionFromContentType(String contentType) {
|
||||
if (contentType == null) return null;
|
||||
|
||||
switch (contentType.toLowerCase()) {
|
||||
case "image/jpeg":
|
||||
case "image/jpg":
|
||||
return "jpg";
|
||||
case "image/png":
|
||||
return "png";
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract file extension from URL
|
||||
*/
|
||||
private String getExtensionFromUrl(String url) {
|
||||
try {
|
||||
String path = new URL(url).getPath();
|
||||
int lastDot = path.lastIndexOf('.');
|
||||
if (lastDot > 0 && lastDot < path.length() - 1) {
|
||||
return path.substring(lastDot + 1).toLowerCase();
|
||||
}
|
||||
} catch (Exception ignored) {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up content images for a story
|
||||
*/
|
||||
public void deleteContentImages(UUID storyId) {
|
||||
try {
|
||||
Path contentDir = Paths.get(getUploadDir(), ImageType.CONTENT.getDirectory(), storyId.toString());
|
||||
if (Files.exists(contentDir)) {
|
||||
Files.walk(contentDir)
|
||||
.sorted(Comparator.reverseOrder())
|
||||
.map(Path::toFile)
|
||||
.forEach(java.io.File::delete);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// Log but don't throw - this is cleanup
|
||||
System.err.println("Failed to clean up content images for story " + storyId + ": " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Result class for content image processing
|
||||
*/
|
||||
public static class ContentImageProcessingResult {
|
||||
private final String processedContent;
|
||||
private final List<String> warnings;
|
||||
private final List<String> downloadedImages;
|
||||
|
||||
public ContentImageProcessingResult(String processedContent, List<String> warnings, List<String> downloadedImages) {
|
||||
this.processedContent = processedContent;
|
||||
this.warnings = warnings;
|
||||
this.downloadedImages = downloadedImages;
|
||||
}
|
||||
|
||||
public String getProcessedContent() { return processedContent; }
|
||||
public List<String> getWarnings() { return warnings; }
|
||||
public List<String> getDownloadedImages() { return downloadedImages; }
|
||||
public boolean hasWarnings() { return !warnings.isEmpty(); }
|
||||
}
|
||||
}
|
||||
@@ -4,7 +4,7 @@
|
||||
"b", "strong", "i", "em", "u", "s", "strike", "del", "ins",
|
||||
"sup", "sub", "small", "big", "mark", "pre", "code", "kbd", "samp", "var",
|
||||
"ul", "ol", "li", "dl", "dt", "dd",
|
||||
"a", "table", "thead", "tbody", "tfoot", "tr", "th", "td", "caption", "colgroup", "col",
|
||||
"a", "img", "table", "thead", "tbody", "tfoot", "tr", "th", "td", "caption", "colgroup", "col",
|
||||
"blockquote", "cite", "q", "hr", "details", "summary"
|
||||
],
|
||||
"allowedAttributes": {
|
||||
@@ -18,6 +18,7 @@
|
||||
"h5": ["class", "style"],
|
||||
"h6": ["class", "style"],
|
||||
"a": ["class", "href", "title"],
|
||||
"img": ["src", "alt", "width", "height", "class", "style"],
|
||||
"table": ["class", "style"],
|
||||
"th": ["class", "style", "colspan", "rowspan"],
|
||||
"td": ["class", "style", "colspan", "rowspan"],
|
||||
@@ -41,6 +42,9 @@
|
||||
"allowedProtocols": {
|
||||
"a": {
|
||||
"href": ["http", "https", "#", "/"]
|
||||
},
|
||||
"img": {
|
||||
"src": ["http", "https", "data", "/", "cid"]
|
||||
}
|
||||
},
|
||||
"description": "HTML sanitization configuration for StoryCove story content. This configuration is shared between frontend (DOMPurify) and backend (Jsoup) to ensure consistency."
|
||||
|
||||
Reference in New Issue
Block a user