Indexing Issues
This commit is contained in:
@@ -6,6 +6,7 @@ import com.storycove.entity.Story;
|
|||||||
import com.storycove.repository.AuthorRepository;
|
import com.storycove.repository.AuthorRepository;
|
||||||
import com.storycove.repository.CollectionRepository;
|
import com.storycove.repository.CollectionRepository;
|
||||||
import com.storycove.repository.StoryRepository;
|
import com.storycove.repository.StoryRepository;
|
||||||
|
import com.storycove.service.LibraryService;
|
||||||
import com.storycove.service.SearchServiceAdapter;
|
import com.storycove.service.SearchServiceAdapter;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@@ -38,6 +39,9 @@ public class StartupIndexingRunner implements ApplicationRunner {
|
|||||||
@Autowired
|
@Autowired
|
||||||
private CollectionRepository collectionRepository;
|
private CollectionRepository collectionRepository;
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
private LibraryService libraryService;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run(ApplicationArguments args) throws Exception {
|
public void run(ApplicationArguments args) throws Exception {
|
||||||
logger.info("========================================");
|
logger.info("========================================");
|
||||||
@@ -52,6 +56,20 @@ public class StartupIndexingRunner implements ApplicationRunner {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Skip indexing if no library is authenticated yet.
|
||||||
|
// Without an active library, SolrService falls back to libraryId="default" for every
|
||||||
|
// document, which would overwrite correctly-indexed documents (indexed with the real
|
||||||
|
// library ID at creation time) and make them invisible to all subsequent searches.
|
||||||
|
// The nightly reindex scheduler (NightlyReindexScheduler) will handle resyncing once
|
||||||
|
// a user has authenticated and a library is active. A manual reindex can also be
|
||||||
|
// triggered via POST /api/admin/search/solr/reindex.
|
||||||
|
if (libraryService.getCurrentLibraryId() == null) {
|
||||||
|
logger.warn("No active library at startup — skipping bulk reindexing to avoid");
|
||||||
|
logger.warn("overwriting documents with an incorrect libraryId.");
|
||||||
|
logger.warn("Trigger POST /api/admin/search/solr/reindex after authentication.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
|
|
||||||
// Index all stories
|
// Index all stories
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ public class AuthorIndexScheduler {
|
|||||||
this.searchServiceAdapter = searchServiceAdapter;
|
this.searchServiceAdapter = searchServiceAdapter;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Scheduled(fixedRateString = "${storycove.search.author-reindex-interval:7200000}") // 2 hours default
|
@Scheduled(fixedRateString = "${storycove.search.author-reindex-interval:7200000}") // 2 hours default, configurable via SEARCH_AUTHOR_REINDEX_INTERVAL
|
||||||
public void reindexAllAuthors() {
|
public void reindexAllAuthors() {
|
||||||
try {
|
try {
|
||||||
logger.info("Starting scheduled author reindexing...");
|
logger.info("Starting scheduled author reindexing...");
|
||||||
|
|||||||
@@ -5,8 +5,6 @@ import com.storycove.repository.*;
|
|||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.beans.factory.annotation.Qualifier;
|
import org.springframework.beans.factory.annotation.Qualifier;
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
import org.springframework.context.ApplicationContext;
|
|
||||||
import org.springframework.context.ApplicationContextAware;
|
|
||||||
import org.springframework.core.io.Resource;
|
import org.springframework.core.io.Resource;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
@@ -24,7 +22,7 @@ import java.util.zip.ZipInputStream;
|
|||||||
import java.util.zip.ZipOutputStream;
|
import java.util.zip.ZipOutputStream;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
public class DatabaseManagementService implements ApplicationContextAware {
|
public class DatabaseManagementService {
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
@Qualifier("dataSource") // Use the primary routing datasource
|
@Qualifier("dataSource") // Use the primary routing datasource
|
||||||
@@ -62,13 +60,6 @@ public class DatabaseManagementService implements ApplicationContextAware {
|
|||||||
@Value("${storycove.images.upload-dir:/app/images}")
|
@Value("${storycove.images.upload-dir:/app/images}")
|
||||||
private String uploadDir;
|
private String uploadDir;
|
||||||
|
|
||||||
private ApplicationContext applicationContext;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void setApplicationContext(ApplicationContext applicationContext) {
|
|
||||||
this.applicationContext = applicationContext;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper methods to extract database connection details
|
// Helper methods to extract database connection details
|
||||||
private String extractDatabaseUrl() {
|
private String extractDatabaseUrl() {
|
||||||
try (Connection connection = getDataSource().getConnection()) {
|
try (Connection connection = getDataSource().getConnection()) {
|
||||||
@@ -236,12 +227,24 @@ public class DatabaseManagementService implements ApplicationContextAware {
|
|||||||
System.err.println("No files directory found in backup - skipping file restore.");
|
System.err.println("No files directory found in backup - skipping file restore.");
|
||||||
}
|
}
|
||||||
|
|
||||||
// 6. Trigger complete search index reindex after data restoration
|
// 6. Trigger complete search index reindex after data restoration.
|
||||||
|
// We fetch the data directly from the repositories already injected into this service
|
||||||
|
// and bulk-index it. This avoids the incomplete performCompleteReindex() path which
|
||||||
|
// only recreates the schema without repopulating data.
|
||||||
try {
|
try {
|
||||||
System.err.println("Starting search index reindex after restore...");
|
System.err.println("Starting search index reindex after restore...");
|
||||||
SearchServiceAdapter searchServiceAdapter = applicationContext.getBean(SearchServiceAdapter.class);
|
if (searchServiceAdapter.isSearchServiceAvailable()) {
|
||||||
searchServiceAdapter.performCompleteReindex();
|
List<com.storycove.entity.Story> stories = storyRepository.findAllWithAssociations();
|
||||||
System.err.println("Search index reindex completed successfully.");
|
List<com.storycove.entity.Author> authors = authorRepository.findAll();
|
||||||
|
List<com.storycove.entity.Collection> collections = collectionRepository.findAllWithTags();
|
||||||
|
searchServiceAdapter.bulkIndexStories(stories);
|
||||||
|
searchServiceAdapter.bulkIndexAuthors(authors);
|
||||||
|
searchServiceAdapter.bulkIndexCollections(collections);
|
||||||
|
System.err.println("Search index reindex completed: " + stories.size() + " stories, "
|
||||||
|
+ authors.size() + " authors, " + collections.size() + " collections.");
|
||||||
|
} else {
|
||||||
|
System.err.println("Solr not available — skipping search reindex after restore.");
|
||||||
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
System.err.println("Warning: Failed to reindex search after restore: " + e.getMessage());
|
System.err.println("Warning: Failed to reindex search after restore: " + e.getMessage());
|
||||||
// Don't fail the entire restore for search issues
|
// Don't fail the entire restore for search issues
|
||||||
|
|||||||
@@ -0,0 +1,115 @@
|
|||||||
|
package com.storycove.service;
|
||||||
|
|
||||||
|
import com.storycove.entity.Author;
|
||||||
|
import com.storycove.entity.Collection;
|
||||||
|
import com.storycove.entity.Story;
|
||||||
|
import com.storycove.repository.AuthorRepository;
|
||||||
|
import com.storycove.repository.CollectionRepository;
|
||||||
|
import com.storycove.repository.StoryRepository;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||||
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs a nightly complete reindex of all entities (stories, authors, collections)
|
||||||
|
* to keep the Solr search index in sync with the database.
|
||||||
|
*
|
||||||
|
* This scheduler runs at 3 AM by default (configurable via storycove.search.nightly-reindex-cron).
|
||||||
|
* It is intentionally skipped when no library is authenticated so that it never indexes documents
|
||||||
|
* with the fallback libraryId="default", which would make them invisible to searches.
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
@ConditionalOnProperty(name = "storycove.search.enabled", havingValue = "true", matchIfMissing = true)
|
||||||
|
public class NightlyReindexScheduler {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(NightlyReindexScheduler.class);
|
||||||
|
|
||||||
|
private final StoryRepository storyRepository;
|
||||||
|
private final AuthorRepository authorRepository;
|
||||||
|
private final CollectionRepository collectionRepository;
|
||||||
|
private final SearchServiceAdapter searchServiceAdapter;
|
||||||
|
private final LibraryService libraryService;
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
public NightlyReindexScheduler(StoryRepository storyRepository,
|
||||||
|
AuthorRepository authorRepository,
|
||||||
|
CollectionRepository collectionRepository,
|
||||||
|
SearchServiceAdapter searchServiceAdapter,
|
||||||
|
LibraryService libraryService) {
|
||||||
|
this.storyRepository = storyRepository;
|
||||||
|
this.authorRepository = authorRepository;
|
||||||
|
this.collectionRepository = collectionRepository;
|
||||||
|
this.searchServiceAdapter = searchServiceAdapter;
|
||||||
|
this.libraryService = libraryService;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Scheduled(cron = "${storycove.search.nightly-reindex-cron:0 0 3 * * ?}") // 3 AM daily by default
|
||||||
|
public void reindexAll() {
|
||||||
|
logger.info("========================================");
|
||||||
|
logger.info("Starting nightly full search reindexing...");
|
||||||
|
logger.info("========================================");
|
||||||
|
|
||||||
|
if (!searchServiceAdapter.isSearchServiceAvailable()) {
|
||||||
|
logger.warn("Solr is not available — skipping nightly reindexing.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only reindex when a library is active so every document gets the correct libraryId.
|
||||||
|
// Without this guard, documents would be indexed with libraryId="default" and become
|
||||||
|
// invisible to searches that filter by the real library ID.
|
||||||
|
if (libraryService.getCurrentLibraryId() == null) {
|
||||||
|
logger.warn("No active library — skipping nightly reindexing.");
|
||||||
|
logger.warn("A user must authenticate before the nightly reindex can run.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
long startTime = System.currentTimeMillis();
|
||||||
|
int storiesIndexed = 0;
|
||||||
|
int authorsIndexed = 0;
|
||||||
|
int collectionsIndexed = 0;
|
||||||
|
|
||||||
|
try {
|
||||||
|
List<Story> stories = storyRepository.findAllWithAssociations();
|
||||||
|
if (!stories.isEmpty()) {
|
||||||
|
searchServiceAdapter.bulkIndexStories(stories);
|
||||||
|
storiesIndexed = stories.size();
|
||||||
|
logger.info("Reindexed {} stories", storiesIndexed);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Failed to reindex stories during nightly run", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
List<Author> authors = authorRepository.findAll();
|
||||||
|
if (!authors.isEmpty()) {
|
||||||
|
searchServiceAdapter.bulkIndexAuthors(authors);
|
||||||
|
authorsIndexed = authors.size();
|
||||||
|
logger.info("Reindexed {} authors", authorsIndexed);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Failed to reindex authors during nightly run", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
List<Collection> collections = collectionRepository.findAllWithTags();
|
||||||
|
if (!collections.isEmpty()) {
|
||||||
|
searchServiceAdapter.bulkIndexCollections(collections);
|
||||||
|
collectionsIndexed = collections.size();
|
||||||
|
logger.info("Reindexed {} collections", collectionsIndexed);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Failed to reindex collections during nightly run", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
long duration = System.currentTimeMillis() - startTime;
|
||||||
|
logger.info("========================================");
|
||||||
|
logger.info("Nightly reindexing completed in {}ms — {} stories, {} authors, {} collections",
|
||||||
|
duration, storiesIndexed, authorsIndexed, collectionsIndexed);
|
||||||
|
logger.info("========================================");
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -88,15 +88,24 @@ public class SearchServiceAdapter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Perform complete reindex of all data
|
* Recreates the Solr index schema (drops and re-creates cores / clears all documents).
|
||||||
|
*
|
||||||
|
* <p><strong>Warning:</strong> This method only clears the index — it does NOT repopulate
|
||||||
|
* it with data. Callers are responsible for calling {@link #bulkIndexStories},
|
||||||
|
* {@link #bulkIndexAuthors}, and {@link #bulkIndexCollections} afterwards.
|
||||||
|
* Use {@code POST /api/admin/search/solr/reindex} for a full reindex including data.</p>
|
||||||
|
*
|
||||||
|
* @deprecated Prefer the admin endpoint or directly call the bulk-index methods after
|
||||||
|
* fetching data from the repositories.
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public void performCompleteReindex() {
|
public void performCompleteReindex() {
|
||||||
try {
|
try {
|
||||||
recreateIndices();
|
recreateIndices();
|
||||||
logger.info("Search indices recreated successfully");
|
logger.info("Search indices recreated (schema only — data must be re-added separately)");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Failed to perform complete reindex", e);
|
logger.error("Failed to recreate search indices", e);
|
||||||
throw new RuntimeException("Failed to perform complete reindex", e);
|
throw new RuntimeException("Failed to recreate search indices", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,9 @@ import org.springframework.stereotype.Service;
|
|||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
import org.springframework.validation.annotation.Validated;
|
import org.springframework.validation.annotation.Validated;
|
||||||
|
|
||||||
|
import org.springframework.transaction.support.TransactionSynchronization;
|
||||||
|
import org.springframework.transaction.support.TransactionSynchronizationManager;
|
||||||
|
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
@@ -328,19 +331,19 @@ public class StoryService {
|
|||||||
|
|
||||||
public Story create(@Valid Story story) {
|
public Story create(@Valid Story story) {
|
||||||
validateStoryForCreate(story);
|
validateStoryForCreate(story);
|
||||||
|
|
||||||
// Set up relationships
|
// Set up relationships
|
||||||
if (story.getAuthor() != null && story.getAuthor().getId() != null) {
|
if (story.getAuthor() != null && story.getAuthor().getId() != null) {
|
||||||
Author author = authorService.findById(story.getAuthor().getId());
|
Author author = authorService.findById(story.getAuthor().getId());
|
||||||
story.setAuthor(author);
|
story.setAuthor(author);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (story.getSeries() != null && story.getSeries().getId() != null) {
|
if (story.getSeries() != null && story.getSeries().getId() != null) {
|
||||||
Series series = seriesService.findById(story.getSeries().getId());
|
Series series = seriesService.findById(story.getSeries().getId());
|
||||||
story.setSeries(series);
|
story.setSeries(series);
|
||||||
validateSeriesVolume(series, story.getVolume());
|
validateSeriesVolume(series, story.getVolume());
|
||||||
}
|
}
|
||||||
|
|
||||||
Story savedStory = storyRepository.save(story);
|
Story savedStory = storyRepository.save(story);
|
||||||
|
|
||||||
// Handle tags
|
// Handle tags
|
||||||
@@ -348,27 +351,29 @@ public class StoryService {
|
|||||||
updateStoryTags(savedStory, story.getTags());
|
updateStoryTags(savedStory, story.getTags());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Index in search engine
|
// Index AFTER the transaction commits so that Hibernate has already flushed the entity
|
||||||
searchServiceAdapter.indexStory(savedStory);
|
// (setting @CreationTimestamp / @UpdateTimestamp) and all tag relationships are persisted.
|
||||||
|
// Indexing inside the transaction would send null timestamps and incomplete tag data to Solr.
|
||||||
|
scheduleIndexAfterCommit(savedStory);
|
||||||
|
|
||||||
return savedStory;
|
return savedStory;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Story createWithTagNames(@Valid Story story, java.util.List<String> tagNames) {
|
public Story createWithTagNames(@Valid Story story, java.util.List<String> tagNames) {
|
||||||
validateStoryForCreate(story);
|
validateStoryForCreate(story);
|
||||||
|
|
||||||
// Set up relationships
|
// Set up relationships
|
||||||
if (story.getAuthor() != null && story.getAuthor().getId() != null) {
|
if (story.getAuthor() != null && story.getAuthor().getId() != null) {
|
||||||
Author author = authorService.findById(story.getAuthor().getId());
|
Author author = authorService.findById(story.getAuthor().getId());
|
||||||
story.setAuthor(author);
|
story.setAuthor(author);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (story.getSeries() != null && story.getSeries().getId() != null) {
|
if (story.getSeries() != null && story.getSeries().getId() != null) {
|
||||||
Series series = seriesService.findById(story.getSeries().getId());
|
Series series = seriesService.findById(story.getSeries().getId());
|
||||||
story.setSeries(series);
|
story.setSeries(series);
|
||||||
validateSeriesVolume(series, story.getVolume());
|
validateSeriesVolume(series, story.getVolume());
|
||||||
}
|
}
|
||||||
|
|
||||||
Story savedStory = storyRepository.save(story);
|
Story savedStory = storyRepository.save(story);
|
||||||
|
|
||||||
// Handle tags by names
|
// Handle tags by names
|
||||||
@@ -376,12 +381,39 @@ public class StoryService {
|
|||||||
updateStoryTagsByNames(savedStory, tagNames);
|
updateStoryTagsByNames(savedStory, tagNames);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Index in search engine
|
// Index AFTER the transaction commits (same reason as create() above).
|
||||||
searchServiceAdapter.indexStory(savedStory);
|
scheduleIndexAfterCommit(savedStory);
|
||||||
|
|
||||||
return savedStory;
|
return savedStory;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Schedules Solr indexing to run immediately after the current transaction commits.
|
||||||
|
*
|
||||||
|
* <p>Indexing inside the transaction would produce an incomplete document:
|
||||||
|
* <ul>
|
||||||
|
* <li>Hibernate's {@code @CreationTimestamp} / {@code @UpdateTimestamp} are set during flush
|
||||||
|
* (just before the INSERT), so they are {@code null} until then.</li>
|
||||||
|
* <li>Tag and relationship join-table rows are not yet written to the DB.</li>
|
||||||
|
* </ul>
|
||||||
|
* By using {@code afterCommit}, the flush has completed, all timestamps and associations
|
||||||
|
* are present on the (now detached) entity, and we index a consistent snapshot.</p>
|
||||||
|
*
|
||||||
|
* <p>Falls back to immediate indexing when no transaction is active.</p>
|
||||||
|
*/
|
||||||
|
private void scheduleIndexAfterCommit(Story story) {
|
||||||
|
if (TransactionSynchronizationManager.isActualTransactionActive()) {
|
||||||
|
TransactionSynchronizationManager.registerSynchronization(new TransactionSynchronization() {
|
||||||
|
@Override
|
||||||
|
public void afterCommit() {
|
||||||
|
searchServiceAdapter.indexStory(story);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
searchServiceAdapter.indexStory(story);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public Story update(UUID id, @Valid Story storyUpdates) {
|
public Story update(UUID id, @Valid Story storyUpdates) {
|
||||||
Story existingStory = findById(id);
|
Story existingStory = findById(id);
|
||||||
|
|
||||||
|
|||||||
@@ -48,6 +48,11 @@ storycove:
|
|||||||
password: ${APP_PASSWORD} # REQUIRED: No default password for security
|
password: ${APP_PASSWORD} # REQUIRED: No default password for security
|
||||||
search:
|
search:
|
||||||
engine: solr # Apache Solr search engine
|
engine: solr # Apache Solr search engine
|
||||||
|
# Cron for the nightly full reindex (stories + authors + collections). Default: 3 AM daily.
|
||||||
|
# Set to "-" to disable. Override via SEARCH_NIGHTLY_REINDEX_CRON env var.
|
||||||
|
nightly-reindex-cron: ${SEARCH_NIGHTLY_REINDEX_CRON:0 0 3 * * ?}
|
||||||
|
# How often (ms) to reindex authors to refresh derived stats (storyCount, averageRating).
|
||||||
|
author-reindex-interval: ${SEARCH_AUTHOR_REINDEX_INTERVAL:7200000}
|
||||||
solr:
|
solr:
|
||||||
# Connection settings
|
# Connection settings
|
||||||
url: ${SOLR_URL:http://solr:8983/solr}
|
url: ${SOLR_URL:http://solr:8983/solr}
|
||||||
|
|||||||
Reference in New Issue
Block a user