Indexing Issues
This commit is contained in:
@@ -6,6 +6,7 @@ import com.storycove.entity.Story;
|
||||
import com.storycove.repository.AuthorRepository;
|
||||
import com.storycove.repository.CollectionRepository;
|
||||
import com.storycove.repository.StoryRepository;
|
||||
import com.storycove.service.LibraryService;
|
||||
import com.storycove.service.SearchServiceAdapter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@@ -38,6 +39,9 @@ public class StartupIndexingRunner implements ApplicationRunner {
|
||||
@Autowired
|
||||
private CollectionRepository collectionRepository;
|
||||
|
||||
@Autowired
|
||||
private LibraryService libraryService;
|
||||
|
||||
@Override
|
||||
public void run(ApplicationArguments args) throws Exception {
|
||||
logger.info("========================================");
|
||||
@@ -52,6 +56,20 @@ public class StartupIndexingRunner implements ApplicationRunner {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip indexing if no library is authenticated yet.
|
||||
// Without an active library, SolrService falls back to libraryId="default" for every
|
||||
// document, which would overwrite correctly-indexed documents (indexed with the real
|
||||
// library ID at creation time) and make them invisible to all subsequent searches.
|
||||
// The nightly reindex scheduler (NightlyReindexScheduler) will handle resyncing once
|
||||
// a user has authenticated and a library is active. A manual reindex can also be
|
||||
// triggered via POST /api/admin/search/solr/reindex.
|
||||
if (libraryService.getCurrentLibraryId() == null) {
|
||||
logger.warn("No active library at startup — skipping bulk reindexing to avoid");
|
||||
logger.warn("overwriting documents with an incorrect libraryId.");
|
||||
logger.warn("Trigger POST /api/admin/search/solr/reindex after authentication.");
|
||||
return;
|
||||
}
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
// Index all stories
|
||||
|
||||
@@ -25,7 +25,7 @@ public class AuthorIndexScheduler {
|
||||
this.searchServiceAdapter = searchServiceAdapter;
|
||||
}
|
||||
|
||||
@Scheduled(fixedRateString = "${storycove.search.author-reindex-interval:7200000}") // 2 hours default
|
||||
@Scheduled(fixedRateString = "${storycove.search.author-reindex-interval:7200000}") // 2 hours default, configurable via SEARCH_AUTHOR_REINDEX_INTERVAL
|
||||
public void reindexAllAuthors() {
|
||||
try {
|
||||
logger.info("Starting scheduled author reindexing...");
|
||||
|
||||
@@ -5,8 +5,6 @@ import com.storycove.repository.*;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.ApplicationContextAware;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
@@ -24,7 +22,7 @@ import java.util.zip.ZipInputStream;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
@Service
|
||||
public class DatabaseManagementService implements ApplicationContextAware {
|
||||
public class DatabaseManagementService {
|
||||
|
||||
@Autowired
|
||||
@Qualifier("dataSource") // Use the primary routing datasource
|
||||
@@ -62,13 +60,6 @@ public class DatabaseManagementService implements ApplicationContextAware {
|
||||
@Value("${storycove.images.upload-dir:/app/images}")
|
||||
private String uploadDir;
|
||||
|
||||
private ApplicationContext applicationContext;
|
||||
|
||||
@Override
|
||||
public void setApplicationContext(ApplicationContext applicationContext) {
|
||||
this.applicationContext = applicationContext;
|
||||
}
|
||||
|
||||
// Helper methods to extract database connection details
|
||||
private String extractDatabaseUrl() {
|
||||
try (Connection connection = getDataSource().getConnection()) {
|
||||
@@ -236,12 +227,24 @@ public class DatabaseManagementService implements ApplicationContextAware {
|
||||
System.err.println("No files directory found in backup - skipping file restore.");
|
||||
}
|
||||
|
||||
// 6. Trigger complete search index reindex after data restoration
|
||||
// 6. Trigger complete search index reindex after data restoration.
|
||||
// We fetch the data directly from the repositories already injected into this service
|
||||
// and bulk-index it. This avoids the incomplete performCompleteReindex() path which
|
||||
// only recreates the schema without repopulating data.
|
||||
try {
|
||||
System.err.println("Starting search index reindex after restore...");
|
||||
SearchServiceAdapter searchServiceAdapter = applicationContext.getBean(SearchServiceAdapter.class);
|
||||
searchServiceAdapter.performCompleteReindex();
|
||||
System.err.println("Search index reindex completed successfully.");
|
||||
if (searchServiceAdapter.isSearchServiceAvailable()) {
|
||||
List<com.storycove.entity.Story> stories = storyRepository.findAllWithAssociations();
|
||||
List<com.storycove.entity.Author> authors = authorRepository.findAll();
|
||||
List<com.storycove.entity.Collection> collections = collectionRepository.findAllWithTags();
|
||||
searchServiceAdapter.bulkIndexStories(stories);
|
||||
searchServiceAdapter.bulkIndexAuthors(authors);
|
||||
searchServiceAdapter.bulkIndexCollections(collections);
|
||||
System.err.println("Search index reindex completed: " + stories.size() + " stories, "
|
||||
+ authors.size() + " authors, " + collections.size() + " collections.");
|
||||
} else {
|
||||
System.err.println("Solr not available — skipping search reindex after restore.");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println("Warning: Failed to reindex search after restore: " + e.getMessage());
|
||||
// Don't fail the entire restore for search issues
|
||||
|
||||
@@ -0,0 +1,115 @@
|
||||
package com.storycove.service;
|
||||
|
||||
import com.storycove.entity.Author;
|
||||
import com.storycove.entity.Collection;
|
||||
import com.storycove.entity.Story;
|
||||
import com.storycove.repository.AuthorRepository;
|
||||
import com.storycove.repository.CollectionRepository;
|
||||
import com.storycove.repository.StoryRepository;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Performs a nightly complete reindex of all entities (stories, authors, collections)
|
||||
* to keep the Solr search index in sync with the database.
|
||||
*
|
||||
* This scheduler runs at 3 AM by default (configurable via storycove.search.nightly-reindex-cron).
|
||||
* It is intentionally skipped when no library is authenticated so that it never indexes documents
|
||||
* with the fallback libraryId="default", which would make them invisible to searches.
|
||||
*/
|
||||
@Component
|
||||
@ConditionalOnProperty(name = "storycove.search.enabled", havingValue = "true", matchIfMissing = true)
|
||||
public class NightlyReindexScheduler {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(NightlyReindexScheduler.class);
|
||||
|
||||
private final StoryRepository storyRepository;
|
||||
private final AuthorRepository authorRepository;
|
||||
private final CollectionRepository collectionRepository;
|
||||
private final SearchServiceAdapter searchServiceAdapter;
|
||||
private final LibraryService libraryService;
|
||||
|
||||
@Autowired
|
||||
public NightlyReindexScheduler(StoryRepository storyRepository,
|
||||
AuthorRepository authorRepository,
|
||||
CollectionRepository collectionRepository,
|
||||
SearchServiceAdapter searchServiceAdapter,
|
||||
LibraryService libraryService) {
|
||||
this.storyRepository = storyRepository;
|
||||
this.authorRepository = authorRepository;
|
||||
this.collectionRepository = collectionRepository;
|
||||
this.searchServiceAdapter = searchServiceAdapter;
|
||||
this.libraryService = libraryService;
|
||||
}
|
||||
|
||||
@Scheduled(cron = "${storycove.search.nightly-reindex-cron:0 0 3 * * ?}") // 3 AM daily by default
|
||||
public void reindexAll() {
|
||||
logger.info("========================================");
|
||||
logger.info("Starting nightly full search reindexing...");
|
||||
logger.info("========================================");
|
||||
|
||||
if (!searchServiceAdapter.isSearchServiceAvailable()) {
|
||||
logger.warn("Solr is not available — skipping nightly reindexing.");
|
||||
return;
|
||||
}
|
||||
|
||||
// Only reindex when a library is active so every document gets the correct libraryId.
|
||||
// Without this guard, documents would be indexed with libraryId="default" and become
|
||||
// invisible to searches that filter by the real library ID.
|
||||
if (libraryService.getCurrentLibraryId() == null) {
|
||||
logger.warn("No active library — skipping nightly reindexing.");
|
||||
logger.warn("A user must authenticate before the nightly reindex can run.");
|
||||
return;
|
||||
}
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
int storiesIndexed = 0;
|
||||
int authorsIndexed = 0;
|
||||
int collectionsIndexed = 0;
|
||||
|
||||
try {
|
||||
List<Story> stories = storyRepository.findAllWithAssociations();
|
||||
if (!stories.isEmpty()) {
|
||||
searchServiceAdapter.bulkIndexStories(stories);
|
||||
storiesIndexed = stories.size();
|
||||
logger.info("Reindexed {} stories", storiesIndexed);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("Failed to reindex stories during nightly run", e);
|
||||
}
|
||||
|
||||
try {
|
||||
List<Author> authors = authorRepository.findAll();
|
||||
if (!authors.isEmpty()) {
|
||||
searchServiceAdapter.bulkIndexAuthors(authors);
|
||||
authorsIndexed = authors.size();
|
||||
logger.info("Reindexed {} authors", authorsIndexed);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("Failed to reindex authors during nightly run", e);
|
||||
}
|
||||
|
||||
try {
|
||||
List<Collection> collections = collectionRepository.findAllWithTags();
|
||||
if (!collections.isEmpty()) {
|
||||
searchServiceAdapter.bulkIndexCollections(collections);
|
||||
collectionsIndexed = collections.size();
|
||||
logger.info("Reindexed {} collections", collectionsIndexed);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("Failed to reindex collections during nightly run", e);
|
||||
}
|
||||
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
logger.info("========================================");
|
||||
logger.info("Nightly reindexing completed in {}ms — {} stories, {} authors, {} collections",
|
||||
duration, storiesIndexed, authorsIndexed, collectionsIndexed);
|
||||
logger.info("========================================");
|
||||
}
|
||||
}
|
||||
@@ -88,15 +88,24 @@ public class SearchServiceAdapter {
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform complete reindex of all data
|
||||
* Recreates the Solr index schema (drops and re-creates cores / clears all documents).
|
||||
*
|
||||
* <p><strong>Warning:</strong> This method only clears the index — it does NOT repopulate
|
||||
* it with data. Callers are responsible for calling {@link #bulkIndexStories},
|
||||
* {@link #bulkIndexAuthors}, and {@link #bulkIndexCollections} afterwards.
|
||||
* Use {@code POST /api/admin/search/solr/reindex} for a full reindex including data.</p>
|
||||
*
|
||||
* @deprecated Prefer the admin endpoint or directly call the bulk-index methods after
|
||||
* fetching data from the repositories.
|
||||
*/
|
||||
@Deprecated
|
||||
public void performCompleteReindex() {
|
||||
try {
|
||||
recreateIndices();
|
||||
logger.info("Search indices recreated successfully");
|
||||
logger.info("Search indices recreated (schema only — data must be re-added separately)");
|
||||
} catch (Exception e) {
|
||||
logger.error("Failed to perform complete reindex", e);
|
||||
throw new RuntimeException("Failed to perform complete reindex", e);
|
||||
logger.error("Failed to recreate search indices", e);
|
||||
throw new RuntimeException("Failed to recreate search indices", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -19,6 +19,9 @@ import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.validation.annotation.Validated;
|
||||
|
||||
import org.springframework.transaction.support.TransactionSynchronization;
|
||||
import org.springframework.transaction.support.TransactionSynchronizationManager;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
@@ -328,19 +331,19 @@ public class StoryService {
|
||||
|
||||
public Story create(@Valid Story story) {
|
||||
validateStoryForCreate(story);
|
||||
|
||||
|
||||
// Set up relationships
|
||||
if (story.getAuthor() != null && story.getAuthor().getId() != null) {
|
||||
Author author = authorService.findById(story.getAuthor().getId());
|
||||
story.setAuthor(author);
|
||||
}
|
||||
|
||||
|
||||
if (story.getSeries() != null && story.getSeries().getId() != null) {
|
||||
Series series = seriesService.findById(story.getSeries().getId());
|
||||
story.setSeries(series);
|
||||
validateSeriesVolume(series, story.getVolume());
|
||||
}
|
||||
|
||||
|
||||
Story savedStory = storyRepository.save(story);
|
||||
|
||||
// Handle tags
|
||||
@@ -348,27 +351,29 @@ public class StoryService {
|
||||
updateStoryTags(savedStory, story.getTags());
|
||||
}
|
||||
|
||||
// Index in search engine
|
||||
searchServiceAdapter.indexStory(savedStory);
|
||||
// Index AFTER the transaction commits so that Hibernate has already flushed the entity
|
||||
// (setting @CreationTimestamp / @UpdateTimestamp) and all tag relationships are persisted.
|
||||
// Indexing inside the transaction would send null timestamps and incomplete tag data to Solr.
|
||||
scheduleIndexAfterCommit(savedStory);
|
||||
|
||||
return savedStory;
|
||||
}
|
||||
|
||||
public Story createWithTagNames(@Valid Story story, java.util.List<String> tagNames) {
|
||||
validateStoryForCreate(story);
|
||||
|
||||
|
||||
// Set up relationships
|
||||
if (story.getAuthor() != null && story.getAuthor().getId() != null) {
|
||||
Author author = authorService.findById(story.getAuthor().getId());
|
||||
story.setAuthor(author);
|
||||
}
|
||||
|
||||
|
||||
if (story.getSeries() != null && story.getSeries().getId() != null) {
|
||||
Series series = seriesService.findById(story.getSeries().getId());
|
||||
story.setSeries(series);
|
||||
validateSeriesVolume(series, story.getVolume());
|
||||
}
|
||||
|
||||
|
||||
Story savedStory = storyRepository.save(story);
|
||||
|
||||
// Handle tags by names
|
||||
@@ -376,12 +381,39 @@ public class StoryService {
|
||||
updateStoryTagsByNames(savedStory, tagNames);
|
||||
}
|
||||
|
||||
// Index in search engine
|
||||
searchServiceAdapter.indexStory(savedStory);
|
||||
// Index AFTER the transaction commits (same reason as create() above).
|
||||
scheduleIndexAfterCommit(savedStory);
|
||||
|
||||
return savedStory;
|
||||
}
|
||||
|
||||
/**
|
||||
* Schedules Solr indexing to run immediately after the current transaction commits.
|
||||
*
|
||||
* <p>Indexing inside the transaction would produce an incomplete document:
|
||||
* <ul>
|
||||
* <li>Hibernate's {@code @CreationTimestamp} / {@code @UpdateTimestamp} are set during flush
|
||||
* (just before the INSERT), so they are {@code null} until then.</li>
|
||||
* <li>Tag and relationship join-table rows are not yet written to the DB.</li>
|
||||
* </ul>
|
||||
* By using {@code afterCommit}, the flush has completed, all timestamps and associations
|
||||
* are present on the (now detached) entity, and we index a consistent snapshot.</p>
|
||||
*
|
||||
* <p>Falls back to immediate indexing when no transaction is active.</p>
|
||||
*/
|
||||
private void scheduleIndexAfterCommit(Story story) {
|
||||
if (TransactionSynchronizationManager.isActualTransactionActive()) {
|
||||
TransactionSynchronizationManager.registerSynchronization(new TransactionSynchronization() {
|
||||
@Override
|
||||
public void afterCommit() {
|
||||
searchServiceAdapter.indexStory(story);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
searchServiceAdapter.indexStory(story);
|
||||
}
|
||||
}
|
||||
|
||||
public Story update(UUID id, @Valid Story storyUpdates) {
|
||||
Story existingStory = findById(id);
|
||||
|
||||
|
||||
@@ -48,6 +48,11 @@ storycove:
|
||||
password: ${APP_PASSWORD} # REQUIRED: No default password for security
|
||||
search:
|
||||
engine: solr # Apache Solr search engine
|
||||
# Cron for the nightly full reindex (stories + authors + collections). Default: 3 AM daily.
|
||||
# Set to "-" to disable. Override via SEARCH_NIGHTLY_REINDEX_CRON env var.
|
||||
nightly-reindex-cron: ${SEARCH_NIGHTLY_REINDEX_CRON:0 0 3 * * ?}
|
||||
# How often (ms) to reindex authors to refresh derived stats (storyCount, averageRating).
|
||||
author-reindex-interval: ${SEARCH_AUTHOR_REINDEX_INTERVAL:7200000}
|
||||
solr:
|
||||
# Connection settings
|
||||
url: ${SOLR_URL:http://solr:8983/solr}
|
||||
|
||||
Reference in New Issue
Block a user