Indexing Issues

This commit is contained in:
Stefan Hardegger
2026-02-23 09:45:43 +01:00
parent 02fa9dab4f
commit f8bf90e0c7
7 changed files with 211 additions and 29 deletions

View File

@@ -6,6 +6,7 @@ import com.storycove.entity.Story;
import com.storycove.repository.AuthorRepository;
import com.storycove.repository.CollectionRepository;
import com.storycove.repository.StoryRepository;
import com.storycove.service.LibraryService;
import com.storycove.service.SearchServiceAdapter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -38,6 +39,9 @@ public class StartupIndexingRunner implements ApplicationRunner {
@Autowired
private CollectionRepository collectionRepository;
@Autowired
private LibraryService libraryService;
@Override
public void run(ApplicationArguments args) throws Exception {
logger.info("========================================");
@@ -52,6 +56,20 @@ public class StartupIndexingRunner implements ApplicationRunner {
return;
}
// Skip indexing if no library is authenticated yet.
// Without an active library, SolrService falls back to libraryId="default" for every
// document, which would overwrite correctly-indexed documents (indexed with the real
// library ID at creation time) and make them invisible to all subsequent searches.
// The nightly reindex scheduler (NightlyReindexScheduler) will handle resyncing once
// a user has authenticated and a library is active. A manual reindex can also be
// triggered via POST /api/admin/search/solr/reindex.
if (libraryService.getCurrentLibraryId() == null) {
logger.warn("No active library at startup — skipping bulk reindexing to avoid");
logger.warn("overwriting documents with an incorrect libraryId.");
logger.warn("Trigger POST /api/admin/search/solr/reindex after authentication.");
return;
}
long startTime = System.currentTimeMillis();
// Index all stories

View File

@@ -25,7 +25,7 @@ public class AuthorIndexScheduler {
this.searchServiceAdapter = searchServiceAdapter;
}
@Scheduled(fixedRateString = "${storycove.search.author-reindex-interval:7200000}") // 2 hours default
@Scheduled(fixedRateString = "${storycove.search.author-reindex-interval:7200000}") // 2 hours default, configurable via SEARCH_AUTHOR_REINDEX_INTERVAL
public void reindexAllAuthors() {
try {
logger.info("Starting scheduled author reindexing...");

View File

@@ -5,8 +5,6 @@ import com.storycove.repository.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.core.io.Resource;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
@@ -24,7 +22,7 @@ import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
@Service
public class DatabaseManagementService implements ApplicationContextAware {
public class DatabaseManagementService {
@Autowired
@Qualifier("dataSource") // Use the primary routing datasource
@@ -62,13 +60,6 @@ public class DatabaseManagementService implements ApplicationContextAware {
@Value("${storycove.images.upload-dir:/app/images}")
private String uploadDir;
private ApplicationContext applicationContext;
@Override
public void setApplicationContext(ApplicationContext applicationContext) {
this.applicationContext = applicationContext;
}
// Helper methods to extract database connection details
private String extractDatabaseUrl() {
try (Connection connection = getDataSource().getConnection()) {
@@ -236,12 +227,24 @@ public class DatabaseManagementService implements ApplicationContextAware {
System.err.println("No files directory found in backup - skipping file restore.");
}
// 6. Trigger complete search index reindex after data restoration
// 6. Trigger complete search index reindex after data restoration.
// We fetch the data directly from the repositories already injected into this service
// and bulk-index it. This avoids the incomplete performCompleteReindex() path which
// only recreates the schema without repopulating data.
try {
System.err.println("Starting search index reindex after restore...");
SearchServiceAdapter searchServiceAdapter = applicationContext.getBean(SearchServiceAdapter.class);
searchServiceAdapter.performCompleteReindex();
System.err.println("Search index reindex completed successfully.");
if (searchServiceAdapter.isSearchServiceAvailable()) {
List<com.storycove.entity.Story> stories = storyRepository.findAllWithAssociations();
List<com.storycove.entity.Author> authors = authorRepository.findAll();
List<com.storycove.entity.Collection> collections = collectionRepository.findAllWithTags();
searchServiceAdapter.bulkIndexStories(stories);
searchServiceAdapter.bulkIndexAuthors(authors);
searchServiceAdapter.bulkIndexCollections(collections);
System.err.println("Search index reindex completed: " + stories.size() + " stories, "
+ authors.size() + " authors, " + collections.size() + " collections.");
} else {
System.err.println("Solr not available — skipping search reindex after restore.");
}
} catch (Exception e) {
System.err.println("Warning: Failed to reindex search after restore: " + e.getMessage());
// Don't fail the entire restore for search issues

View File

@@ -0,0 +1,115 @@
package com.storycove.service;
import com.storycove.entity.Author;
import com.storycove.entity.Collection;
import com.storycove.entity.Story;
import com.storycove.repository.AuthorRepository;
import com.storycove.repository.CollectionRepository;
import com.storycove.repository.StoryRepository;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * Scheduled job that pushes every story, author and collection from the database into the
 * search index so the index stays aligned with the stored data.
 *
 * <p>The schedule is read from {@code storycove.search.nightly-reindex-cron} and falls back to
 * {@code 0 0 3 * * ?} (03:00 every day) when that property is not set.</p>
 *
 * <p>A run is abandoned before touching the index when either
 * <ul>
 *   <li>the search service reports that it is not available, or</li>
 *   <li>{@code libraryService.getCurrentLibraryId()} returns {@code null}. The stated reason
 *       for this guard is that, without an active library, documents would be written with
 *       the fallback {@code libraryId="default"} and would no longer match searches that
 *       filter by the real library ID.</li>
 * </ul>
 *
 * <p>Stories, authors and collections are handled as three independent phases: an exception in
 * one phase is logged and the remaining phases still run.</p>
 */
@Component
@ConditionalOnProperty(name = "storycove.search.enabled", havingValue = "true", matchIfMissing = true)
public class NightlyReindexScheduler {

    private static final Logger logger = LoggerFactory.getLogger(NightlyReindexScheduler.class);

    /** Separator line that frames the start and end messages of a run in the log. */
    private static final String BANNER = "========================================";

    private final StoryRepository storyRepository;
    private final AuthorRepository authorRepository;
    private final CollectionRepository collectionRepository;
    private final SearchServiceAdapter searchServiceAdapter;
    private final LibraryService libraryService;

    /**
     * Creates the scheduler with the repositories it reads from and the services it relies on.
     *
     * @param storyRepository      source of the stories to index
     * @param authorRepository     source of the authors to index
     * @param collectionRepository source of the collections to index
     * @param searchServiceAdapter receives the bulk-index calls and answers the availability check
     * @param libraryService       consulted for the currently active library ID
     */
    @Autowired
    public NightlyReindexScheduler(StoryRepository storyRepository,
                                   AuthorRepository authorRepository,
                                   CollectionRepository collectionRepository,
                                   SearchServiceAdapter searchServiceAdapter,
                                   LibraryService libraryService) {
        this.storyRepository = storyRepository;
        this.authorRepository = authorRepository;
        this.collectionRepository = collectionRepository;
        this.searchServiceAdapter = searchServiceAdapter;
        this.libraryService = libraryService;
    }

    /**
     * Entry point invoked by the scheduler: checks the preconditions, runs the three reindex
     * phases in the order stories, authors, collections, and logs a summary with the elapsed
     * time and the number of entities handed to the index in each phase.
     */
    @Scheduled(cron = "${storycove.search.nightly-reindex-cron:0 0 3 * * ?}") // 3 AM daily by default
    public void reindexAll() {
        logger.info(BANNER);
        logger.info("Starting nightly full search reindexing...");
        logger.info(BANNER);

        boolean searchUnavailable = !searchServiceAdapter.isSearchServiceAvailable();
        if (searchUnavailable) {
            logger.warn("Solr is not available — skipping nightly reindexing.");
            return;
        }

        // Reindexing without an active library is refused so that no document is written with
        // a library ID other than the real one (see the class-level description).
        boolean noActiveLibrary = libraryService.getCurrentLibraryId() == null;
        if (noActiveLibrary) {
            logger.warn("No active library — skipping nightly reindexing.");
            logger.warn("A user must authenticate before the nightly reindex can run.");
            return;
        }

        long startedAt = System.currentTimeMillis();

        int storyCount = reindexStories();
        int authorCount = reindexAuthors();
        int collectionCount = reindexCollections();

        long elapsedMs = System.currentTimeMillis() - startedAt;
        logger.info(BANNER);
        logger.info("Nightly reindexing completed in {}ms — {} stories, {} authors, {} collections",
                elapsedMs, storyCount, authorCount, collectionCount);
        logger.info(BANNER);
    }

    /**
     * Loads all stories with their associations and bulk-indexes them.
     *
     * @return the number of stories handed to the index; 0 when there were none or the phase failed
     */
    private int reindexStories() {
        int indexed = 0;
        try {
            List<Story> allStories = storyRepository.findAllWithAssociations();
            if (!allStories.isEmpty()) {
                searchServiceAdapter.bulkIndexStories(allStories);
                indexed = allStories.size();
                logger.info("Reindexed {} stories", indexed);
            }
        } catch (Exception e) {
            // Deliberately contained so the author and collection phases still run.
            logger.error("Failed to reindex stories during nightly run", e);
        }
        return indexed;
    }

    /**
     * Loads all authors and bulk-indexes them.
     *
     * @return the number of authors handed to the index; 0 when there were none or the phase failed
     */
    private int reindexAuthors() {
        int indexed = 0;
        try {
            List<Author> allAuthors = authorRepository.findAll();
            if (!allAuthors.isEmpty()) {
                searchServiceAdapter.bulkIndexAuthors(allAuthors);
                indexed = allAuthors.size();
                logger.info("Reindexed {} authors", indexed);
            }
        } catch (Exception e) {
            // Deliberately contained so the collection phase still runs.
            logger.error("Failed to reindex authors during nightly run", e);
        }
        return indexed;
    }

    /**
     * Loads all collections with their tags and bulk-indexes them.
     *
     * @return the number of collections handed to the index; 0 when there were none or the phase failed
     */
    private int reindexCollections() {
        int indexed = 0;
        try {
            List<Collection> allCollections = collectionRepository.findAllWithTags();
            if (!allCollections.isEmpty()) {
                searchServiceAdapter.bulkIndexCollections(allCollections);
                indexed = allCollections.size();
                logger.info("Reindexed {} collections", indexed);
            }
        } catch (Exception e) {
            // Deliberately contained so the summary is still logged.
            logger.error("Failed to reindex collections during nightly run", e);
        }
        return indexed;
    }
}

View File

@@ -88,15 +88,24 @@ public class SearchServiceAdapter {
}
/**
* Perform complete reindex of all data
* Recreates the Solr index schema (drops and re-creates cores / clears all documents).
*
* <p><strong>Warning:</strong> This method only clears the index — it does NOT repopulate
* it with data. Callers are responsible for calling {@link #bulkIndexStories},
* {@link #bulkIndexAuthors}, and {@link #bulkIndexCollections} afterwards.
* Use {@code POST /api/admin/search/solr/reindex} for a full reindex including data.</p>
*
* @deprecated Prefer the admin endpoint or directly call the bulk-index methods after
* fetching data from the repositories.
*/
@Deprecated
public void performCompleteReindex() {
try {
recreateIndices();
logger.info("Search indices recreated successfully");
logger.info("Search indices recreated (schema only — data must be re-added separately)");
} catch (Exception e) {
logger.error("Failed to perform complete reindex", e);
throw new RuntimeException("Failed to perform complete reindex", e);
logger.error("Failed to recreate search indices", e);
throw new RuntimeException("Failed to recreate search indices", e);
}
}

View File

@@ -19,6 +19,9 @@ import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.validation.annotation.Validated;
import org.springframework.transaction.support.TransactionSynchronization;
import org.springframework.transaction.support.TransactionSynchronizationManager;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.HashSet;
@@ -348,8 +351,10 @@ public class StoryService {
updateStoryTags(savedStory, story.getTags());
}
// Index in search engine
searchServiceAdapter.indexStory(savedStory);
// Index AFTER the transaction commits so that Hibernate has already flushed the entity
// (setting @CreationTimestamp / @UpdateTimestamp) and all tag relationships are persisted.
// Indexing inside the transaction would send null timestamps and incomplete tag data to Solr.
scheduleIndexAfterCommit(savedStory);
return savedStory;
}
@@ -376,12 +381,39 @@ public class StoryService {
updateStoryTagsByNames(savedStory, tagNames);
}
// Index in search engine
searchServiceAdapter.indexStory(savedStory);
// Index AFTER the transaction commits (same reason as create() above).
scheduleIndexAfterCommit(savedStory);
return savedStory;
}
/**
 * Indexes the given story in the search engine, deferring the call until the surrounding
 * transaction has committed when there is one.
 *
 * <p>When {@code TransactionSynchronizationManager.isActualTransactionActive()} is
 * {@code false}, the story is indexed immediately. Otherwise a
 * {@code TransactionSynchronization} is registered whose {@code afterCommit()} performs the
 * indexing.</p>
 *
 * <p>Rationale recorded with this change: indexing while the transaction is still open
 * produced incomplete documents, because
 * <ul>
 *   <li>{@code @CreationTimestamp} / {@code @UpdateTimestamp} values are assigned by Hibernate
 *       at flush time and are {@code null} before that, and</li>
 *   <li>tag and relationship join-table rows have not yet been written.</li>
 * </ul>
 * Running after commit means the flush has already happened when the document is built.</p>
 *
 * <p>NOTE(review): an exception thrown by {@code indexStory} inside {@code afterCommit()} would
 * surface to the caller after the data is already committed — confirm that
 * {@code indexStory} handles its own failures.</p>
 *
 * @param story the story to send to the search index
 */
private void scheduleIndexAfterCommit(Story story) {
    // No surrounding transaction: there is nothing to wait for, so index right away.
    if (!TransactionSynchronizationManager.isActualTransactionActive()) {
        searchServiceAdapter.indexStory(story);
        return;
    }

    // Only afterCommit is overridden, so a rolled-back transaction never reaches the index.
    TransactionSynchronization indexOnCommit = new TransactionSynchronization() {
        @Override
        public void afterCommit() {
            searchServiceAdapter.indexStory(story);
        }
    };
    TransactionSynchronizationManager.registerSynchronization(indexOnCommit);
}
public Story update(UUID id, @Valid Story storyUpdates) {
Story existingStory = findById(id);

View File

@@ -48,6 +48,11 @@ storycove:
password: ${APP_PASSWORD} # REQUIRED: No default password for security
search:
engine: solr # Apache Solr search engine
# Cron for the nightly full reindex (stories + authors + collections). Default: 3 AM daily.
# Set to "-" to disable. Override via SEARCH_NIGHTLY_REINDEX_CRON env var.
nightly-reindex-cron: ${SEARCH_NIGHTLY_REINDEX_CRON:0 0 3 * * ?}
# How often (ms) to reindex authors to refresh derived stats (storyCount, averageRating).
# Default: 7200000 (2 hours). Override via SEARCH_AUTHOR_REINDEX_INTERVAL env var.
author-reindex-interval: ${SEARCH_AUTHOR_REINDEX_INTERVAL:7200000}
solr:
# Connection settings
url: ${SOLR_URL:http://solr:8983/solr}