Indexing Issues

2026-02-23 09:45:43 +01:00
parent 02fa9dab4f
commit f8bf90e0c7
7 changed files with 211 additions and 29 deletions
--- a/backend/src/main/java/com/storycove/config/StartupIndexingRunner.java
+++ b/backend/src/main/java/com/storycove/config/StartupIndexingRunner.java
@@ -6,6 +6,7 @@ import com.storycove.entity.Story;
 import com.storycove.repository.AuthorRepository;
 import com.storycove.repository.CollectionRepository;
 import com.storycove.repository.StoryRepository;
+import com.storycove.service.LibraryService;
 import com.storycove.service.SearchServiceAdapter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -38,6 +39,9 @@ public class StartupIndexingRunner implements ApplicationRunner {
    @Autowired
    private CollectionRepository collectionRepository;

+    @Autowired
+    private LibraryService libraryService;
+
    @Override
    public void run(ApplicationArguments args) throws Exception {
        logger.info("========================================");
@@ -52,6 +56,20 @@ public class StartupIndexingRunner implements ApplicationRunner {
                return;
            }

+            // Skip indexing if no library is authenticated yet.
+            // Without an active library, SolrService falls back to libraryId="default" for every
+            // document, which would overwrite correctly-indexed documents (indexed with the real
+            // library ID at creation time) and make them invisible to all subsequent searches.
+            // The nightly reindex scheduler (NightlyReindexScheduler) will handle resyncing once
+            // a user has authenticated and a library is active. A manual reindex can also be
+            // triggered via POST /api/admin/search/solr/reindex.
+            if (libraryService.getCurrentLibraryId() == null) {
+                logger.warn("No active library at startup — skipping bulk reindexing to avoid");
+                logger.warn("overwriting documents with an incorrect libraryId.");
+                logger.warn("Trigger POST /api/admin/search/solr/reindex after authentication.");
+                return;
+            }
+
            long startTime = System.currentTimeMillis();

            // Index all stories
--- a/backend/src/main/java/com/storycove/service/AuthorIndexScheduler.java
+++ b/backend/src/main/java/com/storycove/service/AuthorIndexScheduler.java
@@ -25,7 +25,7 @@ public class AuthorIndexScheduler {
        this.searchServiceAdapter = searchServiceAdapter;
    }

-    @Scheduled(fixedRateString = "${storycove.search.author-reindex-interval:7200000}") // 2 hours default
+    @Scheduled(fixedRateString = "${storycove.search.author-reindex-interval:7200000}") // 2 hours default, configurable via SEARCH_AUTHOR_REINDEX_INTERVAL
    public void reindexAllAuthors() {
        try {
            logger.info("Starting scheduled author reindexing...");
--- a/backend/src/main/java/com/storycove/service/DatabaseManagementService.java
+++ b/backend/src/main/java/com/storycove/service/DatabaseManagementService.java
@@ -5,8 +5,6 @@ import com.storycove.repository.*;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Qualifier;
 import org.springframework.beans.factory.annotation.Value;
-import org.springframework.context.ApplicationContext;
-import org.springframework.context.ApplicationContextAware;
 import org.springframework.core.io.Resource;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
@@ -24,7 +22,7 @@ import java.util.zip.ZipInputStream;
 import java.util.zip.ZipOutputStream;

@Service
-public class DatabaseManagementService implements ApplicationContextAware {
+public class DatabaseManagementService {

    @Autowired
    @Qualifier("dataSource") // Use the primary routing datasource
@@ -62,13 +60,6 @@ public class DatabaseManagementService implements ApplicationContextAware {
    @Value("${storycove.images.upload-dir:/app/images}")
    private String uploadDir;
    
-    private ApplicationContext applicationContext;
-    
-    @Override
-    public void setApplicationContext(ApplicationContext applicationContext) {
-        this.applicationContext = applicationContext;
-    }
-
    // Helper methods to extract database connection details
    private String extractDatabaseUrl() {
        try (Connection connection = getDataSource().getConnection()) {
@@ -236,12 +227,24 @@ public class DatabaseManagementService implements ApplicationContextAware {
                System.err.println("No files directory found in backup - skipping file restore.");
            }
            
-            // 6. Trigger complete search index reindex after data restoration
+            // 6. Trigger complete search index reindex after data restoration.
+            // We fetch the data directly from the repositories already injected into this service
+            // and bulk-index it. This avoids the incomplete performCompleteReindex() path which
+            // only recreates the schema without repopulating data.
            try {
                System.err.println("Starting search index reindex after restore...");
-                SearchServiceAdapter searchServiceAdapter = applicationContext.getBean(SearchServiceAdapter.class);
-                searchServiceAdapter.performCompleteReindex();
-                System.err.println("Search index reindex completed successfully.");
+                if (searchServiceAdapter.isSearchServiceAvailable()) {
+                    List<com.storycove.entity.Story> stories = storyRepository.findAllWithAssociations();
+                    List<com.storycove.entity.Author> authors = authorRepository.findAll();
+                    List<com.storycove.entity.Collection> collections = collectionRepository.findAllWithTags();
+                    searchServiceAdapter.bulkIndexStories(stories);
+                    searchServiceAdapter.bulkIndexAuthors(authors);
+                    searchServiceAdapter.bulkIndexCollections(collections);
+                    System.err.println("Search index reindex completed: " + stories.size() + " stories, "
+                        + authors.size() + " authors, " + collections.size() + " collections.");
+                } else {
+                    System.err.println("Solr not available — skipping search reindex after restore.");
+                }
            } catch (Exception e) {
                System.err.println("Warning: Failed to reindex search after restore: " + e.getMessage());
                // Don't fail the entire restore for search issues
--- a/backend/src/main/java/com/storycove/service/NightlyReindexScheduler.java
+++ b/backend/src/main/java/com/storycove/service/NightlyReindexScheduler.java
@@ -0,0 +1,115 @@
+package com.storycove.service;
+
+import com.storycove.entity.Author;
+import com.storycove.entity.Collection;
+import com.storycove.entity.Story;
+import com.storycove.repository.AuthorRepository;
+import com.storycove.repository.CollectionRepository;
+import com.storycove.repository.StoryRepository;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.scheduling.annotation.Scheduled;
+import org.springframework.stereotype.Component;
+
+import java.util.List;
+
+/**
+ * Scheduled job that rebuilds the Solr search index for every story, author and collection
+ * once per night, so the index stays aligned with the database.
+ *
+ * The default schedule is 3 AM; override it with storycove.search.nightly-reindex-cron.
+ * A run is abandoned when no library is authenticated, so that documents are never indexed
+ * under the fallback libraryId="default", which would hide them from searches.
+ */
+@Component
+@ConditionalOnProperty(name = "storycove.search.enabled", havingValue = "true", matchIfMissing = true)
+public class NightlyReindexScheduler {
+
+    private static final Logger logger = LoggerFactory.getLogger(NightlyReindexScheduler.class);
+
+    private final StoryRepository storyRepository;
+    private final AuthorRepository authorRepository;
+    private final CollectionRepository collectionRepository;
+    private final SearchServiceAdapter searchServiceAdapter;
+    private final LibraryService libraryService;
+
+    @Autowired
+    public NightlyReindexScheduler(StoryRepository storyRepository,
+                                   AuthorRepository authorRepository,
+                                   CollectionRepository collectionRepository,
+                                   SearchServiceAdapter searchServiceAdapter,
+                                   LibraryService libraryService) {
+        this.storyRepository = storyRepository;
+        this.authorRepository = authorRepository;
+        this.collectionRepository = collectionRepository;
+        this.searchServiceAdapter = searchServiceAdapter;
+        this.libraryService = libraryService;
+    }
+
+    @Scheduled(cron = "${storycove.search.nightly-reindex-cron:0 0 3 * * ?}") // default: daily at 3 AM
+    public void reindexAll() {
+        logger.info("========================================");
+        logger.info("Starting nightly full search reindexing...");
+        logger.info("========================================");
+
+        if (!searchServiceAdapter.isSearchServiceAvailable()) {
+            logger.warn("Solr is not available — skipping nightly reindexing.");
+            return;
+        }
+
+        // A null library ID means no library is active. Indexing now would tag every document
+        // with libraryId="default" and hide it from searches filtered by the real library ID,
+        // so the run is abandoned instead.
+        if (libraryService.getCurrentLibraryId() == null) {
+            logger.warn("No active library — skipping nightly reindexing.");
+            logger.warn("A user must authenticate before the nightly reindex can run.");
+            return;
+        }
+
+        final long startedAt = System.currentTimeMillis();
+
+        int storyCount = 0;
+        try {
+            final List<Story> allStories = storyRepository.findAllWithAssociations();
+            if (!allStories.isEmpty()) {
+                searchServiceAdapter.bulkIndexStories(allStories);
+                storyCount = allStories.size();
+                logger.info("Reindexed {} stories", storyCount);
+            }
+        } catch (Exception ex) {
+            logger.error("Failed to reindex stories during nightly run", ex);
+        }
+
+        int authorCount = 0;
+        try {
+            final List<Author> allAuthors = authorRepository.findAll();
+            if (!allAuthors.isEmpty()) {
+                searchServiceAdapter.bulkIndexAuthors(allAuthors);
+                authorCount = allAuthors.size();
+                logger.info("Reindexed {} authors", authorCount);
+            }
+        } catch (Exception ex) {
+            logger.error("Failed to reindex authors during nightly run", ex);
+        }
+
+        int collectionCount = 0;
+        try {
+            final List<Collection> allCollections = collectionRepository.findAllWithTags();
+            if (!allCollections.isEmpty()) {
+                searchServiceAdapter.bulkIndexCollections(allCollections);
+                collectionCount = allCollections.size();
+                logger.info("Reindexed {} collections", collectionCount);
+            }
+        } catch (Exception ex) {
+            logger.error("Failed to reindex collections during nightly run", ex);
+        }
+
+        final long elapsedMs = System.currentTimeMillis() - startedAt;
+        logger.info("========================================");
+        logger.info("Nightly reindexing completed in {}ms — {} stories, {} authors, {} collections",
+            elapsedMs, storyCount, authorCount, collectionCount);
+        logger.info("========================================");
+    }
+}
--- a/backend/src/main/java/com/storycove/service/SearchServiceAdapter.java
+++ b/backend/src/main/java/com/storycove/service/SearchServiceAdapter.java
@@ -88,15 +88,24 @@ public class SearchServiceAdapter {
    }

    /**
-     * Perform complete reindex of all data
+     * Recreates the Solr search indices via {@code recreateIndices()}, clearing all documents.
+     *
+     * <p><strong>Warning:</strong> This only resets the index; it does NOT repopulate it.
+     * Follow up with {@link #bulkIndexStories}, {@link #bulkIndexAuthors} and
+     * {@link #bulkIndexCollections}, or use {@code POST /api/admin/search/solr/reindex}.</p>
+     *
+     * @throws RuntimeException if recreating the indices fails (original exception as cause)
+     * @deprecated Prefer the admin endpoint, or fetch the data from the repositories and
+     *             call the bulk-index methods directly.
     */
+    @Deprecated
    public void performCompleteReindex() {
        try {
            recreateIndices();
-            logger.info("Search indices recreated successfully");
+            logger.info("Search indices recreated (schema only — data must be re-added separately)");
        } catch (Exception e) {
-            logger.error("Failed to perform complete reindex", e);
-            throw new RuntimeException("Failed to perform complete reindex", e);
+            logger.error("Failed to recreate search indices", e);
+            throw new RuntimeException("Failed to recreate search indices", e);
        }
    }

--- a/backend/src/main/java/com/storycove/service/StoryService.java
+++ b/backend/src/main/java/com/storycove/service/StoryService.java
@@ -19,6 +19,9 @@ import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
 import org.springframework.validation.annotation.Validated;

+import org.springframework.transaction.support.TransactionSynchronization;
+import org.springframework.transaction.support.TransactionSynchronizationManager;
+
 import java.time.LocalDateTime;
 import java.util.ArrayList;
 import java.util.HashSet;
@@ -348,8 +351,10 @@ public class StoryService {
            updateStoryTags(savedStory, story.getTags());
        }

-        // Index in search engine
-        searchServiceAdapter.indexStory(savedStory);
+        // Index AFTER the transaction commits so that Hibernate has already flushed the entity
+        // (setting @CreationTimestamp / @UpdateTimestamp) and all tag relationships are persisted.
+        // Indexing inside the transaction would send null timestamps and incomplete tag data to Solr.
+        scheduleIndexAfterCommit(savedStory);

        return savedStory;
    }
@@ -376,12 +381,39 @@ public class StoryService {
            updateStoryTagsByNames(savedStory, tagNames);
        }

-        // Index in search engine
-        searchServiceAdapter.indexStory(savedStory);
+        // Index AFTER the transaction commits (same reason as create() above).
+        scheduleIndexAfterCommit(savedStory);

        return savedStory;
    }

+    /**
+     * Defers search indexing of a story until the surrounding transaction has committed.
+     *
+     * <p>Indexing earlier would send an incomplete document to the search engine:
+     * <ul>
+     *   <li>{@code @CreationTimestamp} / {@code @UpdateTimestamp} values are assigned by
+     *       Hibernate at flush time and are still {@code null} before that.</li>
+     *   <li>Tag and relationship join-table rows have not been written yet.</li>
+     * </ul>
+     * <p>If no transaction is active, the story is indexed immediately instead.</p>
+     *
+     * @param story the saved story to hand to the search index
+     */
+    private void scheduleIndexAfterCommit(Story story) {
+        if (!TransactionSynchronizationManager.isActualTransactionActive()) {
+            searchServiceAdapter.indexStory(story);
+            return;
+        }
+        TransactionSynchronization indexOnCommit = new TransactionSynchronization() {
+            @Override
+            public void afterCommit() {
+                searchServiceAdapter.indexStory(story);
+            }
+        };
+        TransactionSynchronizationManager.registerSynchronization(indexOnCommit);
+    }
+
    public Story update(UUID id, @Valid Story storyUpdates) {
        Story existingStory = findById(id);
        
--- a/backend/src/main/resources/application.yml
+++ b/backend/src/main/resources/application.yml
@@ -48,6 +48,11 @@ storycove:
    password: ${APP_PASSWORD}  # REQUIRED: No default password for security
  search:
    engine: solr  # Apache Solr search engine
+    # Cron for the nightly full reindex (stories + authors + collections). Default: 3 AM daily.
+    # Set to "-" to disable. Override via SEARCH_NIGHTLY_REINDEX_CRON env var.
+    nightly-reindex-cron: ${SEARCH_NIGHTLY_REINDEX_CRON:0 0 3 * * ?}
+    # How often (ms) to reindex authors to refresh derived stats (storyCount, averageRating).
+    author-reindex-interval: ${SEARCH_AUTHOR_REINDEX_INTERVAL:7200000}
  solr:
    # Connection settings
    url: ${SOLR_URL:http://solr:8983/solr}