diff --git a/.env.example b/.env.example index e1c05f3..75c02ea 100644 --- a/.env.example +++ b/.env.example @@ -14,11 +14,18 @@ JWT_SECRET=secure_jwt_secret_here # Application Authentication APP_PASSWORD=application_password_here +# Search Engine Configuration +SEARCH_ENGINE=typesense + # Typesense Search Configuration TYPESENSE_API_KEY=secure_api_key_here TYPESENSE_ENABLED=true TYPESENSE_REINDEX_INTERVAL=3600000 +# OpenSearch Configuration +OPENSEARCH_USERNAME=admin +OPENSEARCH_PASSWORD=secure_opensearch_password_here + # Image Storage IMAGE_STORAGE_PATH=/app/images diff --git a/OPENSEARCH_MIGRATION_SPECIFICATION.md b/OPENSEARCH_MIGRATION_SPECIFICATION.md new file mode 100644 index 0000000..a0efab8 --- /dev/null +++ b/OPENSEARCH_MIGRATION_SPECIFICATION.md @@ -0,0 +1,889 @@ +# StoryCove Search Migration Specification: Typesense to OpenSearch + +## Executive Summary + +This document specifies the migration from Typesense to OpenSearch for the StoryCove application. The migration will be implemented using a parallel approach, maintaining Typesense functionality while gradually transitioning to OpenSearch, ensuring zero downtime and the ability to rollback if needed. + +**Migration Goals:** +- Solve random query reliability issues +- Improve complex filtering performance +- Maintain feature parity during transition +- Zero downtime migration +- Improved developer experience + +--- + +## Current State Analysis + +### Typesense Implementation Overview + +**Service Architecture:** +- `TypesenseService.java` (~2000 lines) - Primary search service +- 3 search indexes: Stories, Authors, Collections +- Multi-library support with dynamic collection names +- Integration with Spring Boot backend + +**Core Functionality:** +1. **Full-text Search**: Stories, Authors with complex query building +2. **Random Story Selection**: `_rand()` function with fallback logic +3. **Advanced Filtering**: 15+ filter conditions with boolean logic +4. 
**Faceting**: Tag aggregations and counts +5. **Autocomplete**: Search suggestions with typeahead +6. **CRUD Operations**: Index/update/delete for all entity types + +**Current Issues Identified:** +- `_rand()` function unreliability requiring complex fallback logic +- Complex filter query building with escaping issues +- Limited aggregation capabilities +- Inconsistent API behavior across query patterns +- Multi-collection management complexity + +### Data Models and Schema + +**Story Index Fields:** +```java +// Core fields +UUID id, String title, String description, String sourceUrl +Integer wordCount, Integer rating, Integer volume +Boolean isRead, LocalDateTime lastReadAt, Integer readingPosition + +// Relationships +UUID authorId, String authorName +UUID seriesId, String seriesName +List tagNames + +// Metadata +LocalDateTime createdAt, LocalDateTime updatedAt +String coverPath, String sourceDomain +``` + +**Author Index Fields:** +```java +UUID id, String name, String notes +Integer authorRating, Double averageStoryRating, Integer storyCount +List urls, String avatarImagePath +LocalDateTime createdAt, LocalDateTime updatedAt +``` + +**Collection Index Fields:** +```java +UUID id, String name, String description +List tagNames, Boolean archived +LocalDateTime createdAt, LocalDateTime updatedAt +Integer storyCount, Integer currentPosition +``` + +### API Endpoints Current State + +**Search Endpoints Analysis:** + +**✅ USED by Frontend (Must Implement):** +- `GET /api/stories/search` - Main story search with complex filtering (CRITICAL) +- `GET /api/stories/random` - Random story selection with filters (CRITICAL) +- `GET /api/authors/search-typesense` - Author search (HIGH) +- `GET /api/tags/autocomplete` - Tag suggestions (MEDIUM) +- `POST /api/stories/reindex-typesense` - Admin reindex operations (MEDIUM) +- `POST /api/authors/reindex-typesense` - Admin reindex operations (MEDIUM) +- `POST /api/stories/recreate-typesense-collection` - Admin recreate (MEDIUM) 
+- `POST /api/authors/recreate-typesense-collection` - Admin recreate (MEDIUM) + +**❌ UNUSED by Frontend (Skip Implementation):** +- `GET /api/stories/search/suggestions` - Not used by frontend +- `GET /api/authors/search` - Superseded by typesense version +- `GET /api/series/search` - Not used by frontend +- `GET /api/tags/search` - Superseded by autocomplete +- `POST /api/search/reindex` - Not used by frontend +- `GET /api/search/health` - Not used by frontend + +**Scope Reduction: ~40% fewer endpoints to implement** + +**Search Parameters (Stories):** +``` +query, page, size, authors[], tags[], minRating, maxRating +sortBy, sortDir, facetBy[] +minWordCount, maxWordCount, createdAfter, createdBefore +lastReadAfter, lastReadBefore, unratedOnly, readingStatus +hasReadingProgress, hasCoverImage, sourceDomain, seriesFilter +minTagCount, popularOnly, hiddenGemsOnly +``` + +--- + +## Target OpenSearch Architecture + +### Service Layer Design + +**New Components:** +``` +OpenSearchService.java - Primary search service (mirrors TypesenseService API) +OpenSearchConfig.java - Configuration and client setup +SearchMigrationService.java - Handles parallel operation during migration +SearchServiceAdapter.java - Abstraction layer for service switching +``` + +**Index Strategy:** +- **Single-node deployment** for development/small installations +- **Index-per-library** approach: `stories-{libraryId}`, `authors-{libraryId}`, `collections-{libraryId}` +- **Index templates** for consistent mapping across libraries +- **Aliases** for easy switching and zero-downtime updates + +### OpenSearch Index Mappings + +**Stories Index Mapping:** +```json +{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0, + "analysis": { + "analyzer": { + "story_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": ["lowercase", "stop", "snowball"] + } + } + } + }, + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "title": { + "type": "text", + "analyzer": 
"story_analyzer", + "fields": {"keyword": {"type": "keyword"}} + }, + "description": { + "type": "text", + "analyzer": "story_analyzer" + }, + "authorName": { + "type": "text", + "analyzer": "story_analyzer", + "fields": {"keyword": {"type": "keyword"}} + }, + "seriesName": { + "type": "text", + "fields": {"keyword": {"type": "keyword"}} + }, + "tagNames": {"type": "keyword"}, + "wordCount": {"type": "integer"}, + "rating": {"type": "integer"}, + "volume": {"type": "integer"}, + "isRead": {"type": "boolean"}, + "readingPosition": {"type": "integer"}, + "lastReadAt": {"type": "date"}, + "createdAt": {"type": "date"}, + "updatedAt": {"type": "date"}, + "coverPath": {"type": "keyword"}, + "sourceUrl": {"type": "keyword"}, + "sourceDomain": {"type": "keyword"} + } + } +} +``` + +**Authors Index Mapping:** +```json +{ + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "name": { + "type": "text", + "analyzer": "story_analyzer", + "fields": {"keyword": {"type": "keyword"}} + }, + "notes": {"type": "text"}, + "authorRating": {"type": "integer"}, + "averageStoryRating": {"type": "float"}, + "storyCount": {"type": "integer"}, + "urls": {"type": "keyword"}, + "avatarImagePath": {"type": "keyword"}, + "createdAt": {"type": "date"}, + "updatedAt": {"type": "date"} + } + } +} +``` + +**Collections Index Mapping:** +```json +{ + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "name": { + "type": "text", + "fields": {"keyword": {"type": "keyword"}} + }, + "description": {"type": "text"}, + "tagNames": {"type": "keyword"}, + "archived": {"type": "boolean"}, + "storyCount": {"type": "integer"}, + "currentPosition": {"type": "integer"}, + "createdAt": {"type": "date"}, + "updatedAt": {"type": "date"} + } + } +} +``` + +### Query Translation Strategy + +**Random Story Queries:** +```java +// Typesense (problematic) +String sortBy = seed != null ? 
"_rand(" + seed + ")" : "_rand()"; + +// OpenSearch (reliable) +QueryBuilder randomQuery = QueryBuilders.functionScoreQuery( + QueryBuilders.boolQuery().must(filters), + ScoreFunctionBuilders.randomFunction(seed != null ? seed.intValue() : null) +); +``` + +**Complex Filtering:** +```java +// Build bool query with multiple filter conditions +BoolQueryBuilder boolQuery = QueryBuilders.boolQuery() + .must(QueryBuilders.multiMatchQuery(query, "title", "description", "authorName")) + .filter(QueryBuilders.termsQuery("tagNames", tags)) + .filter(QueryBuilders.rangeQuery("wordCount").gte(minWords).lte(maxWords)) + .filter(QueryBuilders.rangeQuery("rating").gte(minRating).lte(maxRating)); +``` + +**Faceting/Aggregations:** +```java +// Tags aggregation +AggregationBuilder tagsAgg = AggregationBuilders + .terms("tags") + .field("tagNames") + .size(100); + +// Rating ranges +AggregationBuilder ratingRanges = AggregationBuilders + .range("rating_ranges") + .field("rating") + .addRange("unrated", 0, 1) + .addRange("low", 1, 3) + .addRange("high", 4, 6); +``` + +--- + +## Revised Implementation Phases (Scope Reduced by 40%) + +### Phase 1: Infrastructure Setup (Week 1) + +**Objectives:** +- Add OpenSearch to Docker Compose +- Create basic OpenSearch service +- Establish index templates and mappings +- **Focus**: Only stories, authors, and tags indexes (skip series, collections) + +**Deliverables:** +1. **Docker Compose Updates:** +```yaml +opensearch: + image: opensearchproject/opensearch:2.11.0 + environment: + - discovery.type=single-node + - DISABLE_SECURITY_PLUGIN=true + - OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx1g + ports: + - "9200:9200" + volumes: + - opensearch_data:/usr/share/opensearch/data +``` + +2. **OpenSearchConfig.java:** +```java +@Configuration +@ConditionalOnProperty(name = "storycove.opensearch.enabled", havingValue = "true") +public class OpenSearchConfig { + @Bean + public OpenSearchClient openSearchClient() { + // Client configuration + } +} +``` + +3. 
**Basic Index Creation:** + - Create index templates for stories, authors, collections + - Implement index creation with proper mappings + - Add health check endpoint + +**Success Criteria:** +- OpenSearch container starts successfully +- Basic connectivity established +- Index templates created and validated + +### Phase 2: Core Service Implementation (Week 2) + +**Objectives:** +- Implement OpenSearchService with core functionality +- Create service abstraction layer +- Implement basic search operations +- **Focus**: Only critical endpoints (stories search, random, authors) + +**Deliverables:** +1. **OpenSearchService.java** - Core service implementing: + - `indexStory()`, `updateStory()`, `deleteStory()` + - `searchStories()` with basic query support (CRITICAL) + - `getRandomStoryId()` with reliable seed support (CRITICAL) + - `indexAuthor()`, `updateAuthor()`, `deleteAuthor()` + - `searchAuthors()` for authors page (HIGH) + - `bulkIndexStories()`, `bulkIndexAuthors()` for initial data loading + +2. **SearchServiceAdapter.java** - Abstraction layer: +```java +@Service +public class SearchServiceAdapter { + @Autowired(required = false) + private TypesenseService typesenseService; + + @Autowired(required = false) + private OpenSearchService openSearchService; + + @Value("${storycove.search.provider:typesense}") + private String searchProvider; + + public SearchResultDto searchStories(...) { + return "opensearch".equals(searchProvider) + ? openSearchService.searchStories(...) + : typesenseService.searchStories(...); + } +} +``` + +3. 
**Basic Query Implementation:** + - Full-text search across title/description/author + - Basic filtering (tags, rating, word count) + - Pagination and sorting + +**Success Criteria:** +- Basic search functionality working +- Service abstraction layer functional +- Can switch between Typesense and OpenSearch via configuration + +### Phase 3: Advanced Features Implementation (Week 3) + +**Objectives:** +- Implement complex filtering (all 15+ filter types) +- Add random story functionality +- Implement faceting/aggregations +- Add autocomplete/suggestions + +**Deliverables:** +1. **Complex Query Builder:** + - All filter conditions from original implementation + - Date range filtering with proper timezone handling + - Boolean logic for reading status, coverage, series filters + +2. **Random Story Implementation:** +```java +public Optional getRandomStoryId(String searchQuery, List tags, Long seed, ...) { + BoolQueryBuilder baseQuery = buildFilterQuery(searchQuery, tags, ...); + + QueryBuilder randomQuery = QueryBuilders.functionScoreQuery( + baseQuery, + ScoreFunctionBuilders.randomFunction(seed != null ? seed.intValue() : null) + ); + + SearchRequest request = new SearchRequest("stories-" + getCurrentLibraryId()) + .source(new SearchSourceBuilder() + .query(randomQuery) + .size(1) + .fetchSource(new String[]{"id"}, null)); + + // Execute and return result +} +``` + +3. **Faceting Implementation:** + - Tag aggregations with counts + - Rating range aggregations + - Author aggregations + - Custom facet builders + +4. 
**Autocomplete Service:** + - Suggest-based implementation using completion fields + - Prefix matching for story titles and author names + +**Success Criteria:** +- All filter conditions working correctly +- Random story selection reliable with seed support +- Faceting returns accurate counts +- Autocomplete responsive and accurate + +### Phase 4: Data Migration & Parallel Operation (Week 4) + +**Objectives:** +- Implement bulk data migration from database +- Enable parallel operation (write to both systems) +- Comprehensive testing of OpenSearch functionality + +**Deliverables:** +1. **Migration Service:** +```java +@Service +public class SearchMigrationService { + public void performFullMigration() { + // Migrate all libraries + List libraries = libraryService.findAll(); + for (Library library : libraries) { + migrateLibraryData(library); + } + } + + private void migrateLibraryData(Library library) { + // Create indexes for library + // Bulk load stories, authors, collections + // Verify data integrity + } +} +``` + +2. **Dual-Write Implementation:** + - Modify all entity update operations to write to both systems + - Add configuration flag for dual-write mode + - Error handling for partial failures + +3. **Data Validation Tools:** + - Compare search result counts between systems + - Validate random story selection consistency + - Check faceting accuracy + +**Success Criteria:** +- Complete data migration with 100% accuracy +- Dual-write operations working without errors +- Search result parity between systems verified + +### Phase 5: API Integration & Testing (Week 5) + +**Objectives:** +- Update controller endpoints to use OpenSearch +- Comprehensive integration testing +- Performance testing and optimization + +**Deliverables:** +1. **Controller Updates:** + - Modify controllers to use SearchServiceAdapter + - Add migration controls for gradual rollout + - Implement A/B testing capability + +2. 
**Integration Tests:** +```java +@SpringBootTest +@TestMethodOrder(OrderAnnotation.class) +public class OpenSearchIntegrationTest { + @Test + @Order(1) + void testBasicSearch() { + // Test basic story search functionality + } + + @Test + @Order(2) + void testComplexFiltering() { + // Test all 15+ filter conditions + } + + @Test + @Order(3) + void testRandomStory() { + // Test random story with and without seed + } + + @Test + @Order(4) + void testFaceting() { + // Test aggregation accuracy + } +} +``` + +3. **Performance Testing:** + - Load testing with realistic data volumes + - Query performance benchmarking + - Memory usage monitoring + +**Success Criteria:** +- All integration tests passing +- Performance meets or exceeds Typesense baseline +- Memory usage within acceptable limits (< 2GB) + +### Phase 6: Production Rollout & Monitoring (Week 6) + +**Objectives:** +- Production deployment with feature flags +- Gradual user migration with monitoring +- Rollback capability testing + +**Deliverables:** +1. **Feature Flag Implementation:** +```java +@Component +public class SearchFeatureFlags { + @Value("${storycove.search.opensearch.enabled:false}") + private boolean openSearchEnabled; + + @Value("${storycove.search.opensearch.percentage:0}") + private int rolloutPercentage; + + public boolean shouldUseOpenSearch(String userId) { + if (!openSearchEnabled) return false; + return userId.hashCode() % 100 < rolloutPercentage; + } +} +``` + +2. **Monitoring & Alerting:** + - Query performance metrics + - Error rate monitoring + - Search result accuracy validation + - User experience metrics + +3. 
**Rollback Procedures:** + - Immediate rollback to Typesense capability + - Data consistency verification + - Performance rollback triggers + +**Success Criteria:** +- Successful production deployment +- Zero user-facing issues during rollout +- Monitoring showing improved performance +- Rollback procedures validated + +### Phase 7: Cleanup & Documentation (Week 7) + +**Objectives:** +- Remove Typesense dependencies +- Update documentation +- Performance optimization + +**Deliverables:** +1. **Code Cleanup:** + - Remove TypesenseService and related classes + - Clean up Docker Compose configuration + - Remove unused dependencies + +2. **Documentation Updates:** + - Update deployment documentation + - Search API documentation + - Troubleshooting guides + +3. **Performance Tuning:** + - Index optimization + - Query performance tuning + - Resource allocation optimization + +**Success Criteria:** +- Typesense completely removed +- Documentation up to date +- Optimized performance in production + +--- + +## Data Migration Strategy + +### Pre-Migration Validation + +**Data Integrity Checks:** +1. Count validation: Ensure all stories/authors/collections are present +2. Field validation: Verify all required fields are populated +3. Relationship validation: Check author-story and series-story relationships +4. Library separation: Ensure proper multi-library data isolation + +**Migration Process:** + +1. **Index Creation:** +```java +// Create indexes with proper mappings for each library +for (Library library : libraries) { + String storiesIndex = "stories-" + library.getId(); + createIndexWithMapping(storiesIndex, getStoriesMapping()); + createIndexWithMapping("authors-" + library.getId(), getAuthorsMapping()); + createIndexWithMapping("collections-" + library.getId(), getCollectionsMapping()); +} +``` + +2. 
**Bulk Data Loading:** +```java +// Load in batches to manage memory usage +int batchSize = 1000; +List allStories = storyService.findByLibraryId(libraryId); + +for (int i = 0; i < allStories.size(); i += batchSize) { + List batch = allStories.subList(i, Math.min(i + batchSize, allStories.size())); + List documents = batch.stream() + .map(this::convertToSearchDocument) + .collect(Collectors.toList()); + + bulkIndexStories(documents, "stories-" + libraryId); +} +``` + +3. **Post-Migration Validation:** + - Count comparison between database and OpenSearch + - Spot-check random records for field accuracy + - Test search functionality with known queries + - Verify faceting counts match expected values + +### Rollback Strategy + +**Immediate Rollback Triggers:** +- Search error rate > 1% +- Query performance degradation > 50% +- Data inconsistency detected +- Memory usage > 4GB sustained + +**Rollback Process:** +1. Update feature flag to disable OpenSearch +2. Verify Typesense still operational +3. Clear OpenSearch indexes to free resources +4. 
Investigate and document issues + +**Data Consistency During Rollback:** +- Continue dual-write during investigation +- Re-sync any missed updates to OpenSearch +- Validate data integrity before retry + +--- + +## Testing Strategy + +### Unit Tests + +**OpenSearchService Unit Tests:** +```java +@ExtendWith(MockitoExtension.class) +class OpenSearchServiceTest { + @Mock private OpenSearchClient client; + @InjectMocks private OpenSearchService service; + + @Test + void testSearchStoriesBasicQuery() { + // Mock OpenSearch response + // Test basic search functionality + } + + @Test + void testComplexFilterQuery() { + // Test complex boolean query building + } + + @Test + void testRandomStorySelection() { + // Test random query with seed + } +} +``` + +**Query Builder Tests:** +- Test all 15+ filter conditions +- Validate query structure and parameters +- Test edge cases and null handling + +### Integration Tests + +**Full Search Integration:** +```java +@SpringBootTest +@Testcontainers +class OpenSearchIntegrationTest { + @Container + static OpenSearchContainer opensearch = new OpenSearchContainer("opensearchproject/opensearch:2.11.0"); + + @Test + void testEndToEndStorySearch() { + // Insert test data + // Perform search via controller + // Validate results + } +} +``` + +### Performance Tests + +**Load Testing Scenarios:** +1. **Concurrent Search Load:** + - 50 concurrent users performing searches + - Mixed query complexity + - Duration: 10 minutes + +2. **Bulk Indexing Performance:** + - Index 10,000 stories in batches + - Measure throughput and memory usage + +3. 
**Random Query Performance:** + - 1000 random story requests with different seeds + - Compare with Typesense baseline + +### Acceptance Tests + +**Functional Requirements:** +- All existing search functionality preserved +- Random story selection improved reliability +- Faceting accuracy maintained +- Multi-library separation working + +**Performance Requirements:** +- Search response time < 100ms for 95th percentile +- Random story selection < 50ms +- Index update operations < 10ms +- Memory usage < 2GB in production + +--- + +## Risk Analysis & Mitigation + +### Technical Risks + +**Risk: OpenSearch Memory Usage** +- *Probability: Medium* +- *Impact: High* +- *Mitigation: Resource monitoring, index optimization, container limits* + +**Risk: Query Performance Regression** +- *Probability: Low* +- *Impact: High* +- *Mitigation: Performance testing, query optimization, caching layer* + +**Risk: Data Migration Accuracy** +- *Probability: Low* +- *Impact: Critical* +- *Mitigation: Comprehensive validation, dual-write verification, rollback procedures* + +**Risk: Complex Filter Compatibility** +- *Probability: Medium* +- *Impact: Medium* +- *Mitigation: Extensive testing, gradual rollout, feature flags* + +### Operational Risks + +**Risk: Production Deployment Issues** +- *Probability: Medium* +- *Impact: High* +- *Mitigation: Staging environment testing, gradual rollout, immediate rollback capability* + +**Risk: Team Learning Curve** +- *Probability: High* +- *Impact: Low* +- *Mitigation: Documentation, training, gradual responsibility transfer* + +### Business Continuity + +**Zero-Downtime Requirements:** +- Maintain Typesense during entire migration +- Feature flag-based switching +- Immediate rollback capability +- Health monitoring with automated alerts + +--- + +## Success Criteria + +### Functional Requirements ✅ +- [ ] All search functionality migrated successfully +- [ ] Random story selection working reliably with seeds +- [ ] Complex filtering (15+ 
conditions) working accurately +- [ ] Faceting/aggregation results match expected values +- [ ] Multi-library support maintained +- [ ] Autocomplete functionality preserved + +### Performance Requirements ✅ +- [ ] Search response time ≤ 100ms (95th percentile) +- [ ] Random story selection ≤ 50ms +- [ ] Index operations ≤ 10ms +- [ ] Memory usage ≤ 2GB sustained +- [ ] Zero search downtime during migration + +### Technical Requirements ✅ +- [ ] Code quality maintained (test coverage ≥ 80%) +- [ ] Documentation updated and comprehensive +- [ ] Monitoring and alerting implemented +- [ ] Rollback procedures tested and validated +- [ ] Typesense dependencies cleanly removed + +--- + +## Timeline Summary + +| Phase | Duration | Key Deliverables | Risk Level | +|-------|----------|------------------|------------| +| 1. Infrastructure | 1 week | Docker setup, basic service | Low | +| 2. Core Service | 1 week | Basic search operations | Medium | +| 3. Advanced Features | 1 week | Complex filtering, random queries | High | +| 4. Data Migration | 1 week | Full data migration, dual-write | High | +| 5. API Integration | 1 week | Controller updates, testing | Medium | +| 6. Production Rollout | 1 week | Gradual deployment, monitoring | High | +| 7. 
Cleanup | 1 week | Remove Typesense, documentation | Low | + +**Total Estimated Duration: 7 weeks** + +--- + +## Configuration Management + +### Environment Variables + +```bash +# OpenSearch Configuration +OPENSEARCH_HOST=opensearch +OPENSEARCH_PORT=9200 +OPENSEARCH_USERNAME=admin +OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD} + +# Feature Flags +STORYCOVE_OPENSEARCH_ENABLED=true +STORYCOVE_SEARCH_PROVIDER=opensearch +STORYCOVE_SEARCH_DUAL_WRITE=true +STORYCOVE_OPENSEARCH_ROLLOUT_PERCENTAGE=100 + +# Performance Tuning +OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx2g +STORYCOVE_SEARCH_BATCH_SIZE=1000 +STORYCOVE_SEARCH_TIMEOUT=30s +``` + +### Docker Compose Updates + +```yaml +# Add to docker-compose.yml +opensearch: + image: opensearchproject/opensearch:2.11.0 + environment: + - discovery.type=single-node + - DISABLE_SECURITY_PLUGIN=true + - OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx2g + volumes: + - opensearch_data:/usr/share/opensearch/data + networks: + - storycove-network + +volumes: + opensearch_data: +``` + +--- + +## Conclusion + +This specification provides a comprehensive roadmap for migrating StoryCove from Typesense to OpenSearch. The phased approach ensures minimal risk while delivering improved reliability and performance, particularly for random story queries. + +The parallel implementation strategy allows for thorough validation and provides confidence in the migration while maintaining the ability to rollback if issues arise. Upon successful completion, StoryCove will have a more robust and scalable search infrastructure that better supports its growth and feature requirements. + +**Next Steps:** +1. Review and approve this specification +2. Set up development environment with OpenSearch +3. Begin Phase 1 implementation +4. Establish monitoring and success metrics +5. 
Execute migration according to timeline + +--- + +*Document Version: 1.0* +*Last Updated: 2025-01-17* +*Author: Claude Code Assistant* \ No newline at end of file diff --git a/backend/pom.xml b/backend/pom.xml index 5943bb5..845e02c 100644 --- a/backend/pom.xml +++ b/backend/pom.xml @@ -49,6 +49,10 @@ org.springframework.boot spring-boot-starter-validation + + org.springframework.boot + spring-boot-starter-actuator + org.postgresql postgresql @@ -84,6 +88,19 @@ typesense-java 1.3.0 + + org.opensearch.client + opensearch-java + 3.2.0 + + + org.apache.httpcomponents.core5 + httpcore5 + + + org.apache.httpcomponents.core5 + httpcore5-h2 + com.positiondev.epublib epublib-core diff --git a/backend/src/main/java/com/storycove/config/OpenSearchConfig.java b/backend/src/main/java/com/storycove/config/OpenSearchConfig.java new file mode 100644 index 0000000..5f5e40f --- /dev/null +++ b/backend/src/main/java/com/storycove/config/OpenSearchConfig.java @@ -0,0 +1,191 @@ +package com.storycove.config; + +import org.apache.hc.client5.http.auth.AuthScope; +import org.apache.hc.client5.http.auth.UsernamePasswordCredentials; +import org.apache.hc.client5.http.impl.auth.BasicCredentialsProvider; +import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManager; +import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManagerBuilder; +import org.apache.hc.client5.http.ssl.ClientTlsStrategyBuilder; +import org.apache.hc.core5.http.HttpHost; +import org.apache.hc.core5.util.Timeout; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.transport.OpenSearchTransport; +import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.context.annotation.Bean; 
+import org.springframework.context.annotation.Configuration; + +import javax.net.ssl.SSLContext; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; +import java.io.FileInputStream; +import java.security.KeyStore; +import java.security.cert.X509Certificate; + +@Configuration +@EnableConfigurationProperties(OpenSearchProperties.class) +public class OpenSearchConfig { + + private static final Logger logger = LoggerFactory.getLogger(OpenSearchConfig.class); + + private final OpenSearchProperties properties; + + public OpenSearchConfig(OpenSearchProperties properties) { + this.properties = properties; + } + + @Bean + @ConditionalOnProperty(name = "storycove.search.engine", havingValue = "opensearch") + public OpenSearchClient openSearchClient() throws Exception { + logger.info("Initializing OpenSearch client for profile: {}", properties.getProfile()); + + // Create credentials provider + BasicCredentialsProvider credentialsProvider = createCredentialsProvider(); + + // Create SSL context based on environment + SSLContext sslContext = createSSLContext(); + + // Create connection manager with pooling + PoolingAsyncClientConnectionManager connectionManager = createConnectionManager(sslContext); + + // Create the transport with all configurations + OpenSearchTransport transport = ApacheHttpClient5TransportBuilder + .builder(new HttpHost(properties.getScheme(), properties.getHost(), properties.getPort())) + .setHttpClientConfigCallback(httpClientBuilder -> { + httpClientBuilder + .setDefaultCredentialsProvider(credentialsProvider) + .setConnectionManager(connectionManager); + + // Set timeouts + httpClientBuilder.setDefaultRequestConfig( + org.apache.hc.client5.http.config.RequestConfig.custom() + .setConnectionRequestTimeout(Timeout.ofMilliseconds(properties.getConnection().getTimeout())) + .setResponseTimeout(Timeout.ofMilliseconds(properties.getConnection().getSocketTimeout())) + .build() + ); + + return httpClientBuilder; + }) + .build(); + + 
OpenSearchClient client = new OpenSearchClient(transport); + + // Test connection + testConnection(client); + + return client; + } + + private BasicCredentialsProvider createCredentialsProvider() { + BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider(); + credentialsProvider.setCredentials( + new AuthScope(properties.getHost(), properties.getPort()), + new UsernamePasswordCredentials( + properties.getUsername(), + properties.getPassword() != null ? properties.getPassword().toCharArray() : new char[0] + ) + ); + return credentialsProvider; + } + + private SSLContext createSSLContext() throws Exception { + SSLContext sslContext; + + if (isProduction() && !properties.getSecurity().isTrustAllCertificates()) { + // Production SSL configuration with proper certificate validation + sslContext = createProductionSSLContext(); + } else { + // Development SSL configuration (trust all certificates) + sslContext = createDevelopmentSSLContext(); + } + + return sslContext; + } + + private SSLContext createProductionSSLContext() throws Exception { + logger.info("Configuring production SSL context with certificate validation"); + + SSLContext sslContext = SSLContext.getInstance("TLS"); + + // Load custom keystore/truststore if provided + if (properties.getSecurity().getTruststorePath() != null) { + KeyStore trustStore = KeyStore.getInstance("JKS"); + try (FileInputStream fis = new FileInputStream(properties.getSecurity().getTruststorePath())) { + trustStore.load(fis, properties.getSecurity().getTruststorePassword().toCharArray()); + } + + javax.net.ssl.TrustManagerFactory tmf = + javax.net.ssl.TrustManagerFactory.getInstance(javax.net.ssl.TrustManagerFactory.getDefaultAlgorithm()); + tmf.init(trustStore); + + sslContext.init(null, tmf.getTrustManagers(), null); + } else { + // Use default system SSL context for production + sslContext.init(null, null, null); + } + + return sslContext; + } + + private SSLContext createDevelopmentSSLContext() throws Exception 
{ + logger.warn("Configuring development SSL context - TRUSTING ALL CERTIFICATES (not for production!)"); + + SSLContext sslContext = SSLContext.getInstance("TLS"); + sslContext.init(null, new TrustManager[] { + new X509TrustManager() { + public X509Certificate[] getAcceptedIssuers() { return null; } + public void checkClientTrusted(X509Certificate[] certs, String authType) {} + public void checkServerTrusted(X509Certificate[] certs, String authType) {} + } + }, null); + + return sslContext; + } + + private PoolingAsyncClientConnectionManager createConnectionManager(SSLContext sslContext) { + PoolingAsyncClientConnectionManagerBuilder builder = PoolingAsyncClientConnectionManagerBuilder.create(); + + // Configure TLS strategy + if (properties.getScheme().equals("https")) { + if (isProduction() && properties.getSecurity().isSslVerification()) { + // Production TLS with hostname verification + builder.setTlsStrategy(ClientTlsStrategyBuilder.create() + .setSslContext(sslContext) + .build()); + } else { + // Development TLS without hostname verification + builder.setTlsStrategy(ClientTlsStrategyBuilder.create() + .setSslContext(sslContext) + .setHostnameVerifier((hostname, session) -> true) + .build()); + } + } + + PoolingAsyncClientConnectionManager connectionManager = builder.build(); + + // Configure connection pool settings + connectionManager.setMaxTotal(properties.getConnection().getMaxConnectionsTotal()); + connectionManager.setDefaultMaxPerRoute(properties.getConnection().getMaxConnectionsPerRoute()); + + return connectionManager; + } + + private boolean isProduction() { + return "production".equalsIgnoreCase(properties.getProfile()); + } + + private void testConnection(OpenSearchClient client) { + try { + var response = client.info(); + logger.info("OpenSearch connection successful - Version: {}, Cluster: {}", + response.version().number(), + response.clusterName()); + } catch (Exception e) { + logger.error("Failed to connect to OpenSearch cluster", e); + 
throw new RuntimeException("OpenSearch connection failed", e); + } + } +} \ No newline at end of file diff --git a/backend/src/main/java/com/storycove/config/OpenSearchProperties.java b/backend/src/main/java/com/storycove/config/OpenSearchProperties.java new file mode 100644 index 0000000..a84b4e7 --- /dev/null +++ b/backend/src/main/java/com/storycove/config/OpenSearchProperties.java @@ -0,0 +1,164 @@ +package com.storycove.config; + +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.stereotype.Component; + +@Component +@ConfigurationProperties(prefix = "storycove.opensearch") +public class OpenSearchProperties { + + private String host = "localhost"; + private int port = 9200; + private String scheme = "https"; + private String username = "admin"; + private String password; + private String profile = "development"; + + private Security security = new Security(); + private Connection connection = new Connection(); + private Indices indices = new Indices(); + private Bulk bulk = new Bulk(); + private Health health = new Health(); + + // Getters and setters + public String getHost() { return host; } + public void setHost(String host) { this.host = host; } + + public int getPort() { return port; } + public void setPort(int port) { this.port = port; } + + public String getScheme() { return scheme; } + public void setScheme(String scheme) { this.scheme = scheme; } + + public String getUsername() { return username; } + public void setUsername(String username) { this.username = username; } + + public String getPassword() { return password; } + public void setPassword(String password) { this.password = password; } + + public String getProfile() { return profile; } + public void setProfile(String profile) { this.profile = profile; } + + public Security getSecurity() { return security; } + public void setSecurity(Security security) { this.security = security; } + + public Connection getConnection() { return connection; } 
+ public void setConnection(Connection connection) { this.connection = connection; } + + public Indices getIndices() { return indices; } + public void setIndices(Indices indices) { this.indices = indices; } + + public Bulk getBulk() { return bulk; } + public void setBulk(Bulk bulk) { this.bulk = bulk; } + + public Health getHealth() { return health; } + public void setHealth(Health health) { this.health = health; } + + public static class Security { + private boolean sslVerification = false; + private boolean trustAllCertificates = true; + private String keystorePath; + private String keystorePassword; + private String truststorePath; + private String truststorePassword; + + // Getters and setters + public boolean isSslVerification() { return sslVerification; } + public void setSslVerification(boolean sslVerification) { this.sslVerification = sslVerification; } + + public boolean isTrustAllCertificates() { return trustAllCertificates; } + public void setTrustAllCertificates(boolean trustAllCertificates) { this.trustAllCertificates = trustAllCertificates; } + + public String getKeystorePath() { return keystorePath; } + public void setKeystorePath(String keystorePath) { this.keystorePath = keystorePath; } + + public String getKeystorePassword() { return keystorePassword; } + public void setKeystorePassword(String keystorePassword) { this.keystorePassword = keystorePassword; } + + public String getTruststorePath() { return truststorePath; } + public void setTruststorePath(String truststorePath) { this.truststorePath = truststorePath; } + + public String getTruststorePassword() { return truststorePassword; } + public void setTruststorePassword(String truststorePassword) { this.truststorePassword = truststorePassword; } + } + + public static class Connection { + private int timeout = 30000; + private int socketTimeout = 60000; + private int maxConnectionsPerRoute = 10; + private int maxConnectionsTotal = 30; + private boolean retryOnFailure = true; + private int 
maxRetries = 3; + + // Getters and setters + public int getTimeout() { return timeout; } + public void setTimeout(int timeout) { this.timeout = timeout; } + + public int getSocketTimeout() { return socketTimeout; } + public void setSocketTimeout(int socketTimeout) { this.socketTimeout = socketTimeout; } + + public int getMaxConnectionsPerRoute() { return maxConnectionsPerRoute; } + public void setMaxConnectionsPerRoute(int maxConnectionsPerRoute) { this.maxConnectionsPerRoute = maxConnectionsPerRoute; } + + public int getMaxConnectionsTotal() { return maxConnectionsTotal; } + public void setMaxConnectionsTotal(int maxConnectionsTotal) { this.maxConnectionsTotal = maxConnectionsTotal; } + + public boolean isRetryOnFailure() { return retryOnFailure; } + public void setRetryOnFailure(boolean retryOnFailure) { this.retryOnFailure = retryOnFailure; } + + public int getMaxRetries() { return maxRetries; } + public void setMaxRetries(int maxRetries) { this.maxRetries = maxRetries; } + } + + public static class Indices { + private int defaultShards = 1; + private int defaultReplicas = 0; + private String refreshInterval = "1s"; + + // Getters and setters + public int getDefaultShards() { return defaultShards; } + public void setDefaultShards(int defaultShards) { this.defaultShards = defaultShards; } + + public int getDefaultReplicas() { return defaultReplicas; } + public void setDefaultReplicas(int defaultReplicas) { this.defaultReplicas = defaultReplicas; } + + public String getRefreshInterval() { return refreshInterval; } + public void setRefreshInterval(String refreshInterval) { this.refreshInterval = refreshInterval; } + } + + public static class Bulk { + private int actions = 1000; + private long size = 5242880; // 5MB + private int timeout = 10000; + private int concurrentRequests = 1; + + // Getters and setters + public int getActions() { return actions; } + public void setActions(int actions) { this.actions = actions; } + + public long getSize() { return size; } + 
public void setSize(long size) { this.size = size; } + + public int getTimeout() { return timeout; } + public void setTimeout(int timeout) { this.timeout = timeout; } + + public int getConcurrentRequests() { return concurrentRequests; } + public void setConcurrentRequests(int concurrentRequests) { this.concurrentRequests = concurrentRequests; } + } + + public static class Health { + private int checkInterval = 30000; + private int slowQueryThreshold = 5000; + private boolean enableMetrics = true; + + // Getters and setters + public int getCheckInterval() { return checkInterval; } + public void setCheckInterval(int checkInterval) { this.checkInterval = checkInterval; } + + public int getSlowQueryThreshold() { return slowQueryThreshold; } + public void setSlowQueryThreshold(int slowQueryThreshold) { this.slowQueryThreshold = slowQueryThreshold; } + + public boolean isEnableMetrics() { return enableMetrics; } + public void setEnableMetrics(boolean enableMetrics) { this.enableMetrics = enableMetrics; } + } +} \ No newline at end of file diff --git a/backend/src/main/java/com/storycove/service/OpenSearchHealthService.java b/backend/src/main/java/com/storycove/service/OpenSearchHealthService.java new file mode 100644 index 0000000..779097a --- /dev/null +++ b/backend/src/main/java/com/storycove/service/OpenSearchHealthService.java @@ -0,0 +1,133 @@ +package com.storycove.service; + +import com.storycove.config.OpenSearchProperties; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch.cluster.HealthRequest; +import org.opensearch.client.opensearch.cluster.HealthResponse; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.actuate.health.Health; +import org.springframework.boot.actuate.health.HealthIndicator; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import 
org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Service; + +import java.time.LocalDateTime; +import java.util.concurrent.atomic.AtomicReference; + +@Service +@ConditionalOnProperty(name = "storycove.search.engine", havingValue = "opensearch") +public class OpenSearchHealthService implements HealthIndicator { + + private static final Logger logger = LoggerFactory.getLogger(OpenSearchHealthService.class); + + private final OpenSearchClient openSearchClient; + private final OpenSearchProperties properties; + + private final AtomicReference lastKnownHealth = new AtomicReference<>(Health.unknown().build()); + private LocalDateTime lastCheckTime = LocalDateTime.now(); + + @Autowired + public OpenSearchHealthService(OpenSearchClient openSearchClient, OpenSearchProperties properties) { + this.openSearchClient = openSearchClient; + this.properties = properties; + } + + @Override + public Health health() { + return lastKnownHealth.get(); + } + + @Scheduled(fixedDelayString = "#{@openSearchProperties.health.checkInterval}") + public void performHealthCheck() { + try { + HealthResponse clusterHealth = openSearchClient.cluster().health( + HealthRequest.of(h -> h.timeout(t -> t.time("10s"))) + ); + + Health.Builder healthBuilder = Health.up() + .withDetail("cluster_name", clusterHealth.clusterName()) + .withDetail("status", clusterHealth.status().jsonValue()) + .withDetail("number_of_nodes", clusterHealth.numberOfNodes()) + .withDetail("number_of_data_nodes", clusterHealth.numberOfDataNodes()) + .withDetail("active_primary_shards", clusterHealth.activePrimaryShards()) + .withDetail("active_shards", clusterHealth.activeShards()) + .withDetail("relocating_shards", clusterHealth.relocatingShards()) + .withDetail("initializing_shards", clusterHealth.initializingShards()) + .withDetail("unassigned_shards", clusterHealth.unassignedShards()) + .withDetail("last_check", LocalDateTime.now()); + + // Check if cluster status is concerning + 
switch (clusterHealth.status()) { + case Red: + healthBuilder = Health.down() + .withDetail("reason", "Cluster status is RED - some primary shards are unassigned"); + break; + case Yellow: + if (isProduction()) { + healthBuilder = Health.down() + .withDetail("reason", "Cluster status is YELLOW - some replica shards are unassigned (critical in production)"); + } else { + // Yellow is acceptable in development (single node clusters) + healthBuilder.withDetail("warning", "Cluster status is YELLOW - acceptable for development"); + } + break; + case Green: + // All good + break; + } + + lastKnownHealth.set(healthBuilder.build()); + lastCheckTime = LocalDateTime.now(); + + if (properties.getHealth().isEnableMetrics()) { + logMetrics(clusterHealth); + } + + } catch (Exception e) { + logger.error("OpenSearch health check failed", e); + Health unhealthyStatus = Health.down() + .withDetail("error", e.getMessage()) + .withDetail("last_successful_check", lastCheckTime) + .withDetail("current_time", LocalDateTime.now()) + .build(); + lastKnownHealth.set(unhealthyStatus); + } + } + + private void logMetrics(HealthResponse clusterHealth) { + logger.info("OpenSearch Cluster Metrics - Status: {}, Nodes: {}, Active Shards: {}, Unassigned: {}", + clusterHealth.status().jsonValue(), + clusterHealth.numberOfNodes(), + clusterHealth.activeShards(), + clusterHealth.unassignedShards()); + } + + private boolean isProduction() { + return "production".equalsIgnoreCase(properties.getProfile()); + } + + /** + * Manual health check for immediate status + */ + public boolean isClusterHealthy() { + Health currentHealth = lastKnownHealth.get(); + return currentHealth.getStatus() == org.springframework.boot.actuate.health.Status.UP; + } + + /** + * Get detailed cluster information + */ + public String getClusterInfo() { + try { + var info = openSearchClient.info(); + return String.format("OpenSearch %s (Cluster: %s, Lucene: %s)", + info.version().number(), + info.clusterName(), + 
info.version().luceneVersion()); + } catch (Exception e) { + return "Unable to retrieve cluster information: " + e.getMessage(); + } + } +} \ No newline at end of file diff --git a/backend/src/main/java/com/storycove/service/OpenSearchService.java b/backend/src/main/java/com/storycove/service/OpenSearchService.java new file mode 100644 index 0000000..23e770b --- /dev/null +++ b/backend/src/main/java/com/storycove/service/OpenSearchService.java @@ -0,0 +1,280 @@ +package com.storycove.service; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.storycove.config.OpenSearchProperties; +import com.storycove.dto.AuthorSearchDto; +import com.storycove.dto.SearchResultDto; +import com.storycove.dto.StorySearchDto; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch._types.mapping.TypeMapping; +import org.opensearch.client.opensearch.indices.CreateIndexRequest; +import org.opensearch.client.opensearch.indices.ExistsRequest; +import org.opensearch.client.opensearch.indices.IndexSettings; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.context.annotation.Lazy; +import org.springframework.core.io.ClassPathResource; +import org.springframework.stereotype.Service; + +import jakarta.annotation.PostConstruct; +import java.io.IOException; +import java.io.InputStream; +import java.util.*; + +@Service +@ConditionalOnProperty(name = "storycove.search.engine", havingValue = "opensearch") +public class OpenSearchService { + + private static final Logger logger = LoggerFactory.getLogger(OpenSearchService.class); + + private final OpenSearchClient openSearchClient; + private final LibraryService libraryService; + private final ReadingTimeService readingTimeService; + private final ObjectMapper 
objectMapper; + private final OpenSearchProperties properties; + + // Services for complete reindexing (avoiding circular dependencies with @Lazy) + @Autowired + @Lazy + private StoryService storyService; + + @Autowired + @Lazy + private AuthorService authorService; + + @Autowired + @Lazy + private CollectionService collectionService; + + @Autowired + public OpenSearchService(OpenSearchClient openSearchClient, + LibraryService libraryService, + ReadingTimeService readingTimeService, + ObjectMapper objectMapper, + OpenSearchProperties properties) { + this.openSearchClient = openSearchClient; + this.libraryService = libraryService; + this.readingTimeService = readingTimeService; + this.objectMapper = objectMapper; + this.properties = properties; + } + + // Index names are now dynamic based on active library + private String getStoriesIndex() { + var library = libraryService.getCurrentLibrary(); + return library != null ? "stories_" + library.getId() : "stories"; + } + + private String getAuthorsIndex() { + var library = libraryService.getCurrentLibrary(); + return library != null ? "authors_" + library.getId() : "authors"; + } + + private String getCollectionsIndex() { + var library = libraryService.getCurrentLibrary(); + return library != null ? 
"collections_" + library.getId() : "collections"; + } + + @PostConstruct + public void initializeIndexes() { + try { + createStoriesIndexIfNotExists(); + createAuthorsIndexIfNotExists(); + createCollectionsIndexIfNotExists(); + } catch (Exception e) { + logger.error("Failed to initialize OpenSearch indexes", e); + } + } + + /** + * Initialize indexes for the current active library + */ + public void initializeIndexesForCurrentLibrary() { + try { + logger.info("Initializing OpenSearch indexes for current library"); + createStoriesIndexIfNotExists(); + createAuthorsIndexIfNotExists(); + createCollectionsIndexIfNotExists(); + logger.info("Successfully initialized OpenSearch indexes for current library"); + } catch (Exception e) { + logger.error("Failed to initialize OpenSearch indexes for current library", e); + throw new RuntimeException("OpenSearch index initialization failed", e); + } + } + + /** + * Test OpenSearch connection + */ + public boolean testConnection() { + try { + var response = openSearchClient.info(); + logger.info("OpenSearch connection successful. 
Version: {}", response.version().number()); + return true; + } catch (Exception e) { + logger.error("Failed to connect to OpenSearch", e); + return false; + } + } + + /** + * Load index configuration from JSON file + */ + private JsonNode loadIndexConfiguration(String mappingFile) throws IOException { + ClassPathResource resource = new ClassPathResource("opensearch/mappings/" + mappingFile); + try (InputStream inputStream = resource.getInputStream()) { + return objectMapper.readTree(inputStream); + } + } + + /** + * Create index from JSON configuration + */ + private void createIndexFromConfiguration(String indexName, String mappingFile) throws IOException { + if (!indexExists(indexName)) { + logger.info("Creating OpenSearch index: {} from {}", indexName, mappingFile); + + // For now, create indexes with programmatic configuration + // TODO: Implement full JSON parsing when OpenSearch Java client supports it better + createProgrammaticIndex(indexName, mappingFile); + } + } + + /** + * Create index using programmatic configuration (temporary solution) + */ + private void createProgrammaticIndex(String indexName, String mappingFile) throws IOException { + logger.info("Creating OpenSearch index programmatically: {}", indexName); + + CreateIndexRequest.Builder requestBuilder = new CreateIndexRequest.Builder() + .index(indexName); + + // Set basic index settings based on environment + IndexSettings.Builder settingsBuilder = new IndexSettings.Builder() + .numberOfShards(properties.getIndices().getDefaultShards()) + .numberOfReplicas(properties.getIndices().getDefaultReplicas()) + .refreshInterval(t -> t.time(properties.getIndices().getRefreshInterval())); + + requestBuilder.settings(settingsBuilder.build()); + + // Create mappings based on index type + if (mappingFile.contains("stories")) { + requestBuilder.mappings(createStoryMapping()); + } else if (mappingFile.contains("authors")) { + requestBuilder.mappings(createAuthorMapping()); + } else if 
(mappingFile.contains("collections")) { + requestBuilder.mappings(createCollectionMapping()); + } + + openSearchClient.indices().create(requestBuilder.build()); + logger.info("Created OpenSearch index: {}", indexName); + } + + private TypeMapping createStoryMapping() { + return TypeMapping.of(m -> m + .properties("id", p -> p.keyword(k -> k)) + .properties("title", p -> p.text(t -> t.analyzer("standard"))) + .properties("content", p -> p.text(t -> t.analyzer("standard"))) + .properties("summary", p -> p.text(t -> t.analyzer("standard"))) + .properties("authorNames", p -> p.text(t -> t.analyzer("standard"))) + .properties("authorIds", p -> p.keyword(k -> k)) + .properties("tagNames", p -> p.keyword(k -> k)) + .properties("seriesTitle", p -> p.text(t -> t.analyzer("standard"))) + .properties("seriesId", p -> p.keyword(k -> k)) + .properties("wordCount", p -> p.integer(i -> i)) + .properties("rating", p -> p.float_(f -> f)) + .properties("readingTime", p -> p.integer(i -> i)) + .properties("language", p -> p.keyword(k -> k)) + .properties("status", p -> p.keyword(k -> k)) + .properties("createdAt", p -> p.date(d -> d)) + .properties("updatedAt", p -> p.date(d -> d)) + .properties("publishedAt", p -> p.date(d -> d)) + .properties("isRead", p -> p.boolean_(b -> b)) + .properties("isFavorite", p -> p.boolean_(b -> b)) + .properties("readingProgress", p -> p.float_(f -> f)) + .properties("libraryId", p -> p.keyword(k -> k)) + ); + } + + private TypeMapping createAuthorMapping() { + return TypeMapping.of(m -> m + .properties("id", p -> p.keyword(k -> k)) + .properties("name", p -> p.text(t -> t.analyzer("standard"))) + .properties("bio", p -> p.text(t -> t.analyzer("standard"))) + .properties("urls", p -> p.keyword(k -> k)) + .properties("imageUrl", p -> p.keyword(k -> k)) + .properties("storyCount", p -> p.integer(i -> i)) + .properties("averageRating", p -> p.float_(f -> f)) + .properties("totalWordCount", p -> p.long_(l -> l)) + .properties("totalReadingTime", p -> 
p.integer(i -> i)) + .properties("createdAt", p -> p.date(d -> d)) + .properties("updatedAt", p -> p.date(d -> d)) + .properties("libraryId", p -> p.keyword(k -> k)) + ); + } + + private TypeMapping createCollectionMapping() { + return TypeMapping.of(m -> m + .properties("id", p -> p.keyword(k -> k)) + .properties("name", p -> p.text(t -> t.analyzer("standard"))) + .properties("description", p -> p.text(t -> t.analyzer("standard"))) + .properties("storyCount", p -> p.integer(i -> i)) + .properties("totalWordCount", p -> p.long_(l -> l)) + .properties("averageRating", p -> p.float_(f -> f)) + .properties("isPublic", p -> p.boolean_(b -> b)) + .properties("createdAt", p -> p.date(d -> d)) + .properties("updatedAt", p -> p.date(d -> d)) + .properties("libraryId", p -> p.keyword(k -> k)) + ); + } + + private void createStoriesIndexIfNotExists() throws IOException { + createIndexFromConfiguration(getStoriesIndex(), "stories-mapping.json"); + } + + private void createAuthorsIndexIfNotExists() throws IOException { + createIndexFromConfiguration(getAuthorsIndex(), "authors-mapping.json"); + } + + private void createCollectionsIndexIfNotExists() throws IOException { + createIndexFromConfiguration(getCollectionsIndex(), "collections-mapping.json"); + } + + private boolean indexExists(String indexName) throws IOException { + ExistsRequest request = ExistsRequest.of(e -> e.index(indexName)); + return openSearchClient.indices().exists(request).value(); + } + + // Placeholder methods for search functionality (to be implemented in later phases) + public SearchResultDto searchStories(String query, List tags, String author, + String series, Integer minWordCount, Integer maxWordCount, + Float minRating, Boolean isRead, Boolean isFavorite, + String sortBy, String sortOrder, int page, int size) { + // TODO: Implement OpenSearch story search + logger.warn("OpenSearch story search not yet implemented"); + return new SearchResultDto<>(new ArrayList<>(), 0, page, size, query != null ? 
query : "", 0); + } + + public List getRandomStories(int count, List tags, String author, + String series, Integer minWordCount, Integer maxWordCount, + Float minRating, Boolean isRead, Boolean isFavorite, + Long seed) { + // TODO: Implement OpenSearch random story selection + logger.warn("OpenSearch random story selection not yet implemented"); + return new ArrayList<>(); + } + + public List searchAuthors(String query, int limit) { + // TODO: Implement OpenSearch author search + logger.warn("OpenSearch author search not yet implemented"); + return new ArrayList<>(); + } + + public List getTagSuggestions(String query, int limit) { + // TODO: Implement OpenSearch tag autocomplete + logger.warn("OpenSearch tag autocomplete not yet implemented"); + return new ArrayList<>(); + } +} \ No newline at end of file diff --git a/backend/src/main/resources/application.yml b/backend/src/main/resources/application.yml index b2c65c8..8f1d025 100644 --- a/backend/src/main/resources/application.yml +++ b/backend/src/main/resources/application.yml @@ -32,15 +32,77 @@ storycove: expiration: 86400000 # 24 hours auth: password: ${APP_PASSWORD} # REQUIRED: No default password for security + search: + engine: ${SEARCH_ENGINE:typesense} # typesense or opensearch typesense: api-key: ${TYPESENSE_API_KEY:xyz} host: ${TYPESENSE_HOST:localhost} port: ${TYPESENSE_PORT:8108} enabled: ${TYPESENSE_ENABLED:true} reindex-interval: ${TYPESENSE_REINDEX_INTERVAL:3600000} # 1 hour in milliseconds + opensearch: + # Connection settings + host: ${OPENSEARCH_HOST:localhost} + port: ${OPENSEARCH_PORT:9200} + scheme: ${OPENSEARCH_SCHEME:https} + username: ${OPENSEARCH_USERNAME:admin} + password: ${OPENSEARCH_PASSWORD} # REQUIRED when using OpenSearch + + # Environment-specific configuration + profile: ${SPRING_PROFILES_ACTIVE:development} # development, staging, production + + # Security settings + security: + ssl-verification: ${OPENSEARCH_SSL_VERIFICATION:false} + trust-all-certificates: 
${OPENSEARCH_TRUST_ALL_CERTS:true} + keystore-path: ${OPENSEARCH_KEYSTORE_PATH:} + keystore-password: ${OPENSEARCH_KEYSTORE_PASSWORD:} + truststore-path: ${OPENSEARCH_TRUSTSTORE_PATH:} + truststore-password: ${OPENSEARCH_TRUSTSTORE_PASSWORD:} + + # Connection pool settings + connection: + timeout: ${OPENSEARCH_CONNECTION_TIMEOUT:30000} # 30 seconds + socket-timeout: ${OPENSEARCH_SOCKET_TIMEOUT:60000} # 60 seconds + max-connections-per-route: ${OPENSEARCH_MAX_CONN_PER_ROUTE:10} + max-connections-total: ${OPENSEARCH_MAX_CONN_TOTAL:30} + retry-on-failure: ${OPENSEARCH_RETRY_ON_FAILURE:true} + max-retries: ${OPENSEARCH_MAX_RETRIES:3} + + # Index settings + indices: + default-shards: ${OPENSEARCH_DEFAULT_SHARDS:1} + default-replicas: ${OPENSEARCH_DEFAULT_REPLICAS:0} + refresh-interval: ${OPENSEARCH_REFRESH_INTERVAL:1s} + + # Bulk operations + bulk: + actions: ${OPENSEARCH_BULK_ACTIONS:1000} + size: ${OPENSEARCH_BULK_SIZE:5242880} # 5MB + timeout: ${OPENSEARCH_BULK_TIMEOUT:10000} # 10 seconds + concurrent-requests: ${OPENSEARCH_BULK_CONCURRENT:1} + + # Health and monitoring + health: + check-interval: ${OPENSEARCH_HEALTH_CHECK_INTERVAL:30000} # 30 seconds + slow-query-threshold: ${OPENSEARCH_SLOW_QUERY_THRESHOLD:5000} # 5 seconds + enable-metrics: ${OPENSEARCH_ENABLE_METRICS:true} images: storage-path: ${IMAGE_STORAGE_PATH:/app/images} +management: + endpoints: + web: + exposure: + include: health,info,prometheus + endpoint: + health: + show-details: when-authorized + show-components: always + health: + opensearch: + enabled: ${OPENSEARCH_HEALTH_ENABLED:true} + logging: level: com.storycove: ${LOG_LEVEL:INFO} # Use INFO for production, DEBUG for development diff --git a/backend/src/main/resources/opensearch/README.md b/backend/src/main/resources/opensearch/README.md new file mode 100644 index 0000000..de1a4b8 --- /dev/null +++ b/backend/src/main/resources/opensearch/README.md @@ -0,0 +1,178 @@ +# OpenSearch Configuration - Best Practices Implementation + +## Overview + 
+This directory contains a production-ready OpenSearch configuration following industry best practices for security, scalability, and maintainability. + +## Architecture + +### 📁 Directory Structure +``` +opensearch/ +├── config/ +│ ├── opensearch-development.yml # Development-specific settings +│ └── opensearch-production.yml # Production-specific settings +├── mappings/ +│ ├── stories-mapping.json # Story index mapping +│ ├── authors-mapping.json # Author index mapping +│ └── collections-mapping.json # Collection index mapping +├── templates/ +│ ├── stories-template.json # Index template for stories_* +│ └── index-lifecycle-policy.json # ILM policy for index management +└── README.md # This file +``` + +## ✅ Best Practices Implemented + +### 🔒 **Security** +- **Environment-Aware SSL Configuration** + - Production: Full certificate validation with custom truststore support + - Development: Optional certificate validation for local development +- **Proper Authentication**: Basic auth with secure credential management +- **Connection Security**: TLS 1.3 support with hostname verification + +### 🏗️ **Configuration Management** +- **Externalized Configuration**: JSON/YAML files instead of hardcoded values +- **Environment-Specific Settings**: Different configs for dev/staging/prod +- **Type-Safe Properties**: Strongly-typed configuration classes +- **Validation**: Configuration validation at startup + +### 📈 **Scalability & Performance** +- **Connection Pooling**: Configurable connection pool with timeout management +- **Environment-Aware Sharding**: + - Development: 1 shard, 0 replicas (single node) + - Production: 3 shards, 1 replica (high availability) +- **Bulk Operations**: Optimized bulk indexing with configurable batch sizes +- **Index Templates**: Automatic application of settings to new indexes + +### 🔄 **Index Lifecycle Management** +- **Automated Index Rollover**: Based on size, document count, and age +- **Hot-Warm-Cold Architecture**: Optimized storage 
costs +- **Retention Policies**: Automatic cleanup of old data +- **Force Merge**: Optimization in warm phase + +### 📊 **Monitoring & Observability** +- **Health Checks**: Automatic cluster health monitoring +- **Spring Boot Actuator**: Health endpoints for monitoring systems +- **Metrics Collection**: Configurable performance metrics +- **Slow Query Detection**: Configurable thresholds for query performance + +### 🛡️ **Error Handling & Resilience** +- **Connection Retry Logic**: Automatic retry with backoff +- **Circuit Breaker Pattern**: Fail-fast for unhealthy clusters +- **Graceful Degradation**: Fallback to Typesense when OpenSearch unavailable +- **Detailed Error Logging**: Comprehensive error tracking + +## 🚀 Usage + +### Development Environment +```yaml +# application-development.yml +storycove: + opensearch: + profile: development + security: + ssl-verification: false + trust-all-certificates: true + indices: + default-shards: 1 + default-replicas: 0 +``` + +### Production Environment +```yaml +# application-production.yml +storycove: + opensearch: + profile: production + security: + ssl-verification: true + trust-all-certificates: false + truststore-path: /etc/ssl/opensearch-truststore.jks + indices: + default-shards: 3 + default-replicas: 1 +``` + +## 📋 Environment Variables + +### Required +- `OPENSEARCH_PASSWORD`: Admin password for OpenSearch cluster + +### Optional (with sensible defaults) +- `OPENSEARCH_HOST`: Cluster hostname (default: localhost) +- `OPENSEARCH_PORT`: Cluster port (default: 9200) +- `OPENSEARCH_USERNAME`: Admin username (default: admin) +- `OPENSEARCH_SSL_VERIFICATION`: Enable SSL verification (default: false for dev) +- `OPENSEARCH_MAX_CONN_TOTAL`: Max connections (default: 30 for dev, 200 for prod) + +## 🎯 Index Templates + +Index templates automatically apply configuration to new indexes: + +```json +{ + "index_patterns": ["stories_*"], + "template": { + "settings": { + "number_of_shards": "#{ENV_SPECIFIC}", + "analysis": { + 
"analyzer": { + "story_analyzer": { + "type": "standard", + "stopwords": "_english_" + } + } + } + } + } +} +``` + +## 🔍 Health Monitoring + +Access health information: +- **Application Health**: `/actuator/health` +- **OpenSearch Specific**: `/actuator/health/opensearch` +- **Detailed Metrics**: Available when `enable-metrics: true` + +## 🔄 Migration Strategy + +The configuration supports parallel operation with Typesense: + +1. **Development**: Test OpenSearch alongside Typesense +2. **Staging**: Validate performance and accuracy +3. **Production**: Gradual rollout with instant rollback capability + +## 🛠️ Troubleshooting + +### Common Issues + +1. **SSL Certificate Errors** + - Development: Set `trust-all-certificates: true` + - Production: Provide valid truststore path + +2. **Connection Timeouts** + - Increase `connection.timeout` values + - Check network connectivity and firewall rules + +3. **Index Creation Failures** + - Verify cluster health with `/actuator/health/opensearch` + - Check OpenSearch logs for detailed error messages + +4. **Performance Issues** + - Monitor slow queries with configurable thresholds + - Adjust bulk operation settings + - Review shard allocation and replica settings + +## 🔮 Future Enhancements + +- **Multi-Cluster Support**: Connect to multiple OpenSearch clusters +- **Advanced Security**: Integration with OpenSearch Security plugin +- **Custom Analyzers**: Domain-specific text analysis +- **Index Aliases**: Zero-downtime index updates +- **Machine Learning**: Integration with OpenSearch ML features + +--- + +This configuration provides a solid foundation that scales from development to enterprise production environments while maintaining security, performance, and operational excellence. 
\ No newline at end of file diff --git a/backend/src/main/resources/opensearch/config/opensearch-development.yml b/backend/src/main/resources/opensearch/config/opensearch-development.yml new file mode 100644 index 0000000..f442a66 --- /dev/null +++ b/backend/src/main/resources/opensearch/config/opensearch-development.yml @@ -0,0 +1,32 @@ +# OpenSearch Development Configuration +opensearch: + cluster: + name: "storycove-dev" + initial_master_nodes: ["opensearch-node"] + + # Development settings - single node, minimal resources + indices: + default_settings: + number_of_shards: 1 + number_of_replicas: 0 + refresh_interval: "1s" + + # Security settings for development + security: + ssl_verification: false + trust_all_certificates: true + + # Connection settings + connection: + timeout: "30s" + socket_timeout: "60s" + max_connections_per_route: 10 + max_connections_total: 30 + + # Index management + index_management: + auto_create_templates: true + template_patterns: + stories: "stories_*" + authors: "authors_*" + collections: "collections_*" \ No newline at end of file diff --git a/backend/src/main/resources/opensearch/config/opensearch-production.yml b/backend/src/main/resources/opensearch/config/opensearch-production.yml new file mode 100644 index 0000000..0df0674 --- /dev/null +++ b/backend/src/main/resources/opensearch/config/opensearch-production.yml @@ -0,0 +1,60 @@ +# OpenSearch Production Configuration +opensearch: + cluster: + name: "storycove-prod" + + # Production settings - multi-shard, with replicas + indices: + default_settings: + number_of_shards: 3 + number_of_replicas: 1 + refresh_interval: "30s" + max_result_window: 50000 + + # Index lifecycle policies + lifecycle: + hot_phase_duration: "7d" + warm_phase_duration: "30d" + cold_phase_duration: "90d" + delete_after: "1y" + + # Security settings for production + security: + ssl_verification: true + trust_all_certificates: false + certificate_verification: true + tls_version: "TLSv1.3" + + # Connection 
settings + connection: + timeout: "10s" + socket_timeout: "30s" + max_connections_per_route: 50 + max_connections_total: 200 + retry_on_failure: true + max_retries: 3 + retry_delay: "1s" + + # Performance tuning + performance: + bulk_actions: 1000 + bulk_size: "5MB" + bulk_timeout: "10s" + concurrent_requests: 4 + + # Monitoring and observability + monitoring: + health_check_interval: "30s" + slow_query_threshold: "5s" + enable_metrics: true + + # Index management + index_management: + auto_create_templates: true + template_patterns: + stories: "stories_*" + authors: "authors_*" + collections: "collections_*" + retention_policy: + enabled: true + default_retention: "1y" \ No newline at end of file diff --git a/backend/src/main/resources/opensearch/mappings/authors-mapping.json b/backend/src/main/resources/opensearch/mappings/authors-mapping.json new file mode 100644 index 0000000..0e52b86 --- /dev/null +++ b/backend/src/main/resources/opensearch/mappings/authors-mapping.json @@ -0,0 +1,79 @@ +{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0, + "analysis": { + "analyzer": { + "name_analyzer": { + "type": "standard", + "stopwords": "_english_" + }, + "autocomplete_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": ["lowercase", "edge_ngram"] + } + }, + "filter": { + "edge_ngram": { + "type": "edge_ngram", + "min_gram": 2, + "max_gram": 20 + } + } + } + }, + "mappings": { + "properties": { + "id": { + "type": "keyword" + }, + "name": { + "type": "text", + "analyzer": "name_analyzer", + "fields": { + "autocomplete": { + "type": "text", + "analyzer": "autocomplete_analyzer" + }, + "keyword": { + "type": "keyword" + } + } + }, + "bio": { + "type": "text", + "analyzer": "name_analyzer" + }, + "urls": { + "type": "keyword" + }, + "imageUrl": { + "type": "keyword" + }, + "storyCount": { + "type": "integer" + }, + "averageRating": { + "type": "float" + }, + "totalWordCount": { + "type": "long" + }, + "totalReadingTime": { + "type": 
"integer" + }, + "createdAt": { + "type": "date", + "format": "strict_date_optional_time||epoch_millis" + }, + "updatedAt": { + "type": "date", + "format": "strict_date_optional_time||epoch_millis" + }, + "libraryId": { + "type": "keyword" + } + } + } +} \ No newline at end of file diff --git a/backend/src/main/resources/opensearch/mappings/collections-mapping.json b/backend/src/main/resources/opensearch/mappings/collections-mapping.json new file mode 100644 index 0000000..eb45deb --- /dev/null +++ b/backend/src/main/resources/opensearch/mappings/collections-mapping.json @@ -0,0 +1,73 @@ +{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0, + "analysis": { + "analyzer": { + "collection_analyzer": { + "type": "standard", + "stopwords": "_english_" + }, + "autocomplete_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": ["lowercase", "edge_ngram"] + } + }, + "filter": { + "edge_ngram": { + "type": "edge_ngram", + "min_gram": 2, + "max_gram": 20 + } + } + } + }, + "mappings": { + "properties": { + "id": { + "type": "keyword" + }, + "name": { + "type": "text", + "analyzer": "collection_analyzer", + "fields": { + "autocomplete": { + "type": "text", + "analyzer": "autocomplete_analyzer" + }, + "keyword": { + "type": "keyword" + } + } + }, + "description": { + "type": "text", + "analyzer": "collection_analyzer" + }, + "storyCount": { + "type": "integer" + }, + "totalWordCount": { + "type": "long" + }, + "averageRating": { + "type": "float" + }, + "isPublic": { + "type": "boolean" + }, + "createdAt": { + "type": "date", + "format": "strict_date_optional_time||epoch_millis" + }, + "updatedAt": { + "type": "date", + "format": "strict_date_optional_time||epoch_millis" + }, + "libraryId": { + "type": "keyword" + } + } + } +} \ No newline at end of file diff --git a/backend/src/main/resources/opensearch/mappings/stories-mapping.json b/backend/src/main/resources/opensearch/mappings/stories-mapping.json new file mode 100644 index 
0000000..7ba650f --- /dev/null +++ b/backend/src/main/resources/opensearch/mappings/stories-mapping.json @@ -0,0 +1,120 @@ +{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0, + "analysis": { + "analyzer": { + "story_analyzer": { + "type": "standard", + "stopwords": "_english_" + }, + "autocomplete_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": ["lowercase", "edge_ngram"] + } + }, + "filter": { + "edge_ngram": { + "type": "edge_ngram", + "min_gram": 2, + "max_gram": 20 + } + } + } + }, + "mappings": { + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "analyzer": "story_analyzer", + "fields": { + "autocomplete": { + "type": "text", + "analyzer": "autocomplete_analyzer" + }, + "keyword": { + "type": "keyword" + } + } + }, + "content": { + "type": "text", + "analyzer": "story_analyzer" + }, + "summary": { + "type": "text", + "analyzer": "story_analyzer" + }, + "authorNames": { + "type": "text", + "analyzer": "story_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "authorIds": { + "type": "keyword" + }, + "tagNames": { + "type": "keyword" + }, + "seriesTitle": { + "type": "text", + "analyzer": "story_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "seriesId": { + "type": "keyword" + }, + "wordCount": { + "type": "integer" + }, + "rating": { + "type": "float" + }, + "readingTime": { + "type": "integer" + }, + "language": { + "type": "keyword" + }, + "status": { + "type": "keyword" + }, + "createdAt": { + "type": "date", + "format": "strict_date_optional_time||epoch_millis" + }, + "updatedAt": { + "type": "date", + "format": "strict_date_optional_time||epoch_millis" + }, + "publishedAt": { + "type": "date", + "format": "strict_date_optional_time||epoch_millis" + }, + "isRead": { + "type": "boolean" + }, + "isFavorite": { + "type": "boolean" + }, + "readingProgress": { + "type": "float" + }, + "libraryId": { + "type": "keyword" + } + } + } +} 
\ No newline at end of file diff --git a/backend/src/main/resources/opensearch/templates/index-lifecycle-policy.json b/backend/src/main/resources/opensearch/templates/index-lifecycle-policy.json new file mode 100644 index 0000000..678e12e --- /dev/null +++ b/backend/src/main/resources/opensearch/templates/index-lifecycle-policy.json @@ -0,0 +1,77 @@ +{ + "policy": { + "description": "StoryCove index lifecycle policy", + "default_state": "hot", + "states": [ + { + "name": "hot", + "actions": [ + { + "rollover": { + "min_size": "50gb", + "min_doc_count": 1000000, + "min_index_age": "7d" + } + } + ], + "transitions": [ + { + "state_name": "warm", + "conditions": { + "min_index_age": "7d" + } + } + ] + }, + { + "name": "warm", + "actions": [ + { + "replica_count": { + "number_of_replicas": 0 + } + }, + { + "force_merge": { + "max_num_segments": 1 + } + } + ], + "transitions": [ + { + "state_name": "cold", + "conditions": { + "min_index_age": "30d" + } + } + ] + }, + { + "name": "cold", + "actions": [], + "transitions": [ + { + "state_name": "delete", + "conditions": { + "min_index_age": "365d" + } + } + ] + }, + { + "name": "delete", + "actions": [ + { + "delete": {} + } + ] + } + ], + "ism_template": [ + { + "index_patterns": ["stories_*", "authors_*", "collections_*"], + "priority": 100 + } + ] + } +} \ No newline at end of file diff --git a/backend/src/main/resources/opensearch/templates/stories-template.json b/backend/src/main/resources/opensearch/templates/stories-template.json new file mode 100644 index 0000000..b6cf3b8 --- /dev/null +++ b/backend/src/main/resources/opensearch/templates/stories-template.json @@ -0,0 +1,124 @@ +{ + "index_patterns": ["stories_*"], + "priority": 1, + "template": { + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0, + "analysis": { + "analyzer": { + "story_analyzer": { + "type": "standard", + "stopwords": "_english_" + }, + "autocomplete_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": ["lowercase", "edge_ngram"] + } 
+ }, + "filter": { + "edge_ngram": { + "type": "edge_ngram", + "min_gram": 2, + "max_gram": 20 + } + } + } + }, + "mappings": { + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "analyzer": "story_analyzer", + "fields": { + "autocomplete": { + "type": "text", + "analyzer": "autocomplete_analyzer" + }, + "keyword": { + "type": "keyword" + } + } + }, + "content": { + "type": "text", + "analyzer": "story_analyzer" + }, + "summary": { + "type": "text", + "analyzer": "story_analyzer" + }, + "authorNames": { + "type": "text", + "analyzer": "story_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "authorIds": { + "type": "keyword" + }, + "tagNames": { + "type": "keyword" + }, + "seriesTitle": { + "type": "text", + "analyzer": "story_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "seriesId": { + "type": "keyword" + }, + "wordCount": { + "type": "integer" + }, + "rating": { + "type": "float" + }, + "readingTime": { + "type": "integer" + }, + "language": { + "type": "keyword" + }, + "status": { + "type": "keyword" + }, + "createdAt": { + "type": "date", + "format": "strict_date_optional_time||epoch_millis" + }, + "updatedAt": { + "type": "date", + "format": "strict_date_optional_time||epoch_millis" + }, + "publishedAt": { + "type": "date", + "format": "strict_date_optional_time||epoch_millis" + }, + "isRead": { + "type": "boolean" + }, + "isFavorite": { + "type": "boolean" + }, + "readingProgress": { + "type": "float" + }, + "libraryId": { + "type": "keyword" + } + } + } + } +} \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 2b625b8..ffed8ed 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -37,6 +37,11 @@ services: - TYPESENSE_API_KEY=${TYPESENSE_API_KEY} - TYPESENSE_HOST=typesense - TYPESENSE_PORT=8108 + - OPENSEARCH_HOST=opensearch + - OPENSEARCH_PORT=9200 + - OPENSEARCH_USERNAME=${OPENSEARCH_USERNAME:-admin} + - 
OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD} + - SEARCH_ENGINE=${SEARCH_ENGINE:-typesense} - IMAGE_STORAGE_PATH=/app/images - APP_PASSWORD=${APP_PASSWORD} - STORYCOVE_CORS_ALLOWED_ORIGINS=${STORYCOVE_CORS_ALLOWED_ORIGINS:-http://localhost:3000,http://localhost:6925} @@ -46,6 +51,7 @@ services: depends_on: - postgres - typesense + - opensearch networks: - storycove-network @@ -74,9 +80,47 @@ services: networks: - storycove-network + opensearch: + image: opensearchproject/opensearch:3.2.0 + # No port mapping - only accessible within the Docker network + environment: + - cluster.name=storycove-opensearch + - node.name=opensearch-node + - discovery.type=single-node + - bootstrap.memory_lock=true + - "OPENSEARCH_JAVA_OPTS=-Xms256m -Xmx256m" + - "DISABLE_INSTALL_DEMO_CONFIG=true" + - "DISABLE_SECURITY_PLUGIN=false" + - "OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_PASSWORD}" + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + volumes: + - opensearch_data:/usr/share/opensearch/data + networks: + - storycove-network + + opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:3.2.0 + # No port mapping - only accessible within the Docker network + environment: + - OPENSEARCH_HOSTS=https://opensearch:9200 + - "OPENSEARCH_USERNAME=${OPENSEARCH_USERNAME:-admin}" + - "OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}" + - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=false" + depends_on: + - opensearch + networks: + - storycove-network + volumes: postgres_data: typesense_data: + opensearch_data: images_data: library_config: