From fcad028959bd28df804da0ec0b7103dab23fe2be Mon Sep 17 00:00:00 2001 From: Stefan Hardegger Date: Mon, 28 Jul 2025 13:52:09 +0200 Subject: [PATCH] scraping and improvements --- .../controller/AuthorController.java | 36 +- .../controller/CollectionController.java | 9 +- .../storycove/controller/StoryController.java | 7 +- .../com/storycove/service/AuthorService.java | 82 +-- .../storycove/service/TypesenseService.java | 19 - .../storycove/service/AuthorServiceTest.java | 135 ++++- frontend/next.config.js | 21 +- frontend/package-lock.json | 225 +++++++++ frontend/package.json | 21 +- frontend/src/app/add-story/page.tsx | 155 +++++- frontend/src/app/scrape/author/route.ts | 72 +++ frontend/src/app/scrape/bulk/route.ts | 292 +++++++++++ frontend/src/app/scrape/story/route.ts | 85 ++++ frontend/src/app/stories/import/bulk/page.tsx | 300 +++++++++++ frontend/src/components/layout/Header.tsx | 62 ++- frontend/src/components/ui/Dropdown.tsx | 98 ++++ frontend/src/lib/scraper/config/sites.json | 334 ++++++++++++ frontend/src/lib/scraper/scraper.ts | 379 ++++++++++++++ .../lib/scraper/strategies/contentCleaner.ts | 164 ++++++ frontend/src/lib/scraper/strategies/index.ts | 3 + .../lib/scraper/strategies/linkExtractor.ts | 98 ++++ .../lib/scraper/strategies/textExtractor.ts | 144 ++++++ frontend/src/lib/scraper/types.ts | 248 +++++++++ frontend/src/lib/scraper/utils/cache.ts | 35 ++ frontend/src/lib/scraper/utils/rateLimit.ts | 23 + frontend/src/lib/scraper/utils/urlParser.ts | 61 +++ frontend/tsconfig.tsbuildinfo | 2 +- nginx.conf | 18 +- package-lock.json | 304 ++++++++++- storycove-scraper-spec.md | 474 ++++++++++++++++++ swissmilk_hamburger.pdf | Bin 0 -> 2363988 bytes 31 files changed, 3788 insertions(+), 118 deletions(-) create mode 100644 frontend/src/app/scrape/author/route.ts create mode 100644 frontend/src/app/scrape/bulk/route.ts create mode 100644 frontend/src/app/scrape/story/route.ts create mode 100644 frontend/src/app/stories/import/bulk/page.tsx create mode 100644 frontend/src/components/ui/Dropdown.tsx create mode 100644 frontend/src/lib/scraper/config/sites.json create mode 100644 frontend/src/lib/scraper/scraper.ts create mode 100644 frontend/src/lib/scraper/strategies/contentCleaner.ts create mode 100644 frontend/src/lib/scraper/strategies/index.ts create mode 100644 frontend/src/lib/scraper/strategies/linkExtractor.ts create mode 100644 frontend/src/lib/scraper/strategies/textExtractor.ts create mode 100644 frontend/src/lib/scraper/types.ts create mode 100644 frontend/src/lib/scraper/utils/cache.ts create mode 100644 frontend/src/lib/scraper/utils/rateLimit.ts create mode 100644 frontend/src/lib/scraper/utils/urlParser.ts create mode 100644 storycove-scraper-spec.md create mode 100644 swissmilk_hamburger.pdf diff --git a/backend/src/main/java/com/storycove/controller/AuthorController.java b/backend/src/main/java/com/storycove/controller/AuthorController.java index 021ad57..89cc1c2 100644 --- a/backend/src/main/java/com/storycove/controller/AuthorController.java +++ b/backend/src/main/java/com/storycove/controller/AuthorController.java @@ -65,10 +65,12 @@ public class AuthorController { @PostMapping public ResponseEntity createAuthor(@Valid @RequestBody CreateAuthorRequest request) { + logger.info("Creating new author: {}", request.getName()); Author author = new Author(); updateAuthorFromRequest(author, request); Author savedAuthor = authorService.create(author); + logger.info("Successfully created author: {} (ID: {})", savedAuthor.getName(), savedAuthor.getId()); return ResponseEntity.status(HttpStatus.CREATED).body(convertToDto(savedAuthor)); } @@ -81,13 +83,7 @@ public class AuthorController { @RequestParam(required = false, name = "authorRating") Integer rating, @RequestParam(required = false, name = "avatar") MultipartFile avatarFile) { - System.out.println("DEBUG: MULTIPART PUT called with:"); - System.out.println(" - name: " + name); - System.out.println(" - notes: " + notes); - System.out.println(" - urls: " + urls); - System.out.println(" - rating: " + rating); - System.out.println(" - avatar: " + (avatarFile != null ? avatarFile.getOriginalFilename() : "null")); - + logger.info("Updating author with multipart data (ID: {})", id); try { Author existingAuthor = authorService.findById(id); @@ -104,7 +100,6 @@ public class AuthorController { // Handle rating update if (rating != null) { - System.out.println("DEBUG: Setting author rating via PUT: " + rating); existingAuthor.setAuthorRating(rating); } @@ -115,6 +110,7 @@ public class AuthorController { } Author updatedAuthor = authorService.update(id, existingAuthor); + logger.info("Successfully updated author: {} via multipart", updatedAuthor.getName()); return ResponseEntity.ok(convertToDto(updatedAuthor)); } catch (Exception e) { @@ -125,31 +121,27 @@ public class AuthorController { @PutMapping(value = "/{id}", consumes = "application/json") public ResponseEntity updateAuthorJson(@PathVariable UUID id, @Valid @RequestBody UpdateAuthorRequest request) { - System.out.println("DEBUG: JSON PUT called with:"); - System.out.println(" - name: " + request.getName()); - System.out.println(" - notes: " + request.getNotes()); - System.out.println(" - urls: " + request.getUrls()); - System.out.println(" - rating: " + request.getRating()); + logger.info("Updating author with JSON data: {} (ID: {})", request.getName(), id); Author existingAuthor = authorService.findById(id); updateAuthorFromRequest(existingAuthor, request); Author updatedAuthor = authorService.update(id, existingAuthor); + logger.info("Successfully updated author: {} via JSON", updatedAuthor.getName()); return ResponseEntity.ok(convertToDto(updatedAuthor)); } @PutMapping("/{id}") public ResponseEntity updateAuthorGeneric(@PathVariable UUID id, HttpServletRequest request) { - System.out.println("DEBUG: GENERIC PUT called!"); - System.out.println(" - Content-Type: " + request.getContentType()); - System.out.println(" - Method: " + request.getMethod()); return ResponseEntity.status(415).body("Unsupported Media Type. Expected multipart/form-data or application/json"); } @DeleteMapping("/{id}") public ResponseEntity deleteAuthor(@PathVariable UUID id) { + logger.info("Deleting author with ID: {}", id); authorService.delete(id); + logger.info("Successfully deleted author with ID: {}", id); return ResponseEntity.ok(Map.of("message", "Author deleted successfully")); } @@ -177,11 +169,8 @@ public class AuthorController { @PostMapping("/{id}/rating") public ResponseEntity rateAuthor(@PathVariable UUID id, @RequestBody RatingRequest request) { - System.out.println("DEBUG: Rating author " + id + " with rating " + request.getRating()); Author author = authorService.setRating(id, request.getRating()); - System.out.println("DEBUG: After setRating, author rating is: " + author.getAuthorRating()); AuthorDto dto = convertToDto(author); - System.out.println("DEBUG: Final DTO rating is: " + dto.getAuthorRating()); return ResponseEntity.ok(dto); } @@ -211,9 +200,7 @@ public class AuthorController { @PostMapping("/{id}/test-rating/{rating}") public ResponseEntity> testSetRating(@PathVariable UUID id, @PathVariable Integer rating) { try { - System.out.println("DEBUG: Test setting rating " + rating + " for author " + id); Author author = authorService.setRating(id, rating); - System.out.println("DEBUG: After test setRating, got: " + author.getAuthorRating()); return ResponseEntity.ok(Map.of( "success", true, @@ -231,13 +218,11 @@ public class AuthorController { @PostMapping("/{id}/test-put-rating") public ResponseEntity> testPutWithRating(@PathVariable UUID id, @RequestParam Integer rating) { try { - System.out.println("DEBUG: Test PUT with rating " + rating + " for author " + id); Author existingAuthor = authorService.findById(id); existingAuthor.setAuthorRating(rating); Author updatedAuthor = authorService.update(id, existingAuthor); - System.out.println("DEBUG: After PUT update, rating is: " + updatedAuthor.getAuthorRating()); return ResponseEntity.ok(Map.of( "success", true, @@ -389,7 +374,6 @@ public class AuthorController { author.setUrls(updateReq.getUrls()); } if (updateReq.getRating() != null) { - System.out.println("DEBUG: Setting author rating via JSON: " + updateReq.getRating()); author.setAuthorRating(updateReq.getRating()); } } @@ -402,9 +386,6 @@ public class AuthorController { dto.setNotes(author.getNotes()); dto.setAvatarImagePath(author.getAvatarImagePath()); - // Debug logging for author rating - System.out.println("DEBUG: Converting author " + author.getName() + - " with rating: " + author.getAuthorRating()); dto.setAuthorRating(author.getAuthorRating()); dto.setUrls(author.getUrls()); @@ -415,7 +396,6 @@ public class AuthorController { // Calculate and set average story rating dto.setAverageStoryRating(authorService.calculateAverageStoryRating(author.getId())); - System.out.println("DEBUG: DTO authorRating set to: " + dto.getAuthorRating()); return dto; } diff --git a/backend/src/main/java/com/storycove/controller/CollectionController.java b/backend/src/main/java/com/storycove/controller/CollectionController.java index d315e67..5afa11e 100644 --- a/backend/src/main/java/com/storycove/controller/CollectionController.java +++ b/backend/src/main/java/com/storycove/controller/CollectionController.java @@ -56,8 +56,6 @@ public class CollectionController { @RequestParam(required = false) List tags, @RequestParam(defaultValue = "false") boolean archived) { - logger.info("COLLECTIONS: Search request - search='{}', tags={}, archived={}, page={}, limit={}", - search, tags, archived, page, limit); // MANDATORY: Use Typesense for all search/filter operations SearchResultDto results = collectionService.searchCollections(search, tags, archived, page, limit); @@ -94,13 +92,14 @@ public class CollectionController { */ @PostMapping public ResponseEntity createCollection(@Valid @RequestBody CreateCollectionRequest request) { + logger.info("Creating new collection: {}", request.getName()); Collection collection = collectionService.createCollection( request.getName(), request.getDescription(), request.getTagNames(), request.getStoryIds() ); - + logger.info("Successfully created collection: {} (ID: {})", collection.getName(), collection.getId()); return ResponseEntity.status(HttpStatus.CREATED).body(collection); } @@ -115,6 +114,7 @@ public class CollectionController { @RequestParam(required = false) List storyIds, @RequestParam(required = false, name = "coverImage") MultipartFile coverImage) { + logger.info("Creating new collection with image: {}", name); try { // Create collection first Collection collection = collectionService.createCollection(name, description, tags, storyIds); @@ -128,6 +128,7 @@ public class CollectionController { ); } + logger.info("Successfully created collection with image: {} (ID: {})", collection.getName(), collection.getId()); return ResponseEntity.status(HttpStatus.CREATED).body(collection); } catch (Exception e) { @@ -160,7 +161,9 @@ public class CollectionController { */ @DeleteMapping("/{id}") public ResponseEntity> deleteCollection(@PathVariable UUID id) { + logger.info("Deleting collection with ID: {}", id); collectionService.deleteCollection(id); + logger.info("Successfully deleted collection with ID: {}", id); return ResponseEntity.ok(Map.of("message", "Collection deleted successfully")); } diff --git a/backend/src/main/java/com/storycove/controller/StoryController.java b/backend/src/main/java/com/storycove/controller/StoryController.java index 31740d7..2a7fa4c 100644 --- a/backend/src/main/java/com/storycove/controller/StoryController.java +++ b/backend/src/main/java/com/storycove/controller/StoryController.java @@ -86,23 +86,29 @@ public class StoryController { @PostMapping public ResponseEntity createStory(@Valid @RequestBody CreateStoryRequest request) { + logger.info("Creating new story: {}", request.getTitle()); Story story = new Story(); updateStoryFromRequest(story, request); Story savedStory = storyService.createWithTagNames(story, request.getTagNames()); + logger.info("Successfully created story: {} (ID: {})", savedStory.getTitle(), savedStory.getId()); return ResponseEntity.status(HttpStatus.CREATED).body(convertToDto(savedStory)); } @PutMapping("/{id}") public ResponseEntity updateStory(@PathVariable UUID id, @Valid @RequestBody UpdateStoryRequest request) { + logger.info("Updating story: {} (ID: {})", request.getTitle(), id); Story updatedStory = storyService.updateWithTagNames(id, request); + logger.info("Successfully updated story: {}", updatedStory.getTitle()); return ResponseEntity.ok(convertToDto(updatedStory)); } @DeleteMapping("/{id}") public ResponseEntity deleteStory(@PathVariable UUID id) { + logger.info("Deleting story with ID: {}", id); storyService.delete(id); + logger.info("Successfully deleted story with ID: {}", id); return ResponseEntity.ok(Map.of("message", "Story deleted successfully")); } @@ -212,7 +218,6 @@ public class StoryController { @RequestParam(required = false) String sortBy, @RequestParam(required = false) String sortDir) { - logger.info("CONTROLLER DEBUG: Search request - query='{}', tags={}, authors={}", query, tags, authors); if (typesenseService != null) { SearchResultDto results = typesenseService.searchStories( diff --git a/backend/src/main/java/com/storycove/service/AuthorService.java b/backend/src/main/java/com/storycove/service/AuthorService.java index e2f8ce0..ac4ec23 100644 --- a/backend/src/main/java/com/storycove/service/AuthorService.java +++ b/backend/src/main/java/com/storycove/service/AuthorService.java @@ -31,7 +31,7 @@ public class AuthorService { private final TypesenseService typesenseService; @Autowired - public AuthorService(AuthorRepository authorRepository, TypesenseService typesenseService) { + public AuthorService(AuthorRepository authorRepository, @Autowired(required = false) TypesenseService typesenseService) { this.authorRepository = authorRepository; this.typesenseService = typesenseService; } @@ -133,10 +133,12 @@ public class AuthorService { Author savedAuthor = authorRepository.save(author); // Index in Typesense - try { - typesenseService.indexAuthor(savedAuthor); - } catch (Exception e) { - logger.warn("Failed to index author in Typesense: " + savedAuthor.getName(), e); + if (typesenseService != null) { + try { + typesenseService.indexAuthor(savedAuthor); + } catch (Exception e) { + logger.warn("Failed to index author in Typesense: " + savedAuthor.getName(), e); + } } return savedAuthor; @@ -155,10 +157,12 @@ public class AuthorService { Author savedAuthor = authorRepository.save(existingAuthor); // Update in Typesense - try { - typesenseService.updateAuthor(savedAuthor); - } catch (Exception e) { - logger.warn("Failed to update author in Typesense: " + savedAuthor.getName(), e); + if (typesenseService != null) { + try { + typesenseService.updateAuthor(savedAuthor); + } catch (Exception e) { + logger.warn("Failed to update author in Typesense: " + savedAuthor.getName(), e); + } } return savedAuthor; @@ -175,10 +179,12 @@ public class AuthorService { authorRepository.delete(author); // Remove from Typesense - try { - typesenseService.deleteAuthor(id.toString()); - } catch (Exception e) { - logger.warn("Failed to delete author from Typesense: " + author.getName(), e); + if (typesenseService != null) { + try { + typesenseService.deleteAuthor(id.toString()); + } catch (Exception e) { + logger.warn("Failed to delete author from Typesense: " + author.getName(), e); + } } } @@ -188,10 +194,12 @@ public class AuthorService { Author savedAuthor = authorRepository.save(author); // Update in Typesense - try { - typesenseService.updateAuthor(savedAuthor); - } catch (Exception e) { - logger.warn("Failed to update author in Typesense after adding URL: " + savedAuthor.getName(), e); + if (typesenseService != null) { + try { + typesenseService.updateAuthor(savedAuthor); + } catch (Exception e) { + logger.warn("Failed to update author in Typesense after adding URL: " + savedAuthor.getName(), e); + } } return savedAuthor; @@ -203,10 +211,12 @@ public class AuthorService { Author savedAuthor = authorRepository.save(author); // Update in Typesense - try { - typesenseService.updateAuthor(savedAuthor); - } catch (Exception e) { - logger.warn("Failed to update author in Typesense after removing URL: " + savedAuthor.getName(), e); + if (typesenseService != null) { + try { + typesenseService.updateAuthor(savedAuthor); + } catch (Exception e) { + logger.warn("Failed to update author in Typesense after removing URL: " + savedAuthor.getName(), e); + } } return savedAuthor; @@ -242,10 +252,12 @@ public class AuthorService { refreshedAuthor.getAuthorRating(), refreshedAuthor.getName()); // Update in Typesense - try { - typesenseService.updateAuthor(refreshedAuthor); - } catch (Exception e) { - logger.warn("Failed to update author in Typesense after rating: " + refreshedAuthor.getName(), e); + if (typesenseService != null) { + try { + typesenseService.updateAuthor(refreshedAuthor); + } catch (Exception e) { + logger.warn("Failed to update author in Typesense after rating: " + refreshedAuthor.getName(), e); + } } return refreshedAuthor; @@ -290,10 +302,12 @@ public class AuthorService { Author savedAuthor = authorRepository.save(author); // Update in Typesense - try { - typesenseService.updateAuthor(savedAuthor); - } catch (Exception e) { - logger.warn("Failed to update author in Typesense after setting avatar: " + savedAuthor.getName(), e); + if (typesenseService != null) { + try { + typesenseService.updateAuthor(savedAuthor); + } catch (Exception e) { + logger.warn("Failed to update author in Typesense after setting avatar: " + savedAuthor.getName(), e); + } } return savedAuthor; @@ -305,10 +319,12 @@ public class AuthorService { Author savedAuthor = authorRepository.save(author); // Update in Typesense - try { - typesenseService.updateAuthor(savedAuthor); - } catch (Exception e) { - logger.warn("Failed to update author in Typesense after removing avatar: " + savedAuthor.getName(), e); + if (typesenseService != null) { + try { + typesenseService.updateAuthor(savedAuthor); + } catch (Exception e) { + logger.warn("Failed to update author in Typesense after removing avatar: " + savedAuthor.getName(), e); + } } return savedAuthor; diff --git a/backend/src/main/java/com/storycove/service/TypesenseService.java b/backend/src/main/java/com/storycove/service/TypesenseService.java index 890ef86..8f16f66 100644 --- a/backend/src/main/java/com/storycove/service/TypesenseService.java +++ b/backend/src/main/java/com/storycove/service/TypesenseService.java @@ -209,8 +209,6 @@ public class TypesenseService { try { long startTime = System.currentTimeMillis(); - logger.info("SEARCH DEBUG: searchStories called with query='{}', tagFilters={}, authorFilters={}", - query, tagFilters, authorFilters); // Convert 0-based page (frontend/backend) to 1-based page (Typesense) int typesensePage = page + 1; @@ -242,15 +240,12 @@ public class TypesenseService { } if (tagFilters != null && !tagFilters.isEmpty()) { - logger.info("SEARCH DEBUG: Processing {} tag filters: {}", tagFilters.size(), tagFilters); // Use AND logic for multiple tags - items must have ALL selected tags for (String tag : tagFilters) { String escaped = escapeTypesenseValue(tag); String condition = "tagNames:=" + escaped; - logger.info("SEARCH DEBUG: Tag '{}' -> escaped '{}' -> condition '{}'", tag, escaped, condition); filterConditions.add(condition); } - logger.info("SEARCH DEBUG: Added {} individual tag filter conditions", tagFilters.size()); } if (minRating != null) { @@ -263,17 +258,14 @@ public class TypesenseService { if (!filterConditions.isEmpty()) { String finalFilter = String.join(" && ", filterConditions); - logger.info("SEARCH DEBUG: Final filter condition: '{}'", finalFilter); searchParameters.filterBy(finalFilter); } else { - logger.info("SEARCH DEBUG: No filter conditions applied"); } SearchResult searchResult = typesenseClient.collections(STORIES_COLLECTION) .documents() .search(searchParameters); - logger.info("SEARCH DEBUG: Typesense returned {} results", searchResult.getFound()); List results = convertSearchResult(searchResult); long searchTime = System.currentTimeMillis() - startTime; @@ -377,10 +369,8 @@ public class TypesenseService { List tagNames = story.getTags().stream() .map(tag -> tag.getName()) .collect(Collectors.toList()); - logger.debug("INDEXING DEBUG: Story '{}' has tags: {}", story.getTitle(), tagNames); document.put("tagNames", tagNames); } else { - logger.debug("INDEXING DEBUG: Story '{}' has no tags", story.getTitle()); } document.put("rating", story.getRating() != null ? story.getRating() : 0); @@ -746,8 +736,6 @@ public class TypesenseService { public SearchResultDto searchAuthors(String query, int page, int perPage, String sortBy, String sortOrder) { try { - logger.info("AUTHORS SEARCH DEBUG: Searching collection '{}' with query='{}', sortBy='{}', sortOrder='{}'", - AUTHORS_COLLECTION, query, sortBy, sortOrder); SearchParameters searchParameters = new SearchParameters() .q(query != null && !query.trim().isEmpty() ? query : "*") .queryBy("name,notes") @@ -759,8 +747,6 @@ public class TypesenseService { String sortDirection = "desc".equalsIgnoreCase(sortOrder) ? "desc" : "asc"; String sortField = mapAuthorSortField(sortBy); String sortString = sortField + ":" + sortDirection; - logger.info("AUTHORS SEARCH DEBUG: Original sortBy='{}', mapped to='{}', full sort string='{}'", - sortBy, sortField, sortString); searchParameters.sortBy(sortString); } @@ -771,17 +757,12 @@ public class TypesenseService { .search(searchParameters); } catch (Exception sortException) { // If sorting fails (likely due to schema issues), retry without sorting - logger.error("SORTING ERROR DEBUG: Full exception details", sortException); logger.warn("Sorting failed for authors search, retrying without sort: " + sortException.getMessage()); // Try to get collection info for debugging try { CollectionResponse collection = typesenseClient.collections(AUTHORS_COLLECTION).retrieve(); - logger.error("COLLECTION DEBUG: Collection '{}' exists with {} documents and {} fields", - collection.getName(), collection.getNumDocuments(), collection.getFields().size()); - logger.error("COLLECTION DEBUG: Fields: {}", collection.getFields()); } catch (Exception debugException) { - logger.error("COLLECTION DEBUG: Failed to retrieve collection info", debugException); } searchParameters = new SearchParameters() diff --git a/backend/src/test/java/com/storycove/service/AuthorServiceTest.java b/backend/src/test/java/com/storycove/service/AuthorServiceTest.java index 1b2e916..06ff9af 100644 --- a/backend/src/test/java/com/storycove/service/AuthorServiceTest.java +++ b/backend/src/test/java/com/storycove/service/AuthorServiceTest.java @@ -1,6 +1,7 @@ package com.storycove.service; import com.storycove.entity.Author; +import com.storycove.entity.Story; import com.storycove.repository.AuthorRepository; import com.storycove.service.exception.DuplicateResourceException; import com.storycove.service.exception.ResourceNotFoundException; @@ -24,6 +25,7 @@ import static org.junit.jupiter.api.Assertions.*; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.*; +import static org.mockito.Mockito.times; @ExtendWith(MockitoExtension.class) @DisplayName("Author Service Unit Tests") @@ -32,7 +34,6 @@ class AuthorServiceTest { @Mock private AuthorRepository authorRepository; - @InjectMocks private AuthorService authorService; private Author testAuthor; @@ -44,6 +45,9 @@ class AuthorServiceTest { testAuthor = new Author("Test Author"); testAuthor.setId(testId); testAuthor.setNotes("Test notes"); + + // Initialize service with null TypesenseService (which is allowed) + authorService = new AuthorService(authorRepository, null); } @Test @@ -307,4 +311,133 @@ class AuthorServiceTest { assertEquals(5L, count); verify(authorRepository).countRecentAuthors(any(java.time.LocalDateTime.class)); } + + @Test + @DisplayName("Should set author rating with validation") + void shouldSetAuthorRating() { + when(authorRepository.findById(testId)).thenReturn(Optional.of(testAuthor)); + when(authorRepository.save(any(Author.class))).thenReturn(testAuthor); + + Author result = authorService.setRating(testId, 4); + + assertEquals(4, testAuthor.getAuthorRating()); + verify(authorRepository, times(2)).findById(testId); // Called twice: once initially, once after flush + verify(authorRepository).save(testAuthor); + verify(authorRepository).flush(); + } + + @Test + @DisplayName("Should throw exception for invalid rating range") + void shouldThrowExceptionForInvalidRating() { + assertThrows(IllegalArgumentException.class, () -> authorService.setRating(testId, 0)); + assertThrows(IllegalArgumentException.class, () -> authorService.setRating(testId, 6)); + + verify(authorRepository, never()).findById(any()); + verify(authorRepository, never()).save(any()); + } + + @Test + @DisplayName("Should handle null rating") + void shouldHandleNullRating() { + when(authorRepository.findById(testId)).thenReturn(Optional.of(testAuthor)); + when(authorRepository.save(any(Author.class))).thenReturn(testAuthor); + + Author result = authorService.setRating(testId, null); + + assertNull(testAuthor.getAuthorRating()); + verify(authorRepository, times(2)).findById(testId); // Called twice: once initially, once after flush + verify(authorRepository).save(testAuthor); + } + + @Test + @DisplayName("Should find all authors with stories") + void shouldFindAllAuthorsWithStories() { + List authors = List.of(testAuthor); + when(authorRepository.findAll()).thenReturn(authors); + + List result = authorService.findAllWithStories(); + + assertEquals(1, result.size()); + verify(authorRepository).findAll(); + } + + @Test + @DisplayName("Should get author rating from database") + void shouldGetAuthorRatingFromDb() { + when(authorRepository.findAuthorRatingById(testId)).thenReturn(4); + + Integer rating = authorService.getAuthorRatingFromDb(testId); + + assertEquals(4, rating); + verify(authorRepository).findAuthorRatingById(testId); + } + + @Test + @DisplayName("Should calculate average story rating") + void shouldCalculateAverageStoryRating() { + // Setup test author with stories + Story story1 = new Story("Story 1"); + story1.setRating(4); + Story story2 = new Story("Story 2"); + story2.setRating(5); + + testAuthor.getStories().add(story1); + testAuthor.getStories().add(story2); + + when(authorRepository.findById(testId)).thenReturn(Optional.of(testAuthor)); + + Double avgRating = authorService.calculateAverageStoryRating(testId); + + assertEquals(4.5, avgRating); + verify(authorRepository).findById(testId); + } + + @Test + @DisplayName("Should find authors with stories using repository method") + void shouldFindAuthorsWithStoriesFromRepository() { + List authors = List.of(testAuthor); + when(authorRepository.findAuthorsWithStories()).thenReturn(authors); + + List result = authorService.findAuthorsWithStories(); + + assertEquals(1, result.size()); + verify(authorRepository).findAuthorsWithStories(); + } + + @Test + @DisplayName("Should find top rated authors") + void shouldFindTopRatedAuthors() { + List authors = List.of(testAuthor); + when(authorRepository.findTopRatedAuthors()).thenReturn(authors); + + List result = authorService.findTopRatedAuthors(); + + assertEquals(1, result.size()); + verify(authorRepository).findTopRatedAuthors(); + } + + @Test + @DisplayName("Should find most prolific authors") + void shouldFindMostProlificAuthors() { + List authors = List.of(testAuthor); + when(authorRepository.findMostProlificAuthors()).thenReturn(authors); + + List result = authorService.findMostProlificAuthors(); + + assertEquals(1, result.size()); + verify(authorRepository).findMostProlificAuthors(); + } + + @Test + @DisplayName("Should find authors by URL domain") + void shouldFindAuthorsByUrlDomain() { + List authors = List.of(testAuthor); + when(authorRepository.findByUrlDomain("example.com")).thenReturn(authors); + + List result = authorService.findByUrlDomain("example.com"); + + assertEquals(1, result.size()); + verify(authorRepository).findByUrlDomain("example.com"); + } + } \ No newline at end of file diff --git a/frontend/next.config.js b/frontend/next.config.js index de17486..bb75921 100644 --- a/frontend/next.config.js +++ b/frontend/next.config.js @@ -1,12 +1,19 @@ /** @type {import('next').NextConfig} */ const nextConfig = { - async rewrites() { - return [ - { - source: '/api/:path*', - destination: 'http://backend:8080/api/:path*', - }, - ]; + // Removed Next.js rewrites since nginx handles all API routing + webpack: (config, { isServer }) => { + // Exclude cheerio and its dependencies from client-side bundling + if (!isServer) { + config.resolve.fallback = { + ...config.resolve.fallback, + fs: false, + net: false, + tls: false, + 'undici': false, + }; + config.externals.push('cheerio', 'server-only'); + } + return config; }, images: { domains: ['localhost'], diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 6164a48..4b4b113 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -8,14 +8,17 @@ "name": "storycove-frontend", "version": "0.1.0", "dependencies": { + "@heroicons/react": "^2.2.0", "autoprefixer": "^10.4.16", "axios": "^1.6.0", + "cheerio": "^1.0.0-rc.12", "dompurify": "^3.0.5", "next": "14.0.0", "postcss": "^8.4.31", "react": "^18", "react-dom": "^18", "react-dropzone": "^14.2.3", + "server-only": "^0.0.1", "tailwindcss": "^3.3.0" }, "devDependencies": { @@ -137,6 +140,15 @@ "node": "^12.22.0 || ^14.17.0 || >=16.0.0" } }, + "node_modules/@heroicons/react": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@heroicons/react/-/react-2.2.0.tgz", + "integrity": "sha512-LMcepvRaS9LYHJGsF0zzmgKCUim/X3N/DQKc4jepAXJ7l8QxJ1PmxJzqplF2Z3FE4PqBAIGyJAQ/w4B5dsqbtQ==", + "license": "MIT", + "peerDependencies": { + "react": ">= 16 || ^19.0.0-rc" + } + }, "node_modules/@humanwhocodes/config-array": { "version": "0.13.0", "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.13.0.tgz", @@ -1398,6 +1410,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "license": "ISC" + }, "node_modules/brace-expansion": { "version": "1.1.12", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", @@ -1569,6 +1587,44 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/cheerio": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.12.tgz", + "integrity": "sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==", + "license": "MIT", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1", + "htmlparser2": "^8.0.1", + "parse5": "^7.0.0", + "parse5-htmlparser2-tree-adapter": "^7.0.0" + }, + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + "node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/chokidar": { "version": "3.6.0", "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", @@ -1671,6 +1727,34 @@ "node": ">= 8" } }, + "node_modules/css-select": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", + "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz", + "integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/cssesc": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-3.0.0.tgz", @@ -1859,6 +1943,47 @@ "node": ">=6.0.0" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, "node_modules/dompurify": { "version": "3.2.6", "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.2.6.tgz", @@ -1868,6 +1993,20 @@ "@types/trusted-types": "^2.0.7" } }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -1900,6 +2039,18 @@ "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", "license": "MIT" }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/es-abstract": { "version": "1.24.0", "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.24.0.tgz", @@ -3096,6 +3247,25 @@ "node": ">= 0.4" } }, + "node_modules/htmlparser2": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz", + "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1", + "entities": "^4.4.0" + } + }, "node_modules/ignore": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", @@ -4063,6 +4233,18 @@ "node": ">=0.10.0" } }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, "node_modules/object-assign": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", @@ -4291,6 +4473,43 @@ "node": ">=6" } }, + "node_modules/parse5": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", + "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-htmlparser2-tree-adapter": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz", + "integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==", + "license": "MIT", + "dependencies": { + "domhandler": "^5.0.3", + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5/node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -4843,6 +5062,12 @@ "node": ">=10" } }, + "node_modules/server-only": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/server-only/-/server-only-0.0.1.tgz", + "integrity": "sha512-qepMx2JxAa5jjfzxG79yPPq+8BuFToHd1hm7kI+Z4zAq1ftQiP7HcxMhDDItrbtwVeLg/cY2JnKnrcFkmiswNA==", + "license": "MIT" + }, "node_modules/set-function-length": { "version": "1.2.2", "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", diff --git a/frontend/package.json b/frontend/package.json index eaf499a..5f816a8 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -10,23 +10,26 @@ "type-check": "tsc --noEmit" }, "dependencies": { + "@heroicons/react": "^2.2.0", + "autoprefixer": "^10.4.16", + "axios": "^1.6.0", + "cheerio": "^1.0.0-rc.12", + "dompurify": "^3.0.5", "next": "14.0.0", + "postcss": "^8.4.31", "react": "^18", "react-dom": "^18", - "axios": "^1.6.0", - "dompurify": "^3.0.5", "react-dropzone": "^14.2.3", - "tailwindcss": "^3.3.0", - "autoprefixer": "^10.4.16", - "postcss": "^8.4.31" + "server-only": "^0.0.1", + "tailwindcss": "^3.3.0" }, "devDependencies": { - "typescript": "^5", + "@types/dompurify": "^3.0.5", "@types/node": "^20", "@types/react": "^18", "@types/react-dom": "^18", - "@types/dompurify": "^3.0.5", "eslint": "^8", - "eslint-config-next": "14.0.0" + "eslint-config-next": "14.0.0", + "typescript": "^5" } -} \ No newline at end of file +} diff --git a/frontend/src/app/add-story/page.tsx b/frontend/src/app/add-story/page.tsx index 49179d8..49a753c 100644 --- a/frontend/src/app/add-story/page.tsx +++ b/frontend/src/app/add-story/page.tsx @@ -12,6 +12,9 @@ import ImageUpload from '../../components/ui/ImageUpload'; import { storyApi, authorApi } from '../../lib/api'; export default function AddStoryPage() { + const [importMode, setImportMode] = useState<'manual' | 'url'>('manual'); + const [importUrl, setImportUrl] = useState(''); + const [scraping, setScraping] = useState(false); const [formData, setFormData] = useState({ title: '', summary: '', @@ -130,6 +133,57 @@ export default function AddStoryPage() { setFormData(prev => ({ ...prev, tags })); }; + const handleImportFromUrl = async () => { + if (!importUrl.trim()) { + setErrors({ importUrl: 'URL is required' }); + return; + } + + setScraping(true); + setErrors({}); + + try { + const response = await fetch('/scrape/story', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ url: importUrl }), + }); + + if (!response.ok) { + const errorData = await response.json(); + throw new Error(errorData.error || 'Failed to scrape story'); + } + + const scrapedStory = await response.json(); + + // Pre-fill the form with scraped data + setFormData({ + title: scrapedStory.title || '', + summary: scrapedStory.summary || '', + authorName: scrapedStory.author || '', + contentHtml: scrapedStory.content || '', + sourceUrl: scrapedStory.sourceUrl || importUrl, + tags: scrapedStory.tags || [], + seriesName: '', + volume: '', + }); + + // Switch to manual mode so user can edit the pre-filled data + setImportMode('manual'); + setImportUrl(''); + + // Show success message + setErrors({ success: 'Story data imported successfully! Review and edit as needed before saving.' }); + } catch (error: any) { + console.error('Failed to import story:', error); + setErrors({ importUrl: error.message }); + } finally { + setScraping(false); + } + }; + const validateForm = () => { const newErrors: Record = {}; @@ -206,7 +260,105 @@ export default function AddStoryPage() {

-
+ {/* Import Mode Toggle */} +
+
+ + +
+
+ + {/* URL Import Section */} + {importMode === 'url' && ( +
+

Import Story from URL

+

+ Enter a URL from a supported story site to automatically extract the story content, title, author, and other metadata. +

+ +
+ setImportUrl(e.target.value)} + placeholder="https://example.com/story-url" + error={errors.importUrl} + disabled={scraping} + /> + +
+ + + +
+ +
+

+ Need to import multiple stories at once? +

+ +
+ +
+

Supported Sites:

+

Archive of Our Own, DeviantArt, FanFiction.Net, Literotica, Royal Road, Wattpad, and more

+
+
+
+ )} + + {/* Success Message */} + {errors.success && ( +
+

{errors.success}

+
+ )} + + {importMode === 'manual' && ( + {/* Title */}
+ )} ); diff --git a/frontend/src/app/scrape/author/route.ts b/frontend/src/app/scrape/author/route.ts new file mode 100644 index 0000000..20ac5f3 --- /dev/null +++ b/frontend/src/app/scrape/author/route.ts @@ -0,0 +1,72 @@ +import { NextRequest, NextResponse } from 'next/server'; + +export async function POST(request: NextRequest) { + try { + const body = await request.json(); + const { url } = body; + + if (!url || typeof url !== 'string') { + return NextResponse.json( + { error: 'URL is required and must be a string' }, + { status: 400 } + ); + } + + // Dynamic import to prevent client-side bundling + const { StoryScraper } = await import('@/lib/scraper/scraper'); + + const scraper = new StoryScraper(); + const stories = await scraper.scrapeAuthorPage(url); + + return NextResponse.json({ stories }); + } catch (error) { + console.error('Author page scraping error:', error); + + // Check if it's a ScraperError without importing at module level + if (error && typeof error === 'object' && error.constructor.name === 'ScraperError') { + return NextResponse.json( + { + error: (error as any).message, + url: (error as any).url + }, + { status: 400 } + ); + } + + if (error instanceof Error) { + // Handle specific error types + if (error.message.includes('Invalid URL')) { + return NextResponse.json( + { error: 'Invalid URL provided' }, + { status: 400 } + ); + } + + if (error.message.includes('not supported')) { + return NextResponse.json( + { error: 'Author page scraping is not supported for this website' }, + { status: 400 } + ); + } + + if (error.message.includes('HTTP 404')) { + return NextResponse.json( + { error: 'Author page not found at the provided URL' }, + { status: 404 } + ); + } + + if (error.message.includes('timeout')) { + return NextResponse.json( + { error: 'Request timed out while fetching content' }, + { status: 408 } + ); + } + } + + return NextResponse.json( + { error: 'Failed to scrape author page. Please try again.' }, + { status: 500 } + ); + } +} \ No newline at end of file diff --git a/frontend/src/app/scrape/bulk/route.ts b/frontend/src/app/scrape/bulk/route.ts new file mode 100644 index 0000000..f44931a --- /dev/null +++ b/frontend/src/app/scrape/bulk/route.ts @@ -0,0 +1,292 @@ +import { NextRequest, NextResponse } from 'next/server'; + +interface BulkImportRequest { + urls: string[]; +} + +interface ImportResult { + url: string; + status: 'imported' | 'skipped' | 'error'; + reason?: string; + title?: string; + author?: string; + error?: string; + storyId?: string; +} + +interface BulkImportResponse { + results: ImportResult[]; + summary: { + total: number; + imported: number; + skipped: number; + errors: number; + }; +} + +export async function POST(request: NextRequest) { + try { + // Check for authentication + const authorization = request.headers.get('authorization'); + if (!authorization) { + return NextResponse.json( + { error: 'Authentication required for bulk import' }, + { status: 401 } + ); + } + + const body = await request.json(); + const { urls } = body as BulkImportRequest; + + if (!urls || !Array.isArray(urls) || urls.length === 0) { + return NextResponse.json( + { error: 'URLs array is required and must not be empty' }, + { status: 400 } + ); + } + + if (urls.length > 50) { + return NextResponse.json( + { error: 'Maximum 50 URLs allowed per bulk import' }, + { status: 400 } + ); + } + + // Dynamic imports to prevent client-side bundling + const { StoryScraper } = await import('@/lib/scraper/scraper'); + + const scraper = new StoryScraper(); + const results: ImportResult[] = []; + let importedCount = 0; + let skippedCount = 0; + let errorCount = 0; + + console.log(`Starting bulk scraping for ${urls.length} URLs`); + console.log(`Environment NEXT_PUBLIC_API_URL: ${process.env.NEXT_PUBLIC_API_URL}`); + + // For server-side API calls in Docker, use direct backend container URL + // Client-side calls use NEXT_PUBLIC_API_URL through nginx, but server-side needs direct container access + const serverSideApiBaseUrl = 'http://backend:8080/api'; + console.log(`DEBUG: serverSideApiBaseUrl variable is: ${serverSideApiBaseUrl}`); + + // Quick test to verify backend connectivity + try { + console.log(`Testing backend connectivity at: http://backend:8080/api/stories/check-duplicate`); + const testResponse = await fetch(`http://backend:8080/api/stories/check-duplicate?title=test&authorName=test`, { + method: 'GET', + headers: { + 'Authorization': authorization, + 'Content-Type': 'application/json', + }, + }); + console.log(`Backend test response status: ${testResponse.status}`); + } catch (error) { + console.error(`Backend connectivity test failed:`, error); + } + + for (const url of urls) { + console.log(`Processing URL: ${url}`); + + try { + // Validate URL format + if (!url || typeof url !== 'string' || url.trim() === '') { + results.push({ + url: url || 'Empty URL', + status: 'error', + error: 'Invalid URL format' + }); + errorCount++; + continue; + } + + const trimmedUrl = url.trim(); + + // Scrape the story + const scrapedStory = await scraper.scrapeStory(trimmedUrl); + + // Validate required fields + if (!scrapedStory.title || !scrapedStory.author || !scrapedStory.content) { + const missingFields = []; + if (!scrapedStory.title) missingFields.push('title'); + if (!scrapedStory.author) missingFields.push('author'); + if (!scrapedStory.content) missingFields.push('content'); + + results.push({ + url: trimmedUrl, + status: 'skipped', + reason: `Missing required fields: ${missingFields.join(', ')}`, + title: scrapedStory.title, + author: scrapedStory.author + }); + skippedCount++; + continue; + } + + // Check for duplicates using query parameters + try { + // Use hardcoded backend URL for container-to-container communication + const duplicateCheckUrl = `http://backend:8080/api/stories/check-duplicate`; + console.log(`Duplicate check URL: ${duplicateCheckUrl}`); + const params = new URLSearchParams({ + title: scrapedStory.title, + authorName: scrapedStory.author + }); + + const duplicateCheckResponse = await fetch(`${duplicateCheckUrl}?${params.toString()}`, { + method: 'GET', + headers: { + 'Authorization': authorization, + 'Content-Type': 'application/json', + }, + }); + + if (duplicateCheckResponse.ok) { + const duplicateResult = await duplicateCheckResponse.json(); + if (duplicateResult.hasDuplicates) { + results.push({ + url: trimmedUrl, + status: 'skipped', + reason: `Duplicate story found (${duplicateResult.count} existing)`, + title: scrapedStory.title, + author: scrapedStory.author + }); + skippedCount++; + continue; + } + } + } catch (error) { + console.warn('Duplicate check failed:', error); + // Continue with import if duplicate check fails + } + + // Create the story + try { + const storyData = { + title: scrapedStory.title, + summary: scrapedStory.summary || undefined, + contentHtml: scrapedStory.content, + sourceUrl: scrapedStory.sourceUrl || trimmedUrl, + authorName: scrapedStory.author, + tagNames: scrapedStory.tags && scrapedStory.tags.length > 0 ? scrapedStory.tags : undefined, + }; + + // Use hardcoded backend URL for container-to-container communication + const createUrl = `http://backend:8080/api/stories`; + console.log(`Create story URL: ${createUrl}`); + const createResponse = await fetch(createUrl, { + method: 'POST', + headers: { + 'Authorization': authorization, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(storyData), + }); + + if (!createResponse.ok) { + const errorData = await createResponse.json(); + throw new Error(errorData.message || 'Failed to create story'); + } + + const createdStory = await createResponse.json(); + + results.push({ + url: trimmedUrl, + status: 'imported', + title: scrapedStory.title, + author: scrapedStory.author, + storyId: createdStory.id + }); + importedCount++; + + console.log(`Successfully imported: ${scrapedStory.title} by ${scrapedStory.author} (ID: ${createdStory.id})`); + + } catch (error) { + console.error(`Failed to create story for ${trimmedUrl}:`, error); + + let errorMessage = 'Failed to create story'; + if (error instanceof Error) { + errorMessage = error.message; + } + + results.push({ + url: trimmedUrl, + status: 'error', + error: errorMessage, + title: scrapedStory.title, + author: scrapedStory.author + }); + errorCount++; + } + + } catch (error) { + console.error(`Error processing URL ${url}:`, error); + + let errorMessage = 'Unknown error'; + if (error instanceof Error) { + errorMessage = error.message; + } + + results.push({ + url: url, + status: 'error', + error: errorMessage + }); + errorCount++; + } + } + + const response: BulkImportResponse = { + results, + summary: { + total: urls.length, + imported: importedCount, + skipped: skippedCount, + errors: errorCount + } + }; + + console.log(`Bulk import completed:`, response.summary); + + // Trigger Typesense reindex if any stories were imported + if (importedCount > 0) { + try { + console.log('Triggering Typesense reindex after bulk import...'); + const reindexUrl = `http://backend:8080/api/stories/reindex-typesense`; + const reindexResponse = await fetch(reindexUrl, { + method: 'POST', + headers: { + 'Authorization': authorization, + 'Content-Type': 'application/json', + }, + }); + + if (reindexResponse.ok) { + const reindexResult = await reindexResponse.json(); + console.log('Typesense reindex completed:', reindexResult); + } else { + console.warn('Typesense reindex failed:', reindexResponse.status); + } + } catch (error) { + console.warn('Failed to trigger Typesense reindex:', error); + // Don't fail the whole request if reindex fails + } + } + + return NextResponse.json(response); + + } catch (error) { + console.error('Bulk import error:', error); + + if (error instanceof Error) { + return NextResponse.json( + { error: `Bulk import failed: ${error.message}` }, + { status: 500 } + ); + } + + return NextResponse.json( + { error: 'Bulk import failed due to an unknown error' }, + { status: 500 } + ); + } +} \ No newline at end of file diff --git a/frontend/src/app/scrape/story/route.ts b/frontend/src/app/scrape/story/route.ts new file mode 100644 index 0000000..000c69f --- /dev/null +++ b/frontend/src/app/scrape/story/route.ts @@ -0,0 +1,85 @@ +import { NextRequest, NextResponse } from 'next/server'; + +export async function POST(request: NextRequest) { + try { + const body = await request.json(); + const { url } = body; + + if (!url || typeof url !== 'string') { + return NextResponse.json( + { error: 'URL is required and must be a string' }, + { status: 400 } + ); + } + + // Dynamic import to prevent client-side bundling + const { StoryScraper } = await import('@/lib/scraper/scraper'); + const { ScraperError } = await import('@/lib/scraper/types'); + + const scraper = new StoryScraper(); + const story = await scraper.scrapeStory(url); + + // Debug logging + console.log('Scraped story data:', { + url: url, + title: story.title, + author: story.author, + summary: story.summary, + contentLength: story.content?.length || 0, + contentPreview: story.content?.substring(0, 200) + '...', + tags: story.tags, + coverImage: story.coverImage + }); + + return NextResponse.json(story); + } catch (error) { + console.error('Story scraping error:', error); + + // Check if it's a ScraperError without importing at module level + if (error && typeof error === 'object' && error.constructor.name === 'ScraperError') { + return NextResponse.json( + { + error: (error as any).message, + url: (error as any).url + }, + { status: 400 } + ); + } + + if (error instanceof Error) { + // Handle specific error types + if (error.message.includes('Invalid URL')) { + return NextResponse.json( + { error: 'Invalid URL provided' }, + { status: 400 } + ); + } + + if (error.message.includes('Unsupported site')) { + return NextResponse.json( + { error: 'This website is not supported for scraping' }, + { status: 400 } + ); + } + + if (error.message.includes('HTTP 404')) { + return NextResponse.json( + { error: 'Story not found at the provided URL' }, + { status: 404 } + ); + } + + if (error.message.includes('timeout')) { + return NextResponse.json( + { error: 'Request timed out while fetching content' }, + { status: 408 } + ); + } + } + + return NextResponse.json( + { error: 'Failed to scrape story. Please try again.' }, + { status: 500 } + ); + } +} \ No newline at end of file diff --git a/frontend/src/app/stories/import/bulk/page.tsx b/frontend/src/app/stories/import/bulk/page.tsx new file mode 100644 index 0000000..c2272d0 --- /dev/null +++ b/frontend/src/app/stories/import/bulk/page.tsx @@ -0,0 +1,300 @@ +'use client'; + +import { useState } from 'react'; +import { useRouter } from 'next/navigation'; +import Link from 'next/link'; +import { ArrowLeftIcon } from '@heroicons/react/24/outline'; + +interface ImportResult { + url: string; + status: 'imported' | 'skipped' | 'error'; + reason?: string; + title?: string; + author?: string; + error?: string; + storyId?: string; +} + +interface BulkImportResponse { + results: ImportResult[]; + summary: { + total: number; + imported: number; + skipped: number; + errors: number; + }; +} + +export default function BulkImportPage() { + const router = useRouter(); + const [urls, setUrls] = useState(''); + const [isLoading, setIsLoading] = useState(false); + const [results, setResults] = useState(null); + const [error, setError] = useState(null); + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + + if (!urls.trim()) { + setError('Please enter at least one URL'); + return; + } + + setIsLoading(true); + setError(null); + setResults(null); + + try { + // Parse URLs from textarea (one per line) + const urlList = urls + .split('\n') + .map(url => url.trim()) + .filter(url => url.length > 0); + + if (urlList.length === 0) { + setError('Please enter at least one valid URL'); + setIsLoading(false); + return; + } + + if (urlList.length > 50) { + setError('Maximum 50 URLs allowed per bulk import'); + setIsLoading(false); + return; + } + + // Get auth token for server-side API calls + const token = localStorage.getItem('auth-token'); + + const response = await fetch('/scrape/bulk', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': token ? `Bearer ${token}` : '', + }, + body: JSON.stringify({ urls: urlList }), + }); + + if (!response.ok) { + const errorData = await response.json(); + throw new Error(errorData.error || 'Bulk import failed'); + } + + const data: BulkImportResponse = await response.json(); + setResults(data); + + } catch (err) { + console.error('Bulk import error:', err); + setError(err instanceof Error ? err.message : 'Failed to import stories'); + } finally { + setIsLoading(false); + } + }; + + const handleReset = () => { + setUrls(''); + setResults(null); + setError(null); + }; + + const getStatusColor = (status: string) => { + switch (status) { + case 'imported': return 'text-green-700 bg-green-50 border-green-200'; + case 'skipped': return 'text-yellow-700 bg-yellow-50 border-yellow-200'; + case 'error': return 'text-red-700 bg-red-50 border-red-200'; + default: return 'text-gray-700 bg-gray-50 border-gray-200'; + } + }; + + const getStatusIcon = (status: string) => { + switch (status) { + case 'imported': return '✓'; + case 'skipped': return '⚠'; + case 'error': return '✗'; + default: return ''; + } + }; + + return ( +
+
+ {/* Header */} +
+
+ + + Back to Library + +
+ +

Bulk Import Stories

+

+ Import multiple stories at once by providing a list of URLs. Each URL will be scraped + and automatically added to your story collection. +

+
+ + {!results ? ( + // Import Form +
+
+ +

+ Enter one URL per line. Maximum 50 URLs per import. +

+