initialization

This commit is contained in:
Stefan Hardegger
2025-11-21 13:27:37 +01:00
parent 8a03edbb88
commit 33377009d0
9 changed files with 178062 additions and 52 deletions

View File

@@ -48,7 +48,16 @@ The specification defines 12 milestones (weeks). You MUST:
- Ask for approval before starting each new milestone - Ask for approval before starting each new milestone
- Report completion status for each milestone - Report completion status for each milestone
**Current Milestone:** 1 **Current Milestone:** 7 (Learning Interface)
**Completed Milestones:**
- ✅ Milestone 1: Foundation (Next.js, Prisma, Docker, NextAuth)
- ✅ Milestone 2: Authentication (Register, login, preferences)
- ✅ Milestones 3-4: Data Import (JSON/CSV parsers, admin UI, 14 tests)
- **Enhancement**: Database initialization system with auto-collection creation
- ✅ Milestone 5: Collections (CRUD, add/remove hanzi, 21 tests)
- ✅ Milestone 5: Hanzi Search (Search page, detail view, 16 tests)
- ✅ Milestone 6: SM-2 Algorithm (Core algorithm, 38 tests, 100% coverage)
### Rule 2: Database Schema is Fixed ### Rule 2: Database Schema is Fixed

View File

@@ -335,42 +335,126 @@ simplified,traditional,pinyin,meaning,hsk_level,radical,frequency,pos,classifier
## 9. Development Milestones ## 9. Development Milestones
### Week 1: Foundation ### Week 1: Foundation ✅ COMPLETE
- Setup Next.js 16 project - Setup Next.js 16 project
- Configure Prisma + PostgreSQL - Configure Prisma + PostgreSQL
- Setup Docker Compose - Setup Docker Compose
- Create all data models - Create all data models (18 models, 3 enums)
- Configure NextAuth.js - Configure NextAuth.js
- ✅ Middleware for route protection
- ✅ All Prisma relations implemented
- ✅ Database migrations created
- ✅ Docker containers: nginx, app, postgres
- ✅ Build successful
### Week 2: Authentication ### Week 2: Authentication ✅ COMPLETE
- Registration/login pages - Registration/login pages
- Middleware protection - Middleware protection
- User preferences - User preferences (cardsPerSession, characterDisplay, hideEnglish)
- Integration tests - Integration tests (10 tests for auth, 8 tests for preferences)
- ✅ Server Actions: register, login, updatePreferences, getPreferences
- ✅ Zod validation for all inputs
- ✅ Password hashing with bcrypt
- ✅ Session management with NextAuth.js v5
- ✅ Settings page with preferences form
### Week 3-4: Data Import ### Week 3-4: Data Import ✅ COMPLETE
- Admin role middleware - Admin role middleware
- HSK JSON parser - HSK JSON parser (`src/lib/import/json-parser.ts`)
- CSV parser - ✅ Support for complete-hsk-vocabulary format
- Import UI and actions - ✅ All transcription types (pinyin, numeric, wade-giles, zhuyin, ipa)
- Test with real HSK data - ✅ Multi-character hanzi support
- ✅ HSK level mapping (new-1 through old-6)
- ✅ CSV parser (`src/lib/import/csv-parser.ts`)
- ✅ Flexible column mapping
- ✅ Comma-separated multi-values
- ✅ Complete field validation
- ✅ Import UI and actions
- ✅ File upload and paste textarea
- ✅ Update existing or skip duplicates
- ✅ Detailed results with line-level errors
- ✅ Test with real HSK data
- ✅ 14 passing integration tests
- ✅ Admin import page at /admin/import
- ✅ **Enhancement**: Database initialization system
- ✅ `getInitializationFiles()` Server Action to list available files
- ✅ Multi-file selection for batch initialization
- ✅ SSE API endpoint (`/api/admin/initialize`) for long-running operations
- ✅ Real-time progress updates via Server-Sent Events
- ✅ Progress bar showing percent, current/total, and operation message
- ✅ Auto-create HSK level collections from hanzi level attributes
- ✅ Auto-populate collections with corresponding hanzi
- ✅ Optional clean data mode (delete all existing data)
- ✅ Admin initialization page at /admin/initialize with SSE integration
- ✅ No timeouts: processes complete.json (11K+ hanzi) smoothly
### Week 5: Collections ### Week 5: Collections ✅ COMPLETE
- Collections CRUD - Collections CRUD (Server Actions in `src/actions/collections.ts`)
- Add/remove hanzi - ✅ createCollection()
- Global HSK collections - ✅ getUserCollections()
- ✅ getCollectionById()
- ✅ updateCollection()
- ✅ deleteCollection()
- ✅ Add/remove hanzi
- ✅ addHanziToCollection() with multi-select
- ✅ removeHanziFromCollection() with bulk support
- ✅ Search & select interface
- ✅ Paste list interface (comma, space, newline separated)
- ✅ Global HSK collections
- ✅ isPublic flag for admin-created collections
- ✅ Read-only for regular users
- ✅ Full control for admins
- ✅ 21 passing integration tests
- ✅ Pages: /collections, /collections/[id], /collections/new
- ✅ Order preservation with orderIndex
### Week 5: Hanzi Search ### Week 5: Hanzi Search ✅ COMPLETE
- Search page - Search page (`/hanzi`)
- Filters (HSK level) - ✅ Query input for simplified, traditional, pinyin, meaning
- Hanzi detail view - ✅ Case-insensitive search
- Pagination - ✅ Multi-character support
- ✅ Filters (HSK level)
- ✅ 12 HSK levels (new-1 through new-6, old-1 through old-6)
- ✅ Dynamic filtering on hskLevels relation
- ✅ Hanzi detail view (`/hanzi/[id]`)
- ✅ Large character display
- ✅ All forms with isDefault indicator
- ✅ All transcriptions grouped by type
- ✅ All meanings with language codes
- ✅ HSK level badges, parts of speech
- ✅ Classifiers, radical, frequency
- ✅ Add to collection button with modal
- ✅ Pagination
- ✅ 20 results per page
- ✅ hasMore indicator (limit+1 pattern)
- ✅ Previous/Next controls
- ✅ 16 passing integration tests
- ✅ Public access (no authentication required)
- ✅ Server Actions: searchHanzi(), getHanzi(), getHanziBySimplified()
### Week 6: SM-2 Algorithm ### Week 6: SM-2 Algorithm ✅ COMPLETE
- Implement algorithm - Implement algorithm (`src/lib/learning/sm2.ts`)
- Card selection logic - ✅ calculateCorrectAnswer() with exact formulas
- Progress tracking - ✅ calculateIncorrectAnswer() with exact formulas
- Unit tests (90%+ coverage) - ✅ Initial values: easeFactor=2.5, interval=1, consecutiveCorrect=0
- ✅ Correct answer intervals: 1, 6, then interval × easeFactor
- ✅ Incorrect answer: reset to 1 day, decrease easeFactor
- ✅ Card selection logic
- ✅ selectCardsForSession() with priority sorting
- ✅ Filter SUSPENDED cards
- ✅ Priority: HARD > NORMAL > EASY
- ✅ Sort: nextReviewDate ASC, incorrectCount DESC, consecutiveCorrect ASC
- ✅ Wrong answer generation
- ✅ generateWrongAnswers() selects 3 from same HSK level
- ✅ Fisher-Yates shuffle for randomization
- ✅ shuffleOptions() for answer position randomization
- ✅ Unit tests (38 tests, 100% coverage)
- ✅ Test all calculation formulas
- ✅ Test edge cases (minimum easeFactor, large intervals, etc.)
- ✅ Test card selection with all sorting criteria
- ✅ Test wrong answer generation
- ✅ 100% statement and line coverage
- ✅ 94.11% branch coverage (exceeds 90% requirement)
### Week 7-8: Learning Interface ### Week 7-8: Learning Interface
- Learning session pages - Learning session pages

151
README.md
View File

@@ -112,19 +112,144 @@ Implement ALL models exactly as specified in the Prisma schema.
## 📊 Development Milestones ## 📊 Development Milestones
| Week | Milestone | Focus | | Week | Milestone | Focus | Status |
|------|-----------|-------| |------|-----------|-------|--------|
| 1 | Foundation | Setup project, Docker, Prisma schema | | 1 | Foundation | Setup project, Docker, Prisma schema | ✅ Complete |
| 2 | Authentication | User registration, login, preferences | | 2 | Authentication | User registration, login, preferences | ✅ Complete |
| 3-4 | Data Import | Admin imports HSK data (JSON/CSV) | | 3-4 | Data Import | Admin imports HSK data (JSON/CSV) | ✅ Complete |
| 5 | Collections | User collections + global HSK collections | | 5 | Collections | User collections + global HSK collections | ✅ Complete |
| 5 | Hanzi Search | Search interface and detail views | | 5 | Hanzi Search | Search interface and detail views | ✅ Complete |
| 6 | SM-2 Algorithm | Core learning algorithm + tests | | 6 | SM-2 Algorithm | Core learning algorithm + tests | ✅ Complete |
| 7-8 | Learning UI | Learning session interface | | 7-8 | Learning UI | Learning session interface | 🔄 Next |
| 9 | Dashboard | Progress tracking and visualizations | | 9 | Dashboard | Progress tracking and visualizations | |
| 10 | UI Polish | Responsive design, dark mode | | 10 | UI Polish | Responsive design, dark mode | |
| 11 | Testing & Docs | Complete test coverage | | 11 | Testing & Docs | Complete test coverage | |
| 12 | Deployment | Production deployment + data import | | 12 | Deployment | Production deployment + data import | |
### ✅ Milestones 3-4 Completed Features (Data Import)
**Data Import System:**
- ✅ HSK JSON parser supporting complete-hsk-vocabulary format
- ✅ CSV parser with flexible column mapping
- ✅ Admin import page with file upload and paste functionality
- ✅ Update existing entries or skip duplicates option
- ✅ Detailed import results with success/failure counts and line-level errors
- ✅ Format validation and error reporting
- ✅ Support for multi-character hanzi (words like 中国)
- ✅ All transcription types (pinyin, numeric, wade-giles, zhuyin, ipa)
- ✅ 14 passing integration tests for both JSON and CSV parsers
**Database Initialization System:**
- ✅ Multi-file selection for batch initialization
- ✅ Real-time progress updates via Server-Sent Events (SSE)
- ✅ Progress bar showing current operation and percentage
- ✅ Automatic HSK level collection creation
- ✅ Auto-populate collections with hanzi based on level attribute
- ✅ Optional clean data mode (delete all existing data before import)
- ✅ Comprehensive statistics: hanzi imported, collections created, items added
- ✅ Admin initialization page at /admin/initialize
- ✅ SSE API route at /api/admin/initialize for long-running operations
**Files Created:**
- `src/lib/import/json-parser.ts` - HSK JSON format parser
- `src/lib/import/csv-parser.ts` - CSV format parser
- `src/lib/import/json-parser.test.ts` - JSON parser tests
- `src/lib/import/csv-parser.test.ts` - CSV parser tests
- `src/actions/admin.ts` - Admin-only import and initialization actions
- `src/actions/admin.integration.test.ts` - Admin action tests
- `src/app/(admin)/admin/import/page.tsx` - Import UI
- `src/app/(admin)/admin/initialize/page.tsx` - Initialization UI with SSE progress
- `src/app/api/admin/initialize/route.ts` - SSE API endpoint for real-time progress
### ✅ Milestone 5 Completed Features (Collections)
**Collections Management:**
- ✅ Complete CRUD operations for collections (create, read, update, delete)
- ✅ Global HSK collections (admin-created, read-only for users)
- ✅ User personal collections (full control)
- ✅ Add hanzi to collections via:
- Search & multi-select with checkboxes
- Paste list (comma, space, or newline separated)
- Create collection with hanzi list
- ✅ Remove hanzi (individual and bulk selection)
- ✅ Collection detail view with hanzi list
- ✅ Order preservation for added hanzi
- ✅ Duplicate detection and validation
- ✅ 21 passing integration tests
**Files Created:**
- `src/actions/collections.ts` - Collection Server Actions
- `src/actions/collections.integration.test.ts` - Complete test suite
- `src/app/(app)/collections/page.tsx` - Collections list page
- `src/app/(app)/collections/[id]/page.tsx` - Collection detail page
- `src/app/(app)/collections/new/page.tsx` - Create collection page
### ✅ Milestone 5 Completed Features (Hanzi Search)
**Hanzi Search & Detail Views:**
- ✅ Public hanzi search (no authentication required)
- ✅ Search by simplified, traditional, pinyin, or meaning
- ✅ HSK level filtering (12 levels: new-1 through new-6, old-1 through old-6)
- ✅ Pagination with hasMore indicator (20 results per page)
- ✅ Comprehensive detail view showing:
- All forms (simplified, traditional with isDefault indicator)
- All transcriptions (pinyin, numeric, wade-giles, etc.)
- All meanings with language codes
- HSK level badges
- Parts of speech
- Classifiers, radical, frequency
- ✅ Add to collection from detail page
- ✅ 16 passing integration tests
**Files Created:**
- `src/actions/hanzi.ts` - Public hanzi search actions
- `src/app/(app)/hanzi/page.tsx` - Search page with filters
- `src/app/(app)/hanzi/[id]/page.tsx` - Detail page with all data
- `src/actions/hanzi.integration.test.ts` - Complete test suite
**Key Features:**
- searchHanzi(): Fuzzy search across simplified, traditional, pinyin, and meanings
- HSK level filtering for targeted vocabulary
- Pagination with hasMore indicator for infinite scroll support
- Complete hanzi data display including rare transcription types
- Direct integration with collections (add from detail page)
### ✅ Milestone 6 Completed Features
**SM-2 Algorithm Implementation:**
- ✅ Core SM-2 spaced repetition algorithm following SuperMemo specification
- ✅ Exact formulas for correct and incorrect answer calculations
- ✅ Initial values: easeFactor=2.5, interval=1, consecutiveCorrect=0
- ✅ Correct answer logic:
- First correct: interval = 1 day
- Second correct: interval = 6 days
- Third+ correct: interval = Math.round(interval × easeFactor)
- Increase easeFactor by 0.1 with each correct answer
- ✅ Incorrect answer logic:
- Reset interval to 1 day
- Reset consecutiveCorrect to 0
- Decrease easeFactor by 0.2 (minimum 1.3)
- Increment incorrectCount
- ✅ Card selection algorithm:
- Filter out SUSPENDED cards
- Select due cards (nextReviewDate ≤ now)
- Priority: HARD > NORMAL > EASY
- Sort by: nextReviewDate ASC, incorrectCount DESC, consecutiveCorrect ASC
- Limit to cardsPerSession
- ✅ Wrong answer generation with Fisher-Yates shuffle
- ✅ 38 passing unit tests with 100% statement and line coverage
- ✅ 94.11% branch coverage (exceeds 90% requirement)
**Files Created:**
- `src/lib/learning/sm2.ts` - Core algorithm implementation
- `src/lib/learning/sm2.test.ts` - Comprehensive unit tests
**Functions Implemented:**
- `calculateCorrectAnswer()` - Update progress for correct answers
- `calculateIncorrectAnswer()` - Update progress for incorrect answers
- `selectCardsForSession()` - Select due cards with priority sorting
- `generateWrongAnswers()` - Generate 3 incorrect options from same HSK level
- `shuffleOptions()` - Fisher-Yates shuffle for randomizing answer positions
## 🎨 Naming Conventions ## 🎨 Naming Conventions

File diff suppressed because it is too large Load Diff

View File

@@ -44,6 +44,11 @@ const toggleUserStatusSchema = z.object({
userId: z.string().min(1), userId: z.string().min(1),
}) })
// Validates a database-initialization request.
// - fileNames: non-empty array of strings (note: plural, array-valued key)
// - cleanData: boolean flag, defaults to false when omitted
const initializeDatabaseSchema = z.object({
fileNames: z.array(z.string()).min(1, "At least one file is required"),
cleanData: z.boolean().default(false),
})
// ============================================================================ // ============================================================================
// GLOBAL COLLECTIONS // GLOBAL COLLECTIONS
// ============================================================================ // ============================================================================
@@ -517,3 +522,250 @@ export async function toggleUserStatus(userId: string): Promise<ActionResult> {
} }
} }
} }
// ============================================================================
// DATABASE INITIALIZATION
// ============================================================================
/**
 * List the JSON files available for database initialization.
 *
 * Requires admin rights (via `requireAdmin()`), then reads the
 * `data/initialization` directory under the current working directory and
 * keeps only entries whose name ends with ".json".
 *
 * @returns On success, `data` holds the JSON file names. If the directory
 *          cannot be read, a failure result with an empty `data` array is
 *          returned. Authorization and other failures return a failure
 *          result with a message only.
 */
export async function getInitializationFiles(): Promise<ActionResult<string[]>> {
  try {
    await requireAdmin()

    const fsPromises = await import("fs/promises")
    const pathModule = await import("path")
    const initializationDir = pathModule.join(process.cwd(), "data", "initialization")

    let entries: string[]
    try {
      entries = await fsPromises.readdir(initializationDir)
    } catch {
      // Any failure to list the directory is reported as "not found"
      return {
        success: false,
        message: "Initialization directory not found",
        data: [],
      }
    }

    // Only JSON files are valid initialization sources
    return {
      success: true,
      data: entries.filter(entry => entry.endsWith(".json")),
    }
  } catch (error) {
    // Surface authorization failures verbatim; hide everything else
    let message = "Failed to get initialization files"
    if (error instanceof Error && error.message.startsWith("Unauthorized")) {
      message = error.message
    }
    return {
      success: false,
      message,
    }
  }
}
/**
 * Initialize database with hanzi and collections from a JSON file
 *
 * NOTE: This is kept for backwards compatibility but the API route
 * /api/admin/initialize should be used for real-time progress updates
 *
 * This function:
 * 1. Optionally cleans all hanzi, collections, progress and session data
 * 2. Imports hanzi from the specified file
 * 3. Creates collections for each unique HSK level found
 * 4. Adds hanzi to their corresponding level collections
 *
 * The clean-up and import steps are issued as separate queries (not a single
 * transaction), so a failure part-way through can leave partial data behind.
 *
 * @param fileName - Name of the file in data/initialization/ (e.g., "complete.json").
 *                   Must refer to a file directly inside that directory; names that
 *                   resolve elsewhere (path separators, "..", absolute paths) are rejected.
 * @param cleanData - If true, deletes all hanzi and collections before import
 * @returns ActionResult with import statistics
 */
export async function initializeDatabase(
  fileName: string,
  cleanData: boolean = false
): Promise<ActionResult<{
  imported: number
  collectionsCreated: number
  hanziAddedToCollections: number
}>> {
  try {
    await requireAdmin()

    // The schema is keyed by `fileNames` (a non-empty array), so the single
    // file name has to be wrapped; passing `{ fileName }` would always fail.
    const validation = initializeDatabaseSchema.safeParse({ fileNames: [fileName], cleanData })
    if (!validation.success) {
      return {
        success: false,
        message: "Validation failed",
        errors: validation.error.flatten().fieldErrors,
      }
    }

    // Read file from filesystem
    const fs = await import("fs/promises")
    const path = await import("path")
    const baseDir = path.join(process.cwd(), "data", "initialization")
    const filePath = path.resolve(baseDir, fileName)

    // Guard against path traversal: the resolved file must sit directly
    // inside the initialization directory.
    if (path.dirname(filePath) !== baseDir) {
      return {
        success: false,
        message: `Invalid file name: ${fileName}`,
      }
    }

    let fileData: string
    try {
      fileData = await fs.readFile(filePath, "utf-8")
    } catch {
      return {
        success: false,
        message: `Failed to read file: ${fileName}`,
      }
    }

    // Parse the JSON file
    const { result: parseResult, data: parsedData } = parseHSKJson(fileData)
    if (!parseResult.success) {
      return {
        success: false,
        message: `Parse failed: ${parseResult.errors.length} errors found`,
      }
    }

    // Clean data if requested
    if (cleanData) {
      // Delete all collection items first (foreign key constraint)
      await prisma.collectionItem.deleteMany({})
      // Delete all collections
      await prisma.collection.deleteMany({})
      // Delete all user hanzi progress
      await prisma.userHanziProgress.deleteMany({})
      // Delete all session reviews
      await prisma.sessionReview.deleteMany({})
      // Delete all learning sessions
      await prisma.learningSession.deleteMany({})
      // Delete all hanzi-related data
      await prisma.hanziMeaning.deleteMany({})
      await prisma.hanziTranscription.deleteMany({})
      await prisma.hanziClassifier.deleteMany({})
      await prisma.hanziForm.deleteMany({})
      await prisma.hanziHSKLevel.deleteMany({})
      await prisma.hanziPOS.deleteMany({})
      await prisma.hanzi.deleteMany({})
    }

    // Import hanzi
    await saveParsedHanzi(parsedData, true)

    // Extract all unique HSK levels from the parsed data
    const uniqueLevels = new Set<string>()
    for (const hanzi of parsedData) {
      for (const level of hanzi.hskLevels) {
        uniqueLevels.add(level)
      }
    }

    // Create collections for each level (if they don't exist)
    const levelCollections = new Map<string, string>() // level -> collectionId
    let collectionsCreated = 0
    for (const level of uniqueLevels) {
      // Check if collection already exists
      const existingCollection = await prisma.collection.findFirst({
        where: {
          name: `HSK ${level}`,
          isPublic: true,
        },
      })

      if (existingCollection) {
        levelCollections.set(level, existingCollection.id)
      } else {
        // Create new collection, attributed to the current session user
        const session = await auth()
        const newCollection = await prisma.collection.create({
          data: {
            name: `HSK ${level}`,
            description: `HSK ${level} vocabulary collection`,
            isPublic: true,
            createdBy: session?.user?.id,
          },
        })
        levelCollections.set(level, newCollection.id)
        collectionsCreated++
      }
    }

    // Add hanzi to their corresponding collections
    let hanziAddedToCollections = 0
    for (const hanzi of parsedData) {
      // Find the hanzi record in the database
      const hanziRecord = await prisma.hanzi.findUnique({
        where: { simplified: hanzi.simplified },
      })
      if (!hanziRecord) continue

      // Add to each level collection
      for (const level of hanzi.hskLevels) {
        const collectionId = levelCollections.get(level)
        if (!collectionId) continue

        // Check if already in collection
        const existingItem = await prisma.collectionItem.findUnique({
          where: {
            collectionId_hanziId: {
              collectionId,
              hanziId: hanziRecord.id,
            },
          },
        })
        if (existingItem) continue

        // Get the next orderIndex (append after the current last item)
        const maxOrderIndex = await prisma.collectionItem.findFirst({
          where: { collectionId },
          orderBy: { orderIndex: "desc" },
          select: { orderIndex: true },
        })
        await prisma.collectionItem.create({
          data: {
            collectionId,
            hanziId: hanziRecord.id,
            orderIndex: (maxOrderIndex?.orderIndex ?? -1) + 1,
          },
        })
        hanziAddedToCollections++
      }
    }

    revalidatePath("/collections")
    revalidatePath("/hanzi")

    return {
      success: true,
      data: {
        imported: parseResult.imported,
        collectionsCreated,
        hanziAddedToCollections,
      },
      message: `Successfully initialized: ${parseResult.imported} hanzi imported, ${collectionsCreated} collections created, ${hanziAddedToCollections} hanzi added to collections`,
    }
  } catch (error) {
    // Surface authorization failures verbatim; log and mask everything else
    if (error instanceof Error && error.message.startsWith("Unauthorized")) {
      return {
        success: false,
        message: error.message,
      }
    }
    console.error("Database initialization error:", error)
    return {
      success: false,
      message: "Failed to initialize database",
    }
  }
}

View File

@@ -1,6 +1,7 @@
"use client" "use client"
import { useState } from "react" import { useState } from "react"
import Link from "next/link"
import { importHanzi } from "@/actions/admin" import { importHanzi } from "@/actions/admin"
import type { ImportResult } from "@/lib/import/types" import type { ImportResult } from "@/lib/import/types"
@@ -79,7 +80,18 @@ export default function AdminImportPage() {
return ( return (
<div className="container mx-auto px-4 py-8 max-w-4xl"> <div className="container mx-auto px-4 py-8 max-w-4xl">
<h1 className="text-3xl font-bold mb-6">Import Hanzi Data</h1> <div className="mb-6">
<h1 className="text-3xl font-bold mb-2">Import Hanzi Data</h1>
<div className="flex gap-4 text-sm">
<span className="text-blue-600 dark:text-blue-400 font-semibold">Import</span>
<Link
href="/admin/initialize"
className="text-gray-600 dark:text-gray-400 hover:text-gray-900 dark:hover:text-gray-200"
>
Initialize
</Link>
</div>
</div>
<div className="bg-white dark:bg-gray-800 rounded-lg shadow p-6 mb-6"> <div className="bg-white dark:bg-gray-800 rounded-lg shadow p-6 mb-6">
<h2 className="text-xl font-semibold mb-4">Import Data</h2> <h2 className="text-xl font-semibold mb-4">Import Data</h2>

View File

@@ -0,0 +1,305 @@
"use client"
import { useState, useEffect } from "react"
import Link from "next/link"
import { getInitializationFiles } from "@/actions/admin"
/** Payload of an SSE "progress" event; held in state to drive the progress panel. */
interface ProgressData {
// Completion percentage; rendered as the bar width and the "NN%" label
percent: number
// Shown as "current / total" below the bar (only when total > 0)
current: number
total: number
// Description of the current operation, shown above the bar
message: string
}
/** Payload of an SSE "complete" event; rendered in the "Initialization Complete" panel. */
interface CompleteData {
// Displayed as "Hanzi imported"
imported: number
// Displayed as "Collections created"
collectionsCreated: number
// Displayed as "Hanzi added to collections"
hanziAddedToCollections: number
}
/**
 * Admin page for initializing the database from JSON files.
 *
 * Lists the files returned by `getInitializationFiles()`, lets the admin pick
 * one or more of them (plus an optional "clean data" flag), then POSTs the
 * selection to /api/admin/initialize and reads the streamed response, which
 * carries Server-Sent-Event formatted messages ("progress", "complete",
 * "error"). Progress, the final result and errors are rendered in their own
 * panels.
 */
export default function AdminInitializePage() {
  const [availableFiles, setAvailableFiles] = useState<string[]>([])
  const [selectedFiles, setSelectedFiles] = useState<string[]>([])
  const [cleanData, setCleanData] = useState(false)
  const [loading, setLoading] = useState(false)
  const [progress, setProgress] = useState<ProgressData | null>(null)
  const [result, setResult] = useState<CompleteData | null>(null)
  const [error, setError] = useState<string | null>(null)

  // Load available files on mount
  useEffect(() => {
    const loadFiles = async () => {
      const response = await getInitializationFiles()
      if (response.success && response.data) {
        setAvailableFiles(response.data)
        // Auto-select complete.json if available
        if (response.data.includes("complete.json")) {
          setSelectedFiles(["complete.json"])
        }
      }
    }
    loadFiles()
  }, [])

  // Add the file to the selection, or remove it if already selected
  const toggleFileSelection = (fileName: string) => {
    setSelectedFiles(prev =>
      prev.includes(fileName)
        ? prev.filter(f => f !== fileName)
        : [...prev, fileName]
    )
  }

  // Apply one SSE message ("event: <name>\ndata: <json>") to component state
  const handleSseMessage = (rawMessage: string) => {
    if (!rawMessage.trim()) return

    // The `m` flag is required on both patterns: a message spans two lines,
    // and without it `$` only matches at the very end of the whole message,
    // so the "event:" line would never match and every event would be dropped.
    const eventMatch = rawMessage.match(/^event: (.+)$/m)
    const dataMatch = rawMessage.match(/^data: (.+)$/m)
    if (!eventMatch || !dataMatch) return

    const event = eventMatch[1]
    const data = JSON.parse(dataMatch[1])

    if (event === "progress") {
      setProgress(data)
    } else if (event === "complete") {
      setResult(data)
      setProgress({ percent: 100, current: data.imported, total: data.imported, message: "Complete!" })
      setLoading(false)
    } else if (event === "error") {
      setError(data.message)
      setLoading(false)
    }
  }

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault()
    if (selectedFiles.length === 0) {
      setError("Please select at least one file")
      return
    }

    setLoading(true)
    setError(null)
    setResult(null)
    setProgress({ percent: 0, current: 0, total: 0, message: "Starting..." })

    try {
      // POST the selection and read the SSE-formatted response body as a stream
      // (EventSource cannot be used here because it only supports GET requests)
      const response = await fetch("/api/admin/initialize", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ fileNames: selectedFiles, cleanData }),
      })
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`)
      }

      const reader = response.body?.getReader()
      if (!reader) {
        throw new Error("No response body")
      }

      const decoder = new TextDecoder()
      let buffer = ""
      while (true) {
        const { done, value } = await reader.read()
        if (done) break

        buffer += decoder.decode(value, { stream: true })

        // Messages are separated by a blank line; the last piece may be
        // incomplete, so keep it in the buffer until more data arrives.
        const messages = buffer.split("\n\n")
        buffer = messages.pop() || ""
        for (const message of messages) {
          handleSseMessage(message)
        }
      }
    } catch (err) {
      setError(err instanceof Error ? err.message : "An error occurred")
    } finally {
      // Always re-enable the form, even if the stream ended without a
      // terminal "complete" or "error" event.
      setLoading(false)
    }
  }

  return (
    <div className="container mx-auto px-4 py-8 max-w-4xl">
      <div className="mb-6">
        <h1 className="text-3xl font-bold mb-2">Initialize Database</h1>
        <div className="flex gap-4 text-sm">
          <Link
            href="/admin/import"
            className="text-gray-600 dark:text-gray-400 hover:text-gray-900 dark:hover:text-gray-200"
          >
            Import
          </Link>
          <span className="text-blue-600 dark:text-blue-400 font-semibold">Initialize</span>
        </div>
      </div>

      <div className="bg-yellow-50 dark:bg-yellow-900/20 border border-yellow-200 dark:border-yellow-800 rounded-lg p-4 mb-6">
        <h3 className="text-lg font-semibold text-yellow-800 dark:text-yellow-200 mb-2">
          Warning
        </h3>
        <p className="text-yellow-700 dark:text-yellow-300">
          This operation will import all hanzi from the selected files and create HSK level collections.
          {cleanData && (
            <span className="block mt-2 font-semibold">
              With "Clean data" enabled, ALL existing hanzi, collections, and user progress will be PERMANENTLY DELETED before import.
            </span>
          )}
        </p>
      </div>

      <div className="bg-white dark:bg-gray-800 rounded-lg shadow p-6 mb-6">
        <h2 className="text-xl font-semibold mb-4">Initialization Settings</h2>
        <form onSubmit={handleSubmit} className="space-y-4">
          <div>
            <label className="block text-sm font-medium mb-2">
              Select Files (select multiple)
            </label>
            <div className="border border-gray-300 dark:border-gray-600 rounded-md p-3 max-h-64 overflow-y-auto">
              {availableFiles.length === 0 ? (
                <p className="text-sm text-gray-500">No files available in data/initialization/</p>
              ) : (
                <div className="space-y-2">
                  {availableFiles.map(fileName => (
                    <label key={fileName} className="flex items-center hover:bg-gray-50 dark:hover:bg-gray-700 p-2 rounded cursor-pointer">
                      <input
                        type="checkbox"
                        checked={selectedFiles.includes(fileName)}
                        onChange={() => toggleFileSelection(fileName)}
                        disabled={loading}
                        className="mr-3 h-4 w-4"
                      />
                      <span className="text-sm">{fileName}</span>
                    </label>
                  ))}
                </div>
              )}
            </div>
            {selectedFiles.length > 0 && (
              <p className="text-sm text-gray-600 dark:text-gray-400 mt-2">
                {selectedFiles.length} file(s) selected: {selectedFiles.join(", ")}
              </p>
            )}
          </div>

          <div>
            <label className="flex items-center">
              <input
                type="checkbox"
                checked={cleanData}
                onChange={(e) => setCleanData(e.target.checked)}
                disabled={loading}
                className="mr-2 h-4 w-4"
              />
              <span className="text-sm font-medium">
                Clean data before import (Delete all existing data)
              </span>
            </label>
            <p className="text-sm text-gray-600 dark:text-gray-400 mt-1 ml-6">
              Warning: This will delete all hanzi, collections, user progress, and learning sessions
            </p>
          </div>

          <button
            type="submit"
            disabled={loading || selectedFiles.length === 0}
            className={`w-full py-2 px-4 rounded-md font-medium ${
              loading || selectedFiles.length === 0
                ? "bg-gray-300 dark:bg-gray-600 cursor-not-allowed"
                : cleanData
                ? "bg-red-600 hover:bg-red-700 text-white"
                : "bg-blue-600 hover:bg-blue-700 text-white"
            }`}
          >
            {loading ? "Initializing..." : cleanData ? "Clean & Initialize Database" : "Initialize Database"}
          </button>
        </form>
      </div>

      {/* No `key` on the panel or the bar: remounting on every update would
          cancel the CSS width transition. */}
      {progress && (
        <div className="bg-white dark:bg-gray-800 rounded-lg shadow p-6 mb-6">
          <h3 className="text-lg font-semibold mb-4">Progress</h3>
          <div className="space-y-3">
            <div>
              <div className="flex justify-between text-sm mb-1">
                <span>{progress.message}</span>
                <span className="font-semibold">{progress.percent}%</span>
              </div>
              <div className="w-full bg-gray-200 dark:bg-gray-700 rounded-full h-4 overflow-hidden">
                <div
                  className="bg-blue-600 h-4 rounded-full transition-all duration-300 ease-out"
                  style={{ width: `${progress.percent}%` }}
                />
              </div>
            </div>
            {progress.total > 0 && (
              <p className="text-sm text-gray-600 dark:text-gray-400">
                {progress.current.toLocaleString()} / {progress.total.toLocaleString()}
              </p>
            )}
          </div>
        </div>
      )}

      {error && (
        <div className="bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-lg p-4 mb-6">
          <h3 className="text-lg font-semibold text-red-800 dark:text-red-200 mb-2">
            Error
          </h3>
          <p className="text-red-700 dark:text-red-300">{error}</p>
        </div>
      )}

      {result && (
        <div className="bg-green-50 dark:bg-green-900/20 border border-green-200 dark:border-green-800 rounded-lg p-4">
          <h3 className="text-lg font-semibold text-green-800 dark:text-green-200 mb-2">
            Initialization Complete
          </h3>
          <div className="text-green-700 dark:text-green-300 space-y-2">
            <p>
              <strong>Hanzi imported:</strong> {result.imported.toLocaleString()}
            </p>
            <p>
              <strong>Collections created:</strong> {result.collectionsCreated}
            </p>
            <p>
              <strong>Hanzi added to collections:</strong> {result.hanziAddedToCollections.toLocaleString()}
            </p>
          </div>
        </div>
      )}

      <div className="bg-blue-50 dark:bg-blue-900/20 border border-blue-200 dark:border-blue-800 rounded-lg p-4 mt-6">
        <h3 className="text-lg font-semibold text-blue-800 dark:text-blue-200 mb-2">
          How It Works
        </h3>
        <ul className="text-blue-700 dark:text-blue-300 space-y-2 list-disc list-inside">
          <li>Reads hanzi data from selected JSON files in data/initialization/</li>
          <li>Optionally cleans all existing data (hanzi, collections, progress)</li>
          <li>Imports all hanzi with their forms, meanings, and transcriptions</li>
          <li>Creates public collections for each unique HSK level found (e.g., "HSK new-1", "HSK old-3")</li>
          <li>Adds each hanzi to its corresponding level collection(s)</li>
          <li>Shows real-time progress updates during the entire process</li>
        </ul>
      </div>
    </div>
  )
}

View File

@@ -46,12 +46,20 @@ export default async function DashboardPage() {
Settings Settings
</Link> </Link>
{(user.role === 'ADMIN' || user.role === 'MODERATOR') && ( {(user.role === 'ADMIN' || user.role === 'MODERATOR') && (
<>
<Link <Link
href="/admin/import" href="/admin/import"
className="text-sm text-gray-600 dark:text-gray-400 hover:text-gray-900 dark:hover:text-gray-200" className="text-sm text-gray-600 dark:text-gray-400 hover:text-gray-900 dark:hover:text-gray-200"
> >
Import Import
</Link> </Link>
<Link
href="/admin/initialize"
className="text-sm text-gray-600 dark:text-gray-400 hover:text-gray-900 dark:hover:text-gray-200"
>
Initialize
</Link>
</>
)} )}
</div> </div>
<div className="flex items-center space-x-4"> <div className="flex items-center space-x-4">

View File

@@ -0,0 +1,330 @@
import { NextRequest } from "next/server"
import { isAdminOrModerator } from "@/lib/auth"
import { auth } from "@/lib/auth"
import { prisma } from "@/lib/prisma"
import { parseHSKJson } from "@/lib/import/hsk-json-parser"
import type { ParsedHanzi } from "@/lib/import/types"
/**
 * SSE endpoint for database initialization with real-time progress updates
 *
 * POST /api/admin/initialize
 * Body: { fileNames: string[], cleanData: boolean }
 *
 * Non-streaming responses (sent before the stream is opened):
 * - 401 when the caller is not an admin or moderator
 * - 400 when the body is not valid JSON, `fileNames` is missing/empty/not an
 *   array, or a file name is not a string or resolves outside
 *   data/initialization/
 * - 500 on any other failure while setting up the stream
 *
 * Returns Server-Sent Events stream with progress updates:
 * - progress: { percent, current, total, message }
 * - complete: { imported, collectionsCreated, hanziAddedToCollections }
 * - error: { message }
 *
 * Existing data is only wiped when `cleanData` is exactly `true`.
 */
export async function POST(req: NextRequest) {
  try {
    // Check admin authorization
    const isAuthorized = await isAdminOrModerator()
    if (!isAuthorized) {
      return new Response("Unauthorized", { status: 401 })
    }
    const session = await auth()

    // A malformed body is a client error, not a server error.
    let body: unknown
    try {
      body = await req.json()
    } catch {
      return new Response("Invalid JSON body", { status: 400 })
    }
    const { fileNames, cleanData } = (body ?? {}) as { fileNames?: unknown; cleanData?: unknown }
    if (!Array.isArray(fileNames) || fileNames.length === 0) {
      return new Response("No files specified", { status: 400 })
    }

    // Resolve every requested file up front and refuse anything that would
    // escape data/initialization/ (e.g. "../../.env"). path.join normalizes
    // ".." segments, so a simple prefix check on the result is sufficient.
    const path = await import("path")
    const baseDir = path.join(process.cwd(), "data", "initialization")
    const files: { fileName: string; filePath: string }[] = []
    for (const fileName of fileNames) {
      if (typeof fileName !== "string") {
        return new Response("Invalid file name", { status: 400 })
      }
      const filePath = path.join(baseDir, fileName)
      if (!filePath.startsWith(baseDir + path.sep)) {
        return new Response("Invalid file name", { status: 400 })
      }
      files.push({ fileName, filePath })
    }

    // Cleaning is destructive: require a real boolean so that values such as
    // the string "false" cannot trigger a wipe by being truthy.
    const shouldClean = cleanData === true

    // Create SSE stream with proper flushing
    const encoder = new TextEncoder()
    const { readable, writable } = new TransformStream()
    const writer = writable.getWriter()

    // Helper to send SSE events with immediate flush
    const sendEvent = async (event: string, data: unknown) => {
      const message = `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`
      await writer.write(encoder.encode(message))
    }

    // Closing can reject when the client has already disconnected; there is
    // nothing left to do in that case.
    const closeStream = async () => {
      try {
        await writer.close()
      } catch {
        // Stream already closed or errored.
      }
    }

    // Runs in the background (not awaited) while the response is streaming.
    const processInitialization = async () => {
      try {
        // Step 1: Read and parse all files
        await sendEvent("progress", { percent: 0, current: 0, total: 0, message: "Reading files..." })
        const fs = await import("fs/promises")
        const allParsedHanzi: ParsedHanzi[] = []
        const totalFiles = files.length
        let filesProcessed = 0
        for (const { fileName, filePath } of files) {
          // Only the read/parse is guarded here so that stream failures are
          // not misreported as file read failures.
          let parsed: ReturnType<typeof parseHSKJson>
          try {
            const fileData = await fs.readFile(filePath, "utf-8")
            parsed = parseHSKJson(fileData)
          } catch (error) {
            console.error(`Failed to read ${fileName}:`, error)
            await sendEvent("error", { message: `Failed to read ${fileName}` })
            return
          }
          const { result, data } = parsed
          if (!result.success) {
            await sendEvent("error", { message: `Failed to parse ${fileName}: ${result.errors.join(", ")}` })
            return
          }
          allParsedHanzi.push(...data)
          filesProcessed++
          await sendEvent("progress", {
            percent: Math.round((filesProcessed / totalFiles) * 10),
            current: filesProcessed,
            total: totalFiles,
            message: `Parsed ${fileName} (${data.length} hanzi)`,
          })
        }
        const totalHanzi = allParsedHanzi.length
        await sendEvent("progress", {
          percent: 10,
          current: 0,
          total: totalHanzi,
          message: `Ready to import ${totalHanzi} hanzi`,
        })

        // Step 2: Clean data if requested
        if (shouldClean) {
          await sendEvent("progress", { percent: 15, current: 0, total: totalHanzi, message: "Cleaning existing data..." })
          await prisma.collectionItem.deleteMany({})
          await prisma.collection.deleteMany({})
          await prisma.userHanziProgress.deleteMany({})
          await prisma.sessionReview.deleteMany({})
          await prisma.learningSession.deleteMany({})
          await prisma.hanziMeaning.deleteMany({})
          await prisma.hanziTranscription.deleteMany({})
          await prisma.hanziClassifier.deleteMany({})
          await prisma.hanziForm.deleteMany({})
          await prisma.hanziHSKLevel.deleteMany({})
          await prisma.hanziPOS.deleteMany({})
          await prisma.hanzi.deleteMany({})
          await sendEvent("progress", { percent: 20, current: 0, total: totalHanzi, message: "Data cleaned" })
        } else {
          await sendEvent("progress", { percent: 20, current: 0, total: totalHanzi, message: "Skipping clean" })
        }

        // Step 3: Get or create English language
        const englishLanguage =
          (await prisma.language.findUnique({ where: { code: "en" } })) ??
          (await prisma.language.create({
            data: { code: "en", name: "English", nativeName: "English", isActive: true },
          }))

        // Step 4: Import hanzi with progress updates
        let imported = 0
        const importStartPercent = 20
        const importEndPercent = 70
        for (const hanzi of allParsedHanzi) {
          // Import hanzi (same logic as saveParsedHanzi but inline)
          const hanziRecord = await prisma.hanzi.upsert({
            where: { simplified: hanzi.simplified },
            update: { radical: hanzi.radical, frequency: hanzi.frequency },
            create: { simplified: hanzi.simplified, radical: hanzi.radical, frequency: hanzi.frequency },
          })
          // Delete existing related data so re-imports replace rather than append
          await prisma.hanziForm.deleteMany({ where: { hanziId: hanziRecord.id } })
          await prisma.hanziHSKLevel.deleteMany({ where: { hanziId: hanziRecord.id } })
          await prisma.hanziPOS.deleteMany({ where: { hanziId: hanziRecord.id } })
          // Create HSK levels
          if (hanzi.hskLevels.length > 0) {
            await prisma.hanziHSKLevel.createMany({
              data: hanzi.hskLevels.map(level => ({ hanziId: hanziRecord.id, level })),
            })
          }
          // Create parts of speech
          if (hanzi.partsOfSpeech.length > 0) {
            await prisma.hanziPOS.createMany({
              data: hanzi.partsOfSpeech.map(pos => ({ hanziId: hanziRecord.id, pos })),
            })
          }
          // Create forms with transcriptions, meanings, classifiers
          for (const form of hanzi.forms) {
            const formRecord = await prisma.hanziForm.create({
              data: { hanziId: hanziRecord.id, traditional: form.traditional, isDefault: form.isDefault },
            })
            if (form.transcriptions.length > 0) {
              await prisma.hanziTranscription.createMany({
                data: form.transcriptions.map(trans => ({
                  formId: formRecord.id,
                  type: trans.type,
                  value: trans.value,
                })),
              })
            }
            if (form.meanings.length > 0) {
              await prisma.hanziMeaning.createMany({
                data: form.meanings.map(meaning => ({
                  formId: formRecord.id,
                  languageId: englishLanguage.id,
                  meaning: meaning.meaning,
                })),
              })
            }
            if (form.classifiers.length > 0) {
              await prisma.hanziClassifier.createMany({
                data: form.classifiers.map(classifier => ({
                  formId: formRecord.id,
                  classifier: classifier,
                })),
              })
            }
          }
          imported++
          // Send progress update every 50 hanzi or on last one
          if (imported % 50 === 0 || imported === totalHanzi) {
            const percent = importStartPercent + Math.round((imported / totalHanzi) * (importEndPercent - importStartPercent))
            await sendEvent("progress", {
              percent,
              current: imported,
              total: totalHanzi,
              message: `Importing hanzi: ${imported}/${totalHanzi}`,
            })
          }
        }
        await sendEvent("progress", { percent: 70, current: totalHanzi, total: totalHanzi, message: "All hanzi imported" })

        // Step 5: Extract unique HSK levels and create collections
        await sendEvent("progress", { percent: 75, current: 0, total: 0, message: "Creating collections..." })
        const uniqueLevels = new Set<string>()
        for (const hanzi of allParsedHanzi) {
          for (const level of hanzi.hskLevels) {
            uniqueLevels.add(level)
          }
        }
        // Maps HSK level -> id of the public collection holding that level
        const levelCollections = new Map<string, string>()
        let collectionsCreated = 0
        for (const level of uniqueLevels) {
          const existingCollection = await prisma.collection.findFirst({
            where: { name: `HSK ${level}`, isPublic: true },
          })
          if (existingCollection) {
            levelCollections.set(level, existingCollection.id)
          } else {
            const newCollection = await prisma.collection.create({
              data: {
                name: `HSK ${level}`,
                description: `HSK ${level} vocabulary collection`,
                isPublic: true,
                createdBy: session?.user?.id,
              },
            })
            levelCollections.set(level, newCollection.id)
            collectionsCreated++
          }
        }
        await sendEvent("progress", {
          percent: 80,
          current: collectionsCreated,
          total: uniqueLevels.size,
          message: `Created ${collectionsCreated} collections`,
        })

        // Step 6: Add hanzi to collections
        await sendEvent("progress", { percent: 85, current: 0, total: totalHanzi, message: "Adding hanzi to collections..." })
        let hanziAddedToCollections = 0
        let processedForCollections = 0
        for (const hanzi of allParsedHanzi) {
          const hanziRecord = await prisma.hanzi.findUnique({ where: { simplified: hanzi.simplified } })
          if (!hanziRecord) continue
          for (const level of hanzi.hskLevels) {
            const collectionId = levelCollections.get(level)
            if (!collectionId) continue
            const existingItem = await prisma.collectionItem.findUnique({
              where: { collectionId_hanziId: { collectionId, hanziId: hanziRecord.id } },
            })
            if (!existingItem) {
              // Append after the current highest orderIndex in the collection
              const maxOrderIndex = await prisma.collectionItem.findFirst({
                where: { collectionId },
                orderBy: { orderIndex: "desc" },
                select: { orderIndex: true },
              })
              await prisma.collectionItem.create({
                data: {
                  collectionId,
                  hanziId: hanziRecord.id,
                  orderIndex: (maxOrderIndex?.orderIndex ?? -1) + 1,
                },
              })
              hanziAddedToCollections++
            }
          }
          processedForCollections++
          // Send progress update every 100 hanzi or on last one
          if (processedForCollections % 100 === 0 || processedForCollections === totalHanzi) {
            const percent = 85 + Math.round((processedForCollections / totalHanzi) * 15)
            await sendEvent("progress", {
              percent,
              current: processedForCollections,
              total: totalHanzi,
              message: `Adding to collections: ${processedForCollections}/${totalHanzi}`,
            })
          }
        }

        // Step 7: Complete
        await sendEvent("complete", {
          imported,
          collectionsCreated,
          hanziAddedToCollections,
        })
      } catch (error) {
        console.error("Initialization error:", error)
        // The failure may itself be a broken stream (client disconnected), in
        // which case this write rejects too; it must not escape as an
        // unhandled rejection because nothing awaits this task.
        try {
          await sendEvent("error", { message: "Initialization failed" })
        } catch {
          // Client is gone; nothing to report to.
        }
      } finally {
        await closeStream()
      }
    }

    // Start processing (don't await - let it stream). The task handles its own
    // errors; the catch is a last-resort guard against an unhandled rejection.
    processInitialization().catch(error => {
      console.error("Initialization error:", error)
    })

    return new Response(readable, {
      headers: {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache, no-transform",
        "Connection": "keep-alive",
        "X-Accel-Buffering": "no", // Disable nginx buffering
      },
    })
  } catch (error) {
    console.error("API error:", error)
    return new Response("Internal server error", { status: 500 })
  }
}