From 1184d08c8b5b0dea3b01dd21d4d313aab9a06e97 Mon Sep 17 00:00:00 2001 From: Joshua Ryder Date: Mon, 10 Nov 2025 19:07:50 -0500 Subject: [PATCH] Perf: Add batch transaction support for search index building MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The search index build was extremely slow (25 v/s) due to individual INSERT statements without transactions. This refactors the build to use batch inserts with SQLite transactions, which should increase speed by roughly 80-200x. Changes: - Add insertVersesBatch() method in searchDatabase.js - Use BEGIN TRANSACTION / COMMIT for batch operations - Collect all verses for a version, then insert in one transaction - Remove per-verse insert calls from buildSearchIndex.js - Batch insert ~31,000 verses per version in a single transaction Expected performance improvement: - Before: 25 verses/second (~82 minutes for 124k verses) - After: 2,000-5,000 verses/second (~25-60 seconds for 124k verses) SQLite is optimized for batched transactions - this is the standard pattern for bulk data loading. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- backend/src/buildSearchIndex.js | 56 ++++++++++++++++++--------------- backend/src/searchDatabase.js | 32 +++++++++++++++++++ 2 files changed, 62 insertions(+), 26 deletions(-) diff --git a/backend/src/buildSearchIndex.js b/backend/src/buildSearchIndex.js index 09bda22a..7efef7c9 100644 --- a/backend/src/buildSearchIndex.js +++ b/backend/src/buildSearchIndex.js @@ -176,19 +176,33 @@ class SearchIndexBuilder { console.log(`Found ${books.length} books`); + // Collect all verses for this version, then batch insert + const allVerses = []; + for (const book of books) { - await this.buildBookIndex(versionName, book, versionPath); + const bookVerses = await this.collectBookVerses(versionName, book, versionPath); + allVerses.push(...bookVerses); } + // Batch insert all verses at once (MUCH faster with transactions) + console.log(` Inserting ${allVerses.length} verses in batch...`); + await this.searchDb.insertVersesBatch(allVerses); + this.versesProcessed += allVerses.length; + + const elapsed = ((Date.now() - this.startTime) / 1000).toFixed(1); + const rate = (this.versesProcessed / parseFloat(elapsed)).toFixed(0); + console.log(` ✓ ${versionName.toUpperCase()} complete: ${allVerses.length} verses (${rate} v/s total)`); + } catch (error) { console.error(`Error processing version ${versionName}:`, error); throw error; } } - // Build index for a specific book in a version - async buildBookIndex(versionName, book, versionPath) { + // Collect all verses from a book (returns array of verses) + async collectBookVerses(versionName, book, versionPath) { const bookPath = path.join(versionPath, book); + const allVerses = []; try { const files = await fs.readdir(bookPath); @@ -199,42 +213,32 @@ class SearchIndexBuilder { if (!chapterMatch) continue; const chapter = parseInt(chapterMatch[1]); - await this.buildChapterIndex(versionName, book, chapter, path.join(bookPath, chapterFile)); + const 
chapterVerses = await this.collectChapterVerses( + versionName, + book, + chapter, + path.join(bookPath, chapterFile) + ); + allVerses.push(...chapterVerses); } } catch (error) { // Book might not exist in this version console.log(` Skipping ${book} in ${versionName} (not found)`); } + + return allVerses; } - // Build index for a specific chapter - async buildChapterIndex(version, book, chapter, filePath) { + // Collect verses from a specific chapter (returns array of verses) + async collectChapterVerses(version, book, chapter, filePath) { try { const content = await fs.readFile(filePath, 'utf-8'); const verses = this.parseVersesFromMarkdown(content, book, chapter, version); - - // Insert all verses into the search database - for (const verse of verses) { - await this.searchDb.insertVerse( - verse.book, - verse.chapter, - verse.verse, - verse.text, - verse.version - ); - this.versesProcessed++; - - // Progress indicator every 1000 verses - if (this.versesProcessed % 1000 === 0) { - const elapsed = ((Date.now() - this.startTime) / 1000).toFixed(1); - const rate = (this.versesProcessed / parseFloat(elapsed)).toFixed(0); - process.stdout.write(`\r Processed ${this.versesProcessed} verses (${rate} v/s)`); - } - } - + return verses; } catch (error) { console.error(`Error processing ${filePath}:`, error.message); + return []; } } diff --git a/backend/src/searchDatabase.js b/backend/src/searchDatabase.js index 20c98bfa..e2bd63f1 100644 --- a/backend/src/searchDatabase.js +++ b/backend/src/searchDatabase.js @@ -113,6 +113,38 @@ class SearchDatabase { }); } + // Batch insert verses (MUCH faster - uses transactions) + async insertVersesBatch(verses) { + return new Promise((resolve, reject) => { + this.db.serialize(() => { + this.db.run('BEGIN TRANSACTION'); + + const stmtVerses = this.db.prepare( + `INSERT OR IGNORE INTO verses (book, chapter, verse_number, verse_text, version) + VALUES (?, ?, ?, ?, ?)` + ); + + const stmtFts = this.db.prepare( + `INSERT INTO verses_fts 
(book, chapter, verse_number, verse_text, version) + VALUES (?, ?, ?, ?, ?)` + ); + + for (const verse of verses) { + stmtVerses.run(verse.book, verse.chapter, verse.verse, verse.text, verse.version); + stmtFts.run(verse.book, verse.chapter, verse.verse, verse.text, verse.version); + } + + stmtVerses.finalize(); + stmtFts.finalize(); + + this.db.run('COMMIT', (err) => { + if (err) reject(err); + else resolve(); + }); + }); + }); + } + // Search using FTS5 with advanced features async search(query, options = {}) { const {