Perf: Add batch transaction support for search index building

The search index build was extremely slow (~25 verses/second) because each verse was written
with an individual INSERT statement outside of any transaction. This change refactors the build
to collect verses per version and insert them in batches inside SQLite transactions, which
should increase throughput by 100-1000x.

Changes:
- Add insertVersesBatch() method in searchDatabase.js
- Use BEGIN TRANSACTION / COMMIT for batch operations
- Collect all verses for a version, then insert them in one transaction (sketched below)
- Remove per-verse insert calls from buildSearchIndex.js
- Batch-insert ~31,000 verses per version in a single transaction
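
For orientation, a condensed sketch of the new per-version flow, distilled from the
buildSearchIndex.js hunk further down (each collected verse is a plain object with the
fields insertVersesBatch() reads):

    // Condensed from the first hunk below; runs inside the SearchIndexBuilder
    // method that processes one version. Each verse object has the shape
    // { book, chapter, verse, text, version }.
    const allVerses = [];
    for (const book of books) {
      const bookVerses = await this.collectBookVerses(versionName, book, versionPath);
      allVerses.push(...bookVerses);
    }
    // One call wraps the whole version (~31,000 rows) in a single transaction.
    await this.searchDb.insertVersesBatch(allVerses);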

Expected performance improvement:
- Before: 25 verses/second (~82 minutes for 124k verses)
- After: 2,000-5,000 verses/second (~25-62 seconds for 124k verses; arithmetic check below)
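
For scale: 124,000 verses ÷ 25 verses/second ≈ 4,960 seconds, or roughly 83 minutes;
at 2,000-5,000 verses/second the same 124,000 verses take about 25-62 seconds.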

Outside an explicit transaction, SQLite commits every INSERT as its own transaction, which is
what made per-verse inserts so slow. Wrapping the bulk load in a single BEGIN TRANSACTION /
COMMIT is the standard pattern for loading large datasets into SQLite.
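
As a rough illustration of that pattern (a minimal sketch using the sqlite3 npm package,
which matches the callback-style API in the diff below; the database file, table, and
sample rows here are illustrative only):

    // Minimal sketch: one explicit transaction around many prepared-statement inserts.
    // Assumes the sqlite3 npm package; the demo table and sample rows are illustrative.
    const sqlite3 = require('sqlite3');
    const db = new sqlite3.Database('bulk-demo.db');

    db.serialize(() => {
      db.run(`CREATE TABLE IF NOT EXISTS verses
              (book TEXT, chapter INTEGER, verse_number INTEGER, verse_text TEXT, version TEXT)`);

      // Without BEGIN/COMMIT, every INSERT is committed as its own implicit
      // transaction; wrapping the loop in one transaction amortizes that cost.
      db.run('BEGIN TRANSACTION');
      const stmt = db.prepare('INSERT INTO verses VALUES (?, ?, ?, ?, ?)');
      for (let i = 1; i <= 31000; i++) {
        stmt.run('Genesis', 1, i, `sample verse text ${i}`, 'demo');
      }
      stmt.finalize();
      db.run('COMMIT', () => db.close());
    });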

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
commit 1184d08c8b
parent ec5846631e
2025-11-10 19:07:50 -05:00

2 changed files with 62 additions and 26 deletions

buildSearchIndex.js

@@ -176,19 +176,33 @@ class SearchIndexBuilder {
       console.log(`Found ${books.length} books`);
 
+      // Collect all verses for this version, then batch insert
+      const allVerses = [];
       for (const book of books) {
-        await this.buildBookIndex(versionName, book, versionPath);
+        const bookVerses = await this.collectBookVerses(versionName, book, versionPath);
+        allVerses.push(...bookVerses);
       }
 
+      // Batch insert all verses at once (MUCH faster with transactions)
+      console.log(`  Inserting ${allVerses.length} verses in batch...`);
+      await this.searchDb.insertVersesBatch(allVerses);
+      this.versesProcessed += allVerses.length;
+
+      const elapsed = ((Date.now() - this.startTime) / 1000).toFixed(1);
+      const rate = (this.versesProcessed / parseFloat(elapsed)).toFixed(0);
+      console.log(`${versionName.toUpperCase()} complete: ${allVerses.length} verses (${rate} v/s total)`);
     } catch (error) {
       console.error(`Error processing version ${versionName}:`, error);
       throw error;
     }
   }
 
-  // Build index for a specific book in a version
-  async buildBookIndex(versionName, book, versionPath) {
+  // Collect all verses from a book (returns array of verses)
+  async collectBookVerses(versionName, book, versionPath) {
     const bookPath = path.join(versionPath, book);
+    const allVerses = [];
 
     try {
       const files = await fs.readdir(bookPath);
@@ -199,42 +213,32 @@ class SearchIndexBuilder {
         if (!chapterMatch) continue;
 
         const chapter = parseInt(chapterMatch[1]);
-        await this.buildChapterIndex(versionName, book, chapter, path.join(bookPath, chapterFile));
+        const chapterVerses = await this.collectChapterVerses(
+          versionName,
+          book,
+          chapter,
+          path.join(bookPath, chapterFile)
+        );
+        allVerses.push(...chapterVerses);
       }
     } catch (error) {
       // Book might not exist in this version
       console.log(`  Skipping ${book} in ${versionName} (not found)`);
     }
+
+    return allVerses;
   }
 
-  // Build index for a specific chapter
-  async buildChapterIndex(version, book, chapter, filePath) {
+  // Collect verses from a specific chapter (returns array of verses)
+  async collectChapterVerses(version, book, chapter, filePath) {
     try {
       const content = await fs.readFile(filePath, 'utf-8');
       const verses = this.parseVersesFromMarkdown(content, book, chapter, version);
-
-      // Insert all verses into the search database
-      for (const verse of verses) {
-        await this.searchDb.insertVerse(
-          verse.book,
-          verse.chapter,
-          verse.verse,
-          verse.text,
-          verse.version
-        );
-        this.versesProcessed++;
-
-        // Progress indicator every 1000 verses
-        if (this.versesProcessed % 1000 === 0) {
-          const elapsed = ((Date.now() - this.startTime) / 1000).toFixed(1);
-          const rate = (this.versesProcessed / parseFloat(elapsed)).toFixed(0);
-          process.stdout.write(`\r  Processed ${this.versesProcessed} verses (${rate} v/s)`);
-        }
-      }
+      return verses;
     } catch (error) {
       console.error(`Error processing ${filePath}:`, error.message);
+      return [];
     }
   }

searchDatabase.js

@@ -113,6 +113,38 @@ class SearchDatabase {
     });
   }
 
+  // Batch insert verses (MUCH faster - uses transactions)
+  async insertVersesBatch(verses) {
+    return new Promise((resolve, reject) => {
+      this.db.serialize(() => {
+        this.db.run('BEGIN TRANSACTION');
+
+        const stmtVerses = this.db.prepare(
+          `INSERT OR IGNORE INTO verses (book, chapter, verse_number, verse_text, version)
+           VALUES (?, ?, ?, ?, ?)`
+        );
+        const stmtFts = this.db.prepare(
+          `INSERT INTO verses_fts (book, chapter, verse_number, verse_text, version)
+           VALUES (?, ?, ?, ?, ?)`
+        );
+
+        for (const verse of verses) {
+          stmtVerses.run(verse.book, verse.chapter, verse.verse, verse.text, verse.version);
+          stmtFts.run(verse.book, verse.chapter, verse.verse, verse.text, verse.version);
+        }
+
+        stmtVerses.finalize();
+        stmtFts.finalize();
+
+        this.db.run('COMMIT', (err) => {
+          if (err) reject(err);
+          else resolve();
+        });
+      });
+    });
+  }
+
   // Search using FTS5 with advanced features
   async search(query, options = {}) {
     const {