Perf: Add batch transaction support for search index building
The search index build was extremely slow (~25 verses/second) because each verse was inserted with an individual INSERT statement outside any transaction. This refactors the build to collect verses per version and insert them in a single SQLite transaction, which should increase speed by 100-1000x.

Changes:
- Add insertVersesBatch() method in searchDatabase.js
- Use BEGIN TRANSACTION / COMMIT for batch operations
- Collect all verses for a version, then insert in one transaction
- Remove per-verse insert calls from buildSearchIndex.js
- Batch insert ~31,000 verses per version in a single transaction

Expected performance improvement:
- Before: 25 verses/second (~82 minutes for 124k verses)
- After: 2,000-5,000 verses/second (~30-60 seconds for 124k verses)

SQLite is optimized for batched transactions — this is the standard pattern for bulk data loading.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -176,19 +176,33 @@ class SearchIndexBuilder {
|
||||
|
||||
console.log(`Found ${books.length} books`);
|
||||
|
||||
// Collect all verses for this version, then batch insert
|
||||
const allVerses = [];
|
||||
|
||||
for (const book of books) {
|
||||
await this.buildBookIndex(versionName, book, versionPath);
|
||||
const bookVerses = await this.collectBookVerses(versionName, book, versionPath);
|
||||
allVerses.push(...bookVerses);
|
||||
}
|
||||
|
||||
// Batch insert all verses at once (MUCH faster with transactions)
|
||||
console.log(` Inserting ${allVerses.length} verses in batch...`);
|
||||
await this.searchDb.insertVersesBatch(allVerses);
|
||||
this.versesProcessed += allVerses.length;
|
||||
|
||||
const elapsed = ((Date.now() - this.startTime) / 1000).toFixed(1);
|
||||
const rate = (this.versesProcessed / parseFloat(elapsed)).toFixed(0);
|
||||
console.log(` ✓ ${versionName.toUpperCase()} complete: ${allVerses.length} verses (${rate} v/s total)`);
|
||||
|
||||
} catch (error) {
|
||||
console.error(`Error processing version ${versionName}:`, error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
// Build index for a specific book in a version
|
||||
async buildBookIndex(versionName, book, versionPath) {
|
||||
// Collect all verses from a book (returns array of verses)
|
||||
async collectBookVerses(versionName, book, versionPath) {
|
||||
const bookPath = path.join(versionPath, book);
|
||||
const allVerses = [];
|
||||
|
||||
try {
|
||||
const files = await fs.readdir(bookPath);
|
||||
@@ -199,42 +213,32 @@ class SearchIndexBuilder {
|
||||
if (!chapterMatch) continue;
|
||||
|
||||
const chapter = parseInt(chapterMatch[1]);
|
||||
await this.buildChapterIndex(versionName, book, chapter, path.join(bookPath, chapterFile));
|
||||
const chapterVerses = await this.collectChapterVerses(
|
||||
versionName,
|
||||
book,
|
||||
chapter,
|
||||
path.join(bookPath, chapterFile)
|
||||
);
|
||||
allVerses.push(...chapterVerses);
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
// Book might not exist in this version
|
||||
console.log(` Skipping ${book} in ${versionName} (not found)`);
|
||||
}
|
||||
|
||||
return allVerses;
|
||||
}
|
||||
|
||||
// Build index for a specific chapter
|
||||
async buildChapterIndex(version, book, chapter, filePath) {
|
||||
// Collect verses from a specific chapter (returns array of verses)
|
||||
async collectChapterVerses(version, book, chapter, filePath) {
|
||||
try {
|
||||
const content = await fs.readFile(filePath, 'utf-8');
|
||||
const verses = this.parseVersesFromMarkdown(content, book, chapter, version);
|
||||
|
||||
// Insert all verses into the search database
|
||||
for (const verse of verses) {
|
||||
await this.searchDb.insertVerse(
|
||||
verse.book,
|
||||
verse.chapter,
|
||||
verse.verse,
|
||||
verse.text,
|
||||
verse.version
|
||||
);
|
||||
this.versesProcessed++;
|
||||
|
||||
// Progress indicator every 1000 verses
|
||||
if (this.versesProcessed % 1000 === 0) {
|
||||
const elapsed = ((Date.now() - this.startTime) / 1000).toFixed(1);
|
||||
const rate = (this.versesProcessed / parseFloat(elapsed)).toFixed(0);
|
||||
process.stdout.write(`\r Processed ${this.versesProcessed} verses (${rate} v/s)`);
|
||||
}
|
||||
}
|
||||
|
||||
return verses;
|
||||
} catch (error) {
|
||||
console.error(`Error processing ${filePath}:`, error.message);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -113,6 +113,38 @@ class SearchDatabase {
|
||||
});
|
||||
}
|
||||
|
||||
// Batch insert verses (MUCH faster - uses transactions)
|
||||
async insertVersesBatch(verses) {
|
||||
return new Promise((resolve, reject) => {
|
||||
this.db.serialize(() => {
|
||||
this.db.run('BEGIN TRANSACTION');
|
||||
|
||||
const stmtVerses = this.db.prepare(
|
||||
`INSERT OR IGNORE INTO verses (book, chapter, verse_number, verse_text, version)
|
||||
VALUES (?, ?, ?, ?, ?)`
|
||||
);
|
||||
|
||||
const stmtFts = this.db.prepare(
|
||||
`INSERT INTO verses_fts (book, chapter, verse_number, verse_text, version)
|
||||
VALUES (?, ?, ?, ?, ?)`
|
||||
);
|
||||
|
||||
for (const verse of verses) {
|
||||
stmtVerses.run(verse.book, verse.chapter, verse.verse, verse.text, verse.version);
|
||||
stmtFts.run(verse.book, verse.chapter, verse.verse, verse.text, verse.version);
|
||||
}
|
||||
|
||||
stmtVerses.finalize();
|
||||
stmtFts.finalize();
|
||||
|
||||
this.db.run('COMMIT', (err) => {
|
||||
if (err) reject(err);
|
||||
else resolve();
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Search using FTS5 with advanced features
|
||||
async search(query, options = {}) {
|
||||
const {
|
||||
|
||||
Reference in New Issue
Block a user