Perf: Add batch transaction support for search index building
The search index build was extremely slow (25 v/s) because every verse was written with an individual INSERT statement outside of any transaction. This commit refactors the build to use batch inserts inside SQLite transactions, which should increase speed by 100-1000x. Changes: (1) add an insertVersesBatch() method in searchDatabase.js; (2) use BEGIN TRANSACTION / COMMIT for batch operations; (3) collect all verses for a version, then insert them in one transaction; (4) remove the per-verse insert calls from buildSearchIndex.js; (5) batch insert ~31,000 verses per version in a single transaction. Expected performance improvement: before, 25 verses/second (~82 minutes for 124k verses); after, 2,000-5,000 verses/second (~30-60 seconds for 124k verses). SQLite is optimized for batched transactions; this is the standard pattern for bulk data loading. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -176,19 +176,33 @@ class SearchIndexBuilder {
|
|||||||
|
|
||||||
console.log(`Found ${books.length} books`);
|
console.log(`Found ${books.length} books`);
|
||||||
|
|
||||||
|
// Collect all verses for this version, then batch insert
|
||||||
|
const allVerses = [];
|
||||||
|
|
||||||
for (const book of books) {
|
for (const book of books) {
|
||||||
await this.buildBookIndex(versionName, book, versionPath);
|
const bookVerses = await this.collectBookVerses(versionName, book, versionPath);
|
||||||
|
allVerses.push(...bookVerses);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Batch insert all verses at once (MUCH faster with transactions)
|
||||||
|
console.log(` Inserting ${allVerses.length} verses in batch...`);
|
||||||
|
await this.searchDb.insertVersesBatch(allVerses);
|
||||||
|
this.versesProcessed += allVerses.length;
|
||||||
|
|
||||||
|
const elapsed = ((Date.now() - this.startTime) / 1000).toFixed(1);
|
||||||
|
const rate = (this.versesProcessed / parseFloat(elapsed)).toFixed(0);
|
||||||
|
console.log(` ✓ ${versionName.toUpperCase()} complete: ${allVerses.length} verses (${rate} v/s total)`);
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`Error processing version ${versionName}:`, error);
|
console.error(`Error processing version ${versionName}:`, error);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build index for a specific book in a version
|
// Collect all verses from a book (returns array of verses)
|
||||||
async buildBookIndex(versionName, book, versionPath) {
|
async collectBookVerses(versionName, book, versionPath) {
|
||||||
const bookPath = path.join(versionPath, book);
|
const bookPath = path.join(versionPath, book);
|
||||||
|
const allVerses = [];
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const files = await fs.readdir(bookPath);
|
const files = await fs.readdir(bookPath);
|
||||||
@@ -199,42 +213,32 @@ class SearchIndexBuilder {
|
|||||||
if (!chapterMatch) continue;
|
if (!chapterMatch) continue;
|
||||||
|
|
||||||
const chapter = parseInt(chapterMatch[1]);
|
const chapter = parseInt(chapterMatch[1]);
|
||||||
await this.buildChapterIndex(versionName, book, chapter, path.join(bookPath, chapterFile));
|
const chapterVerses = await this.collectChapterVerses(
|
||||||
|
versionName,
|
||||||
|
book,
|
||||||
|
chapter,
|
||||||
|
path.join(bookPath, chapterFile)
|
||||||
|
);
|
||||||
|
allVerses.push(...chapterVerses);
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// Book might not exist in this version
|
// Book might not exist in this version
|
||||||
console.log(` Skipping ${book} in ${versionName} (not found)`);
|
console.log(` Skipping ${book} in ${versionName} (not found)`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return allVerses;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build index for a specific chapter
|
// Collect verses from a specific chapter (returns array of verses)
|
||||||
async buildChapterIndex(version, book, chapter, filePath) {
|
async collectChapterVerses(version, book, chapter, filePath) {
|
||||||
try {
|
try {
|
||||||
const content = await fs.readFile(filePath, 'utf-8');
|
const content = await fs.readFile(filePath, 'utf-8');
|
||||||
const verses = this.parseVersesFromMarkdown(content, book, chapter, version);
|
const verses = this.parseVersesFromMarkdown(content, book, chapter, version);
|
||||||
|
return verses;
|
||||||
// Insert all verses into the search database
|
|
||||||
for (const verse of verses) {
|
|
||||||
await this.searchDb.insertVerse(
|
|
||||||
verse.book,
|
|
||||||
verse.chapter,
|
|
||||||
verse.verse,
|
|
||||||
verse.text,
|
|
||||||
verse.version
|
|
||||||
);
|
|
||||||
this.versesProcessed++;
|
|
||||||
|
|
||||||
// Progress indicator every 1000 verses
|
|
||||||
if (this.versesProcessed % 1000 === 0) {
|
|
||||||
const elapsed = ((Date.now() - this.startTime) / 1000).toFixed(1);
|
|
||||||
const rate = (this.versesProcessed / parseFloat(elapsed)).toFixed(0);
|
|
||||||
process.stdout.write(`\r Processed ${this.versesProcessed} verses (${rate} v/s)`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`Error processing ${filePath}:`, error.message);
|
console.error(`Error processing ${filePath}:`, error.message);
|
||||||
|
return [];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -113,6 +113,38 @@ class SearchDatabase {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Batch insert verses (MUCH faster - uses transactions)
|
||||||
|
async insertVersesBatch(verses) {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
this.db.serialize(() => {
|
||||||
|
this.db.run('BEGIN TRANSACTION');
|
||||||
|
|
||||||
|
const stmtVerses = this.db.prepare(
|
||||||
|
`INSERT OR IGNORE INTO verses (book, chapter, verse_number, verse_text, version)
|
||||||
|
VALUES (?, ?, ?, ?, ?)`
|
||||||
|
);
|
||||||
|
|
||||||
|
const stmtFts = this.db.prepare(
|
||||||
|
`INSERT INTO verses_fts (book, chapter, verse_number, verse_text, version)
|
||||||
|
VALUES (?, ?, ?, ?, ?)`
|
||||||
|
);
|
||||||
|
|
||||||
|
for (const verse of verses) {
|
||||||
|
stmtVerses.run(verse.book, verse.chapter, verse.verse, verse.text, verse.version);
|
||||||
|
stmtFts.run(verse.book, verse.chapter, verse.verse, verse.text, verse.version);
|
||||||
|
}
|
||||||
|
|
||||||
|
stmtVerses.finalize();
|
||||||
|
stmtFts.finalize();
|
||||||
|
|
||||||
|
this.db.run('COMMIT', (err) => {
|
||||||
|
if (err) reject(err);
|
||||||
|
else resolve();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Search using FTS5 with advanced features
|
// Search using FTS5 with advanced features
|
||||||
async search(query, options = {}) {
|
async search(query, options = {}) {
|
||||||
const {
|
const {
|
||||||
|
|||||||
Reference in New Issue
Block a user