const sqlite3 = require('sqlite3').verbose();
const path = require('path');
const fs = require('fs').promises;
const fsSync = require('fs');

// Shared INSERT statements used by both the single-row and the batch insert paths.
const INSERT_VERSE_SQL = `INSERT OR IGNORE INTO verses (book, chapter, verse_number, verse_text, version)
  VALUES (?, ?, ?, ?, ?)`;
const INSERT_FTS_SQL = `INSERT INTO verses_fts (book, chapter, verse_number, verse_text, version)
  VALUES (?, ?, ?, ?, ?)`;

// Characters FTS5 accepts in an unquoted "bareword": ASCII letters, digits,
// underscore and any non-ASCII character.
const FTS5_BAREWORD = /^[A-Za-z0-9_\u0080-\uffff]+$/;

// Barewords that FTS5 interprets as operators (case-sensitive).
const FTS5_KEYWORDS = new Set(['AND', 'OR', 'NOT', 'NEAR']);

/**
 * Promise wrapper around Database#run.
 * @param {object} db - sqlite3 Database handle.
 * @param {string} sql - Statement to execute.
 * @param {Array} [params] - Positional bind parameters.
 * @returns {Promise<{changes: number, lastID: number}>} Statement metadata.
 */
function runAsync(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    // A regular function is required: sqlite3 exposes `changes`/`lastID` on `this`.
    db.run(sql, params, function onRun(err) {
      if (err) reject(err);
      else resolve({ changes: this.changes, lastID: this.lastID });
    });
  });
}

/**
 * Promise wrapper around Database#get (first result row).
 * @param {object} db - sqlite3 Database handle.
 * @param {string} sql - Query to execute.
 * @param {Array} [params] - Positional bind parameters.
 * @returns {Promise<object|undefined>} The first row, if any.
 */
function getAsync(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    db.get(sql, params, (err, row) => (err ? reject(err) : resolve(row)));
  });
}

/**
 * Promise wrapper around Database#all (every result row).
 * @param {object} db - sqlite3 Database handle.
 * @param {string} sql - Query to execute.
 * @param {Array} [params] - Positional bind parameters.
 * @returns {Promise<object[]>} All result rows.
 */
function allAsync(db, sql, params = []) {
  return new Promise((resolve, reject) => {
    db.all(sql, params, (err, rows) => (err ? reject(err) : resolve(rows)));
  });
}

/**
 * Promise wrapper around Database#prepare.
 * @param {object} db - sqlite3 Database handle.
 * @param {string} sql - Statement to compile.
 * @returns {Promise<object>} The prepared Statement.
 */
function prepareAsync(db, sql) {
  return new Promise((resolve, reject) => {
    const statement = db.prepare(sql, (err) => (err ? reject(err) : resolve(statement)));
  });
}

/**
 * Promise wrapper around Statement#run.
 * @param {object} statement - Prepared sqlite3 Statement.
 * @param {Array} params - Positional bind parameters.
 * @returns {Promise<number>} Number of rows changed by the statement.
 */
function runStatement(statement, params) {
  return new Promise((resolve, reject) => {
    statement.run(params, function onRun(err) {
      if (err) reject(err);
      else resolve(this.changes);
    });
  });
}

/**
 * Wrap text in an FTS5 string literal; embedded double quotes are doubled.
 * @param {string} text - Raw text.
 * @returns {string} Quoted FTS5 string.
 */
function quoteFtsString(text) {
  return `"${text.replace(/"/g, '""')}"`;
}

/**
 * Turn one user-typed word into a safe FTS5 term. Plain words are returned
 * unchanged; anything containing punctuation, or colliding with an FTS5
 * keyword, is quoted so it cannot cause a syntax error.
 * @param {string} word - A single whitespace-free word.
 * @returns {string} FTS5 term.
 */
function toFtsTerm(word) {
  const isPlain = FTS5_BAREWORD.test(word) && !FTS5_KEYWORDS.has(word);
  return isPlain ? word : quoteFtsString(word);
}

/**
 * Insert verses into `verses` and `verses_fts` inside one transaction.
 * The FTS row is written only when the `verses` row was actually inserted,
 * so duplicates skipped by INSERT OR IGNORE are not indexed twice.
 * Any failure rolls the whole batch back.
 * @param {object} db - sqlite3 Database handle.
 * @param {Iterable<{book: string, chapter: number, verse: number, text: string, version: string}>} verses
 * @returns {Promise<void>}
 */
async function insertBatch(db, verses) {
  const stmtVerses = await prepareAsync(db, INSERT_VERSE_SQL);
  let stmtFts;
  try {
    stmtFts = await prepareAsync(db, INSERT_FTS_SQL);
  } catch (err) {
    stmtVerses.finalize();
    throw err;
  }

  try {
    // BEGIN stays outside the rollback scope: if it fails there is nothing to undo.
    await runAsync(db, 'BEGIN TRANSACTION');
    try {
      for (const verse of verses) {
        const params = [verse.book, verse.chapter, verse.verse, verse.text, verse.version];
        const inserted = await runStatement(stmtVerses, params);
        if (inserted > 0) {
          await runStatement(stmtFts, params);
        }
      }
      await runAsync(db, 'COMMIT');
    } catch (err) {
      try {
        await runAsync(db, 'ROLLBACK');
      } catch (rollbackErr) {
        console.error('Error rolling back batch insert:', rollbackErr);
      }
      throw err;
    }
  } finally {
    stmtVerses.finalize();
    stmtFts.finalize();
  }
}

/**
 * SQLite-backed verse store with an FTS5 full-text index.
 *
 * Two tables are maintained side by side: `verses` (regular table with a
 * uniqueness constraint, used for context lookups and statistics) and
 * `verses_fts` (FTS5 virtual table used for MATCH queries).
 */
class SearchDatabase {
  /**
   * @param {string} [dbPath] - Database file path; defaults to
   *   `../data/bible.db` relative to this module.
   */
  constructor(dbPath) {
    this.dbPath = dbPath || path.join(__dirname, '../data/bible.db');
    this.db = null;
    // Tail of the batch-insert chain; keeps transactions from overlapping.
    this.batchQueue = Promise.resolve();
  }

  /**
   * Open the database (creating its directory if needed) and create tables.
   * @returns {Promise<void>} Rejects if the file cannot be opened or the
   *   schema cannot be created.
   */
  async initialize() {
    // Ensure data directory exists
    const dataDir = path.dirname(this.dbPath);
    if (!fsSync.existsSync(dataDir)) {
      fsSync.mkdirSync(dataDir, { recursive: true });
      console.log('Created data directory:', dataDir);
    }

    await new Promise((resolve, reject) => {
      this.db = new sqlite3.Database(this.dbPath, (err) => {
        if (err) {
          console.error('Error opening search database:', err);
          reject(err);
        } else {
          console.log('Search database connected');
          resolve();
        }
      });
    });

    await this.createTables();
  }

  /**
   * Create the `verses` table and the `verses_fts` FTS5 table if missing.
   * @returns {Promise<void>}
   */
  async createTables() {
    // Regular verses table for metadata and joins
    try {
      await runAsync(this.db, `
        CREATE TABLE IF NOT EXISTS verses (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          book TEXT NOT NULL,
          chapter INTEGER NOT NULL,
          verse_number INTEGER NOT NULL,
          verse_text TEXT NOT NULL,
          version TEXT NOT NULL,
          UNIQUE(book, chapter, verse_number, version)
        )
      `);
    } catch (err) {
      console.error('Error creating verses table:', err);
      throw err;
    }

    // FTS5 virtual table for full-text search
    // Using porter stemming, unicode support, and diacritic removal
    try {
      await runAsync(this.db, `
        CREATE VIRTUAL TABLE IF NOT EXISTS verses_fts USING fts5(
          book,
          chapter UNINDEXED,
          verse_number UNINDEXED,
          verse_text,
          version UNINDEXED,
          tokenize='porter unicode61 remove_diacritics 2'
        )
      `);
    } catch (err) {
      console.error('Error creating FTS5 table:', err);
      throw err;
    }

    console.log('Search tables initialized successfully');
  }

  /**
   * Check whether the FTS index contains at least one row.
   * @returns {Promise<boolean>}
   */
  async isIndexPopulated() {
    const row = await getAsync(this.db, 'SELECT COUNT(*) as count FROM verses_fts', []);
    return row.count > 0;
  }

  /**
   * Insert one verse into both tables. If the verse already exists in
   * `verses` (same book/chapter/verse/version) nothing is written, so the
   * FTS index does not accumulate duplicate rows.
   * @param {string} book
   * @param {number} chapter
   * @param {number} verseNumber
   * @param {string} verseText
   * @param {string} version
   * @returns {Promise<void>}
   */
  async insertVerse(book, chapter, verseNumber, verseText, version) {
    const params = [book, chapter, verseNumber, verseText, version];
    const { changes } = await runAsync(this.db, INSERT_VERSE_SQL, params);
    if (changes === 0) {
      // INSERT OR IGNORE skipped the row; indexing it again would duplicate hits.
      return;
    }
    await runAsync(this.db, INSERT_FTS_SQL, params);
  }

  /**
   * Batch insert verses inside a single transaction (much faster than
   * row-by-row commits). The batch is all-or-nothing: on any error the
   * transaction is rolled back and the returned promise rejects.
   * Batches issued concurrently on one instance run one after another.
   * Other writes issued on this connection while a batch is running execute
   * inside its open transaction.
   * @param {Iterable<{book: string, chapter: number, verse: number, text: string, version: string}>} verses
   * @returns {Promise<void>}
   */
  async insertVersesBatch(verses) {
    const previous = this.batchQueue || Promise.resolve();
    const job = previous.then(() => insertBatch(this.db, verses));
    // The chain itself must never reject, or later batches would be skipped;
    // the caller still observes the failure through `job`.
    this.batchQueue = job.catch(() => {});
    return job;
  }

  /**
   * Full-text search ranked by BM25 plus an exact-match boost.
   * @param {string} query - User query (see buildFTS5Query for syntax).
   * @param {object} [options]
   * @param {?string} [options.version=null] - Restrict to one version.
   * @param {?string} [options.book=null] - Restrict to one book.
   * @param {number} [options.limit=50] - Maximum number of rows fetched.
   * @param {boolean} [options.includeContext=false] - Attach nearby verses.
   * @param {number} [options.contextSize=2] - Verses of context on each side.
   * @returns {Promise<object[]>} Results sorted by descending relevance;
   *   empty for an empty or blank query.
   */
  async search(query, options = {}) {
    const {
      version = null,
      book = null,
      limit = 50,
      includeContext = false,
      contextSize = 2
    } = options;

    const queryText = query == null ? '' : String(query);
    const ftsQuery = this.buildFTS5Query(queryText);
    if (ftsQuery === '') {
      // MATCH '' is an FTS5 syntax error; an empty query simply has no hits.
      return [];
    }

    // Build WHERE clause for filters
    const filters = [];
    const params = [ftsQuery];
    if (version) {
      filters.push('version = ?');
      params.push(version);
    }
    if (book) {
      filters.push('book = ?');
      params.push(book);
    }
    const whereClause = filters.length > 0 ? `AND ${filters.join(' AND ')}` : '';

    // BM25-ranked query. The highlight() markers are empty strings, so
    // highlighted_text currently equals the plain verse text.
    const sql = `
      SELECT
        book,
        chapter,
        verse_number,
        verse_text,
        version,
        bm25(verses_fts) as rank,
        highlight(verses_fts, 3, '', '') as highlighted_text
      FROM verses_fts
      WHERE verses_fts MATCH ?
      ${whereClause}
      ORDER BY rank
      LIMIT ?
    `;
    params.push(limit);

    let rows;
    try {
      rows = await allAsync(this.db, sql, params);
    } catch (err) {
      console.error('Search error:', err);
      throw err;
    }

    const results = rows.map((row) => {
      const bm25Score = -row.rank; // BM25 returns negative scores
      const exactMatchBoost = this.calculateExactMatchBoost(row.verse_text, queryText);
      return {
        book: row.book,
        chapter: row.chapter,
        verse: row.verse_number,
        text: row.verse_text,
        version: row.version,
        highlight: row.highlighted_text,
        relevance: bm25Score + exactMatchBoost,
        context: [] // Populated below if requested
      };
    });

    // Re-sort by enhanced relevance (BM25 + exact match boost)
    results.sort((a, b) => b.relevance - a.relevance);

    if (includeContext && results.length > 0) {
      // getContext never rejects (it falls back to []), so Promise.all is safe.
      const contexts = await Promise.all(
        results.map((result) => this.getContext(
          result.book,
          result.chapter,
          result.verse,
          result.version,
          contextSize
        ))
      );
      contexts.forEach((context, index) => {
        results[index].context = context;
      });
    }

    return results;
  }

  /**
   * Translate a user query into FTS5 syntax.
   *
   * Passed through unchanged (after trimming) when the user is clearly
   * writing FTS5 syntax:
   *   - phrase:   "faith hope love"
   *   - prefix:   bless*
   *   - NEAR:     NEAR(faith hope, 5)
   *   - boolean:  a OR b, a AND b, a NOT b (FTS5 operators are uppercase only)
   *
   * Otherwise the query is split into words joined with AND so every word
   * must match. Words containing punctuation (e.g. don't) are quoted so
   * they cannot trigger an FTS5 syntax error.
   * @param {string} query - Raw user query.
   * @returns {string} FTS5 MATCH expression; '' for an empty query.
   */
  buildFTS5Query(query) {
    const text = query == null ? '' : String(query).trim();

    // Phrase search: already a quoted phrase
    if (text.length >= 2 && text.startsWith('"') && text.endsWith('"')) {
      return text;
    }
    // Prefix search
    if (text.includes('*')) {
      return text;
    }
    // NEAR group
    if (/\bNEAR\s*\(/.test(text)) {
      return text;
    }
    // Explicit boolean operators
    if (/\s(?:OR|AND|NOT)\s/.test(text)) {
      return text;
    }

    // Default: all words must match
    return text
      .split(/\s+/)
      .filter((word) => word.length > 0)
      .map(toFtsTerm)
      .join(' AND ');
  }

  /**
   * Compute an additive relevance boost that favours literal matches over
   * stemmed ones: +100 when the whole query appears verbatim in the verse,
   * +50 for each query word present as an exact word, otherwise +10 for a
   * query word that overlaps some verse word as a substring.
   * @param {string} verseText - Verse text.
   * @param {string} query - Raw user query (quotes are ignored).
   * @returns {number} Boost to add to the BM25 score.
   */
  calculateExactMatchBoost(verseText, query) {
    const lowerText = verseText.toLowerCase();
    const lowerQuery = query.toLowerCase().replace(/['"]/g, ''); // Remove quotes

    let boost = 0;

    // Exact phrase match (highest boost)
    if (lowerText.includes(lowerQuery)) {
      boost += 100;
    }

    const queryWords = lowerQuery.split(/\s+/).filter((word) => word.length > 0);
    const textWords = lowerText.split(/\W+/).filter((word) => word.length > 0);
    const textWordSet = new Set(textWords);

    for (const queryWord of queryWords) {
      if (textWordSet.has(queryWord)) {
        // Exact word match (e.g. "kindness" matches "kindness", not just "kind")
        boost += 50;
      } else if (textWords.some((textWord) => textWord.includes(queryWord) || queryWord.includes(textWord))) {
        // Partial match (stemmed or substring), counted once per query word
        boost += 10;
      }
    }

    return boost;
  }

  /**
   * Fetch the verses surrounding a target verse (the target is included).
   * Best effort: errors are logged and yield an empty array.
   * @param {string} book
   * @param {number} chapter
   * @param {number} verseNumber
   * @param {string} version
   * @param {number} [contextSize=2] - Verses to include on each side.
   * @returns {Promise<Array<{verse: number, text: string}>>}
   */
  async getContext(book, chapter, verseNumber, version, contextSize = 2) {
    const start = Math.max(1, verseNumber - contextSize);
    const end = verseNumber + contextSize;

    try {
      const rows = await allAsync(
        this.db,
        `SELECT verse_number, verse_text
         FROM verses
         WHERE book = ? AND chapter = ? AND version = ?
           AND verse_number >= ? AND verse_number <= ?
         ORDER BY verse_number`,
        [book, chapter, version, start, end]
      );
      return rows.map((row) => ({ verse: row.verse_number, text: row.verse_text }));
    } catch (err) {
      console.error('Context fetch error:', err);
      return []; // Context is optional; do not fail the search over it
    }
  }

  /**
   * Autocomplete: return indexed words that start with the query.
   * @param {string} query - Prefix typed so far (at least 2 characters).
   * @param {number} [limit=10] - Maximum suggestions (also caps verses scanned).
   * @returns {Promise<string[]>} Lower-cased candidate words.
   */
  async getSuggestions(query, limit = 10) {
    const text = query == null ? '' : String(query).trim();
    if (text.length < 2) return [];

    // Quote the user text so punctuation cannot break the MATCH expression;
    // a trailing * after the string marks its last token as a prefix.
    const ftsQuery = `${quoteFtsString(text)} *`;

    const rows = await allAsync(
      this.db,
      `SELECT DISTINCT verse_text
       FROM verses_fts
       WHERE verse_text MATCH ?
       LIMIT ?`,
      [ftsQuery, limit]
    );

    const lowerQuery = text.toLowerCase();
    const suggestions = new Set();
    for (const row of rows) {
      // Split on anything that is not a letter or digit so suggestions do
      // not carry trailing punctuation such as "faith,".
      const words = row.verse_text.toLowerCase().split(/[^\p{L}\p{N}]+/u);
      for (const word of words) {
        if (word.length > lowerQuery.length && word.startsWith(lowerQuery)) {
          suggestions.add(word);
        }
      }
    }

    return Array.from(suggestions).slice(0, limit);
  }

  /**
   * Remove every row from both the `verses` and `verses_fts` tables.
   * @returns {Promise<void>}
   */
  async clearIndex() {
    await runAsync(this.db, 'DELETE FROM verses');
    await runAsync(this.db, 'DELETE FROM verses_fts');
  }

  /**
   * Get index statistics from the `verses` table.
   * @returns {Promise<{total_verses: number, versions: number, books: number}>}
   */
  async getStats() {
    return getAsync(
      this.db,
      `SELECT
         COUNT(*) as total_verses,
         COUNT(DISTINCT version) as versions,
         COUNT(DISTINCT book) as books
       FROM verses`,
      []
    );
  }

  /**
   * Close the database connection, logging the outcome. No-op when the
   * database was never opened.
   */
  close() {
    if (!this.db) {
      return;
    }
    this.db.close((err) => {
      if (err) {
        console.error('Error closing search database:', err);
      } else {
        console.log('Search database closed');
      }
    });
  }
}

module.exports = SearchDatabase;