FTS5 with Porter stemming treats 'kindness' and 'kind' as the same root word, which caused stemmed matches to rank equally with exact matches. This adds a secondary relevance boost on top of BM25 to prioritize exact matches.

Relevance scoring now:
- BM25 base score (from FTS5)
- +100 for exact phrase match in verse text
- +50 per exact word match (e.g., 'kindness' exactly)
- +10 per partial/stemmed match (e.g., 'kind' via stemming)

Example: Searching for 'kindness'
- Verses with 'kindness': BM25 + 150 (phrase + word)
- Verses with 'kind': BM25 + 10 (partial match)

This ensures exact matches appear first while still benefiting from Porter stemming to find all word variations.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
426 lines
12 KiB
JavaScript
426 lines
12 KiB
JavaScript
const sqlite3 = require('sqlite3').verbose();
|
|
const path = require('path');
|
|
const fs = require('fs').promises;
|
|
const fsSync = require('fs');
|
|
|
|
/**
 * SQLite-backed full-text search index for verses.
 *
 * Every verse is stored twice: in the regular `verses` table (used for
 * context lookups and statistics) and in the FTS5 virtual table `verses_fts`
 * (Porter-stemmed), which serves search and autocomplete queries.
 */
class SearchDatabase {
  /**
   * @param {string} [dbPath] - Path of the SQLite file. Defaults to
   *   `../data/bible.db` relative to this module.
   */
  constructor(dbPath) {
    this.dbPath = dbPath || path.join(__dirname, '../data/bible.db');
    this.db = null;
  }

  /**
   * Open the database (creating its directory if needed) and make sure the
   * search tables exist.
   *
   * @returns {Promise<void>} Resolves once the tables are ready; rejects with
   *   the sqlite3 error if opening the file or creating a table fails.
   */
  async initialize() {
    // Ensure data directory exists
    const dataDir = path.dirname(this.dbPath);
    if (!fsSync.existsSync(dataDir)) {
      fsSync.mkdirSync(dataDir, { recursive: true });
      console.log('Created data directory:', dataDir);
    }

    return new Promise((resolve, reject) => {
      this.db = new sqlite3.Database(this.dbPath, (err) => {
        if (err) {
          console.error('Error opening search database:', err);
          reject(err);
        } else {
          console.log('Search database connected');
          this.createTables().then(resolve).catch(reject);
        }
      });
    });
  }

  /**
   * Internal helper: promise wrapper around `db.run`.
   *
   * @param {string} sql - Statement to execute.
   * @param {Array} [params] - Positional bind parameters.
   * @returns {Promise<{changes: number, lastID: number}>} The `changes` and
   *   `lastID` values sqlite3 exposes on the callback's `this`.
   */
  runStatement(sql, params = []) {
    return new Promise((resolve, reject) => {
      // A regular function is required: sqlite3 reports `changes`/`lastID`
      // on the callback's `this`.
      this.db.run(sql, params, function onDone(err) {
        if (err) reject(err);
        else resolve({ changes: this.changes, lastID: this.lastID });
      });
    });
  }

  /**
   * Internal helper: promise wrapper around `db.all`.
   *
   * @param {string} sql - Query to execute.
   * @param {Array} [params] - Positional bind parameters.
   * @returns {Promise<Array<object>>} All result rows.
   */
  queryAll(sql, params = []) {
    return new Promise((resolve, reject) => {
      this.db.all(sql, params, (err, rows) => {
        if (err) reject(err);
        else resolve(rows);
      });
    });
  }

  /**
   * Internal helper: promise wrapper around `db.get`.
   *
   * @param {string} sql - Query to execute.
   * @param {Array} [params] - Positional bind parameters.
   * @returns {Promise<object|undefined>} The first result row, if any.
   */
  queryOne(sql, params = []) {
    return new Promise((resolve, reject) => {
      this.db.get(sql, params, (err, row) => {
        if (err) reject(err);
        else resolve(row);
      });
    });
  }

  /**
   * Internal helper: promise wrapper around `db.prepare`.
   *
   * @param {string} sql - Statement to prepare.
   * @returns {Promise<object>} The prepared statement.
   */
  prepareStatement(sql) {
    return new Promise((resolve, reject) => {
      const stmt = this.db.prepare(sql, (err) => {
        if (err) reject(err);
        else resolve(stmt);
      });
    });
  }

  /**
   * Create the `verses` table and the `verses_fts` FTS5 table if missing.
   *
   * @returns {Promise<void>} Rejects with the sqlite3 error of the first
   *   statement that fails.
   */
  async createTables() {
    // Regular verses table for metadata and joins
    try {
      await this.runStatement(`
        CREATE TABLE IF NOT EXISTS verses (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          book TEXT NOT NULL,
          chapter INTEGER NOT NULL,
          verse_number INTEGER NOT NULL,
          verse_text TEXT NOT NULL,
          version TEXT NOT NULL,
          UNIQUE(book, chapter, verse_number, version)
        )
      `);
    } catch (err) {
      console.error('Error creating verses table:', err);
      throw err;
    }

    // FTS5 virtual table for full-text search, using porter stemming,
    // unicode support, and diacritic removal
    try {
      await this.runStatement(`
        CREATE VIRTUAL TABLE IF NOT EXISTS verses_fts USING fts5(
          book,
          chapter UNINDEXED,
          verse_number UNINDEXED,
          verse_text,
          version UNINDEXED,
          tokenize='porter unicode61 remove_diacritics 2'
        )
      `);
    } catch (err) {
      console.error('Error creating FTS5 table:', err);
      throw err;
    }

    console.log('Search tables initialized successfully');
  }

  /**
   * Check whether the FTS index contains at least one row.
   *
   * @returns {Promise<boolean>}
   */
  async isIndexPopulated() {
    const row = await this.queryOne('SELECT COUNT(*) as count FROM verses_fts');
    return row.count > 0;
  }

  /**
   * Insert a verse into both tables.
   *
   * The FTS row is only written when the verse was actually added to
   * `verses`. Previously a verse skipped by `INSERT OR IGNORE` still got a
   * second FTS row, so re-indexing produced duplicate search hits.
   *
   * @param {string} book
   * @param {number} chapter
   * @param {number} verseNumber
   * @param {string} verseText
   * @param {string} version
   * @returns {Promise<void>}
   */
  async insertVerse(book, chapter, verseNumber, verseText, version) {
    const params = [book, chapter, verseNumber, verseText, version];

    const { changes } = await this.runStatement(
      `INSERT OR IGNORE INTO verses (book, chapter, verse_number, verse_text, version)
       VALUES (?, ?, ?, ?, ?)`,
      params
    );

    // 0 changes means the UNIQUE constraint fired: the verse is already indexed.
    if (changes === 0) return;

    await this.runStatement(
      `INSERT INTO verses_fts (book, chapter, verse_number, verse_text, version)
       VALUES (?, ?, ?, ?, ?)`,
      params
    );
  }

  /**
   * Insert many verses inside one transaction using prepared statements.
   *
   * Verses already present in `verses` are skipped in the FTS table too.
   * On any failure the transaction is rolled back and the error is rethrown.
   *
   * @param {Iterable<{book: string, chapter: number, verse: number, text: string, version: string}>} verses
   * @returns {Promise<void>}
   */
  async insertVersesBatch(verses) {
    const runPrepared = (stmt, params) =>
      new Promise((resolve, reject) => {
        stmt.run(params, function onRun(err) {
          if (err) reject(err);
          else resolve(this.changes);
        });
      });
    // Finalization is best-effort cleanup; its own error is not actionable here.
    const finalize = (stmt) =>
      new Promise((resolve) => {
        stmt.finalize(() => resolve());
      });

    await this.runStatement('BEGIN TRANSACTION');

    const statements = [];
    let failure = null;

    try {
      const stmtVerses = await this.prepareStatement(
        `INSERT OR IGNORE INTO verses (book, chapter, verse_number, verse_text, version)
         VALUES (?, ?, ?, ?, ?)`
      );
      statements.push(stmtVerses);

      const stmtFts = await this.prepareStatement(
        `INSERT INTO verses_fts (book, chapter, verse_number, verse_text, version)
         VALUES (?, ?, ?, ?, ?)`
      );
      statements.push(stmtFts);

      for (const verse of verses) {
        const params = [verse.book, verse.chapter, verse.verse, verse.text, verse.version];
        const inserted = await runPrepared(stmtVerses, params);
        if (inserted > 0) {
          await runPrepared(stmtFts, params);
        }
      }
    } catch (err) {
      failure = err;
    }

    await Promise.all(statements.map(finalize));

    if (failure === null) {
      try {
        await this.runStatement('COMMIT');
        return;
      } catch (err) {
        failure = err;
      }
    }

    try {
      await this.runStatement('ROLLBACK');
    } catch (rollbackErr) {
      console.error('Error rolling back batch insert:', rollbackErr);
    }
    throw failure;
  }

  /**
   * Search the FTS index and rank hits by BM25 plus an exact-match boost.
   *
   * @param {string} query - User query (see `buildFTS5Query` for syntax).
   * @param {object} [options]
   * @param {?string} [options.version] - Restrict to one version.
   * @param {?string} [options.book] - Restrict to one book.
   * @param {number} [options.limit=50] - Maximum number of results.
   * @param {boolean} [options.includeContext=false] - Attach surrounding verses.
   * @param {number} [options.contextSize=2] - Verses of context on each side.
   * @returns {Promise<Array<object>>} Results sorted by descending relevance;
   *   an empty array for a blank query. Rejects with the sqlite3 error if the
   *   query fails.
   */
  async search(query, options = {}) {
    const {
      version = null,
      book = null,
      limit = 50,
      includeContext = false,
      contextSize = 2
    } = options;

    // Build FTS5 query based on search type
    const ftsQuery = this.buildFTS5Query(query);
    if (ftsQuery.length === 0) {
      return []; // nothing to match; avoid sending an empty MATCH expression
    }

    // Build WHERE clause for filters
    const filters = [];
    const params = [ftsQuery];

    if (version) {
      filters.push('version = ?');
      params.push(version);
    }

    if (book) {
      filters.push('book = ?');
      params.push(book);
    }

    const whereClause = filters.length > 0 ? `AND ${filters.join(' AND ')}` : '';

    // BM25 alone ranks stemmed and exact matches alike, so the SQL LIMIT must
    // not be the final cut: fetch a larger candidate pool, re-rank it with the
    // exact-match boost, then truncate to `limit`.
    const RERANK_POOL_FACTOR = 5;
    const candidateLimit = limit > 0 ? limit * RERANK_POOL_FACTOR : limit;

    const sql = `
      SELECT
        book,
        chapter,
        verse_number,
        verse_text,
        version,
        bm25(verses_fts) as rank,
        highlight(verses_fts, 3, '<mark>', '</mark>') as highlighted_text
      FROM verses_fts
      WHERE verses_fts MATCH ? ${whereClause}
      ORDER BY rank
      LIMIT ?
    `;

    params.push(candidateLimit);

    let rows;
    try {
      rows = await this.queryAll(sql, params);
    } catch (err) {
      console.error('Search error:', err);
      throw err;
    }

    // Format results with enhanced relevance scoring
    const scored = rows.map((row) => {
      const bm25Score = -row.rank; // BM25 returns negative scores
      const exactMatchBoost = this.calculateExactMatchBoost(row.verse_text, query);

      return {
        book: row.book,
        chapter: row.chapter,
        verse: row.verse_number,
        text: row.verse_text,
        version: row.version,
        highlight: row.highlighted_text,
        relevance: bm25Score + exactMatchBoost,
        context: [] // Will be populated if requested
      };
    });

    // Re-sort by enhanced relevance (BM25 + exact match boost)
    scored.sort((a, b) => b.relevance - a.relevance);

    // A negative limit means "no limit" to SQLite; keep everything in that case.
    const results = limit >= 0 ? scored.slice(0, limit) : scored;

    // Add context if requested
    if (includeContext && results.length > 0) {
      const contexts = await Promise.all(
        results.map((result) =>
          this.getContext(result.book, result.chapter, result.verse, result.version, contextSize)
        )
      );
      contexts.forEach((context, index) => {
        results[index].context = context;
      });
    }

    return results;
  }

  /**
   * Translate a user query into an FTS5 MATCH expression.
   *
   * Queries that already use FTS5 syntax are passed through (trimmed):
   * quoted phrases, prefix terms containing `*`, `NEAR(`, and the uppercase
   * operators `AND` / `OR` / `NOT`. Everything else is split on whitespace and
   * each word is emitted as a quoted FTS5 string joined with `AND`, so
   * punctuation such as apostrophes or hyphens cannot produce a syntax error.
   *
   * @param {string} query
   * @returns {string} The MATCH expression, or `''` for a blank/non-string query.
   */
  buildFTS5Query(query) {
    const text = typeof query === 'string' ? query.trim() : '';
    if (text.length === 0) {
      return '';
    }

    // Phrase search: "faith hope love" -> "faith hope love"
    if (text.length >= 2 && text.startsWith('"') && text.endsWith('"')) {
      return text;
    }

    // Prefix search: word* -> word*
    if (text.includes('*')) {
      return text;
    }

    // NEAR query. Only the operator form is detected, so ordinary words such
    // as "near" or "nearby" still go through the quoted default path.
    if (/\bNEAR\s*\(/.test(text)) {
      return text;
    }

    // Boolean query: FTS5 only treats uppercase AND / OR / NOT as operators;
    // lowercase "and" / "or" / "not" are ordinary search terms.
    if (/\s(?:AND|OR|NOT)\s/.test(text)) {
      return text;
    }

    // Default: all words must match. Double any embedded quote to escape it.
    return text
      .split(/\s+/)
      .map((word) => `"${word.replace(/"/g, '""')}"`)
      .join(' AND ');
  }

  /**
   * Compute the boost added on top of BM25 so exact matches outrank stemmed
   * ones: +100 if the whole query appears verbatim in the verse, +50 for each
   * query word present as an exact word, otherwise +10 for a query word that
   * overlaps some verse word as a substring.
   *
   * Uppercase `AND` / `OR` / `NOT` tokens and trailing `*` are ignored when
   * scoring individual words. A blank query yields 0.
   *
   * @param {string} verseText
   * @param {string} query
   * @returns {number}
   */
  calculateExactMatchBoost(verseText, query) {
    const rawQuery = String(query == null ? '' : query).replace(/['"]/g, '').trim(); // Remove quotes
    if (rawQuery.length === 0) {
      return 0; // otherwise every verse would "contain" the empty phrase
    }

    const lowerText = String(verseText == null ? '' : verseText).toLowerCase();
    const lowerQuery = rawQuery.toLowerCase();
    let boost = 0;

    // Exact phrase match (highest boost) - e.g., "faith hope love"
    if (lowerText.includes(lowerQuery)) {
      boost += 100;
    }

    const operators = new Set(['AND', 'OR', 'NOT']);
    const queryWords = rawQuery
      .split(/\s+/)
      .filter((word) => word.length > 0 && !operators.has(word))
      .map((word) => word.toLowerCase().replace(/\*+$/, ''))
      .filter((word) => word.length > 0);

    const textWords = lowerText.split(/\W+/).filter((word) => word.length > 0);
    const textWordSet = new Set(textWords);

    for (const queryWord of queryWords) {
      if (textWordSet.has(queryWord)) {
        // Exact word match (e.g., "kindness" matches "kindness", not just "kind")
        boost += 50;
      } else if (
        textWords.some(
          (textWord) => textWord.includes(queryWord) || queryWord.includes(textWord)
        )
      ) {
        // Partial match (stemmed or substring) - counted once per query word
        boost += 10;
      }
    }

    return boost;
  }

  /**
   * Fetch the verses surrounding a target verse (the target is included).
   *
   * @param {string} book
   * @param {number} chapter
   * @param {number} verseNumber
   * @param {string} version
   * @param {number} [contextSize=2] - Verses to include on each side.
   * @returns {Promise<Array<{verse: number, text: string}>>} Context verses in
   *   ascending order. Best effort: a query error is logged and yields `[]`.
   */
  async getContext(book, chapter, verseNumber, version, contextSize = 2) {
    const start = Math.max(1, verseNumber - contextSize);
    const end = verseNumber + contextSize;

    try {
      const rows = await this.queryAll(
        `SELECT verse_number, verse_text
         FROM verses
         WHERE book = ? AND chapter = ? AND version = ?
           AND verse_number >= ? AND verse_number <= ?
         ORDER BY verse_number`,
        [book, chapter, version, start, end]
      );

      return rows.map((row) => ({
        verse: row.verse_number,
        text: row.verse_text
      }));
    } catch (err) {
      console.error('Context fetch error:', err);
      return []; // Return empty array on error
    }
  }

  /**
   * Autocomplete: suggest indexed words that start with `query`.
   *
   * @param {string} query - Prefix typed so far (at least 2 characters).
   * @param {number} [limit=10] - Maximum verses scanned and suggestions returned.
   * @returns {Promise<string[]>} Lowercased words, without surrounding
   *   punctuation, that extend the prefix.
   */
  async getSuggestions(query, limit = 10) {
    if (!query || query.length < 2) return [];

    // FTS5 prefix match. The prefix is passed as a quoted string so
    // punctuation in user input cannot break the MATCH expression.
    const ftsQuery = `"${query.replace(/"/g, '""')}" *`;

    const rows = await this.queryAll(
      `SELECT DISTINCT verse_text
       FROM verses_fts
       WHERE verse_text MATCH ?
       LIMIT ?`,
      [ftsQuery, limit]
    );

    // Extract words that start with the query
    const suggestions = new Set();
    const lowerQuery = query.toLowerCase();

    for (const row of rows) {
      for (const rawWord of row.verse_text.toLowerCase().split(/\s+/)) {
        // Strip surrounding punctuation so "kindness," and "kindness." collapse.
        const word = rawWord.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, '');
        if (word.startsWith(lowerQuery) && word.length > query.length) {
          suggestions.add(word);
        }
      }
    }

    return Array.from(suggestions).slice(0, limit);
  }

  /**
   * Delete every row from both the `verses` and `verses_fts` tables.
   *
   * @returns {Promise<void>}
   */
  async clearIndex() {
    await this.runStatement('DELETE FROM verses');
    await this.runStatement('DELETE FROM verses_fts');
  }

  /**
   * Get index statistics.
   *
   * @returns {Promise<{total_verses: number, versions: number, books: number}>}
   */
  async getStats() {
    return this.queryOne(
      `SELECT
         COUNT(*) as total_verses,
         COUNT(DISTINCT version) as versions,
         COUNT(DISTINCT book) as books
       FROM verses`
    );
  }

  /**
   * Close the database connection, if one is open. The outcome is only logged.
   */
  close() {
    if (this.db) {
      this.db.close((err) => {
        if (err) {
          console.error('Error closing search database:', err);
        } else {
          console.log('Search database closed');
        }
      });
    }
  }
}
|
|
|
|
module.exports = SearchDatabase;
|