Files
the-bible/backend/src/searchDatabase.js
Joshua Ryder 246d849163 Enhance: Add exact match boosting to search relevance scoring
FTS5 with Porter stemming treats 'kindness' and 'kind' as the same root word,
which caused stemmed matches to rank equally with exact matches. This adds a
secondary relevance boost on top of BM25 to prioritize exact matches.

Relevance scoring now:
- BM25 base score (from FTS5)
- +100 for exact phrase match in verse text
- +50 per exact word match (e.g., 'kindness' exactly)
- +10 per partial/stemmed match (e.g., 'kind' via stemming)

Example: Searching for 'kindness'
- Verses with 'kindness': BM25 + 150 (phrase + word)
- Verses with 'kind': BM25 + 10 (partial match)

This ensures exact matches appear first while still benefiting from Porter
stemming to find all word variations.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-10 20:05:29 -05:00

426 lines
12 KiB
JavaScript

const sqlite3 = require('sqlite3').verbose();
const path = require('path');
const fs = require('fs').promises;
const fsSync = require('fs');
/**
 * SQLite-backed verse store with an FTS5 full-text index.
 *
 * Two tables are maintained side by side: `verses` (plain rows, used for
 * context lookups and statistics) and `verses_fts` (FTS5 virtual table using
 * the `porter unicode61 remove_diacritics 2` tokenizer, used for searching).
 */
class SearchDatabase {
  /**
   * @param {string} [dbPath] - Path of the SQLite file. Defaults to
   *   `../data/bible.db` relative to this module.
   */
  constructor(dbPath) {
    this.dbPath = dbPath || path.join(__dirname, '../data/bible.db');
    this.db = null;
  }

  /**
   * Split text into lower-cased word tokens.
   *
   * Everything that is not a letter, combining mark, digit or underscore is a
   * separator, so apostrophes, hyphens and other punctuation never end up
   * inside a token (`"God's"` -> `['god', 's']`). Query text and verse text
   * are both passed through this helper so they are compared like with like.
   *
   * @param {string} text
   * @returns {string[]} Tokens in order of appearance; empty array when the
   *   text contains no word characters.
   */
  static tokenizeWords(text) {
    return text
      .toLowerCase()
      .split(/[^\p{L}\p{M}\p{N}_]+/u)
      .filter((word) => word.length > 0);
  }

  /**
   * Open the database (creating the data directory if needed) and make sure
   * the search tables exist.
   *
   * @returns {Promise<void>} Resolves once the tables are ready; rejects with
   *   the sqlite error when opening the file or creating a table fails.
   */
  async initialize() {
    // Ensure data directory exists
    const dataDir = path.dirname(this.dbPath);
    if (!fsSync.existsSync(dataDir)) {
      fsSync.mkdirSync(dataDir, { recursive: true });
      console.log('Created data directory:', dataDir);
    }

    return new Promise((resolve, reject) => {
      this.db = new sqlite3.Database(this.dbPath, (err) => {
        if (err) {
          console.error('Error opening search database:', err);
          reject(err);
          return;
        }
        console.log('Search database connected');
        this.createTables().then(resolve, reject);
      });
    });
  }

  /**
   * Create the `verses` table and the `verses_fts` FTS5 virtual table when
   * they do not exist yet.
   *
   * @returns {Promise<void>} Rejects with the sqlite error of the first
   *   statement that fails.
   */
  async createTables() {
    return new Promise((resolve, reject) => {
      // Create regular verses table for metadata and joins
      this.db.run(`
        CREATE TABLE IF NOT EXISTS verses (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          book TEXT NOT NULL,
          chapter INTEGER NOT NULL,
          verse_number INTEGER NOT NULL,
          verse_text TEXT NOT NULL,
          version TEXT NOT NULL,
          UNIQUE(book, chapter, verse_number, version)
        )
      `, (err) => {
        if (err) {
          console.error('Error creating verses table:', err);
          return reject(err);
        }

        // Create FTS5 virtual table for full-text search
        // Using porter stemming, unicode support, and diacritic removal
        this.db.run(`
          CREATE VIRTUAL TABLE IF NOT EXISTS verses_fts USING fts5(
            book,
            chapter UNINDEXED,
            verse_number UNINDEXED,
            verse_text,
            version UNINDEXED,
            tokenize='porter unicode61 remove_diacritics 2'
          )
        `, (err) => {
          if (err) {
            console.error('Error creating FTS5 table:', err);
            return reject(err);
          }
          console.log('Search tables initialized successfully');
          resolve();
        });
      });
    });
  }

  /**
   * Report whether the FTS index holds at least one row.
   *
   * @returns {Promise<boolean>}
   */
  async isIndexPopulated() {
    return new Promise((resolve, reject) => {
      this.db.get('SELECT COUNT(*) as count FROM verses_fts', [], (err, row) => {
        if (err) reject(err);
        else resolve(row.count > 0);
      });
    });
  }

  /**
   * Insert one verse into `verses` and then into `verses_fts`.
   *
   * NOTE(review): the `verses` insert is `INSERT OR IGNORE`, but the FTS
   * insert is unconditional, so inserting the same verse twice leaves two
   * FTS rows. Left unchanged here because callers may rely on it to rebuild
   * an empty FTS table while `verses` is still populated -- confirm.
   *
   * @param {string} book
   * @param {number} chapter
   * @param {number} verseNumber
   * @param {string} verseText
   * @param {string} version
   * @returns {Promise<void>} Rejects with the sqlite error of whichever
   *   insert fails.
   */
  async insertVerse(book, chapter, verseNumber, verseText, version) {
    const row = [book, chapter, verseNumber, verseText, version];
    return new Promise((resolve, reject) => {
      // Insert into regular table (or ignore if exists)
      this.db.run(
        `INSERT OR IGNORE INTO verses (book, chapter, verse_number, verse_text, version)
         VALUES (?, ?, ?, ?, ?)`,
        row,
        (err) => {
          if (err) {
            return reject(err);
          }
          // Insert into FTS5 table
          this.db.run(
            `INSERT INTO verses_fts (book, chapter, verse_number, verse_text, version)
             VALUES (?, ?, ?, ?, ?)`,
            row,
            (err) => {
              if (err) reject(err);
              else resolve();
            }
          );
        }
      );
    });
  }

  /**
   * Insert many verses inside a single transaction.
   *
   * Every insert is queued on a prepared statement and awaited; the
   * transaction is committed only when all of them succeeded. If any insert
   * (or the commit itself) fails, the transaction is rolled back and the
   * returned promise rejects with the first error, so a batch is never
   * half-written and failures are never silently dropped.
   *
   * @param {Iterable<{book: string, chapter: number, verse: number, text: string, version: string}>} verses
   * @returns {Promise<void>}
   */
  async insertVersesBatch(verses) {
    const db = this.db;

    const exec = (sql) =>
      new Promise((resolve, reject) => {
        db.run(sql, (err) => (err ? reject(err) : resolve()));
      });
    const prepare = (sql) =>
      new Promise((resolve, reject) => {
        const stmt = db.prepare(sql, (err) => (err ? reject(err) : resolve(stmt)));
      });
    // Resolves with the error (or null) instead of rejecting, so that every
    // queued insert can be awaited before choosing COMMIT or ROLLBACK.
    const runSettled = (stmt, params) =>
      new Promise((resolve) => {
        stmt.run(params, (err) => resolve(err || null));
      });
    const finalize = (stmt) =>
      new Promise((resolve) => {
        stmt.finalize(() => resolve());
      });

    await exec('BEGIN TRANSACTION');

    const statements = [];
    try {
      const stmtVerses = await prepare(
        `INSERT OR IGNORE INTO verses (book, chapter, verse_number, verse_text, version)
         VALUES (?, ?, ?, ?, ?)`
      );
      statements.push(stmtVerses);
      const stmtFts = await prepare(
        `INSERT INTO verses_fts (book, chapter, verse_number, verse_text, version)
         VALUES (?, ?, ?, ?, ?)`
      );
      statements.push(stmtFts);

      const pending = [];
      for (const verse of verses) {
        const row = [verse.book, verse.chapter, verse.verse, verse.text, verse.version];
        pending.push(runSettled(stmtVerses, row), runSettled(stmtFts, row));
      }

      const outcomes = await Promise.all(pending);
      const failure = outcomes.find((err) => err !== null);
      if (failure) {
        throw failure;
      }

      await Promise.all(statements.map(finalize));
      statements.length = 0;
      await exec('COMMIT');
    } catch (err) {
      await Promise.all(statements.map(finalize));
      try {
        await exec('ROLLBACK');
      } catch (rollbackErr) {
        console.error('Error rolling back verse batch:', rollbackErr);
      }
      throw err;
    }
  }

  /**
   * Run a full-text search and return verses ordered by relevance.
   *
   * Relevance is the (sign-flipped) FTS5 BM25 score plus the bonus from
   * {@link SearchDatabase#calculateExactMatchBoost}.
   *
   * NOTE(review): `LIMIT` is applied by SQL on the BM25 order, before the
   * exact-match bonus is added, so the bonus can only reorder rows that
   * already made it into the first `limit` BM25 hits.
   *
   * @param {string} query - User query; see `buildFTS5Query` for the syntax.
   * @param {object} [options]
   * @param {?string} [options.version=null] - Restrict to one version.
   * @param {?string} [options.book=null] - Restrict to one book.
   * @param {number} [options.limit=50] - Maximum number of rows fetched.
   * @param {boolean} [options.includeContext=false] - Attach neighbouring
   *   verses to every result.
   * @param {number} [options.contextSize=2] - Verses before/after to attach.
   * @returns {Promise<Array<{book: string, chapter: number, verse: number,
   *   text: string, version: string, highlight: string, relevance: number,
   *   context: Array<{verse: number, text: string}>}>>} Empty array for a
   *   blank query. Rejects with the sqlite error when the query fails.
   * @throws {TypeError} When `query` is not a string.
   */
  async search(query, options = {}) {
    const {
      version = null,
      book = null,
      limit = 50,
      includeContext = false,
      contextSize = 2
    } = options;

    // Build FTS5 query based on search type
    const ftsQuery = this.buildFTS5Query(query);
    if (ftsQuery === '') {
      // Nothing to search for; skip the database round trip.
      return [];
    }

    // Build WHERE clause for filters
    const filters = [];
    const params = [ftsQuery];
    if (version) {
      filters.push('version = ?');
      params.push(version);
    }
    if (book) {
      filters.push('book = ?');
      params.push(book);
    }
    const whereClause = filters.length > 0 ? `AND ${filters.join(' AND ')}` : '';

    // Build SQL query with BM25 ranking.
    // highlight() column 3 is verse_text (book=0, chapter=1, verse_number=2).
    const sql = `
      SELECT
        book,
        chapter,
        verse_number,
        verse_text,
        version,
        bm25(verses_fts) as rank,
        highlight(verses_fts, 3, '<mark>', '</mark>') as highlighted_text
      FROM verses_fts
      WHERE verses_fts MATCH ? ${whereClause}
      ORDER BY rank
      LIMIT ?
    `;
    params.push(limit);

    let rows;
    try {
      rows = await new Promise((resolve, reject) => {
        this.db.all(sql, params, (err, found) => (err ? reject(err) : resolve(found)));
      });
    } catch (err) {
      console.error('Search error:', err);
      throw err;
    }

    // Format results with enhanced relevance scoring
    const results = rows.map((row) => {
      const bm25Score = -row.rank; // BM25 returns negative scores
      const exactMatchBoost = this.calculateExactMatchBoost(row.verse_text, query);
      return {
        book: row.book,
        chapter: row.chapter,
        verse: row.verse_number,
        text: row.verse_text,
        version: row.version,
        highlight: row.highlighted_text,
        relevance: bm25Score + exactMatchBoost,
        context: [] // Will be populated if requested
      };
    });

    // Re-sort by enhanced relevance (BM25 + exact match boost)
    results.sort((a, b) => b.relevance - a.relevance);

    // Add context if requested
    if (includeContext && results.length > 0) {
      await Promise.all(
        results.map(async (result) => {
          result.context = await this.getContext(
            result.book,
            result.chapter,
            result.verse,
            result.version,
            contextSize
          );
        })
      );
    }

    return results;
  }

  /**
   * Translate a user query into an FTS5 MATCH expression.
   *
   * Queries that already use FTS5 syntax are passed through (trimmed):
   *   - a quoted phrase:        "faith hope love"
   *   - a prefix search:        lov*
   *   - a NEAR group:           NEAR(faith hope, 5)
   *   - boolean operators:      faith OR hope / faith AND hope / faith NOT fear
   * FTS5 keywords are case-sensitive, so only upper-case `AND`, `OR`, `NOT`
   * and `NEAR(` count as syntax; lower-case "or" or a word such as "nearby"
   * is an ordinary search term.
   *
   * Anything else is treated as plain words that must all match. Each word
   * is emitted as a double-quoted FTS5 string, so punctuation inside a word
   * (`God's`, `3:16`, `well-being`) cannot be misread as query syntax.
   * Tokens without any letter or digit are dropped.
   *
   * @param {string} query
   * @returns {string} MATCH expression; `''` when the query has no terms.
   * @throws {TypeError} When `query` is not a string.
   */
  buildFTS5Query(query) {
    if (typeof query !== 'string') {
      throw new TypeError('Search query must be a string');
    }
    const trimmed = query.trim();
    if (trimmed === '') {
      return '';
    }

    const isQuotedPhrase =
      trimmed.length >= 2 && trimmed.startsWith('"') && trimmed.endsWith('"');
    const usesFtsSyntax =
      isQuotedPhrase ||
      trimmed.includes('*') ||
      /\bNEAR\s*\(/.test(trimmed) ||
      /\s(?:AND|OR|NOT)\s/.test(trimmed);
    if (usesFtsSyntax) {
      return trimmed;
    }

    // Default: simple term search, all words must match.
    return trimmed
      .split(/\s+/)
      .map((token) => token.replace(/"/g, ''))
      .filter((token) => /[\p{L}\p{N}]/u.test(token))
      .map((token) => `"${token}"`)
      .join(' AND ');
  }

  /**
   * Bonus added on top of BM25 so exact matches outrank stemmed ones.
   *
   * Both inputs are reduced to lower-case word tokens (FTS5 keywords, the
   * NEAR distance argument, quotes and `*` are removed from the query first):
   *   - +100 when the query words occur in the verse as a contiguous run of
   *     whole words (phrase match);
   *   - +50 for every query word that appears verbatim in the verse;
   *   - otherwise +10 for a query word that is a likely stem variant of some
   *     verse word: one is a prefix of the other and the shorter one has at
   *     least three characters (e.g. "kindness" / "kind").
   *
   * @param {string} verseText
   * @param {string} query - The original user query.
   * @returns {number} Non-negative bonus; 0 when the query has no words.
   */
  calculateExactMatchBoost(verseText, query) {
    const plainQuery = query
      .replace(/\bNEAR\s*\(/g, ' ')
      .replace(/,\s*\d+\s*\)/g, ' ')
      .replace(/\b(?:AND|OR|NOT)\b/g, ' ');
    const queryWords = SearchDatabase.tokenizeWords(plainQuery);
    const textWords = SearchDatabase.tokenizeWords(verseText);
    if (queryWords.length === 0 || textWords.length === 0) {
      return 0;
    }

    let boost = 0;

    // Exact phrase match (highest boost). The surrounding spaces anchor the
    // comparison on word boundaries, so "art" does not match inside "heart".
    if (` ${textWords.join(' ')} `.includes(` ${queryWords.join(' ')} `)) {
      boost += 100;
    }

    const textWordSet = new Set(textWords);
    const isStemVariant = (first, second) => {
      const [shorter, longer] =
        first.length <= second.length ? [first, second] : [second, first];
      return shorter.length >= 3 && longer.startsWith(shorter);
    };

    for (const queryWord of queryWords) {
      if (textWordSet.has(queryWord)) {
        // Exact word match (e.g., "kindness" matches "kindness", not just "kind")
        boost += 50;
      } else if (textWords.some((textWord) => isStemVariant(queryWord, textWord))) {
        // Stemmed variant - lower boost, counted once per query word
        boost += 10;
      }
    }

    return boost;
  }

  /**
   * Fetch the verses surrounding a target verse (the target itself is part
   * of the returned range) from the `verses` table.
   *
   * This is best-effort: a database error is logged and an empty array is
   * returned instead of rejecting.
   *
   * @param {string} book
   * @param {number} chapter
   * @param {number} verseNumber
   * @param {string} version
   * @param {number} [contextSize=2] - Verses to include before and after.
   * @returns {Promise<Array<{verse: number, text: string}>>}
   */
  async getContext(book, chapter, verseNumber, version, contextSize = 2) {
    const start = Math.max(1, verseNumber - contextSize);
    const end = verseNumber + contextSize;

    return new Promise((resolve) => {
      this.db.all(
        `SELECT verse_number, verse_text
         FROM verses
         WHERE book = ? AND chapter = ? AND version = ?
         AND verse_number >= ? AND verse_number <= ?
         ORDER BY verse_number`,
        [book, chapter, version, start, end],
        (err, rows) => {
          if (err) {
            console.error('Context fetch error:', err);
            return resolve([]); // Return empty array on error
          }
          resolve(rows.map((row) => ({
            verse: row.verse_number,
            text: row.verse_text
          })));
        }
      );
    });
  }

  /**
   * Suggest word completions for the word currently being typed.
   *
   * The last word of `query` is the prefix to complete; any earlier words
   * must also occur in the verse. All words are sent to FTS5 as quoted
   * strings, so punctuation in the input cannot break the MATCH expression.
   * Suggestions are lower-cased whole words (punctuation stripped) that start
   * with the prefix and are longer than it.
   *
   * @param {string} query
   * @param {number} [limit=10] - Caps both the verses scanned and the
   *   suggestions returned.
   * @returns {Promise<string[]>} Empty array when the prefix is shorter than
   *   two characters. Rejects with the sqlite error when the query fails.
   */
  async getSuggestions(query, limit = 10) {
    if (typeof query !== 'string') return [];

    const tokens = SearchDatabase.tokenizeWords(query);
    const prefix = tokens[tokens.length - 1];
    if (prefix === undefined || prefix.length < 2) return [];

    // Use FTS5 prefix matching for suggestions: "<prefix>" *
    const requiredTerms = tokens.slice(0, -1).map((token) => `"${token}"`);
    const ftsQuery = [...requiredTerms, `"${prefix}" *`].join(' AND ');

    return new Promise((resolve, reject) => {
      this.db.all(
        `SELECT DISTINCT verse_text
         FROM verses_fts
         WHERE verse_text MATCH ?
         LIMIT ?`,
        [ftsQuery, limit],
        (err, rows) => {
          if (err) {
            return reject(err);
          }

          // Extract words that start with the prefix
          const suggestions = new Set();
          for (const row of rows) {
            for (const word of SearchDatabase.tokenizeWords(row.verse_text)) {
              if (word.startsWith(prefix) && word.length > prefix.length) {
                suggestions.add(word);
              }
            }
          }
          resolve(Array.from(suggestions).slice(0, limit));
        }
      );
    });
  }

  /**
   * Delete every row from `verses` and then from `verses_fts`.
   *
   * @returns {Promise<void>} Rejects with the sqlite error of whichever
   *   delete fails.
   */
  async clearIndex() {
    return new Promise((resolve, reject) => {
      this.db.run('DELETE FROM verses', (err) => {
        if (err) return reject(err);
        this.db.run('DELETE FROM verses_fts', (err) => {
          if (err) return reject(err);
          resolve();
        });
      });
    });
  }

  /**
   * Count verses, distinct versions and distinct books in `verses`.
   *
   * @returns {Promise<{total_verses: number, versions: number, books: number}>}
   */
  async getStats() {
    return new Promise((resolve, reject) => {
      this.db.get(
        `SELECT
          COUNT(*) as total_verses,
          COUNT(DISTINCT version) as versions,
          COUNT(DISTINCT book) as books
         FROM verses`,
        [],
        (err, row) => {
          if (err) reject(err);
          else resolve(row);
        }
      );
    });
  }

  /**
   * Close the database connection if one was opened. The outcome is only
   * logged; nothing is returned.
   */
  close() {
    if (this.db) {
      this.db.close((err) => {
        if (err) {
          console.error('Error closing search database:', err);
        } else {
          console.log('Search database closed');
        }
      });
    }
  }
}
module.exports = SearchDatabase;