Add comprehensive encoding fixes for NKJV text corruption

- Detect UTF-8 replacement characters and fall back to Latin-1 encoding
- Add NKJV-specific cleanup to remove replacement chars and normalize line endings
- Strip UTF-8 BOM and handle Windows line endings properly
This commit is contained in:
Ryderjj89
2025-09-28 13:14:43 -04:00
parent 1f526df0bc
commit 2b3d753275

View File

@@ -64,9 +64,35 @@ function getDataDir(version) {
// Helper function to read markdown files with encoding normalization
async function readMarkdownFile(filePath) {
try {
const content = await fs.readFile(filePath, 'utf-8');
// Remove BOM if present and normalize encoding issues
return content.replace(/^\uFEFF/, ''); // Remove UTF-8 BOM
// Try UTF-8 first
let content = await fs.readFile(filePath, 'utf-8');
// Check if content contains replacement characters (indicates wrong encoding)
if (content.includes('\ufffd')) {
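// Retry as ISO-8859-1 (Latin-1). Note: Node's 'latin1' is strict ISO-8859-1, not Windows-1252, so bytes 0x80-0x9F decode to C1 control characters rather than curly quotes/dashes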
try {
const isoContent = await fs.readFile(filePath, 'latin1');
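// readFile has already decoded the Latin-1 bytes into a JS string; NFC normalization only canonicalizes composed characters (it performs no encoding conversion)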
content = isoContent.normalize('NFC');
} catch (isoError) {
console.warn(`Failed to read with Latin-1 encoding for ${filePath}`);
}
}
// Remove BOM if present
content = content.replace(/^\uFEFF/, '');
// Additional cleanup for NKJV files
if (filePath.includes('/NKJV/')) {
// Replace any remaining encoding issues
content = content.replace(/\ufffd/g, ''); // Remove any remaining replacement chars
// Clean up any anomalous line breaks that might emerge in NKJV
content = content.replace(/\r\n/g, '\n'); // Normalize line endings
content = content.replace(/\r/g, '\n'); // Handle stray \r characters
}
return content;
} catch (error) {
throw new Error(`Failed to read file: ${filePath}`);
}