Add NKJV encoding fix utility script

Provides a comprehensive tool to clean up text encoding issues in NKJV files:
- Re-reads files that show encoding issues as Latin-1 and writes them back as clean UTF-8
- Removes BOM markers and Windows line endings
- Strips replacement characters
- Can be run locally: node fix-nkjv-encoding.js
This commit is contained in:
Ryderjj89
2025-09-28 13:17:48 -04:00
parent 2b3d753275
commit 58a85088f1

101
fix-nkjv-encoding.js Normal file
View File

@@ -0,0 +1,101 @@
#!/usr/bin/env node
/**
* NKJV Encoding Fix Utility
* Converts NKJV markdown files from problematic encoding to clean UTF-8
*/
const fs = require('fs');
const path = require('path');
const NKJV_DIR = './NKJV';
/**
 * Recursively visit every Markdown file beneath a directory.
 *
 * Entries are handled in the order `fs.readdirSync` returns them; a
 * sub-directory is fully descended into before the next sibling entry is
 * looked at. Only regular files whose name ends in `.md` are reported.
 *
 * @param {string} dir - Directory to scan.
 * @param {(filePath: string) => void} callback - Called with the joined path
 *   of each matching file.
 */
function walkDirectory(dir, callback) {
  fs.readdirSync(dir, { withFileTypes: true }).forEach((dirent) => {
    const target = path.join(dir, dirent.name);

    if (dirent.isDirectory()) {
      walkDirectory(target, callback);
      return;
    }

    const isMarkdownFile = dirent.isFile() && dirent.name.endsWith('.md');
    if (isMarkdownFile) {
      callback(target);
    }
  });
}
/**
 * Repair the text encoding of a single file in place.
 *
 * The file is read once as raw bytes and classified:
 *   - If the bytes are valid UTF-8 they are kept as UTF-8 text, so legitimate
 *     multi-byte characters (curly quotes, dashes, ...) are never re-decoded.
 *   - Otherwise the bytes are decoded as Latin-1, where every byte maps to
 *     the code point of the same value.
 * The text is then stripped of U+FFFD replacement characters and a leading
 * BOM, line endings are normalized to `\n`, surrounding whitespace is
 * trimmed, and the result is written back as UTF-8. Files that are valid
 * UTF-8 and contain none of those artifacts are left untouched.
 *
 * @param {string} filePath - Path of the file to inspect and, if needed, rewrite.
 * @returns {boolean} `true` when the file was rewritten; `false` when it was
 *   already clean or could not be processed (the error is logged, not thrown).
 */
function fixFileEncoding(filePath) {
  console.log(`Processing: ${filePath}`);
  try {
    const rawBytes = fs.readFileSync(filePath);
    const utf8Text = rawBytes.toString('utf-8');

    // Decoding invalid UTF-8 substitutes U+FFFD for the bad bytes, so the
    // decoded text only re-encodes to the same bytes when the input was valid.
    const isValidUtf8 = Buffer.from(utf8Text, 'utf-8').equals(rawBytes);

    const needsCleanup =
      !isValidUtf8 ||
      utf8Text.includes('\ufffd') ||
      utf8Text.startsWith('\uFEFF') ||
      utf8Text.includes('\r');

    if (!needsCleanup) {
      console.log(` ✓ File appears clean`);
      return false;
    }

    console.log(` ✓ Found encoding issues, fixing...`);

    let content = utf8Text;
    if (!isValidUtf8) {
      // Drop a UTF-8 BOM at byte level; decoded as Latin-1 it would otherwise
      // survive as three visible junk characters at the start of the file.
      const hasUtf8Bom =
        rawBytes.length >= 3 &&
        rawBytes[0] === 0xef &&
        rawBytes[1] === 0xbb &&
        rawBytes[2] === 0xbf;
      const payload = hasUtf8Bom ? rawBytes.subarray(3) : rawBytes;
      // NOTE(review): Node's 'latin1' maps bytes 0x80-0x9F to C1 control
      // characters; if the sources are really Windows-1252, smart quotes in
      // that range will need an explicit mapping - confirm against the data.
      content = payload.toString('latin1').normalize('NFC');
      console.log(` ✓ Successfully converted to Latin-1 encoding`);
    }

    // Clean up any remaining encoding artifacts
    const cleaned = content
      .replace(/\ufffd/g, '') // Remove replacement characters
      .replace(/^\uFEFF/, '') // Remove UTF-8 BOM if present
      .replace(/\r\n/g, '\n') // Normalize Windows line endings
      .replace(/\r/g, '\n') // Handle stray carriage returns
      .trim();

    // Write back cleaned content
    fs.writeFileSync(filePath, cleaned, 'utf-8');
    console.log(` ✓ Fixed and saved: ${filePath}`);
    return true;
  } catch (error) {
    console.error(` ✗ Failed to process ${filePath}:`, error.message);
    return false;
  }
}
// Script entry point: scan NKJV_DIR, repair each Markdown file, then report
// how many files were visited and how many were actually changed on disk.
console.log('🚀 Starting NKJV encoding fix...');
console.log(`Scanning directory: ${NKJV_DIR}`);
console.log('');
let processedCount = 0;
let fixedCount = 0;
walkDirectory(NKJV_DIR, (filePath) => {
  processedCount++;

  // Snapshot the bytes first so "fixed" means the file really changed on disk.
  let bytesBefore;
  try {
    bytesBefore = fs.readFileSync(filePath);
  } catch (readError) {
    // One unreadable file should not abort the whole run.
    console.error(` ✗ Failed to read ${filePath}:`, readError.message);
    return;
  }

  fixFileEncoding(filePath);

  // Check if file was modified
  try {
    const bytesAfter = fs.readFileSync(filePath);
    if (!bytesAfter.equals(bytesBefore)) {
      fixedCount++;
    }
    // Decoding as UTF-8 yields U+FFFD for any byte sequence that is still invalid.
    if (bytesAfter.toString('utf-8').includes('\ufffd')) {
      console.log(` ⚠️ Replacement characters remain in ${filePath}`);
    }
  } catch (checkError) {
    console.error(` ✗ Failed to verify ${filePath}:`, checkError.message);
  }
});
console.log('');
console.log('✅ Encoding fix complete!');
console.log(`Processed ${processedCount} files`);
console.log(`Fixed ${fixedCount} files`);
console.log('');
console.log('🔄 Now rebuild your containers:');
console.log('docker-compose build --no-cache');
console.log('docker-compose up -d');