From 58a85088f12b93979f7076c2d9e81b6f1c3040ef Mon Sep 17 00:00:00 2001
From: Ryderjj89
Date: Sun, 28 Sep 2025 13:17:48 -0400
Subject: [PATCH] Add NKJV encoding fix utility script

Provides a comprehensive tool to clean up text encoding issues in NKJV files:
- Re-decodes files that are not valid UTF-8 as Latin-1 and saves them as clean UTF-8
- Removes BOM markers and Windows line endings
- Strips replacement characters
- Leaves files that are already clean UTF-8 untouched
- Can be run locally: node fix-nkjv-encoding.js
---
 fix-nkjv-encoding.js | 150 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 150 insertions(+)
 create mode 100644 fix-nkjv-encoding.js

diff --git a/fix-nkjv-encoding.js b/fix-nkjv-encoding.js
new file mode 100644
index 00000000..fd6f3c32
--- /dev/null
+++ b/fix-nkjv-encoding.js
@@ -0,0 +1,150 @@
+#!/usr/bin/env node
+
+/**
+ * NKJV Encoding Fix Utility
+ *
+ * Rewrites NKJV markdown files that are not clean UTF-8. A file is rewritten
+ * only when its bytes are not valid UTF-8 or its text contains U+FFFD
+ * replacement characters; every other file is left untouched.
+ *
+ * Usage: node fix-nkjv-encoding.js
+ */
+
+const fs = require('fs');
+const path = require('path');
+
+const NKJV_DIR = './NKJV';
+const UTF8_BOM = Buffer.from([0xef, 0xbb, 0xbf]);
+
+/**
+ * Recursively visits a directory and reports every markdown file in it.
+ *
+ * @param {string} dir - Directory to scan.
+ * @param {(filePath: string) => void} callback - Called with the path of each
+ *   regular file whose name ends in ".md".
+ * @throws {Error} If a directory cannot be read.
+ */
+function walkDirectory(dir, callback) {
+  const entries = fs.readdirSync(dir, { withFileTypes: true });
+
+  for (const entry of entries) {
+    const fullPath = path.join(dir, entry.name);
+
+    if (entry.isDirectory()) {
+      walkDirectory(fullPath, callback);
+    } else if (entry.isFile() && entry.name.endsWith('.md')) {
+      callback(fullPath);
+    }
+  }
+}
+
+/**
+ * Decodes raw file bytes, preferring strict UTF-8 and falling back to Latin-1.
+ *
+ * @param {Buffer} raw - Bytes read from disk.
+ * @returns {{ text: string, wasValidUtf8: boolean }} Decoded text (without a
+ *   leading BOM) and whether the bytes were valid UTF-8.
+ */
+function decodeBytes(raw) {
+  // Drop the BOM at byte level so the Latin-1 fallback cannot turn it into
+  // the three visible characters "ï»¿".
+  const body = raw.subarray(0, 3).equals(UTF8_BOM) ? raw.subarray(3) : raw;
+
+  try {
+    // fatal: true makes invalid byte sequences throw instead of silently
+    // becoming U+FFFD, so real corruption can be told apart from clean text.
+    const text = new TextDecoder('utf-8', { fatal: true }).decode(body);
+    return { text, wasValidUtf8: true };
+  } catch (error) {
+    // Invalid input surfaces as a TypeError; rethrow anything else.
+    if (!(error instanceof TypeError)) {
+      throw error;
+    }
+    const text = body.toString('latin1').normalize('NFC');
+    return { text, wasValidUtf8: false };
+  }
+}
+
+/**
+ * Removes leftover encoding artifacts from decoded text.
+ *
+ * @param {string} text - Decoded file content.
+ * @returns {string} Text without U+FFFD, without a leading BOM, with LF line
+ *   endings, and with surrounding whitespace trimmed.
+ */
+function cleanText(text) {
+  return text
+    .replace(/\ufffd/g, '') // Remove replacement characters
+    .replace(/^\uFEFF/, '') // Remove BOM if present
+    .replace(/\r\n?/g, '\n') // Normalize CRLF and stray CR to LF
+    .trim();
+}
+
+/**
+ * Rewrites one file as clean UTF-8 if it has encoding problems.
+ *
+ * @param {string} filePath - Path of the file to check.
+ * @returns {boolean} True if the file was rewritten, false if it was already
+ *   clean or could not be processed (the error is logged, not thrown).
+ */
+function fixFileEncoding(filePath) {
+  console.log(`Processing: ${filePath}`);
+
+  try {
+    const { text, wasValidUtf8 } = decodeBytes(fs.readFileSync(filePath));
+
+    if (wasValidUtf8 && !text.includes('\ufffd')) {
+      console.log(' ✓ File appears clean');
+      return false;
+    }
+
+    console.log(' ✓ Found encoding issues, fixing...');
+    if (!wasValidUtf8) {
+      console.log(' ✓ Not valid UTF-8, decoded as Latin-1');
+    }
+
+    fs.writeFileSync(filePath, cleanText(text), 'utf-8');
+    console.log(` ✓ Fixed and saved: ${filePath}`);
+    return true;
+  } catch (error) {
+    console.error(` ✗ Failed to process ${filePath}:`, error.message);
+    return false;
+  }
+}
+
+/**
+ * Scans NKJV_DIR, fixes every markdown file, and prints a summary.
+ */
+function main() {
+  console.log('🚀 Starting NKJV encoding fix...');
+  console.log(`Scanning directory: ${NKJV_DIR}`);
+  console.log('');
+
+  let processedCount = 0;
+  let fixedCount = 0;
+
+  try {
+    walkDirectory(NKJV_DIR, (filePath) => {
+      processedCount++;
+      if (fixFileEncoding(filePath)) {
+        fixedCount++;
+      }
+    });
+  } catch (error) {
+    // Most likely the directory is missing or unreadable; report it plainly
+    // instead of crashing with a stack trace.
+    console.error(`✗ Could not scan ${NKJV_DIR}: ${error.message}`);
+    process.exitCode = 1;
+    return;
+  }
+
+  console.log('');
+  console.log('✅ Encoding fix complete!');
+  console.log(`Processed ${processedCount} files, fixed ${fixedCount}`);
+  console.log('');
+  console.log('🔄 Now rebuild your containers:');
+  console.log('docker-compose build --no-cache');
+  console.log('docker-compose up -d');
+}
+
+main();