Friday, 28 April 2017

UTF-8/16 encoding issue when turning JSON file to CSV file in nodejs

I am writing a script to turn a json file with Chinese to a csv file. All conversions work but the Chinese contents seem to appear encoding issues. If I open the converted CSV file with Sublime, all Chinese words are correct. If I open it with Excel (my version is 2016), none of these words look right. In addition, a part of information in next column behind the column with Chinese will be attached to uncoded Chinese. If I open CSV with Sublime first and then save with encoding in UTF-8 with BOM, the whole CSV file looks correct in Excel. I know that UTF-16 may be much better in Excel in Windows 10, but nothing changed no matter I commented this code.

data = data.replace(/^\uFEFF/, "");

And this my whole code.

var fs = require('fs');
    enter code here`var json2csv = require('json2csv');
    var Excel = require('exceljs');
    var detectEncoding = require('jschardet');
    var encoding = require("encoding");

fs.readFile('./input.json', function (err, data) {
    var charsetMatch = detectEncoding.detect(data);
    if(charsetMatch.encoding != "UTF-8") {
        data = encoding.convert(data, "UTF-8", charsetMatch.encoding).toString("utf8");
        data = data.replace(/^\uFEFF/, "");
    }
    var jsondata = JSON.parse(data);
    var csvFields = Object.keys(jsondata[0]);
    var unwindFields = [];
    for(var i = 0; i < csvFields.length; i++) {
        if(Array.isArray(jsondata[0][csvFields[i]])) {
            unwindFields.push(i);
        }
    }
    json2csv({data: jsondata, fields: csvFields, excelString: true}, function(err, csv) {
        fs.writeFile('./output.csv', csv, function(err) {
            var workbook = new Excel.Workbook();
            workbook.csv.readFile("./output.csv").then(function(worksheet) {
                for(var i = 0; i < csvFields.length; i++) {
                    for(var x = 0; x < jsondata.length; x++) {
                        var cellCode = (String.fromCharCode(parseInt((i / 26), 10) + 64) + String.fromCharCode(i - 26 * parseInt((i / 26), 10) + 1 + 64) + "" + (x + 2)).replace(/\@/g,"");
                        var cell = worksheet.getCell(cellCode);
                        if(cell.value != "" && cell.value != null && cell.value != undefined && typeof cell.value === "string") {
                            cell.value = cell.value.replace(/\[/g,"(").replace(/\]/g,")").replace(/\)\"\,\"\(/g,"), \n(").replace(/\(\"\(/g,"((").replace(/\)\"\)/g,"))").replace(/\"\,\"/g,",");
                        }
                    }
                }
                workbook.csv.writeFile("./output.csv").then(function() {
                    console.log("All files have been converted successfully!");
                });
            });
        });
    });
});

Anyone can help me solve the Chinese encoding issues in Excel 2016 in Windows 10?



via Dongyang Jiang

No comments:

Post a Comment