admin管理员组

文章数量:1331691

I wanted to write a method to escape special chars like 'ä' to their corresponding Unicode escape sequences (e.g. \u00e4).

For some reason JS finds it amusing to not even save the 'ä' internally but use 'üÜ' or some other garble, so when I convert it spits out '\u00c3\u00b6\u00c3\u002013' because it converts these chars instead of 'ä'.

I have tried setting the HTML file's encoding to utf-8 and tried loading the scripts with charset="UTF-8" to no avail. The code doesn't really do anything special but here it is:

/**
 * Returns a copy of this string in which every UTF-16 code unit greater
 * than 126 is replaced by its `\uXXXX` escape sequence; all other
 * characters are copied through unchanged.
 *
 * Each escape that is produced is also written to the console together
 * with the character it stands for.
 *
 * Works on code units (via charCodeAt), so a character outside the BMP
 * is emitted as two consecutive escapes, one per surrogate.
 *
 * @returns {string} the escaped string
 */
String.prototype.replaceWithUtf8 = function() {
    var str_newString = '';
    var str_procString = this;

    for (var i = 0; i < str_procString.length; i++) {
        var int_charCode = str_procString.charCodeAt(i);
        if (int_charCode > 126) {
            // A \u escape needs exactly four hex digits. Left-pad with zeros
            // and keep the last four, so 0xe4 -> "00e4" and 0x20ac -> "20ac"
            // (a fixed "00" prefix would turn the latter into "0020ac").
            var hex_uniCode = '\\u' + ('0000' + int_charCode.toString(16)).slice(-4);
            console.log(hex_uniCode + " (" + str_procString.charAt(i) + ")");
            str_newString += hex_uniCode;
        } else {
            str_newString += str_procString.charAt(i);
        }
    }
    return str_newString;
};
// Sample input: a sentence containing the German umlauts ä, ü and ö.
var str_item = "Lärm, Lichter, Lücken, Löcher."

// First print the raw string, then its escaped form. The trailing comments
// record the output the author reported for each call.
console.log(str_item); // Lärm, Lichter, Lücken, Löcher. 
// NOTE(review): the reported escapes come in pairs starting with c3 (c3 a4,
// c3 bc, c3 b6), which matches the UTF-8 byte sequences of ä, ü, ö - this
// suggests the script source was decoded with a single-byte charset.
console.log(str_item.replaceWithUtf8()); //L\u00c3\u00a4rm, Lichter, L\u00c3\u00bccken, L\u00c3\u00b6cher. 

I wanted to write a method to escape special chars like 'ä' to their corresponding Unicode escape sequences (e.g. \u00e4).

For some reason JS finds it amusing to not even save the 'ä' internally but use 'üÜ' or some other garble, so when I convert it spits out '\u00c3\u00b6\u00c3\u002013' because it converts these chars instead of 'ä'.

I have tried setting the HTML file's encoding to utf-8 and tried loading the scripts with charset="UTF-8" to no avail. The code doesn't really do anything special but here it is:

/**
 * Returns a copy of this string in which every UTF-16 code unit greater
 * than 126 is replaced by its `\uXXXX` escape sequence; all other
 * characters are copied through unchanged.
 *
 * Each escape that is produced is also written to the console together
 * with the character it stands for.
 *
 * Works on code units (via charCodeAt), so a character outside the BMP
 * is emitted as two consecutive escapes, one per surrogate.
 *
 * @returns {string} the escaped string
 */
String.prototype.replaceWithUtf8 = function() {
    var str_newString = '';
    var str_procString = this;

    for (var i = 0; i < str_procString.length; i++) {
        var int_charCode = str_procString.charCodeAt(i);
        if (int_charCode > 126) {
            // A \u escape needs exactly four hex digits. Left-pad with zeros
            // and keep the last four, so 0xe4 -> "00e4" and 0x20ac -> "20ac"
            // (a fixed "00" prefix would turn the latter into "0020ac").
            var hex_uniCode = '\\u' + ('0000' + int_charCode.toString(16)).slice(-4);
            console.log(hex_uniCode + " (" + str_procString.charAt(i) + ")");
            str_newString += hex_uniCode;
        } else {
            str_newString += str_procString.charAt(i);
        }
    }
    return str_newString;
};
// Sample input: a sentence containing the German umlauts ä, ü and ö.
var str_item = "Lärm, Lichter, Lücken, Löcher."

// First print the raw string, then its escaped form. The trailing comments
// record the output the author reported for each call.
console.log(str_item); // Lärm, Lichter, Lücken, Löcher. 
// NOTE(review): the reported escapes come in pairs starting with c3 (c3 a4,
// c3 bc, c3 b6), which matches the UTF-8 byte sequences of ä, ü, ö - this
// suggests the script source was decoded with a single-byte charset.
console.log(str_item.replaceWithUtf8()); //L\u00c3\u00a4rm, Lichter, L\u00c3\u00bccken, L\u00c3\u00b6cher. 
Share asked Nov 6, 2012 at 9:42 ProudOneProudOne 3874 silver badges17 bronze badges 5
  • This seems to be working fine (jsfiddle.net/4HmgN). How did you set the encoding on the HTML? – mihai Commented Nov 6, 2012 at 10:08
  • Hey @mihai, I set it like so in the head-tag: <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> – ProudOne Commented Nov 6, 2012 at 11:51
  • @mihai and like so in the script tags: <script type="text/javascript" charset="UTF-8" src="script/utf8.js"></script> – ProudOne Commented Nov 6, 2012 at 11:53
  • sounds good...I'm still getting correct results in Chrome/WinXP – mihai Commented Nov 6, 2012 at 11:57
  • @mihai thanks for trying. I feel trolled by technology ;D – ProudOne Commented Nov 6, 2012 at 12:04
Add a comment  | 

3 Answers 3

Reset to default 3

I have no idea how or why but I just restarted the server again and now it's displaying correctly. To follow up; here's the code for everyone who's interested:

/**
 * Builds an escaped copy of this string:
 *   - '/' and '"' get a backslash put in front of them,
 *   - every UTF-16 code unit above 126 becomes a four-digit `\uXXXX` escape,
 *   - everything else is copied as is.
 *
 * Every replacement that is made is also logged to the console.
 *
 * @returns {string} the escaped string
 */
String.prototype.replaceWithUtf8 = function() {
    var source = this;
    var pieces = [];
    var pos = 0;
    var ch, code, escaped;

    while (pos < source.length) {
        ch = source.charAt(pos);
        code = source.charCodeAt(pos);
        pos += 1;

        // Slash and double quote are simply prefixed with a backslash.
        if (ch === '/' || ch === '"') {
            escaped = '\\' + ch;
            console.log(escaped);
            pieces.push(escaped);
            continue;
        }

        // Anything past '~' is written as \u plus four hex digits; values
        // in this range have at least two hex digits, so three pad zeros
        // are enough before trimming to the last four.
        if (code > 126 && code < 65536) {
            escaped = '\\u' + ('000' + code.toString(16)).slice(-4);
            console.log(escaped + " (" + ch + ")");
            pieces.push(escaped);
            continue;
        }

        pieces.push(ch);
    }

    return pieces.join('');
};

Use '\\u' + ('000' + str_procString.charCodeAt(i).toString(16) ).substr(-4); instead to get the right escape sequences - yours always start with 00, which is wrong for any character code above 0xff. Also, instead of processing your string in a for-loop, .replace() might be faster.

On your question:

// Logs the literal directly, with no escaping step involved; the trailing
// comment records the output that was observed for it.
console.log("Lärm, Lichter, Lücken, Löcher."); // Lärm, Lichter, Lücken, Löcher.

does not sound as if the file was really sent with the right encoding. It might also be a server problem, if the file is already saved correctly.

/**
 * Builds an escaped copy of this string using the shortest available
 * escape for each character:
 *   - '/' and '"' get a backslash put in front of them,
 *   - code units from 127 to 255 become a two-digit `\xXX` escape,
 *   - code units from 256 upwards become a four-digit `\uXXXX` escape,
 *   - everything else is copied as is.
 *
 * @returns {string} the escaped string
 */
String.prototype.replaceWithUtf8 = function() {
  // Encodes every code unit of `text` as \xXX (below 256) or \uXXXX.
  function toEscapes(text) {
    var out = "";
    var pos = 0;
    var unit = text.charCodeAt(pos);

    // charCodeAt yields NaN once the index runs past the end of the text.
    while (!isNaN(unit)) {
      var hex = unit.toString(16);
      if (unit < 256) {
        out += "\\x" + (unit > 15 ? "" : "0") + hex;
      } else {
        out += "\\u" + ("0000" + hex).slice(-4);
      }
      pos += 1;
      unit = text.charCodeAt(pos);
    }
    return out;
  }

  var source = this;
  var specials = [ "/", '"' ];
  var replacements = [ "\\/", '\\"' ];
  var result = "";

  for (var idx = 0; idx < source.length; idx++) {
    var code = source.charCodeAt(idx);
    var ch = source.charAt(idx);
    var hit = specials.indexOf(ch);

    if (hit > -1) {
      result += replacements[hit];
    } else if (code > 126 && code < 65536) {
      result += toEscapes(ch);
    } else {
      result += ch;
    }
  }
  return result;
};

// Show the escaped form of the sample sentence in a prompt dialog.
prompt("Your escaped string:","Lärm, Lichter, Lücken, Löcher.".replaceWithUtf8());

// The same sentence written with \xXX escapes (e4 = ä, fc = ü, f6 = ö).
alert("L\xe4rm, Lichter, L\xfccken, L\xf6cher.");

A \uXXXX Unicode escape always takes 6 characters. For characters with codes from 127 up to 255, we can use the shorter hexadecimal escape \xXX instead, which takes only 4 characters per escaped character.

本文标签: JavaScript encoding with Special charactersStack Overflow