admin管理员组文章数量:1330658
I wanted to write a method to escape special chars like 'ä' to their corresponding Unicode escape sequences (e.g. \u00e4).
For some reason JS finds it amusing to not even save the 'ä' internally but use 'üÜ' or some other garble, so when I convert it spits out '\u00c3\u00b6\u00c3\u002013' because it converts these chars instead of 'ä'.
I have tried setting the HTML file's encoding to utf-8 and tried loading the scripts with charset="UTF-8" to no avail. The code doesn't really do anything special but here it is:
/**
 * Returns a copy of this string in which every UTF-16 code unit above 126
 * is replaced by its `\uXXXX` escape sequence. All other characters are
 * copied unchanged.
 *
 * Each replacement is also logged to the console next to the character
 * it stands for.
 *
 * @returns {string} The escaped string.
 */
String.prototype.replaceWithUtf8 = function() {
    var str_newString = '';
    var str_procString = this;
    for (var i = 0; i < str_procString.length; i++) {
        var int_charCode = str_procString.charCodeAt(i);
        if (int_charCode > 126) {
            // A \u escape needs exactly four hex digits. A fixed "00" prefix
            // is only right for two-digit values and breaks for code units
            // above 0xFF, so left-pad with zeros and keep the last four digits.
            var hex_uniCode = '\\u' + ('000' + int_charCode.toString(16)).slice(-4);
            console.log(hex_uniCode + " (" + str_procString.charAt(i) + ")");
            str_newString += hex_uniCode;
        } else {
            str_newString += str_procString.charAt(i);
        }
    }
    return str_newString;
};
// Sample sentence containing German umlauts.
var str_item = "Lärm, Lichter, Lücken, Löcher.";

// Print the raw string first, then its escaped form.
console.log(str_item); // Lärm, Lichter, Lücken, Löcher.

// The trailing comment shows the output the asker reported: two escapes per
// umlaut rather than one.
var str_escapedItem = str_item.replaceWithUtf8();
console.log(str_escapedItem); //L\u00c3\u00a4rm, Lichter, L\u00c3\u00bccken, L\u00c3\u00b6cher.
I wanted to write a method to escape special chars like 'ä' to their corresponding Unicode escape sequences (e.g. \u00e4).
For some reason JS finds it amusing to not even save the 'ä' internally but use 'üÜ' or some other garble, so when I convert it spits out '\u00c3\u00b6\u00c3\u002013' because it converts these chars instead of 'ä'.
I have tried setting the HTML file's encoding to utf-8 and tried loading the scripts with charset="UTF-8" to no avail. The code doesn't really do anything special but here it is:
/**
 * Returns a copy of this string in which every UTF-16 code unit above 126
 * is replaced by its `\uXXXX` escape sequence. All other characters are
 * copied unchanged.
 *
 * Each replacement is also logged to the console next to the character
 * it stands for.
 *
 * @returns {string} The escaped string.
 */
String.prototype.replaceWithUtf8 = function() {
    var str_newString = '';
    var str_procString = this;
    for (var i = 0; i < str_procString.length; i++) {
        var int_charCode = str_procString.charCodeAt(i);
        if (int_charCode > 126) {
            // A \u escape needs exactly four hex digits. A fixed "00" prefix
            // is only right for two-digit values and breaks for code units
            // above 0xFF, so left-pad with zeros and keep the last four digits.
            var hex_uniCode = '\\u' + ('000' + int_charCode.toString(16)).slice(-4);
            console.log(hex_uniCode + " (" + str_procString.charAt(i) + ")");
            str_newString += hex_uniCode;
        } else {
            str_newString += str_procString.charAt(i);
        }
    }
    return str_newString;
};
// Sample sentence containing German umlauts.
var str_item = "Lärm, Lichter, Lücken, Löcher.";

// Print the raw string first, then its escaped form.
console.log(str_item); // Lärm, Lichter, Lücken, Löcher.

// The trailing comment shows the output the asker reported: two escapes per
// umlaut rather than one.
var str_escapedItem = str_item.replaceWithUtf8();
console.log(str_escapedItem); //L\u00c3\u00a4rm, Lichter, L\u00c3\u00bccken, L\u00c3\u00b6cher.
Share
asked Nov 6, 2012 at 9:42
ProudOneProudOne
3874 silver badges17 bronze badges
5
- This seems to be working fine (jsfiddle/4HmgN). How did you set the encoding on the HTML? – mihai Commented Nov 6, 2012 at 10:08
-
Hey @mihai, I set it like so in the head-tag:
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
– ProudOne Commented Nov 6, 2012 at 11:51 -
@mihai and like so in the script tags:
<script type="text/javascript" charset="UTF-8" src="script/utf8.js"></script>
– ProudOne Commented Nov 6, 2012 at 11:53 - sounds good...I'm still getting correct results in Chrome/WinXP – mihai Commented Nov 6, 2012 at 11:57
- @mihai thanks for trying. I feel trolled by technology ;D – ProudOne Commented Nov 6, 2012 at 12:04
3 Answers
Reset to default
3
I have no idea how or why, but I just restarted the server again and now it's displaying correctly. As a follow-up, here's the code for everyone who's interested:
/**
 * Builds an escaped copy of this string.
 *
 * - `/` and `"` are prefixed with a backslash.
 * - Any code unit greater than 126 (and below 65536) becomes a `\uXXXX`
 *   escape with four hex digits.
 * - Everything else is copied through untouched.
 *
 * Every substitution is echoed to the console as it happens.
 *
 * @returns {string} The escaped string.
 */
String.prototype.replaceWithUtf8 = function() {
    var input = this;
    var specials = ['/', '"'];
    var specialEscapes = ['\\/', '\\"'];
    var pieces = [];
    var pos = 0;

    while (pos < input.length) {
        var ch = input.charAt(pos);
        var code = input.charCodeAt(pos);
        var hit = specials.indexOf(ch);
        pos += 1;

        // Characters with a dedicated backslash escape take priority.
        if (hit !== -1) {
            console.log(specialEscapes[hit]);
            pieces.push(specialEscapes[hit]);
            continue;
        }

        // Outside the escaped range: keep the character verbatim.
        if (code <= 126 || code >= 65536) {
            pieces.push(ch);
            continue;
        }

        // Zero-pad the hex value and keep its last four digits.
        var padded = '000' + code.toString(16);
        var escaped = '\\u' + padded.slice(-4);
        console.log(escaped + " (" + ch + ")");
        pieces.push(escaped);
    }

    return pieces.join('');
};
Use '\\u' + ('000' + str_procString.charCodeAt(i).toString(16)).substr(-4); instead to get the right escape sequences - yours always start with 00. Also, instead of a for-loop processing your string, .replace() might be faster.
On your question:
// Quoted from the question: the logged literal, with the output the asker reported in the trailing comment.
console.log("Lärm, Lichter, Lücken, Löcher."); // Lärm, Lichter, Lücken, Löcher.
does not sound as if you really served the file with the right encoding. It might be a server problem, too, if the file is already saved correctly.
/**
 * Builds an escaped copy of this string using the shortest hex escape
 * available for each non-ASCII code unit.
 *
 * - `/` and `"` are prefixed with a backslash.
 * - Code units greater than 126 and below 256 become `\xNN` (two hex digits).
 * - Code units from 256 up to 65535 become `\uNNNN` (four hex digits).
 * - Everything else is copied through untouched.
 *
 * @returns {string} The escaped string.
 */
String.prototype.replaceWithUtf8 = function() {
    // Formats a single code unit as a \xNN or \uNNNN escape.
    function escapeUnit(code) {
        var hex = code.toString(16);
        if (code < 256) {
            return "\\x" + (code > 15 ? "" : "0") + hex;
        }
        return "\\u" + ("0000" + hex).slice(-4);
    }

    var text = this;
    var out = "";
    for (var idx = 0; idx < text.length; idx++) {
        var ch = text.charAt(idx);
        var code = text.charCodeAt(idx);
        if (ch === "/") {
            out += "\\/";
        } else if (ch === '"') {
            out += '\\"';
        } else if (code > 126 && code < 65536) {
            out += escapeUnit(code);
        } else {
            out += ch;
        }
    }
    return out;
};
// Escape the sample sentence and show the result in a prompt dialog.
var str_escapedSample = "Lärm, Lichter, Lücken, Löcher.".replaceWithUtf8();
prompt("Your escaped string:", str_escapedSample);

// The same sentence written with \xNN escapes, shown in an alert dialog.
alert("L\xe4rm, Lichter, L\xfccken, L\xf6cher.");
A Unicode escape (\uXXXX) always takes 6 characters per escaped character. But for characters from 127 to 255, we can use the shorter hexadecimal escape (\xXX) instead, which takes only 4 characters per escaped character.
本文标签: JavaScript encoding with Special charactersStack Overflow
版权声明:本文标题:JavaScript encoding with Special characters - Stack Overflow 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1742262686a2442808.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论