如何用Javascript 将汉字转换为gb2312编码?

JavaScript027

如何用Javascript 将汉字转换为gb2312编码?,第1张

实际上 IE 5.5+、Netscape 6+、Mozilla 中已经有了转换函数,即 encodeURIComponent,但对于低版本的浏览器则需要以下代码。

/* ***************************

** Most of this code was kindly

** provided to me by

** Andrew Clover (and at doxdesk dot com)

** http://and.doxdesk.com/

** in response to my plea in my blog at

** http://worldtimzone.com/blog/date/2002/09/24

** It was unclear whether he created it.

*/

/**
 * Encodes a JavaScript (UTF-16) string as UTF-8, returning a "byte string"
 * in which every character has a code between 0x00 and 0xFF.
 *
 * Valid surrogate pairs are combined into a single supplementary code point
 * and emitted as a four-byte sequence. Unpaired surrogates are skipped.
 *
 * @param {string} wide - the string to encode
 * @returns {string} one character per UTF-8 byte
 */
function utf8(wide) {
  var c, s;
  var enc = "";
  var i = 0;

  while (i < wide.length) {
    c = wide.charCodeAt(i++);

    // handle UTF-16 surrogates
    // A low surrogate with no preceding high surrogate is dropped.
    if (c >= 0xDC00 && c < 0xE000) continue;
    if (c >= 0xD800 && c < 0xDC00) {
      // High surrogate at the very end of the input: nothing to pair with.
      if (i >= wide.length) continue;
      // Peek at the next unit; only consume it when it really is a low
      // surrogate, so a lone high surrogate does not swallow the valid
      // character that follows it.
      s = wide.charCodeAt(i);
      if (s < 0xDC00 || s >= 0xE000) continue;
      i++;
      c = ((c - 0xD800) << 10) + (s - 0xDC00) + 0x10000;
    }

    // output value
    if (c < 0x80) {
      enc += String.fromCharCode(c);
    } else if (c < 0x800) {
      enc += String.fromCharCode(0xC0 + (c >> 6), 0x80 + (c & 0x3F));
    } else if (c < 0x10000) {
      enc += String.fromCharCode(
        0xE0 + (c >> 12),
        0x80 + ((c >> 6) & 0x3F),
        0x80 + (c & 0x3F)
      );
    } else {
      enc += String.fromCharCode(
        0xF0 + (c >> 18),
        0x80 + ((c >> 12) & 0x3F),
        0x80 + ((c >> 6) & 0x3F),
        0x80 + (c & 0x3F)
      );
    }
  }

  return enc;
}

// Uppercase hexadecimal digits, indexed by nibble value (0-15).
var hexchars = "0123456789ABCDEF";

/**
 * Formats a byte value as two uppercase hexadecimal digits.
 *
 * @param {number} n - byte value; the caller in this file passes char codes
 *     of the utf8() output, i.e. 0-255
 * @returns {string} two hex digits, e.g. 10 -> "0A"
 */
function toHex(n) {
  // High nibble first, then low nibble.
  return hexchars.charAt(n >> 4) + hexchars.charAt(n & 0xF);
}

// Characters copied to the output unescaped. Note: this set is stricter than
// the native encodeURIComponent, which also leaves . ! ~ * ' ( ) unescaped, so
// the fallback percent-encodes those characters as well.
var okURIchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";

/**
 * Fallback for browsers without a native encodeURIComponent: converts the
 * input to UTF-8 bytes via utf8() and percent-encodes every byte that is not
 * listed in okURIchars.
 *
 * @param {string} s - the text to encode
 * @returns {string} the percent-encoded text
 */
function encodeURIComponentNew(s) {
  var bytes = utf8(s);
  var enc = "";

  for (var i = 0; i < bytes.length; i++) {
    var ch = bytes.charAt(i);
    if (okURIchars.indexOf(ch) == -1) {
      enc += "%" + toHex(bytes.charCodeAt(i));
    } else {
      enc += ch;
    }
  }

  return enc;
}

/**
 * Percent-encodes a field value for use in a URL.
 *
 * Uses the built-in encodeURIComponent when the environment provides one
 * (IE 5.5+, Netscape 6+, Mozilla) and otherwise falls back to
 * encodeURIComponentNew (Netscape 4, IE 4, IE 5.0).
 *
 * @param {string} fld - the value to encode
 * @returns {string|boolean} the encoded value, or false when fld compares
 *     loosely equal to the empty string
 */
function URLEncode(fld) {
  // Loose comparison kept on purpose: values such as 0 also take this branch.
  if (fld == "") {
    return false;
  }

  var hasNativeEncoder = typeof encodeURIComponent == "function";
  return hasNativeEncoder
    ? encodeURIComponent(fld)
    : encodeURIComponentNew(fld);
}

JAVA有一个public String(byte bytes[], Charset charset)函数可以用指定字节数组和编码来构造字符串。一个public byte[] getBytes(Charset charset)函数把字符串按指定编码来得到字节数组。可以用这两个函数来实现编码转换。

下面是一个简单的例子。注意例子中文字本身的编码,最好在自己的环境中用 gb2312 重新输入,不然可能是乱码。当然,转换后的输出肯定有一个是乱码,也可能都是乱码,这与你的编辑器的编码格式有关。

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

/**
 * Small demonstration of re-interpreting a string's bytes under different
 * charsets using String(byte[], String) and String.getBytes(String).
 */
public class EncodingTest
{
    /**
     * Decodes the literal's bytes as gb2312, prints the result, then
     * re-encodes as gb2312 and decodes those bytes as ISO-8859-1 and prints
     * that too.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        try
        {
            // getBytes() with no argument uses the platform default charset,
            // so this round-trips cleanly only when that default is
            // compatible with gb2312.
            String gb = new String("国标2312".getBytes(), "gb2312");
            System.out.println(gb);

            byte[] b = gb.getBytes("gb2312");
            // Decoding gb2312 bytes as ISO-8859-1 maps each byte to one char.
            String ios = new String(b, "ISO-8859-1");
            System.out.println(ios);
        } catch (java.io.UnsupportedEncodingException e)
        {
            // Fully qualified because the snippet carries no import block.
            e.printStackTrace();
        }
    }
}