메뉴 건너뛰기

Dev tips

ASP ASP를 이용한 Decode and Encode UTF-8

taknim 2008.10.17 03:20 조회 수 : 9662 추천:2115

<%
option explicit

' Simple functions to convert the first 256 characters
' of the Windows character set from and to UTF-8.

' Written by Hans Kalle for Fisz
' http://www.fisz.nl

'IsValidUTF8
'  Tells if the string is structurally valid UTF-8. Every character of s is
'  treated as one byte (its Asc value). For each byte with the high bit set,
'  the number of continuation bytes (10xxxxxx) that follow must match the
'  length announced by the lead byte (110xxxxx = 2, 1110xxxx = 3,
'  11110xxx = 4). Overlong forms and code point ranges are not checked.
'Parameters:
'  s  the string to examine (not modified)
'Returns:
'  true (valid UTF-8, including the empty string and plain ASCII)
'  false (invalid UTF-8 or not UTF-8 encoded string)
function IsValidUTF8(s)
  dim i      ' 1-based position of the byte that starts the current sequence
  dim c      ' value of that byte
  dim n      ' sequence length: the lead byte plus its continuation bytes
  dim total  ' length of s, hoisted out of the loops

  IsValidUTF8 = false
  total = len(s)
  i = 1
  do while i <= total
    c = asc(mid(s,i,1))
    if c and &H80 then
      ' Count the continuation bytes that directly follow position i.
      ' The bound is inclusive so that the last byte of the string is
      ' examined too; otherwise a string ending in a multi-byte character
      ' would be rejected.
      n = 1
      do while i + n <= total
        if (asc(mid(s,i+n,1)) and &HC0) <> &H80 then
          exit do
        end if
        n = n + 1
      loop
      select case n
      case 1
        ' A high byte with no continuation byte: either a lead byte that
        ' was cut short or a non-UTF-8 (e.g. ANSI) character.
        exit function
      case 2
        if (c and &HE0) <> &HC0 then
          exit function
        end if
      case 3
        if (c and &HF0) <> &HE0 then
          exit function
        end if
      case 4
        if (c and &HF8) <> &HF0 then
          exit function
        end if
      case else
        ' More than three continuation bytes in a row is never valid.
        exit function
      end select
      i = i + n
    else
      i = i + 1
    end if
  loop
  IsValidUTF8 = true
end function

'DecodeUTF8
'  Decodes a UTF-8 string to the Windows character set. Every character of
'  s is treated as one byte (its Asc value). Only the two-byte sequences
'  with lead byte C2 or C3 (U+0080..U+00FF) fit in a single byte and are
'  converted. Non-convertible characters and malformed sequences are
'  replaced by an upside down question mark (character 191).
'Parameters:
'  s  the UTF-8 encoded string (not modified)
'Returns:
'  A Windows string
function DecodeUTF8(s)
  dim i     ' 1-based position in work
  dim c     ' current byte, later the decoded character code
  dim n     ' sequence length: the byte at i plus its continuation bytes
  dim work  ' local copy; VBScript passes arguments ByRef by default, so
            ' assigning to s would overwrite the caller's variable

  work = s
  i = 1
  do while i <= len(work)
    c = asc(mid(work,i,1))
    if c and &H80 then
      ' Count the continuation bytes (10xxxxxx) following position i. The
      ' bound is inclusive so the last byte of the string is examined too.
      n = 1
      do while i + n <= len(work)
        if (asc(mid(work,i+n,1)) and &HC0) <> &H80 then
          exit do
        end if
        n = n + 1
      loop
      if n = 2 and (c = &HC2 or c = &HC3) then
        ' C2 xx -> xx (0x80..0xBF), C3 xx -> xx + 0x40 (0xC0..0xFF)
        c = asc(mid(work,i+1,1)) + &H40 * (c and &H01)
      else
        ' Anything else cannot be represented in one byte.
        c = 191
      end if
      ' Collapse the whole n-byte sequence into a single character.
      work = left(work,i-1) & chr(c) & mid(work,i+n)
    end if
    i = i + 1
  loop
  DecodeUTF8 = work
end function

'EncodeUTF8
'  Encodes a Windows string in UTF-8. Every character whose Asc value is
'  &H80 or higher is replaced by a two-byte sequence: lead byte C2 for
'  0x80..0xBF or C3 for 0xC0..0xFF, followed by the continuation byte
'  10xxxxxx holding the low six bits. Other characters are copied as is.
'Parameters:
'  s  the Windows string (not modified)
'Returns:
'  A UTF-8 encoded string
function EncodeUTF8(s)
  dim i     ' 1-based position in work
  dim c     ' character code at position i
  dim work  ' local copy; VBScript passes arguments ByRef by default, so
            ' assigning to s would overwrite the caller's variable

  work = s
  i = 1
  do while i <= len(work)
    c = asc(mid(work,i,1))
    if c >= &H80 then
      ' Bit 6 selects the lead byte (C2 or C3); clearing it in c yields
      ' the continuation byte. "\" is integer division.
      work = left(work,i-1) & chr(&HC2 + ((c and &H40) \ &H40)) & chr(c and &HBF) & mid(work,i+1)
      ' Skip the extra byte that was just inserted.
      i = i + 1
    end if
    i = i + 1
  loop
  EncodeUTF8 = work
end function
%>


To test the functions, one can use the code below. Place this code in a separate file, utf8test.asp for example.

<!--#include file="utf8.asp"-->
<%
' Test page for the functions in utf8.asp.
' The test inputs are built with Chr() so that the exact byte sequences do
' not depend on the encoding this file is saved in. The visible labels use
' HTML entities for the same reason.
dim eUtf8  ' e-diaeresis (U+00EB) as UTF-8: C3 AB
dim eAnsi  ' the same character as a single Windows byte: EB
eUtf8 = Chr(&HC3) & Chr(&HAB)
eAnsi = Chr(&HEB)
%>
<html>
<head>
<title>Test UTF encoding en decoding</title>
</head>
<body>
<h1>Decoding</h1>
<p>Kopi&euml;ren ok: <%=DecodeUTF8("Kopi" & eUtf8 & "ren")%></p>
<p>Kopi&euml;ren error: <%=DecodeUTF8("Kopi" & eAnsi & "ren")%></p>
<p>Kopi&euml;ren error: <%=DecodeUTF8("Kopi" & Chr(&HC3) & "ren")%></p>
<p>Kopi&euml;ren error: <%=DecodeUTF8("Kopi" & eUtf8 & Chr(&HAB) & "ren")%></p>
<p>Kopi&euml;ren error: <%=DecodeUTF8("Kopi" & Chr(&HE2) & Chr(&HAB) & "ren")%></p>
<h1>Encoding</h1>
<p>Kopi&euml;ren UTF-8: <%=EncodeUTF8("Kopi" & eAnsi & "ren")%></p>
<p>Kopi&euml;ren forth & back: <%=DecodeUTF8(EncodeUTF8("Kopi" & eAnsi & "ren"))%></p>
<h1>Testing encoding</h1>
<p>Kopi&euml;ren (single byte EB): <%=IsValidUTF8("Kopi" & eAnsi & "ren")%></p>
<p>Kopi&euml;ren (UTF-8, C3 AB): <%=IsValidUTF8("Kopi" & eUtf8 & "ren")%></p>
<p>Kopi&euml;ren (C3 AB AB, extra continuation byte): <%=IsValidUTF8("Kopi" & eUtf8 & Chr(&HAB) & "ren")%></p>
<p>Aeroplane: <%=IsValidUTF8("Aeroplane")%></p>
</body>
</html>