본문 바로가기

Program/DB

SQL Server 에서 문자를 UTF-8로 변환

출처 : https://gist.github.com/sevaa/f084a0a5a994c3bc28e518d5c708d5f6

 

Converting an NVARCHAR string to a UTF-8 VARBINARY data block in pure Transact-SQL

Converting an NVARCHAR string to a UTF-8 VARBINARY data block in pure Transact-SQL - ToUTF8.sql

gist.github.com

USE master;

GO



create function [dbo].[ToUTF8](@s nvarchar(max)) 
returns varbinary(max) 
as 
begin 
    declare @i int = 1, @n int = datalength(@s)/2, @r varbinary(max) = 0x, @c int, @d varbinary(4) 
    while @i <= @n 
    begin 
        set @c = unicode(substring(@s, @i, 1)) 
        if (@c & 0xFC00) = 0xD800 
        begin 
            set @i += 1 
            set @c = ((@c & 0x3FF) * 0x400) | 0x10000 | (unicode(substring(@s, @i, 1)) & 0x3FF) 
        end 

        if @c < 0x80 
            set @d = cast(@c as binary(1)) 
        if @c >= 0x80 and @c < 0x800  
            set @d = cast(((@c * 4) & 0xFF00) | (@c & 0x3F) | 0xC080 as binary(2)) 
        if @c >= 0x800 and @c < 0x10000 
            set @d = cast(((@c * 0x10) & 0xFF0000) | ((@c * 4) & 0x3F00) | (@c & 0x3F) | 0xe08080 as binary(3)) 
        if @c >= 0x10000 
            set @d = cast(((@c * 0x40) & 0xFF000000) | ((@c * 0x10) & 0x3F0000) | ((@c * 4) & 0x3F00) | (@c & 0x3F) | 0xf0808080 as binary(4)) 
             
        set @r += @d 
        set @i += 1 
    end 
    return @r 
end