Skip to content

Commit

Permalink
[Scott Paul Jones] Fixed bug in unicode.jl/encode16
Browse files (browse the repository at this point in the history)
  • Loading branch information
ScottPJones committed Apr 22, 2015
1 parent 2e3c72b commit f8691b5
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 12 deletions.
6 changes: 4 additions & 2 deletions base/utf16.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,11 @@ function encode16(s::AbstractString)
  c = reinterpret(UInt32, ch)
  if c < 0x10000
  push!(buf, UInt16(c))
+ elseif c <= 0x10FFFF
+ push!(buf, UInt16(0xd7c0 + (c>>10)))
+ push!(buf, UInt16(0xdc00 + (c & 0x3ff)))
  else
- push!(buf, UInt16(0xd7c0 + (c>>10) & 0x3ff))
- push!(buf, UInt16(0xdc00 + c & 0x3ff))
+ throw(ArgumentError("invalid Unicode character (>0x10FFFF)"))
  end
  end
  push!(buf, 0) # NULL termination
Expand Down
20 changes: 10 additions & 10 deletions test/unicode.jl
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
  # UTF16
- u8 = "\U1d565\U1d7f6\U00066\U2008a"
+ u8 = "\U10ffff\U1d565\U1d7f6\U00066\U2008a"
  u16 = utf16(u8)
- @test sizeof(u16) == 14
- @test length(u16.data) == 8 && u16.data[end] == 0
- @test length(u16) == 4
+ @test sizeof(u16) == 18
+ @test length(u16.data) == 10 && u16.data[end] == 0
+ @test length(u16) == 5
  @test utf8(u16) == u8
  @test collect(u8) == collect(u16)
- @test u8 == utf16(u16.data[1:end-1]) == utf16(copy!(Array(UInt8, 14), 1, reinterpret(UInt8, u16.data), 1, 14))
+ @test u8 == utf16(u16.data[1:end-1]) == utf16(copy!(Array(UInt8, 18), 1, reinterpret(UInt8, u16.data), 1, 18))
  @test u8 == utf16(pointer(u16)) == utf16(convert(Ptr{Int16}, pointer(u16)))

  # UTF32
  u32 = utf32(u8)
- @test sizeof(u32) == 16
- @test length(u32.data) == 5 && u32.data[end] == Char(0)
- @test length(u32) == 4
+ @test sizeof(u32) == 20
+ @test length(u32.data) == 6 && u32.data[end] == Char(0)
+ @test length(u32) == 5
  @test utf8(u32) == u8
  @test collect(u8) == collect(u32)
- @test u8 == utf32(u32.data[1:end-1]) == utf32(copy!(Array(UInt8, 16), 1, reinterpret(UInt8, u32.data), 1, 16))
+ @test u8 == utf32(u32.data[1:end-1]) == utf32(copy!(Array(UInt8, 20), 1, reinterpret(UInt8, u32.data), 1, 20))
  @test u8 == utf32(pointer(u32)) == utf32(convert(Ptr{Int32}, pointer(u32)))

  # Wstring
  w = wstring(u8)
- @test length(w) == 4 && utf8(w) == u8 && collect(u8) == collect(w)
+ @test length(w) == 5 && utf8(w) == u8 && collect(u8) == collect(w)
  @test u8 == WString(w.data)

  if !success(`iconv --version`)
Expand Down

0 comments on commit f8691b5

Please sign in to comment.