diff --git a/base/c.jl b/base/c.jl
index 061599a11b2ce..f30fc62434a42 100644
--- a/base/c.jl
+++ b/base/c.jl
@@ -84,7 +84,7 @@ containsnul(p::Ptr, len) = C_NULL != ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cin
 function unsafe_convert(::Type{Cstring}, s::ByteString)
     p = unsafe_convert(Ptr{Cchar}, s)
     if containsnul(p, sizeof(s))
-        throw(ArgumentError("embedded NUL chars are not allowed in C strings: $(repr(s))"))
+        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
     end
     return Cstring(p)
 end
@@ -94,6 +94,105 @@ convert(::Type{Cstring}, s::Symbol) = Cstring(unsafe_convert(Ptr{Cchar}, s))
 
 # in string.jl: unsafe_convert(::Type{Cwstring}, s::WString)
 
+# FIXME: implicit conversion to Cwstring should cover this; until it does, build the NUL-terminated UTF-16 buffer by hand
+@windows_only function cwstring(s::AbstractString)
+    bytes = bytestring(s).data # raw bytes of the byte-string form of `s`
+    0 in bytes && throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))")) # no NUL may precede the terminator
+    return push!(utf8to16(bytes), 0) # UTF-16 code units plus a terminating 0
+end
+
+# conversions between UTF-8 and UTF-16 for Windows APIs
+# utf8to16: lenient UTF-8 decoder; any byte it cannot decode is emitted unchanged as one UInt16
+function utf8to16(src::Vector{UInt8})
+    dst = UInt16[]
+    i, n = 1, length(src) # i is the index of the last byte read
+    n > 0 || return dst # empty input => empty output
+    sizehint!(dst, 2n)
+    a = src[1]
+    while true # each pass starts with a == src[i], still to be emitted
+        if i < n && -64 <= a % Int8 <= -12 # lead byte 0xc0..0xf4 followed by at least one more byte
+            b = src[i += 1]
+            if -64 <= (b % Int8) || a == 0xf4 && 0x8f < b
+                # invalid UTF-8 (non-continuation or too-high code point)
+                push!(dst, a)
+                a = b; continue # re-examine b as the start of the next character
+            elseif a < 0xe0 # 2-byte UTF-8
+                push!(dst, 0x3080 $ (UInt16(a) << 6) $ b) # 0x3080 cancels the tag bits of a and b
+            elseif i < n # 3/4-byte character
+                c = src[i += 1]
+                if -64 <= (c % Int8) # invalid UTF-8 (non-continuation)
+                    push!(dst, a, b)
+                    a = c; continue
+                elseif a < 0xf0 # 3-byte UTF-8
+                    push!(dst, 0x2080 $ (UInt16(a) << 12) $ (UInt16(b) << 6) $ c) # a's tag bits shift out; 0x2080 cancels those of b and c
+                elseif i < n
+                    d = src[i += 1]
+                    if -64 <= (d % Int8) # invalid UTF-8 (non-continuation)
+                        push!(dst, a, b, c)
+                        a = d; continue
+                    elseif a == 0xf0 && b < 0x90 # overlong encoding
+                        push!(dst, 0x2080 $ (UInt16(b) << 12) $ (UInt16(c) << 6) $ d) # decoded as a single code unit
+                    else # 4-byte UTF-8
+                        push!(dst, 0xe5b8 + (UInt16(a) << 8) + (UInt16(b) << 2) + (c >> 4), # high surrogate (the sum wraps in UInt16)
+                                   0xdc80 $ (UInt16(c & 0xf) << 6) $ d) # low surrogate
+                    end
+                else # too short
+                    push!(dst, a, b, c)
+                    break
+                end
+            else # too short
+                push!(dst, a, b)
+                break
+            end
+        else # ASCII, or a byte passed through as-is (continuation, > 0xf4, or lead byte at end of input)
+            push!(dst, a)
+        end
+        i < n || break
+        a = src[i += 1]
+    end
+    return dst
+end
+
+function utf16to8(src::Vector{UInt16}) # UTF-16 code units -> UTF-8 bytes; unpaired surrogates become 3-byte sequences
+    dst = UInt8[]
+    i, n = 1, length(src) # i is the index of the last code unit read
+    n > 0 || return dst # empty input => empty output
+    sizehint!(dst, n) # every code unit yields at least one byte
+    a = src[1]
+    while true # each pass starts with a == src[i], still to be encoded
+        if a < 0x80 # ASCII
+            push!(dst, a % UInt8)
+        elseif a < 0x800 # 2-byte UTF-8
+            push!(dst, 0xc0 | ((a >> 6) % UInt8),
+                       0x80 | ((a % UInt8) & 0x3f))
+        elseif a & 0xfc00 == 0xd800 && i < n # high surrogate with a code unit after it
+            b = src[i += 1]
+            if (b & 0xfc00) == 0xdc00
+                # 2-unit UTF-16 sequence => 4-byte UTF-8
+                a += 0x2840 # wraps: 0xd800 + 0x2840 == 0x10040, dropping the surrogate tag and adding the 0x10000 offset
+                push!(dst, 0xf0 | ((a >> 8) % UInt8),
+                           0x80 | ((a % UInt8) >> 2),
+                           0xf0 $ ((((a % UInt8) << 4) & 0x3f) $ (b >> 6) % UInt8), # the xors also cancel b's surrogate tag bits
+                           0x80 | ((b % UInt8) & 0x3f))
+            else # b is not a low surrogate: encode a alone as 3 bytes
+                push!(dst, 0xe0 | ((a >> 12) % UInt8),
+                           0x80 | (((a >> 6) % UInt8) & 0x3f),
+                           0x80 | ((a % UInt8) & 0x3f))
+                a = b; continue # reprocess b as the next code unit
+            end
+        else
+            # 1-unit UTF-16 (>= 0x800), lone low surrogate, or high surrogate at end of input
+            # either way, encode as 3-byte UTF-8 code point
+            push!(dst, 0xe0 | ((a >> 12) % UInt8),
+                       0x80 | (((a >> 6) % UInt8) & 0x3f),
+                       0x80 | ((a % UInt8) & 0x3f))
+        end
+        i < n || break
+        a = src[i += 1]
+    end
+    return dst
+end
+
 # deferring (or un-deferring) ctrl-c handler for external C code that
 # is not interrupt safe (see also issue #2622).  The sigatomic_begin/end
 # functions should always be called in matched pairs, ideally via:
diff --git a/base/env.jl b/base/env.jl
index 0f47a04202e78..876e32b807b41 100644
--- a/base/env.jl
+++ b/base/env.jl
@@ -26,33 +26,37 @@ end # @unix_only
 
 const ERROR_ENVVAR_NOT_FOUND = UInt32(203)
 
-_getenvlen(var::AbstractString) = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Cwstring,Ptr{UInt8},UInt32),var,C_NULL,0)
-_hasenv(s::AbstractString) = _getenvlen(s)!=0 || Libc.GetLastError()!=ERROR_ENVVAR_NOT_FOUND
+_getenvlen(var::Vector{UInt16}) = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32),var,C_NULL,0) # NULL buffer of size 0; access_env sizes its buffer from the result
+_hasenv(s::Vector{UInt16}) = _getenvlen(s) != 0 || Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND # a zero length still counts as present unless the error is "not found"
+_hasenv(s::AbstractString) = _hasenv(cwstring(s)) # string method: convert with cwstring, then defer to the method above
 
 function access_env(onError::Function, str::AbstractString)
-    var = utf16(str)
+    var = cwstring(str) # NUL-terminated UTF-16 name; cwstring throws ArgumentError on embedded NULs
     len = _getenvlen(var)
     if len == 0
         return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? utf8("") : onError(str)
     end
     val = zeros(UInt16,len)
-    ret = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Cwstring,Ptr{UInt16},UInt32),var,val,len)
+    ret = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32),var,val,len) # raw pointers: var already carries its terminator
     if (ret == 0 && len != 1) || ret != len-1 || val[end] != 0
         error(string("getenv: ", str, ' ', len, "-1 != ", ret, ": ", Libc.FormatMessage()))
     end
-    return utf8(UTF16String(val))
+    pop!(val) # drop the trailing NUL that the check above insists on
+    return UTF8String(utf16to8(val)) # transcode the remaining code units to UTF-8
 end
 
-function _setenv(var::AbstractString, val::AbstractString, overwrite::Bool=true)
-    var = utf16(var)
+function _setenv(svar::AbstractString, sval::AbstractString, overwrite::Bool=true) # svar/sval: name and value as strings
+    var = cwstring(svar) # NUL-terminated UTF-16 name
+    val = cwstring(sval) # NUL-terminated UTF-16 value
     if overwrite || !_hasenv(var)
-        ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Cwstring,Cwstring),var,val)
+        ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),var,val) # both buffers are already terminated, so raw pointers are passed
         systemerror(:setenv, ret == 0)
     end
 end
 
-function _unsetenv(var::AbstractString)
-    ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Cwstring,Ptr{UInt16}),var,C_NULL)
+function _unsetenv(svar::AbstractString) # svar: variable name as a string
+    var = cwstring(svar) # NUL-terminated UTF-16 name
+    ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),var,C_NULL) # C_NULL is passed in place of a value buffer
     systemerror(:setenv, ret == 0)
 end
 
@@ -105,10 +109,10 @@ end
 function next(hash::EnvHash, block::Tuple{Ptr{UInt16},Ptr{UInt16}})
     pos = block[1]
     blk = block[2]
-    len = ccall(:wcslen, UInt, (Ptr{UInt16},), pos)+1
+    len = ccall(:wcslen, UInt, (Ptr{UInt16},), pos) # NOTE(review): len no longer counts the NUL; confirm the code past this hunk that advances `pos` still steps over the terminator
     buf = Array(UInt16, len)
     unsafe_copy!(pointer(buf), pos, len)
-    env = utf8(UTF16String(buf))
+    env = UTF8String(utf16to8(buf)) # buf holds the entry without its terminator
     m = match(r"^(=?[^=]+)=(.*)$"s, env)
     if m === nothing
         error("malformed environment entry: $env")
diff --git a/base/file.jl b/base/file.jl
index 8ca14f1807620..87df9b99e64f0 100644
--- a/base/file.jl
+++ b/base/file.jl
@@ -218,19 +218,21 @@ function tempdir()
     if lentemppath >= length(temppath) || lentemppath == 0
         error("GetTempPath failed: $(Libc.FormatMessage())")
     end
-    resize!(temppath,lentemppath+1)
-    return utf8(UTF16String(temppath))
+    resize!(temppath,lentemppath)
+    return UTF8String(utf16to8(temppath))
 end
 tempname(uunique::UInt32=UInt32(0)) = tempname(tempdir(), uunique)
+const temp_prefix = cwstring("jl_") # NUL-terminated UTF-16 prefix handed to GetTempFileNameW below
 function tempname(temppath::AbstractString,uunique::UInt32)
+    tempp = cwstring(temppath) # NUL-terminated UTF-16 directory path
     tname = Array(UInt16,32767)
-    uunique = ccall(:GetTempFileNameW,stdcall,UInt32,(Cwstring,Ptr{UInt16},UInt32,Ptr{UInt16}), temppath,utf16("jul"),uunique,tname)
+    uunique = ccall(:GetTempFileNameW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32,Ptr{UInt16}), tempp,temp_prefix,uunique,tname) # the name is written into tname
     lentname = findfirst(tname,0)-1
     if uunique == 0 || lentname <= 0
         error("GetTempFileName failed: $(Libc.FormatMessage())")
     end
-    resize!(tname,lentname+1)
-    return utf8(UTF16String(tname))
+    resize!(tname,lentname) # keep only the code units before the first 0
+    return UTF8String(utf16to8(tname))
 end
 function mktemp(parent=tempdir())
     filename = tempname(parent, UInt32(0))
@@ -243,7 +245,7 @@ function mktempdir(parent=tempdir())
             seed += 1
         end
         filename = tempname(parent, seed)
-        ret = ccall(:_wmkdir, Int32, (Ptr{UInt16},), utf16(filename))
+        ret = ccall(:_wmkdir, Int32, (Ptr{UInt16},), cwstring(filename))
         if ret == 0
             return filename
         end
diff --git a/base/filesystem.jl b/base/filesystem.jl
index 94a4d4e22815e..50755ceef8cf1 100644
--- a/base/filesystem.jl
+++ b/base/filesystem.jl
@@ -38,10 +38,13 @@ export File,
        S_IRGRP, S_IWGRP, S_IXGRP, S_IRWXG,
        S_IROTH, S_IWOTH, S_IXOTH, S_IRWXO
 
-import Base: uvtype, uvhandle, eventloop, fd, position, stat, close,
-            write, read, unsafe_write, unsafe_read, readavailable, read!,
-            isopen, show, seek, seekend, skip, eof, nb_available,
-            check_open, _sizeof_uv_fs, uv_error, UVError
+import Base:
+    UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen,
+    nb_available, position, read, read!, readavailable, seek, seekend, show,
+    skip, stat, unsafe_read, unsafe_write, utf16to8, utf8to16, uv_error,
+    uvhandle, uvtype, write
+
+@windows_only import Base: cwstring
 
 include("path.jl")
 include("stat.jl")
diff --git a/base/interactiveutil.jl b/base/interactiveutil.jl
index e29176063d326..709230a88ed8e 100644
--- a/base/interactiveutil.jl
+++ b/base/interactiveutil.jl
@@ -131,13 +131,13 @@ end
         end
         systemerror(:OpenClipboard, 0==ccall((:OpenClipboard, "user32"), stdcall, Cint, (Ptr{Void},), C_NULL))
         systemerror(:EmptyClipboard, 0==ccall((:EmptyClipboard, "user32"), stdcall, Cint, ()))
-        x_u16 = utf16(x)
+        x_u16 = cwstring(x)
         # copy data to locked, allocated space
-        p = ccall((:GlobalAlloc, "kernel32"), stdcall, Ptr{UInt16}, (UInt16, Int32), 2, sizeof(x_u16)+2)
+        p = ccall((:GlobalAlloc, "kernel32"), stdcall, Ptr{UInt16}, (UInt16, Int32), 2, sizeof(x_u16))
         systemerror(:GlobalAlloc, p==C_NULL)
         plock = ccall((:GlobalLock, "kernel32"), stdcall, Ptr{UInt16}, (Ptr{UInt16},), p)
         systemerror(:GlobalLock, plock==C_NULL)
-        ccall(:memcpy, Ptr{UInt16}, (Ptr{UInt16},Ptr{UInt16},Int), plock, x_u16, sizeof(x_u16)+2)
+        ccall(:memcpy, Ptr{UInt16}, (Ptr{UInt16},Ptr{UInt16},Int), plock, x_u16, sizeof(x_u16))
         systemerror(:GlobalUnlock, 0==ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{Void},), plock))
         pdata = ccall((:SetClipboardData, "user32"), stdcall, Ptr{UInt16}, (UInt32, Ptr{UInt16}), 13, p)
         systemerror(:SetClipboardData, pdata!=p)
@@ -152,7 +152,9 @@ end
         systemerror(:CloseClipboard, 0==ccall((:CloseClipboard, "user32"), stdcall, Cint, ()))
         plock = ccall((:GlobalLock, "kernel32"), stdcall, Ptr{UInt16}, (Ptr{UInt16},), pdata)
         systemerror(:GlobalLock, plock==C_NULL)
-        s = utf8(utf16(plock))
+        len = 0
+        while unsafe_load(plock, len+1) != 0; len += 1; end
+        s = UTF8String(utf16to8(pointer_to_array(plock, len)))
         systemerror(:GlobalUnlock, 0==ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{UInt16},), plock))
         return s
     end
diff --git a/base/libc.jl b/base/libc.jl
index ae55eb50ad2aa..7e0b0bef582c4 100644
--- a/base/libc.jl
+++ b/base/libc.jl
@@ -6,6 +6,8 @@ export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, ca
     errno, strerror, flush_cstdio, systemsleep, time
 @windows_only export GetLastError, FormatMessage
 
+import Base: utf16to8
+
 include(string(length(Core.ARGS)>=2?Core.ARGS[2]:"","errno_h.jl"))  # include($BUILDROOT/base/errno_h.jl)
 
 ## RawFD ##
@@ -258,11 +260,10 @@ function FormatMessage end
                     C_NULL, e, 0, lpMsgBuf, 0, C_NULL)
         p = lpMsgBuf[1]
         len == 0 && return utf8("")
-        len = len + 1
         buf = Array(UInt16, len)
         unsafe_copy!(pointer(buf), p, len)
         ccall(:LocalFree,stdcall,Ptr{Void},(Ptr{Void},),p)
-        return utf8(UTF16String(buf))
+        return UTF8String(utf16to8(buf))
     end
 end
 
diff --git a/base/path.jl b/base/path.jl
index 6f191a0cd67c5..4cd8ebfa3c338 100644
--- a/base/path.jl
+++ b/base/path.jl
@@ -124,36 +124,31 @@ normpath(a::AbstractString, b::AbstractString...) = normpath(joinpath(a,b...))
 abspath(a::AbstractString) = normpath(isabspath(a) ? a : joinpath(pwd(),a))
 abspath(a::AbstractString, b::AbstractString...) = abspath(joinpath(a,b...))
 
-@windows_only realpath(path::AbstractString) = realpath(utf16(path))
-@windows_only function realpath(path::UTF16String)
-    p::UInt32 = sizeof(path)>>1
+@windows_only function realpath(path::AbstractString) # resolve `path` with GetFullPathNameW; returns a UTF8String
+    wpath = cwstring(path) # NUL-terminated UTF-16 copy; a separate name keeps `path` a string
+    buf = zeros(UInt16, length(wpath)) # first guess: as many code units as the input, terminator included
     while true
-        buf = zeros(UInt16, p + 1)
-        p = ccall((:GetFullPathNameW, "kernel32"), stdcall,
-            UInt32, (Cwstring, UInt32, Ptr{UInt16}, Ptr{Void}),
-            path, length(buf), buf, C_NULL)
-        systemerror(:realpath, p == 0)
-        if (p < length(buf))
-            resize!(buf, p + 1)
-            return utf8(UTF16String(buf))
-        end
+        n = ccall((:GetFullPathNameW, "kernel32"), stdcall,
+            UInt32, (Ptr{UInt16}, UInt32, Ptr{UInt16}, Ptr{Void}),
+            wpath, length(buf), buf, C_NULL)
+        systemerror(:realpath, n == 0) # a zero return is reported as failure
+        fits = n < length(buf) # is the buffer big enough?
+        resize!(buf, n) # shrink to the result if it fit, otherwise grow to n and retry
+        fits && return UTF8String(utf16to8(buf))
     end
 end
 
-@windows_only longpath(path::AbstractString) = longpath(utf16(path))
-@windows_only function longpath(path::UTF16String)
-    p::UInt32 = sizeof(path)>>1
+@windows_only function longpath(path::AbstractString) # expand `path` with GetLongPathNameW; returns a UTF8String
+    wpath = cwstring(path) # NUL-terminated UTF-16 copy; a separate name keeps `path` a string
+    buf = zeros(UInt16, length(wpath)) # first guess: as many code units as the input, terminator included
     while true
-        buf = zeros(UInt16, p + 1)
-        p = ccall((:GetLongPathNameW, "kernel32"), stdcall, UInt32,
-            (Cwstring, Ptr{UInt16}, UInt32),
-            path, buf, length(buf))
-        systemerror(:longpath, p == 0)
-        # Buffer wasn't big enough, in which case `p` is the necessary buffer size
-        if (p < length(buf))
-            resize!(buf, p + 1)
-            return utf8(UTF16String(buf))
-        end
+        n = ccall((:GetLongPathNameW, "kernel32"), stdcall,
+            UInt32, (Ptr{UInt16}, Ptr{UInt16}, UInt32),
+            wpath, buf, length(buf))
+        systemerror(:longpath, n == 0) # a zero return is reported as failure
+        fits = n < length(buf) # is the buffer big enough?
+        resize!(buf, n) # shrink to the result if it fit, otherwise grow to n and retry
+        fits && return UTF8String(utf16to8(buf))
     end
 end
 
diff --git a/test/misc.jl b/test/misc.jl
index e8f55e8f78ffa..260f7d19ac1e9 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -210,3 +210,170 @@ end
 whos(IOBuffer(), Tmp14173) # warm up
 @test @allocated(whos(IOBuffer(), Tmp14173)) < 10000
 
+## test conversion from UTF-8 to UTF-16 (for Windows APIs)
+import Base: utf8to16, utf16to8
+
+# empty arrays
+@test utf8to16(UInt8[]) == UInt16[]
+@test utf16to8(UInt16[]) == UInt8[]
+
+# UTF-8-like sequences
+V8 = [
+    # 1-byte (ASCII)
+    ([0x00],[0x0000])
+    ([0x0a],[0x000a])
+    ([0x7f],[0x007f])
+    # 2-byte
+    ([0xc0,0x80],[0x0000]) # overlong encoding
+    ([0xc1,0xbf],[0x007f]) # overlong encoding
+    ([0xc2,0x80],[0x0080])
+    ([0xc3,0xbf],[0x00ff])
+    ([0xc4,0x80],[0x0100])
+    ([0xc4,0xa3],[0x0123])
+    ([0xdf,0xbf],[0x07ff])
+    # 3-byte
+    ([0xe0,0x80,0x80],[0x0000]) # overlong encoding
+    ([0xe0,0x81,0xbf],[0x007f]) # overlong encoding
+    ([0xe0,0x82,0x80],[0x0080]) # overlong encoding
+    ([0xe0,0x9f,0xbf],[0x07ff]) # overlong encoding
+    ([0xe0,0xa0,0x80],[0x0800])
+    ([0xe0,0xa2,0x9a],[0x089a])
+    ([0xe1,0x88,0xb4],[0x1234])
+    ([0xea,0xaf,0x8d],[0xabcd])
+    ([0xed,0x9f,0xbf],[0xd7ff])
+    ([0xed,0xa0,0x80],[0xd800]) # invalid code point – high surrogate
+    ([0xed,0xaf,0xbf],[0xdbff]) # invalid code point – high surrogate
+    ([0xed,0xb0,0x80],[0xdc00]) # invalid code point – low surrogate
+    ([0xed,0xbf,0xbf],[0xdfff]) # invalid code point – low surrogate
+    ([0xee,0x80,0x80],[0xe000])
+    ([0xef,0xbf,0xbf],[0xffff])
+    # 4-byte
+    ([0xf0,0x80,0x80,0x80],[0x0000]) # overlong encoding
+    ([0xf0,0x80,0x81,0xbf],[0x007f]) # overlong encoding
+    ([0xf0,0x80,0x82,0x80],[0x0080]) # overlong encoding
+    ([0xf0,0x80,0x9f,0xbf],[0x07ff]) # overlong encoding
+    ([0xf0,0x80,0xa0,0x80],[0x0800]) # overlong encoding
+    ([0xf0,0x8f,0xbf,0xbf],[0xffff]) # overlong encoding
+    ([0xf0,0x90,0x80,0x80],[0xd800,0xdc00]) # U+10000
+    ([0xf0,0x90,0x8d,0x88],[0xd800,0xdf48]) # U+10348
+    ([0xf0,0x90,0x90,0xb7],[0xd801,0xdc37]) # U+10437
+    ([0xf0,0xa4,0xad,0xa2],[0xd852,0xdf62]) # U+24b62
+    ([0xf2,0xab,0xb3,0x9e],[0xda6f,0xdcde]) # U+abcde
+    ([0xf3,0xbf,0xbf,0xbf],[0xdbbf,0xdfff]) # U+fffff
+    ([0xf4,0x80,0x80,0x80],[0xdbc0,0xdc00]) # U+100000
+    ([0xf4,0x8a,0xaf,0x8d],[0xdbea,0xdfcd]) # U+10abcd
+    ([0xf4,0x8f,0xbf,0xbf],[0xdbff,0xdfff]) # U+10ffff
+]
+
+# non UTF-8-like sequences
+X8 = Vector{UInt8}[
+    # invalid 1-byte sequences
+    [0x80], # 1 leading ones
+    [0xbf],
+    [0xc0], # 2 leading ones
+    [0xdf],
+    [0xe0], # 3 leading ones
+    [0xef],
+    [0xf0], # 4 leading ones
+    [0xf7],
+    [0xf8], # 5 leading ones
+    [0xfb],
+    [0xfc], # 6 leading ones
+    [0xfd],
+    [0xfe], # 7 leading ones
+    [0xff], # 8 leading ones
+    # other invalid sequences
+    [0xf4,0x90,0xbf,0xbf],
+    [0xf4,0x91,0x80,0x80],
+    [0xf7,0x80,0x80,0x80],
+    [0xf7,0xbf,0xbf,0xbf],
+    [0xf8,0x80,0x80,0x80],
+    [0xf8,0xbf,0xbf,0xbf],
+    [0xff,0x80,0x80,0x80],
+    [0xff,0xbf,0xbf,0xbf],
+]
+
+for s in [map(first,V8); X8], # every sequence listed so far; the loop walks a freshly built array, so pushes below are not revisited
+    i = 1:length(s)-1,
+    j = i+1:length(s)-(i==1) # slices of two or more bytes; when i == 1 the full sequence is left out
+    ss = s[i:j]
+    ss in X8 || push!(X8, ss) # add each slice only once
+end
+sort!(X8, lt=lexless)
+sort!(X8, by=length) # NOTE(review): presumably relies on a stable sort to keep the lexicographic order within each length; confirm
+
+I8 = [(s,map(UInt16,s)) for s in X8] # expected output: each byte widened to its own code unit
+
+for (X,Y,Z) in ((V8,V8,V8), (I8,V8,I8), (V8,I8,V8), (V8,V8,I8), (I8,V8,V8)) # tables for the 1st, 2nd and 3rd piece; no combination puts two I8 pieces side by side
+    for (a8, a16) in X
+        @test utf8to16(a8) == a16 # single piece
+        for (b8, b16) in Y
+            ab8 = [a8; b8]
+            ab16 = [a16; b16]
+            @test utf8to16(ab8) == ab16 # concatenated input must give concatenated output
+            for (c8, c16) in Z
+                abc8 = [ab8; c8]
+                abc16 = [ab16; c16]
+                @test utf8to16(abc8) == abc16 # same check with three pieces
+            end
+        end
+    end
+end
+
+# UTF-16-like sequences
+V16 = [
+    # 1-unit UTF-16, 1-byte UTF-8 (ASCII)
+    ([0x0000],[0x00])
+    ([0x000a],[0x0a])
+    ([0x007f],[0x7f])
+    # 1-unit UTF-16, 2-byte UTF-8
+    ([0x0080],[0xc2,0x80])
+    ([0x00ff],[0xc3,0xbf])
+    ([0x0100],[0xc4,0x80])
+    ([0x0123],[0xc4,0xa3])
+    ([0x07ff],[0xdf,0xbf])
+    # 1-unit UTF-16, 3-byte UTF-8
+    ([0x0800],[0xe0,0xa0,0x80])
+    ([0x089a],[0xe0,0xa2,0x9a])
+    ([0x1234],[0xe1,0x88,0xb4])
+    ([0xabcd],[0xea,0xaf,0x8d])
+    ([0xd7ff],[0xed,0x9f,0xbf])
+    ([0xe000],[0xee,0x80,0x80])
+    ([0xffff],[0xef,0xbf,0xbf])
+    # 2-unit UTF-16, 4-byte UTF-8
+    ([0xd800,0xdc00],[0xf0,0x90,0x80,0x80]) # U+10000
+    ([0xd800,0xdf48],[0xf0,0x90,0x8d,0x88]) # U+10348
+    ([0xd801,0xdc37],[0xf0,0x90,0x90,0xb7]) # U+10437
+    ([0xd852,0xdf62],[0xf0,0xa4,0xad,0xa2]) # U+24b62
+    ([0xda6f,0xdcde],[0xf2,0xab,0xb3,0x9e]) # U+abcde
+    ([0xdbbf,0xdfff],[0xf3,0xbf,0xbf,0xbf]) # U+fffff
+    ([0xdbc0,0xdc00],[0xf4,0x80,0x80,0x80]) # U+100000
+    ([0xdbea,0xdfcd],[0xf4,0x8a,0xaf,0x8d]) # U+10abcd
+    ([0xdbff,0xdfff],[0xf4,0x8f,0xbf,0xbf]) # U+10ffff
+]
+
+I16 = [
+    ([0xd800],[0xed,0xa0,0x80]) # high surrogate
+    ([0xdbff],[0xed,0xaf,0xbf]) # high surrogate
+    ([0xdc00],[0xed,0xb0,0x80]) # low surrogate
+    ([0xdfff],[0xed,0xbf,0xbf]) # low surrogate
+]
+
+for (X,Y,Z) in ((V16,V16,V16), (I16,V16,I16), (V16,I16,V16), (V16,V16,I16), (I16,V16,V16)) # tables for the 1st, 2nd and 3rd piece; no combination puts two I16 pieces side by side
+    for (a16, a8) in X
+        @test utf16to8(a16) == a8 # UTF-16 -> UTF-8
+        @test utf8to16(a8) == a16 # and back again
+        for (b16, b8) in Y
+            ab16 = [a16; b16]
+            ab8 = [a8; b8]
+            @test utf16to8(ab16) == ab8 # concatenated input must give concatenated output
+            @test utf8to16(ab8) == ab16
+            for (c16, c8) in Z
+                abc16 = [ab16; c16]
+                abc8 = [ab8; c8]
+                @test utf16to8(abc16) == abc8 # same checks with three pieces
+                @test utf8to16(abc8) == abc16
+            end
+        end
+    end
+end