Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

decouple Windows APIs from UTF16String type #15033

Merged
merged 3 commits into from
Feb 19, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 100 additions & 3 deletions base/c.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

import Core.Intrinsics: cglobal, box

const OS_NAME = ccall(:jl_get_OS_NAME, Any, ())

cfunction(f::Function, r, a) = ccall(:jl_function_ptr, Ptr{Void}, (Any, Any, Any), f, r, a)

if ccall(:jl_is_char_signed, Any, ())
Expand Down Expand Up @@ -86,7 +84,7 @@ containsnul(p::Ptr, len) = C_NULL != ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cin
function unsafe_convert(::Type{Cstring}, s::ByteString)
p = unsafe_convert(Ptr{Cchar}, s)
if containsnul(p, sizeof(s))
throw(ArgumentError("embedded NUL chars are not allowed in C strings: $(repr(s))"))
throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
end
return Cstring(p)
end
Expand All @@ -96,6 +94,105 @@ convert(::Type{Cstring}, s::Symbol) = Cstring(unsafe_convert(Ptr{Cchar}, s))

# in string.jl: unsafe_convert(::Type{Cwstring}, s::WString)

# FIXME: this should be handled by implicit conversion to Cwstring in ccall, but unsafe_convert and cconvert do not currently cooperate well enough to make that work
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the obstacle to using the Cwstring conversion for ccall arguments?

Copy link
Sponsor Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I forget the details, but unsafe_convert and cconvert were not cooperative. I spent an hour or so going back and forth with @yuyichao and @vtjnash and it didn't lead to anything workable. That would be the right solution, rather than exposing these internal conversion functions.

# Windows only: build the wide-string argument for a `W`-suffixed API call.
# Returns a Vector{UInt16} holding `s` transcoded to UTF-16 by `utf8to16`,
# followed by a single terminating 0. Throws an ArgumentError if `s` itself
# contains a NUL, because the result is meant to be read as a C string.
@windows_only function cwstring(s::AbstractString)
    u8 = bytestring(s).data
    if 0 in u8
        throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
    end
    wide = utf8to16(u8)
    push!(wide, 0)
    return wide
end

# conversions between UTF-8 and UTF-16 for Windows APIs

# Transcode the UTF-8 bytes in `src` into a newly allocated vector of UTF-16
# code units and return it. Nothing is thrown for malformed input: bytes that
# do not form a sequence accepted below are copied through unchanged, one
# UInt16 per byte. An empty `src` gives an empty result.
# `$` is bitwise xor (Julia 0.4/0.5 syntax). The constants xor-ed in (0x3080,
# 0x2080, 0xdc80) cancel the UTF-8 marker bits of the bytes they are combined
# with, leaving only the payload bits.
function utf8to16(src::Vector{UInt8})
dst = UInt16[]
i, n = 1, length(src)
n > 0 || return dst
# capacity hint only; the final length is set by the push! calls
sizehint!(dst, 2n)
# invariant: `a` is src[i], the byte currently being decoded
a = src[1]
while true
# Reinterpreted as Int8, -64:-12 is 0xc0:0xf4 (lead bytes) and anything
# below -64 is 0x80:0xbf (continuation bytes). A lead byte in the last
# position (i == n) falls through to the single-byte branch.
if i < n && -64 <= a % Int8 <= -12 # multi-byte character
b = src[i += 1]
if -64 <= (b % Int8) || a == 0xf4 && 0x8f < b
# invalid UTF-8 (non-continuation or too-high code point)
# emit `a` as-is, then re-examine `b` as the start of a new character
push!(dst, a)
a = b; continue
elseif a < 0xe0 # 2-byte UTF-8
push!(dst, 0x3080 $ (UInt16(a) << 6) $ b)
elseif i < n # 3/4-byte character
c = src[i += 1]
if -64 <= (c % Int8) # invalid UTF-8 (non-continuation)
push!(dst, a, b)
a = c; continue
elseif a < 0xf0 # 3-byte UTF-8
# the 0xe0 marker bits of `a` are shifted out of the UInt16 by << 12
push!(dst, 0x2080 $ (UInt16(a) << 12) $ (UInt16(b) << 6) $ c)
elseif i < n
d = src[i += 1]
if -64 <= (d % Int8) # invalid UTF-8 (non-continuation)
push!(dst, a, b, c)
a = d; continue
# 4-byte form of a code point below 0x10000: emitted as one code unit
elseif a == 0xf0 && b < 0x90 # overlong encoding
push!(dst, 0x2080 $ (UInt16(b) << 12) $ (UInt16(c) << 6) $ d)
else # 4-byte UTF-8
# Surrogate pair. The UInt16 additions wrap modulo 2^16, so the first
# value equals 0xd7c0 + (code point >> 10), i.e. the high surrogate;
# the second is 0xdc00 | (low 10 bits of the code point).
push!(dst, 0xe5b8 + (UInt16(a) << 8) + (UInt16(b) << 2) + (c >> 4),
0xdc80 $ (UInt16(c & 0xf) << 6) $ d)
end
else # too short
# input ended after three bytes of a 4-byte sequence (i == n here)
push!(dst, a, b, c)
break
end
else # too short
# input ended after two bytes of a 3/4-byte sequence (i == n here)
push!(dst, a, b)
break
end
else # ASCII or invalid UTF-8 (continuation byte or too-high code point)
push!(dst, a)
end
# advance to the next undecoded byte, or stop when the input is exhausted
i < n || break
a = src[i += 1]
end
return dst
end

# Transcode the UTF-16 code units in `src` into a newly allocated vector of
# UTF-8 bytes and return it. Nothing is thrown for malformed input: a surrogate
# that is not part of a high/low pair is written out as a 3-byte sequence like
# any other code unit >= 0x800. An empty `src` gives an empty result.
# `$` is bitwise xor (Julia 0.4/0.5 syntax).
function utf16to8(src::Vector{UInt16})
dst = UInt8[]
i, n = 1, length(src)
n > 0 || return dst
# capacity hint only; the final length is set by the push! calls
sizehint!(dst, n)
# invariant: `a` is src[i], the code unit currently being encoded
a = src[1]
while true
if a < 0x80 # ASCII
push!(dst, a % UInt8)
elseif a < 0x800 # 2-byte UTF-8
push!(dst, 0xc0 | ((a >> 6) % UInt8),
0x80 | ((a % UInt8) & 0x3f))
# `a` is a high surrogate (0xd800:0xdbff) and another unit follows it
elseif a & 0xfc00 == 0xd800 && i < n
b = src[i += 1]
if (b & 0xfc00) == 0xdc00
# 2-unit UTF-16 sequence => 4-byte UTF-8
# The addition wraps modulo 2^16, mapping 0xd800:0xdbff to 0x0040:0x043f,
# which is the code point shifted right by 10 bits.
a += 0x2840
# In the third byte, (b >> 6) % UInt8 still carries 0x70 from b's 0xdc00
# tag; the xor with 0xf0 turns that into the 0x80 continuation marker.
push!(dst, 0xf0 | ((a >> 8) % UInt8),
0x80 | ((a % UInt8) >> 2),
0xf0 $ ((((a % UInt8) << 4) & 0x3f) $ (b >> 6) % UInt8),
0x80 | ((b % UInt8) & 0x3f))
else
# high surrogate not followed by a low surrogate: write `a` on its own as
# 3 bytes, then re-examine `b` as the start of a new character
push!(dst, 0xe0 | ((a >> 12) % UInt8),
0x80 | (((a >> 6) % UInt8) & 0x3f),
0x80 | ((a % UInt8) & 0x3f))
a = b; continue
end
else
# 1-unit high UTF-16 or unpaired high surrogate
# either way, encode as 3-byte UTF-8 code point
# (also reached by low surrogates, which the pairing test above never matches)
push!(dst, 0xe0 | ((a >> 12) % UInt8),
0x80 | (((a >> 6) % UInt8) & 0x3f),
0x80 | ((a % UInt8) & 0x3f))
end
# advance to the next unencoded unit, or stop when the input is exhausted
i < n || break
a = src[i += 1]
end
return dst
end

# deferring (or un-deferring) ctrl-c handler for external C code that
# is not interrupt safe (see also issue #2622). The sigatomic_begin/end
# functions should always be called in matched pairs, ideally via:
Expand Down
28 changes: 16 additions & 12 deletions base/env.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,33 +26,37 @@ end # @unix_only

const ERROR_ENVVAR_NOT_FOUND = UInt32(203)

_getenvlen(var::AbstractString) = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Cwstring,Ptr{UInt8},UInt32),var,C_NULL,0)
_hasenv(s::AbstractString) = _getenvlen(s)!=0 || Libc.GetLastError()!=ERROR_ENVVAR_NOT_FOUND
_getenvlen(var::Vector{UInt16}) = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32),var,C_NULL,0)
_hasenv(s::Vector{UInt16}) = _getenvlen(s) != 0 || Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND
_hasenv(s::AbstractString) = _hasenv(cwstring(s))

function access_env(onError::Function, str::AbstractString)
var = utf16(str)
var = cwstring(str)
len = _getenvlen(var)
if len == 0
return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? utf8("") : onError(str)
end
val = zeros(UInt16,len)
ret = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Cwstring,Ptr{UInt16},UInt32),var,val,len)
ret = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32),var,val,len)
if (ret == 0 && len != 1) || ret != len-1 || val[end] != 0
error(string("getenv: ", str, ' ', len, "-1 != ", ret, ": ", Libc.FormatMessage()))
end
return utf8(UTF16String(val))
pop!(val) # NUL
return UTF8String(utf16to8(val))
end

function _setenv(var::AbstractString, val::AbstractString, overwrite::Bool=true)
var = utf16(var)
function _setenv(svar::AbstractString, sval::AbstractString, overwrite::Bool=true)
var = cwstring(svar)
val = cwstring(sval)
if overwrite || !_hasenv(var)
ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Cwstring,Cwstring),var,val)
ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),var,val)
systemerror(:setenv, ret == 0)
end
end

function _unsetenv(var::AbstractString)
ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Cwstring,Ptr{UInt16}),var,C_NULL)
function _unsetenv(svar::AbstractString)
var = cwstring(svar)
ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),var,C_NULL)
systemerror(:setenv, ret == 0)
end

Expand Down Expand Up @@ -105,10 +109,10 @@ end
function next(hash::EnvHash, block::Tuple{Ptr{UInt16},Ptr{UInt16}})
pos = block[1]
blk = block[2]
len = ccall(:wcslen, UInt, (Ptr{UInt16},), pos)+1
len = ccall(:wcslen, UInt, (Ptr{UInt16},), pos)
buf = Array(UInt16, len)
unsafe_copy!(pointer(buf), pos, len)
env = utf8(UTF16String(buf))
env = UTF8String(utf16to8(buf))
m = match(r"^(=?[^=]+)=(.*)$"s, env)
if m === nothing
error("malformed environment entry: $env")
Expand Down
14 changes: 8 additions & 6 deletions base/file.jl
Original file line number Diff line number Diff line change
Expand Up @@ -218,19 +218,21 @@ function tempdir()
if lentemppath >= length(temppath) || lentemppath == 0
error("GetTempPath failed: $(Libc.FormatMessage())")
end
resize!(temppath,lentemppath+1)
return utf8(UTF16String(temppath))
resize!(temppath,lentemppath)
return UTF8String(utf16to8(temppath))
end
tempname(uunique::UInt32=UInt32(0)) = tempname(tempdir(), uunique)
const temp_prefix = cwstring("jl_")
function tempname(temppath::AbstractString,uunique::UInt32)
tempp = cwstring(temppath)
tname = Array(UInt16,32767)
uunique = ccall(:GetTempFileNameW,stdcall,UInt32,(Cwstring,Ptr{UInt16},UInt32,Ptr{UInt16}), temppath,utf16("jul"),uunique,tname)
uunique = ccall(:GetTempFileNameW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32,Ptr{UInt16}), tempp,temp_prefix,uunique,tname)
lentname = findfirst(tname,0)-1
if uunique == 0 || lentname <= 0
error("GetTempFileName failed: $(Libc.FormatMessage())")
end
resize!(tname,lentname+1)
return utf8(UTF16String(tname))
resize!(tname,lentname)
return UTF8String(utf16to8(tname))
end
function mktemp(parent=tempdir())
filename = tempname(parent, UInt32(0))
Expand All @@ -243,7 +245,7 @@ function mktempdir(parent=tempdir())
seed += 1
end
filename = tempname(parent, seed)
ret = ccall(:_wmkdir, Int32, (Ptr{UInt16},), utf16(filename))
ret = ccall(:_wmkdir, Int32, (Ptr{UInt16},), cwstring(filename))
if ret == 0
return filename
end
Expand Down
11 changes: 7 additions & 4 deletions base/filesystem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,13 @@ export File,
S_IRGRP, S_IWGRP, S_IXGRP, S_IRWXG,
S_IROTH, S_IWOTH, S_IXOTH, S_IRWXO

import Base: uvtype, uvhandle, eventloop, fd, position, stat, close,
write, read, unsafe_write, unsafe_read, readavailable, read!,
isopen, show, seek, seekend, skip, eof, nb_available,
check_open, _sizeof_uv_fs, uv_error, UVError
import Base:
UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen,
nb_available, position, read, read!, readavailable, seek, seekend, show,
skip, stat, unsafe_read, unsafe_write, utf16to8, utf8to16, uv_error,
uvhandle, uvtype, write

@windows_only import Base: cwstring

include("path.jl")
include("stat.jl")
Expand Down
10 changes: 6 additions & 4 deletions base/interactiveutil.jl
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,13 @@ end
end
systemerror(:OpenClipboard, 0==ccall((:OpenClipboard, "user32"), stdcall, Cint, (Ptr{Void},), C_NULL))
systemerror(:EmptyClipboard, 0==ccall((:EmptyClipboard, "user32"), stdcall, Cint, ()))
x_u16 = utf16(x)
x_u16 = cwstring(x)
# copy data to locked, allocated space
p = ccall((:GlobalAlloc, "kernel32"), stdcall, Ptr{UInt16}, (UInt16, Int32), 2, sizeof(x_u16)+2)
p = ccall((:GlobalAlloc, "kernel32"), stdcall, Ptr{UInt16}, (UInt16, Int32), 2, sizeof(x_u16))
systemerror(:GlobalAlloc, p==C_NULL)
plock = ccall((:GlobalLock, "kernel32"), stdcall, Ptr{UInt16}, (Ptr{UInt16},), p)
systemerror(:GlobalLock, plock==C_NULL)
ccall(:memcpy, Ptr{UInt16}, (Ptr{UInt16},Ptr{UInt16},Int), plock, x_u16, sizeof(x_u16)+2)
ccall(:memcpy, Ptr{UInt16}, (Ptr{UInt16},Ptr{UInt16},Int), plock, x_u16, sizeof(x_u16))
systemerror(:GlobalUnlock, 0==ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{Void},), plock))
pdata = ccall((:SetClipboardData, "user32"), stdcall, Ptr{UInt16}, (UInt32, Ptr{UInt16}), 13, p)
systemerror(:SetClipboardData, pdata!=p)
Expand All @@ -152,7 +152,9 @@ end
systemerror(:CloseClipboard, 0==ccall((:CloseClipboard, "user32"), stdcall, Cint, ()))
plock = ccall((:GlobalLock, "kernel32"), stdcall, Ptr{UInt16}, (Ptr{UInt16},), pdata)
systemerror(:GlobalLock, plock==C_NULL)
s = utf8(utf16(plock))
len = 0
while unsafe_load(plock, len+1) != 0; len += 1; end
Copy link
Member

@stevengj stevengj Apr 29, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe len = ccall(:wcslen, Csize_t, (Ptr{Cwchar_t},), plock) would be clearer? Better yet, add a method:

@windows_only utf16to8(p::Ptr{UInt16}, len=ccall(:wcslen, Csize_t, (Ptr{UInt16},), p)) = utf16to8(pointer_to_array(p, len))

method?

s = UTF8String(utf16to8(pointer_to_array(plock, len)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this could use comments explaining what's going on

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

still not obvious, still needs comments

systemerror(:GlobalUnlock, 0==ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{UInt16},), plock))
return s
end
Expand Down
5 changes: 3 additions & 2 deletions base/libc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, ca
errno, strerror, flush_cstdio, systemsleep, time
@windows_only export GetLastError, FormatMessage

import Base: utf16to8

include(string(length(Core.ARGS)>=2?Core.ARGS[2]:"","errno_h.jl")) # include($BUILDROOT/base/errno_h.jl)

## RawFD ##
Expand Down Expand Up @@ -258,11 +260,10 @@ function FormatMessage end
C_NULL, e, 0, lpMsgBuf, 0, C_NULL)
p = lpMsgBuf[1]
len == 0 && return utf8("")
len = len + 1
buf = Array(UInt16, len)
unsafe_copy!(pointer(buf), p, len)
ccall(:LocalFree,stdcall,Ptr{Void},(Ptr{Void},),p)
return utf8(UTF16String(buf))
return UTF8String(utf16to8(buf))
end
end

Expand Down
2 changes: 2 additions & 0 deletions base/osutils.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# This file is a part of Julia. License is MIT: http://julialang.org/license

const OS_NAME = ccall(:jl_get_OS_NAME, Any, ())

function is_unix(os::Symbol)
if (os==:Windows) return false;
elseif (os==:Linux) return true;
Expand Down
41 changes: 18 additions & 23 deletions base/path.jl
Original file line number Diff line number Diff line change
Expand Up @@ -124,36 +124,31 @@ normpath(a::AbstractString, b::AbstractString...) = normpath(joinpath(a,b...))
abspath(a::AbstractString) = normpath(isabspath(a) ? a : joinpath(pwd(),a))
abspath(a::AbstractString, b::AbstractString...) = abspath(joinpath(a,b...))

@windows_only realpath(path::AbstractString) = realpath(utf16(path))
@windows_only function realpath(path::UTF16String)
p::UInt32 = sizeof(path)>>1
@windows_only function realpath(path::AbstractString)
path = cwstring(path)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not type-stable (path will get ::Any, I think?). If we care, just rename path to wpath. Similarly below.

buf = zeros(UInt16, length(path))
while true
buf = zeros(UInt16, p + 1)
p = ccall((:GetFullPathNameW, "kernel32"), stdcall,
UInt32, (Cwstring, UInt32, Ptr{UInt16}, Ptr{Void}),
n = ccall((:GetFullPathNameW, "kernel32"), stdcall,
UInt32, (Ptr{UInt16}, UInt32, Ptr{UInt16}, Ptr{Void}),
path, length(buf), buf, C_NULL)
systemerror(:realpath, p == 0)
if (p < length(buf))
resize!(buf, p + 1)
return utf8(UTF16String(buf))
end
systemerror(:realpath, n == 0)
x = n < length(buf) # is the buffer big enough?
resize!(buf, n) # shrink if x, grow if !x
x && return UTF8String(utf16to8(buf))
end
end

@windows_only longpath(path::AbstractString) = longpath(utf16(path))
@windows_only function longpath(path::UTF16String)
p::UInt32 = sizeof(path)>>1
@windows_only function longpath(path::AbstractString)
path = cwstring(path)
buf = zeros(UInt16, length(path))
while true
buf = zeros(UInt16, p + 1)
p = ccall((:GetLongPathNameW, "kernel32"), stdcall, UInt32,
(Cwstring, Ptr{UInt16}, UInt32),
n = ccall((:GetLongPathNameW, "kernel32"), stdcall,
UInt32, (Ptr{UInt16}, Ptr{UInt16}, UInt32),
path, buf, length(buf))
systemerror(:longpath, p == 0)
# Buffer wasn't big enough, in which case `p` is the necessary buffer size
if (p < length(buf))
resize!(buf, p + 1)
return utf8(UTF16String(buf))
end
systemerror(:longpath, n == 0)
x = n < length(buf) # is the buffer big enough?
resize!(buf, n) # shrink if x, grow if !x
x && return UTF8String(utf16to8(buf))
end
end

Expand Down
2 changes: 1 addition & 1 deletion base/sysimg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ include("iterator.jl")
# For OS specific stuff
include(UTF8String(vcat(length(Core.ARGS)>=2?Core.ARGS[2].data:"".data, "build_h.jl".data))) # include($BUILDROOT/base/build_h.jl)
include(UTF8String(vcat(length(Core.ARGS)>=2?Core.ARGS[2].data:"".data, "version_git.jl".data))) # include($BUILDROOT/base/version_git.jl)
include("c.jl")
include("osutils.jl")
include("c.jl")

# Core I/O
include("io.jl")
Expand Down
Loading