From 57e166b9c402e02c03525b1dacdadf39383386c8 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sun, 26 Apr 2015 09:16:11 -0400 Subject: [PATCH] add Cstring/Cwstring types for safe passing of NUL-terminated strings to ccall (see #10958, #10991) --- NEWS.md | 15 +++-- base/c.jl | 34 +++++++++- base/client.jl | 10 +-- base/datafmt.jl | 10 +-- base/env.jl | 17 +++-- base/exports.jl | 2 + base/fftw.jl | 4 +- base/file.jl | 15 +++-- base/fs.jl | 12 ++-- base/gmp.jl | 20 ++++-- base/interactiveutil.jl | 5 +- base/iostream.jl | 2 +- base/libc.jl | 8 +-- base/libdl.jl | 8 +-- base/loading.jl | 7 ++- base/mmap.jl | 6 +- base/mpfr.jl | 2 +- base/path.jl | 4 +- base/pcre.jl | 2 +- base/pointer.jl | 4 +- base/poll.jl | 4 +- base/process.jl | 25 +++++--- base/sharedarray.jl | 4 +- base/show.jl | 4 +- base/socket.jl | 2 +- base/stat.jl | 4 +- base/stream.jl | 4 +- base/string.jl | 43 ++++++++----- base/sysinfo.jl | 2 +- doc/manual/calling-c-and-fortran-code.rst | 23 +++++-- src/ast.c | 18 +++--- src/builtins.c | 75 ++--------------------- src/flisp/cvalues.c | 7 ++- src/flisp/flisp.h | 1 + src/jlapi.c | 2 +- src/julia.h | 10 +-- src/toplevel.c | 4 +- test/core.jl | 4 +- test/file.jl | 12 ++++ test/mpfr.jl | 3 + test/parser.jl | 3 + test/regex.jl | 3 + test/socket.jl | 1 + test/spawn.jl | 8 +++ test/strings.jl | 17 +++++ test/sysinfo.jl | 5 ++ 46 files changed, 275 insertions(+), 200 deletions(-) diff --git a/NEWS.md b/NEWS.md index cbadec18bbe0b..09448e51684b9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -148,9 +148,9 @@ Library improvements * `Givens` type doesn't have a size anymore and is no longer a subtype of `AbstractMatrix` ([#8660]). - * Large speedup in sparse ``\`` and splitting of Cholesky and LDLᵀ factorizations into ``cholfact`` and ``ldltfact`` ([#10117]). + * Large speedup in sparse `\` and splitting of Cholesky and LDLᵀ factorizations into `cholfact` and `ldltfact` ([#10117]). 
- * Add sparse least squares to ``\`` by adding ``qrfact`` for sparse matrices based on the SPQR library. ([#10180]) + * Add sparse least squares to `\` by adding `qrfact` for sparse matrices based on the SPQR library. ([#10180]) * Split `Triangular` type into `UpperTriangular`, `LowerTriangular`, `UnitUpperTriagular` and `UnitLowerTriangular` ([#9779]) @@ -158,6 +158,10 @@ Library improvements * Strings + * NUL-terminated strings should now be passed to C via the new `Cstring` type, not `Ptr{UInt8}` or `Ptr{Cchar}`, + in order to check whether the string is free of NUL characters (which would cause silent truncation in C). + The analogous type `Cwstring` should be used for NUL-terminated `wchar_t*` strings ([#10994]). + * `graphemes(s)` returns an iterator over grapheme substrings of `s` ([#9261]). * Character predicates such as `islower()`, `isspace()`, etc. use @@ -1380,11 +1384,12 @@ Too numerous to mention. [#10659]: https://github.com/JuliaLang/julia/issues/10659 [#10679]: https://github.com/JuliaLang/julia/issues/10679 [#10709]: https://github.com/JuliaLang/julia/issues/10709 -[#10714]: https://github.com/JuliaLang/julia/pull/10714 +[#10714]: https://github.com/JuliaLang/julia/issues/10714 [#10747]: https://github.com/JuliaLang/julia/issues/10747 [#10844]: https://github.com/JuliaLang/julia/issues/10844 [#10870]: https://github.com/JuliaLang/julia/issues/10870 [#10885]: https://github.com/JuliaLang/julia/issues/10885 -[#10888]: https://github.com/JuliaLang/julia/pull/10888 -[#10893]: https://github.com/JuliaLang/julia/pull/10893 +[#10888]: https://github.com/JuliaLang/julia/issues/10888 +[#10893]: https://github.com/JuliaLang/julia/issues/10893 [#10914]: https://github.com/JuliaLang/julia/issues/10914 +[#10994]: https://github.com/JuliaLang/julia/issues/10994 diff --git a/base/c.jl b/base/c.jl index b15d4b3a72713..23f4cfe08ff42 100644 --- a/base/c.jl +++ b/base/c.jl @@ -1,6 +1,6 @@ # definitions related to C interface -import Core.Intrinsics.cglobal +import 
Core.Intrinsics: cglobal, box, unbox cfunction(f::Function, r, a) = ccall(:jl_function_ptr, Ptr{Void}, (Any, Any, Any), f, r, a) @@ -43,6 +43,34 @@ end typealias Coff_t FileOffset +# C NUL-terminated string pointers; these can be used in ccall +# instead of Ptr{Cchar} and Ptr{Cwchar_t}, respectively, to enforce +# a check for embedded NUL chars in the string (to avoid silent truncation). +if Int === Int64 + bitstype 64 Cstring + bitstype 64 Cwstring +else + bitstype 32 Cstring + bitstype 32 Cwstring +end + +convert{T<:Union(Int8,UInt8)}(::Type{Cstring}, p::Ptr{T}) = box(Cstring, unbox(Ptr{T}, p)) +convert(::Type{Cwstring}, p::Ptr{Cwchar_t}) = box(Cwstring, unbox(Ptr{Cwchar_t}, p)) + +containsnul(p::Ptr, len) = C_NULL != ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cint, Csize_t), p, 0, len) +function unsafe_convert(::Type{Cstring}, s::ByteString) + p = unsafe_convert(Ptr{Cchar}, s) + if containsnul(p, sizeof(s)) + throw(ArgumentError("embedded NUL chars are not allowed in C strings")) + end + return Cstring(p) +end + +# symbols are guaranteed not to contain embedded NUL +convert(::Type{Cstring}, s::Symbol) = Cstring(unsafe_convert(Ptr{Cchar}, s)) + +# in string.jl: unsafe_convert(::Type{Cwstring}, s::WString) + # deferring (or un-deferring) ctrl-c handler for external C code that # is not interrupt safe (see also issue #2622). 
The sigatomic_begin/end # functions should always be called in matched pairs, ideally via: @@ -56,11 +84,11 @@ disable_sigint(f::Function) = try sigatomic_begin(); f(); finally sigatomic_end( reenable_sigint(f::Function) = try sigatomic_end(); f(); finally sigatomic_begin(); end function ccallable(f::Function, rt::Type, argt::Type, name::Union(AbstractString,Symbol)=string(f)) - ccall(:jl_extern_c, Void, (Any, Any, Any, Ptr{UInt8}), f, rt, argt, name) + ccall(:jl_extern_c, Void, (Any, Any, Any, Cstring), f, rt, argt, name) end function ccallable(f::Function, argt::Type, name::Union(AbstractString,Symbol)=string(f)) - ccall(:jl_extern_c, Void, (Any, Ptr{Void}, Any, Ptr{UInt8}), f, C_NULL, argt, name) + ccall(:jl_extern_c, Void, (Any, Ptr{Void}, Any, Cstring), f, C_NULL, argt, name) end macro ccallable(def) diff --git a/base/client.jl b/base/client.jl index d57510bab4592..125f8952d3962 100644 --- a/base/client.jl +++ b/base/client.jl @@ -150,18 +150,18 @@ function syntax_deprecation_warnings(f::Function, warn::Bool) end end -function parse_input_line(s::AbstractString) - # s = bytestring(s) +function parse_input_line(s::ByteString) # (expr, pos) = parse(s, 1) # (ex, pos) = ccall(:jl_parse_string, Any, - # (Ptr{UInt8},Int32,Int32), - # s, pos-1, 1) + # (Ptr{UInt8},Csize_t,Int32,Int32), + # s, sizeof(s), pos-1, 1) # if !is(ex,()) # throw(ParseError("extra input after end of expression")) # end # expr - ccall(:jl_parse_input_line, Any, (Ptr{UInt8},), s) + ccall(:jl_parse_input_line, Any, (Ptr{UInt8}, Csize_t), s, sizeof(s)) end +parse_input_line(s::AbstractString) = parse_input_line(bytestring(s)) function parse_input_line(io::IO) s = "" diff --git a/base/datafmt.jl b/base/datafmt.jl index 1a0db96a24805..58a5aac2934c7 100644 --- a/base/datafmt.jl +++ b/base/datafmt.jl @@ -334,13 +334,13 @@ function colval{T<:Integer, S<:ByteString}(sbuff::S, startpos::Int, endpos::Int, isnull(n) || (cells[row,col] = get(n)) isnull(n) end -function colval{S<:ByteString}(sbuff::S, 
startpos::Int, endpos::Int, cells::Array{Float64,2}, row::Int, col::Int) - n = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Cint), sbuff, startpos-1, endpos-startpos+1) +function colval(sbuff::ByteString, startpos::Int, endpos::Int, cells::Array{Float64,2}, row::Int, col::Int) + n = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t), sbuff, startpos-1, endpos-startpos+1) isnull(n) || (cells[row,col] = get(n)) isnull(n) end -function colval{S<:ByteString}(sbuff::S, startpos::Int, endpos::Int, cells::Array{Float32,2}, row::Int, col::Int) - n = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Cint), sbuff, startpos-1, endpos-startpos+1) +function colval(sbuff::ByteString, startpos::Int, endpos::Int, cells::Array{Float32,2}, row::Int, col::Int) + n = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t), sbuff, startpos-1, endpos-startpos+1) isnull(n) || (cells[row,col] = get(n)) isnull(n) end @@ -358,7 +358,7 @@ function colval{S<:ByteString}(sbuff::S, startpos::Int, endpos::Int, cells::Arra isnull(nb) || (cells[row,col] = get(nb); return false) # check float64 - nf64 = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Cint), sbuff, startpos-1, endpos-startpos+1) + nf64 = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t), sbuff, startpos-1, endpos-startpos+1) isnull(nf64) || (cells[row,col] = get(nf64); return false) end cells[row,col] = SubString(sbuff, startpos, endpos) diff --git a/base/env.jl b/base/env.jl index 921863ae36d70..8c15c4842a662 100644 --- a/base/env.jl +++ b/base/env.jl @@ -1,5 +1,5 @@ @unix_only begin - _getenv(var::AbstractString) = ccall(:getenv, Ptr{UInt8}, (Ptr{UInt8},), var) + _getenv(var::AbstractString) = ccall(:getenv, Ptr{UInt8}, (Cstring,), var) _hasenv(s::AbstractString) = _getenv(s) != C_NULL end @windows_only begin @@ -24,12 +24,11 @@ function FormatMessage(e=GetLastError()) return utf8(UTF16String(buf)) end 
-_getenvlen(var::UTF16String) = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt8},UInt32),utf16(var),C_NULL,0) -_hasenv(s::UTF16String) = _getenvlen(s)!=0 || GetLastError()!=ERROR_ENVVAR_NOT_FOUND -_hasenv(s::AbstractString) = _hasenv(utf16(s)) +_getenvlen(var::AbstractString) = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Cwstring,Ptr{UInt8},UInt32),var,C_NULL,0) +_hasenv(s::AbstractString) = _getenvlen(s)!=0 || GetLastError()!=ERROR_ENVVAR_NOT_FOUND function _jl_win_getenv(s::UTF16String,len::UInt32) val=zeros(UInt16,len) - ret=ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32),s,val,len) + ret=ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Cwstring,Ptr{UInt16},UInt32),s,val,len) if ret==0 || ret != len-1 || val[end] != 0 error(string("getenv: ", s, ' ', len, "-1 != ", ret, ": ", FormatMessage())) end @@ -62,13 +61,13 @@ end function _setenv(var::AbstractString, val::AbstractString, overwrite::Bool) @unix_only begin - ret = ccall(:setenv, Int32, (Ptr{UInt8},Ptr{UInt8},Int32), var, val, overwrite) + ret = ccall(:setenv, Int32, (Cstring,Cstring,Int32), var, val, overwrite) systemerror(:setenv, ret != 0) end @windows_only begin var = utf16(var) if overwrite || !_hasenv(var) - ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),utf16(var),utf16(val)) + ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Cwstring,Cwstring),var,val) systemerror(:setenv, ret == 0) end end @@ -78,11 +77,11 @@ _setenv(var::AbstractString, val::AbstractString) = _setenv(var, val, true) function _unsetenv(var::AbstractString) @unix_only begin - ret = ccall(:unsetenv, Int32, (Ptr{UInt8},), var) + ret = ccall(:unsetenv, Int32, (Cstring,), var) systemerror(:unsetenv, ret != 0) end @windows_only begin - ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),utf16(var),C_NULL) + ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Cwstring,Ptr{UInt16}),var,C_NULL) systemerror(:setenv, ret == 0) end 
end diff --git a/base/exports.jl b/base/exports.jl index 9522c64c14151..27557c9ab807d 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -145,6 +145,8 @@ export Culonglong, Cushort, Cwchar_t, + Cstring, + Cwstring, # Exceptions ArgumentError, diff --git a/base/fftw.jl b/base/fftw.jl index a8b740b553073..c2ea0a865e46e 100644 --- a/base/fftw.jl +++ b/base/fftw.jl @@ -71,7 +71,7 @@ typealias fftwTypeSingle Union(Type{Float32},Type{Complex64}) # FFTW's api/import-wisdom-from-file.c file]. function export_wisdom(fname::AbstractString) - f = ccall(:fopen, Ptr{Void}, (Ptr{UInt8},Ptr{UInt8}), fname, "w") + f = ccall(:fopen, Ptr{Void}, (Cstring,Ptr{UInt8}), fname, "w") systemerror("could not open wisdom file $fname for writing", f == C_NULL) ccall((:fftw_export_wisdom_to_file,libfftw), Void, (Ptr{Void},), f) ccall(:fputs, Int32, (Ptr{UInt8},Ptr{Void}), " "^256, f) @@ -80,7 +80,7 @@ function export_wisdom(fname::AbstractString) end function import_wisdom(fname::AbstractString) - f = ccall(:fopen, Ptr{Void}, (Ptr{UInt8},Ptr{UInt8}), fname, "r") + f = ccall(:fopen, Ptr{Void}, (Cstring,Ptr{UInt8}), fname, "r") systemerror("could not open wisdom file $fname for reading", f == C_NULL) if ccall((:fftw_import_wisdom_from_file,libfftw),Int32,(Ptr{Void},),f)==0|| ccall((:fftwf_import_wisdom_from_file,libfftwf),Int32,(Ptr{Void},),f)==0 diff --git a/base/file.jl b/base/file.jl index f5fdb1fda57b1..08f8e93171d8a 100644 --- a/base/file.jl +++ b/base/file.jl @@ -8,7 +8,7 @@ function pwd() end function cd(dir::AbstractString) - uv_error("chdir $dir", ccall(:uv_chdir, Cint, (Ptr{UInt8},), dir)) + uv_error("chdir $dir", ccall(:uv_chdir, Cint, (Cstring,), dir)) end cd() = cd(homedir()) @@ -35,8 +35,8 @@ end cd(f::Function) = cd(f, homedir()) function mkdir(path::AbstractString, mode::Unsigned=0o777) - @unix_only ret = ccall(:mkdir, Int32, (Ptr{UInt8},UInt32), path, mode) - @windows_only ret = ccall(:_wmkdir, Int32, (Ptr{UInt16},), utf16(path)) + @unix_only ret = ccall(:mkdir, Int32, 
(Cstring,UInt32), path, mode) + @windows_only ret = ccall(:_wmkdir, Int32, (Cwstring,), path) systemerror(:mkdir, ret != 0) end @@ -61,8 +61,8 @@ function rm(path::AbstractString; recursive::Bool=false) rm(joinpath(path, p), recursive=true) end end - @unix_only ret = ccall(:rmdir, Int32, (Ptr{UInt8},), path) - @windows_only ret = ccall(:_wrmdir, Int32, (Ptr{UInt16},), utf16(path)) + @unix_only ret = ccall(:rmdir, Int32, (Cstring,), path) + @windows_only ret = ccall(:_wrmdir, Int32, (Cwstring,), path) systemerror(:rmdir, ret != 0) end end @@ -168,8 +168,7 @@ end tempname(uunique::UInt32=UInt32(0)) = tempname(tempdir(), uunique) function tempname(temppath::AbstractString,uunique::UInt32) tname = Array(UInt16,32767) - uunique = ccall(:GetTempFileNameW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32,Ptr{UInt16}), - utf16(temppath),utf16("jul"),uunique,tname) + uunique = ccall(:GetTempFileNameW,stdcall,UInt32,(Cwstring,Ptr{UInt16},UInt32,Ptr{UInt16}), temppath,utf16("jul"),uunique,tname) lentname = findfirst(tname,0)-1 if uunique == 0 || lentname <= 0 error("GetTempFileName failed: $(FormatMessage())") @@ -223,7 +222,7 @@ function readdir(path::AbstractString) uv_readdir_req = zeros(UInt8, ccall(:jl_sizeof_uv_fs_t, Int32, ())) # defined in sys.c, to call uv_fs_readdir, which sets errno on error. 
- file_count = ccall(:jl_readdir, Int32, (Ptr{UInt8}, Ptr{UInt8}), + file_count = ccall(:jl_readdir, Int32, (Cstring, Ptr{UInt8}), path, uv_readdir_req) systemerror("unable to read directory $path", file_count < 0) diff --git a/base/fs.jl b/base/fs.jl index 829d7e175d0cb..eafa5585358b9 100644 --- a/base/fs.jl +++ b/base/fs.jl @@ -72,7 +72,7 @@ _uv_fs_result(req) = ccall(:jl_uv_fs_result,Int32,(Ptr{Void},),req) function open(f::File,flags::Integer,mode::Integer=0) req = Libc.malloc(_sizeof_uv_fs) - ret = ccall(:uv_fs_open,Int32,(Ptr{Void},Ptr{Void},Ptr{UInt8},Int32,Int32,Ptr{Void}), + ret = ccall(:uv_fs_open,Int32,(Ptr{Void},Ptr{Void},Cstring,Int32,Int32,Ptr{Void}), eventloop(), req, f.path, flags,mode, C_NULL) f.handle = _uv_fs_result(req) ccall(:uv_fs_req_cleanup,Void,(Ptr{Void},),req) @@ -96,7 +96,7 @@ function close(f::File) end function unlink(p::AbstractString) - err = ccall(:jl_fs_unlink, Int32, (Ptr{UInt8},), p) + err = ccall(:jl_fs_unlink, Int32, (Cstring,), p) uv_error("unlink",err) end function unlink(f::File) @@ -112,7 +112,7 @@ end # For move command function rename(src::AbstractString, dst::AbstractString) - err = ccall(:jl_fs_rename, Int32, (Ptr{UInt8}, Ptr{UInt8}), src, dst) + err = ccall(:jl_fs_rename, Int32, (Cstring, Cstring), src, dst) # on error, default to cp && rm if err < 0 @@ -159,7 +159,7 @@ end @non_windowsxp_only function symlink(p::AbstractString, np::AbstractString) flags = 0 @windows_only if isdir(p); flags |= UV_FS_SYMLINK_JUNCTION; p = abspath(p); end - err = ccall(:jl_fs_symlink, Int32, (Ptr{UInt8}, Ptr{UInt8}, Cint), p, np, flags) + err = ccall(:jl_fs_symlink, Int32, (Cstring, Cstring, Cint), p, np, flags) @windows_only if err < 0 Base.warn_once("Note: on Windows, creating file symlinks requires Administrator privileges.") end @@ -171,7 +171,7 @@ end function readlink(path::AbstractString) req = Libc.malloc(_sizeof_uv_fs) ret = ccall(:uv_fs_readlink, Int32, - (Ptr{Void}, Ptr{Void}, Ptr{UInt8}, Ptr{Void}), + (Ptr{Void}, Ptr{Void}, 
Cstring, Ptr{Void}), eventloop(), req, path, C_NULL) uv_error("readlink", ret) tgt = bytestring(ccall(:jl_uv_fs_t_ptr, Ptr{Cchar}, (Ptr{Void}, ), req)) @@ -181,7 +181,7 @@ function readlink(path::AbstractString) end function chmod(p::AbstractString, mode::Integer) - err = ccall(:jl_fs_chmod, Int32, (Ptr{UInt8}, Cint), p, mode) + err = ccall(:jl_fs_chmod, Int32, (Cstring, Cint), p, mode) uv_error("chmod",err) end diff --git a/base/gmp.jl b/base/gmp.jl index 37789529003a9..87f6287f19bd2 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -76,17 +76,25 @@ BigInt(x::BigInt) = x function tryparse_internal(::Type{BigInt}, s::AbstractString, startpos::Int, endpos::Int, base::Int, raise::Bool) _n = Nullable{BigInt}() - sgn, base, i = Base.parseint_preamble(true,base,s,startpos,endpos) + + # don't make a copy in the common case where we are parsing a whole bytestring + bstr = startpos == start(s) && endpos == endof(s) ? bytestring(s) : bytestring(SubString(s,startpos,endpos)) + + sgn, base, i = Base.parseint_preamble(true,base,bstr,start(bstr),endof(bstr)) if i == 0 - raise && throw(ArgumentError("premature end of integer: $(repr(s))")) + raise && throw(ArgumentError("premature end of integer: $(repr(bstr))")) return _n end z = BigInt() - err = ccall((:__gmpz_set_str, :libgmp), - Int32, (Ptr{BigInt}, Ptr{UInt8}, Int32), - &z, SubString(s,i,endpos), base) + if Base.containsnul(bstr) + err = -1 # embedded NUL char (not handled correctly by GMP) + else + err = ccall((:__gmpz_set_str, :libgmp), + Int32, (Ptr{BigInt}, Ptr{UInt8}, Int32), + &z, pointer(bstr)+(i-start(bstr)), base) + end if err != 0 - raise && throw(ArgumentError("invalid BigInt: $(repr(s))")) + raise && throw(ArgumentError("invalid BigInt: $(repr(bstr))")) return _n end Nullable(sgn < 0 ? 
-z : z) diff --git a/base/interactiveutil.jl b/base/interactiveutil.jl index 5a5ecedeae236..abf9075c6cc8a 100644 --- a/base/interactiveutil.jl +++ b/base/interactiveutil.jl @@ -107,6 +107,9 @@ end @windows_only begin # TODO: these functions leak memory and memory locks if they throw an error function clipboard(x::AbstractString) + if containsnul(x) + throw(ArgumentError("Windows clipboard strings cannot contain NUL character")) + end systemerror(:OpenClipboard, 0==ccall((:OpenClipboard, "user32"), stdcall, Cint, (Ptr{Void},), C_NULL)) systemerror(:EmptyClipboard, 0==ccall((:EmptyClipboard, "user32"), stdcall, Cint, ())) x_u16 = utf16(x) @@ -355,7 +358,7 @@ end @windows_only function download(url::AbstractString, filename::AbstractString) res = ccall((:URLDownloadToFileW,:urlmon),stdcall,Cuint, - (Ptr{Void},Ptr{UInt16},Ptr{UInt16},Cint,Ptr{Void}),0,utf16(url),utf16(filename),0,0) + (Ptr{Void},Cwstring,Cwstring,Cint,Ptr{Void}),C_NULL,url,filename,0,0) if res != 0 error("automatic download failed (error: $res): $url") end diff --git a/base/iostream.jl b/base/iostream.jl index f315c9cac6b0d..b4e0b5840fc50 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -86,7 +86,7 @@ function open(fname::AbstractString, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff: s = IOStream(string("")) systemerror("opening file $fname", ccall(:ios_file, Ptr{Void}, - (Ptr{UInt8}, Ptr{UInt8}, Int32, Int32, Int32, Int32), + (Ptr{UInt8}, Cstring, Int32, Int32, Int32, Int32), s.ios, fname, rd, wr, cr, tr) == C_NULL) if ff systemerror("seeking to end of file $fname", ccall(:ios_seek_end, FileOffset, (Ptr{Void},), s.ios) != 0) diff --git a/base/libc.jl b/base/libc.jl index 4f8dbab0c64f8..9966897507a83 100644 --- a/base/libc.jl +++ b/base/libc.jl @@ -32,8 +32,8 @@ modestr(s::IO) = modestr(isreadable(s), iswritable(s)) modestr(r::Bool, w::Bool) = r ? (w ? "r+" : "r") : (w ? 
"w" : throw(ArgumentError("neither readable nor writable"))) function FILE(fd, mode) - @unix_only FILEp = ccall(:fdopen, Ptr{Void}, (Cint, Ptr{UInt8}), convert(Cint, fd), mode) - @windows_only FILEp = ccall(:_fdopen, Ptr{Void}, (Cint, Ptr{UInt8}), convert(Cint, fd), mode) + @unix_only FILEp = ccall(:fdopen, Ptr{Void}, (Cint, Cstring), convert(Cint, fd), mode) + @windows_only FILEp = ccall(:_fdopen, Ptr{Void}, (Cint, Cstring), convert(Cint, fd), mode) systemerror("fdopen", FILEp == C_NULL) FILE(FILEp) end @@ -98,7 +98,7 @@ strftime(t) = strftime("%c", t) strftime(fmt::AbstractString, t::Real) = strftime(fmt, TmStruct(t)) function strftime(fmt::AbstractString, tm::TmStruct) timestr = Array(UInt8, 128) - n = ccall(:strftime, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Ptr{TmStruct}), + n = ccall(:strftime, Int, (Ptr{UInt8}, Int, Cstring, Ptr{TmStruct}), timestr, length(timestr), fmt, &tm) if n == 0 return "" @@ -109,7 +109,7 @@ end strptime(timestr::AbstractString) = strptime("%c", timestr) function strptime(fmt::AbstractString, timestr::AbstractString) tm = TmStruct() - r = ccall(:strptime, Ptr{UInt8}, (Ptr{UInt8}, Ptr{UInt8}, Ptr{TmStruct}), + r = ccall(:strptime, Ptr{UInt8}, (Cstring, Cstring, Ptr{TmStruct}), timestr, fmt, &tm) # the following would tell mktime() that this is a local time, and that # it should try to guess the timezone. 
not sure if/how this should be diff --git a/base/libdl.jl b/base/libdl.jl index 099f29dfe7034..dd3fdc1703425 100644 --- a/base/libdl.jl +++ b/base/libdl.jl @@ -20,21 +20,21 @@ const RTLD_FIRST = 0x00000040 function dlsym(hnd::Ptr, s::Union(Symbol,AbstractString)) hnd == C_NULL && error("NULL library handle") - ccall(:jl_dlsym, Ptr{Void}, (Ptr{Void}, Ptr{UInt8}), hnd, s) + ccall(:jl_dlsym, Ptr{Void}, (Ptr{Void}, Cstring), hnd, s) end function dlsym_e(hnd::Ptr, s::Union(Symbol,AbstractString)) hnd == C_NULL && error("NULL library handle") - ccall(:jl_dlsym_e, Ptr{Void}, (Ptr{Void}, Ptr{UInt8}), hnd, s) + ccall(:jl_dlsym_e, Ptr{Void}, (Ptr{Void}, Cstring), hnd, s) end dlopen(s::Symbol, flags::Integer = RTLD_LAZY | RTLD_DEEPBIND) = dlopen(string(s), flags) dlopen(s::AbstractString, flags::Integer = RTLD_LAZY | RTLD_DEEPBIND) = - ccall(:jl_load_dynamic_library, Ptr{Void}, (Ptr{UInt8},UInt32), s, flags) + ccall(:jl_load_dynamic_library, Ptr{Void}, (Cstring,UInt32), s, flags) dlopen_e(s::AbstractString, flags::Integer = RTLD_LAZY | RTLD_DEEPBIND) = - ccall(:jl_load_dynamic_library_e, Ptr{Void}, (Ptr{UInt8},UInt32), s, flags) + ccall(:jl_load_dynamic_library_e, Ptr{Void}, (Cstring,UInt32), s, flags) dlopen_e(s::Symbol, flags::Integer = RTLD_LAZY | RTLD_DEEPBIND) = dlopen_e(string(s), flags) diff --git a/base/loading.jl b/base/loading.jl index dc22c0ff945a2..4c086adc47ab2 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -95,8 +95,11 @@ end # remote/parallel load -include_string(txt::AbstractString, fname::AbstractString) = - ccall(:jl_load_file_string, Any, (Ptr{UInt8},Ptr{UInt8}), txt, fname) +include_string(txt::ByteString, fname::ByteString) = + ccall(:jl_load_file_string, Any, (Ptr{UInt8},Csize_t,Ptr{UInt8},Csize_t), + txt, sizeof(txt), fname, sizeof(fname)) + +include_string(txt::AbstractString, fname::AbstractString) = include_string(bytestring(txt), bytestring(fname)) include_string(txt::AbstractString) = include_string(txt, "string") diff --git a/base/mmap.jl 
b/base/mmap.jl index bd87b80faccbd..8f1db6a29851c 100644 --- a/base/mmap.jl +++ b/base/mmap.jl @@ -96,7 +96,7 @@ function mmap_array{T,N}(::Type{T}, dims::NTuple{N,Integer}, s::Union(IO,SharedM if Int(hdl) == -1 error("could not get handle for file to map: $(FormatMessage())") end - name = C_NULL + name = Ptr{Cwchar_t}(C_NULL) ro = isreadonly(s) create = true else @@ -121,10 +121,10 @@ function mmap_array{T,N}(::Type{T}, dims::NTuple{N,Integer}, s::Union(IO,SharedM access = ro ? 4 : 2 if create flprotect = ro ? 0x02 : 0x04 - mmaphandle = ccall(:CreateFileMappingW, stdcall, Ptr{Void}, (Cptrdiff_t, Ptr{Void}, Cint, Cint, Cint, Ptr{UInt16}), + mmaphandle = ccall(:CreateFileMappingW, stdcall, Ptr{Void}, (Cptrdiff_t, Ptr{Void}, Cint, Cint, Cint, Cwstring), hdl, C_NULL, flprotect, szfile>>32, szfile&typemax(UInt32), name) else - mmaphandle = ccall(:OpenFileMappingW, stdcall, Ptr{Void}, (Cint, Cint, Ptr{UInt16}), + mmaphandle = ccall(:OpenFileMappingW, stdcall, Ptr{Void}, (Cint, Cint, Cwstring), access, true, name) end if mmaphandle == C_NULL diff --git a/base/mpfr.jl b/base/mpfr.jl index 24c7909b93e3d..6e502cadfd8f0 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -88,7 +88,7 @@ BigFloat(x::Rational) = BigFloat(num(x)) / BigFloat(den(x)) function tryparse(::Type{BigFloat}, s::AbstractString, base::Int=0) z = BigFloat() - err = ccall((:mpfr_set_str, :libmpfr), Int32, (Ptr{BigFloat}, Ptr{UInt8}, Int32, Int32), &z, s, base, ROUNDING_MODE[end]) + err = ccall((:mpfr_set_str, :libmpfr), Int32, (Ptr{BigFloat}, Cstring, Int32, Int32), &z, s, base, ROUNDING_MODE[end]) err == 0 ? Nullable(z) : Nullable{BigFloat}() end diff --git a/base/path.jl b/base/path.jl index 0ca44717d3ec8..21bec32844e90 100644 --- a/base/path.jl +++ b/base/path.jl @@ -113,7 +113,7 @@ abspath(a::AbstractString, b::AbstractString...) 
= abspath(joinpath(a,b...)) buflength = p buf = zeros(UInt16,buflength) p = ccall((:GetFullPathNameW, "Kernel32"), stdcall, - UInt32, (Ptr{UInt16}, UInt32, Ptr{UInt16}, Ptr{Void}), + UInt32, (Cwstring, UInt32, Ptr{UInt16}, Ptr{Void}), path, buflength, buf, C_NULL) systemerror(:realpath, p == 0) if (p < buflength) @@ -124,7 +124,7 @@ abspath(a::AbstractString, b::AbstractString...) = abspath(joinpath(a,b...)) end @unix_only function realpath(path::AbstractString) - p = ccall(:realpath, Ptr{UInt8}, (Ptr{UInt8}, Ptr{UInt8}), path, C_NULL) + p = ccall(:realpath, Ptr{UInt8}, (Cstring, Ptr{UInt8}), path, C_NULL) systemerror(:realpath, p == C_NULL) s = bytestring(p) Libc.free(p) diff --git a/base/pcre.jl b/base/pcre.jl index fcc14c43a4831..eba57886913d1 100644 --- a/base/pcre.jl +++ b/base/pcre.jl @@ -87,7 +87,7 @@ function compile(pattern::AbstractString, options::Integer) errstr[1] = C_NULL erroff = zeros(Int32,1) re_ptr = ccall((:pcre_compile, :libpcre), Ptr{Void}, - (Ptr{UInt8}, Int32, Ptr{Ptr{UInt8}}, Ptr{Int32}, Ptr{UInt8}), + (Cstring, Int32, Ptr{Ptr{UInt8}}, Ptr{Int32}, Ptr{UInt8}), pattern, options, errstr, erroff, C_NULL) if re_ptr == C_NULL error("$(bytestring(errstr[1]))", diff --git a/base/pointer.jl b/base/pointer.jl index de063b4c0660c..115ee660fecab 100644 --- a/base/pointer.jl +++ b/base/pointer.jl @@ -19,9 +19,11 @@ unsafe_convert(::Type{Ptr{UInt8}}, x::Symbol) = ccall(:jl_symbol_name, Ptr{UInt8 unsafe_convert(::Type{Ptr{Int8}}, x::Symbol) = ccall(:jl_symbol_name, Ptr{Int8}, (Any,), x) unsafe_convert(::Type{Ptr{UInt8}}, s::ByteString) = unsafe_convert(Ptr{UInt8}, s.data) unsafe_convert(::Type{Ptr{Int8}}, s::ByteString) = convert(Ptr{Int8}, unsafe_convert(Ptr{UInt8}, s.data)) -# convert strings to ByteString to pass as pointers +# convert strings to ByteString etc. 
to pass as pointers cconvert(::Type{Ptr{UInt8}}, s::AbstractString) = bytestring(s) cconvert(::Type{Ptr{Int8}}, s::AbstractString) = bytestring(s) +cconvert(::Type{Cstring}, s::AbstractString) = bytestring(s) +cconvert(::Type{Cwstring}, s::AbstractString) = wstring(s) unsafe_convert{T}(::Type{Ptr{T}}, a::Array{T}) = ccall(:jl_array_ptr, Ptr{T}, (Any,), a) unsafe_convert(::Type{Ptr{Void}}, a::Array) = ccall(:jl_array_ptr, Ptr{Void}, (Any,), a) diff --git a/base/poll.jl b/base/poll.jl index 2a9a2b62b16dc..e827bebbaa393 100644 --- a/base/poll.jl +++ b/base/poll.jl @@ -5,7 +5,7 @@ type FileMonitor notify::Condition function FileMonitor(cb, file) handle = Libc.malloc(_sizeof_uv_fs_event) - err = ccall(:jl_fs_event_init,Int32, (Ptr{Void}, Ptr{Void}, Ptr{UInt8}, Int32), eventloop(),handle,file,0) + err = ccall(:jl_fs_event_init,Int32, (Ptr{Void}, Ptr{Void}, Cstring, Int32), eventloop(),handle,file,0) if err < 0 ccall(:uv_fs_event_stop, Int32, (Ptr{Void},), handle) disassociate_julia_struct(handle) @@ -253,7 +253,7 @@ start_watching(f::Function, t::FDWatcher, events::FDEvent) = (t.cb = f; start_wa function start_watching(t::PollingFileWatcher, interval=2.0) associate_julia_struct(t.handle, t) uv_error("start_watching (File)", - ccall(:jl_fs_poll_start, Int32, (Ptr{Void},Ptr{UInt8},UInt32), + ccall(:jl_fs_poll_start, Int32, (Ptr{Void},Cstring,UInt32), t.handle, t.file, round(UInt32,interval*1000))) end start_watching(f::Function, t::PollingFileWatcher, interval=2.0) = (t.cb = f;start_watching(t,interval)) diff --git a/base/process.jl b/base/process.jl index ce8311a05972c..ed18a4a16f828 100644 --- a/base/process.jl +++ b/base/process.jl @@ -117,10 +117,19 @@ ignorestatus(cmd::Cmd) = (cmd.ignorestatus=true; cmd) ignorestatus(cmd::Union(OrCmds,AndCmds)) = (ignorestatus(cmd.a); ignorestatus(cmd.b); cmd) detach(cmd::Cmd) = (cmd.detach=true; cmd) -setenv{S<:ByteString}(cmd::Cmd, env::Array{S}; dir="") = (cmd.env = ByteString[x for x in env]; setenv(cmd, dir=dir); cmd) 
-setenv(cmd::Cmd, env::Associative; dir="") = (cmd.env = ByteString[string(k)*"="*string(v) for (k,v) in env]; setenv(cmd, dir=dir); cmd) -setenv{T<:AbstractString}(cmd::Cmd, env::Pair{T}...; dir="") = (cmd.env = ByteString[k*"="*string(v) for (k,v) in env]; setenv(cmd, dir=dir); cmd) -setenv(cmd::Cmd; dir="") = (cmd.dir = dir; cmd) +# like bytestring(s), but throw an error if s contains NUL, since +# libuv requires NUL-terminated strings +function cstr(s) + if Base.containsnul(s) + throw(ArgumentError("strings containing NUL cannot be passed to spawned processes")) + end + return bytestring(s) +end + +setenv{S<:ByteString}(cmd::Cmd, env::Array{S}; dir="") = (cmd.env = ByteString[cstr(x) for x in env]; setenv(cmd, dir=dir); cmd) +setenv(cmd::Cmd, env::Associative; dir="") = (cmd.env = ByteString[cstr(string(k)*"="*string(v)) for (k,v) in env]; setenv(cmd, dir=dir); cmd) +setenv{T<:AbstractString}(cmd::Cmd, env::Pair{T}...; dir="") = (cmd.env = ByteString[cstr(k*"="*string(v)) for (k,v) in env]; setenv(cmd, dir=dir); cmd) +setenv(cmd::Cmd; dir="") = (cmd.dir = cstr(dir); cmd) (&)(left::AbstractCmd, right::AbstractCmd) = AndCmds(left, right) redir_out(src::AbstractCmd, dest::AbstractCmd) = OrCmds(src, dest) @@ -559,18 +568,18 @@ end ## implementation of `cmd` syntax ## arg_gen() = ByteString[] -arg_gen(x::AbstractString) = ByteString[x] +arg_gen(x::AbstractString) = ByteString[cstr(x)] arg_gen(cmd::Cmd) = cmd.exec function arg_gen(head) if applicable(start, head) vals = ByteString[] for x in head - push!(vals, string(x)) + push!(vals, cstr(string(x))) end return vals else - return ByteString[string(head)] + return ByteString[cstr(string(head))] end end @@ -579,7 +588,7 @@ function arg_gen(head, tail...) tail = arg_gen(tail...) 
vals = ByteString[] for h = head, t = tail - push!(vals, bytestring(h, t)) + push!(vals, cstr(bytestring(h, t))) end vals end diff --git a/base/sharedarray.jl b/base/sharedarray.jl index 18e2711940181..a562408f58c16 100644 --- a/base/sharedarray.jl +++ b/base/sharedarray.jl @@ -390,8 +390,8 @@ function _shm_mmap_array(T, dims, shm_seg_name, mode) mmap_array(T, dims, s, zero(FileOffset), grow=false) end -shm_unlink(shm_seg_name) = ccall(:shm_unlink, Cint, (Ptr{UInt8},), shm_seg_name) -shm_open(shm_seg_name, oflags, permissions) = ccall(:shm_open, Int, (Ptr{UInt8}, Int, Int), shm_seg_name, oflags, permissions) +shm_unlink(shm_seg_name) = ccall(:shm_unlink, Cint, (Cstring,), shm_seg_name) +shm_open(shm_seg_name, oflags, permissions) = ccall(:shm_open, Int, (Cstring, Int, Int), shm_seg_name, oflags, permissions) end # @unix_only diff --git a/base/show.jl b/base/show.jl index 56be84cf590d9..87abbd9390482 100644 --- a/base/show.jl +++ b/base/show.jl @@ -274,8 +274,8 @@ function isidentifier(s::AbstractString) end isidentifier(s::Symbol) = isidentifier(string(s)) -isoperator(s::Symbol) = ccall(:jl_is_operator, Cint, (Ptr{UInt8},), s) != 0 -operator_precedence(s::Symbol) = Int(ccall(:jl_operator_precedence, Cint, (Ptr{UInt8},), s)) +isoperator(s::Symbol) = ccall(:jl_is_operator, Cint, (Cstring,), s) != 0 +operator_precedence(s::Symbol) = Int(ccall(:jl_operator_precedence, Cint, (Cstring,), s)) operator_precedence(x::Any) = 0 # fallback for generic expression nodes const prec_power = operator_precedence(:(^)) diff --git a/base/socket.jl b/base/socket.jl index 9a7378433de2e..69c801b272f19 100644 --- a/base/socket.jl +++ b/base/socket.jl @@ -574,7 +574,7 @@ end function getaddrinfo(cb::Function, host::ASCIIString) callback_dict[cb] = cb - uv_error("getaddrinfo",ccall(:jl_getaddrinfo, Int32, (Ptr{Void}, Ptr{UInt8}, Ptr{UInt8}, Any), + uv_error("getaddrinfo",ccall(:jl_getaddrinfo, Int32, (Ptr{Void}, Cstring, Ptr{UInt8}, Any), eventloop(), host, C_NULL, cb)) end 
getaddrinfo(cb::Function, host::AbstractString) = getaddrinfo(cb,ascii(host)) diff --git a/base/stat.jl b/base/stat.jl index d90f483dea684..064a8e1d45d02 100644 --- a/base/stat.jl +++ b/base/stat.jl @@ -48,8 +48,8 @@ end stat(fd::RawFD) = @stat_call jl_fstat Int32 fd.fd stat(fd::Integer) = @stat_call jl_fstat Int32 fd -stat(path::AbstractString) = @stat_call jl_stat Ptr{UInt8} path -lstat(path::AbstractString) = @stat_call jl_lstat Ptr{UInt8} path +stat(path::AbstractString) = @stat_call jl_stat Cstring path +lstat(path::AbstractString) = @stat_call jl_lstat Cstring path stat(path...) = stat(joinpath(path...)) lstat(path...) = lstat(joinpath(path...)) diff --git a/base/stream.jl b/base/stream.jl index e36c1b6301b3c..1c534cdf4873f 100644 --- a/base/stream.jl +++ b/base/stream.jl @@ -890,7 +890,7 @@ end function bind(server::PipeServer, name::AbstractString) @assert server.status == StatusInit - err = ccall(:uv_pipe_bind, Int32, (Ptr{Void}, Ptr{UInt8}), + err = ccall(:uv_pipe_bind, Int32, (Ptr{Void}, Cstring), server.handle, name) if err != 0 if err != UV_EADDRINUSE && err != UV_EACCES @@ -916,7 +916,7 @@ function connect!(sock::Pipe, path::AbstractString) @assert sock.status == StatusInit req = Libc.malloc(_sizeof_uv_connect) uv_req_set_data(req,C_NULL) - ccall(:uv_pipe_connect, Void, (Ptr{Void}, Ptr{Void}, Ptr{UInt8}, Ptr{Void}), req, sock.handle, path, uv_jl_connectcb::Ptr{Void}) + ccall(:uv_pipe_connect, Void, (Ptr{Void}, Ptr{Void}, Cstring, Ptr{Void}), req, sock.handle, path, uv_jl_connectcb::Ptr{Void}) sock.status = StatusConnecting sock end diff --git a/base/string.jl b/base/string.jl index 44a8cbb1add44..011fe80d95393 100644 --- a/base/string.jl +++ b/base/string.jl @@ -40,6 +40,7 @@ function bytestring(p::Union(Ptr{UInt8},Ptr{Int8})) p == C_NULL ? 
throw(ArgumentError("cannot convert NULL to string")) : ccall(:jl_cstr_to_string, ByteString, (Ptr{UInt8},), p) end +bytestring(s::Cstring) = bytestring(box(Ptr{Cchar}, unbox(Cstring,s))) function bytestring(p::Union(Ptr{UInt8},Ptr{Int8}),len::Integer) p == C_NULL ? throw(ArgumentError("cannot convert NULL to string")) : @@ -527,7 +528,7 @@ endswith(str::AbstractString, chars::Chars) = !isempty(str) && str[end] in chars # faster comparisons for byte strings and symbols cmp(a::ByteString, b::ByteString) = lexcmp(a.data, b.data) -cmp(a::Symbol, b::Symbol) = Int(sign(ccall(:strcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}), a, b))) +cmp(a::Symbol, b::Symbol) = Int(sign(ccall(:strcmp, Int32, (Cstring, Cstring), a, b))) ==(a::ByteString, b::ByteString) = endof(a) == endof(b) && cmp(a,b) == 0 isless(a::Symbol, b::Symbol) = cmp(a,b) < 0 @@ -641,13 +642,6 @@ function getindex(s::AbstractString, r::UnitRange{Int}) SubString(s, first(r), last(r)) end -function unsafe_convert{P<:Union(Int8,UInt8),T<:ByteString}(::Type{Ptr{P}}, s::SubString{T}) - if s.offset+s.endof < endof(s.string) - throw(ArgumentError("a SubString must coincide with the end of the original string to be convertible to pointer")) - end - convert(Ptr{P}, s.string.data) + s.offset -end - isascii(s::SubString{ASCIIString}) = true function cmp{T<:ByteString,S<:ByteString}(a::SubString{T}, b::SubString{S}) @@ -1243,9 +1237,10 @@ shell_escape(args::AbstractString...) = sprint(print_shell_escaped, args...) function parse(str::AbstractString, pos::Int; greedy::Bool=true, raise::Bool=true) # returns (expr, end_pos). expr is () in case of parse error. + bstr = bytestring(str) ex, pos = ccall(:jl_parse_string, Any, - (Ptr{UInt8}, Int32, Int32), - str, pos-1, greedy ? 1:0) + (Ptr{UInt8}, Csize_t, Int32, Int32), + bstr, sizeof(bstr), pos-1, greedy ? 
1:0) if raise && isa(ex,Expr) && is(ex.head,:error) throw(ParseError(ex.args[1])) end @@ -1619,11 +1614,13 @@ string(x::Union(Int8,Int16,Int32,Int64,Int128)) = dec(x) ## string to float functions ## -tryparse(::Type{Float64}, s::AbstractString) = ccall(:jl_try_strtod, Nullable{Float64}, (Ptr{UInt8},), s) -tryparse(::Type{Float64}, s::SubString) = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof) +tryparse(::Type{Float64}, s::ByteString) = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t), s, 0, sizeof(s)) +tryparse{T<:ByteString}(::Type{Float64}, s::SubString{T}) = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t), s.string, s.offset, s.endof) + +tryparse(::Type{Float32}, s::ByteString) = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t), s, 0, sizeof(s)) +tryparse{T<:ByteString}(::Type{Float32}, s::SubString{T}) = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t), s.string, s.offset, s.endof) -tryparse(::Type{Float32}, s::AbstractString) = ccall(:jl_try_strtof, Nullable{Float32}, (Ptr{UInt8},), s) -tryparse(::Type{Float32}, s::SubString) = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof) +tryparse{T<:Union(Float32,Float64)}(::Type{T}, s::AbstractString) = tryparse(T, bytestring(s)) function parse{T<:FloatingPoint}(::Type{T}, s::AbstractString) nf = tryparse(T, s) @@ -1710,13 +1707,27 @@ function repr(x) takebuf_string(s) end +containsnul(s::AbstractString) = '\0' in s +containsnul(s::ByteString) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s)) +containsnul(s::Union(UTF16String,UTF32String)) = findfirst(s.data, 0) != length(s.data) + if sizeof(Cwchar_t) == 2 - const WString = UTF16String # const, not typealias, to get constructor + const WString = UTF16String const wstring = utf16 elseif sizeof(Cwchar_t) == 4 - const WString = UTF32String # const, not typealias, to get 
constructor + const WString = UTF32String const wstring = utf32 end +wstring(s::Cwstring) = wstring(box(Ptr{Cwchar_t}, unbox(Cwstring,s))) + +# Cwstring is defined in c.jl, but conversion needs to be defined here +# to have WString +function unsafe_convert(::Type{Cwstring}, s::WString) + if containsnul(s) + throw(ArgumentError("embedded NUL chars are not allowed in C strings")) + end + return Cwstring(unsafe_convert(Ptr{Cwchar_t}, s)) +end # pointer conversions of ASCII/UTF8/UTF16/UTF32 strings: pointer(x::Union(ByteString,UTF16String,UTF32String)) = pointer(x.data) diff --git a/base/sysinfo.jl b/base/sysinfo.jl index 9069b510e673a..93cbc3e2fba97 100644 --- a/base/sysinfo.jl +++ b/base/sysinfo.jl @@ -140,7 +140,7 @@ function get_process_title() return bytestring(pointer(buf)) end function set_process_title(title::AbstractString) - err = ccall(:uv_set_process_title, Cint, (Ptr{UInt8},), bytestring(title)) + err = ccall(:uv_set_process_title, Cint, (Cstring,), title) uv_error("set_process_title", err) end diff --git a/doc/manual/calling-c-and-fortran-code.rst b/doc/manual/calling-c-and-fortran-code.rst index e8a3ebb05c134..8b22202f8036f 100644 --- a/doc/manual/calling-c-and-fortran-code.rst +++ b/doc/manual/calling-c-and-fortran-code.rst @@ -386,8 +386,8 @@ Julia type with the same name, prefixed by C. This can help for writing portable | ``T*`` (where T represents an | | | ``Ref{T}`` | | appropriately defined type) | | | | +-----------------------------------+-----------------+----------------------+-----------------------------------+ -| ``char*`` | ``CHARACTER*N`` | | ``Ptr{UInt8}`` | -| (or ``char[]``, e.g. a string) | | | | +| ``char*`` | ``CHARACTER*N`` | | ``Cstring`` if NUL-terminated, or | +| (or ``char[]``, e.g. 
a string) | | | ``Ptr{UInt8}`` if not | +-----------------------------------+-----------------+----------------------+-----------------------------------+ | ``char**`` (or ``*char[]``) | | | ``Ptr{Ptr{UInt8}}`` | +-----------------------------------+-----------------+----------------------+-----------------------------------+ @@ -405,6 +405,12 @@ Julia type with the same name, prefixed by C. This can help for writing portable | | | | argument types are not supported) | +-----------------------------------+-----------------+----------------------+-----------------------------------+ +The ``Cstring`` type is essentially a synonym for ``Ptr{UInt8}``, except the conversion to ``Cstring`` throws an +error if the Julia string contains any embedded NUL characters (which would cause the string to be silently +truncated if the C routine treats NUL as the terminator). If you are passing a ``char*`` to a C routine that +does not assume NUL termination (e.g. because you pass an explicit string length), or if you know for certain that +your Julia string does not contain NUL and want to skip the check, you can use ``Ptr{UInt8}`` as the argument type. + **System-dependent:** ====================== ====================== ======= @@ -430,20 +436,25 @@ C name Standard Julia Alias Julia Base Type `Remember`: when calling a Fortran function, all inputs must be passed by reference, so all type correspondences above should contain an additional ``Ptr{..}`` or ``Ref{..}`` wrapper around their type specification. -`Warning`: For string arguments (``char*``) the Julia type should be ``Ptr{Cchar}``, +`Warning`: For string arguments (``char*``) the Julia type should be ``Cstring`` (if NUL-terminated data is expected) +or either ``Ptr{Cchar}`` or ``Ptr{UInt8}`` otherwise (these two pointer types have the same effect), as described above, not ``ASCIIString``. Similarly, for array arguments (``T[]`` or ``T*``), the Julia type should again be ``Ptr{T}``, not ``Vector{T}``. 
`Warning`: Julia's ``Char`` type is 32 bits, which is not the same as the wide character type (``wchar_t`` or ``wint_t``) on all platforms. -`Note`: For ``wchar_t*`` arguments, the Julia type should be ``Ptr{Wchar_t}``, +`Note`: For ``wchar_t*`` arguments, the Julia type should be ``Cwstring`` (if the C routine +expects a NUL-terminated string) or ``Ptr{Cwchar_t}`` otherwise, and data can be converted to/from ordinary Julia strings by the ``wstring(s)`` function (equivalent to either ``utf16(s)`` or ``utf32(s)`` -depending upon the width of ``Cwchar_t``. Note also that ASCII, UTF-8, +depending upon the width of ``Cwchar_t``); this conversion will be called +automatically for ``Cwstring`` arguments. Note also that ASCII, UTF-8, UTF-16, and UTF-32 string data in Julia is internally NUL-terminated, so it can be passed to C functions expecting NUL-terminated data without making -a copy. +a copy (but using the ``Cwstring`` type will cause an error to be thrown +if the string itself contains NUL characters). + `Note`: C functions that take an argument of the type ``char**`` can be called by using a ``Ptr{Ptr{UInt8}}`` type within Julia. For example, C functions of the form:: diff --git a/src/ast.c b/src/ast.c index 76c109554bd57..0b6de67a9dca1 100644 --- a/src/ast.c +++ b/src/ast.c @@ -490,9 +490,9 @@ static value_t julia_to_scm_(jl_value_t *v) } // this is used to parse a line of repl input -DLLEXPORT jl_value_t *jl_parse_input_line(const char *str) +DLLEXPORT jl_value_t *jl_parse_input_line(const char *str, size_t len) { - value_t s = cvalue_static_cstring(str); + value_t s = cvalue_static_cstrn(str, len); value_t e = fl_applyn(1, symbol_value(symbol("jl-parse-string")), s); if (e == FL_EOF) return jl_nothing; @@ -502,9 +502,10 @@ DLLEXPORT jl_value_t *jl_parse_input_line(const char *str) // this is for parsing one expression out of a string, keeping track of // the current position. 
-DLLEXPORT jl_value_t *jl_parse_string(const char *str, int pos0, int greedy) +DLLEXPORT jl_value_t *jl_parse_string(const char *str, size_t len, + int pos0, int greedy) { - value_t s = cvalue_static_cstring(str); + value_t s = cvalue_static_cstrn(str, len); value_t p = fl_applyn(3, symbol_value(symbol("jl-parse-one-string")), s, fixnum(pos0), greedy?FL_T:FL_F); jl_value_t *expr=NULL, *pos1=NULL; @@ -566,16 +567,17 @@ jl_value_t *jl_parse_next(void) return scm_to_julia(c,0); } -jl_value_t *jl_load_file_string(const char *text, char *filename) +jl_value_t *jl_load_file_string(const char *text, size_t len, + char *filename, size_t namelen) { value_t t, f; - t = cvalue_static_cstring(text); + t = cvalue_static_cstrn(text, len); fl_gc_handle(&t); - f = cvalue_static_cstring(filename); + f = cvalue_static_cstrn(filename, namelen); fl_applyn(2, symbol_value(symbol("jl-parse-string-stream")), t, f); fl_free_gc_handles(1); - return jl_parse_eval_all(filename); + return jl_parse_eval_all(filename, namelen); } // returns either an expression or a thunk diff --git a/src/builtins.c b/src/builtins.c index 556b6d0dc1264..c886309af71ff 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -774,7 +774,7 @@ int str_isspace(char *p) return 1; } -DLLEXPORT jl_nullable_float64_t jl_try_substrtod(char *str, size_t offset, int len) +DLLEXPORT jl_nullable_float64_t jl_try_substrtod(char *str, size_t offset, size_t len) { char *p; char *bstr = str+offset; @@ -811,29 +811,7 @@ DLLEXPORT jl_nullable_float64_t jl_try_substrtod(char *str, size_t offset, int l return ret; } -DLLEXPORT jl_nullable_float64_t jl_try_strtod(char *str) -{ - char *p; - int err = 0; - - errno = 0; - double out = strtod_c(str, &p); - - if (errno==ERANGE && (out==0 || out==HUGE_VAL || out==-HUGE_VAL)) { - err = 1; - } - else if (p == str) { - err = 1; - } - else { - err = str_isspace(p) ? 
0 : 1; - } - - jl_nullable_float64_t ret = {(uint8_t)err, out}; - return ret; -} - -DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out) +DLLEXPORT int jl_substrtod(char *str, size_t offset, size_t len, double *out) { jl_nullable_float64_t nd = jl_try_substrtod(str, offset, len); if (0 == nd.isnull) { @@ -843,22 +821,12 @@ DLLEXPORT int jl_substrtod(char *str, size_t offset, int len, double *out) return 1; } -DLLEXPORT int jl_strtod(char *str, double *out) -{ - jl_nullable_float64_t nd = jl_try_strtod(str); - if (0 == nd.isnull) { - *out = nd.value; - return 0; - } - return 1; -} - // MSVC pre-2013 did not define HUGE_VALF #ifndef HUGE_VALF #define HUGE_VALF (1e25f * 1e25f) #endif -DLLEXPORT jl_nullable_float32_t jl_try_substrtof(char *str, size_t offset, int len) +DLLEXPORT jl_nullable_float32_t jl_try_substrtof(char *str, size_t offset, size_t len) { char *p; char *bstr = str+offset; @@ -899,32 +867,7 @@ DLLEXPORT jl_nullable_float32_t jl_try_substrtof(char *str, size_t offset, int l return ret; } -DLLEXPORT jl_nullable_float32_t jl_try_strtof(char *str) -{ - char *p; - int err = 0; - - errno = 0; -#if defined(_OS_WINDOWS_) && !defined(_COMPILER_MINGW_) - float out = (float)strtod_c(str, &p); -#else - float out = strtof_c(str, &p); -#endif - if (errno==ERANGE && (out==0 || out==HUGE_VALF || out==-HUGE_VALF)) { - err = 1; - } - else if (p == str) { - err = 1; - } - else { - err = str_isspace(p) ? 
0 : 1; - } - - jl_nullable_float32_t ret = {(uint8_t)err, out}; - return ret; -} - -DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out) +DLLEXPORT int jl_substrtof(char *str, int offset, size_t len, float *out) { jl_nullable_float32_t nf = jl_try_substrtof(str, offset, len); if (0 == nf.isnull) { @@ -934,16 +877,6 @@ DLLEXPORT int jl_substrtof(char *str, int offset, int len, float *out) return 1; } -DLLEXPORT int jl_strtof(char *str, float *out) -{ - jl_nullable_float32_t nf = jl_try_strtof(str); - if (0 == nf.isnull) { - *out = nf.value; - return 0; - } - return 1; -} - // showing -------------------------------------------------------------------- void jl_flush_cstdio(void) diff --git a/src/flisp/cvalues.c b/src/flisp/cvalues.c index 0f3e493b45ed0..582e4a724902f 100644 --- a/src/flisp/cvalues.c +++ b/src/flisp/cvalues.c @@ -203,9 +203,14 @@ value_t cvalue_string(size_t sz) return cvalue(stringtype, sz); } +value_t cvalue_static_cstrn(const char *str, size_t n) +{ + return cvalue_from_ref(stringtype, (char*)str, n, NIL); +} + value_t cvalue_static_cstring(const char *str) { - return cvalue_from_ref(stringtype, (char*)str, strlen(str), NIL); + return cvalue_static_cstrn(str, strlen(str)); } value_t string_from_cstrn(char *str, size_t n) diff --git a/src/flisp/flisp.h b/src/flisp/flisp.h index 3ab9ec787b440..d97987833ddf4 100644 --- a/src/flisp/flisp.h +++ b/src/flisp/flisp.h @@ -346,6 +346,7 @@ size_t cvalue_arraylen(value_t v); value_t size_wrap(size_t sz); size_t tosize(value_t n, char *fname); value_t cvalue_string(size_t sz); +value_t cvalue_static_cstrn(const char *str, size_t n); value_t cvalue_static_cstring(const char *str); value_t string_from_cstr(char *str); value_t string_from_cstrn(char *str, size_t n); diff --git a/src/jlapi.c b/src/jlapi.c index 906ab185ba770..8d329b7d2b517 100644 --- a/src/jlapi.c +++ b/src/jlapi.c @@ -51,7 +51,7 @@ DLLEXPORT void *jl_eval_string(const char *str) { jl_value_t *r; JL_TRY { - jl_value_t *ast = 
jl_parse_input_line(str); + jl_value_t *ast = jl_parse_input_line(str, strlen(str)); JL_GC_PUSH1(&ast); r = jl_toplevel_eval(ast); JL_GC_POP(); diff --git a/src/julia.h b/src/julia.h index ce7565dc151d8..fb8ef56691f69 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1161,13 +1161,15 @@ DLLEXPORT jl_module_t *jl_restore_new_module(const char *fname); void jl_init_restored_modules(); // front end interface -DLLEXPORT jl_value_t *jl_parse_input_line(const char *str); -DLLEXPORT jl_value_t *jl_parse_string(const char *str, int pos0, int greedy); +DLLEXPORT jl_value_t *jl_parse_input_line(const char *str, size_t len); +DLLEXPORT jl_value_t *jl_parse_string(const char *str, size_t len, + int pos0, int greedy); DLLEXPORT int jl_parse_depwarn(int warn); int jl_start_parsing_file(const char *fname); void jl_stop_parsing(void); jl_value_t *jl_parse_next(void); -DLLEXPORT jl_value_t *jl_load_file_string(const char *text, char *filename); +DLLEXPORT jl_value_t *jl_load_file_string(const char *text, size_t len, + char *filename, size_t namelen); DLLEXPORT jl_value_t *jl_expand(jl_value_t *expr); jl_lambda_info_t *jl_wrap_expr(jl_value_t *expr); DLLEXPORT void *jl_eval_string(const char *str); @@ -1202,7 +1204,7 @@ DLLEXPORT jl_value_t *jl_toplevel_eval(jl_value_t *v); DLLEXPORT jl_value_t *jl_toplevel_eval_in(jl_module_t *m, jl_value_t *ex); jl_value_t *jl_eval_global_var(jl_module_t *m, jl_sym_t *e); DLLEXPORT jl_value_t *jl_load(const char *fname); -jl_value_t *jl_parse_eval_all(const char *fname); +jl_value_t *jl_parse_eval_all(const char *fname, size_t len); jl_value_t *jl_interpret_toplevel_thunk(jl_lambda_info_t *lam); jl_value_t *jl_interpret_toplevel_thunk_with(jl_lambda_info_t *lam, jl_value_t **loc, size_t nl); diff --git a/src/toplevel.c b/src/toplevel.c index 80eba32c44df2..e69ed34462018 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -529,7 +529,7 @@ jl_value_t *jl_toplevel_eval(jl_value_t *v) } // repeatedly call jl_parse_next and eval everything -jl_value_t 
*jl_parse_eval_all(const char *fname) +jl_value_t *jl_parse_eval_all(const char *fname, size_t len) { //jl_printf(JL_STDERR, "***** loading %s\n", fname); int last_lineno = jl_lineno; @@ -589,7 +589,7 @@ jl_value_t *jl_load(const char *fname) if (jl_start_parsing_file(fpath) != 0) { jl_errorf("could not open file %s", fpath); } - jl_value_t *result = jl_parse_eval_all(fpath); + jl_value_t *result = jl_parse_eval_all(fpath, strlen(fpath)); if (fpath != fname) free(fpath); return result; } diff --git a/test/core.jl b/test/core.jl index e19b0da58e4d4..d623c7e9f6e70 100644 --- a/test/core.jl +++ b/test/core.jl @@ -1645,8 +1645,8 @@ test5536(a::Union(Real, AbstractArray)...) = "Splatting" test5536(a::Union(Real, AbstractArray)) = "Non-splatting" @test test5536(5) == "Non-splatting" -# multiline comments (#6139 and others raised in #6128) -@test 3 == include_string("1 + 2") == include_string("1 + #==# 2") == include_string("1 + #===# 2") == include_string("1 + #= #= blah =# =# 2") == include_string("1 + #= #= #= nested =# =# =# 2") +# multiline comments (#6139 and others raised in #6128) and embedded NUL chars (#10994) +@test 3 == include_string("1 + 2") == include_string("1 + #==# 2") == include_string("1 + #===# 2") == include_string("1 + #= #= blah =# =# 2") == include_string("1 + #= #= #= nested =# =# =# 2") == include_string("1 + #= \0 =# 2") @test_throws LoadError include_string("#=") @test_throws LoadError include_string("#= #= #= =# =# =") diff --git a/test/file.jl b/test/file.jl index 358f3c81c3b40..cbd801747245d 100644 --- a/test/file.jl +++ b/test/file.jl @@ -708,6 +708,18 @@ close(f) @windows_only f = RawFD(ccall(:_open, Cint, (Ptr{Uint8}, Cint), file, Base.FS.JL_O_RDONLY)) test_LibcFILE(Libc.FILE(f,Libc.modestr(true,false))) +# issue #10994: pathnames cannot contain embedded NUL chars +for f in (mkdir, cd, Base.FS.unlink, readlink, rm, touch, readdir, mkpath, stat, lstat, ctime, mtime, filemode, filesize, uperm, gperm, operm, touch, isblockdev, ischardev, 
isdir, isexecutable, isfifo, isfile, islink, ispath, isreadable, issetgid, issetuid, issocket, issticky, iswritable, realpath, watch_file) + @test_throws ArgumentError f("adir\0bad") +end +@test_throws ArgumentError chmod("ba\0d", 0o222) +@test_throws ArgumentError open("ba\0d", "w") +for f in (cp, mv, symlink) + @test_throws ArgumentError f(file, "ba\0d") +end +@test_throws ArgumentError download("good", "ba\0d") +@test_throws ArgumentError download("ba\0d", "good") + ############ # Clean up # ############ diff --git a/test/mpfr.jl b/test/mpfr.jl index d070a83d76f70..c33f5b403dc0c 100644 --- a/test/mpfr.jl +++ b/test/mpfr.jl @@ -821,3 +821,6 @@ err(z, x) = abs(z - x) / abs(x) # check exponent range is set to max possible @test MPFR.get_emin() == MPFR.get_emin_min() @test MPFR.get_emax() == MPFR.get_emax_max() + +# issue #10994: handle embedded NUL chars for string parsing +@test_throws ArgumentError BigFloat("1\0") diff --git a/test/parser.jl b/test/parser.jl index 5bb94aba3c2ae..992d3998163c5 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -78,3 +78,6 @@ macro test999_str(args...); args; end QuoteNode(:quote), :f))), :i)) + +# issue #10994 +@test parse("1 + #= \0 =# 2") == :(1 + 2) diff --git a/test/regex.jl b/test/regex.jl index 4260dbd5fc41f..dd74d0a67a134 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -29,3 +29,6 @@ match(pat, target) buf = PipeBuffer() show(buf, r"") @test readall(buf) == "r\"\"" + +# issue #10994: PCRE does not allow NUL chars in the pattern +@test_throws ArgumentError Regex("a\0b") diff --git a/test/socket.jl b/test/socket.jl index ebbf17308f6e5..c477aaad7740d 100644 --- a/test/socket.jl +++ b/test/socket.jl @@ -95,6 +95,7 @@ for T in (ASCIIString, UTF8String, UTF16String) # test for issue #9435 end @test_throws Base.UVError getaddrinfo(".invalid") +@test_throws ArgumentError getaddrinfo("localhost\0") # issue #10994 @test_throws Base.UVError connect("localhost", 21452) # test invalid port diff --git a/test/spawn.jl b/test/spawn.jl 
index d99218f66a250..f91d4ab943390 100644 --- a/test/spawn.jl +++ b/test/spawn.jl @@ -195,3 +195,11 @@ close(f) @test "Hello World\n" == readall(fname) @test is(OLD_STDOUT,STDOUT) rm(fname) + +# issue #10994: libuv can't handle strings containing NUL +let bad = "bad\0name" + @test_throws ArgumentError run(`$bad`) + @test_throws ArgumentError run(`echo $bad`) + @test_throws ArgumentError run(setenv(`echo hello`, bad=>"good")) + @test_throws ArgumentError run(setenv(`echo hello`, "good"=>bad)) +end diff --git a/test/strings.jl b/test/strings.jl index e57dee82052e2..cfeccaed598f7 100644 --- a/test/strings.jl +++ b/test/strings.jl @@ -1428,3 +1428,20 @@ end @test isnull(tryparse(Float64, "64o")) @test get(tryparse(Float32, "32")) == 32.0f0 @test isnull(tryparse(Float32, "32o")) + +# issue #10994: handle embedded NUL chars for string parsing +for T in [BigInt, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128] + @test_throws ArgumentError parse(T, "1\0") +end +for T in [BigInt, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, Float64, Float32] + @test isnull(tryparse(T, "1\0")) +end +let s = normalize_string("tést",:NFKC) + @test bytestring(Base.unsafe_convert(Cstring, s)) == s + @test bytestring(convert(Cstring, symbol(s))) == s + @test wstring(Base.unsafe_convert(Cwstring, wstring(s))) == s +end +let s = "ba\0d" + @test_throws ArgumentError Base.unsafe_convert(Cstring, s) + @test_throws ArgumentError Base.unsafe_convert(Cwstring, wstring(s)) +end diff --git a/test/sysinfo.jl b/test/sysinfo.jl index 1e75af06286e7..d81276d966197 100644 --- a/test/sysinfo.jl +++ b/test/sysinfo.jl @@ -4,3 +4,8 @@ sprint(Base.Sys.cpu_summary) @test Base.Sys.uptime() > 0 Base.Sys.loadavg() + +# issue #10994 +@test_throws ArgumentError ENV["bad\0name"] = "ok" +@test_throws ArgumentError ENV["okname"] = "bad\0val" +@test_throws ArgumentError Sys.set_process_title("bad\0title")