diff --git a/base/c.jl b/base/c.jl index ee7eb1ab15618..1215c2cee7473 100644 --- a/base/c.jl +++ b/base/c.jl @@ -81,7 +81,8 @@ unsafe_string(s::Cstring) = unsafe_string(convert(Ptr{UInt8}, s)) # convert strings to String etc. to pass as pointers cconvert(::Type{Cstring}, s::AbstractString) = String(s) -cconvert(::Type{Cwstring}, s::AbstractString) = wstring(s) +# transcode returns only the code units, so append the terminating NUL that C expects +cconvert(::Type{Cwstring}, s::AbstractString) = push!(transcode(Cwchar_t, String(s).data), 0) containsnul(p::Ptr, len) = C_NULL != ccall(:memchr, Ptr{Cchar}, (Ptr{Cchar}, Cint, Csize_t), p, 0, len) @@ -96,11 +97,20 @@ function unsafe_convert(::Type{Cstring}, s::String) return Cstring(p) end +# A NUL before the last element would silently truncate the string on the C side, and a +# missing terminator would let C read past the end of the array, so reject both. +function unsafe_convert(::Type{Cwstring}, s::Vector{Cwchar_t}) + for i = 1:length(s)-1 + s[i] == 0 && throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))")) + end + (!isempty(s) && s[end] == 0) || throw(ArgumentError("C string data must be NUL terminated: $(repr(s))")) + # Cwstring wraps a pointer to the data, not the array itself + return Cwstring(unsafe_convert(Ptr{Cwchar_t}, s)) +end + # symbols are guaranteed not to contain embedded NUL convert(::Type{Cstring}, s::Symbol) = Cstring(unsafe_convert(Ptr{Cchar}, s)) -# in string.jl: unsafe_convert(::Type{Cwstring}, s::WString) - # FIXME: this should be handled by implicit conversion to Cwstring, but good luck with that if is_windows() function cwstring(s::AbstractString) diff --git a/base/deprecated.jl b/base/deprecated.jl index 03026cbc47a16..d8eb64cfc274a 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -488,6 +488,16 @@ end end ) +if sizeof(Cwchar_t) == 2 + @deprecate_binding WString UTF16String + @deprecate_binding wstring utf16 + utf16(s::Cwstring) = utf16(convert(Ptr{Cwchar_t}, s)) +elseif sizeof(Cwchar_t) == 4 + @deprecate_binding WString UTF32String + @deprecate_binding wstring utf32 + utf32(s::Cwstring) = utf32(convert(Ptr{Cwchar_t}, s)) +end + @deprecate ==(x::Char, y::Integer) UInt32(x) == y @deprecate ==(x::Integer, y::Char) x == UInt32(y) @deprecate isless(x::Char, y::Integer) UInt32(x) < y diff --git a/base/docs/helpdb/Base.jl b/base/docs/helpdb/Base.jl index 4e3688afacdf7..49e430499d65f 100644 --- a/base/docs/helpdb/Base.jl +++ 
b/base/docs/helpdb/Base.jl @@ -2775,15 +2775,6 @@ Equivalent to `writedlm` with `delim` set to comma. """ writecsv -""" - wstring(s) - -This is a synonym for either `utf32(s)` or `utf16(s)`, depending on whether `Cwchar_t` is 32 -or 16 bits, respectively. The synonym `WString` for `UTF32String` or `UTF16String` is also -provided. -""" -wstring - """ withenv(f::Function, kv::Pair...) diff --git a/base/exports.jl b/base/exports.jl index 61113e9d4e343..0a23ed46a5a98 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -127,7 +127,6 @@ export VersionNumber, WeakKeyDict, WorkerConfig, - WString, Zip, # Ccall types @@ -882,7 +881,6 @@ export utf16, utf32, warn, - wstring, # random numbers AbstractRNG, diff --git a/base/unicode/utf16.jl b/base/unicode/utf16.jl index e1b2093e26d3a..89f63e71e3ed7 100644 --- a/base/unicode/utf16.jl +++ b/base/unicode/utf16.jl @@ -270,3 +270,5 @@ function map(fun, str::UTF16String) push!(buf, 0) UTF16String(buf) end + +cconvert(::Type{Cwstring}, v::Vector{UInt16}) = transcode(Cwchar_t, v) diff --git a/base/unicode/utf32.jl b/base/unicode/utf32.jl index 929bdcc22fe7d..a3e336399afe0 100644 --- a/base/unicode/utf32.jl +++ b/base/unicode/utf32.jl @@ -153,6 +153,8 @@ function convert(T::Type{UTF32String}, bytes::AbstractArray{UInt8}) UTF32String(d) end +cconvert(::Type{Cwstring}, v::Vector{UInt32}) = transcode(Cwchar_t, v) + function isvalid(::Type{UTF32String}, str::Union{Vector{UInt32}, Vector{Char}}) for c in str @inbounds if !isvalid(Char, UInt32(c)) ; return false ; end @@ -186,24 +188,6 @@ function map(f, s::UTF32String) UTF32String(out) end -if sizeof(Cwchar_t) == 2 - const WString = UTF16String - const wstring = utf16 -elseif sizeof(Cwchar_t) == 4 - const WString = UTF32String - const wstring = utf32 -end -wstring(s::Cwstring) = wstring(convert(Ptr{Cwchar_t}, s)) - -# Cwstring is defined in c.jl, but conversion needs to be defined here -# to have WString -function unsafe_convert(::Type{Cwstring}, s::WString) - if containsnul(s) - 
throw(ArgumentError("embedded NUL chars are not allowed in C strings: $(repr(s))")) - end - return Cwstring(unsafe_convert(Ptr{Cwchar_t}, s)) -end - pointer(x::Union{UTF16String,UTF32String}) = pointer(x.data) pointer(x::Union{UTF16String,UTF32String}, i::Integer) = pointer(x)+(i-1)*sizeof(eltype(x.data)) pointer{T<:Union{UTF16String,UTF32String}}(x::SubString{T}) = pointer(x.string.data) + x.offset*sizeof(eltype(x.string.data)) diff --git a/doc/manual/calling-c-and-fortran-code.rst b/doc/manual/calling-c-and-fortran-code.rst index b16d4efc83447..84e3b35ae4010 100644 --- a/doc/manual/calling-c-and-fortran-code.rst +++ b/doc/manual/calling-c-and-fortran-code.rst @@ -486,15 +486,11 @@ C name Standard Julia Alias Julia Base Type .. note:: For ``wchar_t*`` arguments, the Julia type should be ``Cwstring`` (if the C - routine expects a NUL-terminated string) or ``Ptr{Cwchar_t}`` otherwise, and - data can be converted to/from ordinary Julia strings by the ``wstring(s)`` - function (equivalent to either ``utf16(s)`` or ``utf32(s)`` depending upon the - width of ``Cwchar_t``); this conversion will be called automatically for - ``Cwstring`` arguments. Note also that ASCII, UTF-8, UTF-16, and UTF-32 - string data in Julia is internally NUL-terminated, so it can be passed to C - functions expecting NUL-terminated data without making a copy (but using the - ``Cwstring`` type will cause an error to be thrown if the string itself - contains NUL characters). + routine expects a NUL-terminated string) or ``Ptr{Cwchar_t}`` otherwise. Note + also that ASCII, UTF-8, UTF-16, and UTF-32 string data in Julia is internally + NUL-terminated, so it can be passed to C functions expecting NUL-terminated + data without making a copy (but using the ``Cwstring`` type will cause an + error to be thrown if the string itself contains NUL characters). .. 
note:: diff --git a/doc/manual/strings.rst b/doc/manual/strings.rst index 8bafefa5f3ba8..7c5c34a2e33e4 100644 --- a/doc/manual/strings.rst +++ b/doc/manual/strings.rst @@ -353,9 +353,7 @@ UTF-8 is not the only encoding that Julia supports, and adding support for new encodings is quite easy. In particular, Julia also provides :obj:`UTF16String` and :obj:`UTF32String` types, constructed by :func:`utf16` and :func:`utf32` respectively, for UTF-16 and -UTF-32 encodings. It also provides aliases :obj:`WString` and -:func:`wstring` for either UTF-16 or UTF-32 strings, depending on the -size of ``Cwchar_t``. Additional discussion of other encodings and how to +UTF-32 encodings. Additional discussion of other encodings and how to implement support for them is beyond the scope of this document for the time being. For further discussion of UTF-8 encoding issues, see the section below on `byte array literals <#Byte+Array+Literals>`_, diff --git a/doc/stdlib/strings.rst b/doc/stdlib/strings.rst index 80dcc8a5aafdc..b92a95c3d3040 100644 --- a/doc/stdlib/strings.rst +++ b/doc/stdlib/strings.rst @@ -500,10 +500,3 @@ .. Docstring generated from Julia source Create a string from the address of a NUL-terminated UTF-32 string. A copy is made; the pointer can be safely freed. If ``length`` is specified, the string does not have to be NUL-terminated. - -.. function:: wstring(s) - - .. Docstring generated from Julia source - - This is a synonym for either ``utf32(s)`` or ``utf16(s)``\ , depending on whether ``Cwchar_t`` is 32 or 16 bits, respectively. The synonym ``WString`` for ``UTF32String`` or ``UTF16String`` is also provided. 
- diff --git a/test/strings/basic.jl b/test/strings/basic.jl index b88c39f67dfa9..de45bf8405740 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -237,22 +237,14 @@ end let s = normalize_string("tést",:NFKC) @test unsafe_string(Base.unsafe_convert(Cstring, s)) == s @test unsafe_string(convert(Cstring, Symbol(s))) == s - @test wstring(Base.unsafe_convert(Cwstring, wstring(s))) == s -end -let s = "ba\0d" - @test_throws ArgumentError Base.unsafe_convert(Cstring, s) - @test_throws ArgumentError Base.unsafe_convert(Cwstring, wstring(s)) end +@test_throws ArgumentError Base.unsafe_convert(Cstring, "ba\0d") cstrdup(s) = @static is_windows() ? ccall(:_strdup, Cstring, (Cstring,), s) : ccall(:strdup, Cstring, (Cstring,), s) let p = cstrdup("hello") @test unsafe_string(p) == "hello" == unsafe_wrap(String, cstrdup(p), true) Libc.free(p) end -let p = @static is_windows() ? ccall(:_wcsdup, Cwstring, (Cwstring,), "tést") : ccall(:wcsdup, Cwstring, (Cwstring,), "tést") - @test wstring(p) == "tést" - Libc.free(p) -end # issue # 11389: Vector{UInt32} was copied with UTF32String, unlike Vector{Char} a = UInt32[48,0] diff --git a/test/unicode/utf32.jl b/test/unicode/utf32.jl index 875c22330749d..3beb874e50755 100644 --- a/test/unicode/utf32.jl +++ b/test/unicode/utf32.jl @@ -169,12 +169,6 @@ for T in (String, UTF16String, UTF32String) end end -# Wstring -u8 = "\U10ffff\U1d565\U1d7f6\U00066\U2008a" -w = wstring(u8) -@test length(w) == 5 && String(w) == u8 && collect(u8) == collect(w) -@test u8 == WString(w.data) - # 12268 for (fun, S, T) in ((utf16, UInt16, UTF16String), (utf32, UInt32, UTF32String)) # AbstractString