diff --git a/NEWS.md b/NEWS.md index 48aebc9344980..ea50e8c3c9b93 100644 --- a/NEWS.md +++ b/NEWS.md @@ -36,6 +36,7 @@ New library features Standard library changes ------------------------ +* `islowercase` and `isuppercase` now follow the Unicode `Lowercase` and `Uppercase` derived properties instead of the general categories Ll and Lu/Lt; in particular, titlecase characters such as `Dž` are no longer considered uppercase ([#38574]). #### Package Manager diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index 60e6aa0e70a64..38ffacd8aa572 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -280,9 +280,8 @@ isassigned(c) = UTF8PROC_CATEGORY_CN < category_code(c) <= UTF8PROC_CATEGORY_CO """ islowercase(c::AbstractChar) -> Bool -Tests whether a character is a lowercase letter. -A character is classified as lowercase if it belongs to Unicode category Ll, -Letter: Lowercase. +Tests whether a character is a lowercase letter (according to the Unicode +standard's `Lowercase` derived property). See also: [`isuppercase`](@ref). @@ -298,16 +297,15 @@ julia> islowercase('❤') false ``` """ -islowercase(c::AbstractChar) = category_code(c) == UTF8PROC_CATEGORY_LL +islowercase(c::AbstractChar) = ismalformed(c) ? false : Bool(ccall(:utf8proc_islower, Cint, (UInt32,), UInt32(c))) # true for Unicode upper and mixed case """ isuppercase(c::AbstractChar) -> Bool -Tests whether a character is an uppercase letter. -A character is classified as uppercase if it belongs to Unicode category Lu, -Letter: Uppercase, or Lt, Letter: Titlecase. +Tests whether a character is an uppercase letter (according to the Unicode +standard's `Uppercase` derived property). See also: [`islowercase`](@ref). @@ -323,10 +321,7 @@ julia> isuppercase('❤') false ``` """ -function isuppercase(c::AbstractChar) - cat = category_code(c) - cat == UTF8PROC_CATEGORY_LU || cat == UTF8PROC_CATEGORY_LT -end +isuppercase(c::AbstractChar) = ismalformed(c) ?
false : Bool(ccall(:utf8proc_isupper, Cint, (UInt32,), UInt32(c))) """ iscased(c::AbstractChar) -> Bool diff --git a/stdlib/Unicode/test/runtests.jl b/stdlib/Unicode/test/runtests.jl index e5d667a976079..6888fa2d9ba40 100644 --- a/stdlib/Unicode/test/runtests.jl +++ b/stdlib/Unicode/test/runtests.jl @@ -93,7 +93,7 @@ end @testset "#5939 uft8proc character predicates" begin alower=['a', 'd', 'j', 'y', 'z'] ulower=['α', 'β', 'γ', 'δ', 'ф', 'я'] - for c in vcat(alower,ulower) + for c in vcat(alower,ulower,['ª']) @test islowercase(c) == true @test isuppercase(c) == false @test isdigit(c) == false @@ -101,17 +101,20 @@ end end aupper=['A', 'D', 'J', 'Y', 'Z'] - uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'Dž', 'Ж', 'Д'] + uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'Ж', 'Д'] - for c in vcat(aupper,uupper) + for c in vcat(aupper,uupper,['Ⓐ']) @test islowercase(c) == false @test isuppercase(c) == true @test isdigit(c) == false @test isnumeric(c) == false end + @test !isuppercase('Dž') # titlecase is not uppercase + @test Base.Unicode.iscased('Dž') # but is "cased" + nocase=['א','ﺵ'] - alphas=vcat(alower,ulower,aupper,uupper,nocase) + alphas=vcat(alower,ulower,aupper,uupper,nocase,['Dž']) for c in alphas @test isletter(c) == true