diff --git a/NEWS.md b/NEWS.md index 56ac88a97e2fa..042e57f3594ad 100644 --- a/NEWS.md +++ b/NEWS.md @@ -31,6 +31,10 @@ New language features `@generated` and normal implementations of part of a function. Surrounding code will be common to both versions ([#23168]). + * The `missing` singleton object (of type `Missing`) has been added to represent + missing values ([#24653]). It propagates through standard operators and mathematical functions, + and implements three-valued logic, similar to SQL's `NULL` and R's `NA`. + Language changes ---------------- @@ -1700,3 +1704,4 @@ Command-line option changes [#24320]: https://github.com/JuliaLang/julia/issues/24320 [#24396]: https://github.com/JuliaLang/julia/issues/24396 [#24413]: https://github.com/JuliaLang/julia/issues/24413 +[#24653]: https://github.com/JuliaLang/julia/issues/24653 \ No newline at end of file diff --git a/base/abstractarray.jl b/base/abstractarray.jl index b5c3ab79c6170..37baf143cde7f 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1572,12 +1572,16 @@ function (==)(A::AbstractArray, B::AbstractArray) if isa(A,AbstractRange) != isa(B,AbstractRange) return false end + anymissing = false for (a, b) in zip(A, B) - if !(a == b) + eq = (a == b) + if ismissing(eq) + anymissing = true + elseif !eq return false end end - return true + return anymissing ? missing : true end # sub2ind and ind2sub diff --git a/base/essentials.jl b/base/essentials.jl index 07faf9f1ed9ad..f7b7cba581b23 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -731,3 +731,26 @@ This function simply returns its argument by default, since the elements of a general iterator are normally considered its "values". """ values(itr) = itr + +""" + Missing + +A type with no fields whose singleton instance [`missing`](@ref) is used +to represent missing values. +""" +struct Missing end + +""" + missing + +The singleton instance of type [`Missing`](@ref) representing a missing value. 
+""" +const missing = Missing() + +""" + ismissing(x) + +Indicate whether `x` is [`missing`](@ref). +""" +ismissing(::Any) = false +ismissing(::Missing) = true \ No newline at end of file diff --git a/base/exports.jl b/base/exports.jl index 9756109b9263f..f99dde7d85e12 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -76,6 +76,7 @@ export Irrational, Matrix, MergeSort, + Missing, NTuple, Nullable, ObjectIdDict, @@ -149,6 +150,7 @@ export EOFError, InvalidStateException, KeyError, + MissingException, NullException, ParseError, SystemError, @@ -881,6 +883,10 @@ export isready, fetch, +# missing values + ismissing, + missing, + # time sleep, time, diff --git a/base/missing.jl b/base/missing.jl new file mode 100644 index 0000000000000..a5c62819b5b00 --- /dev/null +++ b/base/missing.jl @@ -0,0 +1,118 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# Missing, missing and ismissing are defined in essentials.jl + +show(io::IO, x::Missing) = print(io, "missing") + +""" + MissingException(msg) + +Exception thrown when a [`missing`](@ref) value is encountered in a situation +where it is not supported. The error message, in the `msg` field +may provide more specific details. 
+""" +struct MissingException <: Exception + msg::AbstractString +end + +showerror(io::IO, ex::MissingException) = + print(io, "MissingException: ", ex.msg) + +promote_rule(::Type{Missing}, ::Type{T}) where {T} = Union{T, Missing} +promote_rule(::Type{Union{S,Missing}}, ::Type{T}) where {T,S} = Union{promote_type(T, S), Missing} +promote_rule(::Type{Any}, ::Type{T}) where {T} = Any +promote_rule(::Type{Any}, ::Type{Missing}) = Any +promote_rule(::Type{Missing}, ::Type{Any}) = Any +promote_rule(::Type{Missing}, ::Type{Missing}) = Missing + +convert(::Type{Union{T, Missing}}, x) where {T} = convert(T, x) +# To print more appropriate message than "T not defined" +convert(::Type{Missing}, x) = throw(MethodError(convert, (Missing, x))) +convert(::Type{Missing}, ::Missing) = missing + +# Comparison operators +==(::Missing, ::Missing) = missing +==(::Missing, ::Any) = missing +==(::Any, ::Missing) = missing +# To fix ambiguity +==(::Missing, ::WeakRef) = missing +==(::WeakRef, ::Missing) = missing +isequal(::Missing, ::Missing) = true +isequal(::Missing, ::Any) = false +isequal(::Any, ::Missing) = false +<(::Missing, ::Missing) = missing +<(::Missing, ::Any) = missing +<(::Any, ::Missing) = missing +isless(::Missing, ::Missing) = false +isless(::Missing, ::Any) = false +isless(::Any, ::Missing) = true + +# Unary operators/functions +for f in (:(!), :(+), :(-), :(identity), :(zero), :(one), :(oneunit), + :(abs), :(abs2), :(sign), + :(acos), :(acosh), :(asin), :(asinh), :(atan), :(atanh), + :(sin), :(sinh), :(cos), :(cosh), :(tan), :(tanh), + :(exp), :(exp2), :(expm1), :(log), :(log10), :(log1p), + :(log2), :(exponent), :(sqrt), :(gamma), :(lgamma), + :(iseven), :(ispow2), :(isfinite), :(isinf), :(isodd), + :(isinteger), :(isreal), :(isnan), :(isempty), + :(iszero), :(transpose), :(float)) + @eval Math.$(f)(::Missing) = missing +end + +for f in (:(Base.zero), :(Base.one), :(Base.oneunit)) + @eval function $(f)(::Type{Union{T, Missing}}) where T + T === Any && 
throw(MethodError($f, (Any,))) # To prevent StackOverflowError + $f(T) + end +end + +# Binary operators/functions +for f in (:(+), :(-), :(*), :(/), :(^), + :(div), :(mod), :(fld), :(rem), :(min), :(max)) + @eval begin + # Scalar with missing + ($f)(::Missing, ::Missing) = missing + ($f)(d::Missing, x::Number) = missing + ($f)(d::Number, x::Missing) = missing + end +end + +# Rounding and related functions +for f in (:(ceil), :(floor), :(round), :(trunc)) + @eval begin + ($f)(::Missing, digits::Integer=0, base::Integer=0) = missing + ($f)(::Type{>:Missing}, ::Missing) = missing + ($f)(::Type{T}, ::Missing) where {T} = + throw(MissingException("cannot convert a missing value to type $T")) + end +end + +# to avoid ambiguity warnings +(^)(::Missing, ::Integer) = missing + +# Bit operators +(&)(::Missing, ::Missing) = missing +(&)(a::Missing, b::Bool) = ifelse(b, missing, false) +(&)(b::Bool, a::Missing) = ifelse(b, missing, false) +(&)(::Missing, ::Integer) = missing +(&)(::Integer, ::Missing) = missing +(|)(::Missing, ::Missing) = missing +(|)(a::Missing, b::Bool) = ifelse(b, true, missing) +(|)(b::Bool, a::Missing) = ifelse(b, true, missing) +(|)(::Missing, ::Integer) = missing +(|)(::Integer, ::Missing) = missing +xor(::Missing, ::Missing) = missing +xor(a::Missing, b::Bool) = missing +xor(b::Bool, a::Missing) = missing +xor(::Missing, ::Integer) = missing +xor(::Integer, ::Missing) = missing + +*(d::Missing, x::AbstractString) = missing +*(d::AbstractString, x::Missing) = missing + +function float(A::AbstractArray{Union{T, Missing}}) where {T} + U = typeof(float(zero(T))) + convert(AbstractArray{Union{U, Missing}}, A) +end +float(A::AbstractArray{Missing}) = A diff --git a/base/reduce.jl b/base/reduce.jl index 0cc67be13e55a..a186539784793 100644 --- a/base/reduce.jl +++ b/base/reduce.jl @@ -597,6 +597,9 @@ Determine whether predicate `p` returns `true` for any elements of `itr`, return `true` as soon as the first item in `itr` for which `p` returns `true` is 
encountered (short-circuiting). +If the input contains [`missing`](@ref) values, return `missing` if all non-missing +values are `false` (or equivalently, if the input contains no `true` value). + ```jldoctest julia> any(i->(4<=i<=6), [3,5,7]) true @@ -610,10 +613,16 @@ true ``` """ function any(f, itr) + anymissing = false for x in itr - f(x) && return true + v = f(x) + if ismissing(v) + anymissing = true + elseif v + return true + end end - return false + return anymissing ? missing : false end """ @@ -623,6 +632,9 @@ Determine whether predicate `p` returns `true` for all elements of `itr`, return `false` as soon as the first item in `itr` for which `p` returns `false` is encountered (short-circuiting). +If the input contains [`missing`](@ref) values, return `missing` if all non-missing +values are `true` (or equivalently, if the input contains no `false` value). + ```jldoctest julia> all(i->(4<=i<=6), [4,5,6]) true @@ -635,12 +647,22 @@ false ``` """ function all(f, itr) + anymissing = false for x in itr - f(x) || return false + v = f(x) + if ismissing(v) + anymissing = true + # this syntax allows throwing a TypeError for non-Bool, for consistency with any + elseif v + continue + else + return false + end end - return true + return anymissing ? missing : true end + ## in & contains """ diff --git a/base/reducedim.jl b/base/reducedim.jl index abe62f5269287..3b1bf5e5a0a4d 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -112,7 +112,7 @@ function reducedim_init(f, op::typeof(*), A::AbstractArray, region) end function _reducedim_init(f, op, fv, fop, A, region) T = promote_union(eltype(A)) - if applicable(zero, T) + if T !== Any && applicable(zero, T) x = f(zero(T)) z = op(fv(x), fv(x)) Tr = typeof(z) == typeof(x) && !isbits(T) ? 
T : typeof(z) diff --git a/base/sysimg.jl b/base/sysimg.jl index 6a17da3274460..96b083c1af3fc 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -399,6 +399,9 @@ const × = cross # statistics include("statistics.jl") +# missing values +include("missing.jl") + # libgit2 support include("libgit2/libgit2.jl") diff --git a/doc/make.jl b/doc/make.jl index 894603566394a..ed52e23f91538 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -67,6 +67,7 @@ const PAGES = [ "manual/metaprogramming.md", "manual/arrays.md", "manual/linear-algebra.md", + "manual/missing.md", "manual/networking-and-streams.md", "manual/parallel-computing.md", "manual/dates.md", diff --git a/doc/src/index.md b/doc/src/index.md index abb6f6ec9eb49..8faee6bf9f939 100644 --- a/doc/src/index.md +++ b/doc/src/index.md @@ -22,6 +22,7 @@ * [Metaprogramming](@ref) * [Multi-dimensional Arrays](@ref man-multi-dim-arrays) * [Linear Algebra](@ref) + * [Missing Values](@ref missing) * [Networking and Streams](@ref) * [Parallel Computing](@ref) * [Date and DateTime](@ref) diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md index eea38271ead51..ebcae376c8446 100644 --- a/doc/src/manual/faq.md +++ b/doc/src/manual/faq.md @@ -617,7 +617,8 @@ all/many future usages of the other functions in module Foo that depend on calli Unlike many languages (for example, C and Java), Julia does not have a "null" value. When a reference (variable, object field, or array element) is uninitialized, accessing it will immediately throw -an error. This situation can be detected using the `isdefined` function. +an error. This situation can be detected using the [`isdefined`](@ref) or [`isassigned`](@ref) +functions. Some functions are used only for their side effects, and do not need to return a value. 
In these cases, the convention is to return the value `nothing`, which is just a singleton object of type @@ -625,8 +626,8 @@ cases, the convention is to return the value `nothing`, which is just a singleto this convention, and that the REPL does not print anything for it. Some language constructs that would not otherwise have a value also yield `nothing`, for example `if false; end`. -For situations where a value exists only sometimes (for example, missing statistical data), it -is best to use the `Nullable{T}` type, which allows specifying the type of a missing value. +To represent missing data in the statistical sense (`NA` in R or `NULL` in SQL), use the +[`missing`](@ref) object. See the [Missing Values](@ref missing) section for more details. The empty tuple (`()`) is another form of nothingness. But, it should not really be thought of as nothing but rather a tuple of zero values. diff --git a/doc/src/manual/index.md b/doc/src/manual/index.md index 37a8b0b67862e..0eb4406168086 100644 --- a/doc/src/manual/index.md +++ b/doc/src/manual/index.md @@ -20,6 +20,7 @@ * [Metaprogramming](@ref) * [Multi-dimensional Arrays](@ref man-multi-dim-arrays) * [Linear algebra](@ref) + * [Missing Values](@ref missing) * [Networking and Streams](@ref) * [Parallel Computing](@ref) * [Date and DateTime](@ref) diff --git a/doc/src/manual/missing.md b/doc/src/manual/missing.md new file mode 100644 index 0000000000000..191af95c05aea --- /dev/null +++ b/doc/src/manual/missing.md @@ -0,0 +1,347 @@ +# [Missing Values](@id missing) + +Julia provides support for representing missing values in the statistical sense, +that is for situations where no value is available for a variable in an observation, +but a valid value theoretically exists. +Missing values are represented via the [`missing`](@ref) object, which is the +singleton instance of the type [`Missing`](@ref). 
`missing` is equivalent to +[`NULL` in SQL](https://en.wikipedia.org/wiki/NULL_(SQL)) and +[`NA` in R](https://cran.r-project.org/doc/manuals/r-release/R-lang.html#NA-handling), +and behaves like them in most situations. + +## Propagation of Missing Values + +The behavior of `missing` values follows one basic rule: `missing` +values *propagate* automatically when passed to standard operators and functions, +in particular mathematical functions. Uncertainty about the value of one of the operands +induces uncertainty about the result. In practice, this means an operation involving +a `missing` value generally returns `missing`: +```doctest +julia> missing + 1 +missing + +julia> "a" * missing +missing + +julia> abs(missing) +missing + +``` + +As `missing` is a normal Julia object, this propagation rule only works +for functions which have opted in to implement this behavior. This can be +achieved either via a specific method defined for arguments of type `Missing`, +or simply by accepting arguments of this type, and passing them to functions +which propagate them (like standard operators). Packages should consider +whether it makes sense to propagate missing values when defining new functions, +and define methods appropriately if that is the case. Passing a `missing` value +to a function for which no method accepting arguments of type `Missing` is defined +throws a `MethodError`, just like for any other type. + +## Equality and Comparison Operators + +Standard equality and comparison operators follow the propagation rule presented +above: if any of the operands is `missing`, the result is `missing`. +Here are a few examples: +```doctest +julia> missing == 1 +missing + +julia> missing == missing +missing + +julia> missing < 1 +missing + +julia> 2 >= missing +missing + +``` + +In particular, note that `missing == missing` returns `missing`, so `==` cannot +be used to test whether a value is missing. To test whether `x` is `missing`, +use [`ismissing(x)`](@ref). 
+ +Special comparison operators [`isequal`](@ref) and [`===`](@ref) are exceptions +to the propagation rule: they always return a `Bool` value, even in the presence +of `missing` values, considering `missing` as equal to `missing` and as different +from any other value. They can therefore be used to test whether a value is `missing`: +```doctest +julia> missing === 1 +false + +julia> isequal(missing, 1) +false + +julia> missing === missing +true + +julia> isequal(missing, missing) +true + +``` + +The [`isless`](@ref) operator is another exception: `missing` is considered +as greater than any other value. This operator is used by [`sort`](@ref), +which therefore places `missing` values after all other values. + +```doctest +julia> isless(1, missing) +true + +julia> isless(missing, Inf) +false + +julia> isless(missing, missing) +false + +``` + +## Logical operators + +Logical (or boolean) operators [`|`](@ref), [`&`](@ref) and [`xor`](@ref) are +another special case, as they only propagate `missing` values when it is logically +required. For these operators, whether or not the result is uncertain depends +on the particular operation, following the well-established rules of +[*three-valued logic*](https://en.wikipedia.org/wiki/Three-valued_logic) which are +also implemented by `NULL` in SQL and `NA` in R. This abstract definition actually +corresponds to a relatively natural behavior which is best explained +via concrete examples. + +Let us illustrate this principle with the logical "or" operator [`|`](@ref). 
+Following the rules of boolean logic, if one of the operands is `true`, +the value of the other operand does not have an influence on the result, +which will always be `true`: +```doctest +julia> true | true +true + +julia> true | false +true + +julia> false | true +true + +``` + +Based on this observation, we can conclude that if one of the operands is `true` +and the other `missing`, we know that the result is `true` in spite of the +uncertainty about the actual value of one of the operands. If we had +been able to observe the actual value of the second operand, it could only be +`true` or `false`, and in both cases the result would be `true`. Therefore, +in this particular case, missingness does *not* propagate: +```doctest +julia> true | missing +true + +julia> missing | true +true + +``` + +On the contrary, if one of the operands is `false`, the result could be either +`true` or `false` depending on the value of the other operand. Therefore, +if that operand is `missing`, the result has to be `missing` too: +```doctest +julia> false | true +true + +julia> true | false +true + +julia> false | false +false + +julia> false | missing +missing + +julia> missing | false +missing + +``` + +The behavior of the logical "and" operator [`&`](@ref) is similar to that of the +`|` operator, with the difference that missingness does not propagate when +one of the operands is `false`. For example, when that is the case of the first +operand: +```doctest +julia> false & false +false + +julia> false & true +false + +julia> false & missing +false + +``` + +On the other hand, missingness propagates when one of the operands is `true`, +for example the first one: +```doctest +julia> true & true +true + +julia> true & false +false + +julia> true & missing +missing + +``` + +Finally, the "exclusive or" logical operator [`xor`](@ref) always propagates +`missing` values, since both operands always have an effect on the result. 
+Also note that the negation operator [`!`](@ref) returns `missing` when the +operand is `missing` just like other unary operators. + +## Control Flow and Short-Circuiting Operators + +Control flow operators including [`if`](@ref), [`while`](@ref) and the +[ternary operator](@ref man-conditional-evaluation) `x ? y : z` +do not allow for missing values. This is because of the uncertainty about whether +the actual value would be `true` or `false` if we could observe it, +which implies that we do not know how the program should behave. A `TypeError` +is thrown as soon as a `missing` value is encountered in this context: +```doctest +julia> if missing + println("here") + end +ERROR: TypeError: non-boolean (Missing) used in boolean context + +``` + +For the same reason, contrary to logical operators presented above, +the short-circuiting boolean operators [`&&`](@ref) and [`||`](@ref) do not +allow for `missing` values in situations where the value of the operand +determines whether the next operand is evaluated or not. For example: +```doctest +julia> missing || false +ERROR: TypeError: non-boolean (Missing) used in boolean context + +julia> missing && false +ERROR: TypeError: non-boolean (Missing) used in boolean context + +julia> true && missing && false +ERROR: TypeError: non-boolean (Missing) used in boolean context + +``` + +On the other hand, no error is thrown when the result can be determined without +the `missing` values. 
This is the case when the code short-circuits +before evaluating the `missing` operand, and when the `missing` operand is the +last one: +```doctest +julia> true && missing +missing + +julia> false && missing +false + +``` + +## Arrays With Missing Values + +Arrays containing missing values can be created like other arrays: +```doctest +julia> [1, missing] +2-element Array{Union{Missing, Int64},1}: + 1 + missing + +``` + +As this example shows, the element type of such arrays is `Union{Missing, T}`, +with `T` the type of the non-missing values. This simply reflects the fact that +array entries can be either of type `T` (here, `Int64`) or of type `Missing`. +This kind of array uses an efficient memory storage equivalent to an `Array{T}` +holding the actual values combined with an `Array{UInt8}` indicating the type +of the entry (i.e. whether it is `Missing` or `T`). + +Uninitialized arrays allowing for missing values can be constructed with the +standard syntax. By default, arrays with an [`isbits`](@ref) element type are +filled with `missing` values: +```doctest +julia> Array{Union{Missing, Int}}(uninitialized, 2, 3) +2×3 Array{Union{Missing, Int64},2}: + missing missing missing + missing missing missing + +``` + +An array allowing for `missing` values but which does not contain any such value +can be converted back to an array which does not allow for missing values using +[`convert`](@ref). 
If the array contains `missing` values, a `MethodError` is thrown +during conversion: +```doctest +julia> x = Union{Missing, String}["a", "b"] +2-element Array{Union{Missing, String},1}: + "a" + "b" + +julia> convert(Array{String}, x) +2-element Array{String,1}: + "a" + "b" + +julia> y = Union{Missing, String}[missing, "b"] +2-element Array{Union{Missing, String},1}: + missing + "b" + +julia> convert(Array{String}, y) +ERROR: MethodError: Cannot `convert` an object of type Missing to an object of type String +This may have arisen from a call to the constructor String(...), +since type constructors fall back to convert methods. +Stacktrace: +[...] +``` + +## Logical Operations on Arrays + +The three-valued logic described above for logical operators is also used +by logical functions applied to arrays. Thus, array equality tests using +the [`==`](@ref) operator return `missing` whenever the result cannot be +determined without knowing the actual value of the `missing` entry. In practice, +this means that `missing` is returned if all non-missing values of the compared +arrays are equal, but one or both arrays contain missing values (possibly at +different positions): +```doctest +julia> [1, missing] == [2, missing] +false + +julia> [1, missing] == [1, missing] +missing + +julia> [1, 2, missing] == [1, missing, 2] +missing + +``` + +As for single values, use [`isequal`](@ref) to treat `missing` values as equal +to other `missing` values but different from non-missing values: +```doctest +julia> isequal([1, missing], [1, missing]) +true + +julia> isequal([1, 2, missing], [1, missing, 2]) +false + +``` + +Functions [`any`](@ref) and [`all`](@ref) also follow the rules of +three-valued logic, returning `missing` when the result cannot be determined: +```doctest +julia> all([true, missing]) +missing + +julia> all([false, missing]) +false + +julia> any([true, missing]) +true + +julia> any([false, missing]) +missing + +``` diff --git 
a/doc/src/manual/noteworthy-differences.md b/doc/src/manual/noteworthy-differences.md index 26954d159b59c..1809532fc9338 100644 --- a/doc/src/manual/noteworthy-differences.md +++ b/doc/src/manual/noteworthy-differences.md @@ -180,7 +180,10 @@ For users coming to Julia from R, these are some noteworthy differences: code is often achieved by using devectorized loops. * Julia is eagerly evaluated and does not support R-style lazy evaluation. For most users, this means that there are very few unquoted expressions or column names. - * Julia does not support the `NULL` type. + * Julia does not support the `NULL` type. The closest equivalent is [`nothing`](@ref), but it + behaves like a scalar value rather than like a list. Use `x === nothing` instead of `is.null(x)`. + * In Julia, missing values are represented by the [`missing`](@ref) object rather than by `NA`. + Use [`ismissing(x)`](@ref) instead of `is.na(x)`. * Julia lacks the equivalent of R's `assign` or `get`. * In Julia, `return` does not require parentheses. 
* In R, an idiomatic way to remove unwanted values is to use logical indexing, like in the expression diff --git a/doc/src/stdlib/base.md b/doc/src/stdlib/base.md index d29a4e552d528..a7ec3f12dff97 100644 --- a/doc/src/stdlib/base.md +++ b/doc/src/stdlib/base.md @@ -221,6 +221,13 @@ Base.isnull Base.unsafe_get ``` +## Missing Values +```@docs +Base.Missing +Base.missing +Base.ismissing +``` + ## System ```@docs @@ -285,6 +292,7 @@ Core.InterruptException Base.KeyError Base.LoadError Base.MethodError +Base.MissingException Base.NullException Core.OutOfMemoryError Core.ReadOnlyMemoryError diff --git a/test/ambiguous.jl b/test/ambiguous.jl index 3e22e49a58053..4b428a65b8415 100644 --- a/test/ambiguous.jl +++ b/test/ambiguous.jl @@ -283,6 +283,12 @@ end pop!(need_to_handle_undef_sparam, which(Base.SparseArrays._absspvec_vcat, (AbstractSparseArray{Tv, Ti, 1} where {Tv, Ti},))) pop!(need_to_handle_undef_sparam, which(Base.SparseArrays._absspvec_hcat, (AbstractSparseArray{Tv, Ti, 1} where {Tv, Ti},))) pop!(need_to_handle_undef_sparam, which(Base.cat, (Any, Base.SparseArrays._TypedDenseConcatGroup{T} where T))) + pop!(need_to_handle_undef_sparam, which(Base.float, Tuple{AbstractArray{Union{Missing, T},N} where {T, N}})) + pop!(need_to_handle_undef_sparam, which(Base.convert, Tuple{Type{Union{Missing, T}} where T, Any})) + pop!(need_to_handle_undef_sparam, which(Base.promote_rule, Tuple{Type{Union{Missing, S}} where S, Type{T} where T})) + pop!(need_to_handle_undef_sparam, which(Base.zero, Tuple{Type{Union{Missing, T}} where T})) + pop!(need_to_handle_undef_sparam, which(Base.one, Tuple{Type{Union{Missing, T}} where T})) + pop!(need_to_handle_undef_sparam, which(Base.oneunit, Tuple{Type{Union{Missing, T}} where T})) @test need_to_handle_undef_sparam == Set() end end diff --git a/test/choosetests.jl b/test/choosetests.jl index dd76d15ee2406..ae0ed781549eb 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -52,7 +52,7 @@ function choosetests(choices = []) 
"checked", "bitset", "floatfuncs", "compile", "distributed", "inline", "boundscheck", "error", "ambiguous", "cartesian", "asmvariant", "osutils", "channels", "iostream", "specificity", "codegen", "codevalidation", - "reinterpretarray", "syntax" + "reinterpretarray", "syntax", "missing" ] if isdir(joinpath(JULIA_HOME, Base.DOCDIR, "examples")) diff --git a/test/missing.jl b/test/missing.jl new file mode 100644 index 0000000000000..a2ea8e3b1eaba --- /dev/null +++ b/test/missing.jl @@ -0,0 +1,248 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +@testset "MissingException" begin + @test sprint(showerror, MissingException("test")) == "MissingException: test" +end + +@testset "convert" begin + @test convert(Union{Int, Missing}, 1) === 1 + @test convert(Union{Int, Missing}, 1.0) === 1 + @test_throws MethodError convert(Missing, 1) + @test_throws MethodError convert(Union{Int, Missing}, "a") +end + +@testset "promote rules" begin + @test promote_type(Missing, Missing) == Missing + @test promote_type(Missing, Int) == Union{Missing, Int} + @test promote_type(Int, Missing) == Union{Missing, Int} + @test promote_type(Int, Any) == Any + @test promote_type(Any, Any) == Any + @test promote_type(Missing, Any) == Any + @test promote_type(Any, Missing) == Any + @test promote_type(Union{Int, Missing}, Missing) == Union{Int, Missing} + @test promote_type(Missing, Union{Int, Missing}) == Union{Int, Missing} + @test promote_type(Union{Int, Missing}, Int) == Union{Int, Missing} + @test promote_type(Int, Union{Int, Missing}) == Union{Int, Missing} + @test promote_type(Any, Union{Int, Missing}) == Any + @test promote_type(Union{Int, Missing}, Union{Int, Missing}) == Union{Int, Missing} + @test promote_type(Union{Float64, Missing}, Union{String, Missing}) == Any + @test promote_type(Union{Float64, Missing}, Union{Int, Missing}) == Union{Float64, Missing} + @test_broken promote_type(Union{Void, Missing, Int}, Float64) == Any +end + +@testset "comparison 
operators" begin + @test (missing == missing) === missing + @test (1 == missing) === missing + @test (missing == 1) === missing + @test (missing != missing) === missing + @test (1 != missing) === missing + @test (missing != 1) === missing + @test isequal(missing, missing) + @test !isequal(1, missing) + @test !isequal(missing, 1) + @test (missing < missing) === missing + @test (missing < 1) === missing + @test (1 < missing) === missing + @test (missing <= missing) === missing + @test (missing <= 1) === missing + @test (1 <= missing) === missing + @test !isless(missing, missing) + @test !isless(missing, 1) + @test isless(1, missing) +end + +@testset "arithmetic operators" begin + arithmetic_operators = [+, -, *, /, ^, Base.div, Base.mod, Base.fld, Base.rem] + + # All unary operators return missing when evaluating missing + for f in [!, +, -] + @test ismissing(f(missing)) + end + + # All arithmetic operators return missing when operating on two missing's + # All arithmetic operators return missing when operating on a scalar and a missing + # All arithmetic operators return missing when operating on a missing and a scalar + for f in arithmetic_operators + @test ismissing(f(missing, missing)) + @test ismissing(f(1, missing)) + @test ismissing(f(missing, 1)) + end +end + +@testset "bit operators" begin + bit_operators = [&, |, ⊻] + + # All bit operators return missing when operating on two missing's + for f in bit_operators + @test ismissing(f(missing, missing)) + end +end + +@testset "boolean operators" begin + @test ismissing(missing & true) + @test ismissing(true & missing) + @test !(missing & false) + @test !(false & missing) + @test ismissing(missing | false) + @test ismissing(false | missing) + @test missing | true + @test true | missing + @test ismissing(xor(missing, true)) + @test ismissing(xor(true, missing)) + @test ismissing(xor(missing, false)) + @test ismissing(xor(false, missing)) + + @test ismissing(missing & 1) + @test ismissing(1 & missing) + @test 
ismissing(missing | 1) + @test ismissing(1 | missing) + @test ismissing(xor(missing, 1)) + @test ismissing(xor(1, missing)) +end + +@testset "* string concatenation" begin + @test ismissing("a" * missing) + @test ismissing(missing * "a") +end + +# Emulate a unitful type such as Dates.Minute +struct Unit + value::Int +end +Base.zero(::Type{Unit}) = Unit(0) +Base.one(::Type{Unit}) = 1 + +@testset "elementary functions" begin + elementary_functions = [abs, abs2, sign, + acos, acosh, asin, asinh, atan, atanh, sin, sinh, + conj, cos, cosh, tan, tanh, + exp, exp2, expm1, log, log10, log1p, log2, + exponent, sqrt, gamma, lgamma, + identity, zero, one, oneunit, + iseven, isodd, ispow2, + isfinite, isinf, isnan, iszero, + isinteger, isreal, isempty, transpose, float] + + # All elementary functions return missing when evaluating missing + for f in elementary_functions + @test ismissing(f(missing)) + end + + for T in (Int, Float64) + @test zero(Union{T, Missing}) === T(0) + @test one(Union{T, Missing}) === T(1) + @test oneunit(Union{T, Missing}) === T(1) + end + + @test_throws MethodError zero(Union{Symbol, Missing}) + @test_throws MethodError one(Union{Symbol, Missing}) + @test_throws MethodError oneunit(Union{Symbol, Missing}) + + for T in (Unit,) + @test zero(Union{T, Missing}) === T(0) + @test one(Union{T, Missing}) === 1 + @test oneunit(Union{T, Missing}) === T(1) + end + + @test_throws MethodError zero(Any) + @test_throws MethodError one(Any) + @test_throws MethodError oneunit(Any) + + @test_throws MethodError zero(String) + @test_throws MethodError zero(Union{String, Missing}) +end + +@testset "rounding functions" begin + rounding_functions = [ceil, floor, round, trunc] + + # All rounding functions return missing when evaluating missing as first argument + for f in rounding_functions + @test ismissing(f(missing)) + @test ismissing(f(missing, 1)) + @test ismissing(f(missing, 1, 1)) + @test ismissing(f(Union{Int, Missing}, missing)) + @test_throws MissingException f(Int, 
missing) + end +end + +@testset "printing" begin + @test sprint(show, missing) == "missing" + @test sprint(showcompact, missing) == "missing" + @test sprint(show, [missing]) == "$Missing[missing]" + @test sprint(show, [1 missing]) == "$(Union{Int, Missing})[1 missing]" + b = IOBuffer() + display(TextDisplay(b), [missing]) + @test String(take!(b)) == "1-element Array{$Missing,1}:\n missing" + b = IOBuffer() + display(TextDisplay(b), [1 missing]) + @test String(take!(b)) == "1×2 Array{$(Union{Int, Missing}),2}:\n 1 missing" +end + +@testset "arrays with missing values" begin + x = convert(Vector{Union{Int, Missing}}, [1.0, missing]) + @test isa(x, Vector{Union{Int, Missing}}) + @test isequal(x, [1, missing]) + x = convert(Vector{Union{Int, Missing}}, [1.0]) + @test isa(x, Vector{Union{Int, Missing}}) + @test x == [1] + x = convert(Vector{Union{Int, Missing}}, [missing]) + @test isa(x, Vector{Union{Int, Missing}}) + @test isequal(x, [missing]) +end + +@testset "== and != on arrays" begin + @test ismissing([1, missing] == [1, missing]) + @test ismissing(["a", missing] == ["a", missing]) + @test ismissing(Any[1, missing] == Any[1, missing]) + @test ismissing(Any[missing] == Any[missing]) + @test ismissing([missing] == [missing]) + @test ismissing(Any[missing, 2] == Any[1, missing]) + @test ismissing([missing, false] == BitArray([true, false])) + @test ismissing(Any[missing, false] == BitArray([true, false])) + @test Union{Int, Missing}[1] == Union{Float64, Missing}[1.0] + @test Union{Int, Missing}[1] == [1.0] + @test Union{Bool, Missing}[true] == BitArray([true]) + @test !(Union{Int, Missing}[1] == [2]) + @test !([1] == Union{Int, Missing}[2]) + @test !(Union{Int, Missing}[1] == Union{Int, Missing}[2]) + + @test ismissing([1, missing] != [1, missing]) + @test ismissing(["a", missing] != ["a", missing]) + @test ismissing(Any[1, missing] != Any[1, missing]) + @test ismissing(Any[missing] != Any[missing]) + @test ismissing([missing] != [missing]) + @test 
ismissing(Any[missing, 2] != Any[1, missing]) + @test ismissing([missing, false] != BitArray([true, false])) + @test ismissing(Any[missing, false] != BitArray([true, false])) + @test !(Union{Int, Missing}[1] != Union{Float64, Missing}[1.0]) + @test !(Union{Int, Missing}[1] != [1.0]) + @test !(Union{Bool, Missing}[true] != BitArray([true])) + @test Union{Int, Missing}[1] != [2] + @test [1] != Union{Int, Missing}[2] + @test Union{Int, Missing}[1] != Union{Int, Missing}[2] +end + +@testset "any & all" begin + @test any([true, missing]) + @test any(x -> x == 1, [1, missing]) + @test ismissing(any([false, missing])) + @test ismissing(any(x -> x == 1, [2, missing])) + @test ismissing(all([true, missing])) + @test ismissing(all(x -> x == 1, [1, missing])) + @test !all([false, missing]) + @test !all(x -> x == 1, [2, missing]) + @test 1 in [1, missing] + @test ismissing(2 in [1, missing]) + @test ismissing(missing in [1, missing]) +end + +@testset "float" begin + @test isequal(float([1, missing]), [1, missing]) + @test float([1, missing]) isa Vector{Union{Float64, Missing}} + @test isequal(float(Union{Int, Missing}[missing]), [missing]) + @test float(Union{Int, Missing}[missing]) isa Vector{Union{Float64, Missing}} + @test float(Union{Int, Missing}[1]) == [1] + @test float(Union{Int, Missing}[1]) isa Vector{Union{Float64, Missing}} + @test isequal(float([missing]), [missing]) + @test float([missing]) isa Vector{Missing} +end diff --git a/test/reduce.jl b/test/reduce.jl index e3c3273247dcf..265c47ce97c37 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -238,39 +238,39 @@ A = circshift(reshape(1:24,2,3,4), (0,1,1)) # any & all -@test any([]) == false -@test any(Bool[]) == false -@test any([true]) == true -@test any([false, false]) == false -@test any([false, true]) == true -@test any([true, false]) == true -@test any([true, true]) == true -@test any([true, true, true]) == true -@test any([true, false, true]) == true -@test any([false, false, false]) == false - -@test 
all([]) == true -@test all(Bool[]) == true -@test all([true]) == true -@test all([false, false]) == false -@test all([false, true]) == false -@test all([true, false]) == false -@test all([true, true]) == true -@test all([true, true, true]) == true -@test all([true, false, true]) == false -@test all([false, false, false]) == false - -@test any(x->x>0, []) == false -@test any(x->x>0, Int[]) == false -@test any(x->x>0, [-3]) == false -@test any(x->x>0, [4]) == true -@test any(x->x>0, [-3, 4, 5]) == true - -@test all(x->x>0, []) == true -@test all(x->x>0, Int[]) == true -@test all(x->x>0, [-3]) == false -@test all(x->x>0, [4]) == true -@test all(x->x>0, [-3, 4, 5]) == false +@test @inferred any([]) == false +@test @inferred any(Bool[]) == false +@test @inferred any([true]) == true +@test @inferred any([false, false]) == false +@test @inferred any([false, true]) == true +@test @inferred any([true, false]) == true +@test @inferred any([true, true]) == true +@test @inferred any([true, true, true]) == true +@test @inferred any([true, false, true]) == true +@test @inferred any([false, false, false]) == false + +@test @inferred all([]) == true +@test @inferred all(Bool[]) == true +@test @inferred all([true]) == true +@test @inferred all([false, false]) == false +@test @inferred all([false, true]) == false +@test @inferred all([true, false]) == false +@test @inferred all([true, true]) == true +@test @inferred all([true, true, true]) == true +@test @inferred all([true, false, true]) == false +@test @inferred all([false, false, false]) == false + +@test @inferred any(x->x>0, []) == false +@test @inferred any(x->x>0, Int[]) == false +@test @inferred any(x->x>0, [-3]) == false +@test @inferred any(x->x>0, [4]) == true +@test @inferred any(x->x>0, [-3, 4, 5]) == true + +@test @inferred all(x->x>0, []) == true +@test @inferred all(x->x>0, Int[]) == true +@test @inferred all(x->x>0, [-3]) == false +@test @inferred all(x->x>0, [4]) == true +@test @inferred all(x->x>0, [-3, 4, 5]) == 
false @test reduce((a, b) -> a .| b, fill(trues(5), 24)) == trues(5) @test reduce((a, b) -> a .| b, fill(falses(5), 24)) == falses(5) @@ -302,12 +302,12 @@ end let f(x) = x == 1 ? true : x == 2 ? false : 1 @test any(Any[false,true,false]) - @test any(map(f, [2,1,2])) - @test any([f(x) for x in [2,1,2]]) + @test @inferred any(map(f, [2,1,2])) + @test @inferred any([f(x) for x in [2,1,2]]) @test all(Any[true,true,true]) - @test all(map(f, [1,1,1])) - @test all([f(x) for x in [1,1,1]]) + @test @inferred all(map(f, [1,1,1])) + @test @inferred all([f(x) for x in [1,1,1]]) @test_throws TypeError any([1,true]) @test_throws TypeError all([true,1]) @@ -320,8 +320,8 @@ end struct SomeFunctor end (::SomeFunctor)(x) = true -@test any(SomeFunctor(), 1:10) -@test all(SomeFunctor(), 1:10) +@test @inferred any(SomeFunctor(), 1:10) +@test @inferred all(SomeFunctor(), 1:10) # in diff --git a/test/show.jl b/test/show.jl index 82b87aab78595..95028f303bbdc 100644 --- a/test/show.jl +++ b/test/show.jl @@ -535,9 +535,9 @@ let repr = sprint(show, "text/html", methods(f16580)) end if isempty(Base.GIT_VERSION_INFO.commit) - @test contains(Base.url(first(methods(sin))),"https://github.com/JuliaLang/julia/tree/v$VERSION/base/mpfr.jl#L") + @test contains(Base.url(first(methods(sin))),"https://github.com/JuliaLang/julia/tree/v$VERSION/base/missing.jl#L") else - @test contains(Base.url(first(methods(sin))),"https://github.com/JuliaLang/julia/tree/$(Base.GIT_VERSION_INFO.commit)/base/mpfr.jl#L") + @test contains(Base.url(first(methods(sin))),"https://github.com/JuliaLang/julia/tree/$(Base.GIT_VERSION_INFO.commit)/base/missing.jl#L") end # print_matrix should be able to handle small and large objects easily, test by